diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 44c6001..2f9d7b7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,9 +1,11 @@
 ###############################################################################
-# .gitlab-ci.yml — CI pipeline for bash script testing
+# .gitlab-ci.yml — CI pipeline for linux-scripts repository
 #
-# Stages:
-#   1. lint — ShellCheck static analysis + bash syntax check
-#   2. test — Run --help and --dry-run in Ubuntu and RHEL containers
+# All scripts are tested on every push:
+#   1. lint — ShellCheck + bash syntax + PowerShell syntax
+#   2. test — --help and --dry-run validation on Ubuntu and Rocky Linux
+#
+# On success on master, scripts are ready to sync to the website.
 ###############################################################################

 stages:
@@ -11,7 +13,8 @@ stages:
   - test

 variables:
-  SHELLCHECK_SEVERITY: "warning"
+  # Start at "error" for a clean baseline, tighten to "warning" as scripts are cleaned up
+  SHELLCHECK_SEVERITY: "error"

 # ─────────────────────────────────────────────
 # Lint Stage
@@ -21,30 +24,57 @@ shellcheck:
   stage: lint
   image: koalaman/shellcheck-alpine:stable
   script:
-    - echo "Running ShellCheck on all .sh files..."
+    - echo "Running ShellCheck on $(find . -name '*.sh' -not -path './.git/*' | wc -l) scripts..."
     - find . -name "*.sh" -not -path "./.git/*" -print0 | xargs -0 -r shellcheck --severity="$SHELLCHECK_SEVERITY" --format=tty
+    - echo "ShellCheck passed"

 bash-syntax:
   stage: lint
   image: bash:5
   script:
-    - echo "Checking bash syntax (bash -n)..."
+    - echo "Checking bash syntax..."
     - |
       errors=0
+      total=0
       for script in $(find . -name "*.sh" -not -path "./.git/*"); do
+        total=$((total + 1))
         if ! bash -n "$script" 2>&1; then
           errors=$((errors + 1))
         fi
       done
       if [ "$errors" -gt 0 ]; then
-        echo "FAILED: $errors script(s) have syntax errors"
+        echo "FAILED: $errors/$total script(s) have syntax errors"
         exit 1
       fi
-      echo "All scripts pass syntax check"
+      echo "All $total scripts pass syntax check"
+
+powershell-syntax:
+  stage: lint
+  image: mcr.microsoft.com/powershell:lts-ubuntu-24.04
+  script:
+    - echo "Checking PowerShell syntax..."
+    - |
+      errors=0
+      total=0
+      for script in $(find . -name "*.ps1" -not -path "./.git/*"); do
+        total=$((total + 1))
+        echo "Checking: $script"
+        if ! pwsh -NoProfile -Command "try { \$errs = \$null; \$null = [System.Management.Automation.Language.Parser]::ParseFile('$script', [ref]\$null, [ref]\$errs); if (\$errs.Count -gt 0) { \$errs | ForEach-Object { Write-Host \$_ }; exit 1 }; Write-Host 'OK: $script' } catch { Write-Error \$_; exit 1 }" 2>&1; then  # ParseFile does not throw on bad syntax; it returns the errors through its out-parameter, so they must be inspected
+          errors=$((errors + 1))
+        fi
+      done
+      if [ "$errors" -gt 0 ]; then
+        echo "FAILED: $errors/$total PowerShell script(s) have syntax errors"
+        exit 1
+      fi
+      echo "All $total PowerShell scripts pass syntax check"
+  rules:
+    - exists:
+        - "**/*.ps1"  # recursive, to match the recursive find above; a bare "*.ps1" only matches the repository root

 # ─────────────────────────────────────────────
-# Test Stage — Ubuntu
+# Test Stage — Ubuntu 24.04
 # ─────────────────────────────────────────────

 test-ubuntu:
@@ -54,19 +84,30 @@ test-ubuntu:
     - apt-get update -qq
     - apt-get install -y -qq procps iproute2 kmod >/dev/null 2>&1
   script:
-    - echo "=== Testing on Ubuntu 24.04 ==="
+    - echo "=== Testing --help flags on Ubuntu 24.04 ==="
     - |
-      for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
-        echo ""
-        echo "--- $(basename "$script") --help ---"
-        bash "$script" --help 2>&1 || true
+      passed=0
+      failed=0
+      for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
+        name=$(basename "$script")
+        if bash "$script" --help >/dev/null 2>&1; then
+          echo "✓ $name --help"
+          passed=$((passed + 1))
+        elif bash "$script" -h >/dev/null 2>&1; then
+          echo "✓ $name -h"
+          passed=$((passed + 1))
+        else
+          echo "○ $name (no --help flag)"
+        fi
       done
+      echo ""
+      echo "$passed scripts have working --help"
     - echo ""
-    - echo "--- networktuning.sh --dry-run ---"
+    - echo "=== Testing networktuning.sh --dry-run ==="
     - bash networktuning.sh --dry-run 2>&1 || true

 # ─────────────────────────────────────────────
-# Test Stage — RHEL
+# Test Stage — Rocky Linux 9
 # ─────────────────────────────────────────────

 test-rhel:
@@ -75,13 +116,23 @@ test-rhel:
   before_script:
     - dnf install -y -q procps iproute kmod >/dev/null 2>&1
   script:
-    - echo "=== Testing on Rocky Linux 9 ==="
+    - echo "=== Testing --help flags on Rocky Linux 9 ==="
     - |
-      for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
-        echo ""
-        echo "--- $(basename "$script") --help ---"
-        bash "$script" --help 2>&1 || true
+      passed=0
+      for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
+        name=$(basename "$script")
+        if bash "$script" --help >/dev/null 2>&1; then
+          echo "✓ $name --help"
+          passed=$((passed + 1))
+        elif bash "$script" -h >/dev/null 2>&1; then
+          echo "✓ $name -h"
+          passed=$((passed + 1))
+        else
+          echo "○ $name (no --help flag)"
+        fi
       done
+      echo ""
+      echo "$passed scripts have working --help"
     - echo ""
-    - echo "--- networktuning.sh --dry-run ---"
+    - echo "=== Testing networktuning.sh --dry-run ==="
     - bash networktuning.sh --dry-run 2>&1 || true
diff --git a/NagiosInstall.sh b/NagiosInstall.sh
new file mode 100644
index 0000000..96e6b04
--- /dev/null
+++ b/NagiosInstall.sh
@@ -0,0 +1,287 @@
+#!/bin/bash
+
+######################################################################################
+####     Version 2.2                                                              ####
+####     For questions or comments contact@mylinux.work                           ####
+####     Author : Phil Connor                                                     ####
+####                                                                              ####
+####     Notes :                                                                  ####
+####     This script is a simple "helper" to install and configure Nagios 4,      ####
+####     NRPE and its Apache web UI on Ubuntu / RedHat based servers.             ####
+####     There is no silver bullet. Don't expect the perfect setup,               ####
+####     review comments and adapt the parameters to your application usage.      ####
+####                                                                              ####
+####     Use this script at your OWN risk. There is no guarantee whatsoever.
####
+####                                                                              ####
+####     Usage chmod 755 then ./NagiosInstall.sh or bash NagiosInstall.sh         ####
+######################################################################################
+
+############################
+#### User Configurables ####
+############################
+# HTTP=apache
+NAGAD=nagiosadmin
+NAGADPASS=MyPaSsWoRd
+
+
+##########################
+#### System Variables ####
+##########################
+# IPADD=$(ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1')
+OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
+OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
+# SAEMAIL=
+
+############################################################
+#### Detect Package Manager from OS and OSVer Variables ####
+############################################################
+if [ "${OS}" = ubuntu ]; then
+    PAKMGR="apt -y"
+elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
+    if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
+        PAKMGR="dnf -y"
+    fi
+fi
+
+###########################
+#### Install Net-Tools ####
+###########################
+if [ ! "$(command -v ifconfig)" ]; then
+    if [ "${OS}" = ubuntu ]; then
+        ${PAKMGR} update
+        ${PAKMGR} install net-tools   # ifconfig ships in net-tools; there is no "net-utils" package
+    else
+        ${PAKMGR} install net-tools
+    fi
+fi
+
+########################
+#### Nagios Install ####
+########################
+function nagios_install() {
+    {
+        # Apache snippet that carries the Nagios access rules edited below
+        if [ "${OS}" = ubuntu ]; then
+            htpath=/etc/apache2/conf-enabled/nagios4-cgi.conf
+        else
+            htpath=/etc/httpd/conf.d/nagios.conf
+        fi
+        #if [ "${OS}" = ubuntu ]; then
+        ${PAKMGR} update
+        DEBIAN_FRONTEND=noninteractive ${PAKMGR} install nagios4 nagios-nrpe-server nagios-plugins nagios-plugins-contrib expect
+        a2enmod authz_groupfile auth_digest
+        # ${PAKMGR} install autoconf gcc libc6 make wget unzip apache2 php libapache2-mod-php libgd-dev libssl-dev expect
+        sed -i 's/Require ip ::1\/128 fc00::\/7 fe80::\/10 10\.0\.0\.0\/8 127\.0\.0\.0\/8 169\.254\.0\.0\/16 172\.16\.0\.0\/12 192\.168\.0\.0\/16/# Require ip ::1\/128 fc00::\/7 fe80::\/10 10\.0\.0\.0\/8 127\.0\.0\.0\/8 169\.254\.0\.0\/16 172\.16\.0\.0\/12 192\.168\.0\.0\/16/g' "$htpath"
+        #sed -i 's//#/g' $htpath
+        sed -i 's/Require all/#Require all/g' "$htpath"
+        #sed -i 's//#/g' $htpath
+        sed -i 's/#Require /Require /g' "$htpath"
+        expect -f - <<-EOF
+            set timeout 5
+            spawn htdigest -c /etc/nagios4/htdigest.users Nagios4 $NAGAD
+            expect "New password:"
+            send -- "$NAGADPASS\r"
+            expect "Re-type new password:"
+            send -- "$NAGADPASS\r"
+            expect eof
+EOF
+        systemctl enable --now nagios
+        systemctl --no-pager status nagios
+        if [ "${OS}" = ubuntu ]; then
+            systemctl enable apache2
+            systemctl restart apache2
+        else
+            systemctl enable httpd
+            systemctl restart httpd
+        fi
+
+    }
+}
+
+# nagios_install is invoked once, further down this file; calling it here as well ran the whole install twice
+
+
+
+
+# OUTFILE1="$nagdir/nrpe_rule.te"
+# # TITLE="nrpe_rule"
+# define NRPE_RULE << 'EOF'
+# module nrpe_rule 1.0;
+
+# require {
+# type nrpe_t;
+# type proc_net_t;
+# class file { open read };
+# class file { ioctl open read getattr };
+# }
+
+# #=================== nrpe_t =========================
+# allow nrpe_t proc_net_t:file open;
+# allow nrpe_t
proc_net_t:file read; +# allow nrpe_t proc_net_t:file { getattr ioctl }; +# EOF +# { +# printf "%s\n" "$NRPE_RULE" | cut -c 3- +# } > "$OUTFILE1" +# checkmodule -M -m -o $nagdir/nrpe_rule.mod $nagdir/nrpe_rule.te +# semodule_package -o $nagdir/nrpe_rule.pp -m $nagdir/nrpe_rule.mod +# semodule -i $nagdir/nrpe_rule.pp +# semanage permissive -a nrpe_t +# ${PAKMGR} install nrpe nrpe-selinux nagios-plugins nagios-plugins-all nagios-plugins-uptime nagios-plugins-oracle nagios-plugins-check-updates + + +# sed -i "/^allowed_hosts/c\allowed_hosts=127.0.0.1,::1,$IPADD" $nrpecfg +# sed -i "/^#command\[check_load\]/c\command[check_uptime]=$nagdir/check_uptime" $nrpecfg +# sed -i "/^command\[check_load\]/c\command\[check_load\]=$nagdir/check_load -r -w 6,4,2 -c 12,10,7" $nrpecfg +# sed -i "/^command\[check_hda1\]/c\command[check_hda1]=$nagdir/check_disk -w 15% -c 10% -p /dev/sda3" $nrpecfg +# sed -i "/^command\[check_zombie_procs\]/c\# command[check_zombie_procs]=$nagdir/check_procs -w 5 -c 10 -s Z" $nrpecfg +# sed -i "/^command\[check_total_procs\]/c\command[check_total_procs]=$nagdir/check_procs -w 250 -c 300 -s RSZDT" $nrpecfg +# sed -i "/^#command\[check_users\]/c\command[check_net]=$nagdir/check_net" $nrpecfg +# sed -i "/^#command\[check_swap\]/c\command[check_swap]=$nagdir/check_swap -w 20% -c 10%" $nrpecfg +# sed -i "/^#command\[check_mem\]/c\command[check_mem]=$nagdir/check_mem" $nrpecfg +# if [ "${OS}" = ubuntu ]; then +# sed -i "/^#command\[check_apt\]/c\command[check_apt]=$nagdir/check_apt/" $nrpecfg +# else +# sed -i "/^#command\[check_yum\]/c\command[check_yum]=$nagdir/check_updates" $nrpecfg +# fi +# sed -i "/^#command\[check_all_procs\]/c\command[check_logic]=$nagdir/check_http -p 7011" $nrpecfg +# sed -i "/^#command\[check_procs\]/c\command[check_oracle]=$nagdir/check_http -p 8010" $nrpecfg +# sed -i "/^#command\[check_disk\]/c\command[check_ping]=$nagdir/check_ping 127.0.0.1 -w 100.0,20% -c 500.0,60%" $nrpecfg +# sed -i 
"/^#command\[check_cpu_stats\]/c\command[check_ssh]=$nagdir/check_ssh" $nrpecfg + +# } +# } + +###################### +#### HTTP Install #### +###################### +# function install_http() { +# { +# if [ "${OS}" = ubuntu ]; then +# if [ $HTTP = apache ]; then +# echo "Apache" +# else +# echo "Nginx" +# fi +# echo "something" +# else +# if [ $HTTP = apache ]; then +# echo "Apache" +# else +# echo "Nginx" +# fi +# fi +# } +# } + +nagios_install +# install_http + + +# # SAMPLE CONFIG SNIPPETS FOR APACHE WEB SERVER +# # +# # This file contains examples of entries that need +# # to be incorporated into your Apache web server +# # configuration file. Customize the paths, etc. as +# # needed to fit your system. + +# ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin" + +# +# # SSLRequireSSL +# Options ExecCGI +# AllowOverride None +# = 2.3> +# +# Require all granted +# # Require host 127.0.0.1 + +# AuthName "Nagios Access" +# AuthType Basic +# AuthUserFile /usr/local/nagios/etc/htpasswd.users +# Require valid-user +# +# +# +# Order allow,deny +# Allow from all +# # Order deny,allow +# # Deny from all +# # Allow from 127.0.0.1 + +# AuthName "Nagios Access" +# AuthType Basic +# AuthUserFile /usr/local/nagios/etc/htpasswd.users +# Require valid-user +# +# + +# Alias /nagios "/usr/local/nagios/share" + +# +# # SSLRequireSSL +# Options None +# AllowOverride None +# = 2.3> +# +# Require all granted +# # Require host 127.0.0.1 + +# AuthName "Nagios Access" +# AuthType Basic +# AuthUserFile /usr/local/nagios/etc/htpasswd.users +# Require valid-user +# +# +# +# Order allow,deny +# Allow from all +# # Order deny,allow +# # Deny from all +# # Allow from 127.0.0.1 + +# AuthName "Nagios Access" +# AuthType Basic +# AuthUserFile /usr/local/nagios/etc/htpasswd.users +# Require valid-user +# +# +# wget https://assets.nagios.com/downloads/nagioscore/releases/nagios-4.4.7.tar.gz +# tar xzf nagios-4.4.7.tar.gz +# cd nagios-4.4.7 || exit +# if [ "${OS}" = ubuntu ]; then +# ./configure 
--with-httpd-conf=/etc/apache2/sites-enabled +# else +# ./configure --with-httpd-conf=/etc/httpd/conf.d +# fi +# make all +# make install-groups-users +# if [ "${OS}" = ubuntu ]; then +# usermod -aG nagios www-data +# else +# usermod -aG nagios apache +# fi +# make install +# make install-init +# make install-daemoninit +# make install-commandmode +# make install-config +# make install-webconf +# if [ "${OS}" = ubuntu ]; then +# a2enmod rewrite cgi +# fi + +# fi +# if [ ! "$(command -v wget)" ]; then +# ${PAKMGR} install wget +# fi +# ndir1=/usr/lib/nagios/plugins +# ndir2=/usr/lib64/nagios/plugins +# #nrpecfg=/etc/nagios/nrpe.cfg +# if [ -d $ndir1 ]; then +# nagdir=$ndir1 +# elif [ -d $ndir2 ]; then +# nagdir=$ndir2 +# fi +# define () { +# IFS=$'\n' read -r -d '' "$1" +# } \ No newline at end of file diff --git a/PdnsInstall.sh b/PdnsInstall.sh new file mode 100644 index 0000000..1a239f7 --- /dev/null +++ b/PdnsInstall.sh @@ -0,0 +1,1298 @@ +#!/bin/bash +set -euo pipefail + +###################################################################################### +#### Version 3.0 #### +#### For questions or comments contact@mylinux.work #### +#### Author : Phil Connor #### +#### #### +#### Notes : #### +#### This script is a simple "helper" to install and configure Maria, #### +#### PowerDNS and PowerAdmin on Linux servers. #### +#### Supported: Ubuntu, Debian, RHEL/CentOS/Rocky/Alma, Fedora, openSUSE #### +#### There is no silver bullet. Don't expect the perfect setup, #### +#### review comments and adapt the parameters to your application usage. #### +#### #### +#### Use this script at your OWN risk. There is no guarantee whatsoever. 
####
+####                                                                              ####
+####     Usage chmod 755 then ./PdnsInstall.sh or bash PdnsInstall.sh             ####
+######################################################################################
+
+########################
+#### User Variables ####
+########################
+MYSQL_PASS="${MYSQL_PASS:-}"        # <-- Set via env var MYSQL_PASS or you will be prompted
+MY_PDNS_USR=pdns                    # <-- The username for your PowerDNS connect to DB
+MY_PDNS_DB=powerdns                 # <-- The name for your PowerDNS DB
+MY_PDNS_PW="${MY_PDNS_PW:-}"        # <-- Set via env var MY_PDNS_PW or you will be prompted
+MY_PDNS_HOST=localhost              # <-- The default here is localhost, but can be set to a remote host if you have configured that
+DEL_MY_CNF=Y                        # <-- Place a Capital Y for yes or N for no here to delete /root/.my.cnf when db_install function is done
+WEB_HOST_NAME=test1.myserver.com    # <-- The FQDN of your server goes here
+EMAIL=admin@$WEB_HOST_NAME          # <-- This is the email you want to use for Let's Encrypt registrations
+HTTP=nginx                          # <-- Choose apache or nginx --> The apache Config is in BETA TESTING please only choose nginx unless you know what you're doing
+
+##########################
+#### System Variables ####
+##########################
+ip4=$(ip -o -4 route get 8.8.8.8 | awk '{print $7; exit}')
+host=$(hostname -f)
+OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
+OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
+# OS family: debian (ubuntu, debian), rhel (centos, red, oracle, rocky, alma, fedora), suse (opensuse)
+OS_FAMILY=""
+
+############################################################
+#### Detect Package Manager from OS and OSVer Variables ####
+############################################################
+if [[ "${OS}" = ubuntu || "${OS}" = debian ]]; then
+    OS_FAMILY="debian"
+    PAKMGR="apt -y"
+elif [[ "${OS}" = centos || "${OS}" = red || "${OS}" = oracle || "${OS}" = rocky || "${OS}" = alma ]]; then
+    OS_FAMILY="rhel"
+    PAKMGR="dnf -y"
+elif [[ "${OS}" = fedora ]]; then
+    OS_FAMILY="rhel"
+    PAKMGR="dnf -y"
+elif [[ "${OS}" = opensuse ]]; then
+    OS_FAMILY="suse"
+    PAKMGR="zypper -n install"
+else
+    echo "Unsupported OS: ${OS}"
+    echo "Supported: Ubuntu, Debian, CentOS, RHEL, Oracle, Rocky, Alma, Fedora, openSUSE"
+    exit 1
+fi
+
+##########################
+#### Detect Root User ####
+##########################
+check_RootUser() {
+    if [[ "$(id -u)" != "0" ]]; then
+        echo "You dont have permission to run $0 as non-root user. Use sudo su -"
+        exit 1
+    fi
+}
+
+############################
+#### Prompt for secrets ####
+############################
+prompt_secrets() {
+    if [[ -z "${MYSQL_PASS}" ]]; then
+        read -rsp "Enter MySQL root password: " MYSQL_PASS
+        echo
+    fi
+    if [[ -z "${MY_PDNS_PW}" ]]; then
+        read -rsp "Enter PowerDNS DB password: " MY_PDNS_PW
+        echo
+    fi
+}
+
+##############################
+#### Validate user inputs ####
+##############################
+validate_inputs() {
+    if [[ "${HTTP}" != "apache" && "${HTTP}" != "nginx" ]]; then
+        echo "Invalid HTTP value '${HTTP}'. Must be 'apache' or 'nginx'."
+        exit 1
+    fi
+}
+
+####################
+#### Code Start ####
+####################
+
+#########################
+#### Install MariaDB ####
+#########################
+install_mysql() {
+    if ! command -v mysql &>/dev/null; then
+        if [[ "${OS_FAMILY}" = "debian" ]]; then
+            ${PAKMGR} update
+            ${PAKMGR} install mariadb-client mariadb-server
+        elif [[ "${OS_FAMILY}" = "suse" ]]; then
+            ${PAKMGR} mariadb mariadb-client
+        else
+            ${PAKMGR} install mariadb mariadb-server
+        fi
+    fi
+    systemctl enable --now mariadb
+}
+
+######################
+#### Secure MySQL ####
+######################
+secure_mysql() {
+    if ! command -v expect &>/dev/null; then
+        if [[ "${OS_FAMILY}" = "suse" ]]; then
+            ${PAKMGR} expect
+        else
+            ${PAKMGR} install expect
+        fi
+    fi
+
+    expect -f - <<-EOF
+        set timeout 10
+        spawn mysql_secure_installation
+        expect "Enter current password for root (enter for none):"
+        send -- "\r"
+        expect "Set root password?"
+        send -- "y\r"
+        expect "New password:"
+        send -- "${MYSQL_PASS}\r"
+        expect "Re-enter new password:"
+        send -- "${MYSQL_PASS}\r"
+        expect "Remove anonymous users?"
+        send -- "y\r"
+        expect "Disallow root login remotely?"
+        send -- "y\r"
+        expect "Remove test database and access to it?"
+        send -- "y\r"
+        expect "Reload privilege tables now?"
+        send -- "y\r"
+        expect eof
+EOF
+}
+
+###################################
+#### Install PowerDNS DataBase ####
+###################################
+pdns_db_install() {
+    # Temporary client credentials so the mysql calls below can authenticate as root
+    if [[ ! -f /root/.my.cnf ]]; then
+        {
+            echo '[mysql]'
+            echo 'user=root'
+            echo "password=${MYSQL_PASS}"
+        } >/root/.my.cnf
+        chmod 600 /root/.my.cnf
+    fi
+
+    mysql -e "CREATE DATABASE ${MY_PDNS_DB} /*!40100 DEFAULT CHARACTER SET utf8 */;"   # no backslash before '!': inside double quotes it reaches MySQL literally and turns the charset clause into a plain comment
+    mysql -e "CREATE USER ${MY_PDNS_USR}@localhost IDENTIFIED BY '${MY_PDNS_PW}';"
+    mysql -e "GRANT ALL PRIVILEGES ON ${MY_PDNS_DB}.* TO '${MY_PDNS_USR}'@'localhost';"
+    mysql -e "FLUSH PRIVILEGES;"
+
+    OUTFILE="/tmp/pdns.sql"
+    cat > "${OUTFILE}" << 'EOF'
+    CREATE TABLE domains (
+      id                    INT AUTO_INCREMENT,
+      name                  VARCHAR(255) NOT NULL,
+      master                VARCHAR(128) DEFAULT NULL,
+      last_check            INT DEFAULT NULL,
+      type                  VARCHAR(6) NOT NULL,
+      notified_serial       INT UNSIGNED DEFAULT NULL,
+      account               VARCHAR(40) CHARACTER SET 'utf8' DEFAULT NULL,
+      PRIMARY KEY (id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE UNIQUE INDEX name_index ON domains(name);
+
+
+    CREATE TABLE records (
+      id                    BIGINT AUTO_INCREMENT,
+      domain_id             INT DEFAULT NULL,
+      name                  VARCHAR(255) DEFAULT NULL,
+      type                  VARCHAR(10) DEFAULT NULL,
+      content               VARCHAR(64000) DEFAULT NULL,
+      ttl                   INT DEFAULT NULL,
+      prio                  INT DEFAULT NULL,
+      disabled              TINYINT(1) DEFAULT 0,
+      ordername             VARCHAR(255) BINARY DEFAULT NULL,
+      auth                  TINYINT(1) DEFAULT 1,
+      PRIMARY KEY (id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE INDEX nametype_index ON records(name,type);
+    CREATE INDEX domain_id ON records(domain_id);
+    CREATE INDEX ordername ON records (ordername);
+
+
+    CREATE TABLE supermasters (
+      ip                    VARCHAR(64) NOT NULL,
+      nameserver            VARCHAR(255) NOT NULL,
+      account               VARCHAR(40) CHARACTER SET 'utf8' NOT NULL,
+      PRIMARY KEY (ip, nameserver)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+
+    CREATE TABLE comments (
+      id                    INT AUTO_INCREMENT,
+      domain_id             INT NOT NULL,
+      name                  VARCHAR(255) NOT NULL,
+      type                  VARCHAR(10) NOT NULL,
+      modified_at           INT NOT NULL,
+      account               VARCHAR(40) CHARACTER SET 'utf8' DEFAULT NULL,
+      comment               TEXT CHARACTER SET 'utf8' NOT NULL,
+      PRIMARY KEY (id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE INDEX comments_name_type_idx ON comments (name, type);
+    CREATE INDEX comments_order_idx ON comments (domain_id, modified_at);
+
+
+    CREATE TABLE domainmetadata (
+      id                    INT AUTO_INCREMENT,
+      domain_id             INT NOT NULL,
+      kind                  VARCHAR(32),
+      content               TEXT,
+      PRIMARY KEY (id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE INDEX domainmetadata_idx ON domainmetadata (domain_id, kind);
+
+
+    CREATE TABLE cryptokeys (
+      id                    INT AUTO_INCREMENT,
+      domain_id             INT NOT NULL,
+      flags                 INT NOT NULL,
+      active                BOOL,
+      published             BOOL DEFAULT 1,
+      content               TEXT,
+      PRIMARY KEY(id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE INDEX domainidindex ON cryptokeys(domain_id);
+
+
+    CREATE TABLE tsigkeys (
+      id                    INT AUTO_INCREMENT,
+      name                  VARCHAR(255),
+      algorithm             VARCHAR(50),
+      secret                VARCHAR(255),
+      PRIMARY KEY (id)
+    ) Engine=InnoDB CHARACTER SET 'latin1';
+
+    CREATE UNIQUE INDEX namealgoindex ON tsigkeys(name, algorithm);
+EOF
+    # Import the schema while the credentials file still exists, then clean up
+    mysql -D "${MY_PDNS_DB}" < "${OUTFILE}"
+    rm -f "${OUTFILE}"
+    if [[ "${DEL_MY_CNF}" != "N" ]]; then
+        rm -f /root/.my.cnf
+    fi
+}
+
+####################################
+#### Install/Configure PowerDNS ####
+####################################
+pdns_app_install() {
+    if [[ "${OS_FAMILY}" = "debian" ]]; then
+        if systemctl is-enabled systemd-resolved &>/dev/null; then
+            systemctl disable --now systemd-resolved
+            systemctl mask systemd-resolved
+            sed -i 's/nameserver /#nameserver /g' /etc/resolv.conf
+            echo -e 'nameserver 8.8.8.8 \nnameserver 8.8.4.4' >> /etc/resolv.conf
+        fi
+        DEBIAN_FRONTEND=noninteractive ${PAKMGR} install pdns-backend-mysql fpdns bind9utils
+    elif [[ "${OS_FAMILY}" = "suse" ]]; then
+        ${PAKMGR} pdns pdns-backend-mysql bind-utils
+    else
+        ${PAKMGR} install epel-release || true
+        if [[ "${OS}" != "fedora" ]]; then
+            ${PAKMGR} install "http://rpms.remirepo.net/enterprise/remi-release-${OSVER}.rpm" || true
+        fi
+        ${PAKMGR} install pdns-backend-mysql pdns bind-utils
+    fi
+
+    echo "" >/etc/pdns/pdns.conf
+    cat >/etc/pdns/pdns.conf
<' + echo " ServerAdmin admin@${WEB_HOST_NAME}" + echo " ServerName ${WEB_HOST_NAME}" + echo " DocumentRoot /var/www/html/${WEB_HOST_NAME}" + echo ' #DirectoryIndex index.php' + echo " #ErrorLog /var/log/httpd/${WEB_HOST_NAME}-error.log" + echo " #CustomLog /var/log/httpd/${WEB_HOST_NAME}-access.log combined" + echo '' + echo ' ' + if [[ "${OS_FAMILY}" = "debian" ]]; then + echo ' SetHandler "proxy:unix:/run/php/php-fpm.sock|fcgi://localhost"' + elif [[ "${OS_FAMILY}" = "suse" ]]; then + echo ' SetHandler "proxy:unix:/run/php-fpm/www.sock|fcgi://localhost"' + else + echo ' SetHandler "proxy:unix:/run/php-fpm/www.sock|fcgi://localhost"' + fi + echo ' ' + echo '' + } > "${path}" + + if [[ "${OS_FAMILY}" = "debian" ]]; then + if ! apachectl configtest; then + echo -e '\e[01;31m An Error was detected with apache2, please check the configuration\e[0m' >&2 + exit 1 + fi + systemctl enable --now php*-fpm + a2dissite 000-default + a2ensite "${WEB_HOST_NAME}" + a2enmod proxy_fcgi setenvif + systemctl enable apache2 + systemctl reload apache2 + elif [[ "${OS_FAMILY}" = "suse" ]]; then + if ! apachectl configtest; then + echo -e '\e[01;31m An Error was detected with apache2, please check the configuration\e[0m' >&2 + exit 1 + fi + systemctl enable --now php-fpm + systemctl enable --now apache2 + else + if ! 
httpd -t; then
+            echo -e '\e[01;31m An Error was detected with httpd, please check the configuration\e[0m' >&2
+            exit 1
+        fi
+        chcon -R -t httpd_sys_content_t "/var/www/html/${WEB_HOST_NAME}"
+        systemctl enable --now php-fpm
+        systemctl enable --now httpd
+    fi
+}
+
+_install_nginx() {
+    # Install PHP for nginx
+    if [[ "${OS_FAMILY}" = "debian" ]]; then
+        ${PAKMGR} install php php-cli php-fpm php-gd php-intl php-mysql php-xml php-mbstring php-curl gettext
+        systemctl disable --now apache2 || true
+        systemctl mask apache2 || true
+    elif [[ "${OS_FAMILY}" = "suse" ]]; then
+        ${PAKMGR} php8 php8-fpm php8-cli php8-mysql php8-intl php8-mbstring php8-curl
+        systemctl disable --now apache2 || true
+        systemctl mask apache2 || true
+    else
+        ${PAKMGR} install php php-fpm php-cli php-mysqlnd php-intl php-mbstring php-curl
+        chown apache:apache /var/lib/php/sessions
+        systemctl disable --now httpd || true
+        systemctl mask httpd || true
+    fi
+
+    # Install nginx
+    if [[ "${OS_FAMILY}" = "suse" ]]; then
+        ${PAKMGR} nginx
+    else
+        ${PAKMGR} install nginx
+    fi
+
+    if [[ "${OS_FAMILY}" = "rhel" ]]; then
+        if ! grep -q "listen = /run/php-fpm/www.sock" /etc/php-fpm.d/www.conf; then
+            sed -i '/listen = */c\listen = \/run\/php-fpm\/www.sock' /etc/php-fpm.d/www.conf
+        fi
+    fi
+
+    local path
+    if [[ "${OS_FAMILY}" = "debian" ]]; then
+        path="/etc/nginx/sites-available/${WEB_HOST_NAME}.conf"
+    else
+        path="/etc/nginx/conf.d/${WEB_HOST_NAME}.conf"
+    fi
+
+    # shellcheck disable=SC2016
+    {
+        echo 'server {'
+        echo "    server_name ${WEB_HOST_NAME};"
+        echo '    listen 80;'
+        echo ''
+        echo "    root /var/www/html/${WEB_HOST_NAME};"
+        echo "    #access_log /var/log/nginx/${WEB_HOST_NAME}-access_log;"
+        echo "    #error_log /var/log/nginx/${WEB_HOST_NAME}-error_log;"
+        echo ''
+        echo '    index index.php;'
+        echo ''
+        echo '    location / {'
+        echo '        try_files $uri $uri/ /index.php?$query_string;'   # $query_string is the nginx variable; without the "$" the literal word was passed to PHP
+        echo '    }'
+        echo ''
+        echo '    location ~ \.php$ {'
+        echo '        fastcgi_index index.php;'
+        echo '        fastcgi_split_path_info ^(.+\.php)(.*)$;'
+        echo '        fastcgi_keep_conn on;'
+        echo '        include /etc/nginx/fastcgi_params;'
+        if [[ "${OS_FAMILY}" = "debian" ]]; then
+            echo '        fastcgi_pass unix:/run/php/php-fpm.sock;'
+        else
+            echo '        fastcgi_pass unix:/run/php-fpm/www.sock;'
+        fi
+        echo '        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;'
+        echo '    }'
+        echo ''
+        echo '    location ~ /\.ht {'
+        echo '        deny all;'
+        echo '    }'
+        echo ''
+        echo '}'
+    } > "${path}"
+
+    if ! nginx -t; then
+        echo -e '\e[01;31m An Error was detected with nginx, please check the configuration\e[0m' >&2
+        exit 1
+    fi
+
+    if [[ "${OS_FAMILY}" = "debian" ]]; then
+        rm -f /etc/nginx/sites-enabled/default
+        ln -sf "/etc/nginx/sites-available/${WEB_HOST_NAME}.conf" "/etc/nginx/sites-enabled/${WEB_HOST_NAME}"
+    elif [[ "${OS_FAMILY}" = "rhel" ]]; then
+        chcon -R -t httpd_sys_content_t "/var/www/html/${WEB_HOST_NAME}"
+    fi
+    systemctl enable --now "$(systemctl list-unit-files --no-legend 'php*-fpm.service' | awk 'NR==1 {print $1}')"   # Debian/Ubuntu name the unit phpX.Y-fpm, RHEL/SUSE php-fpm; look up whichever is installed
+    systemctl enable --now nginx
+}
+
+############################
+#### Install PowerAdmin ####
+############################
+pdns_admin_install() {
+    if [[ ! -d "/var/www/html/${WEB_HOST_NAME}" ]]; then
+        mkdir -p "/var/www/html/${WEB_HOST_NAME}"
+    fi
+
+    if [[ "${OS_FAMILY}" = "suse" ]]; then
+        ${PAKMGR} git
+    else
+        ${PAKMGR} install git
+    fi
+    cd "/var/www/html/${WEB_HOST_NAME}" || exit
+    git clone https://github.com/poweradmin/poweradmin.git
+    mv poweradmin/* .
+    rm -rf poweradmin/
+    find "/var/www/html/${WEB_HOST_NAME}/" -type d -exec chmod 755 {} \;
+    find "/var/www/html/${WEB_HOST_NAME}/" -type f -exec chmod 644 {} \;
+
+    # Account the web server / php-fpm runs as on each distribution family
+    local web_user
+    if [[ "${OS_FAMILY}" = "debian" ]]; then
+        web_user="www-data"
+    elif [[ "${OS_FAMILY}" = "suse" ]]; then
+        web_user="wwwrun"
+    else
+        web_user="apache"
+    fi
+    chown -R "${web_user}:${web_user}" "/var/www/html/${WEB_HOST_NAME}/"
+}
+
+##########################################
+#### Install Certbot and request Cert ####
+##########################################
+install_certbot() {
+    if [[ "${HTTP}" = "apache" ]]; then
+        if [[ "${OS_FAMILY}" = "suse" ]]; then
+            ${PAKMGR} python3-certbot python3-certbot-apache
+        else
+            ${PAKMGR} install python3-certbot-apache
+        fi
+        if [[ "${OS_FAMILY}" = "rhel" ]]; then
+            systemctl enable --now httpd
+        else
+            systemctl enable --now apache2
+        fi
+    elif [[ "${HTTP}" = "nginx" ]]; then
+        if [[ "${OS_FAMILY}" = "suse" ]]; then
+            ${PAKMGR} python3-certbot python3-certbot-nginx
+        else
+            ${PAKMGR} install python3-certbot-nginx
+        fi
+        systemctl enable --now nginx
+    fi
+
+    #################################################################################################
+    #### Be sure that your domain has the proper dns entry or this will not work.                ####
+    ####                                                                                         ####
+    #### If your domain is not properly configured and you know it, or you just wanna            ####
+    #### test that you can get a cert uncomment this line                                        ####
+    ####                                                                                         ####
+    #### certbot certonly --redirect --agree-tos --nginx -d $WEB_HOST_NAME -m "$EMAIL" --dry-run ####
+    #### and comment out this line                                                               ####
+    #### certbot --non-interactive --redirect --agree-tos -d $WEB_HOST_NAME -m "$EMAIL"          ####
+    #################################################################################################
+
+    if [[ "${HTTP}" = "apache" ]]; then
+        certbot certonly --redirect --agree-tos --apache -d "${WEB_HOST_NAME}" -m "${EMAIL}" --dry-run -v
+        #certbot --non-interactive --redirect --agree-tos --apache -d "${WEB_HOST_NAME}" -m "${EMAIL}"
+        if [[ "${OS_FAMILY}" = "rhel" ]]; then
+            systemctl restart httpd
+        else
+            systemctl restart apache2
+        fi
+    elif [[ "${HTTP}" = "nginx" ]]; then
+        certbot certonly --redirect --agree-tos --nginx -d "${WEB_HOST_NAME}" -m "${EMAIL}" --dry-run -v
+        #certbot --non-interactive --redirect --agree-tos --nginx -d "${WEB_HOST_NAME}" -m "${EMAIL}"
+        systemctl restart nginx
+    fi
+
+    # Set up auto-renewal via a system crontab entry (added only once)
+    if ! grep -q "certbot" /etc/crontab 2>/dev/null; then
+        echo "0 */12 * * * root certbot -q renew" >> /etc/crontab
+    fi
+}
+
+#######################
+#### Final Message ####
+#######################
+install_complete() {
+    # No live certificate directory means the site is only reachable over plain http
+    if [[ ! -d "/etc/letsencrypt/live/${WEB_HOST_NAME}" ]]; then
+        echo -e '\e[01;37m -----------------------------------------------------------------------------------------------------------'
+        echo -e "\e[01;32m You should now be able to complete the Poweradmin setup by accessing it here http://${host}/install/ or by ip http://${ip4}/install/"
+        echo -e '\e[01;37m -----------------------------------------------------------------------------------------------------------\e[0m'
+    else
+        echo -e '\e[01;37m -----------------------------------------------------------------------------------------------------------'
+        echo -e "\e[01;32m You should now be able to complete the Poweradmin setup by accessing it here https://${host}/install/ or by ip https://${ip4}/install/"
+        echo -e '\e[01;37m -----------------------------------------------------------------------------------------------------------\e[0m'
+    fi
+}
+
+##################
+#### Code End ####
+##################
+
+check_RootUser
+prompt_secrets
+validate_inputs
+install_mysql
+secure_mysql
+pdns_db_install
+pdns_app_install
+webserver_install
+pdns_admin_install
+install_certbot
+install_complete
diff --git a/SecureIt.sh b/SecureIt.sh
new file mode 100644
index 0000000..d3efa42
--- /dev/null
+++ b/SecureIt.sh
@@ -0,0 +1,3953 @@
+#!/bin/bash
+
+###############################################################################
+#### Hardening Script for RHEL, CentOS, Rocky, Alma, Oracle and Ubuntu.
#### +#### Author: Phil Connor 02/10/2020 #### +#### Contact: contact@mylinux.work #### +#### Version 4.00.03.06.26 #### +#### #### +#### Supported: RHEL 7/8/9, CentOS 7, Rocky 8/9, Alma 8/9, #### +#### Oracle Linux 7/8, Ubuntu 20.04/22.04/24.04 #### +#### #### +#### To Use chmod to 755 or simply type bash #### +############################################################################### + +clear +export TERM=xterm-256color + +############################ +#### User Configuration #### +############################ +RSWP=8 # <-- Set the required swap size +TCPPORTS=( 22 53 1521 5666 7001 7002 8000 9090 10000 ) # <-- Firwall ports that you need open, don't remove 22 unless you will only contecting with a console +TCP6PORTS=( 22 ) # <-- IPv6 is disabled but Nessus scans requires it configured +UDPPORTS=( 53 ) # <-- Gotta have DNS +UDP6PORTS=( 53 ) # <-- Gotta have DNS + +########################## +#### System Variables #### +########################## +BACKUP="/root/config_Backups" +BOOTLD="/boot/grub2/user.cfg" +BOOTLDCE="/boot/efi/EFI/centos/" +BOOTLDRH="/boot/efi/EFI/redhat/" +BOOTLDUB="/boot/grub/user.cfg" +CRON_RH="/var/spool/cron/root" +CRON_UB="/var/spool/cron/crontab/root" +FIREIP="" +IPTBL="/etc/sysconfig/iptables" +IP6TBL="/etc/sysconfig/ip6tables" +IPTBLUB="/etc/iptables/rules.v4" +IP6TBLUB="/etc/iptables/rules.v6" +GRUBCFG="/boot/grub2/grub.cfg" +GRUBCFGCE="/boot/efi/EFI/centos/grub.cfg" +GRUBCFGRH="/boot/efi/EFI/redhat/grub.cfg" +GRUBCFGUB="/boot/grub/grub.cfg" +HOSTNAME=$(uname -n) +LOG=${BACKUP}/install.log +MODPRO="/etc/modprobe.d/cis.conf" +MYIP=$(ss -tnp 2>/dev/null | awk '/:22 / && /ESTAB/ {split($5,result,":"); print result[1]}' | head -1) +OS=$(. /etc/os-release && echo "${ID}") +OSVER=$(. 
/etc/os-release && echo "${VERSION_ID%%.*}")
+SSHD_FILE='/etc/ssh/sshd_config'
+TMPMNT="/usr/lib/systemd/system"
+
+########################
+#### Menu Variables ####
+########################
+H1=20
+R1=3
+R2=6
+R3=11
+W1=80
+
+###########################################################
+#### Detect Package Manger from OS and OSVer Variables ####
+###########################################################
+# NOTE: PAKMGR stays unset when no branch below matches (any other OS ID, or a
+# RHEL-family/fedora release other than 7, 8 or 9).
+if [[ ${OS} = ubuntu || ${OS} = debian ]]; then
+    PAKMGR="apt-get -y"
+elif [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux || ${OS} = fedora ]]; then
+    if [ "${OSVER}" = 7 ]; then
+        PAKMGR="yum -y"
+    fi
+    if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
+        PAKMGR="dnf -y"
+    fi
+fi
+
+##############################################
+#### Check to see if running as Root User ####
+##############################################
+# Abort the whole script unless the effective UID is 0.
+# Exits with status 1 so a wrapper or CI job sees the refusal as a failure
+# (a bare `exit` would propagate the status of the preceding echo, i.e. 0).
+function check_root() {
+    if [ "${EUID}" -ne 0 ]; then
+        echo ""
+        echo "Script Installation has been Halted!"
+        echo ""
+        echo "You Must Run This Script as the \"ROOT\" User"
+        exit 1
+    fi
+}
+
+#################################
+#### Config backup directory ####
+#################################
+# Create the backup directory (and any missing parents) and make sure the
+# install log inside it exists. Safe to call repeatedly.
+function backup() {
+    # -p: no error if the directory already exists, creates parents if needed.
+    mkdir -p "${BACKUP}" && touch "${BACKUP}/install.log"
+}
+
+#######################
+#### Copy Function ####
+#######################
+# Filter: expands tabs, measures the leading-space run of the FIRST input line
+# and strips that many characters from the start of every line. Used to write
+# indented heredocs without the indentation.
+function no_show() {
+    {
+        expand | awk 'NR == 1 {match($0, /^ */); l = RLENGTH + 1}
+ {print substr($0, l)}'
+    }
+}
+
+##########################
+#### Spinner Function ####
+##########################
+# _spinner start <message>        - print the message and spin forever
+# _spinner stop <status> <pid>    - kill spinner <pid>, print COMPLETE when
+#                                   <status> is 0, ERROR otherwise
+function _spinner() {
+    {
+        local on_success="COMPLETE"
+        local on_fail="ERROR"
+        local green="\e[1;32m"
+        local red="\e[1;31m"
+        local nc="\e[0m"
+        case $1 in
+            start)
+                ((column=$(tput cols)-${#2}-8))
+                echo -ne "\e[7m ${2} \e[0m \n"
+                printf "%${column}s"
+                i=1
+                sp='/-\|/-\:'
+                delay=${SPINNER_DELAY:-0.15}
+                while :
+                do
+                    printf "\b%s${sp:i++%${#sp}:1}"
+                    sleep "$delay"
+                done
+                ;;
+            stop)
+                if [[ -z ${3} ]]; then
+                    echo "spinner is not running.."
+                    exit 1
+                fi
+                kill "$3" > /dev/null 2>&1
+                echo -en "\b["
+                if [[ $2 -eq 0 ]]; then
+                    echo -en "${green}${on_success}${nc}"
+                else
+                    echo -en "${red}${on_fail}${nc}"
+                fi
+                echo -e "]"
+                ;;
+            *)
+                echo "invalid argument, try {start/stop}"
+                exit 1
+                ;;
+        esac
+    }
+}
+
+#######################
+#### Spinner Start ####
+#######################
+# Launch the spinner in the background and remember its PID in _sp_pid.
+function start_spinner {
+    {
+        echo ""
+        _spinner "start" "${1}" &
+        _sp_pid=$!
+        echo ""
+        disown
+    }
+}
+
+######################
+#### Spinner Stop ####
+######################
+# Stop the spinner recorded in _sp_pid; $1 is the exit status to report.
+function stop_spinner {
+    {
+        echo ""
+        _spinner "stop" "$1" $_sp_pid
+        unset _sp_pid
+        echo ""
+    }
+}
+
+#########################
+#### Warning Message ####
+#########################
+function warn_message() {
+    {
+        whiptail --backtitle "SecureIt contact@mylinux.work" --title "*** WARNING ***" --yes-button "CONFIRM" --no-button "Exit" --defaultno --yesno " Running this script will harden this server to CIS Benchmark settings.
+ It will change server configuration and will affect server operation
+
+
+ ONLY RUN THIS SCRIPT IF YOU KNOW WHAT YOU ARE DOING!
+
+
+ You must select CONFIRM to continue." ${H1} ${W1}
+        exitstatus=$?
+ if [ ${exitstatus} = 0 ]; then + private_ip + else + exit + fi + } + +} + +############################# +#### Get VLAN IP address #### +############################# +function private_ip() { + { + VLANIP=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Server Connect IP" --ok-button "Continue" --cancel-button "Exit" --inputbox " What is the IP/Sub or VLAN/Sub you use to connect to this server? + + Examples are 192.168.0.0/24 + or 192.168.1.21/32 + or 10.0.10.0/16 + + Your current SSH Connection IP is Shown and can be changed if required" ${H1} ${W1} "${MYIP}"/32 3>&1 1>&2 2>&3) + exitstatus=$? + if [ ${exitstatus} = 0 ]; then + FIREIP="${VLANIP//[[:space:]]/}" + time_zone + else + exit + fi + } +} + +###################### +#### Get TimeZone #### +###################### +function time_zone() { + { + # shellcheck disable=SC2046 + TIMEZONE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Server TimeZone" --ok-button "Continue" --cancel-button "Exit" --menu " What is your Server Timezone? + + Example Central" ${H1} ${W1} ${R3} $(find /usr/share/zoneinfo/US/* | cut -d '/' -f 6 | sort | sed "s/$/ ./" | tr '\n' ' ';) 3>&1 1>&2 2>&3) + exitstatus=$? + if [ ${exitstatus} = 0 ]; then + max_count + else + exit + fi + } +} + +################################## +#### Get Auto Disconnect Time #### +################################## +function max_count() { + { + MAXCOUNT=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Server Disconnect" --ok-button "Continue" --cancel-button "Exit" --radiolist " What is the MAX time you want before auto disconnect?" ${H1} ${W1} ${R1} \ + "1" "5 mins" OFF \ + "2" "10 mins" OFF \ + "3" "15 mins" ON 3>&1 1>&2 2>&3) + exitstatus=$? 
+        if [ ${exitstatus} = 0 ]; then
+            max_logs
+        else
+            exit
+        fi
+    }
+}
+
+################################
+#### Get Audit logs Setting ####
+################################
+# Ask for the audit-log retention keyword (stored in MAXLOGS), then chain to
+# the syslog prompt. Cancel exits the script.
+function max_logs() {
+    {
+        MAXLOGS=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Audit Logs" --ok-button "Continue" --cancel-button "Exit" --inputbox " If you plan on archiving the audit logs leave \"ignore\" here
+ If you have tons of room change this to \"KEEP_LOGS\"" ${H1} ${W1} ignore 3>&1 1>&2 2>&3)
+        exitstatus=$?
+        if [ ${exitstatus} = 0 ]; then
+            syslog_server
+        else
+            exit
+        fi
+    }
+}
+
+###########################
+#### Get Syslog Server ####
+###########################
+# Ask for the syslog server name/IP (stored in SYSLOG), then chain to the
+# server-usage prompt. Cancel exits the script.
+function syslog_server() {
+    {
+        SYSLOG=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "SysLog Server" --ok-button "Continue" --cancel-button "Exit" --inputbox " What is the Name or IP of your SysLog Server?" ${H1} ${W1} 3>&1 1>&2 2>&3)
+        exitstatus=$?
+        if [ ${exitstatus} = 0 ]; then
+            srv_type
+        else
+            exit
+        fi
+    }
+}
+
+##########################
+#### Get Server Usage ####
+##########################
+# Ask what the server is used for (SRVTYPE: 1 = EBS, 2 = Weblogic,
+# 3 = Regular), then chain to the GRUB password prompt.
+function srv_type() {
+    {
+        # Title was a copy/paste of the auto-disconnect dialog; it now names this dialog.
+        SRVTYPE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Server Usage" --ok-button "Continue" --cancel-button "Exit" --radiolist " What is the use or purpose of this server?" ${H1} ${W1} ${R1} \
+            "1" "EBS Server" OFF \
+            "2" "Weblogic Server" OFF \
+            "3" "Regular Server" ON 3>&1 1>&2 2>&3)
+        exitstatus=$?
+        if [ ${exitstatus} = 0 ]; then
+            grub_password
+        else
+            exit
+        fi
+    }
+}
+
+#########################
+#### Get Grub Passwd ####
+#########################
+# Ask for the GRUB password (GPASS), derive its PBKDF2 hash into G2PASSWD and
+# continue to the main menu. Cancel exits the script.
+function grub_password() {
+    {
+        local mkpasswd
+        GPASS=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Grub Password" --ok-button "Continue" --cancel-button "Exit" --inputbox " What do you want your Grub Password to be?" ${H1} ${W1} 3>&1 1>&2 2>&3)
+        exitstatus=$?
+        if [ ${exitstatus} = 0 ]; then
+            # Ubuntu names the tool grub-mkpasswd-pbkdf2; every other
+            # supported distro uses grub2-mkpasswd-pbkdf2.
+            if [ "${OS}" = ubuntu ]; then
+                mkpasswd=grub-mkpasswd-pbkdf2
+            else
+                mkpasswd=grub2-mkpasswd-pbkdf2
+            fi
+            # The tool reads the password twice (entry + confirmation).
+            # printf '%s' feeds it literally; `echo -e` would expand backslash
+            # sequences inside the password and hash a different string.
+            # The hash is the 7th field of the tool's last output line.
+            G2PASSWD="$(printf '%s\n%s\n' "${GPASS}" "${GPASS}" | "${mkpasswd}" 2>/dev/null | tail --lines=1 | awk -F " " '{print $7}')"
+            main_menu
+        else
+            exit
+        fi
+    }
+}
+
+#############################
+#### OS Select Main Menu ####
+#############################
+# Top-level distro chooser; loops until the user cancels (which exits).
+function main_menu() {
+    {
+        while true; do
+            CHOICE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "OS Select Main" --ok-button "Continue" --cancel-button "Exit" --menu " Please Select Your Linux Distro" ${H1} ${W1} ${R2} \
+                "1)" "Oracle Linux" \
+                "2)" "RedHat/CentOS/Rocky/Alma" \
+                "3)" "Ubuntu" 3>&2 2>&1 1>&3)
+            exitstatus=$?
+            if [ ${exitstatus} = 0 ]; then
+                case ${CHOICE} in
+                    "1)")
+                        oracle_menu
+                        ;;
+
+                    "2)")
+                        redhat_menu
+                        ;;
+
+                    "3)")
+                        ubuntu_menu
+                        ;;
+
+                    "4)")
+
+                        ;;
+
+                esac
+            else
+                exit
+            fi
+        done
+    }
+}
+
+#####################
+#### Oracle Menu ####
+#####################
+function oracle_menu() {
+    {
+        while true; do
+            CHOICE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Oracle Linux" --ok-button "Install" --cancel-button "Exit" --menu " Please Select Your Oracle Version" ${H1} ${W1} ${R2} \
+                "1)" "OCI Oracle Linux 7" \
+                "2)" "OCI Oracle Linux 8" \
+                "3)" "Oracle Linux 7" \
+                "4)" "Oracle Linux 8" 3>&2 2>&1 1>&3)
+            exitstatus=$?
+ if [ ${exitstatus} = 0 ]; then + case ${CHOICE} in + "1)") + oci_oracle_ebs_setup + oci_rh_ub_common + oci_only + complete + ;; + + "2)") + oci_oracle_ebs_setup + oci_rh_ub_common + oci_only + complete + ;; + + "3)") + oci_rh_ub_common + complete + ;; + + "4)") + oci_rh_ub_common + complete + ;; + + esac + else + exit + fi + done + } +} + +##################### +#### Redhat Menu #### +##################### +function redhat_menu() { + { + while true; do + CHOICE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Redhat/Centos Linux" --ok-button "Install" --cancel-button "Exit" --menu " Please Select Your Redhat/CentOS Version" ${H1} ${W1} ${R2} \ + "1)" "OCI CentOS 7" \ + "2)" "OCI CentOS 8" \ + "3)" "AWS Redhat/Centos 7" \ + "4)" "AWS Redhat/Centos 8" \ + "5)" "Redhat/CentOS/Rocky/Alma 7" \ + "6)" "Redhat/CentOS/Rocky/Alma 8" \ + "7)" "Redhat/Centos/Rocky/Alma 9" 3>&2 2>&1 1>&3) + exitstatus=$? + if [ ${exitstatus} = 0 ]; then + case ${CHOICE} in + "1)") + oci_rh_ub_common + oci_only + complete + ;; + + "2)") + oci_rh_ub_common + oci_only + complete + ;; + + "3)") + oci_rh_ub_common + aws_only + complete + ;; + + "4)") + oci_rh_ub_common + aws_only + complete + ;; + + "5)") + oci_rh_ub_common + complete + ;; + + "6)") + oci_rh_ub_common + complete + ;; + + "7)") + oci_rh_ub_common + complete + ;; + + esac + else + exit + fi + done + } +} + +##################### +#### Ubuntu Menu #### +##################### +function ubuntu_menu() { + { + while true; do + CHOICE=$(whiptail --backtitle "SecureIt contact@mylinux.work" --title "Ubuntu Linux" --ok-button "Install" --cancel-button "Exit" --menu " Please Select Your Ubuntu Version" ${H1} ${W1} ${R2} \ + "1)" "OCI Ubuntu" \ + "2)" "AWS Ubuntu" \ + "3)" "Ubuntu" 3>&2 2>&1 1>&3) + exitstatus=$? 
+ if [ ${exitstatus} = 0 ]; then + case ${CHOICE} in + "1)") + oci_rh_ub_common + oci_only + complete + ;; + + "2)") + oci_rh_ub_common + aws_only + complete + ;; + + "3)") + oci_rh_ub_common + complete + ;; + + esac + else + exit + fi + done + } +} + +########################## +#### Install Complete #### +########################## +function complete() { + { + whiptail --backtitle "SecureIt contact@mylinux.work" --title "Configuration Complete" --msgbox " This script has configured and hardened this server to + + CIS Level 1 Benchmark settings. + + It is recommended that you try to ssh to the sever as any user + or root to make sure you can connect and once you know all is working + properly the system should be rebooted. + + + Please select \"OK\" to EXIT." ${H1} ${W1} 3>&1 1>&2 2>&3 + exitstatus=$? + if [ ${exitstatus} = 0 ]; then + exit + fi + } +} + ##################################################################### + ### The following is in line with the CIS BenchMark Manual v3.0.0 ### + ##################################################################### + +############################# +#### Make Swap if Needed #### +############################# +function make_swap() { + { + start_spinner 'Configuring Additional Swap Space...' + echo "" + if [ "${SRVTYPE}" -ne 3 ]; then + # size of swapfile in gigabytes + swpsize="$RSWP" + # how large the swap needs to be total in mb's + swpneed=$((swpsize * 1024)) + # / part dir file list + dir=$(ls -la --block-size=M /) + # does the swap file already exist? + swpexist=$(echo "$dir" | grep -i swap | awk '{ print $5 }' | tr -d 'M"') + # what is the name of the swap file if it exist + swpname=$(echo "$dir" | grep -i swap | awk '{ print $9 }') + # Is there any swap present if yes what size is it + swppres=$(free -m | sed -n '3 p' | awk '{ print $2 }') + # If the swap file already exist is it large enough? 
+ if (( swpneed < swpexist )) || (( swpneed < swppres )); then + echo -e '\e[01;37m =======================================================================' + echo -e '\e[01;32m =====================================================================' + echo -e '\e[01;32m ==== \e[01;37m A Large Enough Swapfile was Found! No Changes Needed... \e[01;32m ====' + echo -e '\e[01;32m =====================================================================' + echo -e '\e[01;37m =======================================================================' + elif (( swpneed > swpexist )) || (( swpneed > swppres )); then + echo -e '\e[01;37m ==================================================================================' + echo -e '\e[01;31m ================================================================================' + echo -e '\e[01;31m ==== \e[01;37m A Large Enough Swapfile was not found! Creating Larger SwapFile... \e[01;31m ====' + echo -e '\e[01;31m ================================================================================' + echo -e '\e[01;37m ==================================================================================' + # Turn off existing swap if needing replacement + if echo "$dir" | grep -i swap; then + swapoff /"${swpname}" + rm -f /"$swpname" + fi + # Create the swapfile and make it active + fallocate -l ${swpsize}g /.SwapFile + chmod 600 /.SwapFile + mkswap /.SwapFile + swapon /.SwapFile + echo -e '\e[01;37m ==============================================================================' + echo -e '\e[01;32m ============================================================================' + echo -e '\e[01;32m ==== \e[01;37m Checking whether the swap space was mounted and active or not! 
\e[01;32m ===='
+            echo -e '\e[01;32m ============================================================================'
+            echo -e '\e[01;37m =============================================================================='
+            R=$(swapon -s)
+            if [ -n "$R" ]; then
+                echo -e '\e[01;32m ============'
+                echo -e '\e[01;32m ============'
+                echo -e '\e[01;32m ============================================================================'
+                echo -e "\e[01;37m$R"
+                echo -e '\e[01;32m ============================================================================'
+                echo -e '\e[01;37m =============================================================================='
+            else
+                echo -e '\e[01;31m ============'
+                echo -e '\e[01;31m ============'
+                echo -e '\e[01;31m ============================================================================'
+                echo -e "\e[01;37m Something Went Wrong no Swap was Loaded"
+                echo -e '\e[01;31m ============================================================================'
+                echo -e '\e[01;37m =============================================================================='
+            fi
+            # Check to see if the created swap is losted in the fstab file
+            if ! grep -q "SwapFile" /etc/fstab; then
+                echo "/.SwapFile swap swap defaults 0 0" >> /etc/fstab
+            fi
+        fi
+    fi
+    stop_spinner $?
+    } | tee -a $LOG
+}
+
+############################
+#### Set Sever TimeZone ####
+############################
+# Set the system timezone to US/<TIMEZONE> as chosen in the time_zone dialog.
+function time_set() {
+    {
+        start_spinner 'Setting System TimeZone...'
+        echo ""
+        timedatectl set-timezone US/"${TIMEZONE}"
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+########################################
+### 1.1.1 Disable Unused Filesystems ###
+########################################
+# Rewrite ${MODPRO} so every listed module resolves to /bin/true on load, and
+# unload any of them that is currently loaded. The spinner reports ERROR when
+# a loaded module could not be removed (for example because it is in use).
+function disable_filesystems() {
+    {
+        start_spinner 'Disabling Unused Filesystems...'
+        echo ""
+        local mod loaded rc=0
+        # 1.1.1.1 - 1.1.1.8: cramfs freevxfs jffs2 hfs hfsplus squashfs udf fat,
+        # followed by the additional unused filesystem / USB / Bluetooth /
+        # AppleTalk modules.
+        local -a modules=(
+            cramfs freevxfs jffs2 hfs hfsplus squashfs udf fat
+            cifs nfs nfsv3 nfsv4 gfs2 usb-storage
+            bnep bluetooth btusb net-pf-31 appletalk
+        )
+        # Start from an empty file so a re-run does not duplicate entries.
+        : > "${MODPRO}"
+        for mod in "${modules[@]}"; do
+            echo "install ${mod} /bin/true" >> "${MODPRO}"
+            # lsmod prints names with underscores, and the match must cover the
+            # whole first column: a bare substring grep for "fat" also hits
+            # "vfat", "hfs" hits "hfsplus", "nfs" hits "nfsd", and the script
+            # would then try to remove a module that is not the one loaded.
+            loaded="${mod//-/_}"
+            if lsmod | grep -q "^${loaded}[[:space:]]"; then
+                rmmod "${loaded}" || rc=1
+            fi
+        done
+        {
+            echo "blacklist usb-storage"
+            echo "blacklist firewire-core"
+            echo "options ipv6 disable=1"
+        } >> "${MODPRO}"
+        stop_spinner "${rc}"
+    } | tee -a $LOG
+}
+
+######################################################
+#### 1.1.2 Ensure seprate partion exists for /tmp ####
+######################################################
+function tmp_directory() {
+    {
+        start_spinner 'Ensuring a Seprate Partion Exists for /tmp...'
+ echo "" + #### Copy Conf Files for Backup #### + xargs -n 1 cp -v /etc/fstab <<< ""${BACKUP} /etc/fstab.bak"" + #### Check to see if /tmp is a mount #### + if mount | grep -q /tmp; then + umount /tmp + fi + #### /tmp Mount Changes Ubuntu #### + if [ "${OS}" = ubuntu ]; then + xargs -n 1 cp -v /usr/share/systemd/tmp.mount <<< ""${BACKUP} /usr/share/systemd/tmp.mount.bak"" + if grep -q nosuid /usr/share/systemd/tmp.mount; then + sed -i 's/Options=mode=1777,strictatime,nosuid,nodev/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' /usr/share/systemd/tmp.mount + else + sed -i 's/Options=mode=1777,strictatime/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' /usr/share/systemd/tmp.mount + fi + #### /tmp Mount Changes RedHat #### + elif [ "${OS}" = ol ]; then + xargs -n 1 cp -v /usr/lib/systemd/system/tmp.mount <<< ""${BACKUP} /usr/lib/systemd/system/tmp.mount.bak"" + if [ "${OSVER}" = 7 ]; then + sed -i 's/Options=mode=1777,strictatime/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' $TMPMNT/tmp.mount + fi + if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + sed -i 's/Options=mode=1777,strictatime,nosuid,nodev/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' $TMPMNT/tmp.mount + fi + elif [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [ "${OSVER}" = 7 ]; then + sed -i 's/Options=mode=1777,strictatime/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' $TMPMNT/tmp.mount + fi + if [ "${OSVER}" = 8 ]; then + sed -i 's/Options=mode=1777,strictatime,nosuid,nodev/Options=mode=1777,strictatime,nosuid,nodev,noexec/g' $TMPMNT/tmp.mount + fi + if [ "${OSVER}" = 9 ]; then + sed -i 's/Options=mode=1777,strictatime,nosuid,nodev,size=50%,nr_inodes=1m/Options=mode=1777,strictatime,nosuid,nodev,noexec,size=50%,nr_inodes=1m/g' $TMPMNT/tmp.mount + fi + else + xargs -n 1 cp -v /etc/systemd/system/local-fs.target.wants/tmp.mount <<< ""${BACKUP} /etc/systemd/system/local-fs.target.wants/tmp.mount.bak"" + no_show << EOF > 
/etc/systemd/system/local-fs.target.wants/tmp.mount
+            [Mount]
+            What=tmpfs
+            Where=/tmp
+            Type=tmpfs
+            Options=mode=1777,strictatime,noexec,nodev,nosuid
+EOF
+        fi
+        #### Setting /tmp to persist thru reboots ####
+        if ! grep -w /tmp /etc/fstab; then
+            echo "tmpfs /tmp tmpfs defaults,nodev,nosuid,noexec 0 0" >> /etc/fstab
+        fi
+        mount /tmp
+        #### 1.1.3, 1.1.4 & 1.1.5 Ensure noexec, nosuid and nodev option set on /tmp partition ####
+        #### mount -o remount,noexec,nosuid,nodev /tmp
+        #### Setting /var/tmp to persist thru reboots ####
+        if ! grep -w /var/tmp /etc/fstab; then
+            echo "/tmp /var/tmp none rw,noexec,nosuid,nodev,bind 0 0" >> /etc/fstab
+        fi
+        #### Binding mount /var/tmp directory to /tmp ####
+        mount -o rw,noexec,nosuid,nodev,bind /tmp/ /var/tmp/
+        #### 1.1.8, 1.1.9 & 1.1.10 Ensure noexec, nosuid and nodev option set on /var/tmp partition ####
+        mount -o remount,noexec,nosuid,nodev /var/tmp
+        #### Setting /dev/shm to persist thru reboots ####
+        if [ "${SRVTYPE}" -ne 2 ]; then
+            if ! grep -w /dev/shm /etc/fstab; then
+                echo "tmpfs /dev/shm tmpfs defaults,nodev,nosuid,noexec,relatime 0 0" >> /etc/fstab
+                #### 1.1.15, 1.1.16 and 1.1.17 Ensure noexec, nosuid and nodev option set on /dev/shm partition ####
+                mount -o remount,noexec,nosuid,nodev,relatime /dev/shm
+            fi
+        fi
+        #### Ensure noexec and nodev option set on /dev partition ####
+        mount -o remount,noexec /dev
+        #### Setting /dev to persist thru reboots ####
+        if ! grep -w devtmpfs /etc/fstab; then
+            echo "devtmpfs /dev devtmpfs defaults,noexec 0 0" >> /etc/fstab
+        fi
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+#############################################################################
+#### 1.1.21 Ensure Sticky Bit is set on "All" World-Writable Directories ####
+#############################################################################
+# Add the sticky bit to every world-writable directory on each local
+# filesystem reported by df.
+function stickybit() {
+    {
+        start_spinner 'Setting Sticky Bit on "All" World-Writable Directories...'
+        echo ""
+        local mnt
+        # -exec ... {} + batches paths like xargs did, but runs nothing when
+        # there are no matches (bare `xargs chmod a+t` ran chmod with no
+        # operand) and is safe for directory names containing whitespace.
+        df --local -P | awk 'NR != 1 { print $6 }' | while IFS= read -r mnt; do
+            find "${mnt}" -xdev -type d -perm -0002 -exec chmod a+t {} + 2>/dev/null
+        done
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+##############################################
+#### 1.2.2 Ensure GPG Keys are Configured ####
+##############################################
+# Ubuntu: fetch any keys apt reports as NO_PUBKEY, refresh and list them.
+# Others: import the distro RPM keys, list them, log per-repo gpgcheck
+# settings and force gpgcheck=1 in /etc/yum.conf.
+function gpgkeys() {
+    {
+        start_spinner 'Checking GPG Keys are Configured...'
+        echo ""
+        if [ "${OS}" = ubuntu ]; then
+            apt-cache policy
+            ${PAKMGR} update 2>&1 1>/dev/null | sed -ne 's/.*NO_PUBKEY //p' |
+                while read -r key; do
+                    echo 'Processing key:' "$key"
+                    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys "$key"
+                done
+            apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com
+            apt-key list
+        else
+            rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY*
+            rpm -q gpg-pubkey --qf '%{name}-%{version}-%{release} --> %{summary}\n'
+            grep ^gpgcheck /etc/yum.repos.d/* >> ${LOG} 2>&1
+            ### 1.2.3 Verify that gpgcheck is Globally Activated ###
+            # Explicit if/else: with `A && B || C` a failing sed would also
+            # append a second gpgcheck line.
+            if grep -Eq "^(\s*)gpgcheck\s*=\s*\S+(\s*#.*)?\s*$" /etc/yum.conf; then
+                # --follow-symlinks: edit the link target in place when
+                # /etc/yum.conf is a symlink instead of replacing the link
+                # with a regular file.
+                sed --follow-symlinks -ri "s/^(\s*)gpgcheck\s*=\s*\S+(\s*#.*)?\s*$/\1gpgcheck=1\2/" /etc/yum.conf
+            else
+                echo "gpgcheck=1" >> /etc/yum.conf
+            fi
+        fi
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+########################################
+#### 1.3.1 Ensure Aide is Installed ####
+########################################
+# Install AIDE, build its initial database and schedule a daily 05:00 check
+# in root's crontab file.
+function aide_install() {
+    {
+        start_spinner 'Installing and Configuring AIDE...'
+        echo ""
+        if [ "${OS}" = ubuntu ]; then
+            # The whole preseed line must be ONE here-string word; with the
+            # doubled quotes only "postfix" reached stdin and the remaining
+            # words were passed to debconf-set-selections as arguments.
+            debconf-set-selections <<< "postfix postfix/mailname string ${HOSTNAME}"
+            debconf-set-selections <<< "postfix postfix/main_mailer_type string 'Local Only'"
+            ${PAKMGR} install aide aide-common --assume-yes
+            aideinit
+            update-aide.conf
+            if [ ! -f ${CRON_UB} ]; then
+                touch ${CRON_UB}
+                crontab ${CRON_UB}
+            fi
+            if ! grep -qi "aide" ${CRON_UB}; then
+                echo "0 5 * * * /usr/bin/aide.wrapper --check" >> ${CRON_UB}
+            fi
+        else
+            ${PAKMGR} install aide
+            aide --init
+            mv -f /var/lib/aide/aide.db.new.gz /var/lib/aide/aide.db.gz
+            if [ ! -f ${CRON_RH} ]; then
+                touch ${CRON_RH}
+                crontab ${CRON_RH}
+            fi
+            if ! grep -qi "aide" ${CRON_RH}; then
+                echo "0 5 * * * /usr/sbin/aide --check" >> ${CRON_RH}
+            fi
+        fi
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+###########################
+#### 1.3 Sudo Commands ####
+###########################
+# Ensure /etc/sudoers.d/cis exists (mode 440) and contains the use_pty and
+# logfile defaults. Each line is added only if it is not already present, so
+# re-running the script does not pile up duplicates.
+function sudo_changes() {
+    {
+        start_spinner 'Configuring Sudo Settings...'
+        echo ""
+        local sudo_file=/etc/sudoers.d/cis
+        local setting
+        if [ ! -f "${sudo_file}" ]; then
+            touch "${sudo_file}"
+            chmod 440 "${sudo_file}"
+        fi
+        #### 1.3.2 Ensure Sudo Commands use Pty ####
+        #### 1.3.3 Ensure Sudo Log File Exists ####
+        for setting in 'Defaults use_pty' 'Defaults logfile="/var/log/sudo.log"'; do
+            # -x whole-line, -F fixed-string: exact match on the directive.
+            if ! grep -qxF -- "${setting}" "${sudo_file}"; then
+                echo "${setting}" >> "${sudo_file}"
+            fi
+        done
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+##################################
+#### 1.4 Secure Boot Settings ####
+##################################
+function boot_load() {
+    {
+        start_spinner 'Securing Boot Settings...'
+        echo ""
+        #### 1.4.1 Ensure permissions on bootloader config are configured ####
+        if [ "${OS}" = "ubuntu" ]; then
+            touch ${BOOTLDUB}
+            chmod 600 ${BOOTLDUB}
+            chown root.root ${BOOTLDUB}
+        else
+            touch ${BOOTLD}
+            chmod 600 ${BOOTLD}
+            chown root.root ${BOOTLD}
+        fi
+        #### Config /boot/efi permissions in fstab !!! This is for OCI !!! I have not seen this on any other cloud provider ####
+        mount | grep /boot/efi
+        if [ $?
!= 1 ]; then + umount /boot/efi + if [ "${OS}" = ol ]; then + if [ "${OSVER}" = 7 ]; then + sed -i 's/defaults,uid=0,gid=0,umask=0077,shortname=winnt,_netdev,_netdev,x-initrd.mount/defaults,uid=0,gid=0,umask=0077,fmask=0177,shortname=winnt,_netdev,_netdev,x-initrd.mount/g' /etc/fstab + elif [ "${OSVER}" = 8 ]; then + sed -i 's/defaults,uid=0,gid=0,umask=077,shortname=winnt/defaults,uid=0,gid=0,umask=077,fmask=0177,shortname=winnt/g' /etc/fstab + fi + fi + if [ "${OS}" = centos ]; then + sed -i 's/vfat[[:blank:]]*defaults/vfat defaults,uid=0,gid=0,umask=0077,fmask=0177/g' /etc/fstab + fi + mount /boot/efi + fi + #### 1.4.2 Ensure bootloader password is set #### + if [ "${OS}" = centos ]; then + echo GRUB2_PASSWORD="${G2PASSWD}" > ${BOOTLD} + cp ${BOOTLD} ${BOOTLDCE} + elif [ "${OS}" = ubuntu ]; then + echo GRUB2_PASSWORD="${G2PASSWD}" > ${BOOTLDUB} + else + echo GRUB2_PASSWORD="${G2PASSWD}" > ${BOOTLD} + cp ${BOOTLD} ${BOOTLDRH} + fi + stop_spinner $? + } | tee -a $LOG +} + +################################################ +#### 1.5.1 Ensure core dumps are restricted #### +################################################ +function core_dumps() { + { + start_spinner 'Restricting Core Dumps...' + echo "" + xargs -n 1 cp -v /etc/security/limits.conf <<<"${BACKUP} /etc/security/limits.conf.bak" + echo '* hard core 0' >> /etc/security/limits.conf + stop_spinner $? + } | tee -a $LOG +} + +########################################################################### +#### 1.5.3 Ensure address space layout randomization (ASLR) is enabled #### +########################################################################### +function sysctl_conf() { + { + start_spinner 'Configuring Sysctl and Tuning Kernel Parameters...' 
+ echo "" + xargs -n 1 cp -v /etc/sysctl.conf <<< "${BACKUP} /etc/sysctl.conf.bak" + no_show << "EOF" > /etc/sysctl.d/99-secureit.conf + ################################################################################################## + #### Hardened SysCtl Configuration File edited to match CIS level 1 requirements #### + #### for questions or changles please contact Phil Connor contact@mylinux.work #### + ################################################################################################## + + #### Controls the System Request debugging functionality of the kernel + kernel.sysrq = 0 + + #### Controls whether core dumps will append the PID to the core filename. + #### Useful for debugging multi-threaded applications. + kernel.core_uses_pid = 1 + + ################################## + #### GENERAL SECURITY OPTIONS #### + ################################## + + #### Automatically Reboot Server 30 Seconds after a Kernel Panic + vm.panic_on_oom = 1 + kernel.panic = 30 + kernel.panic_on_oops = 30 + + #### Enable ExecShield + #kernel.exec-shield = 1 + + kernel.dmesg_restrict = 1 + kernel.kptr_restrict = 1 + kernel.yama.ptrace_scope = 1 + + #### 1.5.3 Ensure address space layout randomization (ASLR) is enabled + kernel.randomize_va_space = 2 + + ################################# + #### COMMUNICATIONS SECURITY #### + ################################# + + #### 3.1.1 Ensure IP forwarding is disabled + net.ipv4.ip_forward = 0 + net.ipv4.conf.all.forwarding = 0 + net.ipv4.conf.default.forwarding = 0 + net.ipv6.conf.all.forwarding = 0 + net.ipv6.conf.default.forwarding = 0 + + #### 3.1.2 Ensure packet redirect sending is disabled + net.ipv4.conf.all.send_redirects = 0 + net.ipv4.conf.default.send_redirects = 0 + + #### 3.2.1 Ensure source routed packets are not accepted + net.ipv4.conf.all.accept_source_route = 0 + net.ipv4.conf.default.accept_source_route = 0 + + #### 3.2.2 Ensure ICMP redirects are not accepted + net.ipv4.conf.all.accept_redirects = 0 + 
net.ipv4.conf.default.accept_redirects = 0 + + #### 3.2.3 Ensure secure ICMP redirects are not accepted + net.ipv4.conf.all.secure_redirects = 0 + net.ipv4.conf.default.secure_redirects = 0 + + #### 3.2.4 Ensure suspicious packets are logged + net.ipv4.conf.all.log_martians = 1 + net.ipv4.conf.default.log_martians = 1 + + #### 3.2.5 Ensure broadcast ICMP requests are ignored + net.ipv4.icmp_echo_ignore_broadcasts = 1 + net.ipv4.tcp_timestamps = 0 + + #### 3.2.6 Ensure bogus ICMP responses are ignored + net.ipv4.icmp_ignore_bogus_error_responses = 1 + + #### 3.2.7 Ensure Reverse Path Filtering is enabled + net.ipv4.conf.all.rp_filter = 1 + net.ipv4.conf.default.rp_filter = 1 + + #### 3.2.8 Ensure TCP SYN Cookies is enabled + net.ipv4.tcp_syncookies = 1 + net.ipv4.tcp_syn_retries = 5 + net.ipv4.tcp_synack_retries = 2 + net.ipv4.tcp_max_syn_backlog = 4096 + + #### 3.3.1 Ensure IPv6 router advertisements are not accepted + net.ipv6.conf.all.accept_ra = 0 + net.ipv6.conf.default.accept_ra = 0 + + #### 3.3.1.1 Ensure IPv6 router advertisements are not accepted + net.ipv4.conf.all.accept_source_route=0 + net.ipv6.conf.all.accept_source_route=0 + net.ipv4.conf.default.accept_source_route=0 + net.ipv6.conf.default.accept_source_route=0 + + #### 3.3.2 Ensure IPv6 redirects are not accepted + net.ipv4.conf.all.accept_redirects = 0 + net.ipv6.conf.all.accept_redirects = 0 + net.ipv4.conf.default.accept_redirects = 0 + net.ipv6.conf.default.accept_redirects = 0 + + #### 3.3.3 Ensure IPv6 is disabled + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + + #### Reduce KeepAlive + net.ipv4.tcp_keepalive_time = 300 + net.ipv4.tcp_keepalive_probes = 5 + net.ipv4.tcp_keepalive_intvl = 15 + + fs.suid_dumpable = 0 +EOF + #### Oracle EBS settings only applied for EBS/Weblogic servers #### + if [ "${SRVTYPE}" != "3" ] 2>/dev/null; then + no_show << "EOF" > /etc/sysctl.d/99-secureit-oracle.conf + ######################### + 
#### Oracle Settings #### + ######################### + + # oracle-ebs-server-R12-preinstall setting for fs.file-max is 6815744 + fs.file-max = 6815744 + # oracle-ebs-server-R12-preinstall setting for kernel.sem is '256 32000 100 142' + kernel.sem = 256 32000 100 142 + # oracle-ebs-server-R12-preinstall setting for kernel.shmmni is 4096 + kernel.shmmni=4096 + # oracle-ebs-server-R12-preinstall setting for kernel.shmall is 1073741824 on x86_64 + kernel.shmall=1073741824 + # oracle-ebs-server-R12-preinstall setting for kernel.shmmax is 4398046511104 on x86_64 + kernel.shmmax=4398046511104 + # oracle-ebs-server-R12-preinstall setting for kernel.panic_on_oops is 1 + kernel.panic_on_oops=1 + # oracle-ebs-server-R12-preinstall setting for kernel.msgmax is 8192 + kernel.msgmax = 8192 + # oracle-ebs-server-R12-preinstall setting for kernel.msgmni is 2878 + kernel.msgmni=2878 + # oracle-ebs-server-R12-preinstall setting for kernel.msgmnb is 65535 + kernel.msgmnb=65535 + # oracle-ebs-server-R12-preinstall setting for net.core.rmem_default is 262144 + net.core.rmem_default=262144 + # oracle-ebs-server-R12-preinstall setting for net.core.rmem_max is 4194304 + net.core.rmem_max=4194304 + # oracle-ebs-server-R12-preinstall setting for net.core.wmem_default is 262144 + net.core.wmem_default=262144 + # oracle-ebs-server-R12-preinstall setting for net.core.wmem_max is 1048576 + net.core.wmem_max=1048576 + # oracle-ebs-server-R12-preinstall setting for fs.aio-max-nr is 1048576 + fs.aio-max-nr = 1048576 + # oracle-ebs-server-R12-preinstall setting for net.ipv4.ip_local_port_range is 9000 65500 + net.ipv4.ip_local_port_range = 9000 65500 +EOF + fi + sysctl --system >/dev/null 2>&1 + stop_spinner $? + } | tee -a $LOG +} + +########################################## +#### 1.5.4 Ensure prelink is disabled #### +########################################## +function pre_link() { + { + start_spinner 'Disabling and removing Prelink...' 
+ echo "" + if [ -f /usr/sbin/prelink ]; then + prelink -ua + ${PAKMGR} remove prelink + fi + stop_spinner $? + } | tee -a $LOG +} + +######################################################## +#### 1.6.1.4 Ensure SETroubleshoot is not installed #### +######################################################## +function se_troubleshoot_mcs() { + { + start_spinner 'Removing SE Troubleshoot and MCS Translation Service...' + echo "" + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [ -f /usr/bin/setroubleshoot ]; then + ${PAKMGR} remove setroubleshoot + fi + fi + #### 1.6.1.5 Ensure the MCS Translation Service (mcstrans) is not installed #### + if [ "${OS}" = ubuntu ]; then + if systemctl list-units --type=service --all | grep -q mcstrans; then + ${PAKMGR} remove policycoreutils + fi + else + if systemctl list-units --type=service --all | grep -q mcstrans; then + ${PAKMGR} remove mcstrans + fi + fi + stop_spinner $? + } | tee -a $LOG +} + +###################################################### +#### 1.6.1.6 Ensure no unconfigured daemons exist #### +###################################################### +function unconf_daemons() { + { + start_spinner 'Ensuring no unconfigered daemons exist...' + echo "" + process=$(ps -eZ) + echo "${process}" | grep -E "initrc" | grep -Evw "tr|ps|grep|bash|awk" | tr ':' ' ' | awk '{ print $NF }' + stop_spinner $? + } | tee -a $LOG +} + +########################################### +#### 1.6.2 Ensure SELinux is installed #### +########################################### +function se_linux() { + { + start_spinner 'Ensuring MAC Security is installed...' + echo "" + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} install apparmor apparmor-utils + systemctl enable --now apparmor + else + if ! rpm -qa libselinux; then + ${PAKMGR} install libselinux + fi + fi + stop_spinner $? 
+ } | tee -a $LOG +} + +################################################################## +#### 1.7.1.1 Ensure message of the day is configured properly #### +################################################################## +function banners() { + { + start_spinner 'Configuring all Message Banners...' + echo "" + if [ "${OS}" = ubuntu ]; then + chmod -x /etc/update-motd.d/* + touch /etc/motd + else + xargs -n 1 cp -v /etc/motd <<< "${BACKUP} /etc/motd.bak" + fi + echo " All activities performed on this system will be monitored." > /etc/motd + #### 1.7.1.2 Ensure local login warning banner is configured properly #### + xargs -n 1 cp -v /etc/issue <<< "${BACKUP} /etc/issue.bak" + echo " All activities performed on this system will be monitored." > /etc/issue + #### 1.7.1.3 Ensure remote login warning banner is configured properly #### + xargs -n 1 cp -v /etc/issue.net <<< "${BACKUP} /etc/issue.net.bak" + echo " All activities performed on this system will be monitored." > /etc/issue.net + #### 1.7.1.4 Ensure permissions on /etc/motd ore configured #### + chmod 644 /etc/motd + chown root.root /etc/motd + #### 1.7.1.5 Ensure permissions on /etc/issue are configured #### + chmod 644 /etc/issue + chown root.root /etc/issue + #### 1.7.1.6 Ensure permissions on /etc/issue.net are configured #### + chmod 644 /etc/issue.net + chown root.root /etc/issue.net + stop_spinner $? + } | tee -a $LOG +} + +##################################################################################### +#### 1.8 ensure updates, patches, and additional security software are installed #### +##################################################################################### +function update_security() { + { + start_spinner 'Checking and Installing Security Updates...' + echo "" + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} autoremove + ${PAKMGR} update + ${PAKMGR} upgrade + else + ${PAKMGR} clean all + ${PAKMGR} check-update --security + ${PAKMGR} update --security + fi + stop_spinner $? 
+ } | tee -a $LOG +} + +############################ +#### 2.1 inetd Services #### +############################ +function inet_service() { + { + start_spinner 'Disabling Unused/Unsecure inetd Services...' + echo "" + #### 2.1.1 Ensure chargen services are not enabled #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^chargen" /etc/inetd.* 2>/dev/null; then + systemctl stop chargen 2>/dev/null + systemctl disable chargen 2>/dev/null + fi + else + if systemctl is-enabled chargen-dgram >/dev/null 2>&1; then + systemctl stop chargen-dgram + systemctl disable chargen-dgram + fi + if systemctl is-enabled chargen-stream >/dev/null 2>&1; then + systemctl stop chargen-stream + systemctl disable chargen-stream + fi + fi + #### 2.1.2 Ensure daytime services are not enabled #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^daytime" /etc/inetd.* 2>/dev/null; then + systemctl stop daytime 2>/dev/null + systemctl disable daytime 2>/dev/null + fi + else + if systemctl is-enabled daytime-dgram >/dev/null 2>&1; then + systemctl stop daytime-dgram + systemctl disable daytime-dgram + fi + if systemctl is-enabled daytime-stream >/dev/null 2>&1; then + systemctl stop daytime-stream + systemctl disable daytime-stream + fi + fi + #### 2.1.3 Ensure discard services are not enabled #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^discard" /etc/inetd.* 2>/dev/null; then + systemctl stop discard 2>/dev/null + systemctl disable discard 2>/dev/null + fi + else + if systemctl is-enabled discard-dgram >/dev/null 2>&1; then + systemctl stop discard-dgram + systemctl disable discard-dgram + fi + if systemctl is-enabled discard-stream >/dev/null 2>&1; then + systemctl stop discard-stream + systemctl disable discard-stream + fi + fi + #### 2.1.4 Ensure echo services are not Enabled #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^echo" /etc/inetd.* 2>/dev/null; then + systemctl stop echo 2>/dev/null + systemctl disable echo 2>/dev/null + fi + else + if systemctl is-enabled echo-stream 
>/dev/null 2>&1; then + systemctl stop echo-stream + systemctl disable echo-stream + fi + fi + #### 2.1.5 Ensure time services are not enabled #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^time" /etc/inetd.* 2>/dev/null; then + systemctl stop time 2>/dev/null + systemctl disable time 2>/dev/null + fi + else + if systemctl is-enabled time-dgram >/dev/null 2>&1; then + systemctl stop time-dgram + systemctl disable time-dgram + fi + if systemctl is-enabled time-stream >/dev/null 2>&1; then + systemctl stop time-stream + systemctl disable time-stream + fi + fi + #### 2.1.6 Ensure rsh server is not enabled Ubuntu #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^shell" /etc/inetd.* 2>/dev/null; then + systemctl stop shell 2>/dev/null + systemctl disable shell 2>/dev/null + fi + if grep -Rq "^login" /etc/inetd.* 2>/dev/null; then + systemctl stop login 2>/dev/null + systemctl disable login 2>/dev/null + fi + if grep -Rq "^exec" /etc/inetd.* 2>/dev/null; then + systemctl stop exec 2>/dev/null + systemctl disable exec 2>/dev/null + fi + fi + #### 2.1.6 Ensure tftp server is not enabled Others #### + if systemctl is-enabled tftp >/dev/null 2>&1; then + systemctl stop tftp + systemctl disable tftp + fi + #### 2.1.7 Ensure talk server is not enabled Ubuntu #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^talk" /etc/inetd.* 2>/dev/null; then + systemctl stop talk 2>/dev/null + systemctl disable talk 2>/dev/null + fi + if grep -Rq "^ntalk" /etc/inetd.* 2>/dev/null; then + systemctl stop ntalk 2>/dev/null + systemctl disable ntalk 2>/dev/null + fi + fi + #### 2.1.9 Ensure tftp server is not enabled Ubuntu #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^tftp" /etc/inetd.* 2>/dev/null; then + systemctl stop tftp 2>/dev/null + systemctl disable tftp 2>/dev/null + fi + fi + #### 2.1.8 Ensure telnet server is not enabled Ubuntu #### + if [ "${OS}" = ubuntu ]; then + if grep -Rq "^telnet" /etc/inetd.* 2>/dev/null; then + systemctl stop telnet 2>/dev/null + systemctl 
disable telnet 2>/dev/null + fi + fi + #### 2.1.10 Ensure xinetd is not enabled All #### + if systemctl is-enabled xinetd >/dev/null 2>&1; then + systemctl stop xinetd + systemctl disable xinetd + fi + stop_spinner $? + } | tee -a $LOG +} + +####################################################### +#### 2.2.1.1 Ensure time synchronization is in use #### +####################################################### + +#### 2.2.1.2 Ensure ntp is configured #### +function ntp_config() { + { + start_spinner 'Configuring Time Sync Service...' + echo "" + local NTP_FILE="/etc/ntp.conf" + #### Ubuntu uses chrony (handled in chrony_cfg), skip ntp #### + if [ "${OS}" = ubuntu ]; then + systemctl disable --now systemd-timesyncd 2>/dev/null + stop_spinner $? + return 0 + fi + ${PAKMGR} install ntp + if [ "${OS}" = centos ]; then + var1=${OS} + else + var1=rhel + fi + if [ "${OS}" = centos ]; then + sed -i 's/OPTIONS="-g"/OPTIONS="-u ntp:ntp"/g' /etc/sysconfig/ntpd + if ! grep -qi "server 127.127.1.0" ${NTP_FILE}; then + echo "server 127.127.1.0 #local clock" >> ${NTP_FILE} + echo "fudge 127.127.1.0 stratum 10" >> ${NTP_FILE} + fi + if ! grep -qi "disable monitor" ${NTP_FILE}; then + echo "disable monitor" >> ${NTP_FILE} + fi + systemctl enable --now ntpd + else + xargs -n 1 cp -v ${NTP_FILE} <<< ""${BACKUP} ${NTP_FILE}.bak"" + sed -i 's/restrict default nomodify notrap nopeer noquery/restrict default nomodify notrap nopeer noquery/p' ${NTP_FILE} + sed -i '8 s/restrict default nomodify notrap nopeer noquery/restrict -4 default kod nomodify notrap nopeer noquery/g' ${NTP_FILE} + sed -i '9 s/restrict default nomodify notrap nopeer noquery/restrict -6 default kod nomodify notrap nopeer noquery/g' ${NTP_FILE} + sed -i 's/OPTIONS="-g"/OPTIONS="-u ntp:ntp"/g' /etc/sysconfig/ntpd + if ! grep -qi "server 127.127.1.0" "${NTP_FILE}"; then + echo "server 127.127.1.0 #local clock" >> ${NTP_FILE} + echo "fudge 127.127.1.0 stratum 10" >> ${NTP_FILE} + fi + if ! 
grep -qi "disable monitor" ${NTP_FILE}; then + echo "disable monitor" >> ${NTP_FILE} + fi + systemctl enable --now ntpd + systemctl restart ntpd + fi + sed -i "s/#server 0.$var1.pool.ntp.org iburst/server 0.$var1.pool.ntp.org iburst/g" ${NTP_FILE} + sed -i "s/#server 1.$var1.pool.ntp.org iburst/server 1.$var1.pool.ntp.org iburst/g" ${NTP_FILE} + sed -i "s/#server 2.$var1.pool.ntp.org iburst/server 2.$var1.pool.ntp.org iburst/g" ${NTP_FILE} + sed -i "s/#server 3.$var1.pool.ntp.org iburst/server 3.$var1.pool.ntp.org iburst/g" ${NTP_FILE} + stop_spinner $? + } | tee -a $LOG +} + +#### 2.2.1.3 Ensure chrony is configured #### +function chrony_cfg() { + { + start_spinner 'Configuring Chrony Service...' + echo "" + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + ${PAKMGR} install chrony ntpstat + fi + elif [ "${OS}" = ubuntu ]; then + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install chrony + else + ${PAKMGR} install chrony + fi + if [ "${OS}" = centos ]; then + var1=${OS} + else + var1=rhel + fi + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [ "${OSVER}" = 7 ]; then + if ! grep $var1.pool.ntp.org /etc/chrony.conf; then + echo '#################################################################' + echo '#### Using public servers from the pool.ntp.org project. 
####' + echo '#### Added for CIS Level 1 Compatibility for questions ####' + echo '#### Contact Phil Connor contact@mylinux.work ####' + echo '#################################################################' + echo "server 0.$var1.pool.ntp.org iburst" + echo "server 1.$var1.pool.ntp.org iburst" + echo "server 2.$var1.pool.ntp.org iburst" + echo "server 3.$var1.pool.ntp.org iburst" + sed -i "s/#server 0.$var1.pool.ntp.org iburst/server 0.$var1.pool.ntp.org iburst/g" /etc/chrony.conf + sed -i "s/#server 1.$var1.pool.ntp.org iburst/server 1.$var1.pool.ntp.org iburst/g" /etc/chrony.conf + sed -i "s/#server 2.$var1.pool.ntp.org iburst/server 2.$var1.pool.ntp.org iburst/g" /etc/chrony.conf + sed -i "s/#server 3.$var1.pool.ntp.org iburst/server 3.$var1.pool.ntp.org iburst/g" /etc/chrony.conf + sed -i 's/server 169.254.169.254 iburst/#server 169.254.169.254 iburst/g' /etc/chrony.conf + sed -i 's/OPTIONS=""/OPTIONS="-u chrony"/g' /etc/sysconfig/chronyd + fi + fi + fi + if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + if ! grep $var1.pool.ntp.org /etc/chrony.conf; then + chronyd -q 'server 2.rhel.pool.ntp.org iburst' + sed -i 's/OPTIONS="-F 2"/OPTIONS="-u chrony"/g' /etc/sysconfig/chronyd + chronyc sourcestats -v + fi + systemctl enable --now chronyd + fi + if [ "${OS}" = ubuntu ]; then + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install chrony + systemctl disable --now systemd-timesyncd 2>/dev/null + systemctl enable --now chrony + chronyc sourcestats -v 2>/dev/null + fi + stop_spinner $? + } | tee -a $LOG +} + +####################################################### +#### 2.2.2 Ensure X Window System is not installed #### +####################################################### +function unsecure_services() { + { + start_spinner 'Removing X11 and Disabling Insecure Protocols...' 
+ echo "" + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} remove xorg* + ${PAKMGR} remove xserver-xorg* + else + ${PAKMGR} remove xorg-x11* + fi + a=( "$(systemctl list-units --type=service --all)" ) + #### 2.2.3 Ensure Avahi Server is not installed #### + if echo "${a[@]}" | grep avahi-daemon.service; then + systemctl stop avahi-daemon + systemctl disable avahi-daemon + fi + ### 2.2.4 Ensure CUPS is not enabled ### + if echo "${a[@]}" | grep cups.service; then + systemctl stop cups + systemctl disable cups + fi + #### 2.2.5 Ensure DHCP Server is not enabled #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep isc-dhcp-server.service; then + systemctl stop isc-dhcp-server + systemctl disable isc-dhcp-server + fi + else + if echo "${a[@]}" | grep dhcpd.service; then + systemctl stop dhcpd + systemctl disable dhcpd + fi + fi + #### 2.2.6 Ensure LDAP Server is not enabled #### + if echo "${a[@]}" | grep slapd.service; then + systemctl stop slapd + systemctl disable slapd + fi + #### 2.2.7 Ensure NFS and RPC are not enabled #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep nfs-kernel-server.service; then + systemctl stop nfs-kernel-server + systemctl disable nfs-kernel-server + fi + else + if echo "${a[@]}" | grep nfs-server.service; then + systemctl stop nfs-server + systemctl disable nfs-server + fi + if echo "${a[@]}" | grep nfs.service; then + systemctl stop nfs + systemctl disable nfs + fi + fi + if echo "${a[@]}" | grep rpcbind.service; then + systemctl stop rpcbind + systemctl disable rpcbind + fi + #### 2.2.8 Ensure DNS Server is not enabled #### + if echo "${a[@]}" | grep named.service; then + systemctl stop named + systemctl disable named + fi + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep bind9.service; then + systemctl stop bind9 + systemctl disable bind9 + fi + fi + #### 2.2.9 Ensure FTP Server is not enabled #### + if echo "${a[@]}" | grep vsftpd.service; then + systemctl stop vsftpd + systemctl disable vsftpd + fi + #### 
2.2.10 Ensure HTTP Server is not enabled #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep apache2.service; then + systemctl stop apache2 + systemctl disable apache2 + fi + else + if echo "${a[@]}" | grep httpd.service; then + systemctl stop httpd + systemctl disable httpd + fi + fi + #### 2.2.11 Ensure IMAP and POP3 server are not enabled #### + if echo "${a[@]}" | grep dovecot.service; then + systemctl stop dovecot + systemctl disable dovecot + fi + #### 2.2.12 Ensure Samba is not enabled #### + if echo "${a[@]}" | grep smb.service; then + systemctl stop smb + systemctl disable smb + fi + #### 2.2.13 Ensure HTTP Proxy Server is not enabled #### + if echo "${a[@]}" | grep squid.service; then + systemctl stop squid + systemctl disable squid + fi + #### 2.2.14 Ensure SNMP Server is not enabled #### + if echo "${a[@]}" | grep snmpd.service; then + systemctl stop snmpd + systemctl disable snmpd + fi + stop_spinner $? + } | tee -a $LOG +} + +############################################################################# +#### 2.2.15 Ensure mail transfer agent is configured for local-only mode #### +############################################################################# +function mail_config() { + { + start_spinner 'Configuring Postfix MTA...' 
+ echo "" + if [ "${OS}" = ubuntu ]; then + debconf-set-selections <<< ""postfix postfix/mailname string "${HOSTNAME}""" + debconf-set-selections <<< "postfix postfix/main_mailer_type string 'Local Only'" + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install postfix --assume-yes + sed -i 's/inet_interfaces = all/inet_interfaces = localhost/g' /etc/postfix/main.cf + else + ${PAKMGR} install postfix + sed -i 's/inet_interfaces = localhost/inet_interfaces = loopback-only/g' /etc/postfix/main.cf + fi + # shellcheck disable=SC2016 + sed -i 's/#smtpd_banner = $myhostname ESMTP $mail_name/smtpd_banner = $myhostname ESMTP/g' /etc/postfix/main.cf + # shellcheck disable=SC2016 + sed -i 's/smtpd_banner = $myhostname ESMTP ($mail_version)/#smtpd_banner = $myhostname ESMTP $mail_name ($mail_version)/g' /etc/postfix/main.cf + if ! grep -qi "mailbox_size_limit" /etc/postfix/main.cf; then + echo "mailbox_size_limit = 0" >> /etc/postfix/main.cf + fi + postconf -e message_size_limit=0 + postconf -e "mailbox_size_limit=0" + systemctl enable --now postfix + stop_spinner $? + } | tee -a $LOG +} + +################################################ +#### 2.2.x Disable Additional inet Services #### +################################################ +function addon_inet_services() { + { + start_spinner 'Disabling Additional Unsecure Services...' 
+ echo "" + a=( "$(systemctl list-units --type=service --all)" ) + ### 2.2.16 Ensure NIS Server is not enabled ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if echo "${a[@]}" | grep ypserv.service; then + systemctl stop ypserv + systemctl disable ypserv + fi + ### 2.2.16 Ensure rsync service is not enabled Ubuntu ### + elif [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep rsync.service; then + systemctl stop rsync + systemctl disable rsync + fi + fi + ### 2.1.17 Ensure rsh server is not enabled ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + if echo "${a[@]}" | grep rsh.socket.service; then + systemctl stop rsh.socket + systemctl disable rsh.socket + fi + if echo "${a[@]}" | grep rlogin.socket.service; then + systemctl stop rlogin.socket + systemctl disable rlogin.socket + fi + if echo "${a[@]}" | grep rexec.socket.service; then + systemctl stop rexec.socket + systemctl disable rexec.socket + fi + ### 2.2.17 Ensure NIS Server is not enabled Ubuntu ### + elif [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep nis.service; then + systemctl stop nis + systemctl disable nis + fi + fi + ### 2.2.18 Ensure talk server is not enabled ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + if echo "${a[@]}" | grep ntalk.service; then + systemctl stop ntalk + systemctl disable ntalk + fi + ### 2.2.19 Ensure telnet server is not enabled ### + if echo "${a[@]}" | grep telnet.socket.service; then + systemctl stop telnet.socket + systemctl disable telnet-socket + fi + ### 2.2.20 Ensure tftp server is not enabled ### + if echo "${a[@]}" | grep tftp.socket.service; then + systemctl stop tftp.socket + systemctl disable tftp-socket + fi + ### 2.2.21 Ensure rsync service is not enabled ### + if echo "${a[@]}" | grep rsyncd.service; then + systemctl stop rsyncd + systemctl disable rsyncd + fi + fi + stop_spinner $? 
+ } | tee -a $LOG +} + +############################################################### +#### 2.3 Ensure Insecure Service Clients are not Installed #### +############################################################### +function service_clients() { + { + start_spinner 'Removing Insecure Service Clients...' + echo "" + a=( "$(systemctl list-units --type=service --all)" ) + #### 2.3.1 Ensure NIS Client is not installed #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep nis.service; then + ${PAKMGR} remove nis + fi + else + if echo "${a[@]}" | grep ypbind.service; then + ${PAKMGR} remove ypbind + fi + fi + #### 2.3.2 Ensure rsh client is not installed #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep rsh-client.service; then + ${PAKMGR} remove rsh-client rsh-redone-client + fi + else + if echo "${a[@]}" | grep rsh.service; then + ${PAKMGR} remove rsh + fi + fi + #### 2.3.3 Ensure talk client is not installed #### + if echo "${a[@]}" | grep talk.service; then + ${PAKMGR} remove talk + fi + #### 2.3.4 Ensure telnet client is not installed #### + if echo "${a[@]}" | grep telnet.service; then + ${PAKMGR} remove telnet + fi + #### 2.3.5 Ensure LDAP client is not installed #### + if [ "${OS}" = ubuntu ]; then + if echo "${a[@]}" | grep libnss-ldap.service; then + ${PAKMGR} remove libnss-ldap + fi + if echo "${a[@]}" | grep libpam-ldap.service; then + ${PAKMGR} remove libpam-ldap + fi + if echo "${a[@]}" | grep ldap-utils.service; then + ${PAKMGR} remove ldap-utils + fi + else + if echo "${a[@]}" | grep openldap-clients.service; then + ${PAKMGR} remove openldap-clients + fi + fi + stop_spinner $? + } | tee -a $LOG +} + +########################## +#### 3.4 TCP Wrappers #### +########################## +function tcp_wrappers() { + { + start_spinner 'Configuring TCP Wrappers...' + echo "" + #### 3.4.1 Ensure TCP Wrappers is installed #### + if [ "${OS}" = ubuntu ]; then + echo "TCP Wrappers skipped on Ubuntu (deprecated)" + else + if ! 
${PAKMGR} list installed tcp_wrappers 2>/dev/null | grep -q tcp_wrappers; then + ${PAKMGR} install tcp_wrappers + fi + fi + #### 3.4.2 Ensure /etc/hosts.allow is configured #### + echo ALL:"${VLANIP}" > /etc/hosts.allow + #### 3.4.3 Ensure /etc/hosts.deny is configured #### + echo "ALL:ALL" >> /etc/hosts.deny + #### 3.4.4 Ensure permissions on /etc/hosts.allow are configured #### + chown root.root /etc/hosts.allow + chmod 644 /etc/hosts.allow + #### 3.4.5 Ensure permissions on /etc/hosts.deny are configured #### + chown root.root /etc/hosts.deny + chmod 644 /etc/hosts.deny + stop_spinner $? + } +} + +######################################## +#### 3.5 Uncommon Network Protocols #### +######################################## +function uncommon_protocols() { + { + start_spinner 'Disabling Uncommon Network Protocols...' + echo "" + MODPRO="/etc/modprobe.d/cis.conf" + #### 3.5.1 Ensure DCCP is disabled #### + echo "install dccp /bin/true" >> ${MODPRO} + if lsmod | grep -qi dccp; then + rmmod dccp 2>/dev/null + fi + #### 3.5.2 Ensure SCTP is disabled #### + echo "install sctp /bin/true" >> ${MODPRO} + if lsmod | grep -qi sctp; then + rmmod sctp 2>/dev/null + fi + #### 3.5.3 ensure RDS is disabled #### + echo "install rds /bin/true" >> ${MODPRO} + if lsmod | grep -qi rds; then + rmmod rds 2>/dev/null + fi + #### 3.5.4 Ensure TIPC is disabled #### + echo "install tipc /bin/true" >> ${MODPRO} + if lsmod | grep -qi tipc; then + rmmod tipc 2>/dev/null + fi + stop_spinner $? + } | tee -a $LOG +} + +######################################## +#### 3.6 Firewall Configuration AWS #### +######################################## +function iptables_config() { + { + start_spinner 'Configuring IP Tables...' 
+ echo "" + ### 3.6.1 Ensure iptables is installed ### + if [ "${OS}" = ubuntu ]; then + ufw --force disable + debconf-set-selections <<< "iptables-persistent iptables-persistent/autosave_v4 boolean true" + debconf-set-selections <<< "iptables-persistent iptables-persistent/autosave_v6 boolean true" + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install iptables + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install iptables-persistent --assume-yes + service netfilter-persistent start + service netfilter-persistent save + else + systemctl stop firewalld.service + systemctl mask firewalld.service + systemctl daemon-reload + ${PAKMGR} install iptables-utils iptables-services + fi + #### 3.6.2 Ensure default deny firewall policy #### + #### Configure IPv4 #### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + systemctl enable --now iptables + cp $IPTBL $BACKUP + mv -f ${IPTBL} ${IPTBL}.bak + touch ${IPTBL} + fi + # Flush Iptables rules + iptables -F + # Forcing SYN packets check + iptables -A INPUT -p tcp ! 
--syn -m state --state NEW -j DROP + # Forcing Fragments packets check + iptables -A INPUT -f -j DROP + # Dropping malformed XMAS packets + iptables -A INPUT -p tcp --tcp-flags ALL FIN,PSH,URG -j DROP + # Drop all NULL packets + iptables -A INPUT -p tcp --tcp-flags ALL NONE -j DROP + # Limiting pings to 1 per second + iptables -N PACKET + iptables -A PACKET -p icmp -m limit --limit 3/sec --limit-burst 25 -j ACCEPT + # Setup Connection Tracking + iptables -N STATE_TRACK + iptables -A STATE_TRACK -m state --state RELATED,ESTABLISHED -j ACCEPT + iptables -A STATE_TRACK -m state --state INVALID -j DROP + # Discouraging Port Scanning + iptables -N PORTSCAN + iptables -A PORTSCAN -p tcp --tcp-flags ACK,FIN FIN -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ACK,PSH PSH -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ACK,URG URG -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags FIN,RST FIN,RST -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags SYN,FIN SYN,FIN -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags SYN,RST SYN,RST -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ALL ALL -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ALL NONE -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ALL FIN,PSH,URG -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ALL SYN,FIN,PSH,URG -j DROP + iptables -A PORTSCAN -p tcp --tcp-flags ALL SYN,RST,ACK,FIN,URG -j DROP + iptables -N COMMON + iptables -A COMMON -j STATE_TRACK + iptables -A COMMON -j PORTSCAN + iptables -A COMMON -j PACKET + iptables -A INPUT -j COMMON + iptables -A OUTPUT -j COMMON + iptables -A FORWARD -j COMMON + iptables -A FORWARD -j PACKET + # Ensure loopback traffic is configured + iptables -A INPUT -i lo -j ACCEPT + iptables -A INPUT -s 127.0.0.0/8 -j DROP + iptables -A OUTPUT -o lo -j ACCEPT + # Ensure outbound and established connections are configured + iptables -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + iptables -A INPUT -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A INPUT -p 
udp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A INPUT -p icmp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A OUTPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + iptables -A OUTPUT -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A OUTPUT -p udp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A OUTPUT -p icmp -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A OUTPUT -j LOG --log-prefix "iptables_output " + # Add Network Connection IP + iptables -A INPUT -s "${FIREIP}" -d "${FIREIP}" -m state --state NEW,ESTABLISHED -j ACCEPT + iptables -A OUTPUT -s "${FIREIP}" -d "${FIREIP}" -m state --state NEW,ESTABLISHED -j ACCEPT + # Open inbound ssh(22) connections and linit connects to 10 every 10 seconds + iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --set + iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --update --seconds 10 --hitcount 10 -j DROP + iptables -A INPUT -p tcp --dport 22 -m state --state NEW -j ACCEPT + # Default deny Firewall policy + iptables -P INPUT DROP + iptables -P OUTPUT DROP + iptables -P FORWARD DROP + for port in "${TCPPORTS[@]}" + do + echo "Opening TCP Port $port" + /sbin/iptables -A INPUT -p tcp -m tcp --dport "$port" -j ACCEPT + done + # Open UDP Ports + for port in "${UDPPORTS[@]}" + do + echo "Opening UDP Port $port" + /sbin/iptables -A INPUT -p udp -m udp --dport "$port" -j ACCEPT + done + # Save and Start IPTables + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + iptables-save > ${IPTBL} + systemctl restart iptables + elif [ "${OS}" = ubuntu ]; then + iptables-save > ${IPTBLUB} + sed -i '/:ufw-/d' ${IPTBLUB} + sed -i '/-j ufw-/d' ${IPTBLUB} + iptables-restore < ${IPTBLUB} + fi + # Configure IPv6 Firewall Ensure Default Deny Policy + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + cp $IPTBL $BACKUP + mv -f $IP6TBL $IP6TBL.bak + touch $IP6TBL + 
systemctl enable ip6tables + fi + # Flush Iptables rules + ip6tables -F + # Default deny Firewall policy + ip6tables -P INPUT DROP + ip6tables -P OUTPUT DROP + ip6tables -P FORWARD DROP + ip6tables -A INPUT -p tcp ! --syn -m state --state NEW -j DROP + # Forcing Fragments packets check + ip6tables -A INPUT -f -j DROP + # Dropping malformed XMAS packets + ip6tables -A INPUT -p tcp --tcp-flags ALL FIN,PSH,URG -j DROP + # Drop all NULL packets + ip6tables -A INPUT -p tcp --tcp-flags ALL NONE -j DROP + # Limiting pings to 1 per second + ip6tables -N PACKET + ip6tables -A PACKET -p icmp -m limit --limit 3/sec --limit-burst 25 -j ACCEPT + # Setup Connection Tracking + ip6tables -N STATE_TRACK + ip6tables -A STATE_TRACK -m state --state RELATED,ESTABLISHED -j ACCEPT + ip6tables -A STATE_TRACK -m state --state INVALID -j DROP + # Discouraging Port Scanning + ip6tables -N PORTSCAN + ip6tables -A PORTSCAN -p tcp --tcp-flags ACK,FIN FIN -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ACK,PSH PSH -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ACK,URG URG -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags FIN,RST FIN,RST -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags SYN,FIN SYN,FIN -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags SYN,RST SYN,RST -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ALL ALL -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ALL NONE -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ALL FIN,PSH,URG -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ALL SYN,FIN,PSH,URG -j DROP + ip6tables -A PORTSCAN -p tcp --tcp-flags ALL SYN,RST,ACK,FIN,URG -j DROP + ip6tables -N COMMON + ip6tables -A COMMON -j STATE_TRACK + ip6tables -A COMMON -j PORTSCAN + ip6tables -A COMMON -j PACKET + ip6tables -A INPUT -j COMMON + ip6tables -A OUTPUT -j COMMON + ip6tables -A FORWARD -j COMMON + ip6tables -A FORWARD -j PACKET + # Ensure loopback traffic is configured + ip6tables -A INPUT -i lo -j ACCEPT + ip6tables -A INPUT -s 127.0.0.0/8 -j DROP + ip6tables -A 
OUTPUT -o lo -j ACCEPT + # Ensure outbound and established connections are configured + ip6tables -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + ip6tables -A INPUT -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A INPUT -p udp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A INPUT -p icmp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A OUTPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + ip6tables -A OUTPUT -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A OUTPUT -p udp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A OUTPUT -p icmp -m state --state NEW,ESTABLISHED -j ACCEPT + ip6tables -A OUTPUT -j LOG --log-prefix "iptables_output " + for port in "${TCP6PORTS[@]}" + do + echo "Opening TCP Port $port" + ip6tables -A INPUT -p tcp -m tcp --dport "$port" -j ACCEPT + done + # Open UDP Ports + for port in "${UDP6PORTS[@]}" + do + echo "Opening UDP Port $port" + ip6tables -A INPUT -p udp -m udp --dport "$port" -j ACCEPT + + done + # Save and Start IPTables + if [[ ${OS} = ubuntu ]]; then + ip6tables-save > ${IP6TBLUB} + sed -i '/:ufw6-/d' ${IP6TBLUB} + sed -i '/-j ufw6-/d' ${IPTBLUB} + ip6tables-restore < ${IP6TBLUB} + else + ip6tables-save > ${IP6TBL} + systemctl restart ip6tables + fi + stop_spinner $? + } | tee -a $LOG +} + +#################################################### +#### 3.6a Additional Firewall Configuration AWS #### +#################################################### +function iptables_aws() { # NAT-redirects tcp/80 traffic for 169.254.170.2 to local port 51679, then saves and restarts iptables + { + start_spinner 'Adding AWS Required Rules to IP Tables...' + echo "" + iptables -t nat -A PREROUTING -p tcp -d 169.254.170.2 --dport 80 -j DNAT --to-destination 127.0.0.1:51679 + iptables -t nat -A OUTPUT -d 169.254.170.2 -p tcp -m tcp --dport 80 -j REDIRECT --to-ports 51679 + iptables-save > /etc/sysconfig/iptables + systemctl restart iptables + stop_spinner $?
+ } | tee -a $LOG +} + +#################################################### +#### 3.6b Additional Firewall Configuration OCI #### +#################################################### +function oci_iptables() { # Appends OUTPUT rules for the 169.254.0.0/16 link-local range: specific ACCEPTs first, catch-all REJECTs last (iptables stops at the first matching rule) + { + start_spinner 'Adding OCI Required Rules to IP Tables...' + echo "" + iptables -A OUTPUT -d 169.254.0.2/32 -m state --state NEW,ESTABLISHED -p tcp -m tcp --dport 80 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.0.2/32 -m state --state NEW,ESTABLISHED -p tcp -m owner --uid-owner root -m tcp --dport 3260 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.0.3/32 -m state --state NEW,ESTABLISHED -p tcp -m owner --uid-owner root -m tcp --dport 80 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.0.4/32 -m state --state NEW,ESTABLISHED -p tcp -m tcp --dport 80 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.2.0/24 -m state --state NEW,ESTABLISHED -p tcp -m owner --uid-owner root -m tcp --dport 3260 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.4.0/24 -m state --state NEW,ESTABLISHED -p tcp -m owner --uid-owner root -m tcp --dport 3260 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.5.0/24 -m state --state NEW,ESTABLISHED -p tcp -m owner --uid-owner root -m tcp --dport 3260 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p tcp -m tcp --dport 53 -j ACCEPT -m comment --comment "OCI Required -
DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p udp -m udp --dport 53 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p udp -m udp --dport 67 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p udp -m udp --dport 69 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p udp -m udp --dport 80 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.169.254/32 -m state --state NEW,ESTABLISHED -p udp -m udp --dport 123 -j ACCEPT -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.0.0/16 -m state --state NEW,ESTABLISHED -p tcp -m tcp -j REJECT --reject-with tcp-reset -m comment --comment "OCI Required - DO NOT REMOVE" + iptables -A OUTPUT -d 169.254.0.0/16 -m state --state NEW,ESTABLISHED -p udp -m udp -j REJECT -m comment --comment "OCI Required - DO NOT REMOVE" + # Save and Start IPTables + if [ "${OS}" = ubuntu ]; then + iptables-save > ${IPTBLUB} + sed -i '/:ufw-/d' ${IPTBLUB} + sed -i '/-j ufw-/d' ${IPTBLUB} + iptables-restore < ${IPTBLUB} + else + iptables-save > ${IPTBL} + systemctl restart iptables + fi + stop_spinner $? + } | tee -a $LOG +} + +######################################### +#### 4.1 Configure System Accounting #### +######################################### +function auditd_accounting() { + { + start_spinner 'Configuring Auditd Service...' + echo "" + #### 4.1.1.1 Ensure audit log storage size is configured #### + #### !!! Our current default configuration is 8MB !!!
#### + #### 4.1.1.2 Ensure system is disabled when audit logs are full #### + if [ "${OS}" = ubuntu ]; then + debconf-set-selections <<< "postfix postfix/mailname string ${HOSTNAME}" + debconf-set-selections <<< "postfix postfix/main_mailer_type string 'Local Only'" + DEBIAN_FRONTEND=noninteractive ${PAKMGR} install auditd --assume-yes + else + ${PAKMGR} install audit + fi + xargs -n 1 cp -v /etc/audit/auditd.conf <<< "${BACKUP} /etc/audit/auditd.conf.bak" # one cp per destination: ${BACKUP} and the .bak sibling + ### 4.1.1.3 Ensure audit logs are not automatically deleted ### + sed -i 's/^space_left_action.*$/space_left_action = email/' /etc/audit/auditd.conf + sed -i 's/^action_mail_acct.*$/action_mail_acct = root/' /etc/audit/auditd.conf + sed -i 's/^admin_space_left_action.*$/admin_space_left_action = halt/' /etc/audit/auditd.conf + # ${MAXLOGS} is written bare, like the other auditd.conf values set above + sed -i "s/max_log_file_action = ROTATE/max_log_file_action = ${MAXLOGS}/g" /etc/audit/auditd.conf + ### 4.1.2 Ensure auditd service is enabled ### + service auditd reload + if ! systemctl is-enabled auditd; then + systemctl enable --now auditd + fi + ### 4.1.3 Ensure auditing for processes that start prior to auditd is enabled ### + xargs -n 1 cp -v /etc/default/grub <<< "${BACKUP} /etc/default/grub.bak" + if ! grep "audit=1" /etc/default/grub; then + sed -i '/^GRUB_CMDLINE_LINUX=/ s/\(\"[^\"]*\)$/ audit=1 &/' /etc/default/grub + fi + if [ "${OS}" = ubuntu ]; then + grub-mkconfig -o ${BACKUP}/grub.cfg + else + grub2-mkconfig -o ${BACKUP}/grub.cfg + fi + if [[ ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + cp ${BACKUP}/grub.cfg ${GRUBCFGRH} + cp ${BACKUP}/grub.cfg ${GRUBCFG} + elif [ "${OS}" = ubuntu ]; then + cp ${BACKUP}/grub.cfg ${GRUBCFGUB} + elif [ "${OS}" = centos ]; then + cp ${BACKUP}/grub.cfg ${GRUBCFGCE} + cp ${BACKUP}/grub.cfg ${GRUBCFG} + fi + if !
dmesg | grep '[NX|DX]*Execute Disable'; then + echo 0 > /proc/sys/kernel/exec-shield + fi + ### 4.1.4 Ensure events that modify date and time information are collected ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + sed -i 's/RefuseManualStop=yes/RefuseManualStop=no/g' /lib/systemd/system/auditd.service + systemctl daemon-reload + fi + xargs -n 1 cp -v /etc/audit/rules.d/audit.rules <<< "${BACKUP} /etc/audit/rules.d/audit.rules.bak" + sed -i 's/RefuseManualStop=yes/RefuseManualStop=no/g' /lib/systemd/system/auditd.service + systemctl daemon-reload + { + echo '##################################################################################################' + echo '#### Audit Rules File edited to match CIS level 1 requirements ####' + echo '#### for questions or changes please contact Phil Connor contact@mylinux.work ####' + echo '##################################################################################################' + echo '' + echo '#### First rule - delete all rules ####' + echo '-D' + echo '' + echo '#### 4.1.4 Ensure events that modify date and time information are collected ####' + echo '-a always,exit -F arch=b64 -S adjtimex -S settimeofday -k time-change' + echo '-a always,exit -F arch=b32 -S adjtimex -S settimeofday -S stime -k time-change' + echo '-a always,exit -F arch=b64 -S clock_settime -k time-change' + echo '-a always,exit -F arch=b32 -S clock_settime -k time-change' + echo '-w /etc/localtime -p wa -k time-change' + echo '' + echo '#### 4.1.5 Ensure events that modify user/group information are collected ####' + echo '-w /etc/group -p wa -k identity' + echo '-w /etc/passwd -p wa -k identity' + echo '-w /etc/gshadow -p wa -k identity' + echo '-w /etc/shadow -p wa -k identity' + echo '-w /etc/security/opasswd -p wa -k identity' + echo '' + echo '#### 4.1.6 Ensure events that modify the system'\''s network environment are collected ####' + echo '-a always,exit -F arch=b32 -S
sethostname -S setdomainname -k system-locale' + echo '-a always,exit -F arch=b64 -S sethostname -S setdomainname -k system-locale' + echo '-w /etc/issue -p wa -k system-locale' + echo '-w /etc/issue.net -p wa -k system-locale' + echo '-w /etc/hosts -p wa -k system-locale' + echo '-w /etc/network -p wa -k system-locale' + echo '-w /etc/networks -p wa -k system-locale' + echo '' + echo '#### 4.1.7 Ensure events that modify the system'\''s Mandatory Access Controls (MAC'\''s) are collected ####' + echo '-w /etc/selinux/ -p wa -k MAC-policy' + echo '-w /etc/apparmor/ -p wa -k MAC-policy' + echo '-w /etc/apparmor.d/ -p wa -k MAC-policy' + echo '' + echo '#### 4.1.8 Ensure login and logout events are collected ####' + echo '-w /var/log/faillog -p wa -k logins' + echo '-w /var/log/lastlog -p wa -k logins' + echo '-w /var/log/tallylog -p wa -k logins' + echo '' + echo '#### 4.1.9 Ensure session initiation information is collected ###' + echo '-w /var/run/utmp -p wa -k session' + echo '-w /var/run/wtmp -p wa -k session' + echo '-w /var/run/btmp -p wa -k session' + echo '' + echo '#### 4.1.10 Ensure discretionary access control permission modification events are collected ####' + echo '-a always,exit -F arch=b64 -S chmod -S fchmod -S fchmodat -F auid>=1000 -F auid!=4294967295 -k perm_mod' + echo '-a always,exit -F arch=b32 -S chmod -S fchmod -S fchmodat -F auid>=1000 -F auid!=4294967295 -k perm_mod' + echo '-a always,exit -F arch=b64 -S chown -S fchown -S fchownat -S lchown -F auid>=1000 -F auid!=4294967295 -k perm_mod' + echo '-a always,exit -F arch=b32 -S chown -S fchown -S fchownat -S lchown -F auid>=1000 -F auid!=4294967295 -k perm_mod' + echo '-a always,exit -F arch=b64 -S setxattr -S lsetxattr -S fsetxattr -S removexattr -S lremovexattr -S fremovexattr -F auid>=1000 -F auid!=4294967295 -k perm_mod' + echo '-a always,exit -F arch=b32 -S setxattr -S lsetxattr -S fsetxattr -S removexattr -S lremovexattr -S fremovexattr -F auid>=1000 -F auid!=4294967295 -k perm_mod' + 
echo '' + echo '#### 4.1.11 Ensure unsuccessful unauthorized file access attempts are collected ####' + echo '-a always,exit -F arch=b64 -S creat -S open -S openat -S truncate -S ftruncate -F exit=-EACCES -F auid>=1000 -F auid!=4294967295 -k access' + echo '-a always,exit -F arch=b32 -S creat -S open -S openat -S truncate -S ftruncate -F exit=-EACCES -F auid>=1000 -F auid!=4294967295 -k access' + echo '-a always,exit -F arch=b64 -S creat -S open -S openat -S truncate -S ftruncate -F exit=-EPERM -F auid>=1000 -F auid!=4294967295 -k access' + echo '-a always,exit -F arch=b32 -S creat -S open -S openat -S truncate -S ftruncate -F exit=-EPERM -F auid>=1000 -F auid!=4294967295 -k access' + echo '' + echo '#### 4.1.12 Ensure use of privileged commands is collected ####' + echo "$RULES" + echo '' + echo '#### 4.1.13 Ensure successful file system mounts are collected ####' + echo '-a always,exit -F arch=b64 -S mount -F auid>=1000 -F auid!=4294967295 -k mounts' + echo '-a always,exit -F arch=b32 -S mount -F auid>=1000 -F auid!=4294967295 -k mounts' + echo '' + echo '#### 4.1.14 Ensure file deletion events by users are collected ####' + echo '-a always,exit -F arch=b64 -S unlink -S unlinkat -S rename -S renameat -F auid>=1000 -F auid!=4294967295 -k delete' + echo '-a always,exit -F arch=b32 -S unlink -S unlinkat -S rename -S renameat -F auid>=1000 -F auid!=4294967295 -k delete' + echo '' + echo '#### 4.1.15 Ensure changes to system administration scope (sudoers) is collected ####' + echo '-w /etc/sudoers -p wa -k scope' + echo '-w /etc/sudoers.d -p wa -k scope' + echo '' + echo '#### 4.1.16 Ensure system administrator actions (sudolog) are collected ####' + echo '-w /var/log/sudo.log -p wa -k actions' + echo '' + echo '#### 4.1.17 Ensure kernel module loading and unloading is collected ####' + echo '-w /sbin/insmod -p x -k modules' + echo '-w /sbin/rmmod -p x -k modules' + echo '-w /sbin/modprobe -p x -k modules' + echo '-a always,exit -F arch=b64 -S init_module -S 
delete_module -k modules' + echo '' + echo '#### 4.1.18 Ensure the audit configuration is immutable ####' + echo '-e 2' + } > /etc/audit/rules.d/audit.rules + service auditd restart + stop_spinner $? + } | tee -a $LOG +} + +############################################### +#### 4.1.19 Compress and Rotate Audit Logs #### +############################################### +function compress_auditd() { # Sets num_logs = 10 and max_log_file = 20 in auditd.conf (when the file exists), then reloads auditd + { + start_spinner 'Configuring Audit Log Compression...' + echo "" + if [ -f /etc/audit/auditd.conf ]; then + sed -i 's/^num_logs.*$/num_logs = 10/' /etc/audit/auditd.conf + sed -i 's/^max_log_file .*$/max_log_file = 20/' /etc/audit/auditd.conf + fi + service auditd reload 2>/dev/null + stop_spinner $? + } | tee -a $LOG +} + +################################# +#### 4.2.1 Configure rsyslog #### +################################# +function rsyslog_service() { # Dispatches to os7_rsyslog, os8_rsyslog or ub_rsyslog based on ${OS} and ${OSVER} + { + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [ "${OSVER}" = 7 ]; then + os7_rsyslog + elif [[ ${OSVER} = 8 || ${OSVER} = 9 ]]; then + os8_rsyslog + fi + elif [ "${OS}" = ubuntu ]; then + ub_rsyslog + + fi + } +} + +function os7_rsyslog() { # Rewrites /etc/rsyslog.conf in legacy $-directive syntax and appends the remote forwarding rule for ${SYSLOG} + { + start_spinner 'Configuring Rsyslog Service...'
+ echo "" + ### 4.2.1.1 Ensure rsyslog Service is enabled ### + systemctl enable --now rsyslog + ### 4.2.1.2 Ensure logging is configured ### + xargs -n 1 cp -v /etc/rsyslog.conf <<< "${BACKUP} /etc/rsyslog.conf.bak" + cat > /etc/rsyslog.conf << 'EOF' + ################################################################################################## + #### Hardened Rsyslog Configuration File edited to match CIS level 1 requirements #### + #### for questions or changles please contact Phil Connor contact@mylinux.work #### + ################################################################################################## + + ################# + #### MODULES #### + ################# + + # The imjournal module bellow is now used as a message source instead of imuxsock. + $ModLoad imuxsock # provides support for local system logging (e.g. via logger command) + $ModLoad imjournal # provides access to the systemd journal + #$ModLoad imklog # reads kernel messages (the same are read from journald) + #$ModLoad immark # provides --MARK-- message capability + + # Provides UDP syslog reception + #$ModLoad imudp + #$UDPServerRun 514 + + # Provides TCP syslog reception + #$ModLoad imtcp + #$InputTCPServerRun 514 + + # Enable non-kernel facility klog messages + # $KLogPermitNonKernelFacility on + + ########################### + #### GLOBAL DIRECTIVES #### + ########################### + + # Reset UMASK + $umask 0000 + + # Set file creation pewrmissions + $FileCreateMode 0640 + + # Set previously cleared UMASK + $umask 0177 + + # Where to place auxiliary files + $WorkDirectory /var/lib/rsyslog + + # Use default timestamp format + $ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat + + # File syncing capability is disabled by default.
This feature is usually not required, + # not useful and an extreme performance hit + #$ActionFileEnableSync on + + # Include all config files in /etc/rsyslog.d/ + $IncludeConfig /etc/rsyslog.d/*.conf + + # Turn off message reception via local log socket; + # local messages are retrieved through imjournal now. + $OmitLocalLogging on + + # File to store the position in the journal + $IMJournalStateFile imjournal.state + + ############### + #### RULES #### + ############### + + # ### Log Anything of Level WARN or Higher. ### + *.warn;mail.none;news.none;authpriv.none;cron.none /var/log/messages + + # ### Secure Logging Anything of Level WARN or Higher ### + authpriv.* /var/log/secure + + # ### All Mail Logs ### + mail.* -/var/log/mail + + # ### Cron Log ### + cron.* /var/log/cron + + # ### Everybody Gets Emergency Messages ### + *.emerg :omusrmsg:* + *.=warning;*.=err -/var/log/warn + *.crit /var/log/warn + + # ### News Error Logs ### + news.crit -/var/log/news/news.crit + news.err -/var/log/news/news.err + news.notice -/var/log/news/news.notice + + # ### Local and Boot Messages ### + local0,local1.* -/var/log/localmessages + local2,local3.* -/var/log/localmessages + local4,local5.* -/var/log/localmessages + local6.* -/var/log/localmessages + local7.* /var/log/boot.log + + ############################### + #### Begin Forwarding Rule #### + ############################### + + # ### The Remote SysLog Server host is: name/ip:port, e.g.
192.168.0.1:514, port optional ### + #*.* @@syslog +EOF + echo " + *.* @@${SYSLOG} + + #################################### + #### End of the Forwarding Rule #### + #################################### + " >> /etc/rsyslog.conf + sed -i 's/^[\t]*//' /etc/rsyslog.conf + touch /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chmod og-rwx /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chown root:root /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + sed -i 's/^\([[:blank:]]*\)\*\.\* @@[[:blank:]]*$/\1#*.* @@/' /etc/rsyslog.conf # comment the forwarding rule out only when ${SYSLOG} was empty + pkill -hup rsyslog + stop_spinner $? + } | tee -a $LOG +} + +function os8_rsyslog() { + { + start_spinner 'Configuring Rsyslog Service...' + echo "" + ### 4.2.1.1 Ensure rsyslog Service is enabled ### + systemctl enable rsyslog + ### 4.2.1.2 Ensure logging is configured ### + xargs -n 1 cp -v /etc/rsyslog.conf <<< "${BACKUP} /etc/rsyslog.conf.bak" + cat > /etc/rsyslog.conf << 'EOF' + ################################################################################################## + #### Hardened Rsyslog Configuration File edited to match CIS level 1 requirements #### + #### for questions or changles please contact Phil Connor contact@mylinux.work #### + ################################################################################################## + + ################# + #### MODULES #### + ################# + + module(load="imuxsock" # provides support for local system logging (e.g. via logger command) + SysSock.Use="off") # Turn off message reception via local log socket; + # local messages are retrieved through imjournal now.
+ module(load="imjournal" # provides access to the systemd journal + StateFile="imjournal.state") # File to store the position in the journal + #module(load="imklog") # reads kernel messages (the same are read from journald) + #module(load"immark") # provides --MARK-- message capability + + # Provides Rsyslog Forwarding + module(load="omfwd") + + # Provides UDP syslog reception + # for parameters see http://www.rsyslog.com/doc/imudp.html + #module(load="imudp") # needs to be done just once + #input(type="imudp" port="514") + + # Provides TCP syslog reception + # for parameters see http://www.rsyslog.com/doc/imtcp.html + #module(load="imtcp") # needs to be done just once + #input(type="imtcp" port="514") + + ########################### + #### GLOBAL DIRECTIVES #### + ########################### + + # Reset UMASK + $umask 0000 + + # Set file creation pewrmissions + $FileCreateMode 0640 + + # Set previously cleared UMASK + $umask 0177 + + # Where to place auxiliary files + global(workDirectory="/var/lib/rsyslog") + + # Use default timestamp format + module(load="builtin:omfile" Template="RSYSLOG_TraditionalFileFormat") + + # Include all config files in /etc/rsyslog.d/ + include(file="/etc/rsyslog.d/*.conf" mode="optional") + + ############### + #### RULES #### + ############### + + # ### Log Anything of Level WARN or Higher. 
### + *.warn;mail.none;news.none;authpriv.none;cron.none /var/log/messages + + # ### Secure Logging Anything of Level WARN or Higher ### + authpriv.* /var/log/secure + + # ### All Mail Logs ### + mail.* -/var/log/mail + + # ### Cron Log ### + cron.* /var/log/cron + + # ### Everybody Gets Emergency Messages ### + *.emerg :omusrmsg:* + *.=warning;*.=err -/var/log/warn + *.crit /var/log/warn + + # ### News Error Logs ### + news.crit -/var/log/news/news.crit + news.err -/var/log/news/news.err + news.notice -/var/log/news/news.notice + + # ### Local and Boot Messages ### + local0,local1.* -/var/log/localmessages + local2,local3.* -/var/log/localmessages + local4,local5.* -/var/log/localmessages + local6.* -/var/log/localmessages + local7.* /var/log/boot.log + + + ############################### + #### Begin Forwarding Rule #### + ############################### + + #action(type="omfwd" + # An on-disk queue is created for this action. If the remote host is + # down, messages are spooled to disk and sent when it is up again. + #queue.filename="fwdRule1" # unique name prefix for spool files + #queue.maxdiskspace="1g" # 1gb space limit (use as much as possible) + #queue.saveonshutdown="on" # save messages to disk on shutdown + #queue.type="LinkedList" # run asynchronously + #action.resumeRetryCount="-1" # infinite retries if host is down + # Remote Logging (we use TCP for reliable delivery) + # remote_host is: name/ip, e.g. 192.168.0.1, port optional e.g. 
10514 + # Target="remote_host" Port="XXX" Protocol="tcp") + +EOF + echo " + action(type=\"omfwd\" Target=\"${SYSLOG}\" Port=\"514\" Protocol=\"tcp\") + + #################################### + #### End of the Forwarding Rule #### + #################################### + " >> /etc/rsyslog.conf + sed -i 's/^[\t]*//' /etc/rsyslog.conf + touch /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chmod og-rwx /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chown root:root /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + sed -i 's/^\([[:blank:]]*\)\(action(type="omfwd" Target="" .*\)$/\1#\2/' /etc/rsyslog.conf # comment the forwarding action out only when ${SYSLOG} was empty + pkill -hup rsyslog + stop_spinner $? + } | tee -a $LOG +} + +function ub_rsyslog() { + { + start_spinner 'Configuring Rsyslog Service...' + echo "" + service rsyslog stop + mknod -m 640 /dev/xconsole c 1 3 + chown syslog:adm /dev/xconsole + ### 4.2.1.1 Ensure rsyslog Service is enabled ### + systemctl enable rsyslog + ### 4.2.1.2 Ensure logging is configured ### + xargs -n 1 cp -v /etc/rsyslog.conf <<< "${BACKUP} /etc/rsyslog.conf.bak" + no_show << "EOF" > /etc/rsyslog.conf + ################################################################################################## + #### Hardened Rsyslog Configuration File edited to match CIS level 1 requirements #### + #### for questions or changles please contact Phil Connor contact@mylinux.work #### + ################################################################################################## + + ################# + #### MODULES #### + ################# + + module(load="imuxsock") # provides support for local system logging + #module(load="immark") # provides --MARK-- message capability + + # provides UDP syslog reception + #module(load="imudp") + #input(type="imudp" port="514") + + # provides TCP syslog reception + #module(load="imtcp") + #input(type="imtcp" port="514") + + # provides kernel
logging support and enable non-kernel klog messages + module(load="imklog" permitnonkernelfacility="on") + + ########################### + #### GLOBAL DIRECTIVES #### + ########################### + + # Use default timestamp format + $ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat + + # Use traditional timestamp format. + # To enable high precision timestamps, comment out the following line. + $ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat + + # Filter duplicated messages + $RepeatedMsgReduction on + + # Reset UMASK + $Umask 0000 + + # Set the default permissions for all log files. + $FileOwner syslog + $FileGroup adm + $FileCreateMode 0640 + $DirCreateMode 0755 + $Umask 0177 + $PrivDropToUser syslog + $PrivDropToGroup syslog + + # Where to place spool and state files + $WorkDirectory /var/spool/rsyslog + + # Include all config files in /etc/rsyslog.d/ + $IncludeConfig /etc/rsyslog.d/*.conf + + ############################### + #### Begin Forwarding Rule #### + ############################### + $PreserveFQDN on + $ActionQueueFileName queue + $ActionQueueMaxDiskSpace 1g + $ActionQueueSaveOnShutdown on + $ActionQueueType LinkedList + $ActionResumeRetryCount -1 + # ### The Remote SysLog Server host is: name/ip:port, e.g. 
192.168.0.1:514, port optional ### + #*.* @@syslog + +EOF + echo " + *.* @@${SYSLOG}:514 + + #################################### + #### End of the Forwarding Rule #### + #################################### + " >> /etc/rsyslog.conf + sed -i 's/^[\t]*//' /etc/rsyslog.conf + xargs -n 1 cp -v /etc/rsyslog.d/50-default.conf <<< "${BACKUP} /etc/rsyslog.d/50-default.conf.bak" # one cp per destination: ${BACKUP} and the .bak sibling + no_show << "EOF" > /etc/rsyslog.d/50-default.conf + ################################################################################################## + #### Hardened Rsyslog Configuration File edited to match CIS level 1 requirements #### + #### for questions or changles please contact Phil Connor contact@mylinux.work #### + ################################################################################################## + + + ############### + #### RULES #### + ############### + + # Default rules for rsyslog. + # + # For more information see rsyslog.conf(5) and /etc/rsyslog.conf + + # + # First some standard log files. Log by facility. + # + auth,authpriv.* /var/log/auth.log + *.*;auth,authpriv.none -/var/log/syslog + #cron.* /var/log/cron.log + #daemon.* -/var/log/daemon.log + kern.* -/var/log/kern.log + #lpr.* -/var/log/lpr.log + mail.* -/var/log/mail.log + #user.* -/var/log/user.log + + # + # Logging for the mail system. Split it up so that + # it is easy to write scripts to parse these files. + # + #mail.info -/var/log/mail.info + #mail.warn -/var/log/mail.warn + mail.err /var/log/mail.err + + # + # Logging for INN news system. + # + news.crit /var/log/news/news.crit + news.err /var/log/news/news.err + news.notice -/var/log/news/news.notice + + # + # Some "catch-all" log files. + # + #*.=debug;\ + # auth,authpriv.none;\ + # news.none;mail.none -/var/log/debug + #*.=info;*.=notice;*.=warn;\ + # auth,authpriv.none;\ + # cron,daemon.none;\ + # mail,news.none -/var/log/messages + + # + # Emergencies are sent to everybody logged in.
+ # + *.emerg :omusrmsg:* + + # + # I like to have messages displayed on the console, but only on a virtual + # console I usually leave idle. + # + #daemon,mail.*;\ + # news.=crit;news.=err;news.=notice;\ + # *.=debug;*.=info;\ + # *.=notice;*.=warn /dev/tty8 + + # The named pipe /dev/xconsole is for the `xconsole' utility. To use it, + # you must invoke `xconsole' with the `-file' option: + # + # $ xconsole -file /dev/xconsole [...] + # + # NOTE: adjust the list below, or you'll go crazy if you have a reasonably + # busy site.. + # + daemon.*;mail.*;\ + news.err;\ + *.=debug;*.=info;\ + *.=notice;*.=warn |/dev/xconsole +EOF + touch /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chmod og-rwx /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + chown root:root /var/log/warn /var/log/news.crit /var/log/news.err /var/log/news.notice /var/log/localmessages + sed -i 's/*.* @@[[:blank:]]*:514/#*.* @@/g' /etc/rsyslog.conf + systemctl start rsyslog + pkill -hup rsyslog + stop_spinner $? + } | tee -a $LOG +} + +################################## +#### 4.2.2 Configure journald #### +################################## +function journald_config() { # Forces ForwardToSyslog, Compress and Storage in journald.conf whether the keys are currently commented out or set to another value + { + start_spinner 'Configuring Journald Log Retension...' + echo "" + ### 4.2.2.1 Ensure journald is configured to send logs to rsyslog ### + sed -i 's/^#\?ForwardToSyslog=.*/ForwardToSyslog=yes/' /etc/systemd/journald.conf + ### 4.2.2.2 Ensure journald is configured to compress large log files ### + sed -i 's/^#\?Compress=.*/Compress=yes/' /etc/systemd/journald.conf + ### 4.2.2.3 Ensure journald is configured to write logfiles to persistent disk ### + sed -i 's/^#\?Storage=.*/Storage=persistent/' /etc/systemd/journald.conf + stop_spinner $?
+ } | tee -a $LOG +} + +################################################## +#### 4.2.4 Ensure permissions on all logfiles #### +################################################## +function logfile_permissions() { # Tightens modes on files under /var/log and the create modes in the logrotate configuration + { + start_spinner 'Configuring Permissions on all Logfiles...' + echo "" + ### 4.2.4 Ensure permissions on all logfiles are configured ### + find /var/log -type f -exec chmod g-wx,o-rwx {} + + ### 4.3 Ensure logrotate is configured ### + cp /etc/logrotate.conf $BACKUP + sed -i 's/ create 0664 root utmp/ create 0640 root utmp/g' /etc/logrotate.conf + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then + if [[ ${OSVER} = 8 || ${OSVER} = 9 ]]; then + sed -i 's/ create 0664 root utmp/ create 0640 root utmp/g' /etc/logrotate.d/btmp + sed -i 's/ create 0664 root utmp/ create 0640 root utmp/g' /etc/logrotate.d/wtmp + fi + fi + stop_spinner $? + } | tee -a $LOG +} + +############################################# +#### 5.1.1 Ensure cron daemon is enabled #### +############################################# +function crond_enabled() { # Enables the cron service, restricts the cron files and directories to root, and replaces the cron/at deny files with allow files + { + start_spinner 'Configuring Permissions on Cron Daemon...' + echo "" + ### 5.1.1.1 Ensure cron daemon is enabled ### + if [ "${OS}" = ubuntu ]; then + if ! systemctl is-enabled cron; then + systemctl enable cron + fi + else + if !
systemctl is-enabled crond; then + systemctl enable crond + fi + fi + ### 5.1.2 Ensure permissions on /etc/crontab are configured ### + chown root:root /etc/crontab + chmod og-rwx /etc/crontab + ### 5.1.3 Ensure permissions on /etc/cron.hourly are configured ### + chown root:root /etc/cron.hourly + chmod og-rwx /etc/cron.hourly + ### 5.1.4 Ensure permissions on /etc/cron.daily are configured ### + chown root:root /etc/cron.daily + chmod og-rwx /etc/cron.daily + ### 5.1.5 Ensure permissions on /etc/cron.weekly are configured ### + chown root:root /etc/cron.weekly + chmod og-rwx /etc/cron.weekly + ### 5.1.6 Ensure permissions on /etc/cron.monthly are configured ### + chown root:root /etc/cron.monthly + chmod og-rwx /etc/cron.monthly + ### 5.1.7 Ensure permissions on /etc/cron.d are configured ### + chown root:root /etc/cron.d + chmod og-rwx /etc/cron.d + ### 5.1.8 Ensure at/cron is restricted to authorized users ### + stat /etc/cron.deny + if [ $? != 1 ]; then + rm -rf /etc/cron.deny + fi + stat /etc/at.deny + if [ $? != 1 ]; then + rm -rf /etc/at.deny + fi + if ! stat /etc/cron.allow; then + touch /etc/cron.allow + chown root:root /etc/cron.allow + chmod og-rwx /etc/cron.allow + fi + if ! stat /etc/at.allow; then + touch /etc/at.allow + chown root:root /etc/at.allow + chmod og-rwx /etc/at.allow + fi + stop_spinner $? + } | tee -a $LOG +} + +###################################### +#### 5.2 SSH Server Configuration #### +###################################### +function config_sshd() { + { + start_spinner 'Configuring SSh Server...' + echo "" + ### 5.2.1 Ensure permissions on /etc/ssh/sshd_config are configured ### + xargs -n 1 cp -v ${SSHD_FILE} <<< ""${BACKUP} ${SSHD_FILE}.bak"" + chown root.root ${SSHD_FILE} + chmod og-rwx ${SSHD_FILE} + ### 5.2.2. SSH Protocol 2 is the only supported protocol in modern OpenSSH ### + sed -i '/^Protocol /d' ${SSHD_FILE} + sed -i '/^#Protocol /d' ${SSHD_FILE} + ### 5.2.3 Ensure SSH LogLevel is set to info ### + if !
grep -qi "LogLevel INFO" ${SSHD_FILE}; then + echo "LogLevel INFO" >> ${SSHD_FILE} + else + sed -i 's/#LogLevel INFO/LogLevel INFO/g' ${SSHD_FILE} + fi + ### 5.2.4 Ensure SSH X11 forwarding is disabled ### + if ! grep -qi "X11Forwarding yes" ${SSHD_FILE}; then + echo "X11Forwarding no" >> ${SSHD_FILE} + else + sed -i 's/X11Forwarding yes/X11Forwarding no/g' ${SSHD_FILE} + fi + ### 5.2.5 Ensure SSH MaxAuthTries is set to 4 or less ### + if ! grep -qi "MaxAuthTries 6" ${SSHD_FILE}; then + echo "MaxAuthTries 4" >> ${SSHD_FILE} + else + sed -i 's/#MaxAuthTries 6/MaxAuthTries 4/g' ${SSHD_FILE} + fi + ### 5.2.6 Ensure SSH IgnoreRhosts is enabled ### + if ! grep -qi "IgnoreRhosts yes" ${SSHD_FILE}; then + echo "IgnoreRhosts yes" >> ${SSHD_FILE} + else + sed -i 's/#IgnoreRhosts yes/IgnoreRhosts yes/g' ${SSHD_FILE} + fi + ### 5.2.7 Ensure SSH HostbasedAuthentication is disabled ### + if ! grep -qi "HostbasedAuthentication no" ${SSHD_FILE}; then + echo "HostbasedAuthentication no" >> ${SSHD_FILE} + else + sed -i 's/#HostbasedAuthentication no/HostbasedAuthentication no/g' ${SSHD_FILE} + fi + ### 5.2.8 Ensure SSH root login is disabled ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + if ! grep -qi "#PermitRootLogin" ${SSHD_FILE}; then + sed -i 's/PermitRootLogin yes/PermitRootLogin no/g' ${SSHD_FILE} + else + sed -i 's/#PermitRootLogin yes/PermitRootLogin no/g' ${SSHD_FILE} + fi + elif [ "${OS}" = ubuntu ]; then + if ! grep -qi "prohibit-password" ${SSHD_FILE}; then + sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin no/g' ${SSHD_FILE} + fi + fi + ### Ensure SSH PermitEmptyPasswords is disabled ### + sed -i 's/#PermitEmptyPasswords no/PermitEmptyPasswords no/g' ${SSHD_FILE} + ### 5.2.10 Ensure SSH PermitUserEnvironment is disables ### + if ! 
grep -qi "#PermitUserEnvironment" ${SSHD_FILE}; then + echo "PermitUserEnvironment no" >> ${SSHD_FILE} + else + sed -i 's/#PermitUserEnvironment no/PermitUserEnvironment no/g' ${SSHD_FILE} + fi + ### 5.2.11 Ensure ony approved MAC algorithms are used ### + if ! grep -qi "MACs" ${SSHD_FILE}; then + echo "MACs hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,hmac-sha2-512,hmac-sha2-256" >> ${SSHD_FILE} + else + sed -i 's/MACs hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,umac-128-etm@openssh.com,hmac-sha2-512,hmac-sha2-256,umac-128@openssh.com/MACs hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,hmac-sha2-512,hmac-sha2-256/g' ${SSHD_FILE} + fi + if ! grep -qi "#ClientAliveInterval" ${SSHD_FILE}; then + echo "ClientAliveInterval 300" >> ${SSHD_FILE} + else + sed -i 's/#ClientAliveInterval 0/ClientAliveInterval 300/g' ${SSHD_FILE} + fi + if ! grep -qi "#ClientAliveCountMax" ${SSHD_FILE}; then + echo "ClientAliveCountMax ${MAXCOUNT}" >> ${SSHD_FILE} + else + sed -i "s/#ClientAliveCountMax 3/ClientAliveCountMax \"${MAXCOUNT}\"/g" ${SSHD_FILE} + fi + ### 5.2.13 Ensure SSH LoginGraceTime is set to one minute or less ### + if ! grep -qi "LoginGraceTime 120" ${SSHD_FILE}; then + sed -i 's/#LoginGraceTime 2m/LoginGraceTime 60/g' ${SSHD_FILE} + else + sed -i 's/LoginGraceTime 120/LoginGraceTime 60/g' ${SSHD_FILE} + fi + ### 5.2.14 Ensure SSH access is limited ### + ### 5.2.15 Ensure SSH warning banner is configured ### + if ! grep -qi "#Banner none" ${SSHD_FILE}; then + sed -i 's/#Banner \/etc\/issue.net/Banner \/etc\/issue.net/g' ${SSHD_FILE} + else + sed -i 's/#Banner none/Banner \/etc\/issue.net/g' ${SSHD_FILE} + fi + ### 5.2.16 Ensure only strong Key Exchange algorithms are used ### + if ! 
grep -qi "kexalgorithms" ${SSHD_FILE}; then + echo "KexAlgorithms curve25519-sha256,curve25519-sha256@libssh.org,diffie-hellman-group14-sha256,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512,ecdh-sha2-nistp521,ecdh-sha2-nistp384,ecdh-sha2-nistp256,diffie-hellman-group-exchange-sha256" >> ${SSHD_FILE} + fi + ### 5.2.21 Ensure SSH MaxStartups is configured ### + if ! grep -qi "maxstartups" ${SSHD_FILE}; then + echo "MaxStartups 10:30:60" >> ${SSHD_FILE} + else + sed -i 's/#MaxStartups 10:30:100/MaxStartups 10:30:60/g' ${SSHD_FILE} + fi + ### Configuring additional SSH settings ### + if ! grep -qi "#MaxSessions" ${SSHD_FILE}; then + echo "MaxSessions 2" >> ${SSHD_FILE} + else + sed -i 's/#MaxSessions 10/MaxSessions 2/g' ${SSHD_FILE} + fi + if ! grep -qi "#AllowAgentForwarding" ${SSHD_FILE}; then + echo "AllowAgentForwarding no" >> ${SSHD_FILE} + else + sed -i 's/#AllowAgentForwarding yes/AllowAgentForwarding no/g' ${SSHD_FILE} + fi + if ! grep -qi "#AllowTcpForwarding" ${SSHD_FILE}; then + echo "AllowTcpForwarding no" >> ${SSHD_FILE} + else + sed -i 's/#AllowTcpForwarding yes/AllowTcpForwarding no/g' ${SSHD_FILE} + fi + sed -i 's/#PrintMotd yes/PrintMotd no/g' ${SSHD_FILE} + if ! grep -qi "PrintLastLog" ${SSHD_FILE}; then + echo "PrintLastLog no" >> ${SSHD_FILE} + else + sed -i 's/#PrintLastLog yes/PrintLastLog no/g' ${SSHD_FILE} + fi + if ! grep -qi "TCPKeepAlive" ${SSHD_FILE}; then + sed -i 's/TCPKeepAlive yes/TCPKeepAlive no/g' ${SSHD_FILE} + else + sed -i 's/#TCPKeepAlive yes/TCPKeepAlive no/g' ${SSHD_FILE} + fi + if ! grep -qi "Compression" ${SSHD_FILE}; then + echo "Compression no" >> ${SSHD_FILE} + else + sed -i 's/#Compression delayed/Compression no/g' ${SSHD_FILE} + fi + if ! grep -qi "UseDNS" ${SSHD_FILE}; then + echo "UseDNS no" >> ${SSHD_FILE} + else + sed -i 's/#UseDNS yes/UseDNS no/g' ${SSHD_FILE} + sed -i 's/#UseDNS no/UseDNS no/g' ${SSHD_FILE} + fi + if ! 
grep -qi "#PasswordAuthentication" ${SSHD_FILE}; then + sed -i 's/PasswordAuthentication yes/PasswordAuthentication no/g' ${SSHD_FILE} + else + sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/g' ${SSHD_FILE} + fi + echo 'Ciphers chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> ${SSHD_FILE} + if [ "${OS}" = ubuntu ]; then + systemctl restart ssh + else + systemctl restart sshd + fi + stop_spinner $? + } | tee -a $LOG +} + +########################### +#### 5.3 Configure PAM #### +########################### +function config_pam() { + { + start_spinner 'Configuring PAM Server...' + echo "" + ### 5.3.1 Ensure password creation requirements are configured ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + xargs -n 1 cp -v /etc/security/pwquality.conf <<< ""${BACKUP} /etc/security/pwquality.conf.bak"" + sed -i 's/minlen = 8/minlen = 14/g' /etc/security/pwquality.conf + ### 5.3.2 Ensure lockout for failed password attempts is configured ### + xargs -n 1 cp -v /etc/pam.d/password-auth <<< ""${BACKUP} /etc/pam.d/password-auth.bak"" + no_show << EOF > /etc/pam.d/password-auth + ######################################################################################## + #### This password-auth file edited to match CIS level 1 requirements for questions #### + #### or changes please contact Phil Connor contact@mylinux.work #### + #### Please don't edit it unless you know what your doing #### + ######################################################################################## + #%PAM-1.0 + # User changes will be destroyed the next time authconfig is run. 
+            auth required pam_env.so
+            auth required pam_faildelay.so delay=2000000
+            auth required pam_faillock.so preauth audit silent deny=5 unlock_time=900
+            auth [success=1 default=bad] pam_unix.so
+            auth sufficient pam_fprintd.so
+            auth sufficient pam_unix.so nullok try_first_pass
+            auth [default=die] pam_faillock.so authfail audit deny=5 unlock_time=900
+            auth sufficient pam_faillock.so authsucc audit deny=5 unlock_time=900
+            auth requisite pam_succeed_if.so uid >= 1000 quiet_success
+            auth required pam_deny.so
+
+            account required pam_unix.so
+            account sufficient pam_localuser.so
+            account sufficient pam_succeed_if.so uid < 1000 quiet
+            account required pam_permit.so
+            account required pam_faillock.so
+
+            password requisite pam_pwquality.so try_first_pass local_users_only retry=3 authtok_type= enforce_for_root
+            password required pam_pwhistory.so remember=5 use_authtok
+            password sufficient pam_unix.so remember=5 sha512 shadow try_first_pass use_authtok
+            password required pam_deny.so
+
+            session optional pam_keyinit.so revoke
+            session required pam_limits.so
+            -session optional pam_systemd.so
+            session [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid
+            session required pam_unix.so
+            ##########################################
+            #### Logging key strokes of all USERS ####
+            ##########################################
+            session required pam_tty_audit.so disable=* enable=* log_passwd
+EOF
+            # Two explicit copies: the old xargs/here-string form never produced the .bak file.
+            cp -v /etc/pam.d/system-auth "${BACKUP}"
+            cp -v /etc/pam.d/system-auth /etc/pam.d/system-auth.bak
+            # pam_pwhistory is given use_authtok (previously misspelled "use_authlok").
+            # NOTE(review): log_passwd on pam_tty_audit presumably puts typed passwords
+            # into the audit log - confirm this is intended.
+            no_show << EOF > /etc/pam.d/system-auth
+            ######################################################################################
+            #### This system-auth file edited to match CIS level 1 requirements for questions ####
+            #### or changes please contact Phil Connor contact@mylinux.work ####
+            #### Please don't edit it unless you know what your doing ####
+            ######################################################################################
+            #%PAM-1.0
+            # User changes will be destroyed the next time authconfig is run.
+            auth required pam_env.so
+            auth required pam_faildelay.so delay=2000000
+            auth required pam_faillock.so preauth audit silent deny=5 unlock_time=900
+            auth [success=1 default=bad] pam_unix.so
+            auth sufficient pam_fprintd.so
+            auth sufficient pam_unix.so nullok try_first_pass
+            auth [default=die] pam_faillock.so authfail audit deny=5 unlock_time=900
+            auth sufficient pam_faillock.so authsucc audit deny=5 unlock_time=900
+            auth requisite pam_succeed_if.so uid >= 1000 quiet_success
+            auth required pam_deny.so
+
+            account required pam_unix.so
+            account sufficient pam_localuser.so
+            account sufficient pam_succeed_if.so uid < 1000 quiet
+            account required pam_permit.so
+            account required pam_faillock.so
+
+            password requisite pam_pwquality.so try_first_pass local_users_only retry=3 authtok_type= enforce_for_root
+            password required pam_pwhistory.so remember=5 use_authtok
+            password sufficient pam_unix.so remember=5 sha512 shadow try_first_pass use_authtok
+            password required pam_deny.so
+
+            session optional pam_keyinit.so revoke
+            session required pam_limits.so
+            -session optional pam_systemd.so
+            session [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid
+            session required pam_unix.so
+            ##########################################
+            #### Logging key strokes of all USERS ####
+            ##########################################
+            session required pam_tty_audit.so disable=* enable=* log_passwd
+EOF
+        fi
+        if [ "${OS}" = ubuntu ]; then
+            # The whole selection must be ONE here-string word; the previous quoting
+            # split it after "postfix" and passed the rest as file arguments.
+            debconf-set-selections <<< "postfix postfix/mailname string ${HOSTNAME}"
+            debconf-set-selections <<< "postfix postfix/main_mailer_type string 'Local Only'"
+            ${PAKMGR} remove libpam-cracklib
+            ${PAKMGR} install libpam-pwquality --assume-yes
+            cp -v /etc/pam.d/common-password "${BACKUP}"
+            cp -v /etc/pam.d/common-password /etc/pam.d/common-password.bak
+            #sed -i 's/password[[:blank:]]*requisite[[:blank:]]*pam_pwquality.so retry=3/password requisite pam_pwquality.so retry=3 difok=3 reject_username enforce_for_root/g' /etc/pam.d/common-password
+            sed -i 's/# minlen = 8/minlen = 14/g' /etc/security/pwquality.conf
+            sed -i 's/# dcredit = 0/dcredit=-1/g' /etc/security/pwquality.conf
+            sed -i 's/# ucredit = 0/ucredit=-1/g' /etc/security/pwquality.conf
+            sed -i 's/# ocredit = 0/ocredit=-1/g' /etc/security/pwquality.conf
+            sed -i 's/# lcredit = 0/lcredit=-1/g' /etc/security/pwquality.conf
+            no_show << EOF >> /etc/pam.d/common-password
+            ################################################
+            #### 5.3.3 Ensure password reuse is limited ####
+            ################################################
+            password required pam_unix.so remember=5
+
+EOF
+            no_show << EOF >> /etc/pam.d/common-auth
+            #########################################################################
+            #### 5.3.2 Ensure lockout for failed password attempts is configured ####
+            #########################################################################
+            auth required pam_faillock.so preauth audit silent deny=5 unlock_time=900
+            auth [default=die] pam_faillock.so authfail audit deny=5 unlock_time=900
+
+            ##########################################
+            #### Logging key strokes of all USERS ####
+            ##########################################
+            session required pam_tty_audit.so disable=* enable=* log_passwd
+EOF
+        fi
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+############################################
+#### 5.4 User Accounts and Environments ####
+############################################
+function accounts() {
+    {
+        start_spinner 'Configuring User Accts and Environments...'
+ echo "" + LODEFS="/etc/login.defs" + #### 5.4.1.1 Ensure password expiration is 90 days or less #### + #### 5.4.1.2 Ensure minimum days between password changes is 7 days or more #### + #### 5.4.1.3 Ensure password expiration warning days is 7 or more #### + if [ -e ${LODEFS} ]; then + cp ${LODEFS} ${LODEFS}.tmp + awk '($1 ~ /^PASS_MAX_DAYS/) { $2="90" } + ($1 ~ /^PASS_MIN_DAYS/) { $2="7" } + ($1 ~ /^PASS_WARN_AGE/) { $2="10" } + ($1 ~ /^PASS_MIN_LEN/) { $2="14" } + { print }' ${LODEFS}.tmp > ${LODEFS} + rm ${LODEFS}.tmp + fi + cut -d: -f1 /etc/passwd | while read -r NAME + do + uid=$(id -u "${NAME}") + if [ "${uid}" -ge 1000 ] && [ "${uid}" != 65534 ]; then + chage -M 90 -m 7 -W 10 -I 30 "${NAME}" + fi + done + if [ "${OS}" = ubuntu ]; then + no_show << EOF >> ${LODEFS} + ################################################################## + #### Make it More Difficult to Bruteforce the Hashed Password #### + ################################################################## + SHA_CRYPT_MIN_ROUNDS 5000 + SHA_CRYPT_MAX_ROUNDS 10000 +EOF + sed -i 's/pam_faildelay.so delay=3000000/pam_faildelay.so delay=300000000/g' /etc/pam.d/login + else + no_show << EOF >> ${LODEFS} + ############################################################################ + #### Establish a forced five-second minimum delay between failed logins #### + ############################################################################ + FAIL_DELAY 5 + + ################################################################## + #### Make it More Difficult to Bruteforce the Hashed Password #### + ################################################################## + SHA_CRYPT_MIN_ROUNDS 5000 + SHA_CRYPT_MAX_ROUNDS 10000 +EOF + fi + chown root:root ${LODEFS} + chmod 0640 ${LODEFS} + #### 5.4.1.4 Ensure inactive password lock is 30 days or less #### + useradd -D -f 30 + stop_spinner $? 
+ } | tee -a $LOG +} + +###################################################### +#### 5.4 User Accounts and Environments Continued #### +###################################################### +function config_users_permissions() { + { + start_spinner 'Configuring User Permissions...' + echo "" + #### 5.4.2 Ensure system accounts are non-login #### + awk -F: '($3 < 1000) {print $1 }' /etc/passwd | while read -r user + do + if [ "$user" != "root" ]; then + usermod -L "$user" + if [ "$user" != "sync" ] && [ "$user" != "shutdown" ] && [ "$user" != "halt" ]; then + usermod -s /usr/sbin/nologin "$user" + fi + fi + done + #### 5.4.3 Ensure default group for the root account is GID 0 #### + usermod -g 0 root + groupadd dev + groupadd dba + touch /etc/sudoers.d/cis_conf + chmod 440 /etc/sudoers.d/cis_conf + if [ "${OS}" = ubuntu ]; then + sed -i 's/sudo:x:27:/sudo:x:27:root,ubuntu/g' /etc/group + sed -i 's/sudo:*::/sudo:*::root,ubuntu/g' /etc/gshadow + sed -i 's/%sudo[[:blank:]]*ALL=(ALL:ALL)[[:blank:]]*ALL/%sudo ALL=\(ALL:ALL\) NOPASSWD:ALL/g' /etc/sudoers + else + grep -qi "wheel" /etc/group + if [ $? 
!= 1 ]; then + sed -i 's/%wheel[[:blank:]]*ALL=(ALL)[[:blank:]]*ALL/# %wheel ALL=\(ALL\) ALL/g' /etc/sudoers + sed -i 's/^#\s*\(%wheel\s*ALL=(ALL)\s*NOPASSWD:\s*ALL\)/\1/' /etc/sudoers + sed -i 's/wheel:x:10:opc/wheel:x:10:root,opc/g' /etc/group + sed -i 's/wheel:::opc/wheel:::root,opc/g' /etc/gshadow + fi + fi + { + echo "####################" + echo "#### Networking ####" + echo "####################" + } >> /etc/sudoers.d/local_conf + if [ "${OS}" = ubuntu ]; then + { + echo "Cmnd_Alias NETWORKING = /sbin/route, /sbin/ifconfig, /bin/ping, /sbin/dhclient, /sbin/iptables, /sbin/mii-tool" + echo "" + } >> /etc/sudoers.d/local_conf + else + { + echo "Cmnd_Alias NETWORKING = /sbin/route, /sbin/ifconfig, /bin/ping, /sbin/dhclient, /usr/bin/net, /sbin/iptables, /usr/bin/rfcomm, /usr/bin/wvdial, /sbin/iwconfig, /sbin/mii-tool" + echo "" + } >> /etc/sudoers.d/local_conf + fi + { + echo "#################################################" + echo "#### Installation and management of software ####" + echo "#################################################" + } >> /etc/sudoers.d/cis_conf + if [ "${OS}" = ubuntu ]; then + { + echo "Cmnd_Alias SOFTWARE = usr/bin/apt, /usr/bin/dpkg, /usr/bin/apt-get" + echo "" + } >> /etc/sudoers.d/local_conf + else + { + echo "Cmnd_Alias SOFTWARE = /bin/rpm, /usr/bin/up2date, /usr/bin/yum" + echo "" + } >> /etc/sudoers.d/local_conf + fi + { + echo "##################" + echo "#### Services ####" + echo "##################" + echo "Cmnd_Alias SERVICES = /sbin/service, /sbin/chkconfig, /usr/bin/systemctl start, /usr/bin/systemctl stop, /usr/bin/systemctl reload, /usr/bin/systemctl restart, /usr/bin/systemctl status, /usr/bin/systemctl enable, /usr/bin/systemctl disable" + echo "" + echo "######################################" + echo "#### Updating the locate database ####" + echo "######################################" + echo "Cmnd_Alias LOCATE = /usr/bin/updatedb" + echo "" + echo "#################" + echo "#### Storage ####" + echo 
"#################" + echo "Cmnd_Alias STORAGE = /sbin/fdisk, /sbin/sfdisk, /sbin/parted, /sbin/partprobe, /bin/mount, /bin/umount" + echo "" + echo "################################" + echo "#### Delegating permissions ####" + echo "################################" + echo "Cmnd_Alias DELEGATING = /usr/sbin/visudo, /bin/chown, /bin/chmod, /bin/chgrp" + echo "" + echo "###################" + echo "#### Processes ####" + echo "###################" + echo "Cmnd_Alias PROCESSES = /bin/nice, /bin/kill, /usr/bin/kill, /usr/bin/killall" + echo "" + echo "#################" + echo "#### Drivers ####" + echo "#################" + echo "Cmnd_Alias DRIVERS = /sbin/modprobe" + echo "" + echo "###########################################################################" + echo "#### Reboot and ShutDown removed from DBA's and Developers 3/4/20 - PC ####" + echo "###########################################################################" + echo "Cmnd_Alias SHUTDOWN = /sbin/shutdown, /sbin/reboot, /sbin/halt, /sbin/poweroff" + echo "" + echo "###########################" + echo "#### Our System Groups ####" + echo "###########################" + echo "%dba ALL= NOPASSWD: /usr/bin/su - applmgr, /usr/bin/su - oracle, !NETWORKING, !SOFTWARE, !SERVICES, !STORAGE, !DELEGATING, !PROCESSES, !LOCATE, !DRIVERS, !SHUTDOWN" + echo "%dev ALL= NOPASSWD: /usr/bin/su - applmgr, !NETWORKING, !SOFTWARE, !SERVICES, !STORAGE, !DELEGATING, !PROCESSES, !LOCATE, !DRIVERS, !SHUTDOWN" + } >> /etc/sudoers.d/local_conf + #### 5.4.4 Ensure default user umask is 027 or more restrictive #### + if [ "${OS}" = "ubuntu" ]; then + grep -Eq "^(\s*)umask\s+\S+(\s*#.*)?\s*$" /etc/bash.bashrc && sed -ri "s/^(\s*)umask\s+\S+(\s*#.*)?\s*$/\1umask 027\2/" /etc/bash.bashrc || echo "umask 027" >> /etc/bash.bashrc + else + grep -Eq "^(\s*)umask\s+\S+(\s*#.*)?\s*$" /etc/bashrc && sed -ri "s/^(\s*)umask\s+\S+(\s*#.*)?\s*$/\1umask 027\2/" /etc/bashrc || echo "umask 027" >> /etc/bashrc + fi + grep -Eq 
"^(\s*)umask\s+\S+(\s*#.*)?\s*$" /etc/profile && sed -ri "s/^(\s*)umask\s+\S+(\s*#.*)?\s*$/\1umask 027\2/" /etc/profile || echo "umask 027" >> /etc/profile + #### 5.4.5 Ensure default user shell timeout is 900 seconds or less #### + if grep TMOUT=900 /etc/bashrc; then + sed -i 's/TMOUT=900/#TMOUT=900/g' /etc/bashrc + fi + + if grep TMOUT=900 /etc/profile; then + sed -i 's/TMOUT=900/#TMOUT=900/g' /etc/profile + fi + cat >> /etc/profile << 'EOF' + if [ "$(id -nu)" == "root" ] || [ "$(id -nu)" == "opc" ]; then + TMOUT=3600 + readonly TMOUT + export TMOUT + else + TMOUT=900 + readonly TMOUT + export TMOUT + fi +EOF + cat >> /etc/bashrc << 'EOF' + if [ "$(id -nu)" == "root" ] || [ "$(id -nu)" == "opc" ]; then + if ! echo $TMOUT | grep -q 3600; then + TMOUT=3600 + readonly TMOUT + export TMOUT + fi + else + if ! echo $TMOUT | grep -q 900; then + TMOUT=900 + readonly TMOUT + export TMOUT + fi + fi +EOF + #### 5.4.5A Ensure default user umask is configured - system wide #### + sed -ri 's/^([^#]+\s+)?(umask\s+)(\S+\s*)(\s+.*)?$/\1\2 027\4/' /etc/login.defs + sed -ri 's/^([^#]+\s+)?(umask\s+)(\S+\s*)(\s+.*)?$/\1\2 027\4/' /etc/profile + sed -ri 's/^([^#]+\s+)?(umask\s+)(\S+\s*)(\s+.*)?$/\1\2 027\4/' /etc/bashrc + touch /etc/profile.d/cis_profile.sh + chmod 644 /etc/profile.d/cis_profile.sh + echo " + ################################ + ### Added for CIS Compliance ### + ################################ + umask 077 + " > /etc/profile.d/cis_profile.sh + #### 5.5 Ensure root login is restricted to system console #### + xargs -n 1 cp -v /etc/securetty <<< ""${BACKUP} /etc/securetty.bak"" + echo "console" > /etc/securetty + ### 5.6 Ensure access to the su command is restricted ### + PAMSU="/etc/pam.d/su" + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + xargs -n 1 cp -v ${PAMSU} <<< ""${BACKUP} ${PAMSU}.bak"" + if [ -e ${PAMSU} ]; then + cp ${PAMSU} ${PAMSU}.tmp + awk '( $1=="#auth" && $2=="required" && $3~"pam_wheel.so" ) { print "auth\t\trequired\t",$3,"\tuse_uid"; 
next }; + { print }' ${PAMSU}.tmp > ${PAMSU} + chown root:root ${PAMSU} + chmod 0644 ${PAMSU} + rm ${PAMSU}.tmp + fi + elif [ "${OS}" = ubuntu ]; then + sed -i 's/# auth[[:blank:]]*required[[:blank:]]*pam_wheel.so/auth required pam_wheel.so use_uid/g' /etc/pam.d/su + sed -i 's/auth required pam_wheel.so use_uid deny group=nosu/#auth required pam_wheel.so deny group=nosu/g' /etc/pam.d/su + fi + stop_spinner $? + } | tee -a $LOG +} + +################################ +#### 6.1 System Permissions #### +################################ +function audit_file_permissions() { + { + start_spinner 'Auditing File Permissions...' + echo "" + ### 6.1.1 Audit system file permissions ### + if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol ]]; then + rpm -Va --nomtime --nosize --nomd5 --nolinkto + elif [ "${OS}" = ubuntu ]; then + ${PAKMGR} install debsums + debsums -s + fi + ### 6.1.2 Ensure permissions on /etc/passwd are configured ### + chmod 644 /etc/passwd + chown root.root /etc/passwd + ### 6.1.3 Ensure permissions on /etc/shadow are configured ### + chmod 000 /etc/shadow + chown root.root /etc/shadow + ### 6.1.4 Ensure permissions on /etc/group are configured ### + chmod 644 /etc/group + chown root.root /etc/group + ### 6.1.5 Ensure permissions on /etc/gshadow are configured ### + chmod 000 /etc/gshadow + chown root:root /etc/gshadow + ### 6.1.6 Ensure permissions on /etc/passwd- are configured ### + chmod 644 /etc/passwd- + chown root.root /etc/passwd- + ### 6.1.7 Ensure permissions on /etc/shadow- are configured ### + chmod 000 /etc/shadow- + chown root.root /etc/shadow- + ### 6.1.4 Ensure permissions on /etc/group- are configured ### + chmod 644 /etc/group- + chown root.root /etc/group- + ### 6.1.5 Ensure permissions on /etc/gshadow- are configured ### + chmod 000 /etc/gshadow- + chown root:root /etc/gshadow- + stop_spinner $? 
+ } | tee -a $LOG +} + +#################################### +#### 6.1.1 World Writable Files #### +#################################### +function world_writable_files() { + { + start_spinner 'Resetting Permissions on all World Writable, Unowned and Ungrouped Files...' + echo "" + #### 6.1.10 Ensure no world writable files exist #### + df --local -P | awk '{if (NR!=1) print $6}' | xargs -I '{}' find '{}' -xdev -type f -perm -0002 + #### 6.1.11 Ensure no unowned files or directories exist #### + df --local -P | awk '{if (NR!=1) print $6}' | xargs -I '{}' find '{}' -xdev -nouser -ls + #### 6.1.12 Ensure no ungrouped files or directories exist #### + df --local -P | awk '{if (NR!=1) print $6}' | xargs -I '{}' find '{}' -xdev -nogroup -ls + #### 6.1.12 Audit SUID executables #### + df --local -P | awk '{if (NR!=1) print $6}' | xargs -I '{}' find '{}' -xdev -type f -perm -4000 -print + #### 6.1.14 Audit SGID executables #### + df --local -P | awk '{if (NR!=1) print $6}' | xargs -I '{}' find '{}' -xdev -type f -perm -2000 -print + stop_spinner $? + } | tee -a $LOG +} + +##################################### +#### 6.2 User and Group Settings #### +##################################### +function user_group_settings() { + { + start_spinner 'Configuring User and Group Settings...' 
+ echo "" + #### 6.2.1 Ensure password fields are not empty #### + awk -F: '($2 == "" ) { print $1 " does not have a password "}' /etc/shadow + #### 6.2.2 Ensure no legacy "+" entries exist in /etc/passwd #### + grep '^+:' /etc/passwd + #### 6.2.3 Ensure no legacy "+" entries exist in /etc/shadow #### + grep '^+:' /etc/shadow + #### 6.2.4 Ensure no legacy "+" entries exist in /etc/group #### + grep '^+:' /etc/group + #### 6.2.5 Ensure root is the only UID 0 account #### + awk -F: '($3 == 0) { print $1 }' /etc/passwd + #### 6.2.6 Ensure root PATH Intergrity #### + if [ "$(echo "$PATH" | grep ::)" != "" ]; then + echo "Empty Directory in PATH (::)" + fi + if [ "$(echo "$PATH" | grep :$)" != "" ]; then + echo "Trailing : in PATH" + fi + p=$(echo "$PATH" | sed -e 's/::/:/' -e 's/:$//' -e 's/:/ /g') + set -- "$p" + while [ "$1" != "" ]; + do + if [ "$1" = "." ]; then + echo "PATH contains ." + shift + continue + fi + if [ -d "$1" ]; then + # shellcheck disable=SC2012 + dirperm=$(ls -ldH "$1" | cut -f1 -d" ") + if [ "$(echo "${dirperm}" | cut -c6)" != "-" ]; then + echo "Group Write permission set on directory $1" + fi + if [ "$(echo "${dirperm}" | cut -c9)" != "-" ]; then + echo "Other Write permission set on directory $1" + fi + # shellcheck disable=SC2012 + dirown=$(ls -ldH "$1" | awk '{print $3}') + if [ "${dirown}" != "root" ] ; then + echo "$1 is not owned by root" + fi + else + echo "$1 is not a directory" + fi + shift + done + stop_spinner $? + } | tee -a $LOG +} + +######################################################## +#### 6.2 User and Group Settings Continued - Part 1 #### +######################################################## +function home_directories() { + { + start_spinner 'Checking and Configuring User Directories...' 
+ echo "" + #### 6.2.7 Ensure all users' home directories exist #### + awk -F: '{ print $1 " " $3 " " $6 }' /etc/passwd | while read -r user uid dir + do + if [ "${uid}" -ge 500 ] && [ -d "${dir}" ] && [ "${user}" != "nfsnobody" ]; then + owner=$(stat -L -c "%U" "${dir}") + if [ "${owner}" != "${user}" ]; then + echo "The home directory (${dir}) of user ${user} is owned by ${owner}." + fi + fi + done + #### 6.2.8 Ensure users' home directories permissions are 750 or more restrictive #### + grep -Ev '(root|halt|sync|shutdown)' /etc/passwd | awk -F: '($8 == "PS" && $7 != "/sbin/nologin") { print $6 }' | while read -r dir + do + # shellcheck disable=SC2012 + dirperm=$(ls -ld "${dir}" | cut -f1 -d" ") + if [ "$( echo "${dirperm}" | cut -c6 )" != "-" ]; then + echo "Group Write permission set on directory $dir" + fi + if [ "$( echo "${dirperm}" | cut -c8 )" != "-" ]; then + echo "Other Read permission set on directory $dir" + fi + if [ "$( echo "${dirperm}" | cut -c9 )" != "-" ]; then + echo "Other Write permission set on directory $dir" + fi + if [ "$( echo "${dirperm}" | cut -c10 )" != "-" ]; then + echo "Other Execute permission set on directory $dir" + fi + done + #### 6.2.9 Ensure users own their home directories #### + awk -F: '{ print $1 " " $3 " " $6 }' /etc/passwd | while read -r user uid dir + do + if [ "$uid" -ge 500 ] && [ ! -d "$dir" ] && [ "$user" != "nfsnobody" ]; then + echo "The home directory ($dir) of user $user does not exist." + fi + done + stop_spinner $? + } | tee -a $LOG +} + +######################################################## +#### 6.2 User and Group Settings Continued - Part 2 #### +######################################################## +function dot_files() { + { + start_spinner 'Checking and Configuring Hidden Files and Directories...' 
+        echo ""
+        #### 6.2.10 Ensure users' dot files are not group or world writable ####
+        # First pass reports offending dot files ...
+        grep -Ev '(root|sync|halt|shutdown)' /etc/passwd | awk -F: '($7 != "/sbin/nologin") { print $6 }' | while read -r dir
+        do
+            for file in "$dir"/.[A-Za-z0-9]*
+            do
+                if [ ! -h "${file}" ] && [ -f "${file}" ]; then
+                    # %A prints the ls-style mode string, e.g. -rw-r--r--
+                    fileperm=$(stat -c '%A' "${file}")
+                    if [ "${fileperm:5:1}" != "-" ]; then
+                        echo "Group Write permission set on file $file"
+                    fi
+                    if [ "${fileperm:8:1}" != "-" ]; then
+                        echo "Other Write permission set on file $file"
+                    fi
+                fi
+            done
+        done
+        # ... second pass removes group/other write from the dot files of UID >= 500 accounts.
+        awk -F: '($3 >= 500) { print $6 }' /etc/passwd | while read -r DIR
+        do
+            for FILE in "$DIR"/.[A-Za-z0-9]*
+            do
+                if [ ! -h "$FILE" ] && [ -f "$FILE" ]; then
+                    chmod go-w "$FILE"
+                fi
+            done
+        done
+        #### 6.2.11 Ensure no users have .forward files ####
+        awk -F: '{ print $6 }' /etc/passwd | while read -r dir
+        do
+            if [ ! -h "$dir/.forward" ] && [ -f "$dir/.forward" ]; then
+                echo ".forward file $dir/.forward exists"
+            fi
+        done
+        #### 6.2.12 Ensure no users have .netrc files ####
+        awk -F: '{ print $6 }' /etc/passwd | while read -r dir
+        do
+            if [ ! -h "$dir/.netrc" ] && [ -f "$dir/.netrc" ]; then
+                echo ".netrc file $dir/.netrc exists"
+            fi
+        done
+        #### Ensure users' .netrc Files are not group or world accessible ####
+        grep -Ev '(root|halt|sync|shutdown)' /etc/passwd | awk -F: '($7 != "/sbin/nologin") { print $6 }' | while read -r dir
+        do
+            file="${dir}/.netrc"
+            if [ ! -h "$file" ] && [ -f "$file" ]; then
+                fileperm=$(stat -c '%A' "$file")
+                # Offsets 4-9 of the mode string are the group r/w/x and other r/w/x bits.
+                offset=4
+                for label in "Group Read" "Group Write" "Group Execute" "Other Read" "Other Write" "Other Execute"; do
+                    if [ "${fileperm:${offset}:1}" != "-" ]; then
+                        echo "${label} set on $file"
+                    fi
+                    offset=$((offset + 1))
+                done
+            fi
+        done
+        #### Ensure no users have .rhosts files ####
+        grep -Ev '(root|halt|sync|shutdown)' /etc/passwd | awk -F: '($7 != "/sbin/nologin") { print $6 }' | while read -r dir
+        do
+            file="${dir}/.rhosts"
+            if [ ! -h "$file" ] && [ -f "$file" ]; then
+                echo ".rhosts file in $dir"
+            fi
+        done
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+########################################################
+#### 6.2 User and Group Settings Continued - Part 3 ####
+########################################################
+# Consistency audit of /etc/passwd and /etc/group: dangling primary groups and
+# duplicate UIDs, GIDs, user names and group names. Report only; every finding
+# is echoed to stdout and reaches $LOG through the trailing tee.
+function group_gid_uid() {
+    {
+        start_spinner 'Checking that all Group and UserIDs are valid...'
+        echo ""
+        #### Ensure all groups in /etc/passwd exist in /etc/group ####
+        cut -s -d: -f4 /etc/passwd | sort -u | while read -r gid
+        do
+            # Compare against the GID field itself instead of assuming the password field is "x".
+            if ! awk -F: -v gid="${gid}" '($3 == gid) { found = 1 } END { exit !found }' /etc/group; then
+                echo "Group $gid is referenced by /etc/passwd but does not exist in /etc/group"
+            fi
+        done
+        # The four duplicate checks share one shape: "sort | uniq -d" yields each
+        # repeated key once, then awk collects the entries that carry it. This
+        # replaces the "uniq -c" + "set -" parsing, which broke for group names
+        # (quoted 'set - "${x}"' made the numeric test fail).
+        #### Ensure no duplicate UIDs exist ####
+        cut -d: -f3 /etc/passwd | sort -n | uniq -d | while read -r dup
+        do
+            users=$(awk -F: -v n="${dup}" '($3 == n) { print $1 }' /etc/passwd | xargs)
+            echo "Duplicate UID (${dup}): ${users}"
+        done
+        #### 6.2.17 Ensure no duplicate GIDs exist ####
+        cut -d: -f3 /etc/group | sort -n | uniq -d | while read -r dup
+        do
+            grps=$(awk -F: -v n="${dup}" '($3 == n) { print $1 }' /etc/group | xargs)
+            # Plain echo like the other checks; the former direct ">> ${LOG}" hid the
+            # finding from stdout and wrote to the file tee is already appending to.
+            echo "Duplicate GID (${dup}): ${grps}"
+        done
+        #### 6.2.18 Ensure no duplicate user names exist ####
+        cut -d: -f1 /etc/passwd | sort | uniq -d | while read -r dup
+        do
+            uids=$(awk -F: -v n="${dup}" '($1 == n) { print $3 }' /etc/passwd | xargs)
+            echo "Duplicate User Name (${dup}): ${uids}"
+        done
+        #### 6.2.19 Ensure no duplicate group names exist ####
+        cut -d: -f1 /etc/group | sort | uniq -d | while read -r dup
+        do
+            gids=$(awk -F: -v n="${dup}" '($1 == n) { print $3 }' /etc/group | xargs)
+            echo "Duplicate Group Name (${dup}): ${gids}"
+        done
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+##########################################
+#### Auto Unattended Security Updates ####
+##########################################
+function auto_updates() {
+    {
+        start_spinner 'Configuring Auto Security Updates...'
+        echo ""
+        if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then
+            # Release 7 is configured through yum-cron.
+            if [ "${OSVER}" = 7 ]; then
+                ${PAKMGR} install yum-cron
+                sed -i 's/update_cmd = default/update_cmd = security/g' /etc/yum/yum-cron.conf
+                sed -i 's/apply_updates = no/apply_updates = yes/g' /etc/yum/yum-cron.conf
+                sed -i 's/download_updates = no/download_updates = yes/g' /etc/yum/yum-cron-hourly.conf
+                systemctl enable yum-cron
+                systemctl start yum-cron
+            fi
+            # Releases 8 and 9 are configured through dnf-automatic; 9 used to
+            # fall through both tests and was silently left unconfigured.
+            if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
+                ${PAKMGR} install dnf-automatic
+                sed -i 's/upgrade_type = default/upgrade_type = security/g' /etc/dnf/automatic.conf
+                sed -i 's/apply_updates = no/apply_updates = yes/g' /etc/dnf/automatic.conf
+                systemctl enable --now dnf-automatic.timer
+            fi
+        elif [ "${OS}" = ubuntu ]; then
+            ${PAKMGR} install unattended-upgrades apticron
+            touch /etc/apt/apt.conf.d/20auto-upgrades
+            # no_show is defined elsewhere in this script; its output becomes
+            # the content of 20auto-upgrades.
+            no_show << EOF > /etc/apt/apt.conf.d/20auto-upgrades
+            APT::Periodic::Update-Package-Lists "1";
+            APT::Periodic::Download-Upgradeable-Packages "1";
+            APT::Periodic::AutocleanInterval "7";
+            APT::Periodic::Unattended-Upgrade "1";
+EOF
+            # Uncomment the "mail root" line in 50unattended-upgrades.
+            sed -i 's/\/\/Unattended-Upgrade\:\:Mail "root";/Unattended-Upgrade\:\:Mail "root";/g' /etc/apt/apt.conf.d/50unattended-upgrades
+        fi
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+#######################################################
+#### Install SysStat Redhat/CentOS 7 and 8, Ubuntu ####
+#######################################################
+function install_sysstat() {
+    {
+        start_spinner 'Installing and Configuring SysStat...'
+        echo ""
+        ${PAKMGR} install sysstat
+        if [ "${OS}" = ubuntu ]; then
+            sed -i 's/ENABLED="false"/ENABLED="true"/g' /etc/default/sysstat
+            no_show << EOF > /etc/cron.d/sysstat
+            # The first element of the path is a directory where the debian-sa1
+            # script is located
+            PATH=/usr/lib/sysstat:/usr/sbin:/usr/sbin:/usr/bin:/sbin:/bin
+
+            # Activity reports every 10 minutes everyday
+            5-55/10 * * * * root command -v debian-sa1 > /dev/null && debian-sa1 1 1
+
+            # Additional run at 23:59 to rotate the statistics file
+            59 23 * * * root command -v debian-sa1 > /dev/null && debian-sa1 60 2
+EOF
+        else
+            if [ ! -d /var/log/sa ]; then
+                mkdir /var/log/sa
+            fi
+        fi
+        systemctl enable sysstat
+        systemctl start sysstat
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+##############################################################
+#### Install RootKit Hunter Redhat/CentOS 7 and 8, Ubuntu ####
+##############################################################
+# Install rkhunter for the detected ${OS}, run "rkhunter --update" and
+# "rkhunter --propupd", then set ALLOW_SSH_ROOT_USER=no in /etc/rkhunter.conf.
+# Output is piped through "tee -a $LOG".
+function install_rkhunter() {
+    {
+        start_spinner 'Installing and Configuring RKHunter...'
+        echo ""
+        # Oracle Linux has to be tested before the generic EL branch. The
+        # generic test used to include "ol", which made this branch (and its
+        # oracle-epel-release-elN packages) unreachable.
+        if [ "${OS}" = ol ]; then
+            if [ "${OSVER}" = 7 ]; then
+                ${PAKMGR} install oracle-epel-release-el7
+                sed -i 's/enabled=0/enabled=1/g' /etc/yum.repos.d/oracle-epel-ol7.repo
+            fi
+            if [ "${OSVER}" = 8 ]; then
+                ${PAKMGR} install oracle-epel-release-el8
+                sed -i 's/enabled=0/enabled=1/g' /etc/yum.repos.d/oracle-epel-ol8.repo
+            fi
+            ${PAKMGR} install rkhunter
+        elif [[ ${OS} = centos || ${OS} = rhel || ${OS} = rocky || ${OS} = almalinux ]]; then
+            ${PAKMGR} install epel-release
+            ${PAKMGR} install rkhunter
+        elif [ "${OS}" = ubuntu ]; then
+            # Preseed the postfix debconf answers before the non-interactive
+            # install. The whole selection line must be ONE here-string word;
+            # the old quoting passed only "postfix" on stdin and the remaining
+            # words as file arguments.
+            debconf-set-selections <<< "postfix postfix/mailname string ${HOSTNAME}"
+            debconf-set-selections <<< "postfix postfix/main_mailer_type string 'Local Only'"
+            DEBIAN_FRONTEND=noninteractive ${PAKMGR} install rkhunter >> ${LOG} 2>&1
+        fi
+        rkhunter --update
+        rkhunter --propupd
+        sed -i 's/ALLOW_SSH_ROOT_USER=unset/ALLOW_SSH_ROOT_USER=no/g' /etc/rkhunter.conf
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+###################################################
+#### Install LMD Redhat/CentOS 7 and 8, Ubuntu ####
+###################################################
+# Install Linux Malware Detect from the upstream tarball, switch on e-mail
+# alerts and quarantine in conf.maldet, then install ClamAV and run freshclam.
+# Output is piped through "tee -a $LOG".
+function install_lmd() {
+    {
+        start_spinner 'Installing and Configuring MalDetect...'
+        echo ""
+        # Oracle Linux has to be tested before the generic EL branch. The
+        # generic test used to include "ol", which made this branch unreachable.
+        if [ "${OS}" = ol ]; then
+            if [ "${OSVER}" = 7 ]; then
+                ${PAKMGR} install oracle-epel-release-el7
+                sed -i 's/enabled=0/enabled=1/g' /etc/yum.repos.d/oracle-epel-ol7.repo
+            fi
+            if [ "${OSVER}" = 8 ]; then
+                ${PAKMGR} install oracle-epel-release-el8
+                sed -i 's/enabled=0/enabled=1/g' /etc/yum.repos.d/oracle-epel-ol8.repo
+            fi
+            ${PAKMGR} install mailx inotify-tools tar wget
+        elif [[ ${OS} = centos || ${OS} = rhel || ${OS} = rocky || ${OS} = almalinux ]]; then
+            ${PAKMGR} install epel-release
+            ${PAKMGR} install mailx inotify-tools tar wget
+        elif [ "${OS}" = ubuntu ]; then
+            export DEBIAN_FRONTEND=noninteractive
+            ${PAKMGR} install inotify-tools wget
+        fi
+        # install.sh from this archive is executed below, so fetch it over
+        # TLS rather than plain http.
+        wget https://www.rfxn.com/downloads/maldetect-current.tar.gz
+        tar -xvzf maldetect-current.tar.gz
+        cd maldetect-1* || return $?
+        ./install.sh
+        cd .. || return $?
+        rm -rf maldetect-*
+        if [ "${OS}" = ubuntu ]; then
+            # -f keeps a re-run from failing when the link already exists
+            ln -sf /usr/local/maldetect/maldet /bin/maldet
+            hash -r
+        fi
+        sed -i 's/email_alert="0"/email_alert="1"/g' /usr/local/maldetect/conf.maldet
+        sed -i 's/email_addr="you@domain.com"/email_addr="root@localhost"/g' /usr/local/maldetect/conf.maldet
+        sed -i 's/quarantine_hits="0"/quarantine_hits="1"/g' /usr/local/maldetect/conf.maldet
+        sed -i 's/quarantine_clean="0"/quarantine_clean="1"/g' /usr/local/maldetect/conf.maldet
+        # Same EL family list as the package branch above; rocky and almalinux
+        # were previously left without ClamAV while freshclam still ran.
+        if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then
+            ${PAKMGR} install clamav clamav-devel
+        elif [ "${OS}" = ubuntu ]; then
+            export DEBIAN_FRONTEND=noninteractive
+            ${PAKMGR} install clamav clamav-daemon clamdscan clamav-freshclam
+        fi
+        freshclam
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+##########################
+#### Install Logwatch ####
+##########################
+# Install logwatch, uncomment the home-dir, mail-dir and disk-usage options
+# in zz-disk_space.conf, set $HTTP_IGNORE_ERROR_HACKS to 1 in http.conf and
+# change "Detail = Low" to "Detail = Med" in logwatch.conf.
+# Output is piped through "tee -a $LOG".
+function install_logwatch() {
+    {
+        start_spinner 'Installing and Configuring LogWatch...'
+        ${PAKMGR} install logwatch
+        LOG_ZZ=/usr/share/logwatch/default.conf/services/zz-disk_space.conf
+        # The "$" characters below are literal text in the logwatch config
+        # files, hence the single quotes and the SC2016 suppressions.
+        # shellcheck disable=SC2016
+        sed -i 's/#$show_home_dir_sizes = 1/$show_home_dir_sizes = 1/g' $LOG_ZZ
+        # shellcheck disable=SC2016
+        sed -i 's/#$home_dir = "\/home"/$home_dir = "\/home"/g' $LOG_ZZ
+        # This expression used to replace the commented line with itself (a
+        # no-op); uncomment it like its siblings, since $mail_dir is enabled
+        # on the next line.
+        # shellcheck disable=SC2016
+        sed -i 's/#$show_mail_dir_sizes = 1/$show_mail_dir_sizes = 1/g' $LOG_ZZ
+        # shellcheck disable=SC2016
+        sed -i 's/#$mail_dir = "\/var\/spool\/mail/$mail_dir = "\/var\/spool\/mail/g' $LOG_ZZ
+        # shellcheck disable=SC2016
+        sed -i 's/#$show_disk_usage = 1/$show_disk_usage = 1/g' $LOG_ZZ
+        # shellcheck disable=SC2016
+        sed -i 's/$HTTP_IGNORE_ERROR_HACKS = 0/$HTTP_IGNORE_ERROR_HACKS = 1/g' /usr/share/logwatch/default.conf/services/http.conf
+        sed -i 's/Detail = Low/Detail = Med/g' /usr/share/logwatch/default.conf/logwatch.conf
+        stop_spinner $?
+    } | tee -a $LOG
+}
+
+###############################
+#### Oracle EBS PreInstall ####
+###############################
+function oci_oracle_ebs_setup() {
+    {
+        start_spinner 'Configuring Server for Oracle EBS/WebLogic...'
+        if [ "${SRVTYPE}" != 3 ]; then
+            if [[ ${OS} = centos || ${OS} = rhel || ${OS} = ol || ${OS} = rocky || ${OS} = almalinux ]]; then
+                ${PAKMGR} install oracle-ebs-server-R12-preinstall openmotif21
+                oci-network-config -X ens3
+                sed -i 's/PRESERVE_HOSTINFO=0/PRESERVE_HOSTINFO=2/g' /etc/oci-hostname.conf
+                groupadd dba
+                groupadd dev
+                touch /etc/oraInst.loc
+                chmod 600 /etc/oraInst.loc
+                chown applmgr. /etc/oraInst.loc
+            elif [ "${OS}" = ubuntu ]; then
+                echo ""
+                echo -e "\e[7m**** !EBS PreInstall for Ubuntu is not supported!
****\e[0m" + echo "" + fi + fi + if [ "${SRVTYPE}" == 3 ]; then + oci-network-config -X ens3 + sed -i 's/PRESERVE_HOSTINFO=0/PRESERVE_HOSTINFO=2/g' /etc/oci-hostname.conf + fi + } | tee -a $LOG +} + +######################## +#### Function Calls #### +######################## +function oci_rh_ub_common() { + { + check_root + backup + make_swap + time_set + disable_filesystems + tmp_directory + stickybit + gpgkeys + aide_install + sudo_changes + boot_load + core_dumps + sysctl_conf + pre_link + se_troubleshoot_mcs + unconf_daemons + se_linux + banners + inet_service + ntp_config + chrony_cfg + update_security + unsecure_services + mail_config + addon_inet_services + service_clients + tcp_wrappers + auto_updates + uncommon_protocols + iptables_config + auditd_accounting + rsyslog_service + journald_config + logfile_permissions + crond_enabled + compress_auditd + config_sshd + config_pam + accounts + config_users_permissions + audit_file_permissions + world_writable_files + user_group_settings + home_directories + dot_files + group_gid_uid + install_sysstat + install_rkhunter + install_lmd + install_logwatch + } +} + +################## +#### OCI Only #### +################## +function oci_only() { + { + oci_iptables + } +} + +################## +#### AWS Only #### +################## +function aws_only() { + { + aws_iptables + } +} + +warn_message \ No newline at end of file diff --git a/add-http-auth.sh b/add-http-auth.sh new file mode 100644 index 0000000..3a1f476 --- /dev/null +++ b/add-http-auth.sh @@ -0,0 +1,809 @@ +#!/bin/bash +################################################################################ +# Script Name: add-http-auth.sh +# Version: 3.0 +# Description: Add HTTP Basic Auth to Prometheus stack reverse proxies +# Supports both nginx and Apache — auto-detects which is in use. +# Uses non-destructive include snippets to preserve existing +# HTTPS/certbot configs. 
+# +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +# +# Supported Services: +# - Prometheus (port 9090) +# - Alertmanager (port 9093) +# - Mimir (port 9009) — optionally protects /api/v1/push +# - Loki (port 3100) — optionally protects /loki/api/v1/push +# +# Supported Web Servers: +# - nginx — inserts 'include' snippets into location blocks +# - Apache — inserts 'Include' snippets into blocks +# +# Usage: +# sudo ./add-http-auth.sh +# sudo ./add-http-auth.sh --remove +# sudo ./add-http-auth.sh --status +# +################################################################################ + +set -euo pipefail + +SCRIPT_VERSION="3.0" +BACKUP_DIR="/var/backups/http-auth" + +# Detected at runtime +WEB_SERVER="" # "nginx" or "apache" +CONFIG_DIR="" # where vhost configs live +SNIPPET_DIR="" # where auth snippets go +AUTH_DIR="" # where htpasswd files go +WEB_USER="" # www-data, nginx, apache, etc. +SERVICE_NAME="" # systemd service name + +# Service definitions: name|nginx_config|apache_config|port +SERVICES=( + "prometheus|prometheus.conf|prometheus.conf|9090" + "alertmanager|alerts.conf|alerts.conf|9093" + "mimir|mimir.conf|mimir.conf|9009" + "loki|loki.conf|loki.conf|3100" +) + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +show_usage() { + cat <&2 + exit 1 +} + +warn() { + echo "WARNING: $1" >&2 +} + +# Get the config filename for the current web server +get_config_file() { + local entry="$1" + local name nginx_conf apache_conf port + IFS='|' read -r name nginx_conf apache_conf port <<< "$entry" + if [ "$WEB_SERVER" = "nginx" ]; then + echo "$nginx_conf" + else + echo "$apache_conf" + fi +} + +get_service_name() { + local entry="$1" + IFS='|' read -r name _ _ _ <<< "$entry" + echo "$name" +} + +get_service_port() { + local entry="$1" + IFS='|' read -r _ _ _ port <<< "$entry" + 
echo "$port" +} + +# ============================================================================ +# WEB SERVER DETECTION +# ============================================================================ + +detect_web_server() { + local has_nginx=false + local has_apache=false + + if command -v nginx &>/dev/null && systemctl is-active --quiet nginx 2>/dev/null; then + has_nginx=true + fi + + if command -v apache2ctl &>/dev/null && systemctl is-active --quiet apache2 2>/dev/null; then + has_apache=true + elif command -v httpd &>/dev/null && systemctl is-active --quiet httpd 2>/dev/null; then + has_apache=true + fi + + if [ "$has_nginx" = true ] && [ "$has_apache" = true ]; then + echo "" + echo "Both nginx and Apache detected. Which are you using for reverse proxies?" + echo " 1) nginx" + echo " 2) Apache" + read -r -p "Select [1]: " choice + case "${choice:-1}" in + 2) WEB_SERVER="apache" ;; + *) WEB_SERVER="nginx" ;; + esac + elif [ "$has_nginx" = true ]; then + WEB_SERVER="nginx" + elif [ "$has_apache" = true ]; then + WEB_SERVER="apache" + else + die "Neither nginx nor Apache detected as running" + fi + + echo " Detected web server: ${WEB_SERVER}" +} + +# Set paths based on detected web server +configure_paths() { + if [ "$WEB_SERVER" = "nginx" ]; then + if [ -d "/etc/nginx/sites-available" ]; then + CONFIG_DIR="/etc/nginx/sites-available" + elif [ -d "/etc/nginx/conf.d" ]; then + CONFIG_DIR="/etc/nginx/conf.d" + else + die "nginx config directory not found" + fi + SNIPPET_DIR="/etc/nginx/snippets" + AUTH_DIR="/etc/nginx/auth" + SERVICE_NAME="nginx" + + if id "www-data" &>/dev/null; then + WEB_USER="www-data" + elif id "nginx" &>/dev/null; then + WEB_USER="nginx" + else + WEB_USER="root" + fi + else + # Apache + if [ -d "/etc/apache2/sites-available" ]; then + CONFIG_DIR="/etc/apache2/sites-available" + SNIPPET_DIR="/etc/apache2/conf-available" + SERVICE_NAME="apache2" + elif [ -d "/etc/httpd/conf.d" ]; then + CONFIG_DIR="/etc/httpd/conf.d" + 
SNIPPET_DIR="/etc/httpd/conf.d" + SERVICE_NAME="httpd" + else + die "Apache config directory not found" + fi + AUTH_DIR="/etc/httpd/auth" + [ -d "/etc/apache2" ] && AUTH_DIR="/etc/apache2/auth" + + if id "www-data" &>/dev/null; then + WEB_USER="www-data" + elif id "apache" &>/dev/null; then + WEB_USER="apache" + else + WEB_USER="root" + fi + fi +} + +# ============================================================================ +# HTTPS DETECTION +# ============================================================================ + +has_https() { + local config_file="$1" + if [ "$WEB_SERVER" = "nginx" ]; then + grep -qE 'listen\s+.*443\s+ssl' "$config_file" 2>/dev/null + else + grep -qE 'SSLEngine\s+on|/dev/null + fi +} + +# ============================================================================ +# AUTH SNIPPET CHECK +# ============================================================================ + +has_auth_snippet() { + local config_file="$1" + local service="$2" + if [ "$WEB_SERVER" = "nginx" ]; then + grep -qF "include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" 2>/dev/null + else + grep -qF "Include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" 2>/dev/null + fi +} + +# ============================================================================ +# SETUP FUNCTIONS +# ============================================================================ + +install_htpasswd() { + if command -v htpasswd &>/dev/null; then + return 0 + fi + + echo "Installing htpasswd..." 
+ if command -v apt-get &>/dev/null; then + apt-get -y install apache2-utils + elif command -v dnf &>/dev/null; then + dnf -y install httpd-tools + elif command -v yum &>/dev/null; then + yum -y install httpd-tools + else + die "Cannot install htpasswd — install apache2-utils or httpd-tools manually" + fi +} + +backup_config() { + local config_file="$1" + local timestamp + timestamp=$(date +%F_%H%M%S) + local backup_path="${BACKUP_DIR}/${timestamp}" + + mkdir -p "$backup_path" + cp "$config_file" "$backup_path/" + echo " Backed up to ${backup_path}/$(basename "$config_file")" +} + +# ============================================================================ +# NGINX-SPECIFIC FUNCTIONS +# ============================================================================ + +nginx_create_snippet() { + local service="$1" + local display_name="$2" + + cat > "${SNIPPET_DIR}/auth-${service}.conf" < "$temp_file" + + mv "$temp_file" "$config_file" + echo " Inserted auth include into $(basename "$config_file")" +} + +nginx_insert_push_auth() { + local config_file="$1" + local service="$2" + + if grep -q "location.*/api/v1/push" "$config_file" && \ + ! 
grep -A2 "location.*/api/v1/push" "$config_file" | grep -qF "auth-${service}.conf"; then + local temp_file + temp_file=$(mktemp) + local include_line=" include ${SNIPPET_DIR}/auth-${service}.conf;" + awk -v inc="$include_line" ' + /location.*\/api\/v1\/push/ && !push_done { + print + print inc + push_done = 1 + next + } + { print } + ' "$config_file" > "$temp_file" + mv "$temp_file" "$config_file" + echo " Protected push endpoint with auth" + fi +} + +nginx_remove_auth() { + local config_file="$1" + local service="$2" + + local temp_file + temp_file=$(mktemp) + grep -vF "include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" + mv "$temp_file" "$config_file" +} + +nginx_test_config() { + nginx -t 2>&1 +} + +# ============================================================================ +# APACHE-SPECIFIC FUNCTIONS +# ============================================================================ + +apache_create_snippet() { + local service="$1" + local display_name="$2" + + cat > "${SNIPPET_DIR}/auth-${service}.conf" < or + local temp_file + temp_file=$(mktemp) + + if grep -qE '' "$config_file"; then + # Insert after opening tag + awk -v inc="$include_line" ' + // && !done { + print + print inc + done = 1 + next + } + { print } + ' "$config_file" > "$temp_file" + elif grep -qE ' before the first ProxyPass + awk -v inc="$include_line" -v sdir="${SNIPPET_DIR}" -v svc="$service" ' + /ProxyPass\s/ && !done { + # Add a Location block with auth before ProxyPass + print " " + print inc + print " " + print "" + done = 1 + } + { print } + ' "$config_file" > "$temp_file" + else + # No Location or Proxy block found — add a Location block before + awk -v inc="$include_line" ' + /<\/VirtualHost>/ && !done { + print "" + print " " + print inc + print " " + print "" + done = 1 + } + { print } + ' "$config_file" > "$temp_file" + fi + + mv "$temp_file" "$config_file" + echo " Inserted auth into $(basename "$config_file")" +} + +apache_insert_push_auth() { + local 
config_file="$1" + local service="$2" + local push_path="" + + if [ "$service" = "mimir" ]; then + push_path="/api/v1/push" + elif [ "$service" = "loki" ]; then + push_path="/loki/api/v1/push" + else + return 0 + fi + + # Check if there's already a Location block for the push path + if grep -qF "$push_path" "$config_file" && \ + ! grep -A3 "$push_path" "$config_file" | grep -qF "auth-${service}.conf"; then + backup_config "$config_file" + local temp_file + temp_file=$(mktemp) + local include_line=" Include ${SNIPPET_DIR}/auth-${service}.conf" + awk -v path="$push_path" -v inc="$include_line" ' + $0 ~ path && /Location/ && !push_done { + print + print inc + push_done = 1 + next + } + { print } + ' "$config_file" > "$temp_file" + mv "$temp_file" "$config_file" + echo " Protected push endpoint with auth" + fi +} + +apache_remove_auth() { + local config_file="$1" + local service="$2" + + local temp_file + temp_file=$(mktemp) + grep -vF "Include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" + mv "$temp_file" "$config_file" +} + +apache_test_config() { + if command -v apache2ctl &>/dev/null; then + apache2ctl configtest 2>&1 + else + httpd -t 2>&1 + fi +} + +# ============================================================================ +# GENERIC WRAPPERS (dispatch to nginx or apache functions) +# ============================================================================ + +create_snippet() { + if [ "$WEB_SERVER" = "nginx" ]; then + nginx_create_snippet "$@" + else + apache_create_snippet "$@" + fi +} + +insert_auth() { + if [ "$WEB_SERVER" = "nginx" ]; then + nginx_insert_auth "$@" + else + apache_insert_auth "$@" + fi +} + +insert_push_auth() { + if [ "$WEB_SERVER" = "nginx" ]; then + nginx_insert_push_auth "$@" + else + apache_insert_push_auth "$@" + fi +} + +remove_auth_from_config() { + if [ "$WEB_SERVER" = "nginx" ]; then + nginx_remove_auth "$@" + else + apache_remove_auth "$@" + fi +} + +test_config() { + if [ "$WEB_SERVER" = "nginx" ]; 
then + nginx_test_config + else + apache_test_config + fi +} + +# ============================================================================ +# STATUS & REMOVE +# ============================================================================ + +show_status() { + detect_web_server + configure_paths + + echo "" + echo "==========================================" + echo "HTTP Basic Auth Status (${WEB_SERVER})" + echo "==========================================" + echo "" + + for entry in "${SERVICES[@]}"; do + local name config_file + name=$(get_service_name "$entry") + config_file=$(get_config_file "$entry") + local display_name + display_name="${name^}" + local full_path="${CONFIG_DIR}/${config_file}" + + printf " %-14s " "${display_name}:" + + if [ ! -f "$full_path" ]; then + echo "no config found" + continue + fi + + if has_auth_snippet "$full_path" "$name"; then + if [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then + echo "ENABLED (htpasswd + snippet)" + else + echo "BROKEN (snippet exists but htpasswd file missing)" + fi + else + echo "not configured" + fi + done + + echo "" + echo "Web server: ${WEB_SERVER}" + echo "Config dir: ${CONFIG_DIR}" + echo "Snippet dir: ${SNIPPET_DIR}" + echo "Auth dir: ${AUTH_DIR}" + echo "Backup dir: ${BACKUP_DIR}" + echo "" +} + +do_remove() { + detect_web_server + configure_paths + + echo "" + echo "Removing HTTP Basic Auth from all services (${WEB_SERVER})..." + echo "" + + for entry in "${SERVICES[@]}"; do + local name config_file + name=$(get_service_name "$entry") + config_file=$(get_config_file "$entry") + local full_path="${CONFIG_DIR}/${config_file}" + + if [ ! -f "$full_path" ]; then + continue + fi + + if has_auth_snippet "$full_path" "$name"; then + backup_config "$full_path" + remove_auth_from_config "$full_path" "$name" + echo " Removed auth from ${config_file}" + fi + + rm -f "${SNIPPET_DIR}/auth-${name}.conf" + done + + echo "" + echo "Testing ${WEB_SERVER} configuration..." 
+ if test_config; then + systemctl reload "$SERVICE_NAME" + echo "" + echo "Auth removed and ${WEB_SERVER} reloaded." + else + warn "${WEB_SERVER} config test failed — check your config manually" + fi +} + +# ============================================================================ +# MAIN SETUP +# ============================================================================ + +setup_auth() { + detect_web_server + configure_paths + + echo "" + echo "==========================================" + echo "Add HTTP Basic Auth to Prometheus Stack" + echo "Version: ${SCRIPT_VERSION} (${WEB_SERVER})" + echo "==========================================" + + # Check for HTTPS + local has_any_https=false + for entry in "${SERVICES[@]}"; do + local name config_file + name=$(get_service_name "$entry") + config_file=$(get_config_file "$entry") + local full_path="${CONFIG_DIR}/${config_file}" + if [ -f "$full_path" ] && has_https "$full_path"; then + has_any_https=true + break + fi + done + + if [ "$has_any_https" = false ]; then + echo "" + warn "No HTTPS configuration detected!" + echo " Basic Auth over HTTP sends credentials in cleartext." + echo " Strongly recommended: run certbot first to enable HTTPS." + echo "" + read -r -p "Continue without HTTPS? [y/N]: " confirm + if [[ ! "$confirm" =~ ^[Yy]$ ]]; then + echo "Aborted. Run certbot first, then re-run this script." + exit 0 + fi + fi + + # Detect which services have configs + echo "" + echo "Detected services:" + local found_any=false + for entry in "${SERVICES[@]}"; do + local name config_file + name=$(get_service_name "$entry") + config_file=$(get_config_file "$entry") + local full_path="${CONFIG_DIR}/${config_file}" + if [ -f "$full_path" ]; then + local https_status="HTTP" + has_https "$full_path" && https_status="HTTPS" + echo " ✓ ${name} (${config_file}) [${https_status}]" + found_any=true + fi + done + + if [ "$found_any" = false ]; then + die "No service configs found in ${CONFIG_DIR}. 
Set up ${WEB_SERVER} reverse proxies first." + fi + + echo "" + + # Ask about push endpoint protection + local protect_push=false + echo "Mimir and Loki have push endpoints used by remote agents." + echo "Protecting them requires configuring credentials in Prometheus/Alloy." + read -r -p "Protect push endpoints with auth too? [y/N]: " push_confirm + if [[ "$push_confirm" =~ ^[Yy]$ ]]; then + protect_push=true + fi + + # Ask about shared vs per-service credentials + local shared_creds=false + local shared_htpasswd="" + echo "" + echo "Credential mode:" + echo " 1) Same username/password for all services" + echo " 2) Different credentials per service" + read -r -p "Select [1]: " cred_mode + if [[ "${cred_mode:-1}" != "2" ]]; then + shared_creds=true + read -r -p "Username for all services [admin]: " shared_user + shared_user=${shared_user:-admin} + # Create a temporary shared htpasswd file — will be copied per service + shared_htpasswd=$(mktemp) + htpasswd -c "$shared_htpasswd" "$shared_user" + fi + + # Create directories + mkdir -p "$AUTH_DIR" "$SNIPPET_DIR" "$BACKUP_DIR" + + echo "" + + # Set up auth for each detected service + for entry in "${SERVICES[@]}"; do + local name config_file port + name=$(get_service_name "$entry") + config_file=$(get_config_file "$entry") + port=$(get_service_port "$entry") + local full_path="${CONFIG_DIR}/${config_file}" + + if [ ! -f "$full_path" ]; then + continue + fi + + local display_name + display_name="${name^}" + + echo "--- ${display_name} ---" + + # Create htpasswd file + if [ "$shared_creds" = true ]; then + if [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then + read -r -p " htpasswd file exists. Overwrite with shared credentials? 
[Y/n]: " overwrite + if [[ "$overwrite" =~ ^[Nn]$ ]]; then + echo " Keeping existing htpasswd" + else + cp "$shared_htpasswd" "${AUTH_DIR}/.htpasswd-${name}" + echo " Using shared credentials" + fi + else + cp "$shared_htpasswd" "${AUTH_DIR}/.htpasswd-${name}" + echo " Using shared credentials" + fi + else + if [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then + read -r -p " htpasswd file exists. Recreate? [y/N]: " recreate + if [[ ! "$recreate" =~ ^[Yy]$ ]]; then + echo " Keeping existing htpasswd" + else + read -r -p " Username [admin]: " username + username=${username:-admin} + htpasswd -c "${AUTH_DIR}/.htpasswd-${name}" "$username" + fi + else + read -r -p " Username [admin]: " username + username=${username:-admin} + htpasswd -c "${AUTH_DIR}/.htpasswd-${name}" "$username" + fi + fi + + # Create auth snippet + create_snippet "$name" "$display_name" + + # Insert into main location/proxy block + insert_auth "$full_path" "$name" + + # Handle push endpoints for Mimir and Loki + if [[ "$name" == "mimir" ]] || [[ "$name" == "loki" ]]; then + if [ "$protect_push" = true ]; then + insert_push_auth "$full_path" "$name" + else + echo " ⚠ Push endpoint left open — consider IP restrictions" + fi + fi + + echo "" + done + + # Clean up shared temp file + [ -n "$shared_htpasswd" ] && rm -f "$shared_htpasswd" + + # Set permissions on htpasswd files + chmod 640 "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true + chown "root:${WEB_USER}" "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true + + # Test and reload + echo "Testing ${WEB_SERVER} configuration..." + if test_config; then + systemctl reload "$SERVICE_NAME" + echo "" + echo "==========================================" + echo "HTTP Basic Auth Successfully Configured!" 
+ echo "==========================================" + echo "" + echo "Web server: ${WEB_SERVER}" + echo "Backups: ${BACKUP_DIR}" + echo "" + echo "To remove auth later: $0 --remove" + echo "To check status: $0 --status" + else + echo "" + echo "${WEB_SERVER} configuration test FAILED!" + echo "Your backups are in ${BACKUP_DIR} — restore manually if needed." + exit 1 + fi +} + +# ============================================================================ +# MAIN +# ============================================================================ + +main() { + if [[ $EUID -ne 0 ]]; then + die "This script must be run as root" + fi + + case "${1:-}" in + -h|--help) show_usage ;; + --remove) do_remove ;; + --status) show_status ;; + *) + install_htpasswd + setup_auth + ;; + esac +} + +main "$@" diff --git a/auto_pkg_update.sh b/auto_pkg_update.sh new file mode 100644 index 0000000..7dded21 --- /dev/null +++ b/auto_pkg_update.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +###################################################################################### +#### Version 2.01 #### +#### For questions or comments contact@mylinux.work #### +#### Author : Phil Connor #### +#### #### +#### Notes : #### +#### This script is a simple "helper" to configure Auto Updates on linux #### +#### servers. #### +#### #### +#### Use this script at your OWN risk. There is no guarantee whatsoever. #### +#### #### +#### Usage "tuning.sh" or "tuning.sh ssd" if you are running on ssd'd #### +###################################################################################### + +########################### +#### System Variables #### +########################### +OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]') +OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. 
'{print $1}')
+aptcnf="/etc/apt/apt.conf.d"
+dnfcnf="/etc/dnf/automatic.conf"
+yum6cnf="/etc/sysconfig/yum-cron"
+yum7cnf="/etc/yum/yum-cron.conf"
+
+###################################
+#### Copy to EOF file function ####
+###################################
+# Filter stdin to stdout: expand tabs, measure the leading spaces of the
+# first line, and strip that many leading characters from every line, so an
+# indented here-document is written without its indentation.
+function no_show() {
+    {
+        expand | awk 'NR == 1 {match($0, /^ */); l = RLENGTH + 1}
+        {print substr($0, l)}'
+    }
+}
+
+############################################################
+#### Detect Package Manager from OS and OSVer Variables ####
+############################################################
+# Releases 6 and 9 are handled by the install section below, so PAKMGR has
+# to be set for them as well; otherwise "${PAKMGR} update" expands to a bare
+# "update" command.
+if [ "${OS}" = ubuntu ]; then
+    PAKMGR="apt-get -y"
+elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
+    if [ "${OSVER}" = 6 ] || [ "${OSVER}" = 7 ]; then
+        PAKMGR="yum -y"
+    fi
+    if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
+        PAKMGR="dnf -y"
+    fi
+fi
+
+#####################################
+#### Install Auto Update Service ####
+#####################################
+if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
+    if [ "${OSVER}" = 6 ] || [ "${OSVER}" = 7 ]; then
+        ${PAKMGR} update
+        ${PAKMGR} install yum-cron
+        if [ "${OSVER}" = 6 ]; then
+            chkconfig yum-cron on
+            chkconfig yum-updatesd off
+            service yum-updatesd stop
+            #echo 'exclude= http php* kernel*' >> /etc/yum.conf # <-- If you need to add exclude package from updating
+            #sed -i 's/YUM_PARAMETER=""/YUM_PARAMETER="-x http -x php* -x kernel*"/g' >> $yum6cnf # <-- If you need to add exclude package from updating
+            sed -i 's/CHECK_ONLY=yes/CHECK_ONLY=no/g' $yum6cnf
+            sed -i 's/DOWNLOAD_ONLY=yes/DOWNLOAD_ONLY=no/g' $yum6cnf
+            # Only fill in an empty MAILTO so a re-run does not turn
+            # "MAILTO=root" into "MAILTO=rootroot".
+            sed -i 's/^MAILTO=[[:space:]]*$/MAILTO=root/' $yum6cnf
+            service yum-cron start
+        fi
+        if [ "${OSVER}" = 7 ]; then
+            sed -i 's/update_cmd = default/update_cmd = security/g' $yum7cnf #<-- comment this out for ALL available upgrades
+            sed -i 's/apply_updates = no/apply_updates = yes/g' $yum7cnf
+            sed -i 's/download_updates = no/download_updates = yes/g' $yum7cnf
+            # "--nom" is not a systemctl option; "--now" enables and starts.
+            systemctl enable --now yum-cron
+        fi
+    fi
+    if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
+        ${PAKMGR} update
+        ${PAKMGR} install dnf-automatic
+        sed -i 's/upgrade_type = default/upgrade_type = security/g' $dnfcnf #<-- comment this out for ALL available upgrades
+        sed -i 's/apply_updates = no/apply_updates = yes/g' $dnfcnf
+        systemctl enable --now dnf-automatic.timer
+    fi
+elif [ "${OS}" = ubuntu ]; then
+    ${PAKMGR} upgrade
+    ${PAKMGR} install unattended-upgrades apticron
+    touch $aptcnf/20auto-upgrades
+    no_show << EOF > $aptcnf/20auto-upgrades
+    APT::Periodic::Update-Package-Lists "1";
+    APT::Periodic::Download-Upgradeable-Packages "1";
+    APT::Periodic::AutocleanInterval "7";
+    APT::Periodic::Unattended-Upgrade "1";
+EOF
+    sed -i 's/\/\/Unattended-Upgrade\:\:Mail "root";/Unattended-Upgrade\:\:Mail "root";/g' $aptcnf/50unattended-upgrades
+fi
diff --git a/backup-status-exporter.sh b/backup-status-exporter.sh
new file mode 100755
index 0000000..d36fe01
--- /dev/null
+++ b/backup-status-exporter.sh
@@ -0,0 +1,452 @@
+#!/bin/bash
+################################################################################
+# Script Name: backup-status-exporter.sh
+# Version: 1.0
+# Description: Prometheus textfile collector exporter for backup job status
+#              Monitors backup age, size, and success/failure from multiple
+#              sources including timestamp files, log files, and directories
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Date: 2026-03-03
+#
+# Prerequisites:
+#   - node_exporter with textfile collector enabled
+#   - /var/lib/node_exporter directory exists
+#   - Config file at /etc/backup-status-exporter.conf
+#
+# Usage:
+#   # Run with default config
+#   sudo ./backup-status-exporter.sh
+#
+#   # Dry run (output to stdout)
+#   ./backup-status-exporter.sh --dry-run
+#
+#   # Debug mode
+#   DEBUG=1 sudo ./backup-status-exporter.sh
+#
+# Config Format (pipe-delimited, one job per line):
+#   job_name|type|path|max_age_hours
+#
+# Types:
+#
directory - find newest file in directory, report mtime and size +# statusfile - read unix timestamp of last success from a file +# logfile - grep for success/failure patterns in a log file +# +# Metrics Exported: +# - linux_backup_last_success_timestamp{job} - Unix timestamp of last backup +# - linux_backup_age_hours{job} - Hours since last backup +# - linux_backup_size_bytes{job} - Size of last backup in bytes +# - linux_backup_status{job} - 1=ok, 0=stale/failed +# +################################################################################ + +set -o pipefail + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +readonly VERSION="1.0" +readonly SCRIPT_NAME="${0##*/}" +readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}" +readonly OUTPUT_FILE="${TEXTFILE_DIR}/backup_status.prom" +readonly CONFIG_FILE="${CONFIG_FILE:-/etc/backup-status-exporter.conf}" +# Temporary output file next to OUTPUT_FILE, made unique with the shell's PID +readonly TMP_FILE="${OUTPUT_FILE}.$$" + +# Runtime flags +DRY_RUN=false +DEBUG=${DEBUG:-} + +# Log success patterns (case-insensitive grep) +# NOTE(review): the bare "error" alternative in FAILURE_PATTERNS also matches the success phrase "completed without error", so such a line matches both patterns +readonly SUCCESS_PATTERNS="(completed successfully|backup successful|backup finished|success|completed without error)" +readonly FAILURE_PATTERNS="(failed|error|fatal|backup failed|aborted)" + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +# Print a debug message to stderr, only when DEBUG is non-empty +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +# Print an error message to stderr +log_error() { + echo "[ERROR] $*" >&2 +} + +# Remove the temporary output file; installed as the EXIT trap below +cleanup() { + rm -f "$TMP_FILE" +} + +trap cleanup EXIT + +show_help() { + cat </dev/null | sort -rn | head -1) + + if [[ -z "$newest_file" ]]; then + debug_echo "[$job_name] No files found in: $path" + echo "0|0|0|0" + return + fi + + local file_epoch + file_epoch=$(echo "$newest_file" | awk '{printf "%.0f", $1}') + local file_size +
file_size=$(echo "$newest_file" | awk '{print $2}') + local file_path + file_path=$(echo "$newest_file" | awk '{$1=""; $2=""; print}' | sed 's/^ //') + + local now + now=$(date +%s) + local age_seconds=$((now - file_epoch)) + local age_hours + age_hours=$(awk "BEGIN {printf \"%.1f\", $age_seconds / 3600}") + + local max_age_seconds=$((max_age_hours * 3600)) + local status=1 + if [[ "$age_seconds" -gt "$max_age_seconds" ]]; then + status=0 + fi + + debug_echo "[$job_name] Newest file: $file_path (age=${age_hours}h, size=${file_size}B, status=$status)" + echo "${file_epoch}|${age_hours}|${file_size}|${status}" +} + +# Check a status file whose first line holds the unix timestamp of the last success. +# Arguments: $1 = job name (used in debug output), $2 = status file path, $3 = max age in hours. +# Prints "timestamp|age_hours|size_bytes|status" on stdout; status is 1 while the timestamp +# is at most max_age_hours old, otherwise 0. A missing file or a non-numeric first line +# yields "0|0|0|0". +check_statusfile() { + local job_name="$1" + local path="$2" + local max_age_hours="$3" + + if [[ ! -f "$path" ]]; then + debug_echo "[$job_name] Status file not found: $path" + echo "0|0|0|0" + return + fi + + local timestamp + timestamp=$(head -1 "$path" 2>/dev/null) + timestamp="${timestamp//[[:space:]]/}" + + if [[ -z "$timestamp" ]] || ! [[ "$timestamp" =~ ^[0-9]+$ ]]; then + debug_echo "[$job_name] Invalid timestamp in status file: $path" + echo "0|0|0|0" + return + fi + + # Force base 10: bash arithmetic reads a zero-padded number as octal and rejects digits 8 and 9 in it + timestamp=$((10#$timestamp)) + + local now + now=$(date +%s) + local age_seconds=$((now - timestamp)) + local age_hours + age_hours=$(awk "BEGIN {printf \"%.1f\", $age_seconds / 3600}") + + # Status files don't have a meaningful size — report file size of the status file itself + local file_size + file_size=$(stat -c '%s' "$path" 2>/dev/null) || file_size=0 + + local max_age_seconds=$((max_age_hours * 3600)) + local status=1 + if [[ "$age_seconds" -gt "$max_age_seconds" ]]; then + status=0 + fi + + debug_echo "[$job_name] Status timestamp: $timestamp (age=${age_hours}h, status=$status)" + echo "${timestamp}|${age_hours}|${file_size}|${status}" +} + +check_logfile() { + local job_name="$1" + local path="$2" + local max_age_hours="$3" + + if [[ !
-f "$path" ]]; then + debug_echo "[$job_name] Log file not found: $path" + echo "0|0|0|0" + return + fi + + # Check for failure patterns first (most recent occurrence) + local last_failure + last_failure=$(grep -inE "$FAILURE_PATTERNS" "$path" 2>/dev/null | tail -1) || true + local last_success + last_success=$(grep -inE "$SUCCESS_PATTERNS" "$path" 2>/dev/null | tail -1) || true + + local failure_line=0 + local success_line=0 + + if [[ -n "$last_failure" ]]; then + failure_line=$(echo "$last_failure" | cut -d: -f1) + fi + if [[ -n "$last_success" ]]; then + success_line=$(echo "$last_success" | cut -d: -f1) + fi + + # Use the log file's mtime as the timestamp + local file_epoch + file_epoch=$(stat -c '%Y' "$path" 2>/dev/null) || file_epoch=0 + local file_size + file_size=$(stat -c '%s' "$path" 2>/dev/null) || file_size=0 + + local now + now=$(date +%s) + local age_seconds=$((now - file_epoch)) + local age_hours + age_hours=$(awk "BEGIN {printf \"%.1f\", $age_seconds / 3600}") + + local max_age_seconds=$((max_age_hours * 3600)) + + # Determine status: success if last success line is after last failure line + # and the log is not stale + local status=0 + if [[ "$success_line" -gt "$failure_line" ]] && [[ "$age_seconds" -le "$max_age_seconds" ]]; then + status=1 + fi + + if [[ "$success_line" -eq 0 ]] && [[ "$failure_line" -eq 0 ]]; then + debug_echo "[$job_name] No success or failure patterns found in: $path" + status=0 + fi + + debug_echo "[$job_name] Log file: $path (age=${age_hours}h, success_line=$success_line, failure_line=$failure_line, status=$status)" + echo "${file_epoch}|${age_hours}|${file_size}|${status}" +} + +# ============================================================================ +# METRICS COLLECTION +# ============================================================================ + +collect_metrics() { + local jobs=() + while IFS= read -r job_line; do + jobs+=("$job_line") + done < <(load_jobs) + + local output="" + local timestamps="" + local 
ages="" + local sizes="" + local statuses="" + + # Each job line carries four pipe-separated fields: job_name|type|path|max_age_hours + for job_line in "${jobs[@]}"; do + local job_name + job_name=$(echo "$job_line" | cut -d'|' -f1) + local job_type + job_type=$(echo "$job_line" | cut -d'|' -f2) + local job_path + job_path=$(echo "$job_line" | cut -d'|' -f3) + local max_age_hours + max_age_hours=$(echo "$job_line" | cut -d'|' -f4) + + if [[ -z "$job_name" ]] || [[ -z "$job_type" ]] || [[ -z "$job_path" ]] || [[ -z "$max_age_hours" ]]; then + log_error "Invalid config line: $job_line (expected: job_name|type|path|max_age_hours)" + continue + fi + + # Dispatch to the checker for this job type; unknown types are logged and skipped + local result="" + case "$job_type" in + directory) + result=$(check_directory "$job_name" "$job_path" "$max_age_hours") + ;; + statusfile) + result=$(check_statusfile "$job_name" "$job_path" "$max_age_hours") + ;; + logfile) + result=$(check_logfile "$job_name" "$job_path" "$max_age_hours") + ;; + *) + log_error "Unknown job type '$job_type' for job '$job_name' (expected: directory, statusfile, logfile)" + continue + ;; + esac + + # Split the checker result into its four pipe-separated fields + local ts + ts=$(echo "$result" | cut -d'|' -f1) + local age + age=$(echo "$result" | cut -d'|' -f2) + local size + size=$(echo "$result" | cut -d'|' -f3) + local st + st=$(echo "$result" | cut -d'|' -f4) + + # The literal \n sequences are turned into newlines when the output is printed with printf '%b' + # NOTE(review): job_name goes into the label value unescaped; a name containing a double quote or backslash would presumably yield an invalid sample line — confirm job names are restricted + timestamps+="linux_backup_last_success_timestamp{job=\"${job_name}\"} ${ts}\n" + ages+="linux_backup_age_hours{job=\"${job_name}\"} ${age}\n" + sizes+="linux_backup_size_bytes{job=\"${job_name}\"} ${size}\n" + statuses+="linux_backup_status{job=\"${job_name}\"} ${st}\n" + done + + # Emit each metric family as one group: HELP line, TYPE line, then all samples of that family + output+="# HELP linux_backup_last_success_timestamp Unix timestamp of the last successful backup\n" + output+="# TYPE linux_backup_last_success_timestamp gauge\n" + output+="$timestamps" + output+="# HELP linux_backup_age_hours Hours since the last successful backup\n" + output+="# TYPE linux_backup_age_hours gauge\n" + output+="$ages" + output+="# HELP linux_backup_size_bytes Size of the last backup in bytes\n" + output+="# TYPE linux_backup_size_bytes gauge\n" + output+="$sizes" + output+="# HELP
linux_backup_status Backup job status (1=ok, 0=stale or failed)\n" + output+="# TYPE linux_backup_status gauge\n" + output+="$statuses" + + printf '%b' "$output" +} + +# ============================================================================ +# OUTPUT +# ============================================================================ + +# Collect all metrics, then either print them to stdout (dry run) or install them as OUTPUT_FILE +write_metrics() { + local metrics + metrics=$(collect_metrics) + + if [[ "$DRY_RUN" == "true" ]]; then + echo "$metrics" + return + fi + + if [[ ! -d "$TEXTFILE_DIR" ]]; then + log_error "Textfile collector directory does not exist: $TEXTFILE_DIR" + exit 1 + fi + + # Write to the temporary file first, then move it over the target so the .prom file is replaced in one step + echo "$metrics" > "$TMP_FILE" + mv "$TMP_FILE" "$OUTPUT_FILE" + debug_echo "Metrics written to $OUTPUT_FILE" +} + +# ============================================================================ +# MAIN +# ============================================================================ + +# Parse command-line options, then write the metrics +main() { + while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --debug) + DEBUG=1 + shift + ;; + # NOTE(review): no shift in the next two branches — presumably show_help and show_version exit themselves, otherwise this loop would never advance; confirm + --help|-h) + show_help + ;; + --version|-v) + show_version + ;; + *) + log_error "Unknown option: $1" + echo "Use --help for usage information" >&2 + exit 1 + ;; + esac + done + + write_metrics +} + +main "$@" diff --git a/cert-check.sh b/cert-check.sh new file mode 100755 index 0000000..56d18b5 --- /dev/null +++ b/cert-check.sh @@ -0,0 +1,428 @@ +#!/bin/bash + +################################################ +#### AD Certificate checker and renewal #### +#### for Amazon, Ubuntu and RedHat servers #### +#### #### +#### Author: Phil Connor #### +#### License: MIT #### +#### Contact: contact@mylinux.work #### +#### Version: 3.00-081425 #### +################################################ + +set -o pipefail + +SCRIPT_NAME=$(basename "$0") +readonly SCRIPT_NAME + +# Default configuration +readonly DEFAULT_PEM_PATH="/etc/pki/ca-trust/source/anchors/ad-cert.pem" +readonly DEFAULT_DAYS_THRESHOLD=30 +readonly DEFAULT_DOMAIN="example" +readonly
DEFAULT_NODE_DIR="/var/lib/node_exporter" + +# Configuration variables (can be overridden by environment) +PEM_PATH=${PEM_PATH:-$DEFAULT_PEM_PATH} +DAYS_THRESHOLD=${DAYS_THRESHOLD:-$DEFAULT_DAYS_THRESHOLD} +DOMAIN=${DOMAIN:-$DEFAULT_DOMAIN} +NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR} +SERVER_TYPE=${SERVER_TYPE:-} +DEBUG=${DEBUG:-} + +# Runtime flags +MONITOR_ONLY=false +RENEW_ONLY=false + +# ERR trap handler: report the failing line on stderr and exit with the failing status. +# Arguments: $1 = exit code, $2 = line number. +handle_error() { + local exit_code=$1 + local line_number=$2 + echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2 + exit "$exit_code" +} + +trap 'handle_error $? $LINENO' ERR + +# Print a debug message to stderr, only when DEBUG is non-empty +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +# Print the usage text, including the current default values, to stdout +show_help() { + cat << EOF +Usage: $SCRIPT_NAME [OPTIONS] + +SSL certificate checker and renewal script for Prometheus monitoring. + +OPTIONS: + --monitor Only generate Prometheus metrics (no renewal) + --renew Only handle certificate renewal (no monitoring) + --all Run both monitoring and renewal (default) + --help, -h Show this help message + +ENVIRONMENT VARIABLES: + PEM_PATH Path to certificate file (default: $DEFAULT_PEM_PATH) + DAYS_THRESHOLD Days before expiry to trigger renewal (default: $DEFAULT_DAYS_THRESHOLD) + DOMAIN Domain name (default: $DEFAULT_DOMAIN) + NODE_DIR Node exporter directory (default: $DEFAULT_NODE_DIR) + SERVER_TYPE Server type (artifactory, bitbucket, cloudaccess, jira) + DEBUG Enable debug output + +EXAMPLES: + $SCRIPT_NAME --monitor + SERVER_TYPE=bitbucket $SCRIPT_NAME --renew + DEBUG=1 $SCRIPT_NAME --all +EOF +} + +# Return 0 when $1 is an existing file that openssl can parse as an X.509 certificate, 1 otherwise +validate_certificate_file() { + local cert_file="$1" + + if [[ ! -f "$cert_file" ]]; then + debug_echo "Certificate file not found: $cert_file" + return 1 + fi + + if !
openssl x509 -noout -text -in "$cert_file" >/dev/null 2>&1; then + echo "Error: Invalid certificate file: $cert_file" >&2 + return 1 + fi + + return 0 +} + +download_certificate() { + local domain="$1" + local output_file="$2" + local server_url="us.${domain}.net:636" + + debug_echo "Downloading certificate from $server_url" + + if ! timeout 30 openssl s_client -connect "$server_url" -servername "us.${domain}.net" < /dev/null 2>/dev/null | \ + sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' > "$output_file"; then + echo "Error: Failed to download certificate from $server_url" >&2 + return 1 + fi + + return 0 +} + +calculate_certificate_dates() { + local cert_file="$1" + local -n days_left_ref=$2 + local -n days_gone_ref=$3 + + local beg_date end_date beg_sec end_sec now_sec + + beg_date=$(openssl x509 -noout -startdate -in "$cert_file") + end_date=$(openssl x509 -noout -enddate -in "$cert_file") + + beg_sec=$(date --date="${beg_date##*=}" +%s) + end_sec=$(date --date="${end_date##*=}" +%s) + now_sec=$(date +%s) + + days_gone_ref=$(( (now_sec - beg_sec) / 86400 )) + days_left_ref=$(( (end_sec - now_sec) / 86400 )) + + debug_echo "Certificate valid from $(date -d @"$beg_sec") to $(date -d @"$end_sec")" + debug_echo "Days gone: $days_gone_ref, Days left: $days_left_ref" +} + +generate_prometheus_metrics() { + local days_left="$1" + local days_gone="$2" + local output_file="$NODE_DIR/adcert_check.prom" + + debug_echo "Generating Prometheus metrics to $output_file" + + mkdir -p "$NODE_DIR" + + { + echo '# HELP linux_ad_cert_expire AD Certificate expiration days' + echo '# TYPE linux_ad_cert_expire gauge' + if [[ $days_left -lt 0 ]]; then + echo "linux_ad_cert_expire{status=\"expired\",days_gone=\"$days_gone\"} 0" + else + echo "linux_ad_cert_expire{status=\"valid\"} $days_left" + fi + } > "$output_file" +} + +get_keystore_password() { + local password_url="$1" + local storepass="" + + # Try Vault HTTP API first if URL provided + if [[ -n "$password_url" ]]; then + 
debug_echo "Retrieving keystore password from $password_url" + storepass=$(curl -sf -X GET "$password_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true) + fi + + # Fall back to Vault CLI + if [[ -z "$storepass" ]]; then + debug_echo "Falling back to Vault CLI for keystore password" + storepass=$(vault kv get -field=password secret/keystore 2>/dev/null || true) + fi + + # Fall back to default + if [[ -z "$storepass" ]]; then + debug_echo "Using default keystore password" + storepass="changeit" + fi + + echo "$storepass" +} + +execute_keytool_command() { + local java_bin="$1" + local keystore="$2" + local action="$3" + local cert_file="$4" + local password_url="$5" + + local storepass + storepass=$(get_keystore_password "$password_url") + + case "$action" in + "delete") + "$java_bin/keytool" -delete -alias ad -keystore "$keystore" -storepass "$storepass" 2>/dev/null || true + ;; + "import") + "$java_bin/keytool" -import -noprompt -alias ad -keystore "$keystore" -file "$cert_file" -storepass "$storepass" + ;; + esac +} + +handle_artifactory_renewal() { + local java_bin keystore + local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/artifactory/keytool" + + # Check app-specific paths first, then fall back to auto-detection + java_bin="/opt/jfrog/artifactory/app/third-party/java/bin" + keystore="/opt/jfrog/artifactory/app/third-party/java/lib/security/cacerts" + + if [[ ! -x "$java_bin/keytool" || ! -f "$keystore" ]]; then + debug_echo "Artifactory default paths not found, searching for Java" + if ! 
find_java_keystore java_bin keystore; then + echo "Error: Could not find Java keytool or keystore for Artifactory" >&2 + return 1 + fi + fi + + execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url" + execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url" + systemctl restart artifactory +} + +handle_bitbucket_renewal() { + local java_bin keystore + local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/bitbucket/keytool" + + # Check app-specific paths first, then fall back to auto-detection + java_bin="/mnt/ebs/bitbucket/8.19.3/jre/bin" + keystore="/mnt/ebs/bitbucket/8.19.3/jre/lib/security/cacerts" + + if [[ ! -x "$java_bin/keytool" || ! -f "$keystore" ]]; then + debug_echo "Bitbucket default paths not found, searching for Java" + if ! find_java_keystore java_bin keystore; then + echo "Error: Could not find Java keytool or keystore for Bitbucket" >&2 + return 1 + fi + fi + + if [[ -n "$DEBUG" ]]; then + debug_echo "Would execute: $java_bin/keytool -delete -alias ad -keystore $keystore" + debug_echo "Would execute: curl -X GET $vault_url" + debug_echo "Would execute: $java_bin/keytool -import -alias ad -keystore $keystore -file $PEM_PATH" + debug_echo "Would execute: systemctl restart atlbitbucket" + else + execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url" + execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url" + systemctl restart atlbitbucket + fi +} + +handle_cloudaccess_renewal() { + docker restart cloudaccess_server_ +} + +handle_jira_renewal() { + local java_bin keystore + local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/jira/keytool" + + # Check app-specific paths first, then fall back to auto-detection + java_bin="/mnt/ebs/jira/jre/bin" + keystore="/mnt/ebs/jira/jre/lib/security/cacerts" + + if [[ ! -x "$java_bin/keytool" || ! -f "$keystore" ]]; then + debug_echo "Jira default paths not found, searching for Java" + if ! 
find_java_keystore java_bin keystore; then + echo "Error: Could not find Java keytool or keystore for Jira" >&2 + return 1 + fi + fi + + execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url" + execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url" + systemctl restart jira +} + +find_java_keystore() { + local -n java_bin_ref=$1 + local -n keystore_ref=$2 + + # Common Java installation paths + local java_paths=( + "/opt/jfrog/artifactory/app/third-party/java" + "/mnt/ebs/bitbucket/*/jre" + "/mnt/ebs/jira/jre" + "/usr/lib/jvm/java-*-openjdk" + "/usr/lib/jvm/default-java" + "/opt/java" + "/usr/java/latest" + ) + + # Check JAVA_HOME first + if [[ -n "$JAVA_HOME" && -x "$JAVA_HOME/bin/keytool" ]]; then + java_bin_ref="$JAVA_HOME/bin" + keystore_ref="$JAVA_HOME/lib/security/cacerts" + if [[ -f "$keystore_ref" ]]; then + debug_echo "Found Java via JAVA_HOME: $java_bin_ref" + return 0 + fi + fi + + # Search common paths with glob expansion + for path_pattern in "${java_paths[@]}"; do + for java_dir in $path_pattern; do + if [[ -d "$java_dir" ]]; then + local bin_dir="$java_dir/bin" + local cacerts="$java_dir/lib/security/cacerts" + + if [[ -x "$bin_dir/keytool" && -f "$cacerts" ]]; then + java_bin_ref="$bin_dir" + keystore_ref="$cacerts" + debug_echo "Found Java at: $java_dir" + return 0 + fi + fi + done + done + + # Fallback: try system keytool + if command -v keytool >/dev/null 2>&1; then + java_bin_ref="$(dirname "$(command -v keytool)")" + # Try common system keystore locations + local system_keystores=( + "/etc/ssl/certs/java/cacerts" + "/usr/lib/jvm/default-java/lib/security/cacerts" + "/etc/pki/ca-trust/extracted/java/cacerts" + ) + for keystore in "${system_keystores[@]}"; do + if [[ -f "$keystore" ]]; then + keystore_ref="$keystore" + debug_echo "Found system Java at: $java_bin_ref" + return 0 + fi + done + fi + + return 1 +} + +handle_server_renewal() { + if [[ -z "$SERVER_TYPE" ]]; then + echo "Error: 
SERVER_TYPE environment variable must be set for renewal" >&2 + echo "Valid values: artifactory, bitbucket, cloudaccess, jira" >&2 + return 1 + fi + + debug_echo "Handling renewal for server type: $SERVER_TYPE" + + case "$SERVER_TYPE" in + "artifactory") handle_artifactory_renewal ;; + "bitbucket") handle_bitbucket_renewal ;; + "cloudaccess") handle_cloudaccess_renewal ;; + "jira") handle_jira_renewal ;; + *) + echo "Error: Unknown server type: $SERVER_TYPE" >&2 + echo "Valid values: artifactory, bitbucket, cloudaccess, jira" >&2 + return 1 + ;; + esac +} + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + --monitor) + MONITOR_ONLY=true + shift + ;; + --renew) + RENEW_ONLY=true + shift + ;; + --all) + MONITOR_ONLY=false + RENEW_ONLY=false + shift + ;; + --help|-h) + show_help + exit 0 + ;; + *) + echo "Error: Unknown option: $1" >&2 + show_help >&2 + exit 1 + ;; + esac + done +} + +main() { + parse_arguments "$@" + + # Check if certificate file exists, if not exit silently + if [[ ! -f "$PEM_PATH" ]]; then + debug_echo "Certificate file not found: $PEM_PATH" + exit 0 + fi + + # Download fresh certificate + if ! download_certificate "$DOMAIN" "$PEM_PATH"; then + exit 1 + fi + + # Validate the downloaded certificate + if ! validate_certificate_file "$PEM_PATH"; then + exit 1 + fi + + # Calculate certificate expiration dates + local days_left days_gone + calculate_certificate_dates "$PEM_PATH" days_left days_gone + + # Handle monitoring (unless renew-only mode) + if [[ "$RENEW_ONLY" != true ]]; then + generate_prometheus_metrics "$days_left" "$days_gone" + debug_echo "Generated Prometheus metrics" + fi + + # Handle renewal (unless monitor-only mode) + if [[ "$MONITOR_ONLY" != true && $days_left -le $DAYS_THRESHOLD ]]; then + debug_echo "Certificate expires in $days_left days (threshold: $DAYS_THRESHOLD)" + if ! 
handle_server_renewal; then + exit 1 + fi + debug_echo "Certificate renewal completed" + fi + + debug_echo "Script completed successfully" +} + +# Execute main function if script is run directly +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/create_swap.sh b/create_swap.sh new file mode 100644 index 0000000..c704114 --- /dev/null +++ b/create_swap.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +############################################## +#### Create Swap for all Linux Servers #### +#### #### +#### Author: Phil Connor #### +#### Contact: pconnor@ara.com #### +#### Version 3.50.20250729 #### +#### #### +#### Created 06/01/2023 #### +############################################## + +# Exit on any error, undefined variables, and pipe failures +set -euo pipefail + +# Script configuration constants +readonly SCRIPT_NAME="$(basename "$0")" +readonly SWAPFILE_PATH="/.swapfile" # Standard location for swap file +readonly SWAPPINESS_VALUE=80 # How aggressively to use swap (0-100) + +# Logging function - outputs to stderr with script name prefix +log() { + echo "[$SCRIPT_NAME] $*" >&2 +} + +# Error function - logs error message and exits with status 1 +error() { + log "ERROR: $*" + exit 1 +} + +# Display usage information +usage() { + cat </dev/null || true + rm -f "$SWAPFILE_PATH" +} + +# Detect the operating system distribution (ubuntu, centos, etc.) +detect_os() { + if command -v lsb_release >/dev/null 2>&1; then + # Use lsb_release if available (most reliable) + lsb_release -i | awk '{print $3}' | tr '[:upper:]' '[:lower:]' + else + # Fallback to parsing /etc/os-release + # shellcheck source=/dev/null + . 
/etc/os-release 2>/dev/null && echo "${ID:-unknown}" | tr '[:upper:]' '[:lower:]' + fi +} + +# Get total system memory in GB, rounded to nearest whole number +get_memory_gb() { + local mem_kb + # Extract memory from /proc/meminfo (in KB) + mem_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}') + + if [[ -z "$mem_kb" || "$mem_kb" -eq 0 ]]; then + error "Unable to determine system memory" + fi + + local mem_gb + # Convert KB to GB and round to nearest whole number + mem_gb=$(awk "BEGIN {printf \"%.0f\", ($mem_kb/1024/1024)}") + # Ensure minimum of 1GB to avoid division by zero issues + [[ "$mem_gb" -eq 0 ]] && mem_gb=1 + + echo "$mem_gb" +} + +# Calculate swap size needed in MB (1:1 ratio with RAM) +get_swap_needed_mb() { + local mem_gb="$1" + echo $((mem_gb * 1024)) +} + +# Get the current swap file size in MB, or 0 if no swap file exists +get_current_swap_size() { + if [[ -f "$SWAPFILE_PATH" ]]; then + local size_bytes + size_bytes=$(stat -c%s "$SWAPFILE_PATH" 2>/dev/null || echo 0) + echo $((size_bytes / 1024 / 1024)) + else + echo 0 + fi +} + +# Check if our swap file is currently active +is_swap_active() { + swapon --show=NAME --noheadings 2>/dev/null | grep -q "^${SWAPFILE_PATH}$" +} + +# Check if there's enough disk space for the swap file (with 10% buffer) +check_disk_space() { + local needed_mb="$1" + local filesystem="/" + + log "Checking available disk space for ${needed_mb}MB swap file" + + local available_kb + # Get available space in KB from df command + available_kb=$(df --output=avail "$filesystem" | tail -n 1) + local available_mb=$((available_kb / 1024)) + + # Add 10% buffer for safety + local required_mb=$((needed_mb + (needed_mb / 10))) + + if [[ "$available_mb" -lt "$required_mb" ]]; then + error "Insufficient disk space. 
Need ${required_mb}MB (${needed_mb}MB + 10% buffer), but only ${available_mb}MB available on $filesystem" + fi + + log "Disk space check passed: ${available_mb}MB available, ${required_mb}MB required" +} + +# Verify script is running with root privileges +check_permissions() { + if [[ $EUID -ne 0 ]]; then + error "This script must be run as root! Login as root, or use sudo." + fi +} + +# Configure system swappiness (how aggressively to use swap) +# Applies the value to the running kernel via /proc and records it in /etc/sysctl.conf. +# NOTE(review): when sysctl.conf already holds the wanted value neither branch runs, so the running value in /proc is left untouched +setup_swappiness() { + local sysconf="/etc/sysctl.conf" + local procswap="/proc/sys/vm/swappiness" + + log "Configuring swappiness to $SWAPPINESS_VALUE" + + # If no swappiness setting exists, add it + if ! grep -q "vm.swappiness" "$sysconf"; then + echo "$SWAPPINESS_VALUE" > "$procswap" + echo "vm.swappiness = $SWAPPINESS_VALUE" >> "$sysconf" + # If setting exists but with different value, update it + elif ! grep -q "vm.swappiness = $SWAPPINESS_VALUE" "$sysconf"; then + # Drop every line mentioning vm.swappiness, then append the wanted setting + sed -i "/vm.swappiness/d" "$sysconf" + echo "$SWAPPINESS_VALUE" > "$procswap" + echo "vm.swappiness = $SWAPPINESS_VALUE" >> "$sysconf" + fi +} + +# Set up automated cache clearing cron job (every 5 minutes) +# Argument: $1 = distribution name as printed by detect_os; selects the crontab spool path. +setup_cache_clearing() { + local os="$1" + local ctab + + # Different crontab locations for different distributions + if [[ "$os" == "ubuntu" ]]; then + ctab="/var/spool/cron/crontabs/root" + else + ctab="/var/spool/cron/root" + fi + + log "Setting up cache clearing cron job" + + # Remove any existing cache clearing jobs that use 'echo 3' (more aggressive) + if crontab -l 2>/dev/null | grep -q '/usr/bin/sync; echo 3'; then + sed -i "/\/usr\/bin\/sync.*echo 3/d" "$ctab" 2>/dev/null || true + fi + + # Add cache clearing job if it doesn't exist (echo 1 = page cache only) + if !
crontab -l 2>/dev/null | grep -q '/usr/bin/sync; echo 1'; then + # Re-install root's crontab as its current content plus the new job line + (crontab -u root -l 2>/dev/null; echo "*/5 * * * * /usr/bin/sync; echo 1 > /proc/sys/vm/drop_caches") | crontab -u root - + fi +} + +# Remove existing swap file and clean up fstab entries +# /etc/fstab is copied to /etc/fstab.<timestamp> before any line is removed. +remove_swap() { + local backup_time + + # Create timestamp for backup file + backup_time=$(date +%y-%m-%d--%H-%M-%S) + + log "Removing existing swap file: $SWAPFILE_PATH" + + # Disable swap file (ignore errors if already disabled) + swapoff "$SWAPFILE_PATH" 2>/dev/null || true + + # Backup fstab before modifying + cp /etc/fstab "/etc/fstab.$backup_time" + + # Remove swap entries from fstab + # "|" serves as the sed address delimiter because the path itself contains "/" + sed -i "\|${SWAPFILE_PATH}|d" /etc/fstab + + # Delete the swap file + rm -f "$SWAPFILE_PATH" +} + +# Create and configure a new swap file +# Argument: $1 = swap size in MB (must not be 0). +create_swap() { + local swap_mb="$1" + + if [[ "$swap_mb" -eq 0 ]]; then + error "Cannot create swap: swap size cannot be 0 MB" + fi + + log "Creating swap file of size ${swap_mb}MB at $SWAPFILE_PATH" + + # Set trap to clean up partial swap file on failure + trap cleanup_on_error ERR + + # Create swap file using dd with progress display (oflag=direct avoids polluting page cache) + dd if=/dev/zero of="$SWAPFILE_PATH" bs=1M count="$swap_mb" oflag=direct status=progress + + # Set proper permissions (only root can read/write) + chmod 600 "$SWAPFILE_PATH" + + # Format the file as swap space + mkswap "$SWAPFILE_PATH" + + # Enable the swap file + swapon "$SWAPFILE_PATH" + + # Add to fstab for persistent mounting if not already present + if !
grep -q "$SWAPFILE_PATH" /etc/fstab; then + echo "$SWAPFILE_PATH swap swap defaults 0 0" >> /etc/fstab + fi + + # Clear the error trap now that swap is fully created + trap - ERR + + log "Swap file created and enabled successfully" +} + +# Main function - orchestrates the entire swap setup process +# The swap size is derived from the installed RAM (1:1). An existing swap file of a +# different size is removed and recreated; one of the right size that is merely +# inactive is re-enabled in place. +main() { + # Handle --help flag + if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + usage + fi + + # Ensure script is run with root privileges + check_permissions + + # Detect operating system for distribution-specific configurations + local os + os=$(detect_os) + + # Get system memory information + local mem_gb + mem_gb=$(get_memory_gb) + + # Calculate required swap size + local needed_mb + needed_mb=$(get_swap_needed_mb "$mem_gb") + + # Check current swap configuration + local current_size + current_size=$(get_current_swap_size) + + # Configure system settings + # These two run on every invocation, even when the swap file itself needs no change + setup_swappiness + setup_cache_clearing "$os" + + # If swap file exists at the correct size and is active, nothing to do + if [[ "$current_size" -eq "$needed_mb" ]] && is_swap_active; then + log "Swap size is already correct and active" + log "Swap setup completed successfully" + return 0 + fi + + # If swap file exists but wrong size, remove first so disk space check is accurate + if [[ "$current_size" -ne 0 && "$needed_mb" -ne "$current_size" ]]; then + remove_swap + fi + + # Verify system has enough disk space (after potential removal) + if [[ "$needed_mb" -ne "$current_size" ]]; then + check_disk_space "$needed_mb" + create_swap "$needed_mb" + else + # File is the right size but not active, re-enable it + log "Swap file exists at correct size but is not active, enabling" + chmod 600 "$SWAPFILE_PATH" + mkswap "$SWAPFILE_PATH" + swapon "$SWAPFILE_PATH" + if !
grep -q "$SWAPFILE_PATH" /etc/fstab; then + echo "$SWAPFILE_PATH swap swap defaults 0 0" >> /etc/fstab + fi + fi + + log "Swap setup completed successfully" +} + +# Execute main function with all script arguments +main "$@" diff --git a/directory-size-exporter.sh b/directory-size-exporter.sh new file mode 100644 index 0000000..ff25c40 --- /dev/null +++ b/directory-size-exporter.sh @@ -0,0 +1,267 @@ +#!/usr/bin/env bash +# directory-size-exporter.sh — Prometheus exporter for directory sizes +# +# Monitors directory disk usage that node_exporter can't see. +# Node exporter only reports mounted filesystem totals — this script +# tracks individual directories like /var/log, /home, /opt, or any +# path you care about. +# +# Author: Phil Connor +# Contact: contact@mylinux.work +# License: MIT +# Version: 1.0.0 + +set -euo pipefail + +EXPORTER_NAME="directory_size" +DEFAULT_PORT=9101 +OUTPUT_MODE="stdout" +OUTPUT_FILE="" +PORT="${DIRECTORY_SIZE_PORT:-$DEFAULT_PORT}" +TIMEOUT="${DIRECTORY_SIZE_TIMEOUT:-300}" +VERBOSE=false +QUIET=false +DRY_RUN=false +TARGET_DIRECTORIES=() + +# ── Metrics Collection ────────────────────────────────────────────── + +# Print a timestamped message to stderr when VERBOSE is true. +# Written as an "if" so the function returns 0 when logging is switched off: +# "[[ ... ]] && echo" returns 1 in that case, which aborts the caller under "set -e". +log_verbose() { + if [[ "$VERBOSE" == true ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2 + fi +} + +# Print a timestamped message to stderr unless QUIET is set. +# Returns 0 when the message is suppressed, for the same "set -e" reason as log_verbose. +log_info() { + if [[ "$QUIET" == false ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2 + fi +} + +collect_metrics() { + local start_time + start_time=$(date +%s%N) + + echo "# HELP node_directory_size_bytes Disk space used by directory" + echo "# TYPE node_directory_size_bytes gauge" + echo "# HELP node_directory_filesystem_usage_percent Filesystem usage percentage for the directory mount point" + echo "# TYPE node_directory_filesystem_usage_percent gauge" + + local success=1 + + for directory in "${TARGET_DIRECTORIES[@]}"; do + log_verbose "Running du for: $directory" + + # Get directory size in bytes + local du_output + du_output=$(timeout "$TIMEOUT" du --block-size=1 --summarize "$directory" 2>/dev/null) || { + log_info "WARNING: du
failed for $directory" + success=0 + continue + } + + local size_bytes + size_bytes=$(echo "$du_output" | awk '{print $1}') + echo "node_directory_size_bytes{directory=\"${directory}\"} ${size_bytes}" + + # Get filesystem usage percentage for the mount point + local pct + pct=$(df --output=pcent "$directory" 2>/dev/null | tail -n 1 | tr -d ' %') + if [[ "$pct" =~ ^[0-9]+$ ]]; then + echo "node_directory_filesystem_usage_percent{directory=\"${directory}\"} ${pct}" + fi + done + + # ── Script runtime ── + local end_time runtime + end_time=$(date +%s%N) + runtime=$(awk "BEGIN {printf \"%.3f\", ($end_time - $start_time) / 1000000000}") + + echo "" + echo "# HELP ${EXPORTER_NAME}_duration_seconds Script execution time" + echo "# TYPE ${EXPORTER_NAME}_duration_seconds gauge" + echo "${EXPORTER_NAME}_duration_seconds ${runtime}" + + echo "# HELP ${EXPORTER_NAME}_last_run_timestamp Last successful run" + echo "# TYPE ${EXPORTER_NAME}_last_run_timestamp gauge" + echo "${EXPORTER_NAME}_last_run_timestamp $(date +%s)" + + echo "# HELP ${EXPORTER_NAME}_success Whether the exporter ran successfully" + echo "# TYPE ${EXPORTER_NAME}_success gauge" + echo "${EXPORTER_NAME}_success ${success}" +} + +# ── HTTP Request Handler ──────────────────────────────────────────── + +# Serve one HTTP request read from stdin and write the response to stdout. +# Only the path of the request line is interpreted; the headers are read and discarded. +# A request for /metrics is answered with the collected metrics, anything else with a 404. +handle_request() { + local method path version header + read -r method path version + + # Consume the request headers up to the empty line that ends them + while IFS= read -r header; do + [[ "$header" == $'\r' || -z "$header" ]] && break + done + + if [[ "$path" == "/metrics" ]]; then + local metrics length + # Command substitution strips the trailing newline; put it back because the + # exposition format requires the last line to end with a line feed + metrics=$(collect_metrics)$'\n' + # Content-Length is a byte count; ${#metrics} would count characters instead + length=$(printf '%s' "$metrics" | wc -c) + + printf "HTTP/1.1 200 OK\r\n" + printf "Content-Type: text/plain; version=0.0.4; charset=utf-8\r\n" + printf "Content-Length: %d\r\n" "$length" + printf "Connection: close\r\n" + printf "\r\n" + printf "%s" "$metrics" + else + local body="404 Not Found" + printf "HTTP/1.1 404 Not Found\r\n" + printf "Content-Type: text/plain\r\n" + printf "Content-Length: %d\r\n" "${#body}" + printf "Connection: close\r\n" + printf "\r\n" + printf "%s" "$body" + fi +} + +# ──
Help ───────────────────────────────────────────────────────────── + +show_help() { + cat < [directory2 ...] + +Monitor directory sizes for Prometheus. Node exporter only reports +mounted filesystem totals — this script tracks individual directories. + +Output modes: + (default) Print metrics to stdout + --textfile Write to node_exporter textfile collector + -o FILE Write to a specific file + --http Run as HTTP server (default port: ${DEFAULT_PORT}) + +Options: + --port PORT HTTP listen port (default: ${DEFAULT_PORT}) + --timeout SECS du command timeout (default: 300) + --dry-run Show what would be written without writing + --verbose, -v Enable verbose debug output + --quiet, -q Suppress non-error output + -h, --help Show this help message + +Environment variables: + DIRECTORY_SIZE_PORT HTTP listen port (default: ${DEFAULT_PORT}) + DIRECTORY_SIZE_TIMEOUT du command timeout in seconds (default: 300) + +Examples: + $0 /var/log /home /opt + $0 --textfile /var/log /var/lib/mysql + $0 --http --port 9101 /var/log /home + $0 -o /tmp/dir_sizes.prom /var/log +EOF +} + +# ── Argument Parsing ──────────────────────────────────────────────── + +while [[ $# -gt 0 ]]; do + case "$1" in + --textfile) + OUTPUT_MODE="textfile" + shift + ;; + -o) + OUTPUT_MODE="file" + OUTPUT_FILE="$2" + shift 2 + ;; + --http) + OUTPUT_MODE="http" + shift + ;; + --port) + PORT="$2" + shift 2 + ;; + --timeout) + TIMEOUT="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --verbose|-v) + VERBOSE=true + shift + ;; + --quiet|-q) + QUIET=true + shift + ;; + --handle-request) + handle_request + exit 0 + ;; + -h|--help) + show_help + exit 0 + ;; + -*) + echo "Unknown option: $1" >&2 + exit 1 + ;; + *) + TARGET_DIRECTORIES+=("$1") + shift + ;; + esac +done + +# Validate directories +if [[ ${#TARGET_DIRECTORIES[@]} -eq 0 ]]; then + echo "Error: at least one directory argument is required" >&2 + echo "Run with --help for usage" >&2 + exit 1 +fi + +for dir in "${TARGET_DIRECTORIES[@]}"; do + if [[ ! 
-d "$dir" ]]; then + echo "Error: directory does not exist: $dir" >&2 + exit 1 + fi + if [[ ! -r "$dir" ]]; then + echo "Error: directory is not readable: $dir" >&2 + exit 1 + fi +done + +# ── Output ────────────────────────────────────────────────────────── + +if [[ "$DRY_RUN" == true ]]; then + log_info "DRY RUN — metrics that would be written:" + collect_metrics + exit 0 +fi + +case "$OUTPUT_MODE" in + stdout) + collect_metrics + ;; + textfile) + output_dir="/var/lib/node_exporter" + OUTPUT_FILE="${output_dir}/${EXPORTER_NAME}.prom" + mkdir -p "$output_dir" + temp_file=$(mktemp "${output_dir}/.${EXPORTER_NAME}.XXXXXX") + collect_metrics > "$temp_file" + chmod 644 "$temp_file" + mv -f "$temp_file" "$OUTPUT_FILE" + ;; + file) + temp_file=$(mktemp "${OUTPUT_FILE}.XXXXXX") + collect_metrics > "$temp_file" + chmod 644 "$temp_file" + mv -f "$temp_file" "$OUTPUT_FILE" + ;; + http) + if ! command -v socat &>/dev/null; then + echo "ERROR: socat is required for --http mode" >&2 + echo "Install it: apt install socat or dnf install socat" >&2 + exit 1 + fi + echo "${EXPORTER_NAME} listening on port ${PORT}..." + echo "Monitoring directories: ${TARGET_DIRECTORIES[*]}" + socat TCP-LISTEN:"$PORT",reuseaddr,fork EXEC:"$0 --handle-request" + ;; +esac diff --git a/disk-io-exporter.sh b/disk-io-exporter.sh new file mode 100644 index 0000000..e6e8494 --- /dev/null +++ b/disk-io-exporter.sh @@ -0,0 +1,354 @@ +#!/usr/bin/env bash +# disk-io-exporter.sh — Prometheus exporter for per-disk I/O performance +# +# Reads /proc/diskstats and calculates per-disk IOPS, throughput, +# latency, utilization, and queue depth. Takes two samples with a +# configurable interval to compute rates from the cumulative counters. 
+# +# Author: Phil Connor +# Contact: contact@mylinux.work +# License: MIT +# Date: 2026-03-03 +# Version: 1.0.0 + +set -euo pipefail + +# ── Configuration ─────────────────────────────────────────────────── + +readonly VERSION="1.0.0" +readonly SCRIPT_NAME="${0##*/}" +readonly NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}" +readonly OUTPUT_FILE="${NODE_DIR}/disk_io.prom" +readonly TMP_FILE="${OUTPUT_FILE}.$$" +readonly SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}" +readonly DISK_FILTER="${DISK_FILTER:-}" + +# Runtime flags +DRY_RUN=false +DEBUG=${DEBUG:-} + +# ── Helpers ───────────────────────────────────────────────────────── + +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +log_error() { + echo "[ERROR] $*" >&2 +} + +cleanup() { + rm -f "$TMP_FILE" +} + +trap cleanup EXIT + +show_help() { + cat < 100) v = 100; printf \"%.2f\", v}") + echo "linux_disk_io_util_percent{disk=\"${dev}\"} ${value}" + debug_echo "$dev util_percent=$value" + done <<< "$devices" + + echo "# HELP linux_disk_io_queue_depth Weighted number of I/Os in progress (avgqu-sz)" + echo "# TYPE linux_disk_io_queue_depth gauge" + while read -r dev; do + local m1 m2 delta + m1="${snap1[${dev}_weighted_ms]:-0}" + m2="${snap2[${dev}_weighted_ms]:-0}" + delta=$((m2 - m1)) + local value + value=$(awk "BEGIN {printf \"%.2f\", $delta / ($interval * 1000)}") + echo "linux_disk_io_queue_depth{disk=\"${dev}\"} ${value}" + debug_echo "$dev queue_depth=$value" + done <<< "$devices" + + # ── Script metadata metrics ── + + local end_time runtime + end_time=$(date +%s%N) + runtime=$(awk "BEGIN {printf \"%.3f\", ($end_time - $start_time) / 1000000000}") + + echo "" + echo "# HELP linux_disk_io_exporter_duration_seconds Script execution time" + echo "# TYPE linux_disk_io_exporter_duration_seconds gauge" + echo "linux_disk_io_exporter_duration_seconds ${runtime}" + + echo "# HELP linux_disk_io_exporter_last_run_timestamp Last successful run" + echo "# TYPE 
linux_disk_io_exporter_last_run_timestamp gauge" + echo "linux_disk_io_exporter_last_run_timestamp $(date +%s)" + + echo "# HELP linux_disk_io_exporter_success Whether the exporter ran successfully" + echo "# TYPE linux_disk_io_exporter_success gauge" + echo "linux_disk_io_exporter_success 1" +} + +# ── Main ──────────────────────────────────────────────────────────── + +main() { + while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --debug) + DEBUG=1 + shift + ;; + --help|-h) + show_help + ;; + --version|-v) + show_version + ;; + *) + log_error "Unknown option: $1" + echo "Use --help for usage information" >&2 + exit 1 + ;; + esac + done + + if [[ ! -f /proc/diskstats ]]; then + log_error "/proc/diskstats not found — this script requires a Linux system" + exit 1 + fi + + if [[ "$DRY_RUN" == true ]]; then + collect_metrics + exit 0 + fi + + if [[ ! -d "$NODE_DIR" ]]; then + log_error "Textfile collector directory does not exist: $NODE_DIR" + exit 1 + fi + + collect_metrics > "$TMP_FILE" + chmod 644 "$TMP_FILE" + mv -f "$TMP_FILE" "$OUTPUT_FILE" + debug_echo "Metrics written to $OUTPUT_FILE" +} + +main "$@" diff --git a/dns-health-check.sh b/dns-health-check.sh new file mode 100644 index 0000000..2dc10ea --- /dev/null +++ b/dns-health-check.sh @@ -0,0 +1,383 @@ +#!/bin/bash +################################################################################ +# Script Name: dns-health-check.sh +# Version: 1.0 +# Description: Prometheus textfile collector exporter for DNS resolution health +# Queries configurable DNS records and reports resolution status +# and latency via node_exporter textfile collector +# +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +# Date: 2026-03-03 +# +# Prerequisites: +# - dig (bind-utils / dnsutils) +# - node_exporter with textfile collector enabled +# - /var/lib/node_exporter directory exists +# +# Usage: +# # Run with default config +# sudo 
./dns-health-check.sh +# +# # Dry run (output to stdout) +# ./dns-health-check.sh --dry-run +# +# # Debug mode +# DEBUG=1 sudo ./dns-health-check.sh +# +# Config Format (pipe-delimited, one record per line): +# record_name|record_type|dns_server|expected_value(optional) +# +# Examples: +# example.com|A|8.8.8.8| +# mail.example.com|MX|8.8.8.8| +# _ldap._tcp.example.com|SRV|10.0.0.1| +# example.com|A|8.8.8.8|93.184.216.34 +# +# Metrics Exported: +# - linux_dns_query_success{record,type,server} - 1=resolved, 0=failed +# - linux_dns_query_time_seconds{record,type,server} - Resolution time +# - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch +# +################################################################################ + +set -o pipefail + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +readonly VERSION="1.0" +readonly SCRIPT_NAME="${0##*/}" +readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}" +readonly OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom" +readonly CONFIG_FILE="${CONFIG_FILE:-/etc/dns-health-check.conf}" +readonly TMP_FILE="${OUTPUT_FILE}.$$" + +# Runtime flags +DRY_RUN=false +DEBUG=${DEBUG:-} + +# Default DNS records to check if no config file and no env var +readonly DEFAULT_RECORDS="localhost|A|127.0.0.1|" + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +log_error() { + echo "[ERROR] $*" >&2 +} + +cleanup() { + rm -f "$TMP_FILE" +} + +trap cleanup EXIT + +show_help() { + cat </dev/null; then + log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)." 
+ exit 1 + fi +} + +# ============================================================================ +# RECORD LOADING +# ============================================================================ + +load_records() { + local record_count=0 + local source="" + + # Priority: DNS_RECORDS env var > config file > defaults + if [[ -n "${DNS_RECORDS:-}" ]]; then + source="DNS_RECORDS environment variable" + local IFS=";" + local entry + for entry in $DNS_RECORDS; do + entry="${entry#"${entry%%[![:space:]]*}"}" + entry="${entry%"${entry##*[![:space:]]}"}" + if [[ -n "$entry" ]]; then + echo "$entry" + record_count=$((record_count + 1)) + fi + done + elif [[ -f "$CONFIG_FILE" ]]; then + source="$CONFIG_FILE" + while IFS= read -r line; do + # Strip comments and whitespace + line="${line%%#*}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + + if [[ -z "$line" ]]; then + continue + fi + + echo "$line" + record_count=$((record_count + 1)) + done < "$CONFIG_FILE" + else + source="defaults" + echo "$DEFAULT_RECORDS" + record_count=1 + fi + + debug_echo "Loaded $record_count DNS record(s) from $source" +} + +# ============================================================================ +# DNS QUERY +# ============================================================================ + +query_dns() { + local record="$1" + local rtype="$2" + local server="$3" + local expected="$4" + + debug_echo "Querying $rtype record for $record via $server" + + local dig_output + local query_start + local query_end + local query_time + local success=0 + local answer="" + local match="" + + query_start=$(date +%s%N 2>/dev/null) || query_start=$(date +%s)000000000 + + if dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null); then + query_end=$(date +%s%N 2>/dev/null) || query_end=$(date +%s)000000000 + answer="${dig_output}" + + if [[ -n "$answer" ]]; then + success=1 + debug_echo " Answer: $(echo "$answer" | tr '\n' ' ')" + else + 
success=0 + debug_echo " Empty answer (NXDOMAIN or no records)" + fi + else + query_end=$(date +%s%N 2>/dev/null) || query_end=$(date +%s)000000000 + success=0 + debug_echo " Query failed" + fi + + # Calculate query time in seconds + local elapsed_ns=$((query_end - query_start)) + query_time=$(awk "BEGIN {printf \"%.6f\", $elapsed_ns / 1000000000}") + + # Check expected value if provided + if [[ -n "$expected" ]]; then + if echo "$answer" | grep -qF "$expected"; then + match=1 + debug_echo " Expected value matched: $expected" + else + match=0 + debug_echo " Expected value NOT matched: $expected (got: $(echo "$answer" | tr '\n' ' '))" + fi + fi + + echo "${success}|${query_time}|${match}" +} + +# ============================================================================ +# METRICS COLLECTION +# ============================================================================ + +collect_metrics() { + local records=() + while IFS= read -r record_line; do + records+=("$record_line") + done < <(load_records) + + local success_metrics="" + local time_metrics="" + local match_metrics="" + local has_match_metric=false + + for record_line in "${records[@]}"; do + local record + record=$(echo "$record_line" | cut -d'|' -f1) + local rtype + rtype=$(echo "$record_line" | cut -d'|' -f2) + local server + server=$(echo "$record_line" | cut -d'|' -f3) + local expected + expected=$(echo "$record_line" | cut -d'|' -f4) + + if [[ -z "$record" ]] || [[ -z "$rtype" ]] || [[ -z "$server" ]]; then + log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)" + continue + fi + + local result + result=$(query_dns "$record" "$rtype" "$server" "$expected") + + local qsuccess + qsuccess=$(echo "$result" | cut -d'|' -f1) + local qtime + qtime=$(echo "$result" | cut -d'|' -f2) + local qmatch + qmatch=$(echo "$result" | cut -d'|' -f3) + + local labels="record=\"${record}\",type=\"${rtype}\",server=\"${server}\"" + + 
success_metrics+="linux_dns_query_success{${labels}} ${qsuccess}\n" + time_metrics+="linux_dns_query_time_seconds{${labels}} ${qtime}\n" + + if [[ -n "$expected" ]]; then + has_match_metric=true + local match_labels="${labels},expected=\"${expected}\"" + match_metrics+="linux_dns_query_answer_match{${match_labels}} ${qmatch}\n" + fi + done + + local output="" + + output+="# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)\n" + output+="# TYPE linux_dns_query_success gauge\n" + output+="$success_metrics" + + output+="# HELP linux_dns_query_time_seconds DNS query resolution time in seconds\n" + output+="# TYPE linux_dns_query_time_seconds gauge\n" + output+="$time_metrics" + + if [[ "$has_match_metric" == "true" ]]; then + output+="# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)\n" + output+="# TYPE linux_dns_query_answer_match gauge\n" + output+="$match_metrics" + fi + + printf '%b' "$output" +} + +# ============================================================================ +# OUTPUT +# ============================================================================ + +write_metrics() { + local metrics + metrics=$(collect_metrics) + + if [[ "$DRY_RUN" == "true" ]]; then + echo "$metrics" + return + fi + + if [[ ! 
-d "$TEXTFILE_DIR" ]]; then + log_error "Textfile collector directory does not exist: $TEXTFILE_DIR" + exit 1 + fi + + echo "$metrics" > "$TMP_FILE" + mv "$TMP_FILE" "$OUTPUT_FILE" + debug_echo "Metrics written to $OUTPUT_FILE" +} + +# ============================================================================ +# MAIN +# ============================================================================ + +main() { + while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --debug) + DEBUG=1 + shift + ;; + --help|-h) + show_help + ;; + --version|-v) + show_version + ;; + *) + log_error "Unknown option: $1" + echo "Use --help for usage information" >&2 + exit 1 + ;; + esac + done + + check_dependencies + write_metrics +} + +main "$@" diff --git a/exchange-metrics.ps1 b/exchange-metrics.ps1 new file mode 100644 index 0000000..98acaef --- /dev/null +++ b/exchange-metrics.ps1 @@ -0,0 +1,1256 @@ +# Exchange Metrics Collector - Outputs Prometheus-compatible metrics +# Requires Exchange Management Shell and appropriate permissions + +$StartTime = Get-Date +$Hostname = $env:COMPUTERNAME + +# Try to load Exchange Management Shell if not already loaded +if (-not (Get-Command Get-TransportServer -ErrorAction SilentlyContinue)) { + try { + Add-PSSnapin Microsoft.Exchange.Management.PowerShell.SnapIn -ErrorAction SilentlyContinue + } catch { + # Try loading as module for newer Exchange versions + try { + $ExchangeInstallPath = (Get-ItemProperty HKLM:\SOFTWARE\Microsoft\ExchangeServer\v15\Setup -ErrorAction SilentlyContinue).MsiInstallPath + if ($ExchangeInstallPath) { + . 
"$ExchangeInstallPath\bin\RemoteExchange.ps1" -ErrorAction SilentlyContinue + Connect-ExchangeServer -auto -ErrorAction SilentlyContinue + } + } catch {} + } +} + +# Helper function to safely get counter values +function Get-SafeCounter { + param([string]$CounterPath) + try { + $counter = Get-Counter -Counter $CounterPath -ErrorAction SilentlyContinue + if ($counter -and $counter.CounterSamples) { + return [math]::Max(0, [math]::Round($counter.CounterSamples[0].CookedValue, 2)) + } + } catch {} + return 0 +} + +# Helper function to safely count event log entries +function Get-EventLogCount { + param( + [string]$LogName, + [string]$Source, + [int]$EventId, + [int]$Hours = 24 + ) + try { + $startDate = (Get-Date).AddHours(-$Hours) + $filter = @{ + LogName = $LogName + StartTime = $startDate + } + if ($Source) { $filter['ProviderName'] = $Source } + if ($EventId) { $filter['Id'] = $EventId } + + return (Get-WinEvent -FilterHashtable $filter -ErrorAction SilentlyContinue | Measure-Object).Count + } catch { + return 0 + } +} + +# ============================================================================ +# TRANSPORT QUEUES +# ============================================================================ +Write-Output "# HELP exchange_queue_length Number of messages in each transport queue" +Write-Output "# TYPE exchange_queue_length gauge" + +try { + $queues = Get-Queue -ErrorAction SilentlyContinue + if ($queues) { + foreach ($queue in $queues) { + $queueType = $queue.DeliveryType -replace '\s+', '_' + $queueStatus = $queue.Status + $nextHop = ($queue.NextHopDomain -replace '["\s]', '') -replace '\.', '_' + Write-Output "exchange_queue_length{queue_type=`"$queueType`",status=`"$queueStatus`",next_hop=`"$nextHop`",hostname=`"$Hostname`"} $($queue.MessageCount)" + } + } +} catch {} + +# Queue totals by status +Write-Output "# HELP exchange_queue_total_messages Total messages in queues by status" +Write-Output "# TYPE exchange_queue_total_messages gauge" +try { + 
$queueStats = Get-Queue -ErrorAction SilentlyContinue | Group-Object Status + foreach ($stat in $queueStats) { + $total = ($stat.Group | Measure-Object -Property MessageCount -Sum).Sum + Write-Output "exchange_queue_total_messages{status=`"$($stat.Name)`",hostname=`"$Hostname`"} $total" + } +} catch {} + +# Submission queue (messages waiting to be categorized) +Write-Output "# HELP exchange_submission_queue_length Messages in submission queue" +Write-Output "# TYPE exchange_submission_queue_length gauge" +$submissionQueue = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Submission Queue Length" +Write-Output "exchange_submission_queue_length{hostname=`"$Hostname`"} $submissionQueue" + +# Poison queue (messages that failed repeatedly) +Write-Output "# HELP exchange_poison_queue_length Messages in poison queue" +Write-Output "# TYPE exchange_poison_queue_length gauge" +$poisonQueue = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Poison Queue Length" +Write-Output "exchange_poison_queue_length{hostname=`"$Hostname`"} $poisonQueue" + +# Unreachable queue +Write-Output "# HELP exchange_unreachable_queue_length Messages in unreachable queue" +Write-Output "# TYPE exchange_unreachable_queue_length gauge" +$unreachableQueue = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Unreachable Queue Length" +Write-Output "exchange_unreachable_queue_length{hostname=`"$Hostname`"} $unreachableQueue" + +# Active mailbox delivery queue +Write-Output "# HELP exchange_active_mailbox_queue_length Active mailbox delivery queue" +Write-Output "# TYPE exchange_active_mailbox_queue_length gauge" +$activeMailbox = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Active Mailbox Delivery Queue Length" +Write-Output "exchange_active_mailbox_queue_length{hostname=`"$Hostname`"} $activeMailbox" + +# Active non-SMTP delivery queue +Write-Output "# HELP exchange_active_nonsmtp_queue_length Active non-SMTP delivery queue" +Write-Output "# TYPE exchange_active_nonsmtp_queue_length 
gauge" +$activeNonSmtp = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Active Non-Smtp Delivery Queue Length" +Write-Output "exchange_active_nonsmtp_queue_length{hostname=`"$Hostname`"} $activeNonSmtp" + +# Retry mailbox delivery queue +Write-Output "# HELP exchange_retry_mailbox_queue_length Retry mailbox delivery queue" +Write-Output "# TYPE exchange_retry_mailbox_queue_length gauge" +$retryMailbox = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Retry Mailbox Delivery Queue Length" +Write-Output "exchange_retry_mailbox_queue_length{hostname=`"$Hostname`"} $retryMailbox" + +# ============================================================================ +# MESSAGE THROUGHPUT +# ============================================================================ +Write-Output "# HELP exchange_messages_received_total Total messages received" +Write-Output "# TYPE exchange_messages_received_total counter" +$messagesReceived = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Messages Received Total" +Write-Output "exchange_messages_received_total{hostname=`"$Hostname`"} $messagesReceived" + +Write-Output "# HELP exchange_messages_sent_total Total messages sent" +Write-Output "# TYPE exchange_messages_sent_total counter" +$messagesSent = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\Messages Sent Total" +Write-Output "exchange_messages_sent_total{hostname=`"$Hostname`"} $messagesSent" + +Write-Output "# HELP exchange_messages_received_per_second Messages received per second" +Write-Output "# TYPE exchange_messages_received_per_second gauge" +$messagesReceivedSec = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Messages Received/sec" +Write-Output "exchange_messages_received_per_second{hostname=`"$Hostname`"} $messagesReceivedSec" + +Write-Output "# HELP exchange_messages_sent_per_second Messages sent per second" +Write-Output "# TYPE exchange_messages_sent_per_second gauge" +$messagesSentSec = Get-SafeCounter "\MSExchangeTransport 
SmtpSend(_total)\Messages Sent/sec" +Write-Output "exchange_messages_sent_per_second{hostname=`"$Hostname`"} $messagesSentSec" + +# Bytes transferred +Write-Output "# HELP exchange_bytes_received_total Total bytes received" +Write-Output "# TYPE exchange_bytes_received_total counter" +$bytesReceived = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Bytes Received Total" +Write-Output "exchange_bytes_received_total{hostname=`"$Hostname`"} $bytesReceived" + +Write-Output "# HELP exchange_bytes_sent_total Total bytes sent" +Write-Output "# TYPE exchange_bytes_sent_total counter" +$bytesSent = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\Bytes Sent Total" +Write-Output "exchange_bytes_sent_total{hostname=`"$Hostname`"} $bytesSent" + +# ============================================================================ +# SMTP CONNECTIONS +# ============================================================================ +Write-Output "# HELP exchange_smtp_connections_current Current SMTP connections" +Write-Output "# TYPE exchange_smtp_connections_current gauge" +$smtpInbound = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Connections Current" +$smtpOutbound = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\Connections Current" +Write-Output "exchange_smtp_connections_current{direction=`"inbound`",hostname=`"$Hostname`"} $smtpInbound" +Write-Output "exchange_smtp_connections_current{direction=`"outbound`",hostname=`"$Hostname`"} $smtpOutbound" + +Write-Output "# HELP exchange_smtp_connections_total Total SMTP connections" +Write-Output "# TYPE exchange_smtp_connections_total counter" +$smtpInboundTotal = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Connections Total" +$smtpOutboundTotal = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\Connections Total" +Write-Output "exchange_smtp_connections_total{direction=`"inbound`",hostname=`"$Hostname`"} $smtpInboundTotal" +Write-Output 
"exchange_smtp_connections_total{direction=`"outbound`",hostname=`"$Hostname`"} $smtpOutboundTotal" + +# SMTP connection failures +Write-Output "# HELP exchange_smtp_connection_failures_total SMTP connection failures" +Write-Output "# TYPE exchange_smtp_connection_failures_total counter" +$smtpSendFailures = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\Connection Failures" +Write-Output "exchange_smtp_connection_failures_total{hostname=`"$Hostname`"} $smtpSendFailures" + +# ============================================================================ +# MESSAGE TRACKING (from logs) +# ============================================================================ +Write-Output "# HELP exchange_tracked_messages_total Messages by event type (last 24h)" +Write-Output "# TYPE exchange_tracked_messages_total counter" + +try { + $trackingStart = (Get-Date).AddHours(-24) + $eventTypes = @('RECEIVE', 'SEND', 'DELIVER', 'FAIL', 'DSN', 'DEFER', 'EXPAND', 'REDIRECT', 'RESOLVE', 'DROP', 'BADMAIL', 'POISONMESSAGE') + + foreach ($eventType in $eventTypes) { + try { + $count = (Get-MessageTrackingLog -Start $trackingStart -EventId $eventType -ResultSize Unlimited -ErrorAction SilentlyContinue | Measure-Object).Count + Write-Output "exchange_tracked_messages_total{event_type=`"$eventType`",hostname=`"$Hostname`"} $count" + } catch { + Write-Output "exchange_tracked_messages_total{event_type=`"$eventType`",hostname=`"$Hostname`"} 0" + } + } +} catch {} + +# Message delivery latency from tracking logs +Write-Output "# HELP exchange_message_latency_seconds Message delivery latency statistics" +Write-Output "# TYPE exchange_message_latency_seconds gauge" +try { + $deliveries = Get-MessageTrackingLog -Start (Get-Date).AddHours(-1) -EventId DELIVER -ResultSize 1000 -ErrorAction SilentlyContinue + if ($deliveries -and $deliveries.Count -gt 0) { + $latencies = $deliveries | ForEach-Object { + $received = Get-MessageTrackingLog -MessageId $_.MessageId -EventId RECEIVE -ResultSize 1 
-ErrorAction SilentlyContinue + if ($received) { + ($_.Timestamp - $received.Timestamp).TotalSeconds + } + } | Where-Object { $_ -gt 0 } + + if ($latencies -and $latencies.Count -gt 0) { + $avgLatency = ($latencies | Measure-Object -Average).Average + $maxLatency = ($latencies | Measure-Object -Maximum).Maximum + Write-Output "exchange_message_latency_seconds{stat=`"avg`",hostname=`"$Hostname`"} $([math]::Round($avgLatency, 2))" + Write-Output "exchange_message_latency_seconds{stat=`"max`",hostname=`"$Hostname`"} $([math]::Round($maxLatency, 2))" + } else { + Write-Output "exchange_message_latency_seconds{stat=`"avg`",hostname=`"$Hostname`"} 0" + Write-Output "exchange_message_latency_seconds{stat=`"max`",hostname=`"$Hostname`"} 0" + } + } +} catch { + Write-Output "exchange_message_latency_seconds{stat=`"avg`",hostname=`"$Hostname`"} 0" + Write-Output "exchange_message_latency_seconds{stat=`"max`",hostname=`"$Hostname`"} 0" +} + +# ============================================================================ +# MAILBOX STATISTICS +# ============================================================================ +Write-Output "# HELP exchange_mailbox_count Total number of mailboxes" +Write-Output "# TYPE exchange_mailbox_count gauge" +try { + $mailboxCount = (Get-Mailbox -ResultSize Unlimited -ErrorAction SilentlyContinue | Measure-Object).Count + Write-Output "exchange_mailbox_count{hostname=`"$Hostname`"} $mailboxCount" +} catch { + Write-Output "exchange_mailbox_count{hostname=`"$Hostname`"} 0" +} + +# Mailbox database sizes and stats +Write-Output "# HELP exchange_database_size_bytes Mailbox database size in bytes" +Write-Output "# TYPE exchange_database_size_bytes gauge" +Write-Output "# HELP exchange_database_available_space_bytes Available space in mailbox database" +Write-Output "# TYPE exchange_database_available_space_bytes gauge" +Write-Output "# HELP exchange_database_mailbox_count Mailboxes per database" +Write-Output "# TYPE 
exchange_database_mailbox_count gauge" + +try { + $databases = Get-MailboxDatabase -Status -ErrorAction SilentlyContinue + foreach ($db in $databases) { + $dbName = $db.Name -replace '\s+', '_' + $dbSize = if ($db.DatabaseSize) { $db.DatabaseSize.ToBytes() } else { 0 } + $dbAvailable = if ($db.AvailableNewMailboxSpace) { $db.AvailableNewMailboxSpace.ToBytes() } else { 0 } + Write-Output "exchange_database_size_bytes{database=`"$dbName`",hostname=`"$Hostname`"} $dbSize" + Write-Output "exchange_database_available_space_bytes{database=`"$dbName`",hostname=`"$Hostname`"} $dbAvailable" + + $mbxInDb = (Get-Mailbox -Database $db.Name -ResultSize Unlimited -ErrorAction SilentlyContinue | Measure-Object).Count + Write-Output "exchange_database_mailbox_count{database=`"$dbName`",hostname=`"$Hostname`"} $mbxInDb" + } +} catch {} + +# Database mount status +Write-Output "# HELP exchange_database_mounted Database mount status (1=mounted, 0=dismounted)" +Write-Output "# TYPE exchange_database_mounted gauge" +try { + $databases = Get-MailboxDatabase -Status -ErrorAction SilentlyContinue + foreach ($db in $databases) { + $dbName = $db.Name -replace '\s+', '_' + $mounted = if ($db.Mounted) { 1 } else { 0 } + Write-Output "exchange_database_mounted{database=`"$dbName`",hostname=`"$Hostname`"} $mounted" + } +} catch {} + +# ============================================================================ +# CONTENT FILTERING / ANTI-SPAM +# ============================================================================ +Write-Output "# HELP exchange_antispam_messages_total Anti-spam filter results" +Write-Output "# TYPE exchange_antispam_messages_total counter" + +# Content Filter Agent +$cfBlocked = Get-SafeCounter "\MSExchange Content Filter Agent\Messages that Triggered the Content Filter" +$cfQuarantined = Get-SafeCounter "\MSExchange Content Filter Agent\Messages Quarantined" +$cfRejected = Get-SafeCounter "\MSExchange Content Filter Agent\Messages Rejected" +$cfDeleted = 
Get-SafeCounter "\MSExchange Content Filter Agent\Messages Deleted" +Write-Output "exchange_antispam_messages_total{filter=`"content`",action=`"triggered`",hostname=`"$Hostname`"} $cfBlocked" +Write-Output "exchange_antispam_messages_total{filter=`"content`",action=`"quarantined`",hostname=`"$Hostname`"} $cfQuarantined" +Write-Output "exchange_antispam_messages_total{filter=`"content`",action=`"rejected`",hostname=`"$Hostname`"} $cfRejected" +Write-Output "exchange_antispam_messages_total{filter=`"content`",action=`"deleted`",hostname=`"$Hostname`"} $cfDeleted" + +# Sender Filter Agent +$sfBlocked = Get-SafeCounter "\MSExchange Sender Filter Agent\Messages Evaluated by Sender Filter" +$sfRejected = Get-SafeCounter "\MSExchange Sender Filter Agent\Messages Rejected by Sender Filter" +Write-Output "exchange_antispam_messages_total{filter=`"sender`",action=`"evaluated`",hostname=`"$Hostname`"} $sfBlocked" +Write-Output "exchange_antispam_messages_total{filter=`"sender`",action=`"rejected`",hostname=`"$Hostname`"} $sfRejected" + +# Sender ID Agent +$sidPass = Get-SafeCounter "\MSExchange Sender Id Agent\Messages That Passed Sender ID Validation" +$sidFail = Get-SafeCounter "\MSExchange Sender Id Agent\Messages That Failed Sender ID Validation" +$sidNeutral = Get-SafeCounter "\MSExchange Sender Id Agent\Messages with Neutral Sender ID Validation Result" +Write-Output "exchange_antispam_messages_total{filter=`"sender_id`",action=`"pass`",hostname=`"$Hostname`"} $sidPass" +Write-Output "exchange_antispam_messages_total{filter=`"sender_id`",action=`"fail`",hostname=`"$Hostname`"} $sidFail" +Write-Output "exchange_antispam_messages_total{filter=`"sender_id`",action=`"neutral`",hostname=`"$Hostname`"} $sidNeutral" + +# Recipient Filter Agent +$rfBlocked = Get-SafeCounter "\MSExchange Recipient Filter Agent\Recipients Rejected by Recipient Validation" +Write-Output "exchange_antispam_messages_total{filter=`"recipient`",action=`"rejected`",hostname=`"$Hostname`"} $rfBlocked" + 
+# ----------------------------------------------------------------------------
+# This run of statements prints Prometheus text-format lines ("# HELP",
+# "# TYPE", then samples) for connection filtering, SCL distribution,
+# authentication and TLS. Values come from the Get-SafeCounter and
+# Get-EventLogCount helpers, which are defined outside this part of the file
+# (presumably they return a single number - TODO confirm against the helpers).
+# ----------------------------------------------------------------------------
+
+# Connection Filter Agent (IP Block List)
+# These samples extend the exchange_antispam_messages_total family whose
+# HELP/TYPE lines are written earlier in the script.
+$connBlocked = Get-SafeCounter "\MSExchange Connection Filtering Agent\Connections on IP Block List"
+$connAllowed = Get-SafeCounter "\MSExchange Connection Filtering Agent\Connections on IP Allow List"
+Write-Output "exchange_antispam_messages_total{filter=`"connection`",action=`"blocked`",hostname=`"$Hostname`"} $connBlocked"
+Write-Output "exchange_antispam_messages_total{filter=`"connection`",action=`"allowed`",hostname=`"$Hostname`"} $connAllowed"
+
+# ============================================================================
+# SPAM CONFIDENCE LEVEL (SCL) DISTRIBUTION
+# ============================================================================
+Write-Output "# HELP exchange_scl_distribution Messages by SCL rating"
+Write-Output "# TYPE exchange_scl_distribution counter"
+# One sample per SCL rating from -1 through 9 inclusive (11 samples); the
+# rating is interpolated into the performance-counter path.
+for ($scl = -1; $scl -le 9; $scl++) {
+    $sclCount = Get-SafeCounter "\MSExchange Content Filter Agent\Messages with SCL Rating $scl"
+    Write-Output "exchange_scl_distribution{scl=`"$scl`",hostname=`"$Hostname`"} $sclCount"
+}
+
+# ============================================================================
+# AUTHENTICATION
+# ============================================================================
+# NOTE(review): this family is declared as a counter, but the OWA and
+# ActiveSync samples below are event-log counts over a 24-hour window
+# (-Hours 24), so they look like values that can decrease - confirm the
+# intended type.
+Write-Output "# HELP exchange_auth_total Authentication attempts"
+Write-Output "# TYPE exchange_auth_total counter"
+
+# SMTP AUTH
+$smtpAuthSuccess = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Auth Succeeded"
+$smtpAuthFail = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Auth Failed"
+Write-Output "exchange_auth_total{type=`"smtp`",result=`"success`",hostname=`"$Hostname`"} $smtpAuthSuccess"
+Write-Output "exchange_auth_total{type=`"smtp`",result=`"failed`",hostname=`"$Hostname`"} $smtpAuthFail"
+
+# OWA Authentication (from event logs)
+# NOTE(review): no -EventId filter is passed, so this presumably counts every
+# "MSExchange OWA" event in the log rather than only successful logons -
+# verify against Get-EventLogCount.
+$owaAuthSuccess = Get-EventLogCount -LogName "MSExchange Management" -Source "MSExchange OWA" -Hours 24
+Write-Output "exchange_auth_total{type=`"owa`",result=`"success`",hostname=`"$Hostname`"} $owaAuthSuccess"
+
+# ActiveSync Authentication
+# Only event ID 1053 from the Application log is counted as a failure.
+$easAuthFail = Get-EventLogCount -LogName "Application" -Source "MSExchange ActiveSync" -EventId 1053 -Hours 24
+Write-Output "exchange_auth_total{type=`"activesync`",result=`"failed`",hostname=`"$Hostname`"} $easAuthFail"
+
+# ============================================================================
+# TLS/ENCRYPTION
+# ============================================================================
+Write-Output "# HELP exchange_tls_connections_total TLS connection statistics"
+Write-Output "# TYPE exchange_tls_connections_total counter"
+
+# Inbound values come from the SmtpReceive counter set, outbound from SmtpSend.
+$tlsInbound = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\TLS Sessions Started"
+$tlsOutbound = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\TLS Sessions Started"
+Write-Output "exchange_tls_connections_total{direction=`"inbound`",hostname=`"$Hostname`"} $tlsInbound"
+Write-Output "exchange_tls_connections_total{direction=`"outbound`",hostname=`"$Hostname`"} $tlsOutbound"
+
+Write-Output "# HELP exchange_tls_negotiation_failures TLS negotiation failures"
+Write-Output "# TYPE exchange_tls_negotiation_failures counter"
+$tlsInboundFail = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\TLS Negotiations Failed"
+$tlsOutboundFail = Get-SafeCounter "\MSExchangeTransport SmtpSend(_total)\TLS Negotiations Failed"
+Write-Output "exchange_tls_negotiation_failures{direction=`"inbound`",hostname=`"$Hostname`"} $tlsInboundFail"
+Write-Output "exchange_tls_negotiation_failures{direction=`"outbound`",hostname=`"$Hostname`"} $tlsOutboundFail"
+
+# ============================================================================
+# CLIENT ACCESS (OWA, EWS, ActiveSync, Outlook Anywhere)
+# ============================================================================
+# The sample line for this family is written by the statements that follow.
+Write-Output "# HELP exchange_owa_requests_total OWA requests"
+Write-Output "# TYPE exchange_owa_requests_total counter"
+# OWA: the rate and the running total are read together; the total is the
+# sample for the exchange_owa_requests_total family declared just above.
+$owaRequests = Get-SafeCounter "\MSExchange OWA\Requests/sec"
+$owaRequestsTotal = Get-SafeCounter "\MSExchange OWA\Total Requests"
+Write-Output "exchange_owa_requests_total{hostname=`"$Hostname`"} $owaRequestsTotal"
+
+Write-Output "# HELP exchange_owa_requests_per_second OWA requests per second"
+Write-Output "# TYPE exchange_owa_requests_per_second gauge"
+Write-Output "exchange_owa_requests_per_second{hostname=`"$Hostname`"} $owaRequests"
+
+Write-Output "# HELP exchange_owa_current_users Current OWA users"
+Write-Output "# TYPE exchange_owa_current_users gauge"
+$owaUsers = Get-SafeCounter "\MSExchange OWA\Current Unique Users"
+Write-Output "exchange_owa_current_users{hostname=`"$Hostname`"} $owaUsers"
+
+# EWS
+Write-Output "# HELP exchange_ews_requests_per_second EWS requests per second"
+Write-Output "# TYPE exchange_ews_requests_per_second gauge"
+$ewsRequests = Get-SafeCounter "\MSExchangeWS\Requests/sec"
+Write-Output "exchange_ews_requests_per_second{hostname=`"$Hostname`"} $ewsRequests"
+
+Write-Output "# HELP exchange_ews_current_connections Current EWS connections"
+Write-Output "# TYPE exchange_ews_current_connections gauge"
+$ewsConns = Get-SafeCounter "\MSExchangeWS\Current Connections"
+Write-Output "exchange_ews_current_connections{hostname=`"$Hostname`"} $ewsConns"
+
+# ActiveSync
+Write-Output "# HELP exchange_activesync_requests_per_second ActiveSync requests per second"
+Write-Output "# TYPE exchange_activesync_requests_per_second gauge"
+$easRequests = Get-SafeCounter "\MSExchange ActiveSync\Requests/sec"
+Write-Output "exchange_activesync_requests_per_second{hostname=`"$Hostname`"} $easRequests"
+
+Write-Output "# HELP exchange_activesync_current_requests Current ActiveSync requests"
+Write-Output "# TYPE exchange_activesync_current_requests gauge"
+$easCurrent = Get-SafeCounter "\MSExchange ActiveSync\Current Requests"
+Write-Output "exchange_activesync_current_requests{hostname=`"$Hostname`"} $easCurrent"
+
+Write-Output "# HELP exchange_activesync_sync_commands ActiveSync sync commands per second"
+Write-Output "# TYPE exchange_activesync_sync_commands gauge"
+$easSync = Get-SafeCounter "\MSExchange ActiveSync\Sync Commands/sec"
+Write-Output "exchange_activesync_sync_commands{hostname=`"$Hostname`"} $easSync"
+
+# Outlook Anywhere (RPC over HTTP)
+# NOTE(review): the metric is named/described as "per second" but reads the
+# "RPC Requests" counter, which has no /sec suffix - confirm the intended counter.
+Write-Output "# HELP exchange_rpc_requests_per_second RPC requests per second"
+Write-Output "# TYPE exchange_rpc_requests_per_second gauge"
+$rpcRequests = Get-SafeCounter "\MSExchange RpcClientAccess\RPC Requests"
+Write-Output "exchange_rpc_requests_per_second{hostname=`"$Hostname`"} $rpcRequests"
+
+Write-Output "# HELP exchange_rpc_active_users Active RPC users"
+Write-Output "# TYPE exchange_rpc_active_users gauge"
+$rpcUsers = Get-SafeCounter "\MSExchange RpcClientAccess\Active User Count"
+Write-Output "exchange_rpc_active_users{hostname=`"$Hostname`"} $rpcUsers"
+
+Write-Output "# HELP exchange_rpc_connection_count RPC connection count"
+Write-Output "# TYPE exchange_rpc_connection_count gauge"
+$rpcConns = Get-SafeCounter "\MSExchange RpcClientAccess\Connection Count"
+Write-Output "exchange_rpc_connection_count{hostname=`"$Hostname`"} $rpcConns"
+
+# Outlook MAPI/HTTP
+Write-Output "# HELP exchange_mapi_requests_per_second MAPI over HTTP requests per second"
+Write-Output "# TYPE exchange_mapi_requests_per_second gauge"
+$mapiRequests = Get-SafeCounter "\MSExchange MapiHttp Emsmdb\Requests/sec"
+Write-Output "exchange_mapi_requests_per_second{hostname=`"$Hostname`"} $mapiRequests"
+
+Write-Output "# HELP exchange_mapi_current_connections Current MAPI connections"
+Write-Output "# TYPE exchange_mapi_current_connections gauge"
+$mapiConns = Get-SafeCounter "\MSExchange MapiHttp Emsmdb\Current Unique Users"
+Write-Output "exchange_mapi_current_connections{hostname=`"$Hostname`"} $mapiConns"
+
+# ============================================================================
+# POP3/IMAP
+# ============================================================================
+Write-Output "# HELP exchange_pop3_connections Current POP3 connections"
+Write-Output "# TYPE exchange_pop3_connections gauge"
+$pop3Conns = Get-SafeCounter "\MSExchangePop3\Connections Current"
+Write-Output "exchange_pop3_connections{hostname=`"$Hostname`"} $pop3Conns"
+
+Write-Output "# HELP exchange_pop3_connections_total Total POP3 connections"
+Write-Output "# TYPE exchange_pop3_connections_total counter"
+$pop3Total = Get-SafeCounter "\MSExchangePop3\Connections Total"
+Write-Output "exchange_pop3_connections_total{hostname=`"$Hostname`"} $pop3Total"
+
+Write-Output "# HELP exchange_imap4_connections Current IMAP4 connections"
+Write-Output "# TYPE exchange_imap4_connections gauge"
+$imap4Conns = Get-SafeCounter "\MSExchangeImap4\Connections Current"
+Write-Output "exchange_imap4_connections{hostname=`"$Hostname`"} $imap4Conns"
+
+Write-Output "# HELP exchange_imap4_connections_total Total IMAP4 connections"
+Write-Output "# TYPE exchange_imap4_connections_total counter"
+$imap4Total = Get-SafeCounter "\MSExchangeImap4\Connections Total"
+Write-Output "exchange_imap4_connections_total{hostname=`"$Hostname`"} $imap4Total"
+
+# ============================================================================
+# STORE (Information Store)
+# ============================================================================
+Write-Output "# HELP exchange_store_rpc_requests RPC requests to Information Store"
+Write-Output "# TYPE exchange_store_rpc_requests gauge"
+$storeRpc = Get-SafeCounter "\MSExchangeIS Store(_total)\RPC Requests"
+Write-Output "exchange_store_rpc_requests{hostname=`"$Hostname`"} $storeRpc"
+
+Write-Output "# HELP exchange_store_rpc_latency_avg Average RPC latency (ms)"
+Write-Output "# TYPE exchange_store_rpc_latency_avg gauge"
+$storeLatency = Get-SafeCounter "\MSExchangeIS Store(_total)\RPC Average Latency"
+Write-Output "exchange_store_rpc_latency_avg{hostname=`"$Hostname`"} $storeLatency"
+
+Write-Output "# HELP exchange_store_messages_queued Messages queued for submission"
+Write-Output "# TYPE exchange_store_messages_queued gauge"
+$storeQueued = Get-SafeCounter "\MSExchangeIS Store(_total)\Messages Queued For Submission"
+Write-Output "exchange_store_messages_queued{hostname=`"$Hostname`"} $storeQueued"
+
+# ============================================================================
+# TRANSPORT DUMPSTER / SAFETY NET
+# ============================================================================
+Write-Output "# HELP exchange_safetynet_messages Messages in Safety Net"
+Write-Output "# TYPE exchange_safetynet_messages gauge"
+$safetynetMsgs = Get-SafeCounter "\MSExchangeTransport Safety Net\Safety Net Total Messages"
+Write-Output "exchange_safetynet_messages{hostname=`"$Hostname`"} $safetynetMsgs"
+
+Write-Output "# HELP exchange_safetynet_size_bytes Safety Net size in bytes"
+Write-Output "# TYPE exchange_safetynet_size_bytes gauge"
+$safetynetSize = Get-SafeCounter "\MSExchangeTransport Safety Net\Safety Net Total Size"
+Write-Output "exchange_safetynet_size_bytes{hostname=`"$Hostname`"} $safetynetSize"
+
+# ============================================================================
+# DSN (DELIVERY STATUS NOTIFICATIONS)
+# ============================================================================
+Write-Output "# HELP exchange_dsn_total DSN messages generated"
+Write-Output "# TYPE exchange_dsn_total counter"
+$dsnGenerated = Get-SafeCounter "\MSExchangeTransport DSN(_total)\Delivery Status Notifications Generated"
+Write-Output "exchange_dsn_total{type=`"generated`",hostname=`"$Hostname`"} $dsnGenerated"
+
+$dsnFailure = Get-SafeCounter "\MSExchangeTransport DSN(_total)\Failure DSNs Total"
+$dsnDelay = Get-SafeCounter "\MSExchangeTransport DSN(_total)\Delay DSNs Total"
+$dsnRelayed = Get-SafeCounter "\MSExchangeTransport DSN(_total)\Relayed DSNs Total"
+$dsnExpanded = Get-SafeCounter "\MSExchangeTransport DSN(_total)\Expanded DSNs Total"
+Write-Output "exchange_dsn_total{type=`"failure`",hostname=`"$Hostname`"} $dsnFailure"
+Write-Output "exchange_dsn_total{type=`"delay`",hostname=`"$Hostname`"} $dsnDelay"
+Write-Output "exchange_dsn_total{type=`"relayed`",hostname=`"$Hostname`"} $dsnRelayed"
+Write-Output "exchange_dsn_total{type=`"expanded`",hostname=`"$Hostname`"} $dsnExpanded"
+
+# ============================================================================
+# RESOURCE HEALTH
+# ============================================================================
+Write-Output "# HELP exchange_resource_health Resource health status"
+Write-Output "# TYPE exchange_resource_health gauge"
+
+# Back Pressure indicators
+$backPressure = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Messages Submitted Recently"
+Write-Output "exchange_resource_health{resource=`"back_pressure_messages`",hostname=`"$Hostname`"} $backPressure"
+
+# Database log generation
+$dbLogGen = Get-SafeCounter "\MSExchange Database ==> Instances(_total)\Log Generation Checkpoint Depth"
+Write-Output "exchange_resource_health{resource=`"log_checkpoint_depth`",hostname=`"$Hostname`"} $dbLogGen"
+
+# ============================================================================
+# SERVICE STATUS
+# ============================================================================
+Write-Output "# HELP exchange_service_status Exchange service status (1=running, 0=stopped)"
+Write-Output "# TYPE exchange_service_status gauge"
+
+$services = @(
+    'MSExchangeTransport',
+    'MSExchangeIS',
+    'MSExchangeMailboxAssistants',
+    'MSExchangeDelivery',
+    'MSExchangeSubmission',
+    'MSExchangeFrontEndTransport',
+    'MSExchangeEdgeSync',
+    'MSExchangeServiceHost',
+    'MSExchangeRPC',
+    'MSExchangeMailboxReplication',
+    'MSExchangeADTopology',
+    'MSExchangeAntispamUpdate',
+    'MSExchangeThrottling',
+    'MSExchangeHM',
+    'MSExchangeDiagnostics',
+    'MSExchangePop3',
+    'MSExchangeImap4',
+    'MSExchangeUM',
+    'MSExchangeUMCR'
+)
+
+# Services that are not installed are skipped (no sample) rather than
+# reported as 0; any state other than 'Running' is reported as 0.
+foreach ($svc in $services) {
+    $service = Get-Service -Name $svc -ErrorAction SilentlyContinue
+    if ($service) {
+        $status = if ($service.Status -eq 'Running') { 1 } else { 0 }
+        Write-Output "exchange_service_status{service=`"$svc`",hostname=`"$Hostname`"} $status"
+    }
+}
+
+# ============================================================================
+# SERVER HEALTH (Exchange 2013+)
+# ============================================================================
+Write-Output "# HELP exchange_server_health Server component health (1=healthy, 0=unhealthy)"
+Write-Output "# TYPE exchange_server_health gauge"
+# Best effort: if the cmdlet throws (for example when it is not available in
+# this session), the section simply emits no samples.
+try {
+    $health = Get-ServerComponentState -Identity $Hostname -ErrorAction SilentlyContinue
+    foreach ($component in $health) {
+        $state = if ($component.State -eq 'Active') { 1 } else { 0 }
+        $compName = $component.Component -replace '\s+', '_'
+        Write-Output "exchange_server_health{component=`"$compName`",hostname=`"$Hostname`"} $state"
+    }
+} catch {}
+
+# ============================================================================
+# HEALTH MANAGER MONITORS
+# ============================================================================
+Write-Output "# HELP exchange_health_monitor Health monitor status (1=healthy, 0=unhealthy)"
+Write-Output "# TYPE exchange_health_monitor gauge"
+# The sample is the number of distinct monitors whose AlertValue is not
+# "Healthy"; a thrown error is reported as 0.
+try {
+    $monitors = Get-ServerHealth -Identity $Hostname -ErrorAction SilentlyContinue |
+        Where-Object { $_.AlertValue -ne "Healthy" } |
+        Select-Object Name, AlertValue -Unique
+
+    $unhealthyCount = ($monitors | Measure-Object).Count
+    Write-Output "exchange_health_monitor{status=`"unhealthy_count`",hostname=`"$Hostname`"} $unhealthyCount"
+} catch {
+    Write-Output "exchange_health_monitor{status=`"unhealthy_count`",hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# DAG (DATABASE AVAILABILITY GROUP) STATUS
+# ============================================================================
+Write-Output "# HELP exchange_dag_copy_status DAG database copy status (1=mounted/healthy, 0=failed)"
+Write-Output "# TYPE exchange_dag_copy_status gauge"
+# Queue-length samples are buffered here and written after the loop so that
+# each metric family is emitted as one contiguous group preceded by its own
+# HELP/TYPE lines, as the Prometheus text exposition format requires.
+$dagCopyQueueSamples = @()
+$dagReplayQueueSamples = @()
+try {
+    $dagStatus = Get-MailboxDatabaseCopyStatus -ErrorAction SilentlyContinue
+    foreach ($copy in $dagStatus) {
+        $dbName = $copy.DatabaseName -replace '\s+', '_'
+        # Only 'Mounted' and 'Healthy' count as good; every other status maps to 0.
+        $status = switch ($copy.Status) {
+            'Mounted' { 1 }
+            'Healthy' { 1 }
+            default { 0 }
+        }
+        Write-Output "exchange_dag_copy_status{database=`"$dbName`",server=`"$($copy.MailboxServer)`",status=`"$($copy.Status)`",hostname=`"$Hostname`"} $status"
+
+        # Copy queue length ($null on the left keeps the comparison scalar)
+        if ($null -ne $copy.CopyQueueLength) {
+            $dagCopyQueueSamples += "exchange_dag_copy_queue_length{database=`"$dbName`",server=`"$($copy.MailboxServer)`",hostname=`"$Hostname`"} $($copy.CopyQueueLength)"
+        }
+
+        # Replay queue length
+        if ($null -ne $copy.ReplayQueueLength) {
+            $dagReplayQueueSamples += "exchange_dag_replay_queue_length{database=`"$dbName`",server=`"$($copy.MailboxServer)`",hostname=`"$Hostname`"} $($copy.ReplayQueueLength)"
+        }
+    }
+} catch {}
+
+Write-Output "# HELP exchange_dag_copy_queue_length DAG database copy queue length"
+Write-Output "# TYPE exchange_dag_copy_queue_length gauge"
+$dagCopyQueueSamples | ForEach-Object { Write-Output $_ }
+
+Write-Output "# HELP exchange_dag_replay_queue_length DAG database replay queue length"
+Write-Output "# TYPE exchange_dag_replay_queue_length gauge"
+$dagReplayQueueSamples | ForEach-Object { Write-Output $_ }
+
+# ============================================================================
+# CERTIFICATE EXPIRY
+# ============================================================================
+Write-Output "# HELP exchange_cert_expiry_seconds Seconds until certificate expires"
+Write-Output "# TYPE exchange_cert_expiry_seconds gauge"
+# NOTE(review): certificates whose NotAfter is already in the past are filtered
+# out, so an expired certificate produces no sample at all - confirm that this
+# is intended for alerting.
+try {
+    $certs = Get-ExchangeCertificate -ErrorAction SilentlyContinue | Where-Object { $_.NotAfter -gt (Get-Date) }
+    foreach ($cert in $certs) {
+        # Uses its own variable so the $services array built for the service
+        # status section is not overwritten.
+        $certServices = ($cert.Services -join '_') -replace ',', '_'
+        # Only the first 8 characters of the thumbprint are used as the label value.
+        $thumbprint = $cert.Thumbprint.Substring(0, 8)
+        $expirySeconds = [math]::Round(($cert.NotAfter - (Get-Date)).TotalSeconds)
+        Write-Output "exchange_cert_expiry_seconds{thumbprint=`"$thumbprint`",services=`"$certServices`",hostname=`"$Hostname`"} $expirySeconds"
+    }
+} catch {}
+
+# ============================================================================
+# TOP SENDERS/RECIPIENTS (Last 24 hours)
+# ============================================================================
+# NOTE(review): these are counts over a sliding 24-hour window (capped at
+# 10000 tracking-log entries, top 20 groups) yet are declared as counters -
+# confirm the intended type.
+Write-Output "# HELP exchange_top_senders_total Top senders by message count (24h)"
+Write-Output "# TYPE exchange_top_senders_total counter"
+try {
+    $topSenders = Get-MessageTrackingLog -Start (Get-Date).AddHours(-24) -EventId RECEIVE -ResultSize 10000 -ErrorAction SilentlyContinue |
+        Group-Object Sender |
+        Sort-Object Count -Descending |
+        Select-Object -First 20
+    foreach ($sender in $topSenders) {
+        # Double quotes are stripped so the address cannot terminate the label value.
+        $senderAddr = $sender.Name -replace '"', ''
+        Write-Output "exchange_top_senders_total{sender=`"$senderAddr`",hostname=`"$Hostname`"} $($sender.Count)"
+    }
+} catch {}
+
+Write-Output "# HELP exchange_top_recipients_total Top recipients by message count (24h)"
+Write-Output "# TYPE exchange_top_recipients_total counter"
+try {
+    # Recipients is expanded first so each recipient of a message is counted separately.
+    $topRecipients = Get-MessageTrackingLog -Start (Get-Date).AddHours(-24) -EventId DELIVER -ResultSize 10000 -ErrorAction SilentlyContinue |
+        ForEach-Object { $_.Recipients } |
+        Group-Object |
+        Sort-Object Count -Descending |
+        Select-Object -First 20
+    foreach ($recipient in $topRecipients) {
+        $recipientAddr = $recipient.Name -replace '"', ''
+        Write-Output "exchange_top_recipients_total{recipient=`"$recipientAddr`",hostname=`"$Hostname`"} $($recipient.Count)"
+    }
+} catch {}
+
+# ============================================================================
+# MESSAGE SIZE STATISTICS
+# ============================================================================
+Write-Output "# HELP exchange_message_size_bytes Message size statistics"
+Write-Output "# TYPE exchange_message_size_bytes gauge"
+# Statistics cover RECEIVE events from the last hour (at most 1000 entries);
+# entries whose TotalBytes is not greater than 0 are ignored, and no samples
+# are written when nothing qualifies.
+try {
+    $messages = Get-MessageTrackingLog -Start (Get-Date).AddHours(-1) -EventId RECEIVE -ResultSize 1000 -ErrorAction SilentlyContinue
+    if ($messages -and $messages.Count -gt 0) {
+        $sizes = $messages | ForEach-Object { $_.TotalBytes } | Where-Object { $_ -gt 0 }
+        if ($sizes -and $sizes.Count -gt 0) {
+            $avgSize = [math]::Round(($sizes | Measure-Object -Average).Average)
+            $maxSize = ($sizes | Measure-Object -Maximum).Maximum
+            $totalSize = ($sizes | Measure-Object -Sum).Sum
+            Write-Output "exchange_message_size_bytes{stat=`"avg`",hostname=`"$Hostname`"} $avgSize"
+            Write-Output "exchange_message_size_bytes{stat=`"max`",hostname=`"$Hostname`"} $maxSize"
+            Write-Output "exchange_message_size_bytes{stat=`"total_1h`",hostname=`"$Hostname`"} $totalSize"
+        }
+    }
+} catch {}
+
+# ============================================================================
+# EDGE TRANSPORT (if applicable)
+# ============================================================================
+Write-Output "# HELP exchange_edge_sync_status EdgeSync status"
+Write-Output "# TYPE exchange_edge_sync_status gauge"
+# A sample is written only when the test returns something; 'Normal' maps to 1.
+try {
+    $edgeSync = Test-EdgeSynchronization -ErrorAction SilentlyContinue
+    if ($edgeSync) {
+        $syncStatus = if ($edgeSync.SyncStatus -eq 'Normal') { 1 } else { 0 }
+        Write-Output "exchange_edge_sync_status{hostname=`"$Hostname`"} $syncStatus"
+    }
+} catch {}
+
+# ============================================================================
+# TRANSPORT AGENTS
+# ============================================================================
+Write-Output "# HELP exchange_transport_agent_enabled Transport agent status (1=enabled, 0=disabled)"
+Write-Output "# TYPE exchange_transport_agent_enabled gauge"
+try {
+    $agents = Get-TransportAgent -ErrorAction SilentlyContinue
+    foreach ($agent in $agents) {
+        # Whitespace becomes '_' and any remaining non-word characters are dropped.
+        $agentName = $agent.Identity -replace '\s+', '_' -replace '[^\w_]', ''
+        $enabled = if ($agent.Enabled) { 1 } else { 0 }
+        Write-Output "exchange_transport_agent_enabled{agent=`"$agentName`",hostname=`"$Hostname`"} $enabled"
+    }
+} catch {}
+
+# ============================================================================
+# PUBLIC FOLDER STATISTICS
+# ============================================================================
+Write-Output "# HELP exchange_public_folder_count Number of public folders"
+Write-Output "# TYPE exchange_public_folder_count gauge"
+try {
+    $pfCount = (Get-PublicFolder -Recurse -ResultSize Unlimited -ErrorAction SilentlyContinue | Measure-Object).Count
+    Write-Output "exchange_public_folder_count{hostname=`"$Hostname`"} $pfCount"
+} catch {
+    Write-Output "exchange_public_folder_count{hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# OUTLOOK WEB APP ERRORS (from Event Log)
+# ============================================================================
+# NOTE(review): the three event-log sections below report counts over a
+# 24-hour window (-Hours 24) but declare the metrics as counters - confirm
+# the intended type.
+Write-Output "# HELP exchange_owa_errors_total OWA errors from event log (24h)"
+Write-Output "# TYPE exchange_owa_errors_total counter"
+$owaErrors = Get-EventLogCount -LogName "Application" -Source "MSExchange OWA" -Hours 24
+Write-Output "exchange_owa_errors_total{hostname=`"$Hostname`"} $owaErrors"
+
+# ============================================================================
+# TRANSPORT ERRORS (from Event Log)
+# ============================================================================
+Write-Output "# HELP exchange_transport_errors_total Transport errors from event log (24h)"
+Write-Output "# TYPE exchange_transport_errors_total counter"
+$transportErrors = Get-EventLogCount -LogName "Application" -Source "MSExchangeTransport" -Hours 24
+Write-Output "exchange_transport_errors_total{hostname=`"$Hostname`"} $transportErrors"
+
+# ============================================================================
+# INFORMATION STORE ERRORS
+# ============================================================================
+Write-Output "# HELP exchange_store_errors_total Information Store errors from event log (24h)"
+Write-Output "# TYPE exchange_store_errors_total counter"
+$storeErrors = Get-EventLogCount -LogName "Application" -Source "MSExchangeIS" -Hours 24
+# Sample for the exchange_store_errors_total family; its HELP/TYPE lines and
+# the $storeErrors assignment are written immediately before this statement.
+Write-Output "exchange_store_errors_total{hostname=`"$Hostname`"} $storeErrors"
+
+# ============================================================================
+# REPLICATION HEALTH
+# ============================================================================
+Write-Output "# HELP exchange_replication_health Replication health check results"
+Write-Output "# TYPE exchange_replication_health gauge"
+# One sample per check; only a Result of 'Passed' maps to 1. If the cmdlet
+# throws, the section emits no samples.
+try {
+    $replHealth = Test-ReplicationHealth -ErrorAction SilentlyContinue
+    foreach ($check in $replHealth) {
+        $checkName = $check.Check -replace '\s+', '_'
+        $result = if ($check.Result -eq 'Passed') { 1 } else { 0 }
+        Write-Output "exchange_replication_health{check=`"$checkName`",hostname=`"$Hostname`"} $result"
+    }
+} catch {}
+
+# ============================================================================
+# MAIL FLOW TEST
+# ============================================================================
+Write-Output "# HELP exchange_mailflow_test_latency_seconds Mail flow test latency"
+Write-Output "# TYPE exchange_mailflow_test_latency_seconds gauge"
+# The latency sample is written only when the test succeeds. The success flag
+# is tracked in a variable and written once afterwards, so the
+# exchange_mailflow_test_success family gets its own HELP/TYPE lines.
+$mailflowSuccess = 0
+try {
+    $mailflow = Test-Mailflow -ErrorAction SilentlyContinue
+    if ($mailflow -and $mailflow.TestMailflowResult -eq 'Success') {
+        $latency = $mailflow.MessageLatencyTime.TotalSeconds
+        Write-Output "exchange_mailflow_test_latency_seconds{hostname=`"$Hostname`"} $([math]::Round($latency, 2))"
+        $mailflowSuccess = 1
+    }
+} catch {
+    # Any thrown error counts as a failed test.
+    $mailflowSuccess = 0
+}
+Write-Output "# HELP exchange_mailflow_test_success Mail flow test result (1=success, 0=failure)"
+Write-Output "# TYPE exchange_mailflow_test_success gauge"
+Write-Output "exchange_mailflow_test_success{hostname=`"$Hostname`"} $mailflowSuccess"
+
+# ============================================================================
+# DOMAIN STATISTICS (Send/Receive by Domain)
+# ============================================================================
+# NOTE(review): only SEND events are read, so despite the section title just
+# the outbound direction is reported; the value is a 24-hour window count
+# declared as a counter - confirm both.
+Write-Output "# HELP exchange_domain_messages_total Messages by remote domain (24h)"
+Write-Output "# TYPE exchange_domain_messages_total counter"
+try {
+    # Everything after the '@' of each recipient address is taken as the
+    # domain; the 20 most frequent domains are reported.
+    $domainStats = Get-MessageTrackingLog -Start (Get-Date).AddHours(-24) -EventId SEND -ResultSize 10000 -ErrorAction SilentlyContinue |
+        ForEach-Object {
+            $_.Recipients | ForEach-Object {
+                if ($_ -match '@(.+)$') { $matches[1] }
+            }
+        } |
+        Group-Object |
+        Sort-Object Count -Descending |
+        Select-Object -First 20
+    foreach ($domain in $domainStats) {
+        if ($domain.Name) {
+            Write-Output "exchange_domain_messages_total{domain=`"$($domain.Name)`",direction=`"outbound`",hostname=`"$Hostname`"} $($domain.Count)"
+        }
+    }
+} catch {}
+
+# ============================================================================
+# RETENTION POLICY TAGS (Compliance)
+# ============================================================================
+# The four "count" sections below share one pattern: pipe the cmdlet output
+# to Measure-Object and report 0 if the cmdlet throws.
+Write-Output "# HELP exchange_retention_policy_count Number of retention policies"
+Write-Output "# TYPE exchange_retention_policy_count gauge"
+try {
+    $retPolicies = (Get-RetentionPolicy -ErrorAction SilentlyContinue | Measure-Object).Count
+    Write-Output "exchange_retention_policy_count{hostname=`"$Hostname`"} $retPolicies"
+} catch {
+    Write-Output "exchange_retention_policy_count{hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# JOURNAL RULES
+# ============================================================================
+Write-Output "# HELP exchange_journal_rule_count Number of journal rules"
+Write-Output "# TYPE exchange_journal_rule_count gauge"
+try {
+    $journalRules = (Get-JournalRule -ErrorAction SilentlyContinue | Measure-Object).Count
+    Write-Output "exchange_journal_rule_count{hostname=`"$Hostname`"} $journalRules"
+} catch {
+    Write-Output "exchange_journal_rule_count{hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# TRANSPORT RULES
+# ============================================================================
+Write-Output "# HELP exchange_transport_rule_count Number of transport rules"
+Write-Output "# TYPE exchange_transport_rule_count gauge"
+try {
+    $transportRules = (Get-TransportRule -ErrorAction SilentlyContinue | Measure-Object).Count
+    Write-Output "exchange_transport_rule_count{hostname=`"$Hostname`"} $transportRules"
+} catch {
+    Write-Output "exchange_transport_rule_count{hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# ACCEPTED DOMAINS
+# ============================================================================
+Write-Output "# HELP exchange_accepted_domain_count Number of accepted domains"
+Write-Output "# TYPE exchange_accepted_domain_count gauge"
+try {
+    $acceptedDomains = (Get-AcceptedDomain -ErrorAction SilentlyContinue | Measure-Object).Count
+    Write-Output "exchange_accepted_domain_count{hostname=`"$Hostname`"} $acceptedDomains"
+} catch {
+    Write-Output "exchange_accepted_domain_count{hostname=`"$Hostname`"} 0"
+}
+
+# ============================================================================
+# SEND/RECEIVE CONNECTORS
+# ============================================================================
+Write-Output "# HELP exchange_send_connector_enabled Send connector status"
+Write-Output "# TYPE exchange_send_connector_enabled gauge"
+try {
+    $sendConnectors = Get-SendConnector -ErrorAction SilentlyContinue
+    foreach ($conn in $sendConnectors) {
+        # Whitespace becomes '_' and any remaining non-word characters are dropped.
+        $connName = $conn.Name -replace '\s+', '_' -replace '[^\w_]', ''
+        $enabled = if ($conn.Enabled) { 1 } else { 0 }
+        Write-Output "exchange_send_connector_enabled{connector=`"$connName`",hostname=`"$Hostname`"} $enabled"
+    }
+} catch {}
+
+Write-Output "# HELP exchange_receive_connector_enabled Receive connector status"
+Write-Output "# TYPE exchange_receive_connector_enabled gauge"
+try {
+    $receiveConnectors = Get-ReceiveConnector -ErrorAction SilentlyContinue
+    foreach ($conn in $receiveConnectors) {
+        $connName = $conn.Name -replace '\s+', '_' -replace '[^\w_]', ''
+        $enabled = if ($conn.Enabled) { 1 } else { 0 }
+        Write-Output "exchange_receive_connector_enabled{connector=`"$connName`",hostname=`"$Hostname`"} $enabled"
+    }
+} catch {}
+
+# ============================================================================
+# EXECUTION TIME
+# ============================================================================
+# NOTE(review): this banner has no statements under it; the sections that
+# follow were presumably inserted above the execution-time code - confirm and
+# move the banner next to that code.
+# ============================================================================
+# AUTODISCOVER
+# ============================================================================
+Write-Output "# HELP exchange_autodiscover_requests_per_second Autodiscover requests per second"
+Write-Output "# TYPE exchange_autodiscover_requests_per_second gauge"
+$autodiscoverReq = Get-SafeCounter "\MSExchange Autodiscover\Requests/sec"
+Write-Output "exchange_autodiscover_requests_per_second{hostname=`"$Hostname`"} $autodiscoverReq"
+
+# NOTE(review): declared as a counter named *_total but read from a "/sec"
+# performance counter - confirm whether a gauge was intended.
+Write-Output "# HELP exchange_autodiscover_errors_total Autodiscover errors"
+Write-Output "# TYPE exchange_autodiscover_errors_total counter"
+$autodiscoverErrors = Get-SafeCounter "\MSExchange Autodiscover\Error Responses/sec"
+Write-Output "exchange_autodiscover_errors_total{hostname=`"$Hostname`"} $autodiscoverErrors"
+
+# ============================================================================
+# ADDRESS BOOK SERVICE
+# ============================================================================
+Write-Output "# HELP exchange_addressbook_requests_per_second Address Book requests per second"
+Write-Output "# TYPE exchange_addressbook_requests_per_second gauge"
+$abRequests = Get-SafeCounter "\MSExchange AddressBook(_total)\Referral RPC Requests/sec"
+Write-Output "exchange_addressbook_requests_per_second{hostname=`"$Hostname`"} $abRequests"
+
+Write-Output "# HELP exchange_addressbook_latency_avg Average Address Book latency (ms)"
+Write-Output "# TYPE exchange_addressbook_latency_avg gauge"
+$abLatency = Get-SafeCounter "\MSExchange AddressBook(_total)\RPC Averaged Latency"
+Write-Output "exchange_addressbook_latency_avg{hostname=`"$Hostname`"} $abLatency"
+
+# ============================================================================ +# SEARCH / CONTENT INDEXING +# ============================================================================ +Write-Output "# HELP exchange_search_mailboxes_left Mailboxes left to crawl" +Write-Output "# TYPE exchange_search_mailboxes_left gauge" +$searchLeft = Get-SafeCounter "\MSExchange Search Indexes(_total)\Mailboxes Left to Crawl" +Write-Output "exchange_search_mailboxes_left{hostname=`"$Hostname`"} $searchLeft" + +Write-Output "# HELP exchange_search_documents_indexed Documents indexed per second" +Write-Output "# TYPE exchange_search_documents_indexed gauge" +$searchDocs = Get-SafeCounter "\MSExchange Search Indexes(_total)\Average Document Indexing Time" +Write-Output "exchange_search_documents_indexed{hostname=`"$Hostname`"} $searchDocs" + +Write-Output "# HELP exchange_search_index_status Search index catalog status" +Write-Output "# TYPE exchange_search_index_status gauge" +try { + $catalogStatus = Get-MailboxDatabaseCopyStatus -ErrorAction SilentlyContinue | + Select-Object DatabaseName, ContentIndexState + foreach ($cat in $catalogStatus) { + $dbName = $cat.DatabaseName -replace '\s+', '_' + $healthy = if ($cat.ContentIndexState -eq 'Healthy') { 1 } else { 0 } + Write-Output "exchange_search_index_status{database=`"$dbName`",state=`"$($cat.ContentIndexState)`",hostname=`"$Hostname`"} $healthy" + } +} catch {} + +# ============================================================================ +# THROTTLING POLICY +# ============================================================================ +Write-Output "# HELP exchange_throttling_rejected_total Requests rejected by throttling" +Write-Output "# TYPE exchange_throttling_rejected_total counter" +$throttleRejected = Get-SafeCounter "\MSExchange Throttling\Requests Rejected due to Budget Usage" +Write-Output "exchange_throttling_rejected_total{hostname=`"$Hostname`"} $throttleRejected" + +Write-Output "# HELP 
# ============================================================================
# MAILBOX ASSISTANTS
# ============================================================================
Write-Output @(
    "# HELP exchange_mailbox_assistant_events Events processed by mailbox assistants",
    "# TYPE exchange_mailbox_assistant_events gauge"
)
$assistantEvents = Get-SafeCounter "\MSExchange Assistants - Per Assistant(_total)\Events Polled"
Write-Output "exchange_mailbox_assistant_events{hostname=`"$Hostname`"} $assistantEvents"

Write-Output @(
    "# HELP exchange_calendar_assistant_requests Calendar assistant requests",
    "# TYPE exchange_calendar_assistant_requests gauge"
)
$calendarAssist = Get-SafeCounter "\MSExchange Calendar Attendant\Requests"
Write-Output "exchange_calendar_assistant_requests{hostname=`"$Hostname`"} $calendarAssist"

# ============================================================================
# MAILBOX REPLICATION SERVICE (MRS)
# ============================================================================
# Each move-request state is counted separately; a terminating error from the
# cmdlet is reported as 0 so the series is always present.
Write-Output @(
    "# HELP exchange_mrs_active_moves Active mailbox moves",
    "# TYPE exchange_mrs_active_moves gauge"
)
try {
    $activeMoves = Get-MoveRequest -MoveStatus InProgress -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_mrs_active_moves{hostname=`"$Hostname`"} $activeMoves"
} catch {
    Write-Output "exchange_mrs_active_moves{hostname=`"$Hostname`"} 0"
}

Write-Output @(
    "# HELP exchange_mrs_queued_moves Queued mailbox moves",
    "# TYPE exchange_mrs_queued_moves gauge"
)
try {
    $queuedMoves = Get-MoveRequest -MoveStatus Queued -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_mrs_queued_moves{hostname=`"$Hostname`"} $queuedMoves"
} catch {
    Write-Output "exchange_mrs_queued_moves{hostname=`"$Hostname`"} 0"
}

Write-Output @(
    "# HELP exchange_mrs_failed_moves Failed mailbox moves",
    "# TYPE exchange_mrs_failed_moves gauge"
)
try {
    $failedMoves = Get-MoveRequest -MoveStatus Failed -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_mrs_failed_moves{hostname=`"$Hostname`"} $failedMoves"
} catch {
    Write-Output "exchange_mrs_failed_moves{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# TRANSPORT PIPELINE LATENCY
# ============================================================================
# NOTE(review): the metric is named *_latency_seconds, but the three counters
# read below are "Average bytes/message", "Ambiguous Recipients Rate" and
# "Average Delivery Attempts Per Message" - confirm these are the intended
# sources before alerting on this series.
Write-Output @(
    "# HELP exchange_transport_latency_seconds Transport component latency",
    "# TYPE exchange_transport_latency_seconds gauge"
)
$latencySmtpReceive = Get-SafeCounter "\MSExchangeTransport SmtpReceive(_total)\Average bytes/message"
$latencyCategor = Get-SafeCounter "\MSExchangeTransport Resolver(_total)\Ambiguous Recipients Rate"
$latencyDelivery = Get-SafeCounter "\MSExchangeTransport Delivery Failures\Average Delivery Attempts Per Message"
Write-Output @(
    "exchange_transport_latency_seconds{component=`"smtp_receive`",hostname=`"$Hostname`"} $latencySmtpReceive",
    "exchange_transport_latency_seconds{component=`"categorizer`",hostname=`"$Hostname`"} $latencyCategor",
    "exchange_transport_latency_seconds{component=`"delivery`",hostname=`"$Hostname`"} $latencyDelivery"
)

# ============================================================================
# PROCESS MEMORY/CPU
# ============================================================================
# NOTE(review): exchange_process_cpu_percent is declared here but this section
# only ever emits memory samples.
Write-Output @(
    "# HELP exchange_process_memory_bytes Memory usage by Exchange processes",
    "# TYPE exchange_process_memory_bytes gauge",
    "# HELP exchange_process_cpu_percent CPU usage by Exchange processes",
    "# TYPE exchange_process_cpu_percent gauge"
)

$exchangeProcesses = @(
    'EdgeTransport'
    'Microsoft.Exchange.Store.Worker'
    'MSExchangeTransport'
    'MSExchangeHMWorker'
    'MSExchangeMailboxAssistants'
    'MSExchangeDelivery'
    'MSExchangeSubmission'
    'w3wp'   # IIS worker processes
)

foreach ($procName in $exchangeProcesses) {
    try {
        $procs = Get-Process -Name $procName -ErrorAction SilentlyContinue
        if (-not $procs) {
            continue   # process not running on this server: emit nothing
        }
        # Working sets of all instances with this name are summed into one sample.
        $totalMem = ($procs | Measure-Object -Property WorkingSet64 -Sum).Sum
        # Dots in the process name are replaced with "_" for the label value.
        $procNameClean = $procName -replace '\.', '_'
        Write-Output "exchange_process_memory_bytes{process=`"$procNameClean`",hostname=`"$Hostname`"} $totalMem"
    } catch {}
}

# ============================================================================
# ARCHIVE MAILBOX STATISTICS
# ============================================================================
Write-Output @(
    "# HELP exchange_archive_mailbox_count Number of archive mailboxes",
    "# TYPE exchange_archive_mailbox_count gauge"
)
try {
    $archiveCount = Get-Mailbox -Archive -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_archive_mailbox_count{hostname=`"$Hostname`"} $archiveCount"
} catch {
    Write-Output "exchange_archive_mailbox_count{hostname=`"$Hostname`"} 0"
}
# ============================================================================
# SHARED MAILBOXES
# ============================================================================
Write-Output @(
    "# HELP exchange_shared_mailbox_count Number of shared mailboxes",
    "# TYPE exchange_shared_mailbox_count gauge"
)
try {
    $sharedCount = Get-Mailbox -RecipientTypeDetails SharedMailbox -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_shared_mailbox_count{hostname=`"$Hostname`"} $sharedCount"
} catch {
    Write-Output "exchange_shared_mailbox_count{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# ROOM/RESOURCE MAILBOXES
# ============================================================================
# Both counts are gathered before anything is written, so a terminating error
# on either query suppresses both samples (the catch block emits nothing).
Write-Output @(
    "# HELP exchange_resource_mailbox_count Number of resource mailboxes",
    "# TYPE exchange_resource_mailbox_count gauge"
)
try {
    $roomCount = Get-Mailbox -RecipientTypeDetails RoomMailbox -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    $equipmentCount = Get-Mailbox -RecipientTypeDetails EquipmentMailbox -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output @(
        "exchange_resource_mailbox_count{type=`"room`",hostname=`"$Hostname`"} $roomCount",
        "exchange_resource_mailbox_count{type=`"equipment`",hostname=`"$Hostname`"} $equipmentCount"
    )
} catch {}

# ============================================================================
# DISTRIBUTION GROUPS
# ============================================================================
Write-Output @(
    "# HELP exchange_distribution_group_count Number of distribution groups",
    "# TYPE exchange_distribution_group_count gauge"
)
try {
    $dgCount = Get-DistributionGroup -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_distribution_group_count{hostname=`"$Hostname`"} $dgCount"
} catch {
    Write-Output "exchange_distribution_group_count{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# DYNAMIC DISTRIBUTION GROUPS
# ============================================================================
Write-Output @(
    "# HELP exchange_dynamic_distribution_group_count Number of dynamic distribution groups",
    "# TYPE exchange_dynamic_distribution_group_count gauge"
)
try {
    $ddgCount = Get-DynamicDistributionGroup -ResultSize Unlimited -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_dynamic_distribution_group_count{hostname=`"$Hostname`"} $ddgCount"
} catch {
    Write-Output "exchange_dynamic_distribution_group_count{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# REMOTE DOMAINS
# ============================================================================
Write-Output @(
    "# HELP exchange_remote_domain_count Number of remote domains configured",
    "# TYPE exchange_remote_domain_count gauge"
)
try {
    $remoteDomains = Get-RemoteDomain -ErrorAction SilentlyContinue |
        Measure-Object |
        Select-Object -ExpandProperty Count
    Write-Output "exchange_remote_domain_count{hostname=`"$Hostname`"} $remoteDomains"
} catch {
    Write-Output "exchange_remote_domain_count{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# UNIFIED MESSAGING (if enabled)
# ============================================================================
Write-Output @(
    "# HELP exchange_um_calls_current Current UM calls",
    "# TYPE exchange_um_calls_current gauge"
)
$umCalls = Get-SafeCounter "\MSExchangeUMAvailability\Current Calls"
Write-Output "exchange_um_calls_current{hostname=`"$Hostname`"} $umCalls"

Write-Output @(
    "# HELP exchange_um_calls_total Total UM calls",
    "# TYPE exchange_um_calls_total counter"
)
$umCallsTotal = Get-SafeCounter "\MSExchangeUMAvailability\Total Calls"
Write-Output "exchange_um_calls_total{hostname=`"$Hostname`"} $umCallsTotal"

# ============================================================================
# SUBMISSION QUEUE STATISTICS (detailed)
# ============================================================================
Write-Output @(
    "# HELP exchange_submission_queue_items_expired Items expired from submission queue",
    "# TYPE exchange_submission_queue_items_expired counter"
)
$subExpired = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Items Expired from Submission Queue"
Write-Output "exchange_submission_queue_items_expired{hostname=`"$Hostname`"} $subExpired"

Write-Output @(
    "# HELP exchange_submission_queue_items_submitted Items submitted to queue",
    "# TYPE exchange_submission_queue_items_submitted counter"
)
$subSubmitted = Get-SafeCounter "\MSExchangeTransport Queues(_total)\Items Submitted Total"
Write-Output "exchange_submission_queue_items_submitted{hostname=`"$Hostname`"} $subSubmitted"

# ============================================================================
# EXTERNAL RELAY STATISTICS
# ============================================================================
# Counts SEND events from the last 24 hours whose connector id does not match
# 'Internal'. At most 5000 tracking-log entries are examined.
# NOTE(review): declared as a counter, but the value is a sliding 24h window
# capped by -ResultSize, so it can decrease between scrapes.
Write-Output @(
    "# HELP exchange_external_relay_messages External relay message count",
    "# TYPE exchange_external_relay_messages counter"
)
try {
    $windowStart = (Get-Date).AddHours(-24)
    $externalRelay = Get-MessageTrackingLog -Start $windowStart -EventId SEND -ResultSize 5000 -ErrorAction SilentlyContinue |
        Where-Object { $_.ConnectorId -notmatch 'Internal' } |
        Measure-Object
    Write-Output "exchange_external_relay_messages{hostname=`"$Hostname`"} $($externalRelay.Count)"
} catch {
    Write-Output "exchange_external_relay_messages{hostname=`"$Hostname`"} 0"
}
# ============================================================================
# SPAM/MALWARE QUARANTINE
# ============================================================================
Write-Output "# HELP exchange_quarantine_messages Messages in quarantine"
Write-Output "# TYPE exchange_quarantine_messages gauge"
try {
    $quarantineCount = (Get-QuarantineMessage -ErrorAction SilentlyContinue | Measure-Object).Count
    Write-Output "exchange_quarantine_messages{hostname=`"$Hostname`"} $quarantineCount"
} catch {
    # A terminating error (for example the cmdlet not being available) reports 0.
    Write-Output "exchange_quarantine_messages{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# HYBRID CONFIGURATION (for O365 hybrid)
# ============================================================================
Write-Output "# HELP exchange_hybrid_configured Hybrid configuration status"
Write-Output "# TYPE exchange_hybrid_configured gauge"
try {
    $hybrid = Get-HybridConfiguration -ErrorAction SilentlyContinue
    # 1 when any hybrid configuration object is returned, otherwise 0.
    $isHybrid = if ($hybrid) { 1 } else { 0 }
    Write-Output "exchange_hybrid_configured{hostname=`"$Hostname`"} $isHybrid"
} catch {
    Write-Output "exchange_hybrid_configured{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# OAUTH TOKENS (for modern auth)
# ============================================================================
Write-Output "# HELP exchange_oauth_token_requests OAuth token requests"
Write-Output "# TYPE exchange_oauth_token_requests counter"
$oauthRequests = Get-SafeCounter "\MSExchange OAuth\Inbound: Token Requests"
Write-Output "exchange_oauth_token_requests{hostname=`"$Hostname`"} $oauthRequests"

# ============================================================================
# LAST FULL BACKUP
# ============================================================================
# Seconds elapsed since each database's LastFullBackup; -1 marks a database
# that reports no full backup at all.
Write-Output "# HELP exchange_database_last_backup_seconds Seconds since last full backup"
Write-Output "# TYPE exchange_database_last_backup_seconds gauge"
try {
    $databases = Get-MailboxDatabase -Status -ErrorAction SilentlyContinue
    foreach ($db in $databases) {
        $dbName = $db.Name -replace '\s+', '_'
        if ($db.LastFullBackup) {
            $backupAge = [math]::Round(((Get-Date) - $db.LastFullBackup).TotalSeconds)
            Write-Output "exchange_database_last_backup_seconds{database=`"$dbName`",hostname=`"$Hostname`"} $backupAge"
        } else {
            Write-Output "exchange_database_last_backup_seconds{database=`"$dbName`",hostname=`"$Hostname`"} -1"
        }
    }
} catch {}

# ============================================================================
# LOG GENERATION RATE
# ============================================================================
Write-Output "# HELP exchange_log_generation_rate Log generation checkpoint depth"
Write-Output "# TYPE exchange_log_generation_rate gauge"
try {
    $databases = Get-MailboxDatabase -Status -ErrorAction SilentlyContinue
    foreach ($db in $databases) {
        # The sanitised name is only for the Prometheus label. The counter
        # instance must be looked up with the database's real name, otherwise
        # any database whose name contains whitespace can never match.
        $dbName = $db.Name -replace '\s+', '_'
        # NOTE(review): assumes the perf-counter instance is named exactly like
        # the database - confirm against the instances listed on the server.
        $counterPath = "\MSExchange Database ==> Instances($($db.Name))\Log Generation Checkpoint Depth"
        $logDepth = Get-SafeCounter $counterPath
        Write-Output "exchange_log_generation_rate{database=`"$dbName`",hostname=`"$Hostname`"} $logDepth"
    }
} catch {}

# ============================================================================
# AVERAGE MAILBOX SIZE
# ============================================================================
Write-Output "# HELP exchange_average_mailbox_size_bytes Average mailbox size"
Write-Output "# TYPE exchange_average_mailbox_size_bytes gauge"
try {
    # @(...) guarantees an array, so .Count is meaningful even when exactly one
    # mailbox is returned. $null goes on the left so the comparison is a plain
    # scalar null test.
    $stats = @(Get-MailboxStatistics -Server $Hostname -ErrorAction SilentlyContinue |
        Where-Object { $null -ne $_.TotalItemSize })
    if ($stats.Count -gt 0) {
        $avgSize = ($stats | ForEach-Object { $_.TotalItemSize.Value.ToBytes() } | Measure-Object -Average).Average
        Write-Output "exchange_average_mailbox_size_bytes{hostname=`"$Hostname`"} $([math]::Round($avgSize))"
    }
} catch {
    Write-Output "exchange_average_mailbox_size_bytes{hostname=`"$Hostname`"} 0"
}

# ============================================================================
# LARGEST MAILBOXES (top 10)
# ============================================================================
Write-Output "# HELP exchange_largest_mailbox_bytes Largest mailboxes by size"
Write-Output "# TYPE exchange_largest_mailbox_bytes gauge"
try {
    $topMailboxes = Get-MailboxStatistics -Server $Hostname -ErrorAction SilentlyContinue |
        Where-Object { $null -ne $_.TotalItemSize } |
        Sort-Object TotalItemSize -Descending |
        Select-Object -First 10
    foreach ($mbx in $topMailboxes) {
        # Quotes and whitespace become "_"; every remaining non-word character
        # is dropped so the display name is safe inside a label value.
        $mbxName = $mbx.DisplayName -replace '["\s]', '_' -replace '[^\w_]', ''
        $size = $mbx.TotalItemSize.Value.ToBytes()
        Write-Output "exchange_largest_mailbox_bytes{mailbox=`"$mbxName`",hostname=`"$Hostname`"} $size"
    }
} catch {}

# ============================================================================
# EXECUTION TIME
# ============================================================================
$EndTime = Get-Date
$Duration = ($EndTime - $StartTime).TotalSeconds

Write-Output "# HELP exchange_collector_duration_seconds Time taken to collect metrics"
Write-Output "# TYPE exchange_collector_duration_seconds gauge"
Write-Output "exchange_collector_duration_seconds{hostname=`"$Hostname`"} $([math]::Round($Duration, 2))"

# The Unix timestamp is computed explicitly from UTC. `Get-Date -UFormat %s`
# is avoided because on Windows PowerShell it is derived from local time and
# returns a culture-formatted fractional string.
$unixEpoch = New-Object DateTime 1970, 1, 1, 0, 0, 0, ([DateTimeKind]::Utc)
$lastRunUnix = [math]::Round(($EndTime.ToUniversalTime() - $unixEpoch).TotalSeconds, 0)

Write-Output "# HELP exchange_collector_last_run_timestamp Unix timestamp of last collection"
Write-Output "# TYPE exchange_collector_last_run_timestamp gauge"
Write-Output "exchange_collector_last_run_timestamp{hostname=`"$Hostname`"} $lastRunUnix"
# Strict mode: stop on the first failing command (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# External tools are referenced by name and resolved through PATH.
readonly BLKID_PATH="blkid"                     # prints block device attributes
readonly LSBLK_PATH="lsblk"                     # lists block devices
readonly LOG_FILE="/var/log/expand_drive.log"   # every log line is appended here

# Runtime configuration.
readonly DRY_RUN="${DRY_RUN:-false}"            # "true" => only report what would happen
readonly REQUIRED_COMMANDS=("growpart" "xfs_growfs" "resize2fs")
readonly SUPPORTED_FILESYSTEMS=("xfs" "ext2" "ext3" "ext4")

# Exit statuses used by the script.
readonly EXIT_SUCCESS=0         # finished normally
readonly EXIT_ERROR=1           # generic failure
readonly EXIT_ROOT_REQUIRED=2   # not started as root
readonly EXIT_MISSING_DEPS=3    # a required command is absent

# Write a timestamped message to stdout and append it to the log file.
log_message() {
    printf '%s: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE"
}

# Append a timestamped ERROR line to the log file and echo it on stderr.
log_error() {
    printf '%s: ERROR: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE" >&2
}

# Succeed when "$1" resolves to a command, function or builtin.
command_exists() {
    command -v "$1" &>/dev/null
}

# Signal handler: log the interruption and leave with the generic error status.
cleanup() {
    # shellcheck disable=SC2317 # reached only through the trap
    log_message "Script interrupted, cleaning up..."
    # shellcheck disable=SC2317 # reached only through the trap
    exit "$EXIT_ERROR"
}

# Verify the environment before any work is done: root privileges, a log
# directory that exists (created when missing) and every required command.
# Exits with a dedicated status when a check fails.
validate_prerequisites() {
    # Partition and filesystem changes need root.
    if [[ "$(id -u)" != "0" ]]; then
        echo "Error: This script must be run as root"
        exit "$EXIT_ROOT_REQUIRED"
    fi

    # The log directory has to exist before the first log_* call.
    local log_dir
    log_dir=$(dirname "$LOG_FILE")
    if [[ ! -d "$log_dir" ]]; then
        if ! mkdir -p "$log_dir"; then
            echo "Error: Cannot create log directory $log_dir"
            exit "$EXIT_ERROR"
        fi
    fi

    # Stop at the first missing dependency.
    local required
    for required in "${REQUIRED_COMMANDS[@]}"; do
        command_exists "$required" && continue
        log_error "Required command '$required' not found. Please install it."
        exit "$EXIT_MISSING_DEPS"
    done
}

# Return 0 when "$1" is one of SUPPORTED_FILESYSTEMS, 1 otherwise.
is_supported_filesystem() {
    local fs_type="$1"
    local candidate
    for candidate in "${SUPPORTED_FILESYSTEMS[@]}"; do
        [[ "$candidate" == "$fs_type" ]] && return 0
    done
    return 1
}

# Grow a mounted filesystem to fill its block device.
#   $1 - block device path (e.g. /dev/sda1)
#   $2 - filesystem type (xfs, ext2, ext3, ext4)
#   $3 - mount point of the filesystem
# Returns 0 on success or in dry-run mode, 1 when the type is unsupported or
# the resize command fails.
expand_filesystem() {
    local partition="$1"
    local fs_type="$2"
    local mount_point="$3"

    if ! is_supported_filesystem "$fs_type"; then
        log_error "Unsupported filesystem type $fs_type on $partition"
        return 1
    fi

    # Choose the label used in log lines and the resize command for the type.
    local family
    local -a grow_cmd
    case "$fs_type" in
        xfs)
            family="XFS"
            grow_cmd=(xfs_growfs "$mount_point")   # xfs_growfs works on the mount point
            ;;
        ext2 | ext3 | ext4)
            family="EXT"
            grow_cmd=(resize2fs "$partition")      # resize2fs works on the device
            ;;
        *)
            return 0
            ;;
    esac

    log_message "Expanding $family filesystem on $partition"

    if [ "$DRY_RUN" = "true" ]; then
        log_message "DRY RUN: Would expand $family filesystem on $partition"
        return 0
    fi

    if "${grow_cmd[@]}" >/dev/null 2>&1; then
        log_message "Successfully expanded $family filesystem on $partition"
        return 0
    fi

    log_error "Failed to expand $family filesystem on $partition"
    return 1
}
# Set up signal trap to handle interruptions gracefully
trap cleanup INT TERM

# Initialize script by validating prerequisites
validate_prerequisites

# Print the first mount point of a block device, or nothing when it is not
# mounted. Never fails: findmnt's non-zero "not found" status must not trip
# `set -e` / pipefail in the callers.
_mount_point_of() {
    findmnt -n -o TARGET "$1" 2>/dev/null | head -n 1 || true
}

# Print the human-readable size of the filesystem mounted at "$1", or nothing
# when df cannot report it. Never fails, for the same reason as above.
_filesystem_size() {
    df -h "$1" 2>/dev/null | awk 'NR==2 {print $2}' || true
}

# Grow the filesystem on an already-sized block device and log the size before
# and after.
#   $1 - block device, $2 - mount point
# Returns 0 when the filesystem was grown (or nothing could be detected),
# otherwise the status of expand_filesystem.
_grow_and_report() {
    local device="$1"
    local mount_point="$2"

    # blkid exits non-zero when it finds no TYPE; treat that as "unknown".
    local fs_type
    fs_type=$($BLKID_PATH -s TYPE -o value "$device" || true)
    if [ -z "$fs_type" ]; then
        log_message "Warning: Could not detect filesystem type for $device, skipping..."
        return 0
    fi

    log_message "Current filesystem size on $device: $(_filesystem_size "$mount_point")"

    # Capture the status instead of letting `set -e` abort the whole run, so
    # the remaining partitions and disks are still processed.
    local status=0
    expand_filesystem "$device" "$fs_type" "$mount_point" || status=$?

    log_message "New filesystem size on $device: $(_filesystem_size "$mount_point")"
    return "$status"
}

# Expand one partition and the filesystem on it.
#   $1 - partition device path (e.g. /dev/sda1)
#   $2 - parent disk device path (e.g. /dev/sda)
# Returns 0 when the partition was handled or legitimately skipped, non-zero
# when the partition number cannot be determined or the filesystem resize fails.
process_partition() {
    local partition="$1"
    local disk="$2"

    log_message "Processing partition $partition"

    # Only mounted filesystems are resized.
    local mount_point
    mount_point=$(_mount_point_of "$partition")
    if [ -z "$mount_point" ]; then
        log_message "Warning: $partition is not mounted, skipping filesystem resize"
        return 0
    fi

    # Trailing digits of the device path are the partition number
    # (e.g. "1" from /dev/sda1 or /dev/nvme0n1p1).
    local part_num=""
    if [[ "$partition" =~ ([0-9]+)$ ]]; then
        part_num="${BASH_REMATCH[1]}"
    fi
    if [ -z "$part_num" ]; then
        log_error "Could not extract partition number from $partition"
        return 1
    fi

    # expand_partition returns non-zero when there is nothing to grow; that is
    # not an error for this partition.
    if ! expand_partition "$disk" "$partition" "$part_num"; then
        return 0
    fi

    _grow_and_report "$partition" "$mount_point"
}

# Expand a filesystem that sits directly on a disk (no partition table).
#   $1 - disk device path (e.g. /dev/nvme3n1)
#   $2 - mount point of the filesystem
process_direct_filesystem() {
    local disk="$1"
    local mount_point="$2"

    log_message "Processing direct filesystem on $disk mounted at $mount_point"
    _grow_and_report "$disk" "$mount_point"
}

# Process every partition of one disk, or the disk itself when it carries a
# filesystem without a partition table.
#   $1 - disk device path (e.g. /dev/sda)
# Returns 0 when everything succeeded or was skipped, 1 when lsblk failed or
# at least one partition could not be processed.
process_disk() {
    local disk="$1"

    log_message "Checking partitions on $disk..."

    local lsblk_output
    lsblk_output=$($LSBLK_PATH -pln -o NAME,TYPE "$disk" 2>&1) || {
        log_error "lsblk command failed for $disk: $lsblk_output"
        return 1
    }

    # Match on the TYPE column only, so a device whose *name* happens to
    # contain "part" is not picked up. awk exits 0 even with no match.
    local partitions
    partitions=$(awk '$2 == "part" {print $1}' <<<"$lsblk_output")

    if [ -z "$partitions" ]; then
        local mount_point
        mount_point=$(_mount_point_of "$disk")
        if [ -n "$mount_point" ]; then
            log_message "No partitions found on $disk, but disk has direct filesystem. Processing disk directly..."
            process_direct_filesystem "$disk" "$mount_point" || return 1
        else
            log_message "No partitions found on $disk, skipping..."
        fi
        return 0
    fi

    # Keep going after a failed partition and report the failure at the end.
    local partition
    local status=0
    for partition in $partitions; do
        process_partition "$partition" "$disk" || status=1
    done
    return "$status"
}

# Main execution function - walks every disk reported by lsblk and expands
# what can be expanded. Exits with EXIT_SUCCESS when nothing failed and with
# EXIT_ERROR when no disk was found or any disk reported a failure.
main() {
    log_message "Starting drive expansion process..."

    # Select rows whose TYPE column is exactly "disk". awk does not fail on
    # "no match", so the empty-result check below stays reachable under pipefail.
    local devices
    devices=$($LSBLK_PATH -pln -o NAME,TYPE | awk '$2 == "disk" {print $1}') || {
        log_error "lsblk command failed while listing disks"
        exit "$EXIT_ERROR"
    }

    if [ -z "$devices" ]; then
        log_error "No disk devices found"
        exit "$EXIT_ERROR"
    fi

    local disk
    local failures=0
    for disk in $devices; do
        # Verify device is actually a block device before processing
        if [ ! -b "$disk" ]; then
            log_error "Device $disk is not a block device, skipping..."
            continue
        fi
        process_disk "$disk" || failures=$((failures + 1))
    done

    if [ "$failures" -gt 0 ]; then
        log_error "Drive expansion finished with errors on $failures disk(s)"
        exit "$EXIT_ERROR"
    fi

    log_message "Drive expansion completed"
    exit "$EXIT_SUCCESS"
}

# Execute the main function to start the script
main
(gauge) +# - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge) +# - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge) +# - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge) +# - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge) +# - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge) +# - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge) +# - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d) +# - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban +# - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban +# - fail2ban_log_size_bytes - Size of fail2ban.log file +# - fail2ban_log_age_seconds - Time since last log modification +# - fail2ban_log_rotation_timestamp - Last log rotation time +# - fail2ban_exporter_duration_seconds - Script execution time +# - fail2ban_exporter_last_run_timestamp - Last successful run time +# +# Configuration: +# Default HTTP port: 9191 +# Textfile directory: /var/lib/node_exporter +# Log source: /var/log/fail2ban.log +# +################################################################################ + +# ============================================================================ +# CONFIGURATION VARIABLES +# ============================================================================ + +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false +HTTP_PORT=9191 +FAIL2BAN_LOG="/var/log/fail2ban.log" + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +show_usage() { + cat <&2; exit 1 ;; + esac + done +} + +# Check if fail2ban is installed and running +# Returns: 0 if OK, 1 if error +check_fail2ban() { + if ! 
command -v fail2ban-client >/dev/null 2>&1; then
+        echo "ERROR: fail2ban-client not found" >&2
+        return 1
+    fi
+
+    # Verify fail2ban server is responding
+    if ! fail2ban-client ping >/dev/null 2>&1; then
+        echo "ERROR: fail2ban server not responding" >&2
+        return 1
+    fi
+
+    return 0
+}
+
+# Get list of all active fail2ban jails
+# Returns: Space-separated list of jail names
+get_jails() {
+    # Strip the "Jail list:" label and tabs, split on commas, then let xargs
+    # join the names back together with single spaces
+    fail2ban-client status 2>/dev/null \
+        | grep "Jail list:" \
+        | sed 's/.*Jail list://' \
+        | tr -d '\t' \
+        | tr ',' '\n' \
+        | xargs
+}
+
+# Get statistics for a specific jail
+# Args: $1 - jail name
+# Returns: Pipe-delimited string: currently_failed|currently_banned|total_failed|total_banned
+get_jail_stats() {
+    local jail="$1"
+
+    # Single awk pass: the counter is the last field of each matching line.
+    # Counters that never appear (or a failed client call) default to 0.
+    fail2ban-client status "$jail" 2>/dev/null | awk '
+        /Currently failed:/ { cur_failed = $NF }
+        /Currently banned:/ { cur_banned = $NF }
+        /Total failed:/     { tot_failed = $NF }
+        /Total banned:/     { tot_banned = $NF }
+        END {
+            printf "%s|%s|%s|%s\n",
+                (cur_failed == "" ? 0 : cur_failed),
+                (cur_banned == "" ? 0 : cur_banned),
+                (tot_failed == "" ? 0 : tot_failed),
+                (tot_banned == "" ? 0 : tot_banned)
+        }'
+}
+
+# Get list of currently banned IPs for a jail
+# Args: $1 - jail name
+# Returns: List of IPs, one per line
+get_banned_ips() {
+    local jail="$1"
+    fail2ban-client status "$jail" 2>/dev/null \
+        | grep "Banned IP list:" \
+        | sed 's/.*Banned IP list://' \
+        | tr ' ' '\n' \
+        | grep -v '^$'
+}
+
+# Internal: timestamp of the most recent log line for a jail and event type
+# Args: $1 - jail name, $2 - event word ("Ban" or "Unban")
+# Returns: Unix timestamp (seconds since epoch) or 0 if not found
+_last_event_timestamp() {
+    local jail="$1" event="$2" stamp
+
+    # -F: match the jail name literally instead of as a regular expression.
+    # The first two log fields are the date and time of the entry.
+    stamp=$(grep -F -- "[$jail]" "$FAIL2BAN_LOG" 2>/dev/null \
+        | grep -- "$event " | tail -n 1 | awk '{print $1, $2}')
+
+    if [ -z "$stamp" ]; then
+        echo 0
+        return
+    fi
+    date -d "$stamp" +%s 2>/dev/null || echo 0
+}
+
+# Get timestamp of last ban event for a jail
+# Args: $1 - jail name
+# Returns: Unix timestamp (seconds since epoch) or 0 if not found
+get_last_ban_timestamp() {
+    _last_event_timestamp "$1" "Ban"
+}
+
+# Get timestamp of last unban event for a jail
+# Args: $1 - jail name
+# Returns: Unix timestamp (seconds since epoch) or 0 if not found
+get_last_unban_timestamp() {
+    _last_event_timestamp "$1" "Unban"
+}
+
+# Internal: scan $FAIL2BAN_LOG for "[jail] <event> " lines at or after a cutoff
+# Args: $1 - jail name, $2 - event word ("Ban" or "Unban"), $3 - cutoff (Unix timestamp)
+# Returns: One output line per qualifying log line, holding the first IPv4
+#          address found on that line (empty line when there is none)
+_log_events_since() {
+    local jail="$1" event="$2" cutoff="$3"
+
+    awk -v needle="[$jail] $event " -v cutoff="$cutoff" '
+        index($0, needle) {
+            # The two timestamp fields are interpolated into a shell command,
+            # so refuse anything that is not plain date/time characters
+            if ($1 !~ /^[A-Za-z0-9:,.+-]+$/ || $2 !~ /^[A-Za-z0-9:,.+-]+$/) next
+
+            ts = 0    # reset so a failed date call cannot reuse the previous value
+            cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
+            cmd | getline ts
+            close(cmd)
+            if (ts < cutoff) next
+
+            ip = ""
+            if (match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) ip = substr($0, RSTART, RLENGTH)
+            print ip
+        }
+    ' "$FAIL2BAN_LOG" 2>/dev/null
+}
+
+# Internal: count ban or unban events for a jail within a time period
+# Args: $1 - jail name, $2 - event word ("Ban" or "Unban"), $3 - time period (e.g., "1 hour ago")
+# Returns: Number of events
+_count_events_since() {
+    local jail="$1" event="$2" period="$3"
+    local count cutoff_timestamp
+
+    # Convert period to Unix timestamp
+    cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)
+
+    # Try journalctl first (faster)
+    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "[$jail] $event ")
+
+    # If journalctl returns 0, fall back to log file (more reliable)
+    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
+        count=$(_log_events_since "$jail" "$event" "$cutoff_timestamp" | wc -l | tr -d ' ')
+    fi
+
+    echo "${count:-0}"
+}
+
+# Count ban events within a time period
+# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
+# Returns: Number of ban events
+get_ban_rate() {
+    _count_events_since "$1" "Ban" "$2"
+}
+
+# Count unban events within a time period
+# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
+# Returns: Number of unban events
+get_unban_rate() {
+    _count_events_since "$1" "Unban" "$2"
+}
+
+# Get top attacking IPs by ban count
+# Args: $1 - jail name, $2 - limit (default: 5)
+# Returns: Lines with "count IP" format, sorted by count descending
+get_top_banned_ips() {
+    local jail="$1"
+    local limit="${2:-5}"
+    grep -F -- "[$jail] Ban " "$FAIL2BAN_LOG" 2>/dev/null | \
+        grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | \
+        sort | uniq -c | sort -rn | head -n "$limit"
+}
+
+# Count unique IPs banned in a time period
+# Args: $1 - jail name, $2 - time period (e.g., "24 hours ago")
+# Returns: Number of unique IPs
+get_unique_banned_ips() {
+    local jail="$1"
+    local period="$2"
+    local count cutoff_timestamp
+
+    # Convert period to Unix timestamp
+    cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)
+
+    # Try journalctl first
+    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | \
+        grep -F -- "[$jail] Ban " | \
+        grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | \
+        sort -u | wc -l | tr -d ' ')
+
+    # Fall back to log file if journalctl returns 0
+    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
+        count=$(_log_events_since "$jail" "Ban" "$cutoff_timestamp" \
+            | grep -v '^$' | sort -u | wc -l | tr -d ' ')
+    fi
+
+    echo "${count:-0}"
+}
+
+# Ban duration statistics for a jail (placeholder implementation)
+# Args: $1 - jail name (currently unused)
+# Returns: Pipe-delimited "avg|min|max" in seconds; the values are hard-coded
+get_ban_duration_stats() {
+    local jail="$1"
+    # Parse ban times and calculate average duration (placeholder - complex to implement)
+    echo "3600|1800|7200"  # Placeholder: 1h avg, 30min min, 2h max
+}
+
+# Internal: look up a setting for a jail in the fail2ban configuration
+# Checks /etc/fail2ban/jail.d/<jail>.conf first, then the [<jail>] section of
+# /etc/fail2ban/jail.local.
+# Args: $1 - jail name, $2 - setting name (e.g. port, logpath)
+# Returns: Last whitespace-separated word of the setting line, empty if not found
+_jail_config_value() {
+    local jail="$1" key="$2" value=""
+
+    if [ -f "/etc/fail2ban/jail.d/$jail.conf" ]; then
+        value=$(grep "^$key" "/etc/fail2ban/jail.d/$jail.conf" 2>/dev/null | awk '{print $NF}')
+    fi
+
+    if [ -z "$value" ] && [ -f "/etc/fail2ban/jail.local" ]; then
+        # Track the current section explicitly. An awk range of the form
+        # /\[jail\]/,/^\[/ ends on its own first line, because the section
+        # header also matches the end pattern, so it never reaches the settings.
+        value=$(awk -v section="[$jail]" -v key="$key" '
+            /^\[/ { in_section = (index($0, section) == 1); next }
+            in_section && index($0, key) == 1 { print $NF; exit }
+        ' "/etc/fail2ban/jail.local" 2>/dev/null)
+    fi
+
+    echo "$value"
+}
+
+# Get the configured port of a jail
+# Args: $1 - jail name
+# Returns: Port value from the jail configuration, or "unknown"
+get_jail_port() {
+    local jail="$1"
+    local port
+    # Extract port from jail config (simplified)
+    port=$(_jail_config_value "$jail" "port")
+    echo "${port:-unknown}"
+}
+
+# Detect protocol based on jail name
+# Args: $1 - jail name
+# Returns: Protocol (tcp/udp), defaults to tcp
+get_jail_protocol() {
+    local jail="$1"
+    # Heuristic matching based on common service patterns; the TCP services
+    # are tested before *dns* so a name matching both stays tcp
+    case "$jail" in
+        *ssh*|*http*|*nginx*|*apache*|*smtp*|*mail*) echo "tcp" ;;
+        *dns*) echo "udp" ;;
+        *) echo "tcp" ;;  # Default to TCP for unknown services
+    esac
+}
+
+# Get the log path monitored by a jail
+# Args: $1 - jail name
+# Returns: logpath value from the jail configuration, or /var/log/auth.log
+get_jail_logpath() {
+    local jail="$1"
+    local logpath
+    logpath=$(_jail_config_value "$jail" "logpath")
+    echo "${logpath:-/var/log/auth.log}"
+}
+
+# Get the filter name used by a jail
+# Args: $1 - jail name
+# Returns: filter value from /etc/fail2ban/jail.d/<jail>.local (or .conf),
+#          defaulting to the jail name
+get_jail_filter() {
+    local jail="$1"
+    local cfg filter=""
+
+    # Filter command doesn't work in fail2ban-client, extract from config.
+    # The value is captured first so the default applies when no "filter"
+    # line exists; a "grep | awk || echo" chain never falls back because awk
+    # exits 0 even on empty input.
+    for cfg in "/etc/fail2ban/jail.d/$jail.local" "/etc/fail2ban/jail.d/$jail.conf"; do
+        [ -f "$cfg" ] || continue
+        filter=$(grep "^filter" "$cfg" 2>/dev/null | awk '{print $NF}')
+        if [ -n "$filter" ]; then
+            break
+        fi
+    done
+
+    echo "${filter:-$jail}"  # Default to jail name
+}
+
+# Report whether a jail is known to the running fail2ban server
+# Args: $1 - jail name
+# Returns: 1 if "fail2ban-client status <jail>" succeeds, otherwise 0
+get_jail_enabled() {
+    local jail="$1"
+    if fail2ban-client status "$jail" >/dev/null 2>&1; then
+        echo "1"
+    else
+        echo "0"
+    fi
+}
+
+# Count IPs banned at least <threshold> times during the last 7 days
+# Args: $1 - jail name, $2 - minimum number of bans (default: 2)
+# Returns: Number of repeat offenders
+get_repeat_offender_count() {
+    local jail="$1"
+    local threshold="${2:-2}"  # Default: 2+ bans = repeat offender
+    local count cutoff_timestamp
+
+    # 7 days ago timestamp
+    cutoff_timestamp=$(date -d "7 days ago" +%s 2>/dev/null || echo 0)
+
+    # Try journalctl first
+    count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null | \
+        grep -F -- "[$jail] Ban " | \
+        grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | \
+        sort | uniq -c | \
+        awk -v t="$threshold" '$1 >= t {count++} END {print count+0}')
+
+    # Fall back to log file if journalctl returns 0
+    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
+        count=$(awk -v needle="[$jail] Ban " -v cutoff="$cutoff_timestamp" -v threshold="$threshold" '
+            index($0, needle) {
+                # Extract IP
+                if (!match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) next
+                ip = substr($0, RSTART, RLENGTH)
+
+                # The timestamp fields are interpolated into a shell command,
+                # so refuse anything that is not plain date/time characters
+                if ($1 !~ /^[A-Za-z0-9:,.+-]+$/ || $2 !~ /^[A-Za-z0-9:,.+-]+$/) next
+
+                ts = 0    # reset so a failed date call cannot reuse the previous value
+                cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
+                cmd | getline ts
+                close(cmd)
+                if (ts >= cutoff) ip_count[ip]++
+            }
+            END {
+                repeat_count = 0
+                for (ip in ip_count) {
+                    if (ip_count[ip] >= threshold) repeat_count++
+                }
+                print repeat_count
+            }
+        ' "$FAIL2BAN_LOG" 2>/dev/null)
+    fi
+
+    echo "${count:-0}"
+}
+
+# Get the size of the fail2ban log
+# Returns: Size in bytes, or 0 if the file is missing or cannot be read
+get_log_size() {
+    local size
+    if [ -f "$FAIL2BAN_LOG" ] && size=$(stat -c %s "$FAIL2BAN_LOG" 2>/dev/null); then
+        echo "$size"
+    else
+        echo "0"
+    fi
+}
+
+# Get the time since the fail2ban log was last modified
+# Returns: Age in seconds, or 0 if the file is missing
+get_log_age() {
+    if [ -f "$FAIL2BAN_LOG" ]; then
+        echo $(($(date +%s) - $(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null || echo 0)))
+    else
+        echo "0"
+    fi
+}
+
+# Get the modification time of the most recently rotated log
+# Looks at "<log>.1" and "<log>-*.gz".
+# Returns: Unix timestamp, or 0 if no rotated log exists
+get_log_rotation_timestamp() {
+    # Compare mtimes directly instead of parsing "ls -t" output
+    local candidate mtime newest=0
+    for candidate in "${FAIL2BAN_LOG}".1 "${FAIL2BAN_LOG}"-*.gz; do
+        [ -e "$candidate" ] || continue  # unmatched glob stays literal
+        mtime=$(stat -c %Y "$candidate" 2>/dev/null) || continue
+        if [ "$mtime" -gt "$newest" ]; then
+            newest="$mtime"
+        fi
+    done
+    echo "$newest"
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# 
Generate all Prometheus metrics +# Returns: Prometheus text format metrics on stdout +generate_metrics() { + local script_start=$(date +%s) + + # Check fail2ban status first + if ! check_fail2ban; then + cat </dev/null; then +# Use awk for floating point arithmetic + ban_rate=$(awk "BEGIN {printf \"%.4f\", ${total_banned:-0} / ${total_failed}}" 2>/dev/null || echo "0") +else + ban_rate="0" + fi + + echo "fail2ban_jail_ban_rate{jail=\"$jail\"} $ban_rate" +done + + echo "" + + # ======================================================================== + # ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking + # ======================================================================== + cat </dev/null; then + ban_rate=$(awk "BEGIN {printf \"%.2f\", $bans_24h / 24}" 2>/dev/null || echo "0") + else + ban_rate="0.00" + fi + + echo "fail2ban_jail_ban_rate_per_hour{jail=\"$jail\"} $ban_rate" + done + + echo "" + + # NEW METRICS - Repeat Offenders + cat <&2 + + if ! command -v nc >/dev/null 2>&1; then + echo "ERROR: netcat (nc) required for HTTP mode" >&2 + exit 1 + fi + + # Infinite loop accepting HTTP requests + while true; do + { + read -r request + # Check if request is for /metrics endpoint + if [[ "$request" =~ ^GET\ /metrics ]]; then + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r" + generate_metrics + else # Serve HTML landing page for other requests + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r" + cat < + +Fail2ban Exporter v2.0 + +

Fail2ban Prometheus Exporter (Enhanced v2.0)

+

Metrics

+

New Metrics

+
    +
  • Last ban/unban timestamps per jail
  • +
  • Ban/unban rates (1h, 24h)
  • +
  • Unique banned IPs per period
  • +
  • Top attackers per jail
  • +
  • Jail configuration info (port, protocol, filter)
  • +
  • Ban rate per hour
  • +
+ + +EOF + fi + } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null # -q 1: wait 1 second after EOF before closing + done +} + +# ============================================================================ +# MAIN EXECUTION +# ============================================================================ + +# Main entry point - routes to appropriate output mode +main() { + parse_args "$@" + + if [ "$HTTP_MODE" = true ]; then + # Run HTTP server (blocks until killed) + run_http_server + elif [ -n "$OUTPUT_FILE" ]; then + # Textfile collector mode: write atomically using temp file + local output_dir + output_dir="$(dirname "$OUTPUT_FILE")" + mkdir -p "$output_dir" + + # Create temp file in SAME directory for atomic rename (same filesystem) + local temp_file + temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX") + + # Generate metrics to temp file + if ! generate_metrics > "$temp_file" 2>/dev/null; then + rm -f "$temp_file" + echo "ERROR: Failed to generate metrics" >&2 + exit 1 + fi + + # Validate: file must exist, have content, and contain fail2ban_up 1 + # If fail2ban is down, we still get fail2ban_up 0 which is valid + local file_lines + file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0) + + if [ "$file_lines" -lt 10 ]; then + rm -f "$temp_file" + echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2 + exit 1 + fi + + # Set permissions before move + chmod 644 "$temp_file" + + # Atomic rename - no gap where file is missing + mv -f "$temp_file" "$OUTPUT_FILE" + + echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2 + else + # Default: output to stdout + generate_metrics + fi +} + +# Execute main function with all script arguments +main "$@" diff --git a/gitlab-metrics-exporter.sh b/gitlab-metrics-exporter.sh new file mode 100755 index 0000000..5b47027 --- /dev/null +++ b/gitlab-metrics-exporter.sh @@ -0,0 +1,787 @@ +#!/bin/bash + +################################################ +#### GitLab Metrics Collector #### +#### for 
Prometheus node_exporter textfile #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### Version: 1.00-030426 #### +################################################ + +set -o pipefail + +SCRIPT_NAME=$(basename "$0") +readonly SCRIPT_NAME + +# Default configuration +readonly DEFAULT_NODE_DIR="/var/lib/node_exporter" +readonly DEFAULT_COLLECTION_INTERVAL=60 +readonly DEFAULT_MAX_PROJECTS=100 +readonly DEFAULT_CURL_TIMEOUT=30 +readonly DEFAULT_METRICS_URL="http://localhost/-/metrics" +readonly DEFAULT_SIDEKIQ_URL="http://localhost:8082/metrics" + +# Configuration variables (can be overridden by environment) +GITLAB_URL=${GITLAB_URL:-} +GITLAB_TOKEN=${GITLAB_TOKEN:-} +GITLAB_METRICS_URL=${GITLAB_METRICS_URL:-$DEFAULT_METRICS_URL} +GITLAB_SIDEKIQ_URL=${GITLAB_SIDEKIQ_URL:-$DEFAULT_SIDEKIQ_URL} +NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR} +COLLECTION_INTERVAL=${COLLECTION_INTERVAL:-$DEFAULT_COLLECTION_INTERVAL} +MAX_PROJECTS=${MAX_PROJECTS:-$DEFAULT_MAX_PROJECTS} +DEBUG=${DEBUG:-} + +# Runtime flags +RUN_MODE="once" +LOCAL_MODE=false + +# Error tracking +ERRORS_TOTAL=0 + +handle_error() { + local exit_code=$1 + local line_number=$2 + echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2 + exit "$exit_code" +} + +trap 'handle_error $? $LINENO' ERR + +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +show_help() { + cat << EOF +Usage: $SCRIPT_NAME [OPTIONS] + +GitLab metrics collector for Prometheus node_exporter textfile directory. + +OPTIONS: + --once Run collection once and exit (default) + --daemon Run continuously at COLLECTION_INTERVAL + --local Scrape /-/metrics endpoint for server-side metrics (Puma, Sidekiq, Redis, DB) + --help, -h Show this help message + +ENVIRONMENT VARIABLES: + GITLAB_URL GitLab base URL (required, e.g. 
https://gitlab.example.com) + GITLAB_TOKEN GitLab private access token (required for API metrics) + GITLAB_METRICS_URL Local metrics endpoint URL (default: $DEFAULT_METRICS_URL) + GITLAB_SIDEKIQ_URL Sidekiq exporter endpoint URL (default: $DEFAULT_SIDEKIQ_URL) + NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR) + COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL) + MAX_PROJECTS Maximum number of projects to collect per-project metrics for (default: $DEFAULT_MAX_PROJECTS) + DEBUG Enable debug output + +EXAMPLES: + GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --once + GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --daemon + $SCRIPT_NAME --local --once + GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --local --daemon + DEBUG=1 GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME +EOF +} + +sanitize_label() { + local value="$1" + echo "${value//[^a-zA-Z0-9_]/_}" +} + +gitlab_api() { + local endpoint="$1" + local include_headers="${2:-false}" + + local url="${GITLAB_URL}${endpoint}" + debug_echo "API call: $url" + + if [[ "$include_headers" == "true" ]]; then + curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" \ + --header "PRIVATE-TOKEN: $GITLAB_TOKEN" \ + -D - \ + "$url" 2>/dev/null + else + curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" \ + --header "PRIVATE-TOKEN: $GITLAB_TOKEN" \ + "$url" 2>/dev/null + fi +} + +gitlab_api_paginated() { + local endpoint="$1" + local per_page="${2:-100}" + local max_pages="${3:-50}" + + local page=1 + local all_results="[]" + + while [[ $page -le $max_pages ]]; do + local separator="?" 
+        if [[ "$endpoint" == *"?"* ]]; then
+            separator="&"
+        fi
+
+        local response
+        response=$(gitlab_api "${endpoint}${separator}per_page=${per_page}&page=${page}" "true" 2>/dev/null) || break
+
+        local headers body
+        headers=$(echo "$response" | sed '/^\r\{0,1\}$/q')
+        body=$(echo "$response" | sed '1,/^\r\{0,1\}$/d')
+
+        if [[ -z "$body" ]] || ! echo "$body" | jq -e '.' >/dev/null 2>&1; then
+            break
+        fi
+
+        local count
+        count=$(echo "$body" | jq 'length' 2>/dev/null) || break
+        if [[ "$count" -eq 0 ]]; then
+            break
+        fi
+
+        all_results=$(echo "$all_results" "$body" | jq -s '.[0] + .[1]' 2>/dev/null) || break
+
+        local next_page
+        next_page=$(echo "$headers" | grep -i '^x-next-page:' | tr -d '[:space:]' | cut -d: -f2)
+        if [[ -z "$next_page" ]]; then
+            break
+        fi
+
+        page=$((page + 1))
+    done
+
+    echo "$all_results"
+}
+
+# Collect reachability and version metrics for the GitLab instance
+# Returns: gitlab_instance_up and, when /api/v4/version answers,
+#          gitlab_instance_version_info in Prometheus text format on stdout
+collect_instance_health() {
+    local metrics=""
+
+    debug_echo "Collecting instance health metrics"
+
+    # Check if instance is reachable
+    local up=0
+    if gitlab_api "/-/health" >/dev/null 2>&1; then
+        up=1
+    fi
+    metrics+="# HELP gitlab_instance_up Whether the GitLab instance is reachable\n"
+    metrics+="# TYPE gitlab_instance_up gauge\n"
+    metrics+="gitlab_instance_up $up\n"
+
+    # Version info
+    local version_json
+    if version_json=$(gitlab_api "/api/v4/version" 2>/dev/null); then
+        local version revision
+        version=$(echo "$version_json" | jq -r '.version // "unknown"' 2>/dev/null)
+        revision=$(echo "$version_json" | jq -r '.revision // "unknown"' 2>/dev/null)
+        metrics+="# HELP gitlab_instance_version_info GitLab version information\n"
+        metrics+="# TYPE gitlab_instance_version_info gauge\n"
+        metrics+="gitlab_instance_version_info{version=\"$version\",revision=\"$revision\"} 1\n"
+        debug_echo "GitLab version: $version ($revision)"
+    else
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to collect version info"
+    fi
+
+    echo -e "$metrics"
+}
+
+# Collect per-project counters and storage statistics
+# Returns: gitlab_project_* gauges labelled with project and namespace on stdout
+collect_project_statistics() {
+    local metrics=""
+
+    debug_echo "Collecting project statistics"
+
+    local projects
+    if ! projects=$(gitlab_api_paginated "/api/v4/projects?statistics=true" 100 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to collect project statistics"
+        return
+    fi
+
+    local project_count
+    project_count=$(echo "$projects" | jq 'length' 2>/dev/null) || project_count=0
+    debug_echo "Found $project_count projects"
+
+    # One row per metric: name suffix | jq path in the project object | help text
+    local -a fields=(
+        "stars_count|.star_count|Number of stars for a project"
+        "forks_count|.forks_count|Number of forks for a project"
+        "open_issues_count|.open_issues_count|Number of open issues for a project"
+        "commit_count|.statistics.commit_count|Number of commits in default branch"
+        "storage_size_bytes|.statistics.storage_size|Total storage size in bytes"
+        "repository_size_bytes|.statistics.repository_size|Repository size in bytes"
+        "lfs_objects_size_bytes|.statistics.lfs_objects_size|LFS objects size in bytes"
+        "job_artifacts_size_bytes|.statistics.job_artifacts_size|Job artifacts size in bytes"
+        "packages_size_bytes|.statistics.packages_size|Packages size in bytes"
+        "wiki_size_bytes|.statistics.wiki_size|Wiki size in bytes"
+        "snippets_size_bytes|.statistics.snippets_size|Snippets size in bytes"
+        "uploads_size_bytes|.statistics.uploads_size|Uploads size in bytes"
+    )
+
+    local spec suffix path help
+    for spec in "${fields[@]}"; do
+        IFS='|' read -r suffix path help <<< "$spec"
+        metrics+="# HELP gitlab_project_${suffix} ${help}\n"
+        metrics+="# TYPE gitlab_project_${suffix} gauge\n"
+    done
+
+    # Feed the loop through process substitution rather than a pipe: a piped
+    # "while" runs in a subshell, so every "metrics+=" made inside it would be
+    # discarded and only the HELP/TYPE lines would be printed.
+    local project name namespace labels value
+    while IFS= read -r project; do
+        name=$(sanitize_label "$(echo "$project" | jq -r '.name // "unknown"' 2>/dev/null)")
+        namespace=$(sanitize_label "$(echo "$project" | jq -r '.namespace.name // "unknown"' 2>/dev/null)")
+        labels="project=\"$name\",namespace=\"$namespace\""
+
+        for spec in "${fields[@]}"; do
+            IFS='|' read -r suffix path help <<< "$spec"
+            value=$(echo "$project" | jq -r "$path // 0" 2>/dev/null)
+            metrics+="gitlab_project_${suffix}{$labels} $value\n"
+        done
+    done < <(echo "$projects" | jq -c '.[]' 2>/dev/null)
+
+    echo -e "$metrics"
+}
+
+# Collect pipeline status counts and the latest pipeline duration per project
+# Looks at the 20 most recent pipelines of up to MAX_PROJECTS projects.
+# Returns: gitlab_pipeline_status and gitlab_pipeline_duration_seconds on stdout
+collect_pipeline_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting pipeline metrics"
+
+    local projects
+    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to fetch projects for pipeline metrics"
+        return
+    fi
+
+    metrics+="# HELP gitlab_pipeline_status Count of pipelines by status\n"
+    metrics+="# TYPE gitlab_pipeline_status gauge\n"
+    metrics+="# HELP gitlab_pipeline_duration_seconds Duration of the latest pipeline\n"
+    metrics+="# TYPE gitlab_pipeline_duration_seconds gauge\n"
+
+    # Process substitution keeps the loop in the current shell so the metrics
+    # and ERRORS_TOTAL updates made inside it survive (a piped loop loses them)
+    local project project_id name namespace pipelines status count duration
+    while IFS= read -r project; do
+        project_id=$(echo "$project" | jq -r '.id' 2>/dev/null)
+        name=$(sanitize_label "$(echo "$project" | jq -r '.name // "unknown"' 2>/dev/null)")
+        namespace=$(sanitize_label "$(echo "$project" | jq -r '.namespace.name // "unknown"' 2>/dev/null)")
+
+        if ! pipelines=$(gitlab_api "/api/v4/projects/${project_id}/pipelines?per_page=20" 2>/dev/null); then
+            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+            debug_echo "Failed to fetch pipelines for project $project_id"
+            continue
+        fi
+
+        # Count pipelines per status ("uniq -c" prints "<count> <status>")
+        while read -r count status; do
+            if [[ -n "$status" ]]; then
+                metrics+="gitlab_pipeline_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
+            fi
+        done < <(echo "$pipelines" | jq -r '.[].status // empty' 2>/dev/null | sort | uniq -c)
+
+        # Latest pipeline duration
+        duration=$(echo "$pipelines" | jq -r '.[0].duration // empty' 2>/dev/null)
+        if [[ -n "$duration" && "$duration" != "null" ]]; then
+            metrics+="gitlab_pipeline_duration_seconds{project=\"$name\",namespace=\"$namespace\"} $duration\n"
+        fi
+    done < <(echo "$projects" | jq -c '.[]' 2>/dev/null)
+
+    echo -e "$metrics"
+}
+
+collect_runner_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting runner metrics"
+
+    local runners
+    if !
runners=$(gitlab_api_paginated "/api/v4/runners" 100 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to collect runner metrics"
+        return
+    fi
+
+    local total online_total
+    total=$(echo "$runners" | jq 'length' 2>/dev/null) || total=0
+    online_total=$(echo "$runners" | jq '[.[] | select(.status == "online")] | length' 2>/dev/null) || online_total=0
+
+    metrics+="# HELP gitlab_runner_active Whether a runner is active\n"
+    metrics+="# TYPE gitlab_runner_active gauge\n"
+    metrics+="# HELP gitlab_runner_online Whether a runner is online\n"
+    metrics+="# TYPE gitlab_runner_online gauge\n"
+    metrics+="# HELP gitlab_runners_total Total number of runners\n"
+    metrics+="# TYPE gitlab_runners_total gauge\n"
+    metrics+="# HELP gitlab_runners_online_total Total number of online runners\n"
+    metrics+="# TYPE gitlab_runners_online_total gauge\n"
+
+    # Read the runner list through process substitution, not a pipe: a piped
+    # "while" runs in a subshell and the per-runner "metrics+=" lines appended
+    # there would be discarded before the final echo.
+    local runner runner_name runner_type active status active_val online_val
+    while IFS= read -r runner; do
+        runner_name=$(sanitize_label "$(echo "$runner" | jq -r '.description // "unknown"' 2>/dev/null)")
+        runner_type=$(echo "$runner" | jq -r '.runner_type // "unknown"' 2>/dev/null)
+        active=$(echo "$runner" | jq -r '.active // false' 2>/dev/null)
+        status=$(echo "$runner" | jq -r '.status // "unknown"' 2>/dev/null)
+
+        # Translate the API's true/"online" values into 1/0 gauges
+        active_val=0
+        if [[ "$active" == "true" ]]; then
+            active_val=1
+        fi
+
+        online_val=0
+        if [[ "$status" == "online" ]]; then
+            online_val=1
+        fi
+
+        metrics+="gitlab_runner_active{runner_name=\"$runner_name\",runner_type=\"$runner_type\"} $active_val\n"
+        metrics+="gitlab_runner_online{runner_name=\"$runner_name\",runner_type=\"$runner_type\"} $online_val\n"
+    done < <(echo "$runners" | jq -c '.[]' 2>/dev/null)
+
+    metrics+="gitlab_runners_total $total\n"
+    metrics+="gitlab_runners_online_total $online_total\n"
+
+    echo -e "$metrics"
+}
+
+collect_user_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting user metrics"
+
+    local response
+    if !
response=$(gitlab_api "/api/v4/users?per_page=1" "true" 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to collect user metrics"
+        return
+    fi
+
+    # The user count is read from the X-Total response header
+    local total
+    total=$(echo "$response" | grep -i '^x-total:' | tr -d '[:space:]' | cut -d: -f2)
+
+    if [[ -z "$total" ]]; then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to parse user count from X-Total header"
+    else
+        metrics+="# HELP gitlab_users_total Total number of GitLab users\n"
+        metrics+="# TYPE gitlab_users_total gauge\n"
+        metrics+="gitlab_users_total $total\n"
+        debug_echo "Total users: $total"
+    fi
+
+    echo -e "$metrics"
+}
+
+# Collect the number of groups returned by the groups API
+# Returns: gitlab_groups_total in Prometheus text format on stdout
+collect_group_metrics() {
+    debug_echo "Collecting group metrics"
+
+    local group_list
+    if ! group_list=$(gitlab_api_paginated "/api/v4/groups" 100 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to collect group metrics"
+        return
+    fi
+
+    # Length of the merged JSON array; 0 when jq cannot parse it
+    local group_total
+    group_total=$(echo "$group_list" | jq 'length' 2>/dev/null) || group_total=0
+
+    local metrics=""
+    metrics+="# HELP gitlab_groups_total Total number of GitLab groups\n"
+    metrics+="# TYPE gitlab_groups_total gauge\n"
+    metrics+="gitlab_groups_total $group_total\n"
+    debug_echo "Total groups: $group_total"
+
+    echo -e "$metrics"
+}
+
+collect_job_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting job metrics"
+
+    local projects
+    if !
projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to fetch projects for job metrics"
+        return
+    fi
+
+    metrics+="# HELP gitlab_jobs_by_status Count of jobs by status\n"
+    metrics+="# TYPE gitlab_jobs_by_status gauge\n"
+
+    # Read the project list through process substitution, not a pipe: a piped
+    # "while" runs in a subshell, which would discard the "metrics+=" and
+    # ERRORS_TOTAL updates made for each project.
+    local project project_id name namespace jobs status count
+    while IFS= read -r project; do
+        project_id=$(echo "$project" | jq -r '.id' 2>/dev/null)
+        name=$(sanitize_label "$(echo "$project" | jq -r '.name // "unknown"' 2>/dev/null)")
+        namespace=$(sanitize_label "$(echo "$project" | jq -r '.namespace.name // "unknown"' 2>/dev/null)")
+
+        # Only the 20 most recent jobs of each project are inspected
+        if ! jobs=$(gitlab_api "/api/v4/projects/${project_id}/jobs?per_page=20" 2>/dev/null); then
+            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+            debug_echo "Failed to fetch jobs for project $project_id"
+            continue
+        fi
+
+        # "uniq -c" prints "<count> <status>", one line per distinct status
+        while read -r count status; do
+            if [[ -n "$status" ]]; then
+                metrics+="gitlab_jobs_by_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
+            fi
+        done < <(echo "$jobs" | jq -r '.[].status // empty' 2>/dev/null | sort | uniq -c)
+    done < <(echo "$projects" | jq -c '.[]' 2>/dev/null)
+
+    echo -e "$metrics"
+}
+
+collect_merge_request_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting merge request metrics"
+
+    local projects
+    if !
projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
+        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+        debug_echo "Failed to fetch projects for merge request metrics"
+        return
+    fi
+
+    metrics+="# HELP gitlab_open_merge_requests Number of open merge requests\n"
+    metrics+="# TYPE gitlab_open_merge_requests gauge\n"
+
+    # Read the project list through process substitution, not a pipe: a piped
+    # "while" runs in a subshell, which would discard the "metrics+=" and
+    # ERRORS_TOTAL updates made for each project.
+    local project project_id name namespace mrs count
+    while IFS= read -r project; do
+        project_id=$(echo "$project" | jq -r '.id' 2>/dev/null)
+        name=$(sanitize_label "$(echo "$project" | jq -r '.name // "unknown"' 2>/dev/null)")
+        namespace=$(sanitize_label "$(echo "$project" | jq -r '.namespace.name // "unknown"' 2>/dev/null)")
+
+        # A single page of up to 100 open merge requests is counted
+        if ! mrs=$(gitlab_api "/api/v4/projects/${project_id}/merge_requests?state=opened&per_page=100" 2>/dev/null); then
+            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
+            debug_echo "Failed to fetch merge requests for project $project_id"
+            continue
+        fi
+
+        count=$(echo "$mrs" | jq 'length' 2>/dev/null) || count=0
+        metrics+="gitlab_open_merge_requests{project=\"$name\",namespace=\"$namespace\"} $count\n"
+    done < <(echo "$projects" | jq -c '.[]' 2>/dev/null)
+
+    echo -e "$metrics"
+}
+
+collect_environment_metrics() {
+    local metrics=""
+
+    debug_echo "Collecting environment metrics"
+
+    local projects
+    if !
projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then + ERRORS_TOTAL=$((ERRORS_TOTAL + 1)) + debug_echo "Failed to fetch projects for environment metrics" + return + fi + + metrics+="# HELP gitlab_environments_total Number of environments per project\n" + metrics+="# TYPE gitlab_environments_total gauge\n" + + echo "$projects" | jq -c '.[]' 2>/dev/null | while IFS= read -r project; do + local project_id name namespace + project_id=$(echo "$project" | jq -r '.id' 2>/dev/null) + name=$(sanitize_label "$(echo "$project" | jq -r '.name // "unknown"' 2>/dev/null)") + namespace=$(sanitize_label "$(echo "$project" | jq -r '.namespace.name // "unknown"' 2>/dev/null)") + + local envs + if ! envs=$(gitlab_api "/api/v4/projects/${project_id}/environments?per_page=100" 2>/dev/null); then + ERRORS_TOTAL=$((ERRORS_TOTAL + 1)) + debug_echo "Failed to fetch environments for project $project_id" + continue + fi + + local count + count=$(echo "$envs" | jq 'length' 2>/dev/null) || count=0 + metrics+="gitlab_environments_total{project=\"$name\",namespace=\"$namespace\"} $count\n" + done + + echo -e "$metrics" +} + +collect_local_metrics() { + local metrics="" + + debug_echo "Scraping local metrics from $GITLAB_METRICS_URL" + + local raw_metrics + if ! 
raw_metrics=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_METRICS_URL" 2>/dev/null); then + ERRORS_TOTAL=$((ERRORS_TOTAL + 1)) + echo "Error: Failed to scrape $GITLAB_METRICS_URL" >&2 + echo "Ensure this host's IP is in gitlab_rails['monitoring_whitelist']" >&2 + return + fi + + # GitLab version info + local version_patterns="^gitlab_version_info[{ ]" + local version_help="^# (HELP|TYPE) gitlab_version_info" + metrics+=$(echo "$raw_metrics" | grep -E "$version_help|$version_patterns" 2>/dev/null) + metrics+=$'\n' + + # Puma metrics + local puma_patterns="^puma_workers[{ ]|^puma_running_workers[{ ]|^puma_running[{ ]|^puma_queued_connections[{ ]|^puma_active_connections[{ ]|^puma_pool_capacity[{ ]|^puma_max_threads[{ ]|^puma_idle_threads[{ ]" + local puma_help="^# (HELP|TYPE) puma_" + metrics+=$(echo "$raw_metrics" | grep -E "$puma_help|$puma_patterns" 2>/dev/null) + metrics+=$'\n' + + # Sidekiq metrics (served by separate Sidekiq exporter, default localhost:8082) + local sidekiq_raw + if sidekiq_raw=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_SIDEKIQ_URL" 2>/dev/null); then + debug_echo "Scraped Sidekiq metrics from $GITLAB_SIDEKIQ_URL" + + # Core Sidekiq job metrics + local sidekiq_patterns="^sidekiq_running_jobs[{ ]|^sidekiq_concurrency[{ ]|^sidekiq_mem_total_bytes[{ ]|^sidekiq_jobs_failed_total[{ ]|^sidekiq_jobs_dead_total[{ ]|^sidekiq_enqueued_jobs_total[{ ]|^sidekiq_jobs_completion_seconds[_{ ]|^sidekiq_jobs_queue_duration_seconds[_{ ]|^sidekiq_jobs_cpu_seconds[_{ ]|^sidekiq_jobs_db_seconds[_{ ]|^sidekiq_jobs_gitaly_seconds[_{ ]|^sidekiq_redis_requests_total[{ ]|^sidekiq_redis_requests_duration_seconds[_{ ]" + local sidekiq_help="^# (HELP|TYPE) sidekiq_(running_jobs|concurrency|mem_total_bytes|jobs_failed_total|jobs_dead_total|enqueued_jobs_total|jobs_completion_seconds|jobs_queue_duration_seconds|jobs_cpu_seconds|jobs_db_seconds|jobs_gitaly_seconds|redis_requests_total|redis_requests_duration_seconds)" + metrics+=$(echo "$sidekiq_raw" | 
grep -E "$sidekiq_help|$sidekiq_patterns" 2>/dev/null) + metrics+=$'\n' + + # CI/CD pipeline internals + local ci_patterns="^pipelines_created_total[{ ]|^deployments[{ ]|^gitlab_ci_pipeline_creation_duration_seconds[_{ ]|^gitlab_ci_pipeline_failure_reasons[{ ]|^gitlab_ci_active_jobs[_{ ]" + local ci_help="^# (HELP|TYPE) (pipelines_created_total|deployments|gitlab_ci_pipeline_creation_duration_seconds|gitlab_ci_pipeline_failure_reasons|gitlab_ci_active_jobs)" + metrics+=$(echo "$sidekiq_raw" | grep -E "$ci_help|$ci_patterns" 2>/dev/null) + metrics+=$'\n' + + # Email delivery metrics + local email_patterns="^gitlab_emails_delivered_total[{ ]|^gitlab_emails_delivery_attempts_total[{ ]" + local email_help="^# (HELP|TYPE) gitlab_emails_(delivered_total|delivery_attempts_total)" + metrics+=$(echo "$sidekiq_raw" | grep -E "$email_help|$email_patterns" 2>/dev/null) + metrics+=$'\n' + + # External HTTP (webhooks, integrations) + local ext_http_patterns="^gitlab_external_http_total[{ ]|^gitlab_external_http_duration_seconds[_{ ]" + local ext_http_help="^# (HELP|TYPE) gitlab_external_http_(total|duration_seconds)" + metrics+=$(echo "$sidekiq_raw" | grep -E "$ext_http_help|$ext_http_patterns" 2>/dev/null) + metrics+=$'\n' + + # Sidekiq SLI apdex/errors + local sli_patterns="^gitlab_sli_sidekiq_execution_apdex_success_total[{ ]|^gitlab_sli_sidekiq_execution_apdex_total[{ ]|^gitlab_sli_sidekiq_execution_error_total[{ ]|^gitlab_sli_sidekiq_execution_total[{ ]" + local sli_help="^# (HELP|TYPE) gitlab_sli_sidekiq_execution" + metrics+=$(echo "$sidekiq_raw" | grep -E "$sli_help|$sli_patterns" 2>/dev/null) + metrics+=$'\n' + + # DB transaction duration, primary SQL, threads, cache, workers + local extra_patterns="^gitlab_database_transaction_seconds[_{ ]|^gitlab_sql_primary_duration_seconds[_{ ]|^gitlab_ruby_threads_running_threads[{ ]|^gitlab_ruby_threads_max_expected_threads[{ ]|^limited_capacity_worker_running_jobs[{ ]|^limited_capacity_worker_max_running_jobs[{ 
]|^limited_capacity_worker_remaining_work_count[{ ]|^redis_hit_miss_operations_total[{ ]" + local extra_help="^# (HELP|TYPE) (gitlab_database_transaction_seconds|gitlab_sql_primary_duration_seconds|gitlab_ruby_threads_running_threads|gitlab_ruby_threads_max_expected_threads|limited_capacity_worker_running_jobs|limited_capacity_worker_max_running_jobs|limited_capacity_worker_remaining_work_count|redis_hit_miss_operations_total)" + metrics+=$(echo "$sidekiq_raw" | grep -E "$extra_help|$extra_patterns" 2>/dev/null) + metrics+=$'\n' + else + debug_echo "Warning: Could not scrape Sidekiq exporter at $GITLAB_SIDEKIQ_URL (is sidekiq_exporter enabled?)" + fi + + # Redis metrics + local redis_patterns="^gitlab_redis_client_requests_total[{ ]|^gitlab_redis_client_exceptions_total[{ ]|^gitlab_redis_client_requests_duration_seconds[_{ ]|^gitlab_redis_client_requests_duration_seconds_sum[{ ]|^gitlab_redis_client_requests_duration_seconds_count[{ ]" + local redis_help="^# (HELP|TYPE) gitlab_redis_client_(requests_total|exceptions_total|requests_duration_seconds)" + metrics+=$(echo "$raw_metrics" | grep -E "$redis_help|$redis_patterns" 2>/dev/null) + metrics+=$'\n' + + # Database connection pool metrics + local db_patterns="^gitlab_database_connection_pool_" + local db_help="^# (HELP|TYPE) gitlab_database_connection_pool_" + metrics+=$(echo "$raw_metrics" | grep -E "$db_help|$db_patterns" 2>/dev/null) + metrics+=$'\n' + + # Process metrics (CPU, memory, file descriptors) + local process_patterns="^ruby_process_resident_memory_bytes[{ ]|^ruby_process_cpu_seconds_total[{ ]|^process_open_fds[{ ]|^process_max_fds[{ ]|^ruby_gc_stat_heap_live_slots[{ ]|^ruby_gc_stat_heap_free_slots[{ ]" + local process_help="^# (HELP|TYPE) (ruby_process_resident_memory_bytes|ruby_process_cpu_seconds_total|process_open_fds|process_max_fds|ruby_gc_stat_heap_live_slots|ruby_gc_stat_heap_free_slots)" + metrics+=$(echo "$raw_metrics" | grep -E "$process_help|$process_patterns" 2>/dev/null) + metrics+=$'\n' 
+ + # GitLab transaction/request metrics + local txn_patterns="^gitlab_transaction_duration_seconds[{ _]|^gitlab_sql_duration_seconds[{ _]|^gitlab_cache_operation_duration_seconds[{ _]" + local txn_help="^# (HELP|TYPE) (gitlab_transaction_duration_seconds|gitlab_sql_duration_seconds|gitlab_cache_operation_duration_seconds)" + metrics+=$(echo "$raw_metrics" | grep -E "$txn_help|$txn_patterns" 2>/dev/null) + metrics+=$'\n' + + # User session and ActionCable metrics + local session_patterns="^user_session_logins_total[{ ]|^action_cable_active_connections[{ ]|^action_cable_pool_current_size[{ ]" + local session_help="^# (HELP|TYPE) (user_session_logins_total|action_cable_active_connections|action_cable_pool_current_size)" + metrics+=$(echo "$raw_metrics" | grep -E "$session_help|$session_patterns" 2>/dev/null) + metrics+=$'\n' + + local metric_count + metric_count=$(echo "$metrics" | grep -cv '^#\|^$' 2>/dev/null) || metric_count=0 + debug_echo "Extracted $metric_count local metrics" + + printf '%s\n' "$metrics" +} + +run_collection() { + local start_time + start_time=$(date +%s) + ERRORS_TOTAL=0 + + debug_echo "Starting metrics collection" + + local all_metrics="" + + # API-based metrics (require GITLAB_URL and GITLAB_TOKEN) + if [[ -n "$GITLAB_URL" && -n "$GITLAB_TOKEN" ]]; then + all_metrics+="$(collect_instance_health)"$'\n' + all_metrics+="$(collect_project_statistics)"$'\n' + all_metrics+="$(collect_pipeline_metrics)"$'\n' + all_metrics+="$(collect_runner_metrics)"$'\n' + all_metrics+="$(collect_user_metrics)"$'\n' + all_metrics+="$(collect_group_metrics)"$'\n' + all_metrics+="$(collect_job_metrics)"$'\n' + all_metrics+="$(collect_merge_request_metrics)"$'\n' + all_metrics+="$(collect_environment_metrics)"$'\n' + fi + + # Local server-side metrics (scraped from /-/metrics) + if [[ "$LOCAL_MODE" == true ]]; then + all_metrics+="$(collect_local_metrics)"$'\n' + fi + + local end_time duration + end_time=$(date +%s) + duration=$((end_time - start_time)) + + 
all_metrics+="# HELP gitlab_collector_duration_seconds Time taken to collect all metrics\n" + all_metrics+="# TYPE gitlab_collector_duration_seconds gauge\n" + all_metrics+="gitlab_collector_duration_seconds $duration\n" + all_metrics+="# HELP gitlab_collector_last_run_timestamp Unix timestamp of last collection run\n" + all_metrics+="# TYPE gitlab_collector_last_run_timestamp gauge\n" + all_metrics+="gitlab_collector_last_run_timestamp $end_time\n" + all_metrics+="# HELP gitlab_collector_errors_total Number of errors during collection\n" + all_metrics+="# TYPE gitlab_collector_errors_total gauge\n" + all_metrics+="gitlab_collector_errors_total $ERRORS_TOTAL\n" + + # Write atomically + mkdir -p "$NODE_DIR" + local tmp_file + tmp_file=$(mktemp "${NODE_DIR}/gitlab_metrics.prom.XXXXXX") + echo -e "$all_metrics" > "$tmp_file" + mv "$tmp_file" "${NODE_DIR}/gitlab_metrics.prom" + + debug_echo "Collection complete in ${duration}s with $ERRORS_TOTAL errors" +} + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + --once) + RUN_MODE="once" + shift + ;; + --daemon) + RUN_MODE="daemon" + shift + ;; + --local) + LOCAL_MODE=true + shift + ;; + --help|-h) + show_help + exit 0 + ;; + *) + echo "Error: Unknown option: $1" >&2 + show_help >&2 + exit 1 + ;; + esac + done +} + +validate_requirements() { + # API credentials only required when not running local-only + if [[ -z "$GITLAB_URL" && "$LOCAL_MODE" != true ]]; then + echo "Error: GITLAB_URL is required (or use --local for server-side only)" >&2 + exit 1 + fi + + if [[ -z "$GITLAB_TOKEN" && "$LOCAL_MODE" != true ]]; then + echo "Error: GITLAB_TOKEN is required (or use --local for server-side only)" >&2 + exit 1 + fi + + # Strip trailing slash from URLs + GITLAB_URL="${GITLAB_URL%/}" + GITLAB_METRICS_URL="${GITLAB_METRICS_URL%/}" + GITLAB_SIDEKIQ_URL="${GITLAB_SIDEKIQ_URL%/}" + + if ! 
command -v curl >/dev/null 2>&1; then + echo "Error: curl is required but not installed" >&2 + exit 1 + fi + + if [[ -n "$GITLAB_TOKEN" ]]; then + if ! command -v jq >/dev/null 2>&1; then + echo "Error: jq is required but not installed" >&2 + exit 1 + fi + fi +} + +main() { + parse_arguments "$@" + validate_requirements + + debug_echo "GitLab URL: $GITLAB_URL" + debug_echo "Metrics URL: $GITLAB_METRICS_URL" + debug_echo "Sidekiq URL: $GITLAB_SIDEKIQ_URL" + debug_echo "Node exporter dir: $NODE_DIR" + debug_echo "Run mode: $RUN_MODE" + debug_echo "Local mode: $LOCAL_MODE" + debug_echo "Max projects: $MAX_PROJECTS" + + if [[ "$RUN_MODE" == "daemon" ]]; then + debug_echo "Running in daemon mode with ${COLLECTION_INTERVAL}s interval" + while true; do + run_collection + sleep "$COLLECTION_INTERVAL" + done + else + run_collection + fi + + debug_echo "Script completed successfully" +} + +# Execute main function if script is run directly +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/gitlab-migration-exporter.sh b/gitlab-migration-exporter.sh new file mode 100644 index 0000000..916d3ee --- /dev/null +++ b/gitlab-migration-exporter.sh @@ -0,0 +1,1027 @@ +#!/bin/bash +############################################################# +#### GitLab Migration Metrics Exporter for Prometheus #### +#### Monitors database migration progress during updates #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.0 #### +#### #### +#### Usage: ./gitlab-migration-exporter.sh [OPTIONS] #### +############################################################# +# +# Metrics collected: +# - Migration status: total, completed, pending, failed, running +# - Current migration: name, version, elapsed time +# - Background migrations: batched job queue status +# - GitLab services: up/down status per service +# - GitLab version: current installed version +# - Reconfigure status: whether gitlab-ctl 
reconfigure is running +# - Upgrade progress: overall percentage estimate +# +# Requirements: +# - GitLab Omnibus installation +# - socat (for HTTP server) +# - Root or gitlab-psql access +# +set -euo pipefail + +######################### +### Configuration ### +######################### + +LISTEN_PORT="${GITLAB_EXPORTER_PORT:-9177}" +SCRAPE_INTERVAL="${SCRAPE_INTERVAL:-30}" +RAKE_INTERVAL="${RAKE_INTERVAL:-300}" +GITLAB_RAILS_CMD="${GITLAB_RAILS_CMD:-gitlab-rails}" +GITLAB_RAKE_CMD="${GITLAB_RAKE_CMD:-gitlab-rake}" +GITLAB_CTL_CMD="${GITLAB_CTL_CMD:-gitlab-ctl}" +GITLAB_PSQL_CMD="${GITLAB_PSQL_CMD:-gitlab-psql}" +GITLAB_DB_CONFIG="/var/opt/gitlab/gitlab-rails/etc/database.yml" +LOG_DIR="${GITLAB_LOG_DIR:-/var/log/gitlab}" +LOGFILE="/var/log/gitlab-migration-exporter.log" + +# Output mode +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false + +# Cache state +STATE_DIR="/tmp/gitlab-migration-metrics" +METRICS_CACHE="$STATE_DIR/metrics_cache" +RAKE_CACHE="$STATE_DIR/rake_cache" +LOCKFILE="$STATE_DIR/exporter.pid" +LAST_SCRAPE=0 + +######################### +### Logging ### +######################### + +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOGFILE" >&2 +} + +######################### +### Parse Arguments ### +######################### + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --textfile) + OUTPUT_FILE="$TEXTFILE_DIR/gitlab_migration.prom" + shift + ;; + --http) + HTTP_MODE=true + shift + ;; + --port|-p) + LISTEN_PORT="$2" + HTTP_MODE=true + shift 2 + ;; + --output|-o) + OUTPUT_FILE="$2" + shift 2 + ;; + --interval) + SCRAPE_INTERVAL="$2" + shift 2 + ;; + --rake-interval) + RAKE_INTERVAL="$2" + shift 2 + ;; + --log-dir) + LOG_DIR="$2" + shift 2 + ;; + --handle-request) + handle_request + exit 0 + ;; + --help|-h) + cat < "$service_path" <<'UNIT' +[Unit] +Description=GitLab Migration Metrics Exporter for Prometheus +Wants=network-online.target +After=network-online.target + +[Service] +User=root +Group=root 
+Type=simple +ExecStart=/usr/local/bin/gitlab-migration-exporter.sh +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target +UNIT + systemctl daemon-reload + systemctl enable gitlab-migration-exporter + log "Service installed and enabled" + fi +} + +######################### +### Setup ### +######################### + +check_already_running() { + if [[ -f "$LOCKFILE" ]]; then + local old_pid + old_pid=$(cat "$LOCKFILE" 2>/dev/null) + if [[ -n "$old_pid" ]] && kill -0 "$old_pid" 2>/dev/null; then + log "ERROR: Already running (PID $old_pid). Exiting." + exit 0 + fi + # Stale lockfile, remove it + rm -f "$LOCKFILE" + fi + echo $$ > "$LOCKFILE" +} + +cleanup_lock() { + rm -f "$LOCKFILE" +} + +trap cleanup_lock EXIT INT TERM + +setup() { + mkdir -p "$STATE_DIR" + check_already_running + + if ! command -v socat &>/dev/null; then + log "socat not found, attempting to install..." + if [[ $EUID -eq 0 ]]; then + if command -v apt-get &>/dev/null; then + apt-get update -qq && apt-get install -y -qq socat + elif command -v dnf &>/dev/null; then + dnf install -y -q socat + elif command -v yum &>/dev/null; then + yum install -y -q socat + else + log "ERROR: Cannot auto-install socat. Install manually." + exit 1 + fi + log "socat installed successfully" + else + log "ERROR: socat is required. Run as root to auto-install, or install manually." + exit 1 + fi + fi + + # Self-install on first run if running in HTTP mode as root + if [[ "$HTTP_MODE" == "true" && $EUID -eq 0 ]]; then + install_service + fi + + if ! command -v "$GITLAB_CTL_CMD" &>/dev/null; then + log "WARNING: $GITLAB_CTL_CMD not found - is GitLab Omnibus installed?" 
+ fi + + detect_database_config + log "Database type: $DB_TYPE" +} + +######################### +### Migration Status ### +######################### + +collect_migration_status() { + local now + now=$(date +%s) + + # Rake is expensive (can take 2+ mins), use separate cache interval + # Check file mtime so caching works across invocations (textfile mode) + if [[ -f "$RAKE_CACHE" ]]; then + local cache_age + cache_age=$(( now - $(stat -c %Y "$RAKE_CACHE" 2>/dev/null || echo "0") )) + if [[ $cache_age -lt $RAKE_INTERVAL ]]; then + cat "$RAKE_CACHE" + return + fi + fi + + local total=0 up_count=0 down_count=0 migrate_output="" + + if command -v "$GITLAB_RAKE_CMD" &>/dev/null; then + migrate_output=$("$GITLAB_RAKE_CMD" db:migrate:status 2>/dev/null | grep -E '^\s*(up|down)\s' || echo "") + + if [[ -n "$migrate_output" ]]; then + total=$(echo "$migrate_output" | wc -l) + up_count=$(echo "$migrate_output" | grep -cE '^\s*up\s' || true) + down_count=$(echo "$migrate_output" | grep -cE '^\s*down\s' || true) + fi + fi + + local progress=0 + if [[ $total -gt 0 ]]; then + progress=$(awk "BEGIN {printf \"%.2f\", ($up_count / $total) * 100}") + fi + + { + cat </dev/null | head -1 || echo "") + + if [[ -n "$rake_pid" ]]; then + migration_running=1 + migration_pid=$rake_pid + # Get elapsed time in seconds + migration_elapsed=$(ps -o etimes= -p "$rake_pid" 2>/dev/null | tr -d ' ' || echo "0") + + # Try to find the current migration from the log + if [[ -f "$LOG_DIR/gitlab-rails/production.log" ]]; then + current_migration_name=$(grep -oP 'Migrating to \K\S+' "$LOG_DIR/gitlab-rails/production.log" 2>/dev/null | tail -1 || echo "") + fi + fi + + # Also check for gitlab-ctl upgrade processes + local upgrade_pid + upgrade_pid=$(pgrep -f "gitlab-ctl upgrade" 2>/dev/null | head -1 || echo "") + if [[ -n "$upgrade_pid" && "$migration_running" -eq 0 ]]; then + migration_running=1 + migration_pid=$upgrade_pid + migration_elapsed=$(ps -o etimes= -p "$upgrade_pid" 2>/dev/null | tr -d ' ' || 
echo "0") + fi + + cat </dev/null | head -1 || echo "") + + if [[ -n "$reconf_pid" ]]; then + reconfigure_running=1 + reconfigure_elapsed=$(ps -o etimes= -p "$reconf_pid" 2>/dev/null | tr -d ' ' || echo "0") + fi + + cat </dev/null; then + echo "" + echo "# HELP gitlab_service_up Whether a GitLab service is running (1=up, 0=down)" + echo "# TYPE gitlab_service_up gauge" + echo "gitlab_service_up{service=\"unknown\"} 0" + return + fi + + local ctl_output + ctl_output=$("$GITLAB_CTL_CMD" status 2>/dev/null || echo "") + + echo "" + echo "# HELP gitlab_service_up Whether a GitLab service is running (1=up, 0=down)" + echo "# TYPE gitlab_service_up gauge" + echo "# HELP gitlab_service_pid PID of the GitLab service" + echo "# TYPE gitlab_service_pid gauge" + echo "# HELP gitlab_service_uptime_seconds Uptime of the GitLab service in seconds" + echo "# TYPE gitlab_service_uptime_seconds gauge" + + # gitlab-ctl status output format: + # run: alertmanager: (pid 7163) 22805249s; run: log: (pid 7160) 22805249s; + # down: sidekiq: 0s, normally up; run: log: (pid 7000) 22805249s; + if [[ -n "$ctl_output" ]]; then + while IFS= read -r line; do + if [[ "$line" =~ ^run:\ ([^:]+):\ \(pid\ ([0-9]+)\)\ ([0-9]+)s ]]; then + local service="${BASH_REMATCH[1]}" + local pid="${BASH_REMATCH[2]}" + local uptime_str="${BASH_REMATCH[3]}" + service=$(echo "$service" | tr -d ' ') + echo "gitlab_service_up{service=\"$service\"} 1" + echo "gitlab_service_pid{service=\"$service\"} $pid" + echo "gitlab_service_uptime_seconds{service=\"$service\"} $uptime_str" + elif [[ "$line" =~ ^down:\ ([^:]+): ]]; then + local service="${BASH_REMATCH[1]}" + service=$(echo "$service" | tr -d ' ') + echo "gitlab_service_up{service=\"$service\"} 0" + echo "gitlab_service_pid{service=\"$service\"} 0" + echo "gitlab_service_uptime_seconds{service=\"$service\"} 0" + fi + done <<< "$ctl_output" + fi + + # Count services + local total_services up_services down_services + total_services=$(echo "$ctl_output" | grep -cE 
'^(run|down):' || true) + up_services=$(echo "$ctl_output" | grep -cE '^run:' || true) + down_services=$(echo "$ctl_output" | grep -cE '^down:' || true) + + cat </dev/null; then + version=$(gitlab-rake gitlab:env:info 2>/dev/null | grep -oP 'GitLab:\s+\K[\d.]+' || echo "unknown") + fi + + cat </dev/null + elif [[ "$DB_TYPE" == "local" ]]; then + # Local Omnibus DB — gitlab-psql wraps psql with correct socket/user + # It does not accept -d; the database is passed via the positional arg + "$GITLAB_PSQL_CMD" -t -A -c "$query" -d gitlabhq_production 2>/dev/null \ + || sudo -u gitlab-psql "$GITLAB_PSQL_CMD" -t -A -c "$query" -d gitlabhq_production 2>/dev/null \ + || "$GITLAB_PSQL_CMD" -t -A -c "$query" gitlabhq_production 2>/dev/null + else + return 1 + fi +} + +parse_yaml_value() { + # Extract a YAML value, handling quoted strings and stripping whitespace + local raw="$1" + raw=$(echo "$raw" | sed 's/^[[:space:]]*[^:]*:[[:space:]]*//' | sed 's/[[:space:]]*$//') + # Strip surrounding quotes (single or double) + raw=$(echo "$raw" | sed -e 's/^"//' -e 's/"$//' -e "s/^'//" -e "s/'$//") + echo "$raw" +} + +install_psql_client() { + if [[ $EUID -ne 0 ]]; then + log "ERROR: psql client not found. Run as root to auto-install, or install manually." + DB_TYPE="none" + return + fi + + log "Attempting to install PostgreSQL client..." + if command -v amazon-linux-extras &>/dev/null; then + amazon-linux-extras install postgresql14 -y &>/dev/null && log "Installed psql via amazon-linux-extras" + elif command -v apt-get &>/dev/null; then + apt-get update -qq && apt-get install -y -qq postgresql-client &>/dev/null && log "Installed postgresql-client via apt" + elif command -v dnf &>/dev/null; then + dnf install -y -q postgresql &>/dev/null && log "Installed postgresql via dnf" + elif command -v yum &>/dev/null; then + yum install -y -q postgresql &>/dev/null && log "Installed postgresql via yum" + else + log "ERROR: Cannot auto-install psql client. Install manually." 
+ DB_TYPE="none" + return + fi + + # Verify install worked + if ! command -v psql &>/dev/null; then + log "ERROR: psql still not found after install attempt" + DB_TYPE="none" + fi +} + +upgrade_psql_client() { + if [[ $EUID -ne 0 ]]; then + log "ERROR: Cannot upgrade psql — run as root. Or upgrade manually:" + log " Amazon Linux 2: amazon-linux-extras install postgresql14 -y" + log " RHEL/CentOS: yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-\$(rpm -E %%{rhel})-x86_64/pgdg-redhat-repo-latest.noarch.rpm && yum install -y postgresql16" + log " Ubuntu/Debian: apt install -y postgresql-client-16" + return + fi + + log "Attempting to upgrade PostgreSQL client for SCRAM support..." + if command -v amazon-linux-extras &>/dev/null; then + amazon-linux-extras install postgresql14 -y &>/dev/null && log "Upgraded psql via amazon-linux-extras" && return + fi + + if command -v dnf &>/dev/null; then + # shellcheck disable=SC1083 + dnf install -y -q "https://download.postgresql.org/pub/repos/yum/reporpms/EL-$(rpm -E '%{rhel}')-x86_64/pgdg-redhat-repo-latest.noarch.rpm" &>/dev/null + dnf install -y -q postgresql16 &>/dev/null && log "Upgraded to postgresql16 via pgdg repo" && return + elif command -v yum &>/dev/null; then + # shellcheck disable=SC1083 + yum install -y -q "https://download.postgresql.org/pub/repos/yum/reporpms/EL-$(rpm -E '%{rhel}')-x86_64/pgdg-redhat-repo-latest.noarch.rpm" &>/dev/null + yum install -y -q postgresql16 &>/dev/null && log "Upgraded to postgresql16 via pgdg repo" && return + elif command -v apt-get &>/dev/null; then + apt-get update -qq && apt-get install -y -qq postgresql-client-16 &>/dev/null && log "Upgraded to postgresql-client-16 via apt" && return + fi + + log "ERROR: Auto-upgrade failed. Please upgrade manually." 
+} + +detect_database_config() { + DB_HOST="" + DB_PORT="5432" + DB_NAME="gitlabhq_production" + DB_USER="" + DB_PASS="" + DB_TYPE="none" + + # Parse GitLab's database.yml to detect external DB + if [[ -f "$GITLAB_DB_CONFIG" ]]; then + local prod_section + prod_section=$(sed -n '/^production:/,/^[a-z]/p' "$GITLAB_DB_CONFIG" | head -n -1) + + local raw_host + raw_host=$(echo "$prod_section" | grep '^\s*host:' | head -1) + [[ -n "$raw_host" ]] && DB_HOST=$(parse_yaml_value "$raw_host") + + local raw_port raw_name raw_user raw_pass + raw_port=$(echo "$prod_section" | grep '^\s*port:' | head -1) + raw_name=$(echo "$prod_section" | grep '^\s*database:' | head -1) + raw_user=$(echo "$prod_section" | grep '^\s*username:' | head -1) + raw_pass=$(echo "$prod_section" | grep '^\s*password:' | head -1) + + [[ -n "$raw_port" ]] && DB_PORT=$(parse_yaml_value "$raw_port") + [[ -n "$raw_name" ]] && DB_NAME=$(parse_yaml_value "$raw_name") + [[ -n "$raw_user" ]] && DB_USER=$(parse_yaml_value "$raw_user") + [[ -n "$raw_pass" ]] && DB_PASS=$(parse_yaml_value "$raw_pass") + + log "DB config parsed: host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER pass=" + + # If host is set and not localhost/socket path, treat as external DB + if [[ -n "$DB_HOST" && "$DB_HOST" != "localhost" && "$DB_HOST" != "127.0.0.1" && ! "$DB_HOST" =~ ^/ ]]; then + if command -v psql &>/dev/null; then + DB_TYPE="external" + log "Detected external database at $DB_HOST:$DB_PORT" + # Verify connectivity + local test_result + if test_result=$(PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT 1;" 2>&1); then + log "External DB connection test: OK" + elif echo "$test_result" | grep -qi "SCRAM.*libpq"; then + log "ERROR: $test_result" + log "The installed psql client is too old for SCRAM authentication." 
+ upgrade_psql_client + else + log "WARNING: External DB connection test failed: $test_result" + fi + else + log "WARNING: External DB detected at $DB_HOST but psql client not found" + install_psql_client + fi + return + fi + else + log "WARNING: Database config not found at $GITLAB_DB_CONFIG" + fi + + # Fall back to local gitlab-psql + if command -v "$GITLAB_PSQL_CMD" &>/dev/null; then + if "$GITLAB_PSQL_CMD" -t -A -c "SELECT 1;" -d gitlabhq_production &>/dev/null \ + || sudo -u gitlab-psql "$GITLAB_PSQL_CMD" -t -A -c "SELECT 1;" -d gitlabhq_production &>/dev/null \ + || "$GITLAB_PSQL_CMD" -t -A -c "SELECT 1;" gitlabhq_production &>/dev/null; then + DB_TYPE="local" + else + log "WARNING: gitlab-psql found but cannot connect — check permissions (run as root?)" + DB_TYPE="none" + fi + fi +} + +collect_database_health() { + if [[ "$DB_TYPE" == "none" ]]; then + return + fi + + # Check database connectivity + local db_up=0 + if run_db_query "SELECT 1;" &>/dev/null; then + db_up=1 + fi + + # Get database size + local db_size=0 + if [[ $db_up -eq 1 ]]; then + db_size=$(run_db_query "SELECT pg_database_size(current_database());" || true) + db_size=$(echo "$db_size" | tr -d ' ') + [[ -z "$db_size" ]] && db_size=0 + fi + + # Active connections + local db_connections=0 + if [[ $db_up -eq 1 ]]; then + db_connections=$(run_db_query "SELECT count(*) FROM pg_stat_activity WHERE datname = current_database();" || true) + db_connections=$(echo "$db_connections" | tr -d ' ') + [[ -z "$db_connections" ]] && db_connections=0 + fi + + # Active locks (high lock count can indicate migration issues) + local db_locks=0 + if [[ $db_up -eq 1 ]]; then + db_locks=$(run_db_query "SELECT count(*) FROM pg_locks WHERE NOT granted;" || true) + db_locks=$(echo "$db_locks" | tr -d ' ') + [[ -z "$db_locks" ]] && db_locks=0 + fi + + # Schema migration version (latest applied) + local schema_version="0" + if [[ $db_up -eq 1 ]]; then + schema_version=$(run_db_query "SELECT MAX(version) FROM 
schema_migrations;" || true) + schema_version=$(echo "$schema_version" | tr -d ' ') + [[ -z "$schema_version" ]] && schema_version=0 + fi + + cat </dev/null \ + | grep -ciE 'migration.*error|error.*migration|ActiveRecord::StatementInvalid' || true) + migration_warnings=$(tail -1000 "$LOG_DIR/gitlab-rails/production.log" 2>/dev/null \ + | grep -ciE 'migration.*warning|warning.*migration|deprecated' || true) + fi + + # Check reconfigure log + local reconfigure_errors=0 + if [[ -f "$LOG_DIR/reconfigure/latest" ]]; then + reconfigure_errors=$(tail -500 "$LOG_DIR/reconfigure/latest" 2>/dev/null \ + | grep -ciE 'error|fatal|failed' || true) + fi + + cat </dev/null || hostname) + + cat < "$METRICS_CACHE" + LAST_SCRAPE=$now + fi + + local body_length=${#metrics} + cat </dev/null || { + log "Server error, restarting in 5 seconds..." + sleep 5 + } + done +} + +######################### +### Main ### +######################### + +main() { + parse_args "$@" + setup + + if [[ "$HTTP_MODE" == "true" ]]; then + start_server + elif [[ -n "$OUTPUT_FILE" ]]; then + # Textfile collector mode: write atomically using temp file + local output_dir + output_dir="$(dirname "$OUTPUT_FILE")" + mkdir -p "$output_dir" + + local temp_file + temp_file=$(mktemp "${output_dir}/.gitlab_migration_metrics.XXXXXX") + + if ! 
collect_all_metrics > "$temp_file" 2>/dev/null; then + rm -f "$temp_file" + log "ERROR: Failed to generate metrics" + exit 1 + fi + + local file_lines + file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0) + + if [[ "$file_lines" -lt 10 ]]; then + rm -f "$temp_file" + log "ERROR: Metrics file too small ($file_lines lines), keeping previous" + exit 1 + fi + + chmod 644 "$temp_file" + mv -f "$temp_file" "$OUTPUT_FILE" + log "Metrics written to $OUTPUT_FILE ($file_lines lines)" + else + # Default: output to stdout + collect_all_metrics + fi +} + +main "$@" diff --git a/install-code-server.sh b/install-code-server.sh new file mode 100644 index 0000000..b0d4b6e --- /dev/null +++ b/install-code-server.sh @@ -0,0 +1,531 @@ +#!/bin/bash + +#################################################################### +#### Code-Server Install Script #### +#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.3 #### +#### #### +#### Usage: sudo ./install-code-server.sh #### +#################################################################### + +############################# +#### User Configurations #### +############################# +CODEDIR=/code # Home directory for your Code +EMAIL=admin@mydomain.com # your domain email address +HTTPTYPE=APACHE # Choose Apache, Caddy or Nginx All UPPER Case +PASSWD=pAsSwOrD # Your Password for Code-server used for Apache, Nginx and Caddy +UNAME=MyUser # Username Used for Caddy +SERVDIR=/usr/local/code-server # where you want the code-server installed +SERVERNAME=code.mydomain.cloud # server fqdn name +USRDIR=/var/lib/code-server + +######################## +#### System Configs #### +######################## +CADPASS="$(echo -e "${PASSWD}\n$PASSWD" | caddy hash-password 2>/dev/null | tail --lines=1)" +OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' 
'[:lower:]') +OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}') + +define() { + IFS=$'\n' read -r -d '' "$1" + } + +########################################################### +#### Detect Package Manger from OS and OSVer Variables #### +########################################################### +if [ "${OS}" = ubuntu ]; then + PAKMGR="apt-get -y" +elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if [ "${OSVER}" = 7 ]; then + PAKMGR="yum -y" + fi + if [[ ${OSVER} = 8 || ${OSVER} = 9 ]]; then + PAKMGR="dnf -y" + fi +fi + +################################ +#### Check if OS is Updated #### +################################ +if [ "${OS}" = ubuntu ]; then + ${PAKMGR} upgrade + ${PAKMGR} install libc6 libstdc++6 +else + ${PAKMGR} update +fi + +############################################### +#### Get the latest version of Code Server #### +############################################### +get_latest_version() { + { + version="$(curl -fsSLI -o /dev/null -w "%{url_effective}" https://github.com/coder/code-server/releases/latest)" + version="${version#https://github.com/coder/code-server/releases/tag/}" + version="${version#v}" + echo "$version" + } +} + +######################################### +#### Download and Install Codeserver #### +######################################### +install_codeserver() { + { + # check if command wget exists + if ! 
command -v wget >/dev/null 2>&1; then + ${PAKMGR} install wget + fi + cd ~/ || exit + wget "https://github.com/coder/code-server/releases/download/v$version/code-server-$version-linux-amd64.tar.gz" + tar xvf "code-server-$version-linux-amd64.tar.gz" + mkdir -p ${SERVDIR} + cp -r ~/code-server-"$version"-linux-amd64/* ${SERVDIR} + ln -s ${SERVDIR}/bin/code-server /usr/bin/code-server + # Code Directory + mkdir -p "${CODEDIR}" + # User Directory + mkdir -p "${USRDIR}" + + csserv=/lib/systemd/system + touch $csserv/code-server.service + OUTFILE1="$csserv/code-server.service" + define SFILE << EOF + [Unit] + Description=code-server + After=nginx.service + + [Service] + Type=simple + Environment=PASSWORD=$PASSWD + ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR} --auth password + Restart=always + + [Install] + WantedBy=multi-user.target +EOF + + { + printf "%s\n" "$SFILE" | cut -c 2- + } > "$OUTFILE1" + + if [ $HTTPTYPE = CADDY ]; then + sed -i 's/After=nginx.service/After=caddy.service/g' $csserv/code-server.service + sed -i 's/auth: password/auth: none/' /root/.config/code-server/config.yaml + sed -i "s|ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR} --auth password|ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR}|" $csserv/code-server.service + fi + + systemctl daemon-reload + systemctl start code-server + systemctl enable code-server + } +} + +######################################## +#### Install Apache, Nginx or Caddy #### +######################################## +install_http() { + { + if [ $HTTPTYPE = APACHE ]; then + csserv=/lib/systemd/system + sed -i 's/After=nginx.service/After=apache.service/g' $csserv/code-server.service + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if ! 
command -v httpd &> /dev/null; then + ${PAKMGR} install httpd + systemctl enable --now httpd + fi + AOUTFILE="/etc/httpd/conf.d/code-server.conf" + elif [ "${OS}" = ubuntu ]; then + if ! command -v apache2 &> /dev/null; then + ${PAKMGR} install apache2 + systemctl enable --now apache2 + fi + AOUTFILE="/etc/apache2/sites-available/code-server.conf" + fi + define ACONF << 'EOF' + + ServerName $SERVERNAME + #ProxyPreserveHost On + RewriteEngine On + RewriteCond %{HTTP:Upgrade} =websocket [NC] + RewriteRule /(.*) ws://127.0.0.1:8080/$1 [P,L] + RewriteCond %{HTTP:Upgrade} !=websocket [NC] + RewriteRule /(.*) http://127.0.0.1:8080/$1 [P,L] + ProxyRequests off + #RequestHeader set X-Forwarded-Proto https + #RequestHeader set X-Forwarded-Port 443 + ProxyPass / http://127.0.0.1:8080/ nocanon + ProxyPassReverse / http://127.0.0.1:8080/ + +EOF + { + printf "%s\n" "$ACONF" | cut -c 4- + } > "$AOUTFILE" + + systemctl daemon-reload + systemctl restart code-server + systemctl restart httpd + fi + + if [ $HTTPTYPE = NGINX ]; then + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + OUTFILE="/etc/yum.repos.d/nginx.repo" + define NYUM << 'EOF' + [nginx-stable] + name=nginx stable repo + baseurl=http://nginx.org/packages/centos/$releasever/$basearch/ + gpgcheck=1 + enabled=1 + gpgkey=https://nginx.org/keys/nginx_signing.key + module_hotfixes=true +EOF + { + printf "%s\n" "$NYUM" | cut -c 4- + } > "$OUTFILE" + if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + # shellcheck disable=2016 + sed -i 's/baseurl=http:\/\/nginx.org\/packages\/centos\/7\/$basearch\//baseurl=http:\/\/nginx.org\/packages\/centos\/8\/$basearch\//g' $OUTFILE + fi + fi + + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} install curl gnupg2 ca-certificates lsb-release + echo "deb http://nginx.org/packages/ubuntu $(lsb_release -cs) nginx" | sudo tee /etc/apt/sources.list.d/nginx.list + echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" | 
sudo tee /etc/apt/preferences.d/99nginx + curl -o /tmp/nginx_signing.key https://nginx.org/keys/nginx_signing.key + if [ "$OSVER" = 16 ]; then + gpg --with-fingerprint /tmp/nginx_signing.key + else + gpg --dry-run --quiet --import --import-options show-only /tmp/nginx_signing.key + fi + sudo mv /tmp/nginx_signing.key /etc/apt/trusted.gpg.d/nginx_signing.asc + sudo apt update + fi + + ${PAKMGR} install nginx + + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + nxdir=/etc/nginx/conf.d + elif [ "${OS}" = ubuntu ]; then + if [ "$OSVER" = 16 ]; then + nxdir=/etc/nginx/sites-available + else + nxdir=/etc/nginx/conf.d + fi + fi + + OUTFILE2="$nxdir/code-server.conf" + define NFIG << EOF + server { + listen 80; + listen [::]:80; + server_name $SERVERNAME; + location / { + proxy_pass http://localhost:8080/; + proxy_set_header Host \$host; + proxy_set_header Upgrade \$http_upgrade; + proxy_set_header Connection upgrade; + proxy_set_header Accept-Encoding gzip; + } + } +EOF + { + printf "%s\n" "$NFIG" | cut -c 2- + } > "$OUTFILE2" + + if [ "${OS}" = ubuntu ]; then + mv $nxdir/default $nxdir/default.orig + ln -sf /etc/nginx/sites-available/code-server.conf /etc/nginx/sites-enabled/code-server.conf + else + mv $nxdir/default.conf $nxdir/default.conf.orig + fi + systemctl start nginx + systemctl enable nginx + fi + + if [ "$HTTPTYPE" = CADDY ]; then + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} debian-keyring debian-archive-keyring apt-transport-https + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/gpg/gpg.155B6D79CA56EA34.key' | apt-key add - + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/setup/config.deb.txt?distro=debian&version=any-version' | tee -a /etc/apt/sources.list.d/caddy-stable.list + ${PAKMGR} update + ${PAKMGR} install caddy + elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if [ "${OSVER}" = 7 ]; then + ${PAKMGR} install yum-plugin-copr 
+ elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + ${PAKMGR} install 'dnf-command(copr)' + fi + ${PAKMGR} copr enable @caddy/caddy + ${PAKMGR} install caddy + fi + + caddir=/etc/caddy + mv $caddir/Caddyfile $caddir/Caddyfile.orig + touch $caddir/Caddyfile + OUTFILE3="$caddir/Caddyfile" + define CFILE << EOF + { #### Remove these 3 lines + acme_ca https://acme-staging-v02.api.letsencrypt.org/directory #### to make server live + } #### and grab cert from letsencrypt + + $SERVERNAME { + basicauth /* { + $UNAME $CADPASS + } + reverse_proxy 127.0.0.1:8080 + } + +EOF + { + printf "%s\n" "$CFILE" | cut -c 2- + } > "$OUTFILE3" + + systemctl enable caddy + systemctl start caddy + + fi + + } +} + +########################################## +#### Install Certbot and request Cert #### +########################################## +install_certbot() { + { + if [ $HTTPTYPE = NGINX ];then + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} remove letsencrypt + ${PAKMGR} remove certbot + snap install core; snap refresh core + snap install --classic certbot + ${PAKMGR} install python3-certbot-nginx + elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + ${PAKMGR} remove certbot + ${PAKMGR} install epel-release + ${PAKMGR} install snapd + if [ "$OSVER" = 7 ]; then + ${PAKMGR} install python2-certbot-nginx + elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + ${PAKMGR} install python3-certbot-nginx + fi + fi + fi + if [ $HTTPTYPE = APACHE ];then + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} remove letsencrypt + ${PAKMGR} remove certbot + snap install core; snap refresh core + snap install --classic certbot + ${PAKMGR} install python3-certbot-apache + elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + ${PAKMGR} remove certbot + ${PAKMGR} install epel-release + ${PAKMGR} install snapd + if [ "$OSVER" = 7 ]; then + ${PAKMGR} install python2-certbot-apache + elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 
9 ]; then + ${PAKMGR} install python3-certbot-apache + fi + fi + fi + systemctl enable --now snapd.socket + ln -s /var/lib/snapd/snap /snap + snap install core; snap refresh core + snap install --classic certbot + ln -s /snap/bin/certbot /usr/bin/certbot + + #certbot certonly --redirect --agree-tos --nginx -d $SERVERNAME -m "$EMAIL" --dry-run + if [ "$HTTPTYPE" = NGINX ]; then + certbot --non-interactive --redirect --agree-tos --nginx -d $SERVERNAME -m "$EMAIL" + systemctl restart nginx + elif [ "$HTTPTYPE" = APACHE ]; then + certbot --non-interactive --redirect --agree-tos --apache -d $SERVERNAME -m "$EMAIL" + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + systemctl restart httpd + else + systemctl restart apache2 + fi + fi + if [ $HTTPTYPE = NGINX ]; then + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if ! grep "certbot" /var/spool/cron/root; then + echo "0 */12 * * * root certbot -q renew --nginx" >> /var/spool/cron/root + fi + elif [ "${OS}" = ubuntu ]; then + if ! grep "certbot" /var/spool/cron/crontabs/root; then + echo "0 */12 * * * root certbot -q renew --nginx" >> /var/spool/cron/crontabs/root + fi + fi + elif [ $HTTPTYPE = APACHE ]; then + if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if ! grep "certbot" /var/spool/cron/root; then + echo "0 */12 * * * root certbot -q renew --apache" >> /var/spool/cron/root + fi + elif [ "${OS}" = ubuntu ]; then + if ! 
# Install the ConfigServer Firewall (CSF) and apply this script's hardening
# profile to /etc/csf/csf.conf, then enable the csf and lfd services.
#
# Globals read: OS, PAKMGR
# Returns:      1 if csf.conf is missing after the installer ran (nothing to
#               configure); exits if /usr/src or the unpacked csf directory
#               cannot be entered.
#
# Two sed forms are used on purpose:
#   s/KEY = "old"/KEY = "new"/   only changes a key still at its stock value
#   /^KEY/c\KEY = "new"          replaces the whole line regardless of value
function install_firewall() {
    local csf_conf=/etc/csf/csf.conf

    if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
        ${PAKMGR} install ipset perl-libwww-perl.noarch perl-LWP-Protocol-https.noarch perl-GDGraph perl-Sys-Syslog perl-Math-BigInt
    elif [ "${OS}" = ubuntu ]; then
        ${PAKMGR} install ipset libwww-perl liblwp-protocol-https-perl libgd-graph-perl
    fi
    cd /usr/src || exit
    # rm -fv csf.tgz
    wget https://download.configserver.com/csf.tgz
    tar -xzf csf.tgz
    cd csf || exit
    ./install.sh
    echo ''
    echo '###########################################'
    echo '#### Testing if CSF firewall will work ####'
    echo '###########################################'
    echo ''
    perl /usr/local/csf/bin/csftest.pl

    # Without the config file every sed below would only emit an error.
    if [[ ! -f "$csf_conf" ]]; then
        echo "ERROR: ${csf_conf} not found - CSF installation failed" >&2
        return 1
    fi

    ##### Initial Settings #####
    sed -i 's/TESTING = "1"/TESTING = "0"/g' "$csf_conf"
    sed -i 's/RESTRICT_SYSLOG = "0"/RESTRICT_SYSLOG = "3"/g' "$csf_conf"
    sed -i '/^RESTRICT_UI/c\RESTRICT_UI = "1"' "$csf_conf"
    sed -i '/^AUTO_UPDATES/c\AUTO_UPDATES = "1"' "$csf_conf"
    ##### IPv4 Port Settings #####
    sed -i 's/TCP_IN = "20,21,22,25,53,80,110,143,443,465,587,993,995"/TCP_IN = "22,80,443,5666,10000"/g' "$csf_conf"
    sed -i 's/TCP_OUT = "20,21,22,25,53,80,110,113,443,587,993,995"/TCP_OUT = "22,25,53,80,443,5666,10000"/g' "$csf_conf"
    sed -i 's/UDP_IN = "20,21,53,80,443"/UDP_IN = "80,443"/g' "$csf_conf"
    sed -i 's/UDP_OUT = "20,21,53,113,123"/UDP_OUT = "53,113,123"/g' "$csf_conf"
    sed -i '/^ICMP_IN_RATE/c\ICMP_IN_RATE = "1/s"' "$csf_conf"
    ##### IPv6 Port Settings #####
    sed -i 's/IPV6 = "0"/IPV6 = "1"/g' "$csf_conf"
    sed -i 's/TCP6_IN = "20,21,22,25,53,80,110,143,443,465,587,993,995"/TCP6_IN = "22,80,443,5666"/g' "$csf_conf"
    sed -i 's/TCP6_OUT = "20,21,22,25,53,80,110,113,443,587,993,995"/TCP6_OUT = "22,80,443,5666"/g' "$csf_conf"
    sed -i 's/UDP6_IN = "20,21,53,80,443"/UDP6_IN = "80,443"/g' "$csf_conf"
    sed -i 's/UDP6_OUT = "20,21,53,113,123"/UDP6_OUT = "53,113,123"/g' "$csf_conf"
    ##### General Settings #####
    sed -i 's/SYSLOG_CHECK = "0"/SYSLOG_CHECK = "300"/g' "$csf_conf"
    sed -i '/^IGNORE_ALLOW/c\IGNORE_ALLOW = "0"' "$csf_conf"
    sed -i '/^LF_CSF/c\LF_CSF = "1"' "$csf_conf"
    sed -i 's/LF_IPSET = "0"/LF_IPSET = "1"/g' "$csf_conf"
    sed -i '/^PACKET_FILTER/c\PACKET_FILTER = "1"' "$csf_conf"
    ##### SMTP Settings #####
    sed -i 's/SMTP_BLOCK = "0"/SMTP_BLOCK = "1"/g' "$csf_conf"
    ##### Port Flood Settings #####
    sed -i 's/SYNFLOOD = "0"/SYNFLOOD = "1"/g' "$csf_conf"
    sed -i 's/CONNLIMIT = ""/CONNLIMIT = "22;5,25;3,80;10"/g' "$csf_conf"
    sed -i 's/PORTFLOOD = ""/PORTFLOOD = "22;tcp;5;300,25;tcp;5;300,80;tcp;20;5"/g' "$csf_conf"
    sed -i 's/UDPFLOOD = "0"/UDPFLOOD = "1"/g' "$csf_conf"
    ##### Logging Settings #####
    sed -i 's/SYSLOG = "0"/SYSLOG = "1"/g' "$csf_conf"
    sed -i '/^DROP_LOGGING/c\DROP_LOGGING = "1"' "$csf_conf"
    sed -i '/^DROP_ONLYRES/c\DROP_ONLYRES = "0"' "$csf_conf"
    sed -i '/^UDPFLOOD_LOGGING/c\UDPFLOOD_LOGGING = "1"' "$csf_conf"
    ##### Temp to Perm/Netblock Settings #####
    # Anchored on "KEY =" so only this exact key is replaced and keys that
    # merely start with the same prefix are left alone.
    sed -i '/^LF_PERMBLOCK =/c\LF_PERMBLOCK = "1"' "$csf_conf"
    sed -i 's/LF_NETBLOCK = "0"/LF_NETBLOCK = "1"/g' "$csf_conf"
    ##### Login Failure Blocking and Alerts #####
    sed -i 's/LF_SSHD = "5"/LF_SSHD = "3"/g' "$csf_conf"
    sed -i 's/LF_FTPD = "10"/LF_FTPD = "5"/g' "$csf_conf"
    sed -i 's/LF_SMTPAUTH = "0"/LF_SMTPAUTH = "5"/g' "$csf_conf"
    sed -i 's/LF_EXIMSYNTAX = "0"/LF_EXIMSYNTAX = "10"/g' "$csf_conf"
    sed -i 's/LF_POP3D = "0"/LF_POP3D = "5"/g' "$csf_conf"
    sed -i 's/LF_IMAPD = "0"/LF_IMAPD = "5"/g' "$csf_conf"
    sed -i 's/LF_HTACCESS = "0"/LF_HTACCESS = "5"/g' "$csf_conf"
    sed -i 's/LF_MODSEC = "5"/LF_MODSEC = "3"/g' "$csf_conf"
    sed -i 's/LF_CXS = "0"/LF_CXS = "1"/g' "$csf_conf"
    sed -i 's/LF_SYMLINK = "0"/LF_SYMLINK = "5"/g' "$csf_conf"
    sed -i 's/LF_WEBMIN = "0"/LF_WEBMIN = "3"/g' "$csf_conf"
    sed -i '/^LF_SSH_EMAIL_ALERT/c\LF_SSH_EMAIL_ALERT = "1"' "$csf_conf"
    sed -i '/^LF_SU_EMAIL_ALERT/c\LF_SU_EMAIL_ALERT = "1"' "$csf_conf"
    sed -i '/^LF_SUDO_EMAIL_ALERT/c\LF_SUDO_EMAIL_ALERT = "1"' "$csf_conf"
    sed -i '/^LF_WEBMIN_EMAIL_ALERT/c\LF_WEBMIN_EMAIL_ALERT = "1"' "$csf_conf"
    sed -i '/^LF_CONSOLE_EMAIL_ALERT/c\LF_CONSOLE_EMAIL_ALERT = "1"' "$csf_conf"
    sed -i '/^LF_BLOCKINONLY/c\LF_BLOCKINONLY = "0"' "$csf_conf"
    ##### Directory Watching & Integrity #####
    # Anchored on "KEY =" for the same reason as LF_PERMBLOCK above.
    sed -i '/^LF_DIRWATCH =/c\LF_DIRWATCH = "300"' "$csf_conf"
    sed -i '/^LF_INTEGRITY/c\LF_INTEGRITY = "3600"' "$csf_conf"
    ##### Distributed Attacks #####
    sed -i 's/LF_DISTATTACK = "0"/LF_DISTATTACK = "1"/g' "$csf_conf"
    sed -i 's/LF_DISTFTP = "0"/LF_DISTFTP = "5"/g' "$csf_conf"
    sed -i 's/LF_DISTSMTP = "0"/LF_DISTSMTP = "5"/g' "$csf_conf"
    ##### Connection Tracking #####
    sed -i 's/CT_LIMIT = "0"/CT_LIMIT = "300"/g' "$csf_conf"
    ##### Process Tracking #####
    sed -i '/^PT_LIMIT/c\PT_LIMIT = "60"' "$csf_conf"
    sed -i '/^PT_SKIP_HTTP/c\PT_SKIP_HTTP = "0"' "$csf_conf"
    sed -i 's/PT_DELETED = "0"/PT_DELETED = "1"/g' "$csf_conf"
    sed -i 's/PT_USERTIME = "1800"/PT_USERTIME = "0"/g' "$csf_conf"
    sed -i 's/PT_FORKBOMB = "0"/PT_FORKBOMB = "250"/g' "$csf_conf"
    ##### Port Scan Tracking #####
    sed -i 's/PS_INTERVAL = "0"/PS_INTERVAL = "300"/g' "$csf_conf"
    sed -i '/^PS_EMAIL_ALERT/c\PS_EMAIL_ALERT = "1"' "$csf_conf"
    ##### User ID Tracking #####
    sed -i 's/UID_INTERVAL = "0"/UID_INTERVAL = "600"/g' "$csf_conf"
    ##### Account Tracking #####
    sed -i 's/AT_ALERT = "2"/AT_ALERT = "1"/g' "$csf_conf"

    systemctl enable --now csf
    systemctl enable --now lfd
}
# Download and install ntfy
echo "Downloading ntfy..."
ntfy_url="https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_amd64.tar.gz"

# Work in a private scratch directory created by mktemp: this script runs as
# root, and fixed file names under the world-writable /tmp can be pre-created
# or symlinked by another local user.
ntfy_tmp="$(mktemp -d)"
mkdir -p "${ntfy_tmp}/extract"

if ! wget -q -O "${ntfy_tmp}/ntfy.tar.gz" "$ntfy_url"; then
    echo "✗ failed to download ${ntfy_url}"
    rm -rf "$ntfy_tmp"
    exit 1
fi
tar -xzf "${ntfy_tmp}/ntfy.tar.gz" -C "${ntfy_tmp}/extract"

# Locate the binary explicitly so an archive without it is reported here,
# rather than letting a binary left over from an earlier run pass the
# verification step below.
ntfy_bin="$(find "${ntfy_tmp}/extract" -name "ntfy" -type f -print -quit)"
if [[ -z "$ntfy_bin" ]]; then
    echo "✗ ntfy binary not found in release archive"
    rm -rf "$ntfy_tmp"
    exit 1
fi
install -m 0755 "$ntfy_bin" /usr/local/bin/ntfy
rm -rf "$ntfy_tmp"

# Verify installation
echo "Verifying installation..."
if [ -x /usr/local/bin/ntfy ]; then
    echo "✓ ntfy binary installed at /usr/local/bin/ntfy"
else
    echo "✗ ntfy binary not found"
    exit 1
fi
# Write the ntfy server configuration.
# Every path is derived from NTFY_DIR / CONFIG_DIR so that editing the
# configuration variables at the top of the script is enough to relocate the
# installation; with the default values the generated file is unchanged.
cat > "$CONFIG_DIR/server.yml" << EOF
# ntfy server configuration
# Location: ${CONFIG_DIR}/server.yml

# Base URL for the server (used in notification links)
base-url: "http://${DOMAIN}"

# Listen address - use internal port, proxy externally
listen-http: "127.0.0.1:8090"

# Authentication - deny by default, require tokens
auth-default-access: "deny-all"
auth-file: "${NTFY_DIR}/user.db"

# Cache for offline message delivery
cache-file: "${NTFY_DIR}/cache.db"
cache-duration: "24h"

# Behind nginx/caddy reverse proxy
behind-proxy: true

# Attachment settings
attachment-cache-dir: "${NTFY_DIR}/attachments"
attachment-total-size-limit: "1G"
attachment-file-size-limit: "10M"
attachment-expiry-duration: "24h"

# Logging
log-level: "info"
log-format: "json"

# Rate limiting per visitor
visitor-subscription-limit: 30
visitor-request-limit-burst: 60
visitor-request-limit-replenish: "5s"
EOF

# Write the systemd unit. The service account, config path and writable data
# directory follow NTFY_USER / CONFIG_DIR / NTFY_DIR for the same reason.
# ProtectSystem=strict makes the file system read-only for the service, so
# the data directory has to be listed in ReadWritePaths.
cat > /etc/systemd/system/ntfy.service << EOF
# ntfy systemd service
# Location: /etc/systemd/system/ntfy.service

[Unit]
Description=ntfy push notification server
Documentation=https://ntfy.sh/docs/
After=network.target

[Service]
Type=simple
User=${NTFY_USER}
Group=${NTFY_USER}

ExecStart=/usr/local/bin/ntfy serve --config ${CONFIG_DIR}/server.yml
Restart=always
RestartSec=5

# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
ReadWritePaths=${NTFY_DIR}

# Resource limits
LimitNOFILE=65535
MemoryMax=512M

[Install]
WantedBy=multi-user.target
EOF
# Create admin user (skip if exists)
echo "Creating admin user..."
# The ntfy documentation shows `ntfy user list` printing one
# "user NAME (role: ..., tier: ...)" line per account. Both output streams are
# captured because the listing is not guaranteed to arrive on stdout, and the
# pattern also accepts a bare "admin " prefix.
# NOTE(review): confirm the listing format against the pinned NTFY_VERSION.
# Capturing first (instead of piping into grep -q) also keeps `pipefail` from
# turning an early grep exit into a false "not found".
existing_users="$(ntfy user list 2>&1 || true)"
if grep -Eq '^(user )?admin( |$)' <<< "$existing_users"; then
    echo "✓ admin user already exists"
else
    # Prompts for the new account's password on the terminal.
    ntfy user add --role=admin admin
fi
echo ""

# Set access permissions for alert topics
echo "Setting access permissions for alert topics..."
# Best effort: ntfy documents the admin role as having read-write access to
# every topic, so a refused ACL entry for an admin must not abort the
# installation under `set -e`.
if ntfy access admin 'alerts-*' rw; then
    echo "✓ admin has rw access to alerts-*"
else
    echo "ℹ skipped explicit ACL entry for admin; users with the admin role can already read and write every topic"
fi
#########################
### Logging Functions ###
#########################
# Shared writer behind log / log_error / log_info.
#
# Prints "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and appends the same line
# to $logfile. The file append is best-effort: when the log file cannot be
# written (directory not created yet, not running as root) the message is
# still printed and the call succeeds, so logging can never abort a script
# that runs under `set -euo pipefail`.
#
# Arguments: $1 - message text (may be empty)
_log_emit() {
    local line
    line="[$(date '+%Y-%m-%d %H:%M:%S')] ${1:-}"
    printf '%s\n' "$line"
    { printf '%s\n' "$line" >> "${logfile:-/dev/null}"; } 2>/dev/null || true
}

# Log a plain message to stdout and the log file.
log() {
    _log_emit "${1:-}"
}

# Log an "ERROR: "-prefixed message to stderr and the log file.
log_error() {
    _log_emit "ERROR: ${1:-}" >&2
}

# Log an "INFO: "-prefixed message to stdout and the log file.
log_info() {
    _log_emit "INFO: ${1:-}"
}
# Remove this run's scratch tree (/tmp/prometheus-install-<pid>), if present.
# Registered below as the EXIT handler so it runs however the script ends.
cleanup() {
    local scratch="/tmp/prometheus-install-$$"
    [[ ! -d "$scratch" ]] || rm -rf "$scratch"
}

trap cleanup EXIT
# Choose the directory systemd unit files are written to and make sure the
# install log exists.
#
# Globals read:    logfile
# Globals written: psdir - the first of these that exists, in order:
#                  /etc/systemd/system (location for locally administered
#                  units), /usr/lib/systemd/system, /lib/systemd/system.
#                  Defaults to /etc/systemd/system when none exists, so psdir
#                  is never set to a path that was just found to be missing.
setup_directories() {
    local candidate

    psdir='/etc/systemd/system'
    for candidate in /etc/systemd/system /usr/lib/systemd/system /lib/systemd/system; do
        if [[ -d "$candidate" ]]; then
            psdir="$candidate"
            break
        fi
    done

    # Create log directory
    mkdir -p "$(dirname "$logfile")"
    touch "$logfile"
}
#########################
### Dependencies      ###
#########################
# Make sure the helper tools the installer shells out to are available.
#
# Each of wget, curl, tar and unzip that is not already on PATH is installed
# on its own through $pkgmgr, in that order. Nothing is installed when
# SKIP_DEPS is "true".
#
# Globals read: SKIP_DEPS, pkgmgr
install_dependencies() {
    local tool

    if [[ "$SKIP_DEPS" != "true" ]]; then
        log_info "Installing dependencies"

        for tool in wget curl tar unzip; do
            if ! command -v "$tool" >/dev/null 2>&1; then
                # pkgmgr is deliberately unquoted: it carries flags ("apt -y").
                $pkgmgr install "$tool"
            fi
        done
    else
        log_info "Skipping dependency installation"
    fi
}
"$promdir/${dir}" + chmod -R 755 "$promdir/${dir}" + done + + # Download latest Prometheus + log_info "Downloading Prometheus" + curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest | \ + grep browser_download_url | \ + grep linux-amd64 | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download Prometheus" + exit 1 + } + + tar -xzf prometheus*.tar.gz + cd prometheus-*/ + + # Install binaries + mv prometheus promtool "$bindir/" + + # Install config if not exists or in update mode + if [[ ! -f "$promdir/prometheus.yml" ]] || [[ "$UPDATE_MODE" == "true" ]]; then + if [[ -f "$promdir/prometheus.yml" ]]; then + cp "$promdir/prometheus.yml" "$promdir/backups/prometheus.yml.$(date +%Y%m%d_%H%M%S)" + fi + mv prometheus.yml "$promdir/" + fi + + mv consoles/ console_libraries/ "$promdir/" || true + chown -R prometheus. /var/lib/prometheus/ "$promdir/" + + # SELinux context for RHEL 8+ + if [[ "$OS" == "red" && "$OSVER" -ge 8 ]]; then + restorecon -rv "$bindir/prometheus" || true + fi + + # Create systemd service + create_prometheus_service + + systemctl daemon-reload + systemctl enable prometheus + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart prometheus + else + systemctl start prometheus + fi + + log_info "Prometheus installation completed" +} + +create_prometheus_service() { + cat > "$psdir/prometheus.service" << 'EOF' +[Unit] +Description=Prometheus Time Series Collection and Processing Server +Documentation=https://prometheus.io/docs/introduction/overview/ +Wants=network-online.target +After=network-online.target + +[Service] +Type=simple +User=prometheus +Group=prometheus + +ExecReload=/bin/kill -HUP $MAINPID +ExecStart=/usr/local/bin/prometheus \ + --config.file /etc/prometheus/prometheus.yml \ + --storage.tsdb.path /var/lib/prometheus/data \ + --web.console.templates=/etc/prometheus/consoles \ + --web.console.libraries=/etc/prometheus/console_libraries \ + --web.listen-address=0.0.0.0:9090 \ + --web.external-url= 
\ + --enable-feature=new-service-discovery-manager,exemplar-storage,extra-scrape-metrics + +Restart=always +RestartSec=5s +SyslogIdentifier=prometheus + +[Install] +WantedBy=multi-user.target +EOF + + # Create default config if it doesn't exist + if [[ ! -f "$promdir/prometheus.yml" ]]; then + create_prometheus_config + fi +} + +create_prometheus_config() { + cat > "$promdir/prometheus.yml" << 'EOF' +# Global config +global: + scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_timeout: 15s # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: +# - "first_rules.yml" +# - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label 'job=' to any timeseries scraped from this config. + - job_name: 'prometheus' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'server_metrics' + scrape_interval: 5s + static_configs: + - targets: ['localhost:9100'] + labels: + alias: Prometheus Server +EOF +} + +############################# +### Install node_exporter ### +############################# +install_node_exporter() { + log_info "Installing node_exporter" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install node_exporter" + return + fi + + if check_component_installed "node_exporter" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "node_exporter already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/node_exporter" + mkdir -p "$workdir" + cd "$workdir" + + # Download latest node_exporter + log_info "Downloading node_exporter" + curl -s https://api.github.com/repos/prometheus/node_exporter/releases/latest | \ + grep browser_download_url | \ + grep linux-amd64 | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download node_exporter" + exit 1 + } + + tar -xzf node_exporter*.tar.gz + cd node_exporter-*/ + mv node_exporter "$bindir/" + chown prometheus. 
"$bindir/node_exporter" + + # SELinux context for RHEL 8+ + if [[ "$OS" == "red" && "$OSVER" -ge 8 ]]; then + restorecon -rv "$bindir/node_exporter" || true + fi + + # Create systemd service + create_node_exporter_service + + systemctl daemon-reload + systemctl enable node_exporter + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart node_exporter + else + systemctl start node_exporter + fi + + log_info "node_exporter installation completed" +} + +create_node_exporter_service() { + cat > "$psdir/node_exporter.service" << 'EOF' +[Unit] +Description=Prometheus Node Exporter +Wants=network-online.target +After=network-online.target + +[Service] +User=root +Group=root +Type=simple +ExecStart=/usr/local/bin/node_exporter $OPTIONS + +[Install] +WantedBy=multi-user.target +EOF + + # Create default options + echo 'OPTIONS="--collector.ethtool --collector.interrupts --collector.processes --collector.systemd --collector.tcpstat"' > /etc/default/node_exporter +} + +######################## +### Install BlackBox ### +######################## +install_blackbox() { + log_info "Installing blackbox_exporter" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install blackbox_exporter" + return + fi + + if check_component_installed "blackbox_exporter" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "blackbox_exporter already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/blackbox" + mkdir -p "$workdir" + cd "$workdir" + + # Download latest blackbox_exporter + log_info "Downloading blackbox_exporter" + curl -s https://api.github.com/repos/prometheus/blackbox_exporter/releases/latest | \ + grep browser_download_url | \ + grep linux-amd64 | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download blackbox_exporter" + exit 1 + } + + tar -xzf blackbox_exporter*.tar.gz + cd blackbox_exporter-*/ + mv blackbox_exporter "$bindir/" + chown prometheus. 
"$bindir/blackbox_exporter" + + # Install config + mkdir -p "$promdir" + if [[ ! -f "$promdir/blackbox.yml" ]] || [[ "$UPDATE_MODE" == "true" ]]; then + if [[ -f "$promdir/blackbox.yml" ]]; then + cp "$promdir/blackbox.yml" "$promdir/backups/blackbox.yml.$(date +%Y%m%d_%H%M%S)" + fi + mv blackbox.yml "$promdir/" + fi + + chown -R prometheus. "$promdir/" + + # SELinux context for RHEL 8+ + if [[ "$OS" == "red" && "$OSVER" -ge 8 ]]; then + restorecon -rv "$bindir/blackbox_exporter" || true + fi + + # Create systemd service + create_blackbox_service + + systemctl daemon-reload + systemctl enable blackbox_exporter + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart blackbox_exporter + else + systemctl start blackbox_exporter + fi + + # Add to prometheus config if not already present + add_blackbox_to_prometheus_config + + log_info "blackbox_exporter installation completed" +} + +create_blackbox_service() { + cat > "$psdir/blackbox_exporter.service" << 'EOF' +[Unit] +Description=Prometheus Blackbox Exporter Http/Https Monitoring +After=network.target + +[Service] +User=prometheus +Group=prometheus +Type=simple +ExecStart=/usr/local/bin/blackbox_exporter \ + --config.file /etc/prometheus/blackbox.yml \ + --web.listen-address=":9115" + +Restart=always + +[Install] +WantedBy=multi-user.target +EOF +} + +add_blackbox_to_prometheus_config() { + if ! 
grep -q "job_name.*blackbox" "$promdir/prometheus.yml" 2>/dev/null; then + log_info "Adding blackbox configuration to Prometheus" + cat >> "$promdir/prometheus.yml" << 'EOF' + + - job_name: 'blackbox' + metrics_path: /probe + params: + module: [http_2xx] + static_configs: + - targets: + #### Local Targets #### + - http://localhost:9090 + + #### Remote Targets #### + #- https://google.com + + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: localhost:9115 +EOF + + # Restart prometheus to reload config + if systemctl is-active --quiet prometheus; then + systemctl reload prometheus || systemctl restart prometheus + fi + fi +} + +####################### +### Install Grafana ### +####################### +install_grafana() { + log_info "Installing Grafana" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install Grafana" + return + fi + + if check_component_installed "grafana" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "Grafana already installed, skipping" + return + fi + + case $OS in + "ubuntu"|"debian") + install_grafana_debian + ;; + "red"|"centos"|"oracle"|"rocky"|"almalinux") + install_grafana_rhel + ;; + *) + log_error "Unsupported OS for Grafana installation: $OS" + return 1 + ;; + esac + + systemctl daemon-reload + systemctl enable grafana-server + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart grafana-server + else + systemctl start grafana-server + fi + + log_info "Grafana installation completed" +} + +install_grafana_debian() { + # Add Grafana APT repository + $pkgmgr update + $pkgmgr install -y software-properties-common wget + wget -q -O /usr/share/keyrings/grafana.key https://apt.grafana.com/gpg.key + echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://apt.grafana.com stable main" | tee -a /etc/apt/sources.list.d/grafana.list + $pkgmgr update + $pkgmgr install grafana 
+} + +install_grafana_rhel() { + # Add Grafana YUM repository + cat > /etc/yum.repos.d/grafana.repo << 'EOF' +[grafana] +name=grafana +baseurl=https://packages.grafana.com/oss/rpm +repo_gpgcheck=1 +enabled=1 +gpgcheck=1 +gpgkey=https://packages.grafana.com/gpg.key +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +EOF + + $pkgmgr install grafana +} + +################### +### Install Loki ### +################### +install_loki() { + log_info "Installing Loki" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install Loki" + return + fi + + if check_component_installed "loki" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "Loki already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/loki" + mkdir -p "$workdir" + cd "$workdir" + + # Download latest Loki + log_info "Downloading Loki" + curl -s https://api.github.com/repos/grafana/loki/releases/latest | \ + grep browser_download_url | \ + grep loki-linux-amd64.zip | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download Loki" + exit 1 + } + + unzip loki-linux-amd64.zip + mv loki-linux-amd64 "$bindir/loki" + chown prometheus. "$bindir/loki" + chmod +x "$bindir/loki" + + # Create Loki directories + mkdir -p /var/lib/loki/{wal,chunks,index} + chown -R prometheus. /var/lib/loki + + # Create Loki config directory + mkdir -p "$promdir" + chown -R prometheus. "$promdir" + + # Create Loki config + create_loki_config + + # Create systemd service + create_loki_service + + systemctl daemon-reload + systemctl enable loki + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart loki + else + systemctl start loki + fi + + log_info "Loki installation completed" +} + +create_loki_config() { + if [[ ! 
-f "$promdir/loki.yml" ]] || [[ "$UPDATE_MODE" == "true" ]]; then + if [[ -f "$promdir/loki.yml" ]]; then + cp "$promdir/loki.yml" "$promdir/backups/loki.yml.$(date +%Y%m%d_%H%M%S)" + fi + + cat > "$promdir/loki.yml" << 'EOF' +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + path_prefix: /var/lib/loki + storage: + filesystem: + chunks_directory: /var/lib/loki/chunks + rules_directory: /var/lib/loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 + +# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration +# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/ +# +# Statistics help us better understand how Loki is used, and they show us performance +# levels for most users. This helps us prioritize features and documentation. +# For more information on what's sent, look at +# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go +# Refer to the buildReport method to see what goes into a report. +# +# If you would like to disable reporting, uncomment the following lines: +analytics: + reporting_enabled: false +EOF + + chown prometheus. 
"$promdir/loki.yml" + fi +} + +create_loki_service() { + cat > "$psdir/loki.service" << 'EOF' +[Unit] +Description=Loki log aggregation system +After=network.target + +[Service] +Type=simple +User=prometheus +Group=prometheus +ExecStart=/usr/local/bin/loki -config.file /etc/prometheus/loki.yml +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target +EOF +} + +#################### +### Install Alloy ### +#################### +install_alloy() { + log_info "Installing Grafana Alloy" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install Grafana Alloy" + return + fi + + if check_component_installed "alloy" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "Alloy already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/alloy" + mkdir -p "$workdir" + cd "$workdir" + + # Download latest Alloy + log_info "Downloading Grafana Alloy" + curl -s https://api.github.com/repos/grafana/alloy/releases/latest | \ + grep browser_download_url | \ + grep -E "alloy-.*-linux-amd64\.zip" | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download Grafana Alloy" + exit 1 + } + + unzip alloy-*-linux-amd64.zip + mv alloy-*-linux-amd64 "$bindir/alloy" + chown prometheus. "$bindir/alloy" + chmod +x "$bindir/alloy" + + # Create config directory + mkdir -p "$promdir/alloy" + chown -R prometheus. 
"$promdir/alloy" + + # Create basic Alloy config + create_alloy_config + + # Create systemd service + create_alloy_service + + systemctl daemon-reload + systemctl enable alloy + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart alloy + else + systemctl start alloy + fi + + log_info "Grafana Alloy installation completed" +} + +create_alloy_config() { + cat > "$promdir/alloy/config.alloy" << 'EOF' +// Basic Alloy configuration for log collection +logging { + level = "info" + format = "logfmt" +} + +// Loki logs endpoint +loki.write "default" { + endpoint { + url = "http://localhost:3100/loki/api/v1/push" + } +} + +// Local file logs +local.file_match "varlog" { + path_targets = ["/var/log/*.log"] +} + +loki.source.file "varlog" { + targets = local.file_match.varlog.targets + forward_to = [loki.write.default.receiver] +} + +// Prometheus metrics +prometheus.scrape "default" { + targets = [ + {"__address__" = "localhost:9090", "job" = "prometheus"}, + {"__address__" = "localhost:9100", "job" = "node"}, + ] + forward_to = [prometheus.remote_write.default.receiver] +} + +prometheus.remote_write "default" { + endpoint { + url = "http://localhost:9090/api/v1/write" + } +} +EOF + + chown prometheus. 
"$promdir/alloy/config.alloy" +} + +create_alloy_service() { + cat > "$psdir/alloy.service" << 'EOF' +[Unit] +Description=Grafana Alloy +Documentation=https://grafana.com/docs/alloy/ +Wants=network-online.target +After=network-online.target + +[Service] +Type=simple +User=prometheus +Group=prometheus +ExecStart=/usr/local/bin/alloy run /etc/prometheus/alloy/config.alloy +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target +EOF +} + +######################## +### Install Web Server ### +######################## +install_webserver() { + log_info "Installing and configuring $WEBSERVER" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install and configure $WEBSERVER" + return + fi + + case $WEBSERVER in + "nginx") + install_nginx + ;; + "apache") + install_apache + ;; + "caddy") + install_caddy + ;; + *) + log_error "Unsupported web server: $WEBSERVER" + exit 1 + ;; + esac + + log_info "$WEBSERVER installation and configuration completed" +} + +install_nginx() { + $pkgmgr install nginx + + # Determine nginx config directory + if [[ -d "/etc/nginx/sites-available" ]]; then + sitesa="/etc/nginx/sites-available" + sitese="/etc/nginx/sites-enabled/" + elif [[ -d "/etc/nginx/conf.d" ]]; then + sitesa="/etc/nginx/conf.d" + sitese="" + fi + + create_nginx_configs + + # Test nginx config + if ! nginx -t; then + log_error "Nginx configuration test failed" + exit 1 + fi + + systemctl enable nginx + systemctl restart nginx +} + +install_apache() { + case $OS in + "ubuntu"|"debian") + $pkgmgr install apache2 + sitesa="/etc/apache2/sites-available" + sitese="/etc/apache2/sites-enabled" + service_name="apache2" + ;; + "red"|"centos"|"oracle"|"rocky"|"almalinux") + $pkgmgr install httpd + sitesa="/etc/httpd/conf.d" + sitese="" + service_name="httpd" + ;; + esac + + # Enable required modules + if [[ "$OS" == "ubuntu" || "$OS" == "debian" ]]; then + a2enmod proxy proxy_http headers + fi + + create_apache_configs + + # Test apache config + if ! 
$service_name -t; then + log_error "Apache configuration test failed" + exit 1 + fi + + systemctl enable $service_name + systemctl restart $service_name +} + +install_caddy() { + # Install Caddy + case $OS in + "ubuntu"|"debian") + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list + $pkgmgr update + $pkgmgr install caddy + ;; + "red"|"centos"|"oracle"|"rocky"|"almalinux") + $pkgmgr install 'dnf-command(copr)' + $pkgmgr copr enable @caddy/caddy + $pkgmgr install caddy + ;; + esac + + create_caddy_config + + systemctl enable caddy + systemctl restart caddy +} + +create_nginx_configs() { + # Prometheus config + cat > "$sitesa/prometheus.conf" << EOF +server { + listen 80; + listen [::]:80; + + server_name prometheus.$domain; + + location / { + proxy_pass http://localhost:9090/; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + } +} +EOF + + # Grafana config + cat > "$sitesa/grafana.conf" << EOF +server { + listen 80; + listen [::]:80; + + server_name metrics.$domain; + + location / { + proxy_pass http://localhost:3000/; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + } +} +EOF + + # AlertManager config + cat > "$sitesa/alertmanager.conf" << EOF +server { + listen 80; + listen [::]:80; + + server_name alerts.$domain; + + location / { + proxy_pass http://localhost:9093/; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + } +} +EOF + + # Loki config + 
cat > "$sitesa/loki.conf" << EOF +server { + listen 80; + listen [::]:80; + + server_name loki.$domain; + + location / { + proxy_pass http://localhost:3100/; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + } +} +EOF + + # Enable sites if using sites-available/sites-enabled structure + if [[ -n "$sitese" ]]; then + ln -sf "$sitesa/prometheus.conf" "$sitese" 2>/dev/null || true + ln -sf "$sitesa/grafana.conf" "$sitese" 2>/dev/null || true + ln -sf "$sitesa/alertmanager.conf" "$sitese" 2>/dev/null || true + ln -sf "$sitesa/loki.conf" "$sitese" 2>/dev/null || true + fi +} + +create_apache_configs() { + local protocol="http" + local ssl_config="" + + if [[ "$ENABLE_TLS" == "true" ]]; then + protocol="https" + ssl_config=" + SSLEngine on + SSLCertificateFile /etc/ssl/certs/prometheus.crt + SSLCertificateKeyFile /etc/ssl/private/prometheus.key" + fi + + # Prometheus config + cat > "$sitesa/prometheus.conf" << EOF + + ServerName prometheus.$domain + ProxyPreserveHost On + ProxyRequests Off + ProxyPass / http://localhost:9090/ + ProxyPassReverse / http://localhost:9090/ + ProxyPassReverse / $protocol://prometheus.$domain/ + +EOF + + # Grafana config + cat > "$sitesa/grafana.conf" << EOF + + ServerName metrics.$domain + ProxyPreserveHost On + ProxyRequests Off + ProxyPass / http://localhost:3000/ + ProxyPassReverse / http://localhost:3000/ + ProxyPassReverse / $protocol://metrics.$domain/ + +EOF + + # AlertManager config + cat > "$sitesa/alertmanager.conf" << EOF + + ServerName alerts.$domain + ProxyPreserveHost On + ProxyRequests Off + ProxyPass / http://localhost:9093/ + ProxyPassReverse / http://localhost:9093/ + ProxyPassReverse / $protocol://alerts.$domain/ + +EOF + + # Loki config + cat > "$sitesa/loki.conf" << EOF + + ServerName loki.$domain + ProxyPreserveHost On + ProxyRequests Off + ProxyPass / http://localhost:3100/ + 
ProxyPassReverse / http://localhost:3100/ + ProxyPassReverse / $protocol://loki.$domain/ + +EOF + + # Enable sites if using sites-available/sites-enabled structure + if [[ -n "$sitese" ]]; then + a2ensite prometheus grafana alertmanager loki 2>/dev/null || true + fi +} + +create_caddy_config() { + local protocol="http" + local tls_config="" + + if [[ "$ENABLE_TLS" == "true" ]]; then + protocol="https" + tls_config=" + tls /etc/ssl/certs/prometheus.crt /etc/ssl/private/prometheus.key" + fi + + cat > /etc/caddy/Caddyfile << EOF +# Prometheus +prometheus.$domain {$tls_config + reverse_proxy localhost:9090 +} + +# Grafana +metrics.$domain {$tls_config + reverse_proxy localhost:3000 +} + +# AlertManager +alerts.$domain {$tls_config + reverse_proxy localhost:9093 +} + +# Loki +loki.$domain {$tls_config + reverse_proxy localhost:3100 +} +EOF +} + +############################# +### Install AlertManager ### +############################# +install_alertmanager() { + log_info "Installing AlertManager" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install AlertManager" + return + fi + + if check_component_installed "alertmanager" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "AlertManager already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/alertmanager" + mkdir -p "$workdir" + cd "$workdir" + + # Create alertmanager user if doesn't exist + if ! 
grep -q alertmanager /etc/passwd; then + groupadd --system alertmanager + if [[ "$OS" == "ubuntu" || "$OS" == "debian" ]]; then + useradd -s /sbin/nologin --system -g alertmanager alertmanager + else + useradd -m -s /bin/false alertmanager -g alertmanager + fi + mkdir -p /var/lib/alertmanager + chown alertmanager:alertmanager /var/lib/alertmanager + fi + + # Download latest AlertManager + log_info "Downloading AlertManager" + curl -s https://api.github.com/repos/prometheus/alertmanager/releases/latest | \ + grep browser_download_url | \ + grep linux-amd64 | \ + cut -d '"' -f 4 | \ + wget -qi - || { + log_error "Failed to download AlertManager" + exit 1 + } + + tar -xzf alertmanager*.tar.gz + cd alertmanager-*/ + mv amtool alertmanager "$bindir/" + chown alertmanager:alertmanager "$bindir/alertmanager" "$bindir/amtool" + + # Install config if not exists or in update mode + if [[ ! -f "$promdir/alertmanager.yml" ]] || [[ "$UPDATE_MODE" == "true" ]]; then + if [[ -f "$promdir/alertmanager.yml" ]]; then + cp "$promdir/alertmanager.yml" "$promdir/backups/alertmanager.yml.$(date +%Y%m%d_%H%M%S)" + fi + mv alertmanager.yml "$promdir/" + fi + + chown -R alertmanager:alertmanager "$promdir/alertmanager.yml" + + # Create systemd service + create_alertmanager_service + + systemctl daemon-reload + systemctl enable alertmanager + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart alertmanager + else + systemctl start alertmanager + fi + + log_info "AlertManager installation completed" +} + +create_alertmanager_service() { + cat > "$psdir/alertmanager.service" << 'EOF' +[Unit] +Description=Prometheus AlertManager Service +Wants=network-online.target +After=network-online.target + +[Service] +User=alertmanager +Group=alertmanager +Type=simple +ExecStart=/usr/local/bin/alertmanager \ + --config.file /etc/prometheus/alertmanager.yml \ + --storage.path /var/lib/alertmanager/ \ + --cluster.advertise-address=0.0.0.0:9093 + +[Install] +WantedBy=multi-user.target +EOF +} + 
+############################## +### Install MySQL Exporter ### +############################## +install_mysql_exporter() { + log_info "Installing MySQL Exporter" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would install MySQL Exporter" + return + fi + + if check_component_installed "mysqld_exporter" && [[ "$UPDATE_MODE" == "false" ]]; then + log_info "MySQL Exporter already installed, skipping" + return + fi + + local workdir="/tmp/prometheus-install-$$/mysql_exporter" + mkdir -p "$workdir" + cd "$workdir" + + # Download latest mysqld_exporter + log_info "Downloading MySQL Exporter" + curl -s https://api.github.com/repos/prometheus/mysqld_exporter/releases/latest | \ + grep browser_download_url | \ + grep linux-amd64.tar.gz | \ + awk '{gsub(/"/, "", $2); print $2}' | \ + wget -qi - || { + log_error "Failed to download MySQL Exporter" + exit 1 + } + + tar -xzf mysqld_exporter-* + cd mysqld_exporter-*/ + mv mysqld_exporter* "$bindir/mysqld_exporter" + chown prometheus. "$bindir/mysqld_exporter" + + # Create MySQL config + create_mysql_exporter_config + + # Create systemd service + create_mysql_exporter_service + + systemctl daemon-reload + systemctl enable mysqld_exporter + + if [[ "$UPDATE_MODE" == "true" ]]; then + systemctl restart mysqld_exporter + else + systemctl start mysqld_exporter + fi + + # Add to prometheus config + add_mysql_to_prometheus_config + + log_info "MySQL Exporter installation completed" +} + +create_mysql_exporter_config() { + touch /etc/.mysqld_exporter.cnf + chown prometheus. 
/etc/.mysqld_exporter.cnf + chmod 600 /etc/.mysqld_exporter.cnf + + # Generate config based on number of hosts + { + for ((i=1; i<=mynum; i++)); do + echo "[client]" + echo "user=$myuser" + echo "password=$mypass" + eval "echo \"host=\$myhost$i\"" + echo + done + } > /etc/.mysqld_exporter.cnf +} + +create_mysql_exporter_service() { + cat > "$psdir/mysqld_exporter.service" << EOF +[Unit] +Description=MySQL Exporter Service +Wants=network.target +After=network.target + +[Service] +User=prometheus +Group=prometheus +Environment="DATA_SOURCE_NAME=mysqld_exporter:$mypass@$myuser:(/var/lib/mysql/mysql.sock)" +Type=simple +ExecStart=/usr/local/bin/mysqld_exporter +Restart=always + +[Install] +WantedBy=multi-user.target +EOF +} + +add_mysql_to_prometheus_config() { + if ! grep -q "job_name.*mysql" "$promdir/prometheus.yml" 2>/dev/null; then + log_info "Adding MySQL configuration to Prometheus" + cat >> "$promdir/prometheus.yml" << 'EOF' + + - job_name: 'mysql_metrics' + scrape_interval: 5s + static_configs: + - targets: + - localhost:9104 +EOF + + # Restart prometheus to reload config + if systemctl is-active --quiet prometheus; then + systemctl reload prometheus || systemctl restart prometheus + fi + fi +} + +######################### +### Parse Arguments ### +######################### +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + --prometheus) + INSTALL_PROMETHEUS=true + shift + ;; + --no-prometheus) + INSTALL_PROMETHEUS=false + shift + ;; + --node-exporter) + INSTALL_NODE_EXPORTER=true + shift + ;; + --no-node-exporter) + INSTALL_NODE_EXPORTER=false + shift + ;; + --blackbox) + INSTALL_BLACKBOX=true + shift + ;; + --no-blackbox) + INSTALL_BLACKBOX=false + shift + ;; + --alertmanager) + INSTALL_ALERTMANAGER=true + shift + ;; + --no-alertmanager) + INSTALL_ALERTMANAGER=false + shift + ;; + --mysql-exporter) + INSTALL_MYSQL_EXPORTER=true + shift + ;; + --no-mysql-exporter) + INSTALL_MYSQL_EXPORTER=false + shift + ;; + --grafana) + INSTALL_GRAFANA=true + 
shift + ;; + --no-grafana) + INSTALL_GRAFANA=false + shift + ;; + --loki) + INSTALL_LOKI=true + shift + ;; + --no-loki) + INSTALL_LOKI=false + shift + ;; + --alloy) + INSTALL_ALLOY=true + shift + ;; + --no-alloy) + INSTALL_ALLOY=false + shift + ;; + --webserver) + WEBSERVER="$2" + INSTALL_WEBSERVER=true + shift 2 + ;; + --no-webserver) + INSTALL_WEBSERVER=false + shift + ;; + --enable-tls) + ENABLE_TLS=true + shift + ;; + --all) + INSTALL_PROMETHEUS=true + INSTALL_NODE_EXPORTER=true + INSTALL_BLACKBOX=true + INSTALL_ALERTMANAGER=true + INSTALL_MYSQL_EXPORTER=true + INSTALL_GRAFANA=true + INSTALL_LOKI=true + INSTALL_ALLOY=true + INSTALL_WEBSERVER=true + shift + ;; + --update) + UPDATE_MODE=true + shift + ;; + --domain) + domain="$2" + shift 2 + ;; + --config-file) + CONFIG_FILE="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --skip-deps) + SKIP_DEPS=true + shift + ;; + --help) + show_help + exit 0 + ;; + *) + log_error "Unknown option: $1" + show_help + exit 1 + ;; + esac + done +} + +######################### +### Load Config File ### +######################### +load_config_file() { + if [[ -n "$CONFIG_FILE" && -f "$CONFIG_FILE" ]]; then + log_info "Loading configuration from $CONFIG_FILE" + # shellcheck source=/dev/null + source "$CONFIG_FILE" + fi +} + +########################## +### Main Installation ### +########################## +main() { + log_info "Starting Prometheus stack installation" + log_info "Command line: $0 $*" + + parse_arguments "$@" + load_config_file + + check_permissions + detect_os + setup_directories + setup_package_manager + create_prometheus_user + install_dependencies + + # Install components based on flags + if [[ "$INSTALL_PROMETHEUS" == "true" ]]; then + install_prometheus + fi + + if [[ "$INSTALL_NODE_EXPORTER" == "true" ]]; then + install_node_exporter + fi + + if [[ "$INSTALL_BLACKBOX" == "true" ]]; then + install_blackbox + fi + + if [[ "$INSTALL_ALERTMANAGER" == "true" ]]; then + install_alertmanager + fi + + if [[ 
"$INSTALL_MYSQL_EXPORTER" == "true" ]]; then + install_mysql_exporter + fi + + if [[ "$INSTALL_GRAFANA" == "true" ]]; then + install_grafana + fi + + if [[ "$INSTALL_LOKI" == "true" ]]; then + install_loki + fi + + if [[ "$INSTALL_ALLOY" == "true" ]]; then + install_alloy + fi + + if [[ "$INSTALL_WEBSERVER" == "true" ]]; then + install_webserver + fi + + log_info "Installation completed successfully" + + if [[ "$DRY_RUN" == "false" ]]; then + echo + echo "=== Installation Summary ===" + echo "Components installed:" + [[ "$INSTALL_PROMETHEUS" == "true" ]] && echo " ✓ Prometheus (http://localhost:9090)" + [[ "$INSTALL_NODE_EXPORTER" == "true" ]] && echo " ✓ Node Exporter (http://localhost:9100)" + [[ "$INSTALL_BLACKBOX" == "true" ]] && echo " ✓ Blackbox Exporter (http://localhost:9115)" + [[ "$INSTALL_ALERTMANAGER" == "true" ]] && echo " ✓ AlertManager (http://localhost:9093)" + [[ "$INSTALL_MYSQL_EXPORTER" == "true" ]] && echo " ✓ MySQL Exporter (http://localhost:9104)" + [[ "$INSTALL_GRAFANA" == "true" ]] && echo " ✓ Grafana (http://localhost:3000)" + [[ "$INSTALL_LOKI" == "true" ]] && echo " ✓ Loki (http://localhost:3100)" + [[ "$INSTALL_ALLOY" == "true" ]] && echo " ✓ Grafana Alloy" + [[ "$INSTALL_WEBSERVER" == "true" ]] && echo " ✓ $WEBSERVER Web Server" + echo + echo "Check logs at: $logfile" + echo "Default Grafana credentials: admin/admin" + fi +} + +# Run main function +main "$@" diff --git a/iptables-blocklist-metrics.sh b/iptables-blocklist-metrics.sh new file mode 100755 index 0000000..3db8486 --- /dev/null +++ b/iptables-blocklist-metrics.sh @@ -0,0 +1,628 @@ +#!/bin/bash +################################################################################ +# Script Name: iptables-blocklist-metrics.sh +# Version: 2.0 +# Description: Prometheus exporter for iptables threat feed blocking metrics +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT 
+################################################################################ + +# Ensure PATH includes sbin (for ipset/iptables when run from cron) +export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$PATH" +# +# EXPORTED METRICS: +# - iptables_blocklist_info - Exporter metadata +# - iptables_blocklist_enabled_feeds - Count of enabled feeds +# - iptables_blocklist_ipset_size - IPs per feed ipset (IPv4/v6) +# - iptables_blocklist_blocked_total - Block counts per feed (1h, 24h) +# - iptables_blocklist_effectiveness - Blocks per 1000 IPs (24h) +# - iptables_blocklist_last_update_timestamp - Feed cache file mtime +# - iptables_blocklist_cache_age_seconds - Age of feed cache files +# - iptables_blocklist_file_size_bytes - Feed parsed file sizes +# - iptables_blocklist_ip_version_ratio - IPv4 vs IPv6 distribution per feed +# - iptables_blocklist_total_unique_ips - Total unique IPs across all feeds +# - iptables_blocklist_total_rules - Total iptables rules +# - iptables_blocklist_rule_packets - Packet counts from iptables rules +# - iptables_blocklist_rule_bytes - Byte counts from iptables rules +# - iptables_blocklist_conntrack_entries - Current conntrack entries +# - iptables_blocklist_conntrack_max - Maximum conntrack entries +# - iptables_blocklist_conntrack_usage_percent - Conntrack usage percentage +# - iptables_blocklist_whitelist_size - Whitelist ipset sizes +# - iptables_blocklist_exporter_runtime_seconds - Script execution time + +CONFIG_DIR="/etc/iptables-threats" +CACHE_DIR="$CONFIG_DIR/cache" +FEEDS_CONFIG="$CONFIG_DIR/feeds.conf" +IPSET_PREFIX="iptables-feed" +WHITELIST_IPSET="iptables-whitelist" +WHITELIST_IPSET_V6="iptables-whitelist-v6" +LOG_FILE="/var/log/iptables-threats.log" + +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false +HTTP_PORT=9419 +SCRIPT_START_TIME=$(date +%s) +LOCK_FILE="/var/run/iptables-blocklist-metrics.lock" + +show_usage() { + cat </dev/null | grep '^[0-9a-fA-F.:]' | wc -l 2>/dev/null) 
+ echo "${size:-0}" +} + +get_feed_blocks() { + local feed="$1" + local period="$2" + local count + count=$(journalctl -k --since "$period" 2>/dev/null | grep "\[THREAT:${feed}\]" | wc -l 2>/dev/null) + echo "${count:-0}" +} + +get_feed_blocks_v6() { + local feed="$1" + local period="$2" + local count + count=$(journalctl -k --since "$period" 2>/dev/null | grep "\[THREAT-v6:${feed}\]" | wc -l 2>/dev/null) + echo "${count:-0}" +} + +get_file_timestamp() { + [ -f "$1" ] && stat -c %Y "$1" 2>/dev/null || echo "0" +} + +get_file_size() { + [ -f "$1" ] && stat -c %s "$1" 2>/dev/null || echo "0" +} + +get_cache_age() { + if [ -f "$1" ]; then + echo $(($(date +%s) - $(stat -c %Y "$1" 2>/dev/null || echo 0))) + else + echo "0" + fi +} + +get_iptables_rule_stats() { + local chain="$1" + local feed="$2" + # Extract packet and byte counts from iptables -L -v -n -x (exact numbers, no human-readable K/M/G) + iptables -L "$chain" -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${feed}" | head -1 | awk '{print $1"|"$2}' +} + +get_total_unique_ips() { + local ip_version="$1" + local count=0 + + if [ "$ip_version" = "4" ]; then + count=$(cat "$CACHE_DIR/"*-v4.parsed 2>/dev/null | sort -u | wc -l 2>/dev/null) + elif [ "$ip_version" = "6" ]; then + count=$(cat "$CACHE_DIR/"*-v6.parsed 2>/dev/null | sort -u | wc -l 2>/dev/null) + fi + + echo "${count:-0}" +} + +get_conntrack_count() { + if [ -f /proc/sys/net/netfilter/nf_conntrack_count ]; then + cat /proc/sys/net/netfilter/nf_conntrack_count + else + echo "0" + fi +} + +get_conntrack_max() { + if [ -f /proc/sys/net/netfilter/nf_conntrack_max ]; then + cat /proc/sys/net/netfilter/nf_conntrack_max + else + echo "0" + fi +} + +get_ipset_memory() { + local ipset_name="$1" + local mem + mem=$(ipset list "$ipset_name" -t 2>/dev/null | grep "Size in memory:" | awk '{print $4}') + echo "${mem:-0}" +} + +get_cache_disk_usage() { + if [ -d "$CACHE_DIR" ]; then + df -B1 "$CACHE_DIR" 2>/dev/null | tail -1 | awk '{print $3"|"$4"|"$5}' + else + echo 
"0|0|0%" + fi +} + +get_total_cache_size() { + if [ -d "$CACHE_DIR" ]; then + du -sb "$CACHE_DIR" 2>/dev/null | awk '{print $1}' + else + echo "0" + fi +} + +acquire_lock() { + if [ -f "$LOCK_FILE" ]; then + local pid=$(cat "$LOCK_FILE" 2>/dev/null) + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + echo "ERROR: Another instance is already running (PID: $pid)" >&2 + exit 1 + else + echo "Removing stale lock file" >&2 + rm -f "$LOCK_FILE" + fi + fi + echo $$ > "$LOCK_FILE" + trap cleanup EXIT INT TERM +} + +cleanup() { + rm -f "$LOCK_FILE" +} + +generate_metrics() { + local start_time=$(date +%s) + local current_time=$(date +%s) + + cat </dev/null || echo 0) + +# HELP iptables_blocklist_ipset_size Number of IPs per feed ipset +# TYPE iptables_blocklist_ipset_size gauge +EOF + + # Only export metrics for ipsets that actually exist + for ipset_name in $(ipset list -n 2>/dev/null | grep "^${IPSET_PREFIX}-"); do + # Extract feed name and IP version + local feed_name="${ipset_name#${IPSET_PREFIX}-}" + local ip_version="4" + + if [[ "$feed_name" =~ -v6$ ]]; then + feed_name="${feed_name%-v6}" + ip_version="6" + fi + + # Get status from config + local status="disabled" + if grep -q "^1|${feed_name}|" "$FEEDS_CONFIG" 2>/dev/null; then + status="enabled" + fi + + local size=$(get_ipset_size "$ipset_name") + echo "iptables_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"$status\"} $size" + done + + cat </dev/null; then + effectiveness_v4=$(awk "BEGIN {printf \"%.2f\", ($blocks_v4 / $ipset_size) * 1000}" 2>/dev/null || echo "0") + effectiveness_v6=$(awk "BEGIN {printf \"%.2f\", ($blocks_v6 / $ipset_size) * 1000}" 2>/dev/null || echo "0") + else + effectiveness_v4="0" + effectiveness_v6="0" + fi + + echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"4\"} $effectiveness_v4" + echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"6\"} $effectiveness_v6" + done < "$FEEDS_CONFIG" + fi + + # Feed update/cache metrics + 
cat </dev/null; then + ratio_v4=$(awk "BEGIN {printf \"%.4f\", $v4_size / $total}" 2>/dev/null || echo "0") + ratio_v6=$(awk "BEGIN {printf \"%.4f\", $v6_size / $total}" 2>/dev/null || echo "0") + else + ratio_v4="0" + ratio_v6="0" + fi + + echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"4\"} $ratio_v4" + echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"6\"} $ratio_v6" + done < "$FEEDS_CONFIG" + fi + + # Total metrics + cat </dev/null | wc -l) + +# HELP iptables_blocklist_rule_packets Packet counts from iptables rules +# TYPE iptables_blocklist_rule_packets counter +EOF + + if [ -f "$FEEDS_CONFIG" ]; then + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + local stats_log stats_drop packets_log bytes_log packets_drop bytes_drop + + stats_log=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep LOG | head -1 | awk '{print $1"|"$2}') + stats_drop=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep DROP | head -1 | awk '{print $1"|"$2}') + + if [ -n "$stats_log" ]; then + packets_log=$(echo "$stats_log" | cut -d'|' -f1) + bytes_log=$(echo "$stats_log" | cut -d'|' -f2) + echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${packets_log:-0}" + fi + + if [ -n "$stats_drop" ]; then + packets_drop=$(echo "$stats_drop" | cut -d'|' -f1) + bytes_drop=$(echo "$stats_drop" | cut -d'|' -f2) + echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${packets_drop:-0}" + fi + done < "$FEEDS_CONFIG" + fi + + cat </dev/null | grep "${IPSET_PREFIX}-${name}" | grep LOG | head -1 | awk '{print $1"|"$2}') + stats_drop=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep DROP | head -1 | awk '{print $1"|"$2}') + + if [ -n "$stats_log" ]; then + packets_log=$(echo "$stats_log" | cut 
-d'|' -f1) + bytes_log=$(echo "$stats_log" | cut -d'|' -f2) + echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${bytes_log:-0}" + fi + + if [ -n "$stats_drop" ]; then + packets_drop=$(echo "$stats_drop" | cut -d'|' -f1) + bytes_drop=$(echo "$stats_drop" | cut -d'|' -f2) + echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${bytes_drop:-0}" + fi + done < "$FEEDS_CONFIG" + fi + + cat </dev/null; then + conntrack_usage=$(awk "BEGIN {printf \"%.2f\", ($conntrack_count / $conntrack_max) * 100}" 2>/dev/null || echo "0") + else + conntrack_usage="0" + fi + + # Cache disk metrics + local disk_info cache_size disk_used disk_avail disk_pct + disk_info=$(get_cache_disk_usage) + cache_size=$(get_total_cache_size) + disk_used=$(echo "$disk_info" | cut -d'|' -f1) + disk_avail=$(echo "$disk_info" | cut -d'|' -f2) + disk_pct=$(echo "$disk_info" | cut -d'|' -f3 | tr -d '%') + + cat </dev/null 2>&1; then + echo "ERROR: netcat (nc) is required for HTTP mode" + echo "Install with: yum install nmap-ncat (RHEL/CentOS)" + echo " or: apt install netcat (Debian/Ubuntu)" + exit 1 + fi + + while true; do + { + read -r request + if [[ "$request" =~ ^GET\ /metrics ]]; then + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r" + generate_metrics + else + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r" + echo "

iptables Blocklist Metrics Exporter

" + echo "

Per-feed threat blocking statistics

" + echo "

Metrics

" + fi + } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null + done +} + +main() { + parse_args "$@" + + [ ! -d "$CONFIG_DIR" ] && { echo "ERROR: $CONFIG_DIR not found. Run iptables-blocklists.sh first" >&2; exit 1; } + + # Prevent multiple instances (skip for HTTP mode as it should run continuously) + [ "$HTTP_MODE" != true ] && acquire_lock + + if [ "$HTTP_MODE" = true ]; then + run_http_server + elif [ -n "$OUTPUT_FILE" ]; then + # Ensure output directory exists + mkdir -p "$(dirname "$OUTPUT_FILE")" + + # Create temp file in /tmp (not in node_exporter directory!) + # This prevents node_exporter from seeing partial writes + local temp_file=$(mktemp /tmp/iptables_metrics.XXXXXX) + + # Generate metrics to temp file + generate_metrics > "$temp_file" + + # FORCE NEW INODE: Delete old file first, then move + # Some node_exporter versions cache file descriptors + rm -f "$OUTPUT_FILE" + + # Move temp file to final location + mv "$temp_file" "$OUTPUT_FILE" + + # Ensure node_exporter user can read it + chmod 644 "$OUTPUT_FILE" + + # Force filesystem sync (optional but helps) + sync + else + generate_metrics + fi +} + +main "$@" diff --git a/iptables-blocklists.sh b/iptables-blocklists.sh new file mode 100755 index 0000000..537d8a0 --- /dev/null +++ b/iptables-blocklists.sh @@ -0,0 +1,757 @@ +#!/bin/bash +################################################################################ +# Script Name: iptables-blocklists.sh +# Version: 1.0 +# Description: Per-feed iptables threat intelligence blocking with ipset +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +################################################################################ +# Don't use 'set -e' - causes issues with ipset error handling + +CONFIG_DIR="/etc/iptables-threats" +FEEDS_CONFIG="$CONFIG_DIR/feeds.conf" +CACHE_DIR="$CONFIG_DIR/cache" +BACKUP_DIR="$CONFIG_DIR/backups" +IPSET_PREFIX="iptables-feed" +WHITELIST_IPSET="iptables-whitelist" 
+WHITELIST_IPSET_V6="iptables-whitelist-v6" +LOG_FILE="/var/log/iptables-threats.log" +SSH_PORT="22" +ENABLE_AUTO_UPDATE=true +UPDATE_INTERVAL="daily" +ENABLE_IPV6=true +MAX_BACKUPS=5 + +show_usage() { + cat </dev/null 2>&1 || ! command -v ipset >/dev/null 2>&1; then + if command -v dnf >/dev/null 2>&1; then + dnf install -y iptables ipset curl iptables-services + elif command -v yum >/dev/null 2>&1; then + yum install -y iptables ipset curl iptables-services + elif command -v apt-get >/dev/null 2>&1; then + apt-get update && apt-get install -y iptables ipset curl iptables-persistent + else + echo "Cannot install requirements automatically" + exit 1 + fi + fi +} + +create_directory_structure() { + mkdir -p "$CONFIG_DIR" "$CACHE_DIR" "$BACKUP_DIR" + touch "$LOG_FILE" + chmod 700 "$CONFIG_DIR" + chmod 600 "$LOG_FILE" +} + +cleanup_old_backups() { + local backup_count + backup_count=$(find "$BACKUP_DIR" -name 'iptables-save-*.txt' | wc -l) + + if [ "$backup_count" -gt "$MAX_BACKUPS" ]; then + local to_delete=$((backup_count - MAX_BACKUPS)) + find "$BACKUP_DIR" -name 'iptables-save-*.txt' -type f | \ + sort | head -n "$to_delete" | xargs rm -f + log_message "Cleaned up $to_delete old backups (keeping last $MAX_BACKUPS)" + fi +} + +initialize_feeds_config() { + [ -f "$FEEDS_CONFIG" ] && return + + cat > "$FEEDS_CONFIG" <<'EOF' +# Threat Intelligence Feeds Configuration +# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION +# +# ENABLED: 1 (enabled) or 0 (disabled) +# NAME: Unique feed identifier +# URL: Feed URL +# TYPE: Format type (plain, cidr, commented, custom) +# DESCRIPTION: Feed description + +1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs +1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers +1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h 
+0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d +1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous +0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous +0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious +0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List +0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List +1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List +0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs +0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive +0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All +1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks +0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist +0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs +0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts +1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist +1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker +0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers +1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20 +0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional) 
+0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB confidence score 100 1 day +0|abuseipd-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB confidence score 100 3 day +0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB confidence score 100 7 day +1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB confidence score 100 14 day +0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB confidence score 100 30 day +# Add custom feeds below this line +EOF + chmod 600 "$FEEDS_CONFIG" +} + +setup_ipsets() { + log_message "Setting up per-feed ipsets..." + + # Whitelist + if ! ipset list "$WHITELIST_IPSET" >/dev/null 2>&1; then + ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000 + ipset add "$WHITELIST_IPSET" 127.0.0.1 2>/dev/null || true + fi + + if [ "$ENABLE_IPV6" = true ] && ! ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1; then + ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000 + ipset add "$WHITELIST_IPSET_V6" ::1 2>/dev/null || true + fi + + # Create ipset per feed + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + if ! ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000 + fi + + if [ "$ENABLE_IPV6" = true ] && ! 
ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000 + fi + done < "$FEEDS_CONFIG" +} + +download_feed() { + curl -f -s -m 30 -L "$1" -o "$2" 2>/dev/null +} + +parse_feed() { + local file="$1" type="$2" out_v4="$3" out_v6="$4" + true > "$out_v4" + true > "$out_v6" + + case "$type" in + plain) + grep -E '^[0-9.]+(/[0-9]+)?$' "$file" >> "$out_v4" 2>/dev/null || true + [ "$ENABLE_IPV6" = true ] && grep -E '^[0-9a-fA-F:]+(/[0-9]+)?$' "$file" | grep ':' >> "$out_v6" 2>/dev/null || true + ;; + cidr) + grep -E '^[0-9.]+' "$file" | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$out_v4" 2>/dev/null || true + [ "$ENABLE_IPV6" = true ] && grep -E '^[0-9a-fA-F:]+' "$file" | grep ':' | cut -d' ' -f1 | cut -d'#' -f1 >> "$out_v6" 2>/dev/null || true + ;; + commented) + grep -v -E '^[#;]|^$' "$file" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' >> "$out_v4" 2>/dev/null || true + [ "$ENABLE_IPV6" = true ] && grep -v -E '^[#;]|^$' "$file" | grep -oE '[0-9a-fA-F:]+(/[0-9]+)?' | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$out_v6" 2>/dev/null || true + ;; + esac +} + +update_feeds() { + log_message "Starting per-feed update (FAST ipset restore mode)..." + + # Auto-cleanup cache and ipsets for disabled feeds + local enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2) + local cleaned_cache=0 + local cleaned_ipsets=0 + + # Clean cache files + for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed "$CACHE_DIR"/*-v4.restore "$CACHE_DIR"/*-v6.restore; do + [ -f "$cache_file" ] || continue + local bn=$(basename "$cache_file") + local fn="${bn%%.raw}"; fn="${fn%%-v4.parsed}"; fn="${fn%%-v6.parsed}"; fn="${fn%%-v4.restore}"; fn="${fn%%-v6.restore}" + if ! 
echo "$enabled_feeds" | grep -q "^${fn}$"; then + rm -f "$cache_file" && cleaned_cache=$((cleaned_cache + 1)) + fi + done + + # Clean ipsets for disabled feeds + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" = "1" ] && continue + + if ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then + ipset destroy "${IPSET_PREFIX}-${name}" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1)) + fi + if ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then + ipset destroy "${IPSET_PREFIX}-${name}-v6" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1)) + fi + done < "$FEEDS_CONFIG" + + [ "$cleaned_cache" -gt 0 ] && log_message " Cleaned $cleaned_cache stale cache files" + [ "$cleaned_ipsets" -gt 0 ] && log_message " Destroyed $cleaned_ipsets stale ipsets" + + local total=0 failed=0 + + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + total=$((total + 1)) + log_message "Updating: $name" + + local raw="$CACHE_DIR/${name}.raw" + local v4="$CACHE_DIR/${name}-v4.parsed" + local v6="$CACHE_DIR/${name}-v6.parsed" + + if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4" "$v6"; then + local c4 c6=0 + c4=$(wc -l < "$v4" 2>/dev/null || echo 0) + [ "$ENABLE_IPV6" = true ] && c6=$(wc -l < "$v6" 2>/dev/null || echo 0) + + # FAST IPv4: Use ipset restore + if [ "$c4" -gt 0 ]; then + # Ensure target ipset exists for swap + if ! 
ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000 + fi + + { + echo "create ${IPSET_PREFIX}-${name}-tmp hash:net family inet hashsize 4096 maxelem 200000" + echo "flush ${IPSET_PREFIX}-${name}-tmp" + while IFS= read -r ip; do + [ -z "$ip" ] && continue + echo "add ${IPSET_PREFIX}-${name}-tmp $ip" + done < "$v4" + echo "swap ${IPSET_PREFIX}-${name} ${IPSET_PREFIX}-${name}-tmp" + echo "destroy ${IPSET_PREFIX}-${name}-tmp" + } > "$CACHE_DIR/${name}-v4.restore" + + ipset restore < "$CACHE_DIR/${name}-v4.restore" 2>/dev/null || { + log_message " ⚠ Batch load failed for $name IPv4, using fallback" + ipset flush "${IPSET_PREFIX}-${name}" 2>/dev/null || true + while IFS= read -r ip; do + [ -z "$ip" ] && continue + ipset add "${IPSET_PREFIX}-${name}" "$ip" 2>/dev/null || true + done < "$v4" + } + fi + + # FAST IPv6: Use ipset restore + if [ "$ENABLE_IPV6" = true ] && [ "$c6" -gt 0 ]; then + # Ensure target ipset exists for swap + if ! 
ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000 + fi + + { + echo "create ${IPSET_PREFIX}-${name}-v6-tmp hash:net family inet6 hashsize 4096 maxelem 200000" + echo "flush ${IPSET_PREFIX}-${name}-v6-tmp" + while IFS= read -r ip; do + [ -z "$ip" ] && continue + echo "add ${IPSET_PREFIX}-${name}-v6-tmp $ip" + done < "$v6" + echo "swap ${IPSET_PREFIX}-${name}-v6 ${IPSET_PREFIX}-${name}-v6-tmp" + echo "destroy ${IPSET_PREFIX}-${name}-v6-tmp" + } > "$CACHE_DIR/${name}-v6.restore" + + ipset restore < "$CACHE_DIR/${name}-v6.restore" 2>/dev/null || { + log_message " ⚠ Batch load failed for $name IPv6, using fallback" + ipset flush "${IPSET_PREFIX}-${name}-v6" 2>/dev/null || true + while IFS= read -r ip; do + [ -z "$ip" ] && continue + ipset add "${IPSET_PREFIX}-${name}-v6" "$ip" 2>/dev/null || true + done < "$v6" + } + fi + + log_message " ✓ $name: $c4 IPv4, $c6 IPv6" + else + log_message " ✗ Failed: $name" + failed=$((failed + 1)) + fi + done < "$FEEDS_CONFIG" + + # Save ipsets + ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true + + log_message "✓ Updated $total feeds ($failed failed) - FAST IPSET RESTORE MODE" +} + +apply_iptables_rules() { + log_message "Applying per-feed iptables rules..." 
+ + # Backup current rules + iptables-save > "$BACKUP_DIR/iptables-save-$(date +%Y%m%d-%H%M%S).txt" 2>/dev/null || true + cleanup_old_backups + + # Remove old threat feed rules + iptables -D INPUT -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT 2>/dev/null || true + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + iptables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] " 2>/dev/null || true + iptables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}" src -j DROP 2>/dev/null || true + done < "$FEEDS_CONFIG" 2>/dev/null || true + + if [ "$ENABLE_IPV6" = true ]; then + ip6tables -D INPUT -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT 2>/dev/null || true + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + ip6tables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] " 2>/dev/null || true + ip6tables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -j DROP 2>/dev/null || true + done < "$FEEDS_CONFIG" 2>/dev/null || true + fi + + # Add whitelist rules (highest priority) + iptables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT + [ "$ENABLE_IPV6" = true ] && ip6tables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT + + # Add per-feed rules + local line=2 + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + # IPv4 + iptables -I INPUT $line -m set --match-set "${IPSET_PREFIX}-${name}" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] " + line=$((line + 1)) + iptables -I INPUT $line -m set --match-set "${IPSET_PREFIX}-${name}" src -j DROP + line=$((line + 1)) + + # 
IPv6 + if [ "$ENABLE_IPV6" = true ]; then + ip6tables -A INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] " + ip6tables -A INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -j DROP + fi + done < "$FEEDS_CONFIG" + + # SSH rate limiting + if ! iptables -C INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set 2>/dev/null; then + iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set + iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --update --seconds 60 --hitcount 4 -j DROP + fi + + # Save rules + if [ -d /etc/sysconfig ]; then + iptables-save > /etc/sysconfig/iptables + [ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/sysconfig/ip6tables + elif [ -d /etc/iptables ]; then + iptables-save > /etc/iptables/rules.v4 + [ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/iptables/rules.v6 + fi + + log_message "✓ iptables rules applied (per-feed)" +} + +setup_iptables_persistence() { + log_message "Setting up iptables persistence..." 
+ + # Create systemd service for iptables restore + cat > /etc/systemd/system/iptables-restore.service <<'EOF' +[Unit] +Description=Restore iptables rules +Before=network-pre.target +Wants=network-pre.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/bin/bash -c 'ipset restore -f /etc/sysconfig/ipset 2>/dev/null || ipset restore -f /etc/iptables/ipsets 2>/dev/null || true' +ExecStart=/bin/bash -c 'iptables-restore /etc/sysconfig/iptables 2>/dev/null || iptables-restore /etc/iptables/rules.v4 2>/dev/null || true' +ExecStart=/bin/bash -c 'ip6tables-restore /etc/sysconfig/ip6tables 2>/dev/null || ip6tables-restore /etc/iptables/rules.v6 2>/dev/null || true' + +[Install] +WantedBy=multi-user.target +EOF + + systemctl daemon-reload + systemctl enable iptables-restore.service 2>/dev/null || true + log_message "✓ iptables persistence configured" +} + +setup_auto_update() { + [ "$ENABLE_AUTO_UPDATE" = false ] && return + + local script=$(readlink -f "$0") + + cat > /etc/systemd/system/iptables-threat-feeds-update.service < /etc/systemd/system/iptables-threat-feeds-update.timer </dev/null | grep -c '^[0-9.]' || echo 0) + [ "$ENABLE_IPV6" = true ] && v6=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]' || echo 0) + blocks=$(journalctl -k --since "1 hour ago" 2>/dev/null | grep -c "\[THREAT:${name}\]" || echo 0) + + printf "%-25s %10s %10s %12s\n" "$name" "$v4" "$v6" "$blocks" + done < "$FEEDS_CONFIG" +} + +cmd_list_feeds() { + printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESC" + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + printf "%-10s %-25s %s\n" "$([ "$enabled" = "1" ] && echo "ENABLED" || echo "DISABLED")" "$name" "$description" + done < "$FEEDS_CONFIG" +} + +cmd_whitelist_add() { + [ -z "$WHITELIST_IP" ] && { echo "Usage: $0 whitelist-add "; exit 1; } + + if echo "$WHITELIST_IP" | grep -q ':'; then + ipset add "$WHITELIST_IPSET_V6" 
"$WHITELIST_IP" 2>/dev/null && \ + log_message "✓ Added to IPv6 whitelist: $WHITELIST_IP" || \ + { echo "Failed to add $WHITELIST_IP"; exit 1; } + else + ipset add "$WHITELIST_IPSET" "$WHITELIST_IP" 2>/dev/null && \ + log_message "✓ Added to IPv4 whitelist: $WHITELIST_IP" || \ + { echo "Failed to add $WHITELIST_IP"; exit 1; } + fi + + ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true +} + +cmd_whitelist_init() { + log_message "Initializing whitelist with private networks..." + + local private_networks=( + "10.0.0.0/8" + "172.16.0.0/12" + "192.168.0.0/16" + "169.254.0.0/16" + "127.0.0.0/8" + ) + + local private_networks_v6=( + "fc00::/7" + "fe80::/10" + "::1" + ) + + echo "Adding IPv4 private networks to whitelist..." + for net in "${private_networks[@]}"; do + if ipset add "$WHITELIST_IPSET" "$net" 2>/dev/null; then + echo " ✓ $net" + else + echo " - $net (already exists or error)" + fi + done + + if [ "$ENABLE_IPV6" = true ]; then + echo "Adding IPv6 private networks to whitelist..." 
+ for net in "${private_networks_v6[@]}"; do + if ipset add "$WHITELIST_IPSET_V6" "$net" 2>/dev/null; then + echo " ✓ $net" + else + echo " - $net (already exists or error)" + fi + done + fi + + ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true + log_message "✓ Whitelist initialized with RFC1918/private networks" +} + +cmd_whitelist_list() { + echo "==========================================" + echo "IPv4 Whitelist ($WHITELIST_IPSET)" + echo "==========================================" + ipset list "$WHITELIST_IPSET" 2>/dev/null | grep '^[0-9]' || echo "No entries" + + if [ "$ENABLE_IPV6" = true ]; then + echo "" + echo "==========================================" + echo "IPv6 Whitelist ($WHITELIST_IPSET_V6)" + echo "==========================================" + ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep '^[0-9a-fA-F:]' || echo "No entries" + fi +} + +cmd_add_feed() { + [ -z "$FEED_NAME" ] || [ -z "$FEED_URL" ] && { echo "Usage: $0 add-feed "; exit 1; } + grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null && { echo "Feed exists"; exit 1; } + echo "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> "$FEEDS_CONFIG" + log_message "✓ Added feed: $FEED_NAME" +} + +cmd_remove_feed() { + [ -z "$FEED_NAME" ] && { echo "Usage: $0 remove-feed "; exit 1; } + sed -i "/|${FEED_NAME}|/d" "$FEEDS_CONFIG" + + # Remove ipsets and rules + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true + + log_message "✓ Removed feed: $FEED_NAME" + log_message "Reapplying rules..." + apply_iptables_rules +} + +cmd_enable_feed() { + [ -z "$FEED_NAME" ] && { echo "Usage: $0 enable-feed "; exit 1; } + sed -i "s/^0|${FEED_NAME}|/1|${FEED_NAME}|/" "$FEEDS_CONFIG" + log_message "✓ Enabled: $FEED_NAME" + + # Create ipsets if they don't exist + if ! 
ipset list "${IPSET_PREFIX}-${FEED_NAME}" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${FEED_NAME}" hash:net family inet hashsize 4096 maxelem 200000 + fi + if [ "$ENABLE_IPV6" = true ] && ! ipset list "${IPSET_PREFIX}-${FEED_NAME}-v6" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${FEED_NAME}-v6" hash:net family inet6 hashsize 4096 maxelem 200000 + fi + + log_message "Run 'update' to download IPs, then 'apply-rules' to add firewall rules" +} + +cmd_disable_feed() { + [ -z "$FEED_NAME" ] && { echo "Usage: $0 disable-feed "; exit 1; } + sed -i "s/^1|${FEED_NAME}|/0|${FEED_NAME}|/" "$FEEDS_CONFIG" + + # Destroy ipsets to clear metrics + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true + + log_message "✓ Disabled: $FEED_NAME" + log_message "Reapplying rules..." + apply_iptables_rules +} + +cmd_install() { + log_message "Installing per-feed mode..." + check_requirements + create_directory_structure + initialize_feeds_config + setup_ipsets + update_feeds + apply_iptables_rules + setup_iptables_persistence + setup_auto_update + + echo "" + echo "==========================================" + echo "✓ Per-feed installation complete" + echo "==========================================" + echo "Feeds: $(grep -c '^1|' "$FEEDS_CONFIG")" + echo "Config: $FEEDS_CONFIG" + echo "Log: $LOG_FILE" + echo "" + echo "Commands:" + echo " $0 show-stats" + echo " $0 list-feeds" + echo " $0 update" + echo " $0 whitelist-add " + echo "==========================================" +} + +cmd_test_rules() { + log_message "Testing iptables rule generation (dry-run mode)..." 
+ + echo "==========================================" + echo "Rule Generation Test" + echo "==========================================" + echo "" + + # Count enabled feeds + local enabled_count=0 + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + enabled_count=$((enabled_count + 1)) + done < "$FEEDS_CONFIG" + + echo "✓ Found $enabled_count enabled feeds" + echo "" + + # Show what would be generated + echo "IPv4 rules that would be created:" + echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET src -j ACCEPT" + + local line=2 + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + echo " $line. [${name}] LOG: -I INPUT $line -m set --match-set ${IPSET_PREFIX}-${name} src -m limit --limit 5/min -j LOG" + line=$((line + 1)) + echo " $line. [${name}] DROP: -I INPUT $line -m set --match-set ${IPSET_PREFIX}-${name} src -j DROP" + line=$((line + 1)) + done < "$FEEDS_CONFIG" + + echo "" + echo "Total IPv4 rules: $((line - 1))" + + if [ "$ENABLE_IPV6" = true ]; then + echo "" + echo "IPv6 rules that would be created:" + echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET_V6 src -j ACCEPT" + + local v6_count=0 + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + v6_count=$((v6_count + 1)) + echo " $((v6_count * 2)). [${name}] LOG: -A INPUT -m set --match-set ${IPSET_PREFIX}-${name}-v6 src -j LOG" + echo " $((v6_count * 2 + 1)). 
[${name}] DROP: -A INPUT -m set --match-set ${IPSET_PREFIX}-${name}-v6 src -j DROP" + done < "$FEEDS_CONFIG" + + echo "" + echo "Total IPv6 rules: $((v6_count * 2 + 1))" + fi + + echo "" + echo "==========================================" + echo "✓ Test passed - rules would be generated successfully" + echo " To apply these rules, run: $0 apply-rules" + echo "==========================================" +} + +main() { + parse_args "$@" + case "$COMMAND" in + install) cmd_install ;; + update) + check_requirements + create_directory_structure + update_feeds + # DO NOT apply rules here - only update ipsets + # To regenerate rules, use: apply-rules, enable-feed, disable-feed, or remove-feed + ;; + apply-rules) + check_requirements + apply_iptables_rules + ;; + test-rules) cmd_test_rules ;; + list-feeds) cmd_list_feeds ;; + show-stats) cmd_show_stats ;; + add-feed) cmd_add_feed ;; + remove-feed) cmd_remove_feed ;; + enable-feed) cmd_enable_feed ;; + disable-feed) cmd_disable_feed ;; + whitelist-add) cmd_whitelist_add ;; + whitelist-init) cmd_whitelist_init ;; + whitelist-list) cmd_whitelist_list ;; + esac +} + +main "$@" diff --git a/migrate-promtail-to-alloy.sh b/migrate-promtail-to-alloy.sh new file mode 100644 index 0000000..24219b5 --- /dev/null +++ b/migrate-promtail-to-alloy.sh @@ -0,0 +1,565 @@ +#!/bin/bash + +############################################################################## +#### Promtail to Grafana Alloy Migration Script #### +#### #### +#### Detects OS, reads existing Promtail config for Loki URL/hostname, #### +#### generates equivalent Alloy River config, installs Alloy, and #### +#### handles the cutover from Promtail to Alloy. 
####
####                                                                      ####
####  Supports: Ubuntu, Debian, RHEL, CentOS, Rocky, Alma, Amazon Linux   ####
####                                                                      ####
####  Author:   Phil Connor                                               ####
####  License:  MIT                                                       ####
####  Contact:  contact@mylinux.work                                      ####
####  Version:  1.0.0-030326                                              ####
##############################################################################

set -euo pipefail

# Assign first, then mark readonly, so a failing command substitution is not
# hidden behind the exit status of the `readonly` builtin (ShellCheck SC2155).
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
readonly SCRIPT_VERSION="1.0.0-030326"

# Defaults (overridable through command-line options, see show_help)
ALLOY_CONFIG_DIR="/etc/alloy"
ALLOY_CONFIG_FILE="/etc/alloy/config.alloy"
PROMTAIL_CONFIG="/etc/promtail/config.yml"
LOKI_URL=""
CUSTOM_HOSTNAME=""
DRY_RUN=false
GENERATE_ONLY=false
SKIP_INSTALL=false
SKIP_CUTOVER=false
KEEP_PROMTAIL=true
VERBOSE=false
INCLUDE_JOURNAL=true
INCLUDE_NGINX=false
INCLUDE_APACHE=false

# Runtime state filled in by the detect_* / generate_* functions. Initialised
# here so that `set -u` never trips on a value that was not detected yet.
OS=""
OS_PRETTY=""
DETECTED_HOSTNAME=""
PROMTAIL_RUNNING=false
GENERATED_CONFIG=""

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. log/warn write to stdout, error/debug write to stderr.
log()   { echo -e "${GREEN}[INFO]${NC} $1"; }
warn()  { echo -e "${YELLOW}[WARN]${NC} $1"; }
error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }

# Print a debug message when --verbose is set.
# Always returns 0: the previous `[[ ... ]] && echo` form returned 1 whenever
# VERBOSE was false, which made every plain `debug ...` call abort the script
# under `set -e`. Output goes to stderr so it can never end up inside text
# captured with $(...), e.g. the generated Alloy config.
debug() {
    if [[ "$VERBOSE" == true ]]; then
        echo -e "${BLUE}[DEBUG]${NC} $1" >&2
    fi
}

# Print usage information.
show_help() {
    cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

Migrate from Promtail to Grafana Alloy. Generates an Alloy config that
maintains Promtail-compatible labels so existing dashboards keep working.

OPTIONS:
    --loki-url URL          Loki push URL (default: extracted from Promtail config)
    --hostname NAME         Override hostname (default: auto-detect or from Promtail)
    --promtail-config F     Path to existing Promtail config (default: /etc/promtail/config.yml)
    --output FILE           Alloy config output path (default: /etc/alloy/config.alloy)
    --generate-only         Only generate the Alloy config, don't install or cutover
    --skip-install          Skip Alloy installation (already installed)
    --skip-cutover          Generate config and install, but don't stop Promtail
    --no-journal            Skip systemd journal collection
    --include-nginx         Include nginx log collection
    --include-apache        Include Apache log collection
    --remove-promtail       Remove Promtail package after cutover (default: keep)
    --dry-run               Show what would be done without making changes
    --verbose               Enable verbose output
    --version               Show version
    --help, -h              Show this help message

EXAMPLES:
    # Auto-detect everything from existing Promtail config
    sudo $SCRIPT_NAME

    # Specify Loki URL and hostname
    sudo $SCRIPT_NAME --loki-url http://loki.example.com:3100 --hostname web-01

    # Generate config only (don't install or cutover)
    $SCRIPT_NAME --generate-only --loki-url http://loki:3100 --output /tmp/config.alloy

    # Full migration with nginx logs
    sudo $SCRIPT_NAME --include-nginx --remove-promtail

    # Dry run to see what would happen
    sudo $SCRIPT_NAME --dry-run
EOF
}

# Abort with a readable message when an option that needs a value has none.
#   $1 - option name as typed by the user
#   $2 - the value that followed it (empty when missing)
require_value() {
    if [[ -z "${2:-}" ]]; then
        error "Option $1 requires a value"
        exit 1
    fi
}

# Parse command-line options into the global settings above.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            # "${2-}" keeps `set -u` from aborting with a cryptic
            # "unbound variable" when the value is missing.
            --loki-url)        require_value "$1" "${2-}"; LOKI_URL="$2"; shift 2 ;;
            --hostname)        require_value "$1" "${2-}"; CUSTOM_HOSTNAME="$2"; shift 2 ;;
            --promtail-config) require_value "$1" "${2-}"; PROMTAIL_CONFIG="$2"; shift 2 ;;
            --output)          require_value "$1" "${2-}"; ALLOY_CONFIG_FILE="$2"; shift 2 ;;
            --generate-only)   GENERATE_ONLY=true; shift ;;
            --skip-install)    SKIP_INSTALL=true; shift ;;
            --skip-cutover)    SKIP_CUTOVER=true; shift ;;
            --no-journal)      INCLUDE_JOURNAL=false; shift ;;
            --include-nginx)   INCLUDE_NGINX=true; shift ;;
            --include-apache)  INCLUDE_APACHE=true; shift ;;
            --remove-promtail) KEEP_PROMTAIL=false; shift ;;
            --dry-run)         DRY_RUN=true; shift ;;
            --verbose)         VERBOSE=true; shift ;;
            --version)         echo "$SCRIPT_NAME version $SCRIPT_VERSION"; exit 0 ;;
            --help|-h)         show_help; exit 0 ;;
            *)                 error "Unknown option: $1"; show_help; exit 1 ;;
        esac
    done
}

# Populate OS and OS_PRETTY from /etc/os-release; exit if it is missing.
detect_os() {
    if [[ ! -f /etc/os-release ]]; then
        error "Cannot detect OS"
        exit 1
    fi

    # shellcheck source=/dev/null
    . /etc/os-release
    # Fall back gracefully if a field is absent so `set -u` does not abort.
    OS="${ID:-unknown}"
    OS_PRETTY="${PRETTY_NAME:-$OS}"
    debug "Detected OS: $OS_PRETTY"
}

# Set DETECTED_HOSTNAME from, in order of preference: --hostname, the first
# `host:` entry in the Promtail config, the system hostname.
detect_hostname() {
    if [[ -n "$CUSTOM_HOSTNAME" ]]; then
        DETECTED_HOSTNAME="$CUSTOM_HOSTNAME"
        debug "Using custom hostname: $DETECTED_HOSTNAME"
        return
    fi

    # Try to extract from Promtail config
    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        local pt_host
        # `|| true`: with pipefail a grep without matches would abort the script.
        pt_host=$(grep -m1 'host:' "$PROMTAIL_CONFIG" 2>/dev/null | awk '{print $2}' | tr -d '"' || true)
        if [[ -n "$pt_host" ]]; then
            DETECTED_HOSTNAME="$pt_host"
            debug "Extracted hostname from Promtail config: $DETECTED_HOSTNAME"
            return
        fi
    fi

    DETECTED_HOSTNAME=$(hostname -f 2>/dev/null || hostname)
    debug "Using system hostname: $DETECTED_HOSTNAME"
}

# Reduce a Loki URL to its base form: strips a trailing slash and an optional
# /loki/api/v1/push suffix, because the push path is appended again when the
# loki.write block is generated.
#   $1 - URL as given by the user or found in the Promtail config
normalize_loki_url() {
    local url="${1%/}"
    url="${url%/loki/api/v1/push}"
    printf '%s' "${url%/}"
}

# Set LOKI_URL from --loki-url or the first `url:` entry of the Promtail
# config; exit if neither yields a value.
detect_loki_url() {
    if [[ -n "$LOKI_URL" ]]; then
        LOKI_URL=$(normalize_loki_url "$LOKI_URL")
        debug "Using provided Loki URL: $LOKI_URL"
        return
    fi

    # Extract from Promtail config
    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        local pt_url
        pt_url=$(grep -m1 'url:' "$PROMTAIL_CONFIG" 2>/dev/null | awk '{print $2}' | tr -d '"' || true)
        if [[ -n "$pt_url" ]]; then
            LOKI_URL=$(normalize_loki_url "$pt_url")
            debug "Extracted Loki URL from Promtail config: $LOKI_URL"
            return
        fi
    fi

    error "Could not determine Loki URL"
    error "Provide with --loki-url or ensure Promtail config exists at $PROMTAIL_CONFIG"
    exit 1
}

# Record in PROMTAIL_RUNNING whether the promtail unit is currently active.
check_promtail_status() {
    if systemctl is-active --quiet promtail 2>/dev/null; then
        PROMTAIL_RUNNING=true
        log "Promtail is currently running"
    else
        PROMTAIL_RUNNING=false
        debug "Promtail is not running"
    fi
}

# Print an Alloy file-tailing block for one log path.
#   $1 - component label
#   $2 - log file path (may contain a `*` wildcard)
#   $3 - value of the "job" label
#   $4 - raw text appended after the "host" label (extra labels), may be empty
# Literal paths are emitted only when the file exists (or always in dry-run
# mode). Wildcard paths are always emitted and are routed through
# local.file_match, because loki.source.file does not expand globs itself.
generate_file_source() {
    local label="$1"
    local path="$2"
    local job="$3"
    local extra_labels="$4"

    if [[ "$path" == *"*"* ]]; then
        cat << EOF

local.file_match "$label" {
  path_targets = [
    {
      "__path__" = "$path",
      "job" = "$job",
      "host" = "$DETECTED_HOSTNAME",${extra_labels}
    },
  ]
}

loki.source.file "$label" {
  targets    = local.file_match.$label.targets
  forward_to = [loki.write.default.receiver]
}
EOF
    elif [[ "$DRY_RUN" == true || -f "$path" ]]; then
        cat << EOF

loki.source.file "$label" {
  targets = [
    {
      "__path__" = "$path",
      "job" = "$job",
      "host" = "$DETECTED_HOSTNAME",${extra_labels}
    },
  ]
  forward_to = [loki.write.default.receiver]
}
EOF
    else
        debug "Skipping $path (file does not exist)"
    fi
}

# Build the complete Alloy configuration text into GENERATED_CONFIG.
generate_alloy_config() {
    log "Generating Alloy config for $OS ($DETECTED_HOSTNAME)..."

    local os_label
    case "$OS" in
        ubuntu|debian)                    os_label="ubuntu" ;;
        rhel|centos|rocky|almalinux|amzn) os_label="rhel-family" ;;
        *)                                os_label="$OS" ;;
    esac

    local config=""

    # Header
    config+="// Grafana Alloy Configuration for $DETECTED_HOSTNAME
// Migrated from Promtail on $(date +%Y-%m-%d)
// OS: $OS_PRETTY
// Labels maintained for Promtail dashboard compatibility

logging {
  level = \"info\"
}
"

    # Journal source.
    # The relabel rules are attached through `relabel_rules` so they run while
    # the internal __journal_* labels still exist; a loki.relabel component
    # placed downstream via forward_to would no longer see them.
    if [[ "$INCLUDE_JOURNAL" == true ]]; then
        config+="
// System logs via systemd journal
loki.relabel \"journal_relabel\" {
  forward_to = []

  rule {
    source_labels = [\"__journal__systemd_unit\"]
    target_label  = \"unit\"
  }

  rule {
    source_labels = [\"__journal_priority\"]
    target_label  = \"priority\"
  }

  rule {
    source_labels = [\"__journal__hostname\"]
    target_label  = \"hostname\"
  }
}

loki.source.journal \"systemd_journal\" {
  max_age       = \"12h\"
  relabel_rules = loki.relabel.journal_relabel.rules
  labels        = {
    job  = \"systemd-journal\",
    host = \"$DETECTED_HOSTNAME\",
    os   = \"$os_label\",
  }
  forward_to = [loki.write.default.receiver]
}
"
    fi

    # OS-specific file sources
    case "$OS" in
        ubuntu|debian)
            config+="
// Ubuntu/Debian system logs"
            config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "
      \"os\" = \"ubuntu\",")
            config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
      \"log_type\" = \"authentication\",")
            config+=$(generate_file_source "kern" "/var/log/kern.log" "kernel" "")
            config+=$(generate_file_source "cron" "/var/log/cron.log" "cron" "")
            config+=$(generate_file_source "mail" "/var/log/mail.log" "mail" "")
            config+=$(generate_file_source "apt" "/var/log/apt/history.log" "packages" "
      \"package_manager\" = \"apt\",")
            config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
            ;;
        rhel|centos|rocky|almalinux|amzn)
            config+="
// RHEL/CentOS/Rocky/Alma/Amazon Linux system logs"
            config+=$(generate_file_source "messages" "/var/log/messages" "messages" "
      \"os\" = \"rhel-family\",")
            config+=$(generate_file_source "secure" "/var/log/secure" "auth" "
      \"log_type\" = \"authentication\",")
            config+=$(generate_file_source "cron" "/var/log/cron" "cron" "")
            config+=$(generate_file_source "maillog" "/var/log/maillog" "mail" "")
            config+=$(generate_file_source "yum" "/var/log/yum.log" "packages" "
      \"package_manager\" = \"yum\",")
            config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
            ;;
        *)
            config+="
// Generic system logs"
            config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "")
            config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
      \"log_type\" = \"authentication\",")
            ;;
    esac

    # Application wildcard
    config+=$(generate_file_source "application_logs" "/var/log/*.log" "application" "")

    # Nginx
    if [[ "$INCLUDE_NGINX" == true ]]; then
        config+="
// Nginx logs"
        config+=$(generate_file_source "nginx_access" "/var/log/nginx/access.log" "nginx" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "nginx_error" "/var/log/nginx/error.log" "nginx" "
      \"log_type\" = \"error\",")
    fi

    # Apache (Debian-style and RHEL-style locations)
    if [[ "$INCLUDE_APACHE" == true ]]; then
        config+="
// Apache logs"
        config+=$(generate_file_source "apache_access" "/var/log/apache2/access.log" "apache" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "apache_error" "/var/log/apache2/error.log" "apache" "
      \"log_type\" = \"error\",")
        config+=$(generate_file_source "httpd_access" "/var/log/httpd/access_log" "apache" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "httpd_error" "/var/log/httpd/error_log" "apache" "
      \"log_type\" = \"error\",")
    fi

    # Loki write endpoint
    config+="

// Write to Loki
loki.write \"default\" {
  endpoint {
    url = \"${LOKI_URL}/loki/api/v1/push\"
  }
}
"

    GENERATED_CONFIG="$config"
}

# Write GENERATED_CONFIG to the given file, backing up an existing file first.
# In dry-run mode the config is printed instead.
#   $1 - output file path
write_config() {
    local output_file="$1"

    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would write config to $output_file"
        echo "--- Generated config.alloy ---"
        printf '%s\n' "$GENERATED_CONFIG"
        echo "--- End config ---"
        return
    fi

    local output_dir
    output_dir=$(dirname "$output_file")
    mkdir -p "$output_dir"

    # Backup existing config
    if [[ -f "$output_file" ]]; then
        local backup
        backup="${output_file}.bak.$(date +%Y%m%d%H%M%S)"
        cp "$output_file" "$backup"
        log "Backed up existing config to $backup"
    fi

    printf '%s\n' "$GENERATED_CONFIG" > "$output_file"
    log "Alloy config written to $output_file"
}

# Install the alloy package from the Grafana repository for the detected OS.
# No-op in dry-run mode or when the alloy binary is already on PATH.
install_alloy() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would install Grafana Alloy"
        return
    fi

    # Check if already installed
    if command -v alloy >/dev/null 2>&1; then
        log "Alloy is already installed: $(alloy --version 2>&1 | head -1)"
        return
    fi

    log "Installing Grafana Alloy..."

    case "$OS" in
        ubuntu|debian)
            # Refresh the index first and make sure the tools used for the
            # repository key (wget, gpg) are actually present.
            apt-get update -qq
            apt-get install -y apt-transport-https software-properties-common gnupg wget
            mkdir -p /etc/apt/keyrings/
            wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor | tee /etc/apt/keyrings/grafana.gpg > /dev/null
            echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | tee /etc/apt/sources.list.d/grafana.list
            apt-get update -qq
            apt-get install -y alloy
            ;;
        rhel|centos|rocky|almalinux|amzn)
            cat > /etc/yum.repos.d/grafana.repo << 'REPO'
[grafana]
name=grafana
baseurl=https://rpm.grafana.com
repo_gpgcheck=1
enabled=1
gpgcheck=1
gpgkey=https://rpm.grafana.com/gpg.key
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
REPO
            if command -v dnf >/dev/null 2>&1; then
                dnf install -y alloy
            else
                yum install -y alloy
            fi
            ;;
        *)
            error "Unsupported OS for automatic installation: $OS"
            error "Install Alloy manually: https://grafana.com/docs/alloy/latest/set-up/install/"
            exit 1
            ;;
    esac

    log "Alloy installed: $(alloy --version 2>&1 | head -1)"
}

# Syntax-check the written config with `alloy fmt`; exit on failure.
# Skipped in dry-run mode or when alloy is not installed.
validate_config() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would validate config with 'alloy fmt'"
        return
    fi

    if ! command -v alloy >/dev/null 2>&1; then
        warn "Alloy not installed, skipping validation"
        return
    fi

    log "Validating Alloy config..."
    if alloy fmt "$ALLOY_CONFIG_FILE" >/dev/null 2>&1; then
        log "Config validation passed"
    else
        error "Config validation failed. Check $ALLOY_CONFIG_FILE for syntax errors"
        error "Run: alloy fmt $ALLOY_CONFIG_FILE"
        exit 1
    fi
}

# Stop Promtail, start Alloy, and optionally remove the Promtail package.
# If Alloy does not come up, Promtail is re-enabled (only when it was running
# before the cutover) and the script exits with status 1.
perform_cutover() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would stop Promtail and start Alloy"
        return
    fi

    # Remember the pre-cutover state so a rollback does not start a service
    # that the operator had deliberately left stopped.
    local promtail_was_running=false

    # Stop Promtail
    if systemctl is-active --quiet promtail 2>/dev/null; then
        promtail_was_running=true
        log "Stopping Promtail..."
        systemctl stop promtail
        systemctl disable promtail
        log "Promtail stopped and disabled"
    fi

    # Add alloy user to the groups that own the logs it must read
    if getent group adm >/dev/null 2>&1; then
        usermod -a -G adm alloy 2>/dev/null || true
    fi
    if getent group systemd-journal >/dev/null 2>&1; then
        usermod -a -G systemd-journal alloy 2>/dev/null || true
    fi

    # Start Alloy
    log "Starting Alloy..."
    systemctl enable --now alloy
    sleep 2

    if systemctl is-active --quiet alloy; then
        log "Alloy is running"
    else
        error "Alloy failed to start. Check: journalctl -u alloy --no-pager -n 30"
        if [[ "$promtail_was_running" == true ]]; then
            error "Rolling back — restarting Promtail"
            systemctl enable --now promtail 2>/dev/null || true
        fi
        exit 1
    fi

    # Remove Promtail if requested
    if [[ "$KEEP_PROMTAIL" == false ]]; then
        log "Removing Promtail package..."
        case "$OS" in
            ubuntu|debian) apt-get remove -y promtail 2>/dev/null || true ;;
            *) yum remove -y promtail 2>/dev/null || dnf remove -y promtail 2>/dev/null || true ;;
        esac
        log "Promtail removed"
    else
        log "Promtail package kept (use 'systemctl start promtail' to rollback)"
    fi
}

# Print a summary of what was detected and done, plus verify/rollback hints.
print_summary() {
    echo ""
    echo "=========================================="
    echo "  Migration Summary"
    echo "=========================================="
    echo "  OS:            $OS_PRETTY"
    echo "  Hostname:      $DETECTED_HOSTNAME"
    echo "  Loki URL:      $LOKI_URL"
    echo "  Alloy config:  $ALLOY_CONFIG_FILE"

    if [[ "$DRY_RUN" != true ]] && [[ "$GENERATE_ONLY" != true ]]; then
        echo ""
        echo "  Alloy status:  $(systemctl is-active alloy 2>/dev/null || echo 'not checked')"
        echo ""
        echo "  Verify:"
        echo "    systemctl status alloy"
        echo "    journalctl -u alloy -f"
        echo "    curl http://localhost:12345  (Alloy UI)"
        echo ""
        echo "  Rollback:"
        echo "    sudo systemctl stop alloy"
        echo "    sudo systemctl start promtail"
    fi

    if [[ "$GENERATE_ONLY" == true ]]; then
        echo ""
        echo "  Config generated. Review and deploy manually."
    fi

    echo "=========================================="
    echo ""
}

# Entry point: parse options, detect the environment, generate and write the
# config, then (unless limited by options) install, validate and cut over.
main() {
    parse_arguments "$@"

    log "Promtail → Alloy Migration Script v${SCRIPT_VERSION}"
    echo ""

    # Check root (unless generate-only or dry-run)
    if [[ "$GENERATE_ONLY" != true ]] && [[ "$DRY_RUN" != true ]] && [[ "$EUID" -ne 0 ]]; then
        error "This script must be run as root (or use --generate-only)"
        exit 1
    fi

    detect_os
    detect_hostname
    detect_loki_url
    check_promtail_status

    # Generate config
    generate_alloy_config
    write_config "$ALLOY_CONFIG_FILE"

    if [[ "$GENERATE_ONLY" == true ]]; then
        print_summary
        exit 0
    fi

    # Install Alloy
    if [[ "$SKIP_INSTALL" != true ]]; then
        install_alloy
    fi

    # Validate
    validate_config

    # Cutover
    if [[ "$SKIP_CUTOVER" != true ]]; then
        perform_cutover
    fi

    print_summary
}

main "$@"
diff --git a/mimir-server-install.sh b/mimir-server-install.sh new file mode 100755 index 0000000..c325afa --- /dev/null +++ b/mimir-server-install.sh @@ -0,0 +1,305 @@ +#!/bin/bash + +############################################################# +#### Grafana Mimir Install Script for Oracle Linux, #### +#### Centos/Redhat and Debian/Ubuntu Servers.
####
####                                                     ####
####  Author: Phil Connor 01/09/2025                     ####
####  License: MIT                                       ####
####  Contact: contact@mylinux.work                      ####
####  Version 1.00.010925                                ####
####                                                     ####
####  To use this script chmod it to 755                 ####
####  or simply type: bash mimir-server-install.sh       ####
#############################################################

########################
### System Variables ###
########################
# OS is the lower-cased distributor name; it is only used to choose between
# the apt and yum code paths below.
if command -v lsb_release >/dev/null 2>&1; then
    OS=$(lsb_release -i | awk '{print $3}' | tr '[:upper:]' '[:lower:]')
else
    OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
fi

domain=mylinux.work
bindir=/usr/local/bin
mimirdir=/etc/prometheus
datadir=/mimir

# Official download location of the latest single-binary release.
mimir_url=https://github.com/grafana/mimir/releases/latest/download/mimir-linux-amd64

# Locally created units belong in /etc/systemd/system. The previous logic
# fell back to /usr/lib/systemd/system exactly when that directory did not
# exist, which made the unit file write fail.
psdir='/etc/systemd/system'

#########################
### Check permissions ###
#########################
if [[ $EUID -ne 0 ]]; then
    echo ''
    echo "$(basename "$0") This script must be run as root! Login as root, or sudo/su."
    echo ''
    exit 1
fi

######################
### Package Manager ##
######################
# Debian is handled like Ubuntu; it previously fell through to yum.
case "$OS" in
    ubuntu|debian) pkgmgr="apt-get -y" ;;
    *)             pkgmgr="yum -y" ;;
esac

#################################
####  Add Mimir User/Group   ####
#################################
# getent gives an exact-name lookup without printing the passwd entry, and
# checking the group separately keeps the script re-runnable.
if ! getent group mimir >/dev/null 2>&1; then
    groupadd --system mimir
fi

if ! getent passwd mimir >/dev/null 2>&1; then
    case "$OS" in
        ubuntu|debian) useradd -s /sbin/nologin --system -g mimir mimir ;;
        *)             useradd -m -s /bin/false mimir -g mimir ;;
    esac
fi

#################################
#### Check for wget and curl ####
#################################
for tool in wget curl unzip; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        # $pkgmgr is intentionally unquoted: it holds "command flag".
        $pkgmgr install "$tool"
    fi
done

##########################
###   Install Mimir    ###
##########################
# Create the directory layout, download the Mimir binary, write a
# single-instance config and a systemd unit, then enable and start the service.
install_mimir() {
    # Create base directories if they don't exist
    mkdir -p "$mimirdir" || { echo "Failed to create $mimirdir directory"; exit 1; }
    mkdir -p "$datadir" || { echo "Failed to create $datadir directory"; exit 1; }

    # Create Mimir subdirectories
    mkdir -p "$datadir"/{tsdb-sync,data/tsdb,mimir-tsdb,compactor,mimir-ruler}
    chown -R mimir:mimir "$datadir"

    # Download and install Mimir.
    # Releases ship the server as a bare executable (not a tarball), so fetch
    # exactly that asset and install it with the execute bit set. The previous
    # pipeline downloaded every linux-amd64 asset and left the binary
    # non-executable.
    echo "Downloading latest Grafana Mimir..."
    local tmpbin
    tmpbin=$(mktemp /tmp/mimir-linux-amd64.XXXXXX) || exit 2
    if ! curl -fsSL -o "$tmpbin" "$mimir_url"; then
        echo "Failed to download Mimir"
        rm -f "$tmpbin"
        exit 1
    fi
    install -o mimir -g mimir -m 0755 "$tmpbin" "$bindir/mimir" || exit 1
    rm -f "$tmpbin"

    # Get server IP address; only advertise it when one was found, otherwise
    # the key would be written with an empty (null) value.
    local advertise_line=""
    SERVER_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
    if [ -n "$SERVER_IP" ]; then
        advertise_line="  advertise_addr: $SERVER_IP"
    fi

    # Create Mimir config
    cat > "$mimirdir/mimir.yml" << EOF
# Mimir Configuration - Single Instance Mode
multitenancy_enabled: false

blocks_storage:
  backend: filesystem
  bucket_store:
    sync_dir: $datadir/tsdb-sync
  filesystem:
    dir: $datadir/data/tsdb
  tsdb:
    dir: $datadir/mimir-tsdb
    retention_period: 720h

compactor:
  data_dir: $datadir/compactor
  sharding_ring:
    kvstore:
      store: inmemory

distributor:
  ring:
    kvstore:
      store: inmemory

ingester:
  ring:
    kvstore:
      store: inmemory
    replication_factor: 1

ruler_storage:
  backend: filesystem
  filesystem:
    dir: $datadir/mimir-ruler

server:
  http_listen_port: 9009
  log_level: info

memberlist:
  abort_if_cluster_join_fails: false
  bind_port: 7946
${advertise_line}
  join_members: []

store_gateway:
  sharding_ring:
    replication_factor: 1
    kvstore:
      store: inmemory

limits:
  max_global_series_per_user: 0
  max_global_exemplars_per_user: 100000
EOF

    chown mimir:mimir "$mimirdir/mimir.yml"

    # Create systemd service (\$MAINPID must reach the unit file unexpanded)
    cat > "$psdir/mimir.service" << EOF
[Unit]
Description=Grafana Mimir
Documentation=https://grafana.com/docs/mimir/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=mimir
Group=mimir
ExecStart=$bindir/mimir -config.file=$mimirdir/mimir.yml
ExecReload=/bin/kill -HUP \$MAINPID
TimeoutStopSec=20s
SendSIGKILL=no

# Output to journal
StandardOutput=journal
StandardError=journal
SyslogIdentifier=mimir

# Restart
Restart=on-failure
RestartSec=5s

# Security
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=full
ProtectHome=yes
ReadWritePaths=$datadir

# Resource limits
LimitNOFILE=1048576
LimitNPROC=1048576

# Environment
Environment=GOMAXPROCS=4

[Install]
WantedBy=multi-user.target
EOF

    systemctl daemon-reload
    systemctl enable --now mimir

    echo ""
    echo "=========================================="
    echo "Mimir installation complete!"
    echo "=========================================="
    echo "Mimir UI: http://localhost:9009"
    echo "Config:   $mimirdir/mimir.yml"
    echo "Data:     $datadir"
    echo ""
    echo "Add to Prometheus remote_write:"
    echo "  remote_write:"
    echo "    - url: http://localhost:9009/api/v1/push"
    echo ""
}

################################
### Install and Config Nginx ###
################################
# Install nginx and publish Mimir at mimir.$domain through a reverse proxy.
# Returns 1 when no known nginx site directory exists.
install_nginx() {
    $pkgmgr install nginx

    local sitesa="" sitese=""
    if [ -d "/etc/nginx/sites-available" ]; then
        sitesa=/etc/nginx/sites-available
        sitese=/etc/nginx/sites-enabled/
    elif [ -d "/etc/nginx/conf.d" ]; then
        sitesa=/etc/nginx/conf.d
    else
        # Previously $sitesa stayed unset here and the config was written
        # to "/mimir.conf".
        echo "No nginx site directory found (sites-available or conf.d)"
        return 1
    fi

    # nginx runtime variables are escaped so the shell leaves them alone;
    # only $domain is expanded here.
    cat > "$sitesa/mimir.conf" << EOF
server {
    listen 80;
    listen [::]:80;

    server_name mimir.$domain;

    location / {
        proxy_pass http://localhost:9009/;
        proxy_set_header Host \$host;
        proxy_set_header X-Real-IP \$remote_addr;
        proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto \$scheme;
        proxy_read_timeout 300s;
        proxy_connect_timeout 75s;
    }
}
EOF

    # Debian-style layout: enable the site via symlink (ignore "already exists")
    if [ -n "$sitese" ]; then
        ln -s "$sitesa/mimir.conf" "$sitese" 2>/dev/null || true
    fi

    if nginx -t; then
        systemctl restart nginx
        echo "Nginx configured for Mimir at mimir.$domain"
    else
        echo "Nginx configuration test failed"
    fi
}

######################
### Function Calls ###
######################
install_mimir

# Uncomment to install nginx reverse proxy
# install_nginx

#############################################################

echo ""
echo "=========================================="
echo "Installation Summary"
echo "=========================================="
echo "Mimir version: $("$bindir/mimir" --version 2>&1 | head -1)"
echo "Status:        $(systemctl is-active mimir)"
echo ""
echo "Check status:  systemctl status mimir"
echo "View logs:     journalctl -u mimir -f"
echo ""
diff --git a/nginx-metrics-exporter.sh b/nginx-metrics-exporter.sh new file mode 100644 index 0000000..0ae1f65 --- /dev/null +++ b/nginx-metrics-exporter.sh @@ -0,0 +1,1117 @@ +#!/bin/bash +############################################################# +#### Nginx Metrics Exporter for Prometheus #### +#### Comprehensive nginx monitoring via stub_status, #### +#### logs, SSL, process, and config metrics #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.1 #### +#### #### +#### Usage: ./nginx-metrics-exporter.sh [OPTIONS] #### +############################################################# +# +# Metrics collected: +# - stub_status: connections, accepts, handled, requests, reading, writing, waiting +# - Process: worker count, memory usage, CPU usage, open files +# - Access logs: requests by status code, response times, bytes transferred +# - SSL: certificate expiry days for configured domains +# - Config: worker_processes, worker_connections, keepalive_timeout +# - Upstream: health status (if configured) +# +# Requirements: +# - nginx with stub_status module enabled +# - socat (for HTTP server) +# - curl (for stub_status fetching) +# +set -euo pipefail + +######################### +### Configuration ### +######################### + +LISTEN_PORT="${NGINX_EXPORTER_PORT:-9113}" +STUB_STATUS_URL="${NGINX_STUB_URL:-http://127.0.0.1/nginx_status}" +ACCESS_LOG="${NGINX_ACCESS_LOG:-/var/log/nginx/access.log}" +ERROR_LOG="${NGINX_ERROR_LOG:-/var/log/nginx/error.log}" +NGINX_CONF="${NGINX_CONF:-/etc/nginx/nginx.conf}" +SITES_DIR="${NGINX_SITES_DIR:-/etc/nginx/sites-enabled}" +CONF_D_DIR="${NGINX_CONF_D:-/etc/nginx/conf.d}" +SCRAPE_INTERVAL="${SCRAPE_INTERVAL:-15}" +SSL_CHECK_DOMAINS="${SSL_CHECK_DOMAINS:-}" # Comma-separated list of domains to
check SSL + +# Log parsing settings +LOG_TAIL_LINES="${LOG_TAIL_LINES:-10000}" # Number of lines to parse from access log +LOG_PARSE_INTERVAL="${LOG_PARSE_INTERVAL:-60}" # How often to parse logs (seconds) + +# State files for log metrics +STATE_DIR="/tmp/nginx-metrics" +LAST_LOG_PARSE=0 + +# Output mode +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false + +######################### +### Logging ### +######################### + +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2 +} + +######################### +### Parse Arguments ### +######################### + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --textfile) + OUTPUT_FILE="$TEXTFILE_DIR/nginx.prom" + shift + ;; + --http) + HTTP_MODE=true + shift + ;; + --output|-o) + OUTPUT_FILE="$2" + shift 2 + ;; + --port) + LISTEN_PORT="$2" + shift 2 + ;; + --stub-url) + STUB_STATUS_URL="$2" + shift 2 + ;; + --access-log) + ACCESS_LOG="$2" + shift 2 + ;; + --error-log) + ERROR_LOG="$2" + shift 2 + ;; + --nginx-conf) + NGINX_CONF="$2" + shift 2 + ;; + --ssl-domains) + SSL_CHECK_DOMAINS="$2" + shift 2 + ;; + --help) + cat </dev/null; then + echo "apt" + elif command -v dnf &>/dev/null; then + echo "dnf" + elif command -v yum &>/dev/null; then + echo "yum" + elif command -v zypper &>/dev/null; then + echo "zypper" + elif command -v pacman &>/dev/null; then + echo "pacman" + elif command -v apk &>/dev/null; then + echo "apk" + else + echo "" + fi +} + +install_package() { + local pkg="$1" + local pkgmgr + pkgmgr=$(detect_package_manager) + + log "Installing $pkg..." + + case "$pkgmgr" in + apt) + apt-get update -qq && apt-get install -y -qq "$pkg" + ;; + dnf) + dnf install -y -q "$pkg" + ;; + yum) + yum install -y -q "$pkg" + ;; + zypper) + zypper install -y -q "$pkg" + ;; + pacman) + pacman -S --noconfirm "$pkg" + ;; + apk) + apk add --quiet "$pkg" + ;; + *) + log "ERROR: Unknown package manager. Please install $pkg manually." 
+ return 1 + ;; + esac +} + +setup() { + mkdir -p "$STATE_DIR" + + # Check for required tools and install if missing + if ! command -v socat &>/dev/null; then + log "socat not found, attempting to install..." + if [[ $EUID -eq 0 ]]; then + if ! install_package socat; then + log "ERROR: Failed to install socat" + exit 1 + fi + log "socat installed successfully" + else + log "ERROR: socat is required. Run as root to auto-install, or install manually:" + log " Debian/Ubuntu: apt install socat" + log " RHEL/CentOS: yum install socat" + log " Fedora: dnf install socat" + log " Alpine: apk add socat" + exit 1 + fi + fi + + if ! command -v curl &>/dev/null; then + log "curl not found, attempting to install..." + if [[ $EUID -eq 0 ]]; then + if ! install_package curl; then + log "ERROR: Failed to install curl" + exit 1 + fi + log "curl installed successfully" + else + log "ERROR: curl is required. Run as root to auto-install, or install manually." + exit 1 + fi + fi + + # Check if nginx is running + if ! pgrep -x nginx &>/dev/null && ! pidof nginx &>/dev/null; then + log "WARNING: nginx process not found - process metrics will show nginx_process_running=0" + fi + + # Check if stub_status is accessible + check_stub_status +} + +check_stub_status() { + log "Checking stub_status at $STUB_STATUS_URL..." 
+ + local response http_code + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$STUB_STATUS_URL" 2>/dev/null) + + if [[ "$response" == "200" ]]; then + # Verify it's actually stub_status output + local content + content=$(curl -s --max-time 5 "$STUB_STATUS_URL" 2>/dev/null) + if echo "$content" | grep -q "Active connections:"; then + log "✓ stub_status is working correctly" + return 0 + else + log "WARNING: $STUB_STATUS_URL returned 200 but doesn't look like stub_status output" + log " Expected 'Active connections:' in response" + show_stub_status_help + return 1 + fi + elif [[ "$response" == "000" ]]; then + log "WARNING: Cannot connect to $STUB_STATUS_URL (connection refused/timeout)" + log " stub_status metrics will show nginx_up=0" + show_stub_status_help + return 1 + elif [[ "$response" == "403" ]]; then + log "WARNING: Access denied to $STUB_STATUS_URL (HTTP 403)" + log " Check 'allow' directives in stub_status location block" + show_stub_status_help + return 1 + elif [[ "$response" == "404" ]]; then + log "WARNING: stub_status endpoint not found at $STUB_STATUS_URL (HTTP 404)" + log " stub_status may not be configured" + show_stub_status_help + return 1 + else + log "WARNING: Unexpected response from $STUB_STATUS_URL (HTTP $response)" + show_stub_status_help + return 1 + fi +} + +show_stub_status_help() { + log "" + log "To enable stub_status, add this to your nginx config:" + log "" + log " server {" + log " listen 127.0.0.1:80;" + log " server_name localhost;" + log " " + log " location /nginx_status {" + log " stub_status on;" + log " access_log off;" + log " allow 127.0.0.1;" + log " deny all;" + log " }" + log " }" + log "" + log "Then reload nginx: nginx -t && systemctl reload nginx" + log "" + log "Or specify a different URL with: --stub-url " + log "" +} + +######################### +### Stub Status Metrics ### +######################### + +collect_stub_status() { + local stub_output + + echo "# HELP nginx_up Whether nginx stub_status 
is reachable" + echo "# TYPE nginx_up gauge" + + if ! stub_output=$(curl -s --max-time 5 "$STUB_STATUS_URL" 2>/dev/null); then + echo "nginx_up 0" + return + fi + + echo "nginx_up 1" + + # Parse stub_status output + # Format: + # Active connections: 43 + # server accepts handled requests + # 7368 7368 10993 + # Reading: 0 Writing: 5 Waiting: 38 + + local active_connections accepts handled requests reading writing waiting + + active_connections=$(echo "$stub_output" | grep -oP 'Active connections:\s*\K\d+' || echo "0") + + # Parse the accepts/handled/requests line + local stats_line + stats_line=$(echo "$stub_output" | grep -E '^\s*[0-9]+\s+[0-9]+\s+[0-9]+' || echo "0 0 0") + accepts=$(echo "$stats_line" | awk '{print $1}') + handled=$(echo "$stats_line" | awk '{print $2}') + requests=$(echo "$stats_line" | awk '{print $3}') + + # Parse reading/writing/waiting + reading=$(echo "$stub_output" | grep -oP 'Reading:\s*\K\d+' || echo "0") + writing=$(echo "$stub_output" | grep -oP 'Writing:\s*\K\d+' || echo "0") + waiting=$(echo "$stub_output" | grep -oP 'Waiting:\s*\K\d+' || echo "0") + + cat </dev/null || pidof nginx 2>/dev/null | awk '{print $1}' || echo "") + + if [[ -z "$nginx_master_pid" ]]; then + echo "# HELP nginx_process_running Whether nginx process is running" + echo "# TYPE nginx_process_running gauge" + echo "nginx_process_running 0" + return + fi + + echo "# HELP nginx_process_running Whether nginx process is running" + echo "# TYPE nginx_process_running gauge" + echo "nginx_process_running 1" + + # Get all nginx PIDs + nginx_pids=$(pgrep -x nginx 2>/dev/null || pidof nginx 2>/dev/null || echo "") + + # Count workers (total processes minus master) + worker_count=$(echo "$nginx_pids" | wc -w) + if [[ $worker_count -gt 0 ]]; then + worker_count=$((worker_count - 1)) # Subtract master + fi + + echo "# HELP nginx_workers_count Number of nginx worker processes" + echo "# TYPE nginx_workers_count gauge" + echo "nginx_workers_count $worker_count" + + # Calculate 
total memory usage (RSS in bytes) + total_memory=0 + total_cpu=0 + total_fds=0 + total_threads=0 + + for pid in $nginx_pids; do + if [[ -d "/proc/$pid" ]]; then + # Memory (RSS in KB from /proc/pid/status, convert to bytes) + local rss + rss=$(grep -m1 'VmRSS:' "/proc/$pid/status" 2>/dev/null | awk '{print $2}' || echo "0") + total_memory=$((total_memory + rss * 1024)) + + # CPU time (from /proc/pid/stat - utime + stime in jiffies) + local stat_line utime stime + if stat_line=$(cat "/proc/$pid/stat" 2>/dev/null); then + utime=$(echo "$stat_line" | awk '{print $14}') + stime=$(echo "$stat_line" | awk '{print $15}') + total_cpu=$((total_cpu + utime + stime)) + fi + + # Open file descriptors + local fds + fds=$(ls -1 "/proc/$pid/fd" 2>/dev/null | wc -l || echo "0") + total_fds=$((total_fds + fds)) + + # Threads + local threads + threads=$(grep -c '^Threads:' "/proc/$pid/status" 2>/dev/null || echo "0") + if [[ "$threads" -eq 0 ]]; then + threads=$(grep 'Threads:' "/proc/$pid/status" 2>/dev/null | awk '{print $2}' || echo "1") + fi + total_threads=$((total_threads + threads)) + fi + done + + # Convert CPU jiffies to seconds (assuming 100 Hz) + local cpu_seconds + cpu_seconds=$(echo "scale=2; $total_cpu / 100" | bc 2>/dev/null || echo "$total_cpu") + + cat </dev/null || echo "0") + boot_time=$(awk '{print $1}' /proc/stat 2>/dev/null | head -1 || echo "0") + # starttime is in jiffies since boot + start_seconds=$(awk "BEGIN {printf \"%.0f\", $(cat /proc/uptime | awk '{print $1}') - ($starttime / 100)}") + local now_epoch + now_epoch=$(date +%s) + local process_start=$((now_epoch - start_seconds)) + echo "nginx_process_start_time_seconds $process_start" + else + echo "nginx_process_start_time_seconds 0" + fi + + # Get max open files limit + if [[ -f "/proc/$nginx_master_pid/limits" ]]; then + local max_fds + max_fds=$(grep 'Max open files' "/proc/$nginx_master_pid/limits" 2>/dev/null | awk '{print $4}' || echo "0") + echo "" + echo "# HELP nginx_process_max_fds Maximum 
number of open file descriptors" + echo "# TYPE nginx_process_max_fds gauge" + echo "nginx_process_max_fds $max_fds" + fi +} + +######################### +### Config Metrics ### +######################### + +collect_config_metrics() { + if [[ ! -f "$NGINX_CONF" ]]; then + echo "# nginx.conf not found at $NGINX_CONF" + return + fi + + local worker_processes worker_connections keepalive_timeout + local multi_accept use_epoll sendfile tcp_nopush tcp_nodelay gzip_enabled + + # Parse worker_processes (can be 'auto' or a number) + worker_processes=$(grep -E '^\s*worker_processes' "$NGINX_CONF" 2>/dev/null | head -1 | awk '{print $2}' | tr -d ';' || echo "auto") + if [[ "$worker_processes" == "auto" ]]; then + worker_processes=$(nproc 2>/dev/null || echo "1") + fi + + # Parse worker_connections + worker_connections=$(grep -E '^\s*worker_connections' "$NGINX_CONF" 2>/dev/null | head -1 | awk '{print $2}' | tr -d ';' || echo "0") + + # Parse keepalive_timeout + keepalive_timeout=$(grep -E '^\s*keepalive_timeout' "$NGINX_CONF" 2>/dev/null | head -1 | awk '{print $2}' | tr -d ';s' || echo "0") + + # Check various settings + multi_accept=$(grep -qE '^\s*multi_accept\s+on' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + use_epoll=$(grep -qE '^\s*use\s+epoll' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + sendfile=$(grep -qE '^\s*sendfile\s+on' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + tcp_nopush=$(grep -qE '^\s*tcp_nopush\s+on' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + tcp_nodelay=$(grep -qE '^\s*tcp_nodelay\s+on' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + gzip_enabled=$(grep -qE '^\s*gzip\s+on' "$NGINX_CONF" 2>/dev/null && echo "1" || echo "0") + + cat </dev/null | wc -l) + elif [[ -d "$CONF_D_DIR" ]]; then + vhost_count=$(find "$CONF_D_DIR" -name "*.conf" -type f 2>/dev/null | wc -l) + fi + + echo "" + echo "# HELP nginx_config_vhosts_total Number of configured virtual hosts" + echo "# TYPE nginx_config_vhosts_total gauge" + echo 
"nginx_config_vhosts_total $vhost_count"
+
+    # Calculate max possible connections
+    local max_connections=$((worker_processes * worker_connections))
+    echo ""
+    echo "# HELP nginx_config_max_connections Maximum theoretical connections (workers * connections)"
+    echo "# TYPE nginx_config_max_connections gauge"
+    echo "nginx_config_max_connections $max_connections"
+}
+
+#########################
+### Access Log Metrics ###
+#########################
+
+# collect_access_log_metrics
+#
+# Summarises the last $LOG_TAIL_LINES lines of $ACCESS_LOG as Prometheus
+# gauges: requests per status code and status class, response bytes,
+# requests per method, unique client IPs, the five most requested URIs and
+# the sample size.
+#
+# Globals read:    ACCESS_LOG, STATE_DIR, LOG_PARSE_INTERVAL, LOG_TAIL_LINES
+# Globals written: LAST_LOG_PARSE
+# Files written:   $STATE_DIR/log_metrics (cached output),
+#                  $STATE_DIR/last_parse  (epoch of the last successful parse)
+# Output:          metrics text on stdout; a "# ..." comment line when the log
+#                  is unreadable or empty.
+#
+# Field positions assume nginx "combined" log format:
+#   $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
+collect_access_log_metrics() {
+    if [[ ! -f "$ACCESS_LOG" ]] || [[ ! -r "$ACCESS_LOG" ]]; then
+        echo "# Access log not readable at $ACCESS_LOG"
+        return
+    fi
+
+    local now
+    now=$(date +%s)
+
+    # Only parse logs every LOG_PARSE_INTERVAL seconds
+    if [[ -f "$STATE_DIR/last_parse" ]]; then
+        LAST_LOG_PARSE=$(cat "$STATE_DIR/last_parse" 2>/dev/null || echo "0")
+    fi
+    # An empty or hand-edited state file must not break the arithmetic below.
+    if [[ ! "${LAST_LOG_PARSE:-}" =~ ^[0-9]+$ ]]; then
+        LAST_LOG_PARSE=0
+    fi
+
+    if [[ $((now - LAST_LOG_PARSE)) -lt $LOG_PARSE_INTERVAL ]] && [[ -f "$STATE_DIR/log_metrics" ]]; then
+        cat "$STATE_DIR/log_metrics"
+        return
+    fi
+
+    local log_data
+    log_data=$(tail -n "$LOG_TAIL_LINES" "$ACCESS_LOG" 2>/dev/null || echo "")
+
+    if [[ -z "$log_data" ]]; then
+        # last_parse is deliberately NOT updated here: stamping it on an empty
+        # read would make the next call serve the previous (stale) cache.
+        echo "# No log data available"
+        return
+    fi
+
+    local metrics_output=""
+
+    # Count by status code ($9 in combined format).
+    # "|| true": grep exits 1 when nothing matches; keep that from aborting the
+    # caller if it runs with errexit/pipefail (harmless otherwise).
+    local status_counts
+    status_counts=$(echo "$log_data" | awk '{print $9}' | grep -E '^[0-9]{3}$' | sort | uniq -c | sort -rn || true)
+
+    metrics_output+="# HELP nginx_http_requests_by_status_total HTTP requests by status code (from last $LOG_TAIL_LINES log lines)
+# TYPE nginx_http_requests_by_status_total gauge
+"
+
+    # Initialize counters for status code groups
+    local count_1xx=0 count_2xx=0 count_3xx=0 count_4xx=0 count_5xx=0
+
+    local count status
+    while read -r count status; do
+        if [[ -n "$status" ]] && [[ -n "$count" ]]; then
+            metrics_output+="nginx_http_requests_by_status_total{status=\"$status\"} $count
+"
+            # Aggregate by category (first digit of the status code)
+            case "${status:0:1}" in
+                1) count_1xx=$((count_1xx + count)) ;;
+                2) count_2xx=$((count_2xx + count)) ;;
+                3) count_3xx=$((count_3xx + count)) ;;
+                4) count_4xx=$((count_4xx + count)) ;;
+                5) count_5xx=$((count_5xx + count)) ;;
+            esac
+        fi
+    done <<< "$status_counts"
+
+    metrics_output+="
+# HELP nginx_http_requests_by_status_class_total HTTP requests by status class
+# TYPE nginx_http_requests_by_status_class_total gauge
+nginx_http_requests_by_status_class_total{class=\"1xx\"} $count_1xx
+nginx_http_requests_by_status_class_total{class=\"2xx\"} $count_2xx
+nginx_http_requests_by_status_class_total{class=\"3xx\"} $count_3xx
+nginx_http_requests_by_status_class_total{class=\"4xx\"} $count_4xx
+nginx_http_requests_by_status_class_total{class=\"5xx\"} $count_5xx
+"
+
+    # Calculate total bytes sent ($10). printf "%.0f" keeps large sums in plain
+    # decimal; "print sum+0" can switch to exponent notation on some awk builds.
+    local total_bytes
+    total_bytes=$(echo "$log_data" | awk '{sum += $10} END {printf "%.0f\n", sum}')
+
+    metrics_output+="
+# HELP nginx_http_response_bytes_total Total bytes sent in responses (from last $LOG_TAIL_LINES log lines)
+# TYPE nginx_http_response_bytes_total gauge
+nginx_http_response_bytes_total $total_bytes
+"
+
+    # Count requests by method (first word of the quoted request line)
+    local method_counts
+    method_counts=$(echo "$log_data" | awk -F'"' '{print $2}' | awk '{print $1}' | grep -E '^(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$' | sort | uniq -c || true)
+
+    metrics_output+="
+# HELP nginx_http_requests_by_method_total HTTP requests by method (from last $LOG_TAIL_LINES log lines)
+# TYPE nginx_http_requests_by_method_total gauge
+"
+
+    local method
+    while read -r count method; do
+        if [[ -n "$method" ]] && [[ -n "$count" ]]; then
+            metrics_output+="nginx_http_requests_by_method_total{method=\"$method\"} $count
+"
+        fi
+    done <<< "$method_counts"
+
+    # Count unique IPs
+    local unique_ips
+    unique_ips=$(echo "$log_data" | awk '{print $1}' | sort -u | wc -l)
+
+    metrics_output+="
+# HELP nginx_http_unique_clients Unique client IPs (from last $LOG_TAIL_LINES log lines)
+# TYPE nginx_http_unique_clients gauge
+nginx_http_unique_clients $unique_ips
+"
+
+    # Top URIs (for potential abuse detection).
+    # Request lines without a target (e.g. "-") are dropped up front so that
+    # blank entries cannot occupy one of the five slots.
+    local top_uris
+    top_uris=$(echo "$log_data" | awk -F'"' '{print $2}' | awk 'NF >= 2 && $2 != "-" {print $2}' | sort | uniq -c | sort -rn | head -5)
+
+    metrics_output+="
+# HELP nginx_http_top_uri_requests_total Top requested URIs (from last $LOG_TAIL_LINES log lines)
+# TYPE nginx_http_top_uri_requests_total gauge
+"
+
+    local rank=1
+    local uri
+    while read -r count uri; do
+        if [[ -n "$uri" ]] && [[ -n "$count" ]]; then
+            # Truncate, then escape for the exposition format. Backslashes must
+            # be doubled BEFORE quotes are escaped, otherwise a URI containing
+            # "\" yields an unparsable label value.
+            uri="${uri:0:100}"
+            uri=${uri//\\/\\\\}
+            uri="${uri//\"/\\\"}"
+            metrics_output+="nginx_http_top_uri_requests_total{uri=\"$uri\",rank=\"$rank\"} $count
+"
+            rank=$((rank + 1))
+        fi
+    done <<< "$top_uris"
+
+    # Number of log lines in the sampled window
+    local recent_requests
+    recent_requests=$(echo "$log_data" | wc -l)
+
+    metrics_output+="
+# HELP nginx_http_requests_in_sample Total requests in sample window
+# TYPE nginx_http_requests_in_sample gauge
+nginx_http_requests_in_sample $recent_requests
+"
+
+    # Save metrics for caching; stamp the parse time only once the cache
+    # actually holds fresh data.
+    echo "$metrics_output" > "$STATE_DIR/log_metrics"
+    echo "$now" > "$STATE_DIR/last_parse"
+    echo "$metrics_output"
+}
+
+#########################
+### Error Log Metrics ###
+#########################
+
+collect_error_log_metrics() {
+    if [[ ! -f "$ERROR_LOG" ]] || [[ ! 
-r "$ERROR_LOG" ]]; then + echo "# Error log not readable at $ERROR_LOG" + return + fi + + # Count errors by level from last 1000 lines + local log_data + log_data=$(tail -n 1000 "$ERROR_LOG" 2>/dev/null || echo "") + + if [[ -z "$log_data" ]]; then + return + fi + + local emerg_count alert_count crit_count error_count warn_count notice_count info_count + + emerg_count=$(echo "$log_data" | grep -c '\[emerg\]' 2>/dev/null) || emerg_count=0 + alert_count=$(echo "$log_data" | grep -c '\[alert\]' 2>/dev/null) || alert_count=0 + crit_count=$(echo "$log_data" | grep -c '\[crit\]' 2>/dev/null) || crit_count=0 + error_count=$(echo "$log_data" | grep -c '\[error\]' 2>/dev/null) || error_count=0 + warn_count=$(echo "$log_data" | grep -c '\[warn\]' 2>/dev/null) || warn_count=0 + notice_count=$(echo "$log_data" | grep -c '\[notice\]' 2>/dev/null) || notice_count=0 + info_count=$(echo "$log_data" | grep -c '\[info\]' 2>/dev/null) || info_count=0 + + cat </dev/null || echo "0") + log_mtime=$(stat -c %Y "$ERROR_LOG" 2>/dev/null || echo "0") + now=$(date +%s) + log_age=$((now - log_mtime)) + + cat </dev/null | grep -v '#' | grep -v 'ssl_certificate_key' | awk '{print $2}' | tr -d ';' | sort -u || echo "") + + if [[ -z "$cert_files" ]]; then + echo "# No SSL certificates found in nginx config" + return + fi + + echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires" + echo "# TYPE nginx_ssl_certificate_expiry_days gauge" + echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires" + echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge" + + while read -r cert_file; do + if [[ -f "$cert_file" ]]; then + local expiry_date expiry_epoch now_epoch days_left cn + + expiry_date=$(openssl x509 -enddate -noout -in "$cert_file" 2>/dev/null | cut -d= -f2 || echo "") + if [[ -n "$expiry_date" ]]; then + expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null || echo "0") + now_epoch=$(date +%s) + days_left=$(( (expiry_epoch - 
now_epoch) / 86400 )) + + # Get CN from certificate + cn=$(openssl x509 -subject -noout -in "$cert_file" 2>/dev/null | grep -oP 'CN\s*=\s*\K[^,/]+' || basename "$cert_file") + cn="${cn// /_}" + + echo "nginx_ssl_certificate_expiry_days{certificate=\"$cn\",file=\"$cert_file\"} $days_left" + echo "nginx_ssl_certificate_expiry_timestamp{certificate=\"$cn\",file=\"$cert_file\"} $expiry_epoch" + fi + fi + done <<< "$cert_files" + return + fi + + # Check specified domains via network + echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires" + echo "# TYPE nginx_ssl_certificate_expiry_days gauge" + echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires" + echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge" + + IFS=',' read -ra domain_array <<< "$domains" + for domain in "${domain_array[@]}"; do + domain=$(echo "$domain" | tr -d ' ') + if [[ -n "$domain" ]]; then + local expiry_date expiry_epoch now_epoch days_left + + expiry_date=$(echo | openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2 || echo "") + + if [[ -n "$expiry_date" ]]; then + expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null || echo "0") + now_epoch=$(date +%s) + days_left=$(( (expiry_epoch - now_epoch) / 86400 )) + + echo "nginx_ssl_certificate_expiry_days{domain=\"$domain\"} $days_left" + echo "nginx_ssl_certificate_expiry_timestamp{domain=\"$domain\"} $expiry_epoch" + else + echo "nginx_ssl_certificate_expiry_days{domain=\"$domain\"} -1" + fi + fi + done +} + +######################### +### Upstream Metrics ### +######################### + +collect_upstream_metrics() { + # Check for upstream configurations + local upstreams + upstreams=$(grep -rh 'upstream\s' "$SITES_DIR" "$CONF_D_DIR" "$NGINX_CONF" 2>/dev/null | grep -v '#' | awk '{print $2}' | tr -d '{' | sort -u || echo "") + + if [[ -z "$upstreams" ]]; then + return + fi + + echo "# HELP 
nginx_upstream_configured Number of configured upstreams" + echo "# TYPE nginx_upstream_configured gauge" + echo "nginx_upstream_configured $(echo "$upstreams" | wc -w)" + + echo "" + echo "# HELP nginx_upstream_servers_total Servers configured per upstream" + echo "# TYPE nginx_upstream_servers_total gauge" + + # This is a simplified count - for real upstream health you'd need nginx-plus or lua module + while read -r upstream; do + if [[ -n "$upstream" ]]; then + # Try to count servers in this upstream block + local server_count + server_count=$(grep -A 20 "upstream $upstream" "$SITES_DIR"/* "$CONF_D_DIR"/* "$NGINX_CONF" 2>/dev/null | grep -c 'server\s' 2>/dev/null) || server_count=0 + echo "nginx_upstream_servers_total{upstream=\"$upstream\"} $server_count" + fi + done <<< "$upstreams" +} + +######################### +### Version Metrics ### +######################### + +collect_version_metrics() { + local version + version=$(nginx -v 2>&1 | grep -oP 'nginx/\K[0-9.]+' || echo "unknown") + + echo "# HELP nginx_version_info Nginx version information" + echo "# TYPE nginx_version_info gauge" + echo "nginx_version_info{version=\"$version\"} 1" + + # Check if nginx is compiled with certain modules + local modules_output + modules_output=$(nginx -V 2>&1 || echo "") + + local has_ssl has_http2 has_gzip has_stub_status has_realip has_geoip has_lua + has_ssl=$(echo "$modules_output" | grep -qE 'with-http_ssl_module|--with-openssl' && echo "1" || echo "0") + has_http2=$(echo "$modules_output" | grep -q 'http_v2_module' && echo "1" || echo "0") + has_gzip=$(echo "$modules_output" | grep -q 'http_gzip' && echo "1" || echo "0") + has_stub_status=$(echo "$modules_output" | grep -q 'http_stub_status_module' && echo "1" || echo "0") + has_realip=$(echo "$modules_output" | grep -q 'http_realip_module' && echo "1" || echo "0") + has_geoip=$(echo "$modules_output" | grep -q 'http_geoip_module' && echo "1" || echo "0") + has_lua=$(echo "$modules_output" | grep -qE 
'lua|ndk_http_module' && echo "1" || echo "0") + + cat </dev/null || echo "0") + ulimit_n=$(ulimit -n 2>/dev/null || echo "0") + + cat </dev/null | awk '{print $1}' || echo "0") + + echo "" + echo "# HELP nginx_system_open_files Current system-wide open files" + echo "# TYPE nginx_system_open_files gauge" + echo "nginx_system_open_files $open_files" +} + +######################### +### Collect All Metrics ### +######################### + +collect_all_metrics() { + local hostname + hostname=$(hostname -f 2>/dev/null || hostname) + + cat </dev/null || { + log "Server error, restarting in 5 seconds..." + sleep 5 + } + done +} + +######################### +### Output ### +######################### + +write_output() { + local metrics + metrics=$(collect_all_metrics) + + if [[ -n "$OUTPUT_FILE" ]]; then + local tmp_file="${OUTPUT_FILE}.$$" + echo "$metrics" > "$tmp_file" + mv "$tmp_file" "$OUTPUT_FILE" + else + echo "$metrics" + fi +} + +######################### +### Main ### +######################### + +main() { + if [[ "${1:-}" == "--handle-request" ]]; then + handle_request + exit 0 + fi + + parse_args "$@" + setup + + if [[ "$HTTP_MODE" == true ]]; then + start_server + elif [[ -n "$OUTPUT_FILE" ]]; then + write_output + else + collect_all_metrics + fi +} + +main "$@" diff --git a/ntfy-client-setup-linux.sh b/ntfy-client-setup-linux.sh new file mode 100644 index 0000000..a099f89 --- /dev/null +++ b/ntfy-client-setup-linux.sh @@ -0,0 +1,263 @@ +#!/bin/bash + +############################################################# +#### ntfy Desktop Client Setup for Linux #### +#### Subscribe to ntfy push notifications with desktop #### +#### alerts via systemd user service #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.0 #### +#### #### +#### Usage: ./ntfy-client-setup-linux.sh #### +############################################################# + +set -euo pipefail + +NTFY_VERSION="2.11.0" + +# 
── Detect the actual desktop user ─────────────────────────
+# Handles both sudo and non-sudo execution.
+# USER/HOME are read with fallbacks because the script runs under `set -u`
+# and minimal environments do not always export them.
+if [ -n "${SUDO_USER:-}" ]; then
+    DESKTOP_USER="$SUDO_USER"
+    DESKTOP_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6)
+else
+    DESKTOP_USER="${USER:-$(id -un)}"
+    DESKTOP_HOME="${HOME:-$(getent passwd "$DESKTOP_USER" | cut -d: -f6)}"
+fi
+
+CONFIG_DIR="$DESKTOP_HOME/.config/ntfy"
+SYSTEMD_DIR="$DESKTOP_HOME/.config/systemd/user"
+
+# ── Helper functions ───────────────────────────────────────
+
+# Status output helpers; error() writes to stderr.
+info() { echo -e " ✓ $*"; }
+warn() { echo -e " ⚠ $*"; }
+error() { echo -e " ✗ $*" >&2; }
+
+# run_as_user CMD [ARGS...]
+# Runs CMD as the desktop user: via `sudo -u` when the script itself is root
+# on behalf of another user, directly otherwise.
+run_as_user() {
+    if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
+        sudo -u "$DESKTOP_USER" "$@"
+    else
+        "$@"
+    fi
+}
+
+# install_package APT_NAME [DNF_NAME] [PACMAN_NAME]
+# Installs one package with the first package manager found (apt, dnf,
+# pacman). DNF/pacman names default to the apt name. Returns 1 when no
+# supported package manager is available.
+install_package() {
+    local pkg_apt="$1"
+    local pkg_dnf="${2:-$1}"
+    local pkg_pacman="${3:-$1}"
+
+    if command -v apt &> /dev/null; then
+        sudo apt install -y "$pkg_apt"
+    elif command -v dnf &> /dev/null; then
+        sudo dnf install -y "$pkg_dnf"
+    elif command -v pacman &> /dev/null; then
+        sudo pacman -S --noconfirm "$pkg_pacman"
+    else
+        error "Could not detect package manager. Please install '$pkg_apt' manually."
+        return 1
+    fi
+}
+
+# ── Banner ─────────────────────────────────────────────────
+
+echo ""
+echo "==========================================="
+echo " ntfy Desktop Client Setup for Linux"
+echo "==========================================="
+echo ""
+echo " User: $DESKTOP_USER"
+echo " Home: $DESKTOP_HOME"
+echo ""
+
+# ── Step 1: Install dependencies ───────────────────────────
+
+echo "── Checking dependencies ──────────────────"
+echo ""
+
+if ! command -v notify-send &> /dev/null; then
+    echo " Installing libnotify for desktop notifications..."
+    install_package libnotify-bin libnotify libnotify
+    info "libnotify installed"
+else
+    info "notify-send already available"
+fi
+
+if ! command -v curl &> /dev/null; then
+    echo " Installing curl..."
+    install_package curl curl curl
+    info "curl installed"
+else
+    info "curl already available"
+fi
+
+echo ""
+
+# ── Step 2: Install ntfy binary ────────────────────────────
+
+echo "── Installing ntfy client ─────────────────"
+echo ""
+
+# Determine install location based on privileges:
+# system-wide when root or passwordless sudo is available, per-user otherwise.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    NTFY_BIN="/usr/local/bin/ntfy"
+    INSTALL_SYSTEM=true
+else
+    NTFY_BIN="$DESKTOP_HOME/.local/bin/ntfy"
+    INSTALL_SYSTEM=false
+fi
+
+if [ -x "$NTFY_BIN" ]; then
+    info "ntfy already installed at $NTFY_BIN"
+else
+    # Map the kernel architecture onto the release asset naming
+    ARCH=$(uname -m)
+    case "$ARCH" in
+        x86_64) NTFY_ARCH="amd64" ;;
+        aarch64) NTFY_ARCH="arm64" ;;
+        armv7l) NTFY_ARCH="armv7" ;;
+        *)
+            error "Unsupported architecture: $ARCH"
+            exit 1
+            ;;
+    esac
+
+    DOWNLOAD_URL="https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_${NTFY_ARCH}.tar.gz"
+    echo " Downloading ntfy v${NTFY_VERSION} (${NTFY_ARCH})..."
+
+    TEMP_DIR=$(mktemp -d)
+    trap 'rm -rf "$TEMP_DIR"' EXIT
+
+    # -f: fail on HTTP errors instead of saving an error page as the tarball;
+    # -S: still show the error although -s silences progress output.
+    curl -fsSL -o "$TEMP_DIR/ntfy.tar.gz" "$DOWNLOAD_URL"
+    tar -xzf "$TEMP_DIR/ntfy.tar.gz" -C "$TEMP_DIR"
+
+    # Locate the binary explicitly so a changed archive layout produces a
+    # clear message rather than a failing chmod on a missing file.
+    NTFY_EXTRACTED=$(find "$TEMP_DIR" -name "ntfy" -type f -print -quit)
+    if [ -z "$NTFY_EXTRACTED" ]; then
+        error "ntfy binary not found in downloaded archive"
+        exit 1
+    fi
+
+    if [ "$INSTALL_SYSTEM" = true ]; then
+        sudo mv "$NTFY_EXTRACTED" "$NTFY_BIN"
+        sudo chmod +x "$NTFY_BIN"
+    else
+        mkdir -p "$(dirname "$NTFY_BIN")"
+        mv "$NTFY_EXTRACTED" "$NTFY_BIN"
+        chmod +x "$NTFY_BIN"
+    fi
+
+    rm -rf "$TEMP_DIR"
+    trap - EXIT
+
+    info "ntfy installed to $NTFY_BIN"
+fi
+
+echo ""
+
+# ── Step 3: Interactive configuration ──────────────────────
+
+echo "── Configuration ──────────────────────────"
+echo ""
+
+read -rp " Server URL [https://ntfy.example.com]: " INPUT_SERVER
+SERVER_URL="${INPUT_SERVER:-https://ntfy.example.com}"
+# Drop one trailing slash so topic URLs do not become "https://host//topic"
+SERVER_URL="${SERVER_URL%/}"
+
+echo ""
+# -s: do not echo the secret to the terminal (the prompt leaves the cursor
+# on the same line, hence the extra echo afterwards)
+read -rsp " Access token (leave empty for public topics): " ACCESS_TOKEN
+echo ""
+
+echo ""
+echo " Enter topics to subscribe to (space-separated)."
+echo " Examples: alerts monitoring backup-status"
+read -rp " Topics: " TOPICS
+
+if [ -z "$TOPICS" ]; then
+    error "At least one topic is required."
+    exit 1
+fi
+
+# Report only WHETHER a token was given. Combining ${VAR:+...} with
+# ${VAR:-...} would print the token itself whenever it is non-empty.
+if [ -n "$ACCESS_TOKEN" ]; then
+    TOKEN_STATUS="(set)"
+else
+    TOKEN_STATUS="(none)"
+fi
+
+echo ""
+echo " Server: $SERVER_URL"
+echo " Topics: $TOPICS"
+echo " Token: $TOKEN_STATUS"
+echo ""
+
+# ── Step 4: Create client config ───────────────────────────
+
+echo "── Creating configuration files ────────────"
+echo ""
+
+run_as_user mkdir -p "$CONFIG_DIR"
+run_as_user mkdir -p "$SYSTEMD_DIR"
+
+# Build the subscribe section for client.yml: one list entry per topic,
+# with "token" nested under the same entry as "topic".
+SUBSCRIBE_BLOCK=""
+for topic in $TOPICS; do
+    SUBSCRIBE_BLOCK+="  - topic: ${SERVER_URL}/${topic}"$'\n'
+    if [ -n "$ACCESS_TOKEN" ]; then
+        SUBSCRIBE_BLOCK+="    token: ${ACCESS_TOKEN}"$'\n'
+    fi
+done
+
+# client.yml may hold the access token: create it owner-only before any
+# content is written.
+: > "$CONFIG_DIR/client.yml"
+chmod 600 "$CONFIG_DIR/client.yml"
+
+# Write client.yml. The terminator must stand alone on its own line; it is
+# recognised before expansion, so "${SUBSCRIBE_BLOCK}EOF" never ends the
+# here-document. The trailing newline of the block is trimmed because the
+# here-document supplies one.
+cat > "$CONFIG_DIR/client.yml" << EOF
+# ntfy client configuration
+# Documentation: https://docs.ntfy.sh/subscribe/cli/
+
+subscribe:
+${SUBSCRIBE_BLOCK%$'\n'}
+EOF
+
+# Fix ownership if running as root ("user:" = user plus that user's login
+# group, which need not be named after the user)
+if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
+    chown -R "$DESKTOP_USER:" "$CONFIG_DIR"
+fi
+
+info "Config saved to $CONFIG_DIR/client.yml"
+
+# ── Step 5: Create systemd user service ────────────────────
+
+cat > "$SYSTEMD_DIR/ntfy-subscribe.service" << EOF
+[Unit]
+Description=ntfy desktop notification subscriber
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+ExecStart=${NTFY_BIN} subscribe --from-config
+Restart=on-failure
+RestartSec=10
+
+[Install]
+WantedBy=default.target
+EOF
+
+# Fix ownership if running as root
+if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
+    chown -R "$DESKTOP_USER:" "$SYSTEMD_DIR"
+fi
+
+info "Systemd user service created"
+
+echo ""
+
+# ── Done ───────────────────────────────────────────────────
+
+echo "==========================================="
+echo " Setup Complete"
+echo "==========================================="
+echo ""
+echo " To start receiving notifications, run as 
$DESKTOP_USER" +echo " from a graphical desktop session:" +echo "" +echo " systemctl --user daemon-reload" +echo " systemctl --user enable --now ntfy-subscribe" +echo "" +echo " Useful commands:" +echo "" +echo " Status: systemctl --user status ntfy-subscribe" +echo " Logs: journalctl --user -u ntfy-subscribe -f" +echo " Restart: systemctl --user restart ntfy-subscribe" +echo " Stop: systemctl --user stop ntfy-subscribe" +echo " Disable: systemctl --user disable --now ntfy-subscribe" +echo "" +echo " Test with:" +echo "" +echo " curl -d 'Hello from ntfy!' ${SERVER_URL}/${TOPICS%% *}" +echo "" diff --git a/ntfy-client-setup-windows.ps1 b/ntfy-client-setup-windows.ps1 new file mode 100644 index 0000000..564e92f --- /dev/null +++ b/ntfy-client-setup-windows.ps1 @@ -0,0 +1,236 @@ +############################################################# +#### ntfy Desktop Client Setup for Windows #### +#### Subscribe to ntfy push notifications with Windows #### +#### toast notifications #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.0 #### +#### #### +#### Usage: .\ntfy-client-setup-windows.ps1 #### +############################################################# + +$ErrorActionPreference = "Stop" + +# --- Configuration --- +$NtfyVersion = "2.8.0" +$InstallDir = "$env:LOCALAPPDATA\ntfy" +$ConfigDir = "$env:APPDATA\ntfy" + +# --- Interactive Prompts --- +Write-Host "" +Write-Host "=== ntfy Desktop Notifications Setup ===" -ForegroundColor Cyan +Write-Host "Installing for user: $env:USERNAME" +Write-Host "" + +# Server URL +$ServerUrl = Read-Host "Enter your ntfy server URL (e.g. https://ntfy.example.com)" +$ServerUrl = $ServerUrl.TrimEnd("/") +if ([string]::IsNullOrWhiteSpace($ServerUrl)) { + Write-Host "ERROR: Server URL is required." 
-ForegroundColor Red + exit 1 +} + +# Access token (optional — some servers allow anonymous access) +$Token = Read-Host "Enter your access token (leave blank if not required)" + +# Topics +$topicInput = Read-Host "Enter topics to subscribe to, comma-separated (e.g. alerts-critical,alerts-all)" +if ([string]::IsNullOrWhiteSpace($topicInput)) { + Write-Host "ERROR: At least one topic is required." -ForegroundColor Red + exit 1 +} +$Topics = $topicInput -split "," | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" } + +Write-Host "" +Write-Host "Server: $ServerUrl" -ForegroundColor White +Write-Host "Topics: $($Topics -join ', ')" -ForegroundColor White +Write-Host "Token: $(if ($Token) { '********' } else { '(none)' })" -ForegroundColor White +Write-Host "" + +# --- Create directories --- +New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null +New-Item -ItemType Directory -Force -Path $ConfigDir | Out-Null + +# --- Download ntfy if not already installed --- +if (Test-Path "$InstallDir\ntfy.exe") { + Write-Host "ntfy already installed at: $InstallDir\ntfy.exe" -ForegroundColor Green +} else { + Write-Host "Downloading ntfy v$NtfyVersion..." + $downloadUrl = "https://github.com/binwiederhier/ntfy/releases/download/v$NtfyVersion/ntfy_${NtfyVersion}_windows_amd64.zip" + $zipPath = "$env:TEMP\ntfy.zip" + Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath + + Write-Host "Extracting..." + $extractPath = "$env:TEMP\ntfy_extract" + Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue + Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force + Remove-Item $zipPath + + # Find the exe (may be in a subfolder) + $ntfyExe = Get-ChildItem -Path $extractPath -Recurse -Filter "ntfy.exe" | Select-Object -First 1 + if ($ntfyExe) { + Copy-Item -Path $ntfyExe.FullName -Destination "$InstallDir\ntfy.exe" -Force + } else { + Write-Host "ERROR: Could not find ntfy.exe in downloaded archive." 
-ForegroundColor Red + exit 1 + } + Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue + + Write-Host "Installed to: $InstallDir\ntfy.exe" -ForegroundColor Green +} +Write-Host "" + +# --- Create client.yml config --- +$clientYml = @" +default-host: $ServerUrl +"@ + +if ($Token) { + $clientYml += "`ndefault-token: $Token" +} + +$clientYmlPath = "$ConfigDir\client.yml" +$clientYml | Out-File -FilePath $clientYmlPath -Encoding UTF8 +Write-Host "Client config saved to: $clientYmlPath" -ForegroundColor Green + +# --- Build topic URLs --- +$topicUrls = @() +foreach ($topic in $Topics) { + $topicUrls += "$ServerUrl/$topic" +} +$topicUrlsString = $topicUrls -join " " + +# --- Create PowerShell notification script --- +# Build the token environment line only if a token was provided +$tokenLine = "" +if ($Token) { + $tokenLine = "`$env:NTFY_TOKEN = `"$Token`"" +} + +$psScriptContent = @" +Add-Type -AssemblyName System.Windows.Forms + +# Create a persistent notification icon in the system tray +`$global:notifyIcon = New-Object System.Windows.Forms.NotifyIcon +`$global:notifyIcon.Icon = [System.Drawing.SystemIcons]::Information +`$global:notifyIcon.Visible = `$true +`$global:notifyIcon.Text = "ntfy alerts" + +function Show-Notification { + param([string]`$Title, [string]`$Message, [int]`$Priority) + + # Map ntfy priority levels to Windows balloon icon types + # 1 (min), 2 (low) -> None + # 3 (default) -> Info + # 4 (high), 5 (max) -> Error + `$icon = [System.Windows.Forms.ToolTipIcon]::Info + if (`$Priority -ge 4) { `$icon = [System.Windows.Forms.ToolTipIcon]::Error } + elseif (`$Priority -le 2) { `$icon = [System.Windows.Forms.ToolTipIcon]::None } + + `$global:notifyIcon.BalloonTipIcon = `$icon + `$global:notifyIcon.BalloonTipTitle = `$Title + `$global:notifyIcon.BalloonTipText = `$Message + `$global:notifyIcon.ShowBalloonTip(30000) +} + +# Set access token if configured +$tokenLine +`$ntfyExe = "$InstallDir\ntfy.exe" + +# Subscribe and process JSON 
output line by line +& `$ntfyExe subscribe $topicUrlsString 2>&1 | ForEach-Object { + `$line = `$_ + if (`$line -match '"event":"message"') { + try { + `$json = `$line | ConvertFrom-Json + `$title = if (`$json.title) { `$json.title } else { `$json.topic } + `$message = `$json.message + `$priority = if (`$json.priority) { `$json.priority } else { 3 } + Show-Notification -Title `$title -Message `$message -Priority `$priority + } catch { } + } +} + +`$global:notifyIcon.Dispose() +"@ + +$psScriptPath = "$ConfigDir\run-subscribe.ps1" +$psScriptContent | Out-File -FilePath $psScriptPath -Encoding UTF8 +Write-Host "Notification script saved to: $psScriptPath" -ForegroundColor Green + +# --- Create VBS wrapper for hidden startup (no console window) --- +$vbsContent = @" +Set WshShell = CreateObject("WScript.Shell") +WshShell.Run "powershell -ExecutionPolicy Bypass -WindowStyle Hidden -File ""$psScriptPath""", 0 +Set WshShell = Nothing +"@ + +$vbsPath = "$ConfigDir\run-subscribe-hidden.vbs" +$vbsContent | Out-File -FilePath $vbsPath -Encoding ASCII +Write-Host "Hidden launcher saved to: $vbsPath" -ForegroundColor Green +Write-Host "" + +# --- Create startup shortcut --- +Write-Host "Creating startup shortcut..." +$startupPath = "$env:APPDATA\Microsoft\Windows\Start Menu\Programs\Startup" +$shortcutPath = "$startupPath\ntfy-subscribe.lnk" + +$shell = New-Object -ComObject WScript.Shell +$shortcut = $shell.CreateShortcut($shortcutPath) +$shortcut.TargetPath = "wscript.exe" +$shortcut.Arguments = "`"$vbsPath`"" +$shortcut.WorkingDirectory = $ConfigDir +$shortcut.WindowStyle = 7 # Minimized +$shortcut.Description = "ntfy notification subscriber" +$shortcut.Save() + +Write-Host "Startup shortcut created at: $shortcutPath" -ForegroundColor Green +Write-Host "" + +# --- Start the subscriber now --- +Write-Host "Starting ntfy subscriber..." 
+ +# Stop any existing ntfy or subscriber processes +Stop-Process -Name ntfy -ErrorAction SilentlyContinue +Get-Process powershell -ErrorAction SilentlyContinue | Where-Object { $_.Id -ne $PID } | ForEach-Object { + try { + $cmdLine = (Get-CimInstance Win32_Process -Filter "ProcessId = $($_.Id)" -ErrorAction SilentlyContinue).CommandLine + if ($cmdLine -like "*run-subscribe*") { Stop-Process -Id $_.Id -Force -ErrorAction SilentlyContinue } + } catch {} +} +Start-Sleep -Seconds 1 + +$process = Start-Process -FilePath "powershell" ` + -ArgumentList @("-ExecutionPolicy", "Bypass", "-WindowStyle", "Hidden", "-File", $psScriptPath) ` + -WindowStyle Hidden ` + -PassThru + +Start-Sleep -Seconds 2 + +# --- Print status and management commands --- +if ($process -and !$process.HasExited) { + Write-Host "" + Write-Host "=== Setup Complete ===" -ForegroundColor Green + Write-Host "" + Write-Host "ntfy is running and will start automatically on login." -ForegroundColor Green + Write-Host "You should see Windows toast notifications when messages arrive." + Write-Host "" + Write-Host "Management commands (run in PowerShell):" -ForegroundColor Cyan + Write-Host " Check status: Get-Process ntfy -ErrorAction SilentlyContinue" + Write-Host " Stop: Stop-Process -Name ntfy" + Write-Host " Start manually: wscript.exe '$vbsPath'" + Write-Host " Edit config: notepad '$clientYmlPath'" + Write-Host " Edit topics: notepad '$psScriptPath'" + Write-Host "" +} else { + Write-Host "" + Write-Host "WARNING: ntfy may not have started correctly." 
-ForegroundColor Yellow + Write-Host "Try running manually: wscript.exe '$vbsPath'" + Write-Host "" +} + +Write-Host "To test, send a notification from another machine:" -ForegroundColor Cyan +Write-Host " curl -d 'Test message' $ServerUrl/$($Topics[0])" +Write-Host "" diff --git a/postfix-metrics.sh b/postfix-metrics.sh new file mode 100755 index 0000000..3d84029 --- /dev/null +++ b/postfix-metrics.sh @@ -0,0 +1,990 @@ +#!/bin/bash +################################################################################ +# Script Name: postfix-metrics.sh +# Description: Prometheus exporter for Postfix mail server metrics +# +# Usage: +# # Output to stdout +# ./postfix-metrics.sh +# +# # Textfile collector mode (atomic write) +# ./postfix-metrics.sh --textfile +# +# # Custom output file +# ./postfix-metrics.sh -o /path/to/metrics.prom +# +################################################################################ + +# ============================================================================ +# CONFIGURATION VARIABLES +# ============================================================================ + +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false +HTTP_PORT=9192 +QUEUE_DIR="/var/spool/postfix" +LOG_FILE="/var/log/mail.log" +HOSTNAME=$(hostname) + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +show_usage() { + cat <&2; exit 1 ;; + esac + done +} + +# Helper function to count grep matches (returns 0 if no match) +grep_count() { + local result + result=$(grep -c "$@" 2>/dev/null) || result=0 + echo "$result" +} + +# ============================================================================ +# METRIC GENERATION +# ============================================================================ + +generate_metrics() { + local START_TIME + START_TIME=$(date +%s.%N) + +# Queue sizes +echo "# HELP postfix_queue_size 
Number of messages in each Postfix queue" +echo "# TYPE postfix_queue_size gauge" +for queue in incoming active deferred hold corrupt; do + count=$(find "${QUEUE_DIR}/${queue}" -type f 2>/dev/null | wc -l) + echo "postfix_queue_size{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Oldest message in queue (seconds); shell arithmetic is used so the sample never goes empty when bc is not installed +echo "# HELP postfix_queue_oldest_seconds Age of oldest message in queue" +echo "# TYPE postfix_queue_oldest_seconds gauge" +for queue in deferred hold; do + oldest=$(find "${QUEUE_DIR}/${queue}" -type f -printf '%T@\n' 2>/dev/null | sort -n | head -1) + if [[ -n "$oldest" ]]; then + age=$(( $(date +%s) - ${oldest%.*} )) + else + age=0 + fi + echo "postfix_queue_oldest_seconds{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${age}" +done + +# Message counters by status +echo "# HELP postfix_messages_total Total messages by status" +echo "# TYPE postfix_messages_total counter" +for status in sent bounced deferred expired; do + count=$(grep_count "status=${status}" "$LOG_FILE") + echo "postfix_messages_total{status=\"${status}\",hostname=\"${HOSTNAME}\"} ${count}" +done +rejected=$(grep_count 'reject:' "$LOG_FILE") +echo "postfix_messages_total{status=\"rejected\",hostname=\"${HOSTNAME}\"} ${rejected}" + +# SMTP connections (the leading space in ' connect from' keeps 'disconnect from' lines out of the connect count) +echo "# HELP postfix_smtp_connections SMTP connection stats" +echo "# TYPE postfix_smtp_connections counter" +connections=$(grep_count ' connect from' "$LOG_FILE") +disconnections=$(grep_count 'disconnect from' "$LOG_FILE") +echo "postfix_smtp_connections{type=\"connect\",hostname=\"${HOSTNAME}\"} ${connections}" +echo "postfix_smtp_connections{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${disconnections}" + +# Connection timeouts +echo "# HELP postfix_timeout_total Connection timeout events" +echo "# TYPE postfix_timeout_total counter" +timeout_count=$(grep_count 'timeout after' "$LOG_FILE") +echo "postfix_timeout_total{hostname=\"${HOSTNAME}\"} ${timeout_count}" + +# SASL authentication +echo "# HELP
postfix_sasl_auth_total SASL authentication attempts" +echo "# TYPE postfix_sasl_auth_total counter" +sasl_success=$(grep_count 'sasl_username=' "$LOG_FILE") +sasl_fail=$(grep_count 'authentication failed' "$LOG_FILE") +echo "postfix_sasl_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${sasl_success}" +echo "postfix_sasl_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${sasl_fail}" + +# Message sizes (bytes) +echo "# HELP postfix_message_size_bytes_total Total bytes of messages processed" +echo "# TYPE postfix_message_size_bytes_total counter" +total_bytes=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}') +echo "postfix_message_size_bytes_total{hostname=\"${HOSTNAME}\"} ${total_bytes}" + +echo "# HELP postfix_message_size_bytes_avg Average message size" +echo "# TYPE postfix_message_size_bytes_avg gauge" +avg_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print int(sum/count); else print 0}') +echo "postfix_message_size_bytes_avg{hostname=\"${HOSTNAME}\"} ${avg_size}" + +echo "# HELP postfix_message_size_bytes_max Largest message size" +echo "# TYPE postfix_message_size_bytes_max gauge" +max_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1) +echo "postfix_message_size_bytes_max{hostname=\"${HOSTNAME}\"} ${max_size:-0}" + +# Per-recipient domain stats (top domains) +echo "# HELP postfix_recipient_domain_total Messages per recipient domain" +echo "# TYPE postfix_recipient_domain_total counter" +grep -oP 'to=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do + echo "postfix_recipient_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Sender domain stats +echo "# HELP postfix_sender_domain_total Messages per sender domain" +echo "# TYPE postfix_sender_domain_total counter" +grep -oP 'from=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | 
head -20 | while read -r count domain; do + echo "postfix_sender_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Bounce reasons +echo "# HELP postfix_bounce_reason_total Bounces by reason" +echo "# TYPE postfix_bounce_reason_total counter" +bounce_user=$(grep_count 'User unknown' "$LOG_FILE") +bounce_quota=$(grep_count -i 'over quota\|mailbox full' "$LOG_FILE") +bounce_spam=$(grep_count -i 'blocked\|spam\|blacklist' "$LOG_FILE") +bounce_dns=$(grep_count 'Host or domain name not found' "$LOG_FILE") +bounce_refused=$(grep_count 'Connection refused' "$LOG_FILE") +echo "postfix_bounce_reason_total{reason=\"user_unknown\",hostname=\"${HOSTNAME}\"} ${bounce_user}" +echo "postfix_bounce_reason_total{reason=\"over_quota\",hostname=\"${HOSTNAME}\"} ${bounce_quota}" +echo "postfix_bounce_reason_total{reason=\"spam_blocked\",hostname=\"${HOSTNAME}\"} ${bounce_spam}" +echo "postfix_bounce_reason_total{reason=\"dns_error\",hostname=\"${HOSTNAME}\"} ${bounce_dns}" +echo "postfix_bounce_reason_total{reason=\"connection_refused\",hostname=\"${HOSTNAME}\"} ${bounce_refused}" + +# Relay stats +echo "# HELP postfix_relay_total Messages by relay" +echo "# TYPE postfix_relay_total counter" +grep -oP 'relay=\K[^,\[]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count relay; do + echo "postfix_relay_total{relay=\"${relay}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Client connections (top IPs); the (?<!dis) lookbehind skips 'disconnect from' lines so each session is counted once +echo "# HELP postfix_client_connections_total Connections per client IP" +echo "# TYPE postfix_client_connections_total counter" +grep -oP '(?<!dis)connect from \S+\[\K[^\]]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count ip; do + echo "postfix_client_connections_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# TLS stats +echo "# HELP postfix_tls_connections_total TLS connection statistics" +echo "# TYPE postfix_tls_connections_total counter" +tls_in=$(grep_count
'Anonymous TLS connection established from' "$LOG_FILE") +tls_out=$(grep_count 'Anonymous TLS connection established to' "$LOG_FILE") +verified_in=$(grep_count 'Trusted TLS connection established from' "$LOG_FILE") +verified_out=$(grep_count 'Trusted TLS connection established to' "$LOG_FILE") +untrusted_in=$(grep_count 'Untrusted TLS connection established from' "$LOG_FILE") +untrusted_out=$(grep_count 'Untrusted TLS connection established to' "$LOG_FILE") +echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_in}" +echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_out}" +echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_in}" +echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_out}" +echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_in}" +echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_out}" + +# TLS protocol versions +echo "# HELP postfix_tls_protocol_total TLS protocol version usage" +echo "# TYPE postfix_tls_protocol_total counter" +for proto in TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; do + count=$(grep_count "${proto} with cipher" "$LOG_FILE") + echo "postfix_tls_protocol_total{protocol=\"${proto}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Delay stats (queue time) +echo "# HELP postfix_delay_seconds_total Total delay time in seconds" +echo "# TYPE postfix_delay_seconds_total counter" +total_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}') +echo "postfix_delay_seconds_total{hostname=\"${HOSTNAME}\"} ${total_delay}" + +echo "# HELP postfix_delay_seconds_avg Average delivery delay" +echo "# TYPE postfix_delay_seconds_avg gauge" 
+avg_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') +echo "postfix_delay_seconds_avg{hostname=\"${HOSTNAME}\"} ${avg_delay}" + +echo "# HELP postfix_delay_seconds_max Maximum delivery delay" +echo "# TYPE postfix_delay_seconds_max gauge" +max_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1) +echo "postfix_delay_seconds_max{hostname=\"${HOSTNAME}\"} ${max_delay:-0}" + +# Postfix process count +echo "# HELP postfix_processes Number of running postfix processes" +echo "# TYPE postfix_processes gauge" +proc_count=$(pgrep -c -f "postfix" 2>/dev/null) || proc_count=0 +echo "postfix_processes{hostname=\"${HOSTNAME}\"} ${proc_count}" + +# Mail loop detection +echo "# HELP postfix_mail_loop_total Detected mail loops" +echo "# TYPE postfix_mail_loop_total counter" +loops=$(grep_count 'mail forwarding loop' "$LOG_FILE") +echo "postfix_mail_loop_total{hostname=\"${HOSTNAME}\"} ${loops}" + +# Service status +echo "# HELP postfix_up Postfix service status (1=running, 0=stopped)" +echo "# TYPE postfix_up gauge" +if postfix status &>/dev/null || systemctl is-active postfix &>/dev/null; then + echo "postfix_up{hostname=\"${HOSTNAME}\"} 1" +else + echo "postfix_up{hostname=\"${HOSTNAME}\"} 0" +fi + +# Queue age distribution (messages by age bucket) +echo "# HELP postfix_queue_age_bucket Messages in deferred queue by age" +echo "# TYPE postfix_queue_age_bucket gauge" +now=$(date +%s) +for mins in 5 15 60 360 1440; do + count=$(find "${QUEUE_DIR}/deferred" -type f -mmin +${mins} 2>/dev/null | wc -l) + echo "postfix_queue_age_bucket{le=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Delivery attempts (retries) +echo "# HELP postfix_delivery_attempts_total Delivery attempts by result" +echo "# TYPE postfix_delivery_attempts_total counter" +first_attempt=$(grep_count 'delay=.*delays=0/' "$LOG_FILE") +retry_attempt=$(grep -c 'status=deferred.*will be 
retried' "$LOG_FILE" 2>/dev/null) || retry_attempt=0 +echo "postfix_delivery_attempts_total{type=\"first\",hostname=\"${HOSTNAME}\"} ${first_attempt}" +echo "postfix_delivery_attempts_total{type=\"retry\",hostname=\"${HOSTNAME}\"} ${retry_attempt}" + +# DSN status codes breakdown +echo "# HELP postfix_dsn_total Delivery Status Notification codes" +echo "# TYPE postfix_dsn_total counter" +for dsn in "2.0.0" "4.7.1" "5.1.1" "5.1.2" "5.2.1" "5.2.2" "5.4.1" "5.7.1"; do + count=$(grep_count "dsn=${dsn}" "$LOG_FILE") + echo "postfix_dsn_total{code=\"${dsn}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Delay breakdown by phase +echo "# HELP postfix_delay_phase_seconds_total Delay time by phase" +echo "# TYPE postfix_delay_phase_seconds_total counter" +grep -oP 'delays=\K[\d.]+/[\d.]+/[\d.]+/[\d.]+' "$LOG_FILE" 2>/dev/null | awk -F'/' '{ + before_qmgr+=$1; in_qmgr+=$2; conn_setup+=$3; transmission+=$4 +} END { + print "before_qmgr " before_qmgr+0 + print "in_qmgr " in_qmgr+0 + print "conn_setup " conn_setup+0 + print "transmission " transmission+0 +}' | while read -r phase total; do + echo "postfix_delay_phase_seconds_total{phase=\"${phase}\",hostname=\"${HOSTNAME}\"} ${total}" +done + +# RBL rejections (per blocklist) +echo "# HELP postfix_rbl_reject_total Rejections by RBL" +echo "# TYPE postfix_rbl_reject_total counter" +for rbl in "zen.spamhaus.org" "bl.spamcop.net" "b.barracudacentral.org" "dnsbl.sorbs.net"; do + count=$(grep_count "${rbl}" "$LOG_FILE") + echo "postfix_rbl_reject_total{rbl=\"${rbl}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Invalid HELO/EHLO attempts +echo "# HELP postfix_helo_invalid_total Invalid HELO/EHLO attempts" +echo "# TYPE postfix_helo_invalid_total counter" +helo_invalid=$(grep_count 'Helo command rejected' "$LOG_FILE") +echo "postfix_helo_invalid_total{hostname=\"${HOSTNAME}\"} ${helo_invalid}" + +# Anvil rate limiting +echo "# HELP postfix_rate_limited_total Anvil rate limit events" +echo "# TYPE postfix_rate_limited_total 
counter" +rate_conn=$(grep_count 'anvil.*connection rate' "$LOG_FILE") +rate_msg=$(grep_count 'anvil.*message rate' "$LOG_FILE") +rate_rcpt=$(grep_count 'anvil.*recipient rate' "$LOG_FILE") +echo "postfix_rate_limited_total{type=\"connection\",hostname=\"${HOSTNAME}\"} ${rate_conn}" +echo "postfix_rate_limited_total{type=\"message\",hostname=\"${HOSTNAME}\"} ${rate_msg}" +echo "postfix_rate_limited_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${rate_rcpt}" + +# Milter/content filter rejections +echo "# HELP postfix_milter_reject_total Milter rejection events" +echo "# TYPE postfix_milter_reject_total counter" +milter_reject=$(grep_count 'milter-reject' "$LOG_FILE") +echo "postfix_milter_reject_total{hostname=\"${HOSTNAME}\"} ${milter_reject}" + +# Header/body checks rejections +echo "# HELP postfix_header_checks_reject_total Header/body check rejections" +echo "# TYPE postfix_header_checks_reject_total counter" +header_reject=$(grep_count 'header_checks:' "$LOG_FILE") +body_reject=$(grep_count 'body_checks:' "$LOG_FILE") +echo "postfix_header_checks_reject_total{type=\"header\",hostname=\"${HOSTNAME}\"} ${header_reject}" +echo "postfix_header_checks_reject_total{type=\"body\",hostname=\"${HOSTNAME}\"} ${body_reject}" + +# Policy daemon deferrals +echo "# HELP postfix_policyd_total Policy daemon events" +echo "# TYPE postfix_policyd_total counter" +policyd_defer=$(grep_count 'policy.*DEFER' "$LOG_FILE") +policyd_reject=$(grep_count 'policy.*REJECT' "$LOG_FILE") +echo "postfix_policyd_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${policyd_defer}" +echo "postfix_policyd_total{action=\"reject\",hostname=\"${HOSTNAME}\"} ${policyd_reject}" + +# DKIM signing (if OpenDKIM is used) +echo "# HELP postfix_dkim_total DKIM signing/verification results" +echo "# TYPE postfix_dkim_total counter" +dkim_signed=$(grep_count 'DKIM-Signature field added' "$LOG_FILE") +dkim_pass=$(grep_count 'dkim=pass' "$LOG_FILE") +dkim_fail=$(grep_count 'dkim=fail' "$LOG_FILE") +echo
"postfix_dkim_total{action=\"signed\",hostname=\"${HOSTNAME}\"} ${dkim_signed}" +echo "postfix_dkim_total{result=\"pass\",hostname=\"${HOSTNAME}\"} ${dkim_pass}" +echo "postfix_dkim_total{result=\"fail\",hostname=\"${HOSTNAME}\"} ${dkim_fail}" + +# SPF results +echo "# HELP postfix_spf_total SPF check results" +echo "# TYPE postfix_spf_total counter" +for result in pass fail softfail neutral none permerror temperror; do + count=$(grep_count -i "spf=${result}\|SPF: ${result}" "$LOG_FILE") + echo "postfix_spf_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# DMARC results (if OpenDMARC is used) +# OpenDMARC logs: "opendmarc[PID]: QUEUEID: domain.com pass/fail/none" +echo "# HELP postfix_dmarc_total DMARC check results" +echo "# TYPE postfix_dmarc_total counter" +for result in pass fail none; do + count=$(grep -cE "opendmarc\[.*\]: [A-F0-9]+: [^ ]+ ${result}$" "$LOG_FILE" 2>/dev/null) || count=0 + echo "postfix_dmarc_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Hourly volume (traffic patterns); %e space-pads the day of month the way traditional syslog timestamps do +echo "# HELP postfix_hourly_volume Messages processed per hour" +echo "# TYPE postfix_hourly_volume gauge" +current_date=$(date '+%b %e') +for hour in $(seq -w 0 23); do + # Select the lines logged in this hour first, then count the delivered ones among them + count=$(grep "^${current_date} ${hour}:" "$LOG_FILE" 2>/dev/null | grep -c 'status=sent') || count=0 + echo "postfix_hourly_volume{hour=\"${hour}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Recent throughput (last 5/15/60 minutes); the cutoff is compared as a string against each log line, so it uses the same space-padded day (%e) +echo "# HELP postfix_messages_recent Messages sent in recent time windows" +echo "# TYPE postfix_messages_recent gauge" +for mins in 5 15 60; do + since=$(date -d "${mins} minutes ago" '+%b %e %H:%M' 2>/dev/null) || since="" + if [[ -n "$since" ]]; then + count=$(awk -v since="$since" '$0 >= since && /status=sent/' "$LOG_FILE" 2>/dev/null | wc -l) + else + count=0 + fi + echo
"postfix_messages_recent{window=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Active SMTP sessions estimate +echo "# HELP postfix_smtp_sessions_active Estimated active SMTP sessions" +echo "# TYPE postfix_smtp_sessions_active gauge" +smtp_procs=$(pgrep -c -x smtp 2>/dev/null) || smtp_procs=0 +smtpd_procs=$(pgrep -c -x smtpd 2>/dev/null) || smtpd_procs=0 +echo "postfix_smtp_sessions_active{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${smtp_procs}" +echo "postfix_smtp_sessions_active{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${smtpd_procs}" + +# Qmgr active recipients (NOTE(review): this counts newline bytes in the raw active-queue files, not parsed recipient records - confirm the proxy is intended) +echo "# HELP postfix_qmgr_recipients Active recipients in queue manager" +echo "# TYPE postfix_qmgr_recipients gauge" +active_recipients=$(find "${QUEUE_DIR}/active" -type f -exec cat {} \; 2>/dev/null | wc -l) || active_recipients=0 +echo "postfix_qmgr_recipients{hostname=\"${HOSTNAME}\"} ${active_recipients}" + +# Estimated queue memory usage (based on file sizes); the pipeline status is cut's, so a failed du is caught with a :-0 default instead of || +echo "# HELP postfix_queue_size_bytes Total size of queue files in bytes" +echo "# TYPE postfix_queue_size_bytes gauge" +for queue in incoming active deferred hold; do + size=$(du -sb "${QUEUE_DIR}/${queue}" 2>/dev/null | cut -f1) + echo "postfix_queue_size_bytes{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${size:-0}" +done + +# Warnings and fatal errors +echo "# HELP postfix_log_events_total Log events by severity" +echo "# TYPE postfix_log_events_total counter" +warnings=$(grep_count 'warning:' "$LOG_FILE") +fatals=$(grep_count 'fatal:' "$LOG_FILE") +panics=$(grep_count 'panic:' "$LOG_FILE") +echo "postfix_log_events_total{level=\"warning\",hostname=\"${HOSTNAME}\"} ${warnings}" +echo "postfix_log_events_total{level=\"fatal\",hostname=\"${HOSTNAME}\"} ${fatals}" +echo "postfix_log_events_total{level=\"panic\",hostname=\"${HOSTNAME}\"} ${panics}" + +# SMTP response codes +echo "# HELP postfix_smtp_response_total SMTP response codes" +echo "# TYPE postfix_smtp_response_total counter" +smtp_2xx=$(grep_count
'status=sent' "$LOG_FILE") +smtp_4xx=$(grep_count 'status=deferred' "$LOG_FILE") +smtp_5xx=$(grep_count 'status=bounced' "$LOG_FILE") +echo "postfix_smtp_response_total{code=\"2xx\",hostname=\"${HOSTNAME}\"} ${smtp_2xx}" +echo "postfix_smtp_response_total{code=\"4xx\",hostname=\"${HOSTNAME}\"} ${smtp_4xx}" +echo "postfix_smtp_response_total{code=\"5xx\",hostname=\"${HOSTNAME}\"} ${smtp_5xx}" + +# Specific SMTP error codes (check multiple patterns) +# Postfix logs SMTP errors in various formats: +# - "said: 550 5.1.1 User unknown" +# - "status=bounced (host ... said: 550 ...)" +# - "dsn=5.1.1" (DSN codes start with same digit) +# - Remote server responses with just the code +echo "# HELP postfix_smtp_error_code_total Specific SMTP error codes" +echo "# TYPE postfix_smtp_error_code_total counter" +for code in 421 450 451 452 500 501 502 503 504 550 551 552 553 554; do + # Multiple patterns: "said: 550", "(550 ", "smtp.*550", host responses + count=$(grep -cE "(said: ${code}|said:${code}|\(${code} |host .*\[.*\].*${code} |smtp.*${code}[^0-9])" "$LOG_FILE" 2>/dev/null) || count=0 + echo "postfix_smtp_error_code_total{code=\"${code}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# TLS cipher suites (top 10) +# Requires smtpd_tls_loglevel=1 and smtp_tls_loglevel=1 in main.cf +# Postfix logs: "TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)" +echo "# HELP postfix_tls_cipher_total TLS cipher suite usage" +echo "# TYPE postfix_tls_cipher_total counter" +cipher_output=$({ + grep -oP 'with cipher \K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null + grep -oP 'cipher=\K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null +} | sort | uniq -c | sort -rn | head -10) +if [[ -n "$cipher_output" ]]; then + echo "$cipher_output" | while read -r count cipher; do + [[ -n "$cipher" ]] && echo "postfix_tls_cipher_total{cipher=\"${cipher}\",hostname=\"${HOSTNAME}\"} ${count}" + done +else + echo "postfix_tls_cipher_total{cipher=\"unknown\",hostname=\"${HOSTNAME}\"} 0" +fi + +# TLS certificate 
expiry (check multiple locations) +echo "# HELP postfix_cert_expiry_seconds Seconds until TLS certificate expires" +echo "# TYPE postfix_cert_expiry_seconds gauge" +CERT_FILE="" +for cert in "/etc/ssl/certs/postfix.pem" \ + "/home/user-data/ssl/ssl_certificate.pem" \ + "/etc/letsencrypt/live/$(hostname)/fullchain.pem" \ + "/etc/letsencrypt/live/$(hostname -f)/fullchain.pem" \ + "/etc/ssl/certs/ssl-cert-snakeoil.pem"; do + if [[ -f "$cert" ]]; then + CERT_FILE="$cert" + break + fi +done +cert_seconds=0 +if [[ -n "$CERT_FILE" ]] && command -v openssl &>/dev/null; then + expiry=$(openssl x509 -enddate -noout -in "$CERT_FILE" 2>/dev/null | cut -d= -f2) + if [[ -n "$expiry" ]]; then + expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null) || expiry_epoch=0 + now=$(date +%s) + cert_seconds=$((expiry_epoch - now)) + fi +fi +echo "postfix_cert_expiry_seconds{hostname=\"${HOSTNAME}\"} ${cert_seconds}" + +# LMTP delivery metrics (Postfix side) +# Matches: "postfix/lmtp[PID]: ... status=sent" +echo "# HELP postfix_lmtp_delivery_total LMTP delivery stats" +echo "# TYPE postfix_lmtp_delivery_total counter" +lmtp_sent=$(grep_count 'postfix/lmtp\[.*status=sent' "$LOG_FILE") +lmtp_deferred=$(grep_count 'postfix/lmtp\[.*status=deferred' "$LOG_FILE") +lmtp_bounced=$(grep_count 'postfix/lmtp\[.*status=bounced' "$LOG_FILE") +echo "postfix_lmtp_delivery_total{status=\"sent\",hostname=\"${HOSTNAME}\"} ${lmtp_sent}" +echo "postfix_lmtp_delivery_total{status=\"deferred\",hostname=\"${HOSTNAME}\"} ${lmtp_deferred}" +echo "postfix_lmtp_delivery_total{status=\"bounced\",hostname=\"${HOSTNAME}\"} ${lmtp_bounced}" + +echo "# HELP postfix_lmtp_connections_total LMTP connection events" +echo "# TYPE postfix_lmtp_connections_total counter" +lmtp_connect=$(grep_count 'postfix/lmtp\[.*connect' "$LOG_FILE") +lmtp_disconnect=$(grep_count 'postfix/lmtp\[.*disconnect' "$LOG_FILE") +lmtp_timeout=$(grep_count 'postfix/lmtp\[.*timeout' "$LOG_FILE") +lmtp_refused=$(grep_count 'postfix/lmtp\[.*Connection 
refused' "$LOG_FILE") +echo "postfix_lmtp_connections_total{type=\"connect\",hostname=\"${HOSTNAME}\"} ${lmtp_connect}" +echo "postfix_lmtp_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${lmtp_disconnect}" +echo "postfix_lmtp_connections_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${lmtp_timeout}" +echo "postfix_lmtp_connections_total{type=\"refused\",hostname=\"${HOSTNAME}\"} ${lmtp_refused}" + +echo "# HELP postfix_lmtp_delay_seconds LMTP delivery delay stats" +echo "# TYPE postfix_lmtp_delay_seconds gauge" +lmtp_avg_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') +lmtp_max_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | sort -rn | head -1) +echo "postfix_lmtp_delay_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${lmtp_avg_delay}" +echo "postfix_lmtp_delay_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${lmtp_max_delay:-0}" + +# Dovecot LMTP/LDA delivery stats (check multiple log locations) +DOVECOT_LOG="" +for log in "/var/log/dovecot.log" "/var/log/mail.log" "/var/log/syslog"; do + if [[ -f "$log" ]] && grep -q 'dovecot' "$log" 2>/dev/null; then + DOVECOT_LOG="$log" + break + fi +done +if [[ -n "$DOVECOT_LOG" ]]; then + echo "# HELP postfix_dovecot_delivery_total Dovecot local delivery stats" + echo "# TYPE postfix_dovecot_delivery_total counter" + lmtp_delivered=$(grep_count 'lmtp.*saved mail' "$DOVECOT_LOG") + lda_delivered=$(grep_count 'lda.*saved mail' "$DOVECOT_LOG") + echo "postfix_dovecot_delivery_total{type=\"lmtp\",hostname=\"${HOSTNAME}\"} ${lmtp_delivered}" + echo "postfix_dovecot_delivery_total{type=\"lda\",hostname=\"${HOSTNAME}\"} ${lda_delivered}" + + echo "# HELP postfix_dovecot_sieve_total Dovecot sieve filter actions" + echo "# TYPE postfix_dovecot_sieve_total counter" + sieve_fileinto=$(grep_count 'sieve.*fileinto' "$DOVECOT_LOG") + sieve_discard=$(grep_count 
'sieve.*discard' "$DOVECOT_LOG") + sieve_redirect=$(grep_count 'sieve.*redirect' "$DOVECOT_LOG") + echo "postfix_dovecot_sieve_total{action=\"fileinto\",hostname=\"${HOSTNAME}\"} ${sieve_fileinto}" + echo "postfix_dovecot_sieve_total{action=\"discard\",hostname=\"${HOSTNAME}\"} ${sieve_discard}" + echo "postfix_dovecot_sieve_total{action=\"redirect\",hostname=\"${HOSTNAME}\"} ${sieve_redirect}" + + echo "# HELP postfix_dovecot_auth_total Dovecot authentication attempts" + echo "# TYPE postfix_dovecot_auth_total counter" + auth_success=$(grep_count 'auth.*successful' "$DOVECOT_LOG") + auth_fail=$(grep_count 'auth.*failed' "$DOVECOT_LOG") + echo "postfix_dovecot_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${auth_success}" + echo "postfix_dovecot_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${auth_fail}" + + echo "# HELP postfix_dovecot_imap_connections_total Dovecot IMAP connections" + echo "# TYPE postfix_dovecot_imap_connections_total counter" + imap_login=$(grep_count 'imap-login:.*Login' "$DOVECOT_LOG") + imap_disconnect=$(grep_count 'imap.*Disconnected' "$DOVECOT_LOG") + echo "postfix_dovecot_imap_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${imap_login}" + echo "postfix_dovecot_imap_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${imap_disconnect}" + + echo "# HELP postfix_dovecot_pop3_connections_total Dovecot POP3 connections" + echo "# TYPE postfix_dovecot_pop3_connections_total counter" + pop3_login=$(grep_count 'pop3-login:.*Login' "$DOVECOT_LOG") + pop3_disconnect=$(grep_count 'pop3.*Disconnected' "$DOVECOT_LOG") + echo "postfix_dovecot_pop3_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${pop3_login}" + echo "postfix_dovecot_pop3_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${pop3_disconnect}" +fi + +# SpamAssassin metrics (supports spamd, spampd, and amavis) +SPAM_LOG="/var/log/mail.log" + +# Detect which spam daemon is in use (check spampd first as it's more specific) 
+if grep -q 'spampd' "$SPAM_LOG" 2>/dev/null; then + SPAM_DAEMON="spampd" +elif grep -q 'spamd\[' "$SPAM_LOG" 2>/dev/null; then + SPAM_DAEMON="spamd" +elif grep -q 'amavis' "$SPAM_LOG" 2>/dev/null; then + SPAM_DAEMON="amavis" +else + SPAM_DAEMON="" +fi + +if [[ -n "$SPAM_DAEMON" ]]; then + echo "# HELP postfix_spamassassin_total SpamAssassin scan results" + echo "# TYPE postfix_spamassassin_total counter" + + if [[ "$SPAM_DAEMON" == "spampd" ]]; then + # spampd format: "clean message <...> (SCORE/THRESHOLD)" or "identified spam <...> (SCORE/THRESHOLD)" + spam_identified=$(grep_count 'spampd.*identified spam' "$SPAM_LOG") + ham_clean=$(grep_count 'spampd.*clean message' "$SPAM_LOG") + elif [[ "$SPAM_DAEMON" == "amavis" ]]; then + spam_identified=$(grep_count 'amavis.*Blocked SPAM' "$SPAM_LOG") + ham_clean=$(grep_count 'amavis.*Passed CLEAN' "$SPAM_LOG") + else + spam_identified=$(grep_count 'spamd.*identified spam' "$SPAM_LOG") + ham_clean=$(grep_count 'spamd.*clean message' "$SPAM_LOG") + fi + echo "postfix_spamassassin_total{result=\"spam\",hostname=\"${HOSTNAME}\"} ${spam_identified}" + echo "postfix_spamassassin_total{result=\"ham\",hostname=\"${HOSTNAME}\"} ${ham_clean}" + + echo "# HELP postfix_spamassassin_score_total SpamAssassin score distribution" + echo "# TYPE postfix_spamassassin_score_total counter" + + if [[ "$SPAM_DAEMON" == "spampd" ]]; then + # spampd format: (SCORE/THRESHOLD) like (-0.30/5.00) or (15.2/5.0) + score_neg=$(grep -oP 'spampd.*\(\K-[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | wc -l) + score_0_5=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}') + score_5_10=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}') + score_10_plus=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}') + elif [[ "$SPAM_DAEMON" == "amavis" ]]; then + score_neg=$(grep -oP 
'amavis.*Hits: \K-[\d.]+' "$SPAM_LOG" 2>/dev/null | wc -l) + score_0_5=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}') + score_5_10=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}') + score_10_plus=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}') + else + score_neg=0 + score_0_5=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}') + score_5_10=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}') + score_10_plus=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}') + fi + echo "postfix_spamassassin_score_total{bucket=\"negative\",hostname=\"${HOSTNAME}\"} ${score_neg:-0}" + echo "postfix_spamassassin_score_total{bucket=\"0-5\",hostname=\"${HOSTNAME}\"} ${score_0_5}" + echo "postfix_spamassassin_score_total{bucket=\"5-10\",hostname=\"${HOSTNAME}\"} ${score_5_10}" + echo "postfix_spamassassin_score_total{bucket=\"10+\",hostname=\"${HOSTNAME}\"} ${score_10_plus}" + + echo "# HELP postfix_spamassassin_score_avg Average SpamAssassin score" + echo "# TYPE postfix_spamassassin_score_avg gauge" + if [[ "$SPAM_DAEMON" == "spampd" ]]; then + avg_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') + elif [[ "$SPAM_DAEMON" == "amavis" ]]; then + avg_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') + else + avg_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') + fi + echo 
"postfix_spamassassin_score_avg{hostname=\"${HOSTNAME}\"} ${avg_score}" + + echo "# HELP postfix_spamassassin_score_max Maximum SpamAssassin score seen" + echo "# TYPE postfix_spamassassin_score_max gauge" + if [[ "$SPAM_DAEMON" == "spampd" ]]; then + max_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + elif [[ "$SPAM_DAEMON" == "amavis" ]]; then + max_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + else + max_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + fi + echo "postfix_spamassassin_score_max{hostname=\"${HOSTNAME}\"} ${max_score:-0}" + + # Messages scanned total + echo "# HELP postfix_spamassassin_scanned_total Total messages scanned" + echo "# TYPE postfix_spamassassin_scanned_total counter" + scanned_total=$((spam_identified + ham_clean)) + echo "postfix_spamassassin_scanned_total{hostname=\"${HOSTNAME}\"} ${scanned_total}" + + echo "# HELP postfix_spamassassin_scan_time_seconds SpamAssassin scan time stats" + echo "# TYPE postfix_spamassassin_scan_time_seconds gauge" + if [[ "$SPAM_DAEMON" == "spampd" ]]; then + # spampd format: "in 2.15s" + avg_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') + max_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + else + avg_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}') + max_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + fi + echo "postfix_spamassassin_scan_time_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_time:-0}" + echo "postfix_spamassassin_scan_time_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_time:-0}" + + # spampd-specific: message size stats + if 
[[ "$SPAM_DAEMON" == "spampd" ]]; then + echo "# HELP postfix_spamassassin_message_size_bytes SpamAssassin processed message sizes" + echo "# TYPE postfix_spamassassin_message_size_bytes gauge" + avg_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}') + max_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1) + echo "postfix_spamassassin_message_size_bytes{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_size:-0}" + echo "postfix_spamassassin_message_size_bytes{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_size:-0}" + + echo "# HELP postfix_spamassassin_threshold SpamAssassin spam threshold" + echo "# TYPE postfix_spamassassin_threshold gauge" + threshold=$(grep -oP 'spampd.*/-?\K[\d.]+(?=\))' "$SPAM_LOG" 2>/dev/null | head -1) + echo "postfix_spamassassin_threshold{hostname=\"${HOSTNAME}\"} ${threshold:-5}" + fi + + # SpamAssassin rules (only available with spamd or if logging to separate file) + # NOTE: spampd (used by Mail-in-a-Box) does NOT log individual rules to mail.log + # Rules are only available if using standalone spamd with verbose logging or a separate log file + SA_RULES_LOG="" + for log in "/var/log/spamassassin.log" "/var/log/spamd.log" "$SPAM_LOG"; do + if [[ -f "$log" ]] && grep -q 'tests=' "$log" 2>/dev/null; then + SA_RULES_LOG="$log" + break + fi + done + if [[ -n "$SA_RULES_LOG" ]]; then + echo "# HELP postfix_spamassassin_rules_total Top SpamAssassin rules triggered" + echo "# TYPE postfix_spamassassin_rules_total counter" + grep -oP 'tests=\K[^,\]\s]+' "$SA_RULES_LOG" 2>/dev/null | tr ',' '\n' | tr -d ' ' | sort | uniq -c | sort -rn | head -15 | while read -r count rule; do + [[ -n "$rule" ]] && echo "postfix_spamassassin_rules_total{rule=\"${rule}\",hostname=\"${HOSTNAME}\"} ${count}" + done + fi + + # Daemon status + echo "# HELP postfix_spamassassin_up SpamAssassin daemon status" + echo "# TYPE 
postfix_spamassassin_up gauge" + if pgrep -f "${SPAM_DAEMON}" &>/dev/null; then + echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 1" + else + echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 0" + fi + + echo "# HELP postfix_spamassassin_processes Number of spam daemon processes" + echo "# TYPE postfix_spamassassin_processes gauge" + spam_procs=$(pgrep -c -f "${SPAM_DAEMON}" 2>/dev/null) || spam_procs=0 + echo "postfix_spamassassin_processes{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} ${spam_procs}" +fi + +# Greylisting stats (postgrey) +echo "# HELP postfix_greylist_total Greylisting events" +echo "# TYPE postfix_greylist_total counter" +greylist_defer=$(grep_count 'action=greylist' "$LOG_FILE") +greylist_pass=$(grep_count 'action=pass.*reason=triplet' "$LOG_FILE") +greylist_whitelist=$(grep_count 'action=pass.*reason=client whitelist\|action=pass, reason=client AWL' "$LOG_FILE") +echo "postfix_greylist_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${greylist_defer}" +echo "postfix_greylist_total{action=\"pass\",hostname=\"${HOSTNAME}\"} ${greylist_pass}" +echo "postfix_greylist_total{action=\"whitelist\",hostname=\"${HOSTNAME}\"} ${greylist_whitelist}" + +echo "# HELP postfix_greylist_reason_total Greylisting by reason" +echo "# TYPE postfix_greylist_reason_total counter" +grey_new=$(grep_count 'reason=new' "$LOG_FILE") +grey_early=$(grep_count 'reason=early-retry' "$LOG_FILE") +grey_triplet=$(grep_count 'reason=triplet found' "$LOG_FILE") +echo "postfix_greylist_reason_total{reason=\"new\",hostname=\"${HOSTNAME}\"} ${grey_new}" +echo "postfix_greylist_reason_total{reason=\"early_retry\",hostname=\"${HOSTNAME}\"} ${grey_early}" +echo "postfix_greylist_reason_total{reason=\"triplet_found\",hostname=\"${HOSTNAME}\"} ${grey_triplet}" + +echo "# HELP postfix_greylist_delay_seconds Greylist delay statistics" +echo "# TYPE postfix_greylist_delay_seconds gauge" +avg_delay=$(grep -oP 
'delay=\K\d+' "$LOG_FILE" 2>/dev/null | grep -v '^0$' | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}') +max_delay=$(grep -oP 'postgrey.*delay=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1) +echo "postfix_greylist_delay_seconds{type=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_delay:-0}" +echo "postfix_greylist_delay_seconds{type=\"max\",hostname=\"${HOSTNAME}\"} ${max_delay:-0}" + +echo "# HELP postfix_greylist_clients_total Unique greylisted client IPs" +echo "# TYPE postfix_greylist_clients_total gauge" +grey_clients=$(grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'client_address=\K[^,]+' | sort -u | wc -l) +echo "postfix_greylist_clients_total{hostname=\"${HOSTNAME}\"} ${grey_clients:-0}" + +echo "# HELP postfix_greylist_top_senders Top greylisted sender domains" +echo "# TYPE postfix_greylist_top_senders counter" +grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'sender=\K[^,]+' | sed 's/.*@//' | sort | uniq -c | sort -rn | head -10 | while read -r count domain; do + [[ -n "$domain" ]] && echo "postfix_greylist_top_senders{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +# Cleanup daemon stats (total messages entering system) +echo "# HELP postfix_cleanup_total Messages processed by cleanup daemon" +echo "# TYPE postfix_cleanup_total counter" +cleanup_count=$(grep_count 'message-id=' "$LOG_FILE") +echo "postfix_cleanup_total{hostname=\"${HOSTNAME}\"} ${cleanup_count}" + +# Virtual mailbox errors +echo "# HELP postfix_virtual_errors_total Virtual mailbox lookup errors" +echo "# TYPE postfix_virtual_errors_total counter" +virtual_not_found=$(grep_count 'mailbox not found\|User unknown in virtual' "$LOG_FILE") +echo "postfix_virtual_errors_total{hostname=\"${HOSTNAME}\"} ${virtual_not_found}" + +# Address verification failures +echo "# HELP postfix_address_verify_total Address verification events" +echo "# TYPE postfix_address_verify_total counter" +verify_fail=$(grep_count 'address 
verification failed' "$LOG_FILE") +verify_success=$(grep_count 'address verification succeeded\|cache hit' "$LOG_FILE") +echo "postfix_address_verify_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${verify_fail}" +echo "postfix_address_verify_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${verify_success}" + +# Postfix master process uptime (based on pid file age) +echo "# HELP postfix_master_uptime_seconds Postfix master process uptime" +echo "# TYPE postfix_master_uptime_seconds gauge" +MASTER_PID_FILE="/var/spool/postfix/pid/master.pid" +if [[ -f "$MASTER_PID_FILE" ]]; then + master_start=$(stat -c %Y "$MASTER_PID_FILE" 2>/dev/null) || master_start=0 + if [[ $master_start -gt 0 ]]; then + uptime_seconds=$(($(date +%s) - master_start)) + else + uptime_seconds=0 + fi +else + uptime_seconds=0 +fi +echo "postfix_master_uptime_seconds{hostname=\"${HOSTNAME}\"} ${uptime_seconds}" + +# DNS lookup failures +echo "# HELP postfix_dns_errors_total DNS lookup errors" +echo "# TYPE postfix_dns_errors_total counter" +dns_not_found=$(grep_count 'Host not found\|Name service error\|Host or domain name not found' "$LOG_FILE") +dns_timeout=$(grep_count 'DNS lookup.*timeout\|name server.*timeout' "$LOG_FILE") +dns_servfail=$(grep_count 'SERVFAIL\|server failure' "$LOG_FILE") +echo "postfix_dns_errors_total{type=\"not_found\",hostname=\"${HOSTNAME}\"} ${dns_not_found}" +echo "postfix_dns_errors_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${dns_timeout}" +echo "postfix_dns_errors_total{type=\"servfail\",hostname=\"${HOSTNAME}\"} ${dns_servfail}" + +# STARTTLS usage - count TLS connections vs total SMTP connections +# "used" = successful TLS connections (inbound + outbound) +# "total" = total SMTP connections for ratio calculation +echo "# HELP postfix_starttls_total STARTTLS connection counts" +echo "# TYPE postfix_starttls_total counter" +starttls_inbound=$(grep_count 'TLS connection established from' "$LOG_FILE") +starttls_outbound=$(grep_count 'TLS connection 
established to' "$LOG_FILE") +echo "postfix_starttls_total{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${starttls_inbound}" +echo "postfix_starttls_total{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${starttls_outbound}" + +# Sender/recipient access rejections +echo "# HELP postfix_access_reject_total Sender/recipient access rejections" +echo "# TYPE postfix_access_reject_total counter" +sender_reject=$(grep_count 'Sender address rejected' "$LOG_FILE") +recipient_reject=$(grep_count 'Recipient address rejected' "$LOG_FILE") +client_reject=$(grep_count 'Client host rejected' "$LOG_FILE") +echo "postfix_access_reject_total{type=\"sender\",hostname=\"${HOSTNAME}\"} ${sender_reject}" +echo "postfix_access_reject_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${recipient_reject}" +echo "postfix_access_reject_total{type=\"client\",hostname=\"${HOSTNAME}\"} ${client_reject}" + +# Queue filesystem usage +echo "# HELP postfix_queue_filesystem_usage_percent Queue filesystem usage percentage" +echo "# TYPE postfix_queue_filesystem_usage_percent gauge" +queue_usage=$(df "${QUEUE_DIR}" 2>/dev/null | awk 'NR==2 {gsub(/%/,""); print $5}') || queue_usage=0 +echo "postfix_queue_filesystem_usage_percent{hostname=\"${HOSTNAME}\"} ${queue_usage:-0}" + +# Postfix file descriptor count (for master process) +echo "# HELP postfix_file_descriptors Open file descriptors by postfix" +echo "# TYPE postfix_file_descriptors gauge" +if [[ -f "$MASTER_PID_FILE" ]]; then + master_pid=$(tr -d '[:space:]' < "$MASTER_PID_FILE" 2>/dev/null) + if [[ -n "$master_pid" ]] && [[ -d "/proc/${master_pid}/fd" ]]; then + fd_count=$(find "/proc/${master_pid}/fd" -maxdepth 1 2>/dev/null | wc -l) + else + fd_count=0 + fi +else + fd_count=0 +fi +echo "postfix_file_descriptors{hostname=\"${HOSTNAME}\"} ${fd_count}" + +# Script execution time +# Dovecot IMAP/POP3 login metrics +echo "# HELP dovecot_logins_total Successful logins by protocol" +echo "# TYPE dovecot_logins_total counter" +imap_logins=$(grep_count 
'imap-login: Info: Login:' "$LOG_FILE") +pop3_logins=$(grep_count 'pop3-login: Info: Login:' "$LOG_FILE") +echo "dovecot_logins_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_logins}" +echo "dovecot_logins_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_logins}" + +echo "# HELP dovecot_login_auth_method_total Logins by authentication method" +echo "# TYPE dovecot_login_auth_method_total counter" +for method in PLAIN LOGIN CRAM-MD5 DIGEST-MD5; do + count=$(grep_count "Login:.*method=${method}" "$LOG_FILE") + echo "dovecot_login_auth_method_total{method=\"${method}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +echo "# HELP dovecot_login_tls_total Logins with/without TLS" +echo "# TYPE dovecot_login_tls_total counter" +tls_logins=$(grep -c 'Login:.*TLS' "$LOG_FILE" 2>/dev/null) || tls_logins=0 +notls_logins=$(grep 'Login:' "$LOG_FILE" 2>/dev/null | grep -cv 'TLS') || notls_logins=0 +echo "dovecot_login_tls_total{tls=\"yes\",hostname=\"${HOSTNAME}\"} ${tls_logins}" +echo "dovecot_login_tls_total{tls=\"no\",hostname=\"${HOSTNAME}\"} ${notls_logins}" + +echo "# HELP dovecot_login_failed_total Failed login attempts" +echo "# TYPE dovecot_login_failed_total counter" +imap_failed=$(grep_count 'imap-login: Info: Aborted login\|imap-login:.*auth failed' "$LOG_FILE") +pop3_failed=$(grep_count 'pop3-login: Info: Aborted login\|pop3-login:.*auth failed' "$LOG_FILE") +echo "dovecot_login_failed_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_failed}" +echo "dovecot_login_failed_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_failed}" + +echo "# HELP dovecot_login_user_total Logins per user (top 20)" +echo "# TYPE dovecot_login_user_total counter" +grep -oP 'Login: user=<\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count user; do + echo "dovecot_login_user_total{user=\"${user}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +echo "# HELP dovecot_login_client_ip_total Logins per client IP (top 20)" +echo 
"# TYPE dovecot_login_client_ip_total counter" +grep -oP 'Login:.*rip=\K[^,]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count ip; do + echo "dovecot_login_client_ip_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}" +done + +local END_TIME +END_TIME=$(date +%s.%N) +local DURATION +DURATION=$(echo "$END_TIME - $START_TIME" | bc) +echo "# HELP postfix_collector_duration_seconds Time taken to collect metrics" +echo "# TYPE postfix_collector_duration_seconds gauge" +echo "postfix_collector_duration_seconds{hostname=\"${HOSTNAME}\"} ${DURATION}" + +echo "# HELP postfix_collector_last_run_timestamp Unix timestamp of last collection" +echo "# TYPE postfix_collector_last_run_timestamp gauge" +echo "postfix_collector_last_run_timestamp{hostname=\"${HOSTNAME}\"} $(date +%s)" +} + +# ============================================================================ +# HTTP SERVER MODE +# ============================================================================ + +run_http_server() { + echo "Starting Postfix metrics exporter on port $HTTP_PORT..." >&2 + + if ! command -v nc >/dev/null 2>&1; then + echo "ERROR: netcat (nc) required for HTTP mode" >&2 + exit 1 + fi + + while true; do + { + read -r request + if [[ "$request" =~ ^GET\ /metrics ]]; then + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r" + generate_metrics + else + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r" + cat < + +Postfix Metrics Exporter + +

Postfix Prometheus Exporter

+

Metrics

+

Available Metrics

+
    +
  • Queue sizes and ages
  • +
  • Message counts by status
  • +
  • TLS connection stats
  • +
  • SASL authentication
  • +
  • Bounce reasons
  • +
  • SpamAssassin scores
  • +
  • Dovecot delivery stats
  • +
+ + +EOF + fi + } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null + done +} + +# ============================================================================ +# MAIN EXECUTION +# ============================================================================ + +main() { + parse_args "$@" + + if [ "$HTTP_MODE" = true ]; then + run_http_server + elif [ -n "$OUTPUT_FILE" ]; then + # Textfile collector mode: write atomically using temp file + local output_dir + output_dir="$(dirname "$OUTPUT_FILE")" + mkdir -p "$output_dir" + + # Create temp file in SAME directory for atomic rename (same filesystem) + local temp_file + temp_file=$(mktemp "${output_dir}/.postfix_metrics.XXXXXX") + + # Generate metrics to temp file + if ! generate_metrics > "$temp_file" 2>/dev/null; then + rm -f "$temp_file" + echo "ERROR: Failed to generate metrics" >&2 + exit 1 + fi + + # Validate: file must exist and have content + local file_lines + file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0) + + if [ "$file_lines" -lt 10 ]; then + rm -f "$temp_file" + echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2 + exit 1 + fi + + # Set permissions before move + chmod 644 "$temp_file" + + # Atomic rename - no gap where file is missing + mv -f "$temp_file" "$OUTPUT_FILE" + + echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2 + else + # Default: output to stdout + generate_metrics + fi +} + +# Execute main function with all script arguments +main "$@" diff --git a/salt-key-manager.sh b/salt-key-manager.sh new file mode 100644 index 0000000..7dda128 --- /dev/null +++ b/salt-key-manager.sh @@ -0,0 +1,535 @@ +#!/bin/bash + +################################################ +#### Salt Key Manager #### +#### Automate salt-key operations #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### Version: 1.00-030526 #### +################################################ + +set -o pipefail + +SCRIPT_NAME=$(basename "$0") +readonly SCRIPT_NAME + 
+# Default configuration
+readonly DEFAULT_STALE_DAYS=30
+readonly DEFAULT_CACHE_DIR="/var/cache/salt/master/minions"
+
+# Configuration variables
+DEBUG=${DEBUG:-}
+
+# Runtime flags (filled in by parse_arguments)
+ACTION=""
+TARGET_MINION=""
+STALE_DAYS=$DEFAULT_STALE_DAYS
+EXPORT_PATH=""
+BULK_FILE=""
+AUTO_YES=false
+USE_COLOR=true
+
+# Colors (stay empty unless setup_colors enables them)
+C_GREEN=""
+C_YELLOW=""
+C_RED=""
+C_CYAN=""
+C_RESET=""
+
+# Report the failing line and exit with the failing command's status.
+#   $1 - exit code, $2 - line number (both supplied by the ERR trap below)
+handle_error() {
+    local exit_code=$1
+    local line_number=$2
+    echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2
+    exit "$exit_code"
+}
+
+trap 'handle_error $? $LINENO' ERR
+
+# Print a message to stderr only when DEBUG is non-empty.
+debug_echo() {
+    if [[ -n "$DEBUG" ]]; then
+        echo "[DEBUG] $*" >&2
+    fi
+}
+
+# Informational message on stdout.
+log_info() {
+    echo "[INFO] $*"
+}
+
+# Warning on stderr.
+log_warn() {
+    echo "[WARN] $*" >&2
+}
+
+# Error on stderr.
+log_error() {
+    echo "[ERROR] $*" >&2
+}
+
+# Enable ANSI colors when requested and stdout is a terminal.
+setup_colors() {
+    if [[ "$USE_COLOR" == true ]] && [[ -t 1 ]]; then
+        C_GREEN='\033[0;32m'
+        C_YELLOW='\033[0;33m'
+        C_RED='\033[0;31m'
+        C_CYAN='\033[0;36m'
+        C_RESET='\033[0m'
+    fi
+}
+
+# Print usage text to stdout.
+show_help() {
+    cat << EOF
+Usage: $SCRIPT_NAME [ACTION] [OPTIONS]
+
+Manage Salt minion keys — accept, reject, delete, verify, rotate, and
+clean up stale keys.
+
+ACTIONS:
+    --list                  List all keys by status with counts
+    --verify                Show pending keys with fingerprints for verification
+    --accept-all            Accept all pending keys
+    --accept MINION         Accept a specific minion key
+    --reject MINION         Reject a specific minion key
+    --delete MINION         Delete a specific minion key
+    --rotate MINION         Rotate a minion key (delete, re-accept on reconnect)
+    --cleanup-stale [DAYS]  Delete keys for minions not seen in DAYS days (default: $DEFAULT_STALE_DAYS)
+    --export PATH           Export all accepted key fingerprints to a file
+    --bulk-accept FILE      Accept minions listed in a file (one per line)
+
+OPTIONS:
+    --yes                   Skip confirmation prompts
+    --no-color              Disable colored output
+    --help, -h              Show this help message
+
+ENVIRONMENT VARIABLES:
+    DEBUG                   Enable debug output
+
+EXAMPLES:
+    # List all keys with status
+    sudo $SCRIPT_NAME --list
+
+    # Show pending keys for verification
+    sudo $SCRIPT_NAME --verify
+
+    # Accept all pending keys
+    sudo $SCRIPT_NAME --accept-all --yes
+
+    # Accept a specific minion
+    sudo $SCRIPT_NAME --accept web01
+
+    # Clean up minions not seen in 60 days
+    sudo $SCRIPT_NAME --cleanup-stale 60
+
+    # Export fingerprints for auditing
+    sudo $SCRIPT_NAME --export /tmp/salt-keys.txt
+
+    # Bulk accept from a file
+    sudo $SCRIPT_NAME --bulk-accept /tmp/new-minions.txt --yes
+EOF
+}
+
+# Ask the user to confirm an action.
+#   $1 - question text (the " [y/N] " suffix is appended here)
+# Returns 0 when AUTO_YES is set or the reply is y/Y, non-zero otherwise
+# (including when stdin is closed and nothing can be read).
+confirm() {
+    if [[ "$AUTO_YES" == true ]]; then
+        return 0
+    fi
+    local reply=""
+    echo "$1 [y/N] "
+    read -r reply
+    [[ "$reply" == "y" || "$reply" == "Y" ]]
+}
+
+# Print the fingerprint salt-key reports for one minion (empty if none).
+#   $1 - minion id
+key_fingerprint() {
+    salt-key -f "$1" 2>/dev/null | grep -v "^Unaccepted Keys:" | awk '{print $2}' | head -1
+}
+
+# Print the number of keys in the given state as a single integer.
+#   $1 - accepted | unaccepted | denied | rejected
+# The header line ("Accepted Keys:", "Unaccepted Keys:", ...) is capitalized,
+# so it has to be filtered case-insensitively or it is counted as a key.
+# grep -c always prints a count, even 0, but exits 1 when the count is 0;
+# "|| true" absorbs that status without printing a second number.
+count_keys() {
+    local status="$1"
+    salt-key --list "$status" 2>/dev/null \
+        | grep -v -i "^${status} keys:" \
+        | grep -c -v '^[[:space:]]*$' || true
+}
+
+# Show per-state key counts followed by the keys in each non-empty state.
+do_list() {
+    echo "Salt Key Status"
+    echo "==============="
+    echo ""
+
+    local accepted unaccepted denied rejected
+    accepted=$(count_keys "accepted")
+    unaccepted=$(count_keys "unaccepted")
+    denied=$(count_keys "denied")
+    rejected=$(count_keys "rejected")
+
+    printf ' %bAccepted:%b %d\n' "$C_GREEN" "$C_RESET" "$accepted"
+    printf ' %bPending:%b %d\n' "$C_YELLOW" "$C_RESET" "$unaccepted"
+    printf ' %bDenied:%b %d\n' "$C_RED" "$C_RESET" "$denied"
+    printf ' %bRejected:%b %d\n' "$C_RED" "$C_RESET" "$rejected"
+    echo ""
+
+    if ((accepted > 0)); then
+        printf '%bAccepted Keys:%b\n' "$C_GREEN" "$C_RESET"
+        salt-key --list accepted 2>/dev/null | grep -v "^Accepted Keys:" | sed 's/^/ /'
+        echo ""
+    fi
+
+    if ((unaccepted > 0)); then
+        printf '%bPending Keys:%b\n' "$C_YELLOW" "$C_RESET"
+        salt-key --list unaccepted 2>/dev/null | grep -v "^Unaccepted Keys:" | sed 's/^/ /'
+        echo ""
+    fi
+
+    if ((denied > 0)); then
+        printf '%bDenied Keys:%b\n' "$C_RED" "$C_RESET"
+        salt-key --list denied 2>/dev/null | grep -v "^Denied Keys:" | sed 's/^/ /'
+        echo ""
+    fi
+
+    if ((rejected > 0)); then
+        printf '%bRejected Keys:%b\n' "$C_RED" "$C_RESET"
+        salt-key --list rejected 2>/dev/null | grep -v "^Rejected Keys:" | sed 's/^/ /'
+        echo ""
+    fi
+}
+
+# Print the master fingerprint and the fingerprint of every pending key so
+# they can be compared with what each minion reports locally.
+do_verify() {
+    local pending
+    pending=$(salt-key --list unaccepted 2>/dev/null | grep -v "^Unaccepted Keys:$" | grep -v "^$")
+
+    if [[ -z "$pending" ]]; then
+        log_info "No pending keys to verify"
+        return 0
+    fi
+
+    echo "Master Fingerprint:"
+    printf ' %b' "$C_CYAN"
+    salt-key -F master 2>/dev/null | grep -A1 "master.pub" | tail -1 | tr -d ' '
+    printf '%b\n\n' "$C_RESET"
+
+    echo "Pending Keys with Fingerprints:"
+    echo ""
+
+    local minion fingerprint
+    while IFS= read -r minion; do
+        [[ -z "$minion" ]] && continue
+        minion=$(echo "$minion" | tr -d '[:space:]')
+        fingerprint=$(key_fingerprint "$minion")
+        printf ' %b%-30s%b %s\n' "$C_YELLOW" "$minion" "$C_RESET" "${fingerprint:-unknown}"
+    done <<< "$pending"
+
+    echo ""
+    log_info "Verify each fingerprint matches the minion's local fingerprint:"
+    log_info " (on minion) salt-call --local key.finger"
+}
+
+# Accept every pending key after confirmation.
+# Returns 1 when salt-key reports a failure.
+do_accept_all() {
+    local pending
+    pending=$(count_keys "unaccepted")
+
+    if ((pending == 0)); then
+        log_info "No pending keys to accept"
+        return 0
+    fi
+
+    log_info "Accepting $pending pending key(s)..."
+
+    if ! confirm "Accept all $pending pending keys?"; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    if ! salt-key -A -y 2>/dev/null; then
+        log_error "salt-key failed while accepting pending keys"
+        return 1
+    fi
+    log_info "All pending keys accepted"
+}
+
+# Accept one minion key; the fingerprint is shown before the prompt.
+#   $1 - minion id
+# Returns 1 when salt-key reports a failure.
+do_accept() {
+    local minion="$1"
+    log_info "Accepting key for: $minion"
+
+    if [[ "$AUTO_YES" != true ]]; then
+        local fingerprint
+        fingerprint=$(key_fingerprint "$minion")
+        echo "Fingerprint: ${fingerprint:-unknown}"
+    fi
+    if ! confirm "Accept key for $minion?"; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    if ! salt-key -a "$minion" -y 2>/dev/null; then
+        log_error "Failed to accept key for $minion"
+        return 1
+    fi
+    log_info "Key accepted for $minion"
+}
+
+# Reject one minion key.
+#   $1 - minion id
+# Returns 1 when salt-key reports a failure.
+do_reject() {
+    local minion="$1"
+    log_info "Rejecting key for: $minion"
+
+    if ! confirm "Reject key for $minion?"; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    if ! salt-key -r "$minion" -y 2>/dev/null; then
+        log_error "Failed to reject key for $minion"
+        return 1
+    fi
+    log_info "Key rejected for $minion"
+}
+
+# Delete one minion key.
+#   $1 - minion id
+# Returns 1 when salt-key reports a failure.
+do_delete() {
+    local minion="$1"
+    log_info "Deleting key for: $minion"
+
+    if ! confirm "Delete key for $minion? This cannot be undone."; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    if ! salt-key -d "$minion" -y 2>/dev/null; then
+        log_error "Failed to delete key for $minion"
+        return 1
+    fi
+    log_info "Key deleted for $minion"
+}
+
+# Rotate a key: delete it here; the replacement has to be accepted once the
+# minion reconnects.
+#   $1 - minion id
+# Returns 1 when salt-key reports a failure.
+do_rotate() {
+    local minion="$1"
+    log_info "Rotating key for: $minion"
+    log_info "This will delete the current key — the minion must reconnect to get a new key accepted"
+
+    if ! confirm "Rotate key for $minion?"; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    if ! salt-key -d "$minion" -y 2>/dev/null; then
+        log_error "Failed to delete key for $minion"
+        return 1
+    fi
+    log_info "Key deleted for $minion — accept the new key when the minion reconnects"
+    log_info "On the minion, restart salt-minion: systemctl restart salt-minion"
+}
+
+# Delete keys of minions whose directory under DEFAULT_CACHE_DIR has not been
+# modified for more than the given number of days.
+#   $1 - age threshold in days
+# Returns 1 when the cache directory is missing or the cutoff cannot be
+# computed.
+do_cleanup_stale() {
+    local days="$1"
+    log_info "Finding minions not seen in $days days..."
+
+    if [[ ! -d "$DEFAULT_CACHE_DIR" ]]; then
+        log_error "Minion cache directory not found: $DEFAULT_CACHE_DIR"
+        return 1
+    fi
+
+    local stale_minions=()
+    local cutoff now
+    # Try the GNU date syntax first, then the BSD one.
+    cutoff=$(date -d "-${days} days" +%s 2>/dev/null) \
+        || cutoff=$(date -v-"${days}"d +%s 2>/dev/null) \
+        || cutoff=""
+    if [[ -z "$cutoff" ]]; then
+        # An empty cutoff would turn the comparison below into a syntax error.
+        log_error "Unable to compute a cutoff timestamp for $days days"
+        return 1
+    fi
+    now=$(date +%s)
+
+    local minion_dir minion_name last_modified days_ago
+    while IFS= read -r minion_dir; do
+        minion_name=$(basename "$minion_dir")
+        # GNU stat first, BSD stat second; skip entries neither can read.
+        last_modified=$(stat -c %Y "$minion_dir" 2>/dev/null) \
+            || last_modified=$(stat -f %m "$minion_dir" 2>/dev/null) \
+            || continue
+
+        if ((last_modified < cutoff)); then
+            days_ago=$(( (now - last_modified) / 86400 ))
+            stale_minions+=("$minion_name")
+            printf ' %b%-30s%b (last seen %d days ago)\n' "$C_RED" "$minion_name" "$C_RESET" "$days_ago"
+        fi
+    done < <(find "$DEFAULT_CACHE_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null)
+
+    if [[ ${#stale_minions[@]} -eq 0 ]]; then
+        log_info "No stale minions found"
+        return 0
+    fi
+
+    echo ""
+    log_info "Found ${#stale_minions[@]} stale minion(s)"
+
+    if ! confirm "Delete keys for all ${#stale_minions[@]} stale minions?"; then
+        log_info "Aborted"
+        return 0
+    fi
+
+    local minion
+    for minion in "${stale_minions[@]}"; do
+        if salt-key -d "$minion" -y 2>/dev/null; then
+            log_info "Deleted key: $minion"
+        else
+            log_warn "Failed to delete key: $minion"
+        fi
+    done
+
+    log_info "Stale key cleanup complete"
+}
+
+# Write a commented header plus the accepted-key fingerprints to a file.
+#   $1 - output path
+# Returns 1 when the output file cannot be created.
+do_export() {
+    local output_path="$1"
+    log_info "Exporting accepted key fingerprints to $output_path..."
+
+    # Fail early instead of reporting "Exported 0" for an unwritable path.
+    if ! : > "$output_path" 2>/dev/null; then
+        log_error "Cannot write to $output_path"
+        return 1
+    fi
+
+    {
+        echo "# Salt Key Fingerprint Export"
+        echo "# Generated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+        echo "# Master: $(hostname -f 2>/dev/null || hostname)"
+        echo "#"
+        echo "# Format: minion_id fingerprint"
+        echo ""
+        salt-key -F accepted 2>/dev/null | grep -v "^Accepted Keys:" | grep -v '^$'
+    } > "$output_path"
+
+    local count
+    count=$(grep -cv "^#\|^$" "$output_path" 2>/dev/null) || count=0
+    log_info "Exported $count key fingerprint(s) to $output_path"
+}
+
+# Accept every minion listed in a file, one id per line. Blank lines and
+# lines starting with '#' are skipped; anything after the first ':' on a line
+# is ignored.
+#   $1 - input file
+# Returns 1 when the file is missing or at least one accept failed.
+do_bulk_accept() {
+    local input_file="$1"
+
+    if [[ ! -f "$input_file" ]]; then
+        log_error "File not found: $input_file"
+        return 1
+    fi
+
+    local count=0
+    local failed=0
+    local line minion_id
+
+    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        minion_id="${line%%:*}"
+        minion_id=$(echo "$minion_id" | tr -d '[:space:]')
+        # Skip blanks and comments after trimming so indented ones are caught.
+        [[ -z "$minion_id" || "$minion_id" == \#* ]] && continue
+
+        if salt-key -a "$minion_id" -y 2>/dev/null; then
+            log_info "Accepted: $minion_id"
+            count=$((count + 1))
+        else
+            log_error "Failed to accept: $minion_id"
+            failed=$((failed + 1))
+        fi
+    done < "$input_file"
+
+    log_info "Bulk accept complete: $count accepted, $failed failed"
+    [[ $failed -eq 0 ]]
+}
+
+# Parse command-line arguments into the runtime flag variables.
+# Options taking a value use "shift 2 if a value is present, else shift 1":
+# a plain "shift 2" with one argument left fails without shifting, which
+# would make this loop spin forever. The missing value is reported later by
+# validate_requirements.
+parse_arguments() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --list)
+                ACTION="list"
+                shift
+                ;;
+            --verify)
+                ACTION="verify"
+                shift
+                ;;
+            --accept-all)
+                ACTION="accept-all"
+                shift
+                ;;
+            --accept|--reject|--delete|--rotate)
+                ACTION="${1#--}"
+                TARGET_MINION="${2:-}"
+                shift "$(( $# > 1 ? 2 : 1 ))"
+                ;;
+            --cleanup-stale)
+                ACTION="cleanup-stale"
+                # DAYS is optional: consume it only when it is a number.
+                if [[ -n "${2:-}" && "$2" =~ ^[0-9]+$ ]]; then
+                    STALE_DAYS="$2"
+                    shift 2
+                else
+                    shift
+                fi
+                ;;
+            --export)
+                ACTION="export"
+                EXPORT_PATH="${2:-}"
+                shift "$(( $# > 1 ? 2 : 1 ))"
+                ;;
+            --bulk-accept)
+                ACTION="bulk-accept"
+                BULK_FILE="${2:-}"
+                shift "$(( $# > 1 ? 2 : 1 ))"
+                ;;
+            --yes)
+                AUTO_YES=true
+                shift
+                ;;
+            --no-color)
+                USE_COLOR=false
+                shift
+                ;;
+            --help|-h)
+                show_help
+                exit 0
+                ;;
+            *)
+                log_error "Unknown option: $1"
+                show_help >&2
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Exit with status 1 unless running as root, an action was chosen, salt-key
+# is installed, and the chosen action has its required argument.
+validate_requirements() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "This script must be run as root (use sudo)"
+        exit 1
+    fi
+
+    if [[ -z "$ACTION" ]]; then
+        log_error "An action is required"
+        show_help >&2
+        exit 1
+    fi
+
+    if ! command -v salt-key >/dev/null 2>&1; then
+        log_error "salt-key not found — is salt-master installed?"
+        exit 1
+    fi
+
+    if [[ "$ACTION" == "accept" || "$ACTION" == "reject" || "$ACTION" == "delete" || "$ACTION" == "rotate" ]]; then
+        if [[ -z "$TARGET_MINION" ]]; then
+            log_error "Minion name is required for --$ACTION"
+            exit 1
+        fi
+    fi
+
+    if [[ "$ACTION" == "export" && -z "$EXPORT_PATH" ]]; then
+        log_error "Output path is required for --export"
+        exit 1
+    fi
+
+    if [[ "$ACTION" == "bulk-accept" && -z "$BULK_FILE" ]]; then
+        log_error "Input file is required for --bulk-accept"
+        exit 1
+    fi
+}
+
+# Entry point: parse, validate, dispatch.
+# Returns the status of the selected action so that a failed action is not
+# masked by the trailing debug message.
+main() {
+    parse_arguments "$@"
+    validate_requirements
+    setup_colors
+
+    local rc=0
+    case "$ACTION" in
+        list) do_list ;;
+        verify) do_verify ;;
+        accept-all) do_accept_all ;;
+        accept) do_accept "$TARGET_MINION" ;;
+        reject) do_reject "$TARGET_MINION" ;;
+        delete) do_delete "$TARGET_MINION" ;;
+        rotate) do_rotate "$TARGET_MINION" ;;
+        cleanup-stale) do_cleanup_stale "$STALE_DAYS" ;;
+        export) do_export "$EXPORT_PATH" ;;
+        bulk-accept) do_bulk_accept "$BULK_FILE" ;;
+    esac || rc=$?
+
+    if ((rc != 0)); then
+        return "$rc"
+    fi
+
+    debug_echo "Script completed successfully"
+}
+
+# Run main only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
diff --git a/salt-master-metrics.sh b/salt-master-metrics.sh
new file mode 100755
index 0000000..df66359
--- /dev/null
+++ b/salt-master-metrics.sh
@@ -0,0 +1,1314 @@
+#!/bin/bash
+################################################################################
+# Script Name: salt-master-metrics.sh
+# Version: 3.1
+# Author: Phil Connor, contact@mylinux.work
+# License: MIT
+# Description: Production Prometheus exporter for Salt Master metrics
+#
+# Exports metrics for: +# - Master process health (CPU, memory, uptime) +# - Minion connectivity (up, down, accepted, rejected, denied, unaccepted) +# - Per-minion last-seen timestamp (stale minions only) +# - Minion version drift (match vs mismatch count) +# - Job statistics (active, cached, recent, completed 1h/24h, failed 24h) +# - Jobs by function breakdown (top 10) +# - Per-function expected/actual responses with success/failure (24h) +# - Per-function new job counts with success/failure (24h) +# - Scheduled job returns per minion/function/state (24h) +# - Key management counts +# - Event bus health +# - ZeroMQ port status (4505, 4506) +# - Worker thread utilization +# - Salt versions (master vs minion drift) +# - Cache disk usage and inode count +# - Salt master log error rate (1h) +# - File server cache size +# - Configuration values (keep_jobs, master_stats) +# - Highstate metrics (failures, last timestamp per minion) +# - Extended configuration detection (state_events, presence_events, timeout, job_cache) +# - Salt API process status +# - Auth failure rate from logs +# - File roots total size +# - Master log file size +# - Minion auth/key events from journal +# +# Modes: +# --textfile Write to node_exporter textfile collector +# --http Run HTTP server for direct Prometheus scraping +# stdout Default: print metrics to stdout +# +# Changelog: +# 3.1 - Added per-function expected/actual response metrics, per-function +# new job metrics with success/failure, scheduled job return +# metrics per minion/function/state. All from job cache parsing. +# 3.0 - Added config detection (state_events, presence_events, timeout, +# job_cache, publish_port, ret_port), salt-api process status, +# auth failure rate, file_roots size, master log size, minion +# auth events. Pure bash (no python3 dependency). 
+# 2.0 - Added per-minion last-seen, version drift, jobs completed/failed, +# jobs by function, ZeroMQ port checks, cache disk/inode metrics, +# log error rate, fileserver cache size, config values, highstate +# metrics. Expanded caching layer for expensive operations. +# 1.0 - Initial release +################################################################################ + +SCRIPT_VERSION="3.1" +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false +HTTP_PORT=9417 +LOCK_FILE="/var/run/salt-master-metrics.lock" +SALT_MASTER_CONFIG="/etc/salt/master" +SALT_CACHE_DIR="/var/cache/salt/master" +SALT_PKI_DIR="/etc/salt/pki/master" +SALT_RUN_DIR="/var/run/salt/master" + +# Timeouts for salt commands (seconds) +SALT_CMD_TIMEOUT=15 + +# Cache for expensive operations +MINION_UP_CACHE="" +MINION_DOWN_CACHE="" +KEY_LIST_CACHE="" +VERSIONS_CACHE="" +LOG_ERRORS_CACHE="" +LOG_CRITICAL_CACHE="" +HIGHSTATE_FAILURES_CACHE="" +ACTIVE_JOBS_CACHE="" +JOB_LIST_CACHE="" +LOG_AUTH_FAILURES_CACHE="" +LOG_KEY_EVENTS_CACHE="" + +show_usage() { + cat </dev/null) + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + echo "ERROR: Another instance is already running (PID: $pid)" >&2 + exit 1 + else + echo "Removing stale lock file" >&2 + rm -f "$LOCK_FILE" + fi + fi + echo $$ > "$LOCK_FILE" + trap cleanup EXIT INT TERM +} + +cleanup() { + rm -f "$LOCK_FILE" +} + +# --------------------------------------------------------------------------- +# Data collection (cached) +# --------------------------------------------------------------------------- + +cache_key_list() { + KEY_LIST_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-key -L 2>/dev/null || echo "") +} + +# Use plain-text salt-run commands (one minion per line, no python needed) +cache_minion_up() { + MINION_UP_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-run manage.up 2>/dev/null || echo "") +} + +cache_minion_down() { + MINION_DOWN_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-run manage.down 2>/dev/null || echo "") +} + 
# Cache the output of `salt-run manage.versions` in VERSIONS_CACHE.
# Failures and timeouts leave whatever was printed (possibly nothing).
cache_versions() {
    VERSIONS_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-run manage.versions 2>/dev/null || echo "")
}

# Cache the output of `salt-run jobs.active` in ACTIVE_JOBS_CACHE.
cache_active_jobs() {
    ACTIVE_JOBS_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-run jobs.active 2>/dev/null || echo "")
}

# Cache the output of `salt-run jobs.list_jobs --out=txt` in JOB_LIST_CACHE.
cache_job_list() {
    JOB_LIST_CACHE=$(timeout "$SALT_CMD_TIMEOUT" salt-run jobs.list_jobs --out=txt 2>/dev/null || echo "")
}

# Run all expensive salt-key / salt-run commands in parallel, each bounded by
# SALT_CMD_TIMEOUT, and load their output into the *_CACHE globals.
#
# The commands write into a private temp directory. If that directory cannot
# be created the function falls back to the sequential cache_* helpers instead
# of writing to "/keys", "/up", ... (which is where an empty $tmp_dir would
# have pointed).
cache_all_salt_data() {
    local tmp_dir
    tmp_dir=$(mktemp -d /tmp/salt_metrics_cache.XXXXXX 2>/dev/null)

    if [ -z "$tmp_dir" ] || [ ! -d "$tmp_dir" ]; then
        echo "WARNING: could not create temp directory; collecting Salt data sequentially" >&2
        cache_key_list
        cache_minion_up
        cache_minion_down
        cache_versions
        cache_active_jobs
        cache_job_list
        return 0
    fi

    timeout "$SALT_CMD_TIMEOUT" salt-key -L > "$tmp_dir/keys" 2>/dev/null &
    timeout "$SALT_CMD_TIMEOUT" salt-run manage.up > "$tmp_dir/up" 2>/dev/null &
    timeout "$SALT_CMD_TIMEOUT" salt-run manage.down > "$tmp_dir/down" 2>/dev/null &
    timeout "$SALT_CMD_TIMEOUT" salt-run manage.versions > "$tmp_dir/versions" 2>/dev/null &
    timeout "$SALT_CMD_TIMEOUT" salt-run jobs.active > "$tmp_dir/active" 2>/dev/null &
    timeout "$SALT_CMD_TIMEOUT" salt-run jobs.list_jobs --out=txt > "$tmp_dir/joblist" 2>/dev/null &
    # Block until every background collector has finished or timed out.
    wait

    KEY_LIST_CACHE=$(cat "$tmp_dir/keys" 2>/dev/null)
    MINION_UP_CACHE=$(cat "$tmp_dir/up" 2>/dev/null)
    MINION_DOWN_CACHE=$(cat "$tmp_dir/down" 2>/dev/null)
    VERSIONS_CACHE=$(cat "$tmp_dir/versions" 2>/dev/null)
    ACTIVE_JOBS_CACHE=$(cat "$tmp_dir/active" 2>/dev/null)
    JOB_LIST_CACHE=$(cat "$tmp_dir/joblist" 2>/dev/null)

    rm -rf "$tmp_dir"
}

# Stream salt-master log lines newer than the given `date -d` expression
# (e.g. '1 hour ago') to stdout.
#   - journalctl available: the salt-master unit's journal since that time
#   - otherwise, if /var/log/salt/master exists: lines that compare >= the
#     cutoff timestamp as strings (minute resolution)
#   - otherwise: no output
_salt_master_log_since() {
    local age="$1" since
    if command -v journalctl >/dev/null 2>&1; then
        since=$(date -d "$age" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)
        journalctl -u salt-master --since "$since" --no-pager 2>/dev/null
    elif [ -f /var/log/salt/master ]; then
        since=$(date -d "$age" '+%Y-%m-%d %H:%M' 2>/dev/null)
        awk -v cutoff="$since" '$0 >= cutoff' /var/log/salt/master 2>/dev/null
    fi
}

# Populate the 1-hour log counters:
#   LOG_ERRORS_CACHE         lines containing "ERROR"
#   LOG_CRITICAL_CACHE       lines containing "CRITICAL"
#   LOG_AUTH_FAILURES_CACHE  auth-failure phrases (case-insensitive)
#   LOG_KEY_EVENTS_CACHE     key accept/reject/deny phrases (case-insensitive)
# The log window is read once and all four patterns are counted in the same
# pass, so the counters describe the same snapshot of the log.
cache_log_data() {
    local counts n_error n_critical n_auth n_key
    counts=$(_salt_master_log_since '1 hour ago' | awk '
        /ERROR/    { n_error++ }
        /CRITICAL/ { n_critical++ }
        {
            lower = tolower($0)
            if (lower ~ /authentication denied|failed to authenticate|salt\.crypt.*denied/) n_auth++
            if (lower ~ /salt\.key|key.*accept|key.*reject|key.*denied|new key/) n_key++
        }
        END { printf "%d %d %d %d\n", n_error, n_critical, n_auth, n_key }
    ' 2>/dev/null)

    read -r n_error n_critical n_auth n_key <<< "$counts"
    LOG_ERRORS_CACHE="${n_error:-0}"
    LOG_CRITICAL_CACHE="${n_critical:-0}"
    LOG_AUTH_FAILURES_CACHE="${n_auth:-0}"
    LOG_KEY_EVENTS_CACHE="${n_key:-0}"
}

# Populate HIGHSTATE_FAILURES_CACHE with the number of log lines from the last
# 24 hours that look like a failed highstate (case-sensitive patterns).
cache_highstate_data() {
    local n_fail
    n_fail=$(_salt_master_log_since '24 hours ago' \
        | awk '/highstate.*fail|Highstate.*fail|state.highstate.*False/ { n++ } END { printf "%d\n", n }' 2>/dev/null)
    HIGHSTATE_FAILURES_CACHE="${n_fail:-0}"
}

#
# ---------------------------------------------------------------------------
# Key metrics
# ---------------------------------------------------------------------------

# Print the number of minion keys in a category
# (accepted | denied | unaccepted | rejected); any other category prints 0.
# Counts come straight from the PKI directories, exactly as generate_metrics
# computes them, so they no longer depend on the `salt-key -L` cache.
get_key_count() {
    local category="$1" key_dir
    case "$category" in
        accepted)   key_dir="minions" ;;
        denied)     key_dir="minions_denied" ;;
        unaccepted) key_dir="minions_pre" ;;
        rejected)   key_dir="minions_rejected" ;;
        *)
            echo "0"
            return
            ;;
    esac
    find "$SALT_PKI_DIR/$key_dir/" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l
}

# ---------------------------------------------------------------------------
# Minion status metrics (plain-text output: one minion per line, prefixed "- ")
# ---------------------------------------------------------------------------

# Count "-"-prefixed lines in the cached `manage.up` output (0 if no cache).
get_minions_up() {
    if [ -z "$MINION_UP_CACHE" ]; then
        echo "0"
        return
    fi
    # grep -c prints 0 but exits 1 when nothing matches; `|| true` hides that.
    echo "$MINION_UP_CACHE" | grep -c "^-" 2>/dev/null || true
}

# Count "-"-prefixed lines in the cached `manage.down` output (0 if no cache).
get_minions_down() {
    if [ -z "$MINION_DOWN_CACHE" ]; then
        echo "0"
        return
    fi
    echo "$MINION_DOWN_CACHE" | grep -c "^-" 2>/dev/null || true
}

# ---------------------------------------------------------------------------
# Process metrics
# ---------------------------------------------------------------------------

# Print the PID of the oldest process whose command line contains
# "salt-master", or nothing if there is none.
# NOTE(review): -f matches full command lines, so any other long-running
# process with "salt-master" in its arguments would also match — confirm.
get_master_pid() {
    pgrep -f "salt-master" -o 2>/dev/null || echo ""
}

# Print seconds elapsed since /proc/<pid> was last modified (used as the
# process start time), or 0 when the master is not running.
get_master_uptime_seconds() {
    local pid
    pid=$(get_master_pid)
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        local start_time
        start_time=$(stat -c %Y "/proc/$pid" 2>/dev/null || echo "0")
        if [ "$start_time" -gt 0 ]; then
            echo $(( $(date +%s) - start_time ))
        else
            echo "0"
        fi
    else
        echo "0"
    fi
}

# Print the master's resident set size in bytes (VmRSS kB * 1024), or 0.
get_master_memory_bytes() {
    local pid
    pid=$(get_master_pid)
    if [ -n "$pid" ] && [ -f "/proc/$pid/status" ]; then
        local rss_kb
        rss_kb=$(grep VmRSS "/proc/$pid/status" 2>/dev/null | awk '{print $2}')
        if [ -n "$rss_kb" ]; then
            echo $(( rss_kb * 1024 ))
        else
            echo "0"
        fi
    else
        echo "0"
    fi
}

# Print the master's %CPU as reported by ps, or 0 when there is no master
# process or ps prints nothing (e.g. the process exited in between).
get_master_cpu_percent() {
    local pid cpu=""
    pid=$(get_master_pid)
    if [ -n "$pid" ]; then
        # Capture first: a `|| echo 0` after the pipeline would only see
        # tr's exit status and never fire when ps fails.
        cpu=$(ps -p "$pid" -o %cpu --no-headers 2>/dev/null | tr -d ' ')
    fi
    echo "${cpu:-0}"
}

# Print the number of entries under /proc/<pid>/task, or 0.
get_master_thread_count() {
    local pid
    pid=$(get_master_pid)
    if [ -n "$pid" ] && [ -d "/proc/$pid/task" ]; then
        find "/proc/$pid/task" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# Print the number of entries under /proc/<pid>/fd, or 0.
get_master_open_fds() {
    local pid
    pid=$(get_master_pid)
    if [ -n "$pid" ] && [ -d "/proc/$pid/fd" ]; then
        find "/proc/$pid/fd" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# Print how many processes have "salt-master" in their command line.
get_salt_process_count() {
    pgrep -c -f "salt-master" 2>/dev/null || true
}

# ---------------------------------------------------------------------------
# Worker thread config
# ---------------------------------------------------------------------------

# Print the configured worker_threads value: the main master config first,
# then the first master.d/*.conf that sets it, else 5.
get_configured_workers() {
    local workers f
    workers=$(grep -E "^worker_threads:" "$SALT_MASTER_CONFIG" 2>/dev/null | awk '{print $2}')
    if [ -z "$workers" ]; then
        for f in /etc/salt/master.d/*.conf; do
            [ -f "$f" ] || continue
            workers=$(grep -E "^worker_threads:" "$f" 2>/dev/null | awk '{print $2}')
            [ -n "$workers" ] && break
        done
    fi
    echo "${workers:-5}"
}

# ---------------------------------------------------------------------------
# Job cache metrics
# ---------------------------------------------------------------------------

# Print the number of ".load.p" files under the job cache (one per cached job).
get_job_cache_count() {
    if [ -d "$SALT_CACHE_DIR/jobs" ]; then
        find "$SALT_CACHE_DIR/jobs" -name ".load.p" -type f 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# Print the job cache size in bytes, or 0 when it is missing or du fails.
get_job_cache_size_bytes() {
    local size=""
    if [ -d "$SALT_CACHE_DIR/jobs" ]; then
        size=$(du -sb "$SALT_CACHE_DIR/jobs" 2>/dev/null | awk '{print $1}')
    fi
    echo "${size:-0}"
}

# Count lines starting with a digit in the cached `jobs.active` output.
get_active_jobs() {
    if [ -z "$ACTIVE_JOBS_CACHE" ]; then
        echo "0"
        return
    fi
    echo "$ACTIVE_JOBS_CACHE" | grep -c "^[0-9]" 2>/dev/null || true
}

# ---------------------------------------------------------------------------
# Event bus / IPC health
# ---------------------------------------------------------------------------

# Print 1 if the master_event_pub.ipc socket exists, else 0.
get_event_pub_socket_exists() {
    if [ -S "$SALT_RUN_DIR/master_event_pub.ipc" ]; then
        echo "1"
    else
        echo "0"
    fi
}

# Print 1 if the master_event_pull.ipc socket exists, else 0.
get_event_pull_socket_exists() {
    if [ -S "$SALT_RUN_DIR/master_event_pull.ipc" ]; then
        echo "1"
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Salt version
# ---------------------------------------------------------------------------

# Print the second field of `salt --version`, or "unknown" when the command
# is missing or prints nothing.
get_salt_version() {
    local version
    # Capture first: `|| echo unknown` after the pipeline would only see
    # awk's exit status and never fire when salt itself fails.
    version=$(salt --version 2>/dev/null | awk '{print $2}')
    echo "${version:-unknown}"
}

# ---------------------------------------------------------------------------
# Minion cache staleness
# ---------------------------------------------------------------------------

# Print the number of entries in the master's minion cache directory.
get_minion_cache_count() {
    if [ -d "$SALT_CACHE_DIR/minions" ]; then
        find "$SALT_CACHE_DIR/minions" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Syndic detection
# ---------------------------------------------------------------------------

# Print the number of entries in the syndics PKI directory.
get_syndic_count() {
    if [ -d "$SALT_PKI_DIR/syndics" ]; then
        find "$SALT_PKI_DIR/syndics" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Per-minion last-seen
# ---------------------------------------------------------------------------

# Emit, for every directory in the minion cache, a
# salt_master_minion_last_seen_seconds sample whose value is that directory's
# mtime (epoch seconds), followed by salt_master_minion_cache_oldest_seconds
# with the largest age seen.
# NOTE(review): the file header describes this as "stale minions only", but
# no age filter is applied here — every cached minion is emitted. Confirm
# which behaviour is intended before changing the output.
generate_minion_last_seen_metrics() {
    local now oldest_age minion_dir minion_name mtime age
    now=$(date +%s)
    oldest_age=0

    if [ ! -d "$SALT_CACHE_DIR/minions" ]; then
        echo "salt_master_minion_cache_oldest_seconds 0"
        return
    fi

    for minion_dir in "$SALT_CACHE_DIR/minions"/*/; do
        [ -d "$minion_dir" ] || continue
        minion_name=$(basename "$minion_dir")
        mtime=$(stat -c %Y "$minion_dir" 2>/dev/null || echo "$now")
        age=$(( now - mtime ))
        if [ "$age" -gt "$oldest_age" ]; then
            oldest_age=$age
        fi
        echo "salt_master_minion_last_seen_seconds{minion=\"${minion_name}\"} ${mtime}"
    done

    echo "salt_master_minion_cache_oldest_seconds $oldest_age"
}

# ---------------------------------------------------------------------------
# Minion version drift
# ---------------------------------------------------------------------------

# Count "- minion" lines directly under the "Up to date:" header of the
# cached manage.versions output; stops at the next header line.
get_version_match_count() {
    if [ -z "$VERSIONS_CACHE" ]; then
        echo "0"
        return
    fi
    local in_section=0 count=0 line
    while IFS= read -r line; do
        if [[ "$line" =~ ^"Up to date:" ]]; then
            in_section=1
            continue
        fi
        if [ "$in_section" -eq 1 ]; then
            if [[ "$line" =~ ^"- " ]]; then
                count=$((count + 1))
            elif [[ "$line" =~ ^[A-Za-z] ]]; then
                break
            fi
        fi
    done <<< "$VERSIONS_CACHE"
    echo "$count"
}

# Count "- minion" lines that are NOT under the "Up to date:" header.
get_version_mismatch_count() {
    if [ -z "$VERSIONS_CACHE" ]; then
        echo "0"
        return
    fi
    local in_uptodate=0 count=0 line
    while IFS= read -r line; do
        if [[ "$line" =~ ^"Up to date:" ]]; then
            in_uptodate=1
            continue
        elif [[ "$line" =~ ^[A-Za-z] ]]; then
            # Any other header line ends the "Up to date:" section.
            in_uptodate=0
            continue
        fi
        if [ "$in_uptodate" -eq 0 ] && [[ "$line" =~ ^"- " ]]; then
            count=$((count + 1))
        fi
    done <<< "$VERSIONS_CACHE"
    echo "$count"
}

# ---------------------------------------------------------------------------
# Jobs completed (1h and 24h)
# ---------------------------------------------------------------------------

# Print the number of ".load.p" files modified within the last 60 minutes.
get_jobs_completed_1h() {
    if [ -d "$SALT_CACHE_DIR/jobs" ]; then
        find "$SALT_CACHE_DIR/jobs" -name ".load.p" -type f -mmin -60 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# Print the number of ".load.p" files modified within the last 1440 minutes.
get_jobs_completed_24h() {
    if [ -d "$SALT_CACHE_DIR/jobs" ]; then
        find "$SALT_CACHE_DIR/jobs" -name ".load.p" -type f -mmin -1440 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Failed jobs (24h)
# ---------------------------------------------------------------------------

# Count lines of the cached job list that match "result...false" or a
# non-zero "retcode: N" (case-insensitive).
get_jobs_failed_24h() {
    if [ -z "$JOB_LIST_CACHE" ]; then
        echo "0"
        return
    fi
    echo "$JOB_LIST_CACHE" | grep -ic "result.*false\|retcode: [1-9]" 2>/dev/null || true
}

# ---------------------------------------------------------------------------
# Jobs by function (top 10)
# ---------------------------------------------------------------------------

# Emit salt_master_jobs_by_function for the ten most frequent "Function:"
# values in the cached job list.
generate_jobs_by_function_metrics() {
    if [ -z "$JOB_LIST_CACHE" ]; then
        return
    fi
    echo "$JOB_LIST_CACHE" | grep -i "Function:" 2>/dev/null | \
        awk -F': *' '{print $NF}' | sort | uniq -c | sort -rn | head -10 | \
        while read -r count func; do
            [ -n "$func" ] && echo "salt_master_jobs_by_function{function=\"${func}\"} ${count}"
        done
}

# ---------------------------------------------------------------------------
# Job cache single-pass scan (collects all per-function metrics at once)
# Max 200 jobs to avoid slow scans on busy masters.
# ---------------------------------------------------------------------------

JOB_CACHE_SCAN_MAX=200

# Scan up to JOB_CACHE_SCAN_MAX job-cache entries modified in the last 24h and
# emit:
#   salt_master_expected_responses_total     per function: minion dirs seen
#   salt_master_function_responses_total     per function: returns by outcome
#   salt_master_scheduled_job_return_total   per function/minion/state, for
#                                            jobs whose load mentions "schedule"
# A return is classed as failed when `strings return.p` contains
# false/traceback/error/exception (case-insensitive) — a heuristic.
generate_job_cache_metrics() {
    if [ ! -d "$SALT_CACHE_DIR/jobs" ]; then
        return
    fi

    declare -A func_expected
    declare -A func_success
    declare -A func_failure
    declare -A sched_key_success
    declare -A sched_key_failure

    local load_files
    load_files=$(find "$SALT_CACHE_DIR/jobs" -name ".load.p" -type f -mmin -1440 2>/dev/null | head -n "$JOB_CACHE_SCAN_MAX")
    if [ -z "$load_files" ]; then
        return
    fi

    local load_file job_dir func_name full_jid load_strings
    local is_scheduled state_name minion_count minion_dir minion_name
    local ret_content is_fail skey
    local func key minion state all_funcs all_keys

    while IFS= read -r load_file; do
        [ -z "$load_file" ] && continue
        job_dir=$(dirname "$load_file")

        # First choice: look the job up in the cached job list by joining the
        # two directory components above .load.p.
        func_name=""
        if [ -n "$JOB_LIST_CACHE" ]; then
            full_jid="$(basename "$(dirname "$job_dir")")$(basename "$job_dir")"
            func_name=$(echo "$JOB_LIST_CACHE" | grep -A5 "$full_jid" 2>/dev/null \
                | grep -i "Function:" | head -1 | sed 's/.*Function:[[:space:]]*//' | tr -d '[:space:]')
        fi

        # Fallback: pull the first module.function-looking token out of the
        # printable strings of the load file.
        load_strings=""
        if [ -z "$func_name" ]; then
            load_strings=$(timeout 2 strings "$load_file" 2>/dev/null || true)
            func_name=$(echo "$load_strings" | grep -oE '(cmd\.[a-z_]+|state\.[a-z_]+|test\.[a-z_]+|grains\.[a-z_]+|pillar\.[a-z_]+|saltutil\.[a-z_]+|pkg\.[a-z_]+|service\.[a-z_]+|file\.[a-z_]+|sys\.[a-z_]+)' | head -1)
        fi
        [ -z "$func_name" ] && func_name="unknown"

        # The schedule check needs the load strings; read them now unless the
        # fallback above already did.
        if [ -z "$load_strings" ]; then
            load_strings=$(timeout 2 strings "$load_file" 2>/dev/null || true)
        fi
        is_scheduled=0
        state_name=""
        if echo "$load_strings" | grep -qi "schedule" 2>/dev/null; then
            is_scheduled=1
            state_name=$(echo "$load_strings" | grep -oE '\b[a-z_]+\.(sls|init)\b' | head -1 | sed 's/\.sls$//' | sed 's/\.init$//')
        fi

        minion_count=0
        for minion_dir in "$job_dir"/*/; do
            [ -d "$minion_dir" ] || continue
            minion_count=$((minion_count + 1))
            minion_name=$(basename "$minion_dir")

            # Only minions that actually returned contribute to the
            # success/failure tallies.
            [ -f "$minion_dir/return.p" ] || continue

            ret_content=$(timeout 2 strings "$minion_dir/return.p" 2>/dev/null || true)
            is_fail=0
            if echo "$ret_content" | grep -qiE "false|traceback|error|exception" 2>/dev/null; then
                is_fail=1
            fi

            if [ "$is_fail" -eq 1 ]; then
                func_failure["$func_name"]=$(( ${func_failure["$func_name"]:-0} + 1 ))
            else
                func_success["$func_name"]=$(( ${func_success["$func_name"]:-0} + 1 ))
            fi

            if [ "$is_scheduled" -eq 1 ]; then
                skey="${func_name}|${minion_name}|${state_name}"
                if [ "$is_fail" -eq 1 ]; then
                    sched_key_failure["$skey"]=$(( ${sched_key_failure["$skey"]:-0} + 1 ))
                else
                    sched_key_success["$skey"]=$(( ${sched_key_success["$skey"]:-0} + 1 ))
                fi
            fi
        done

        if [ "$minion_count" -gt 0 ]; then
            func_expected["$func_name"]=$(( ${func_expected["$func_name"]:-0} + minion_count ))
        fi
    done <<< "$load_files"

    for func in "${!func_expected[@]}"; do
        echo "salt_master_expected_responses_total{function=\"${func}\",state=\"\"} ${func_expected[$func]}"
    done

    # Union of functions that had at least one success or failure.
    all_funcs=$(printf '%s\n' "${!func_success[@]}" "${!func_failure[@]}" | sort -u)
    while IFS= read -r func; do
        [ -z "$func" ] && continue
        echo "salt_master_function_responses_total{function=\"${func}\",state=\"\",success=\"true\"} ${func_success[$func]:-0}"
        echo "salt_master_function_responses_total{function=\"${func}\",state=\"\",success=\"false\"} ${func_failure[$func]:-0}"
    done <<< "$all_funcs"

    # Keys are "function|minion|state".
    all_keys=$(printf '%s\n' "${!sched_key_success[@]}" "${!sched_key_failure[@]}" | sort -u)
    while IFS= read -r key; do
        [ -z "$key" ] && continue
        func=$(echo "$key" | cut -d'|' -f1)
        minion=$(echo "$key" | cut -d'|' -f2)
        state=$(echo "$key" | cut -d'|' -f3)
        echo "salt_master_scheduled_job_return_total{function=\"${func}\",minion=\"${minion}\",state=\"${state}\",success=\"true\"} ${sched_key_success[$key]:-0}"
        echo "salt_master_scheduled_job_return_total{function=\"${func}\",minion=\"${minion}\",state=\"${state}\",success=\"false\"} ${sched_key_failure[$key]:-0}"
    done <<< "$all_keys"
}

# ---------------------------------------------------------------------------
# New jobs by function with success/failure (from job list text output)
# ---------------------------------------------------------------------------

# Walk the cached job list and emit salt_master_new_job_total per function.
# Each "Function:" line starts a job; a later "Result: ...False" or non-zero
# "retcode: N" line is charged to the most recent function seen.
generate_new_job_metrics() {
    if [ -z "$JOB_LIST_CACHE" ]; then
        return
    fi

    declare -A func_total
    declare -A func_failed

    local line func current_func=""
    while IFS= read -r line; do
        if [[ "$line" =~ Function: ]]; then
            current_func=$(echo "$line" | sed 's/.*Function:[[:space:]]*//' | tr -d '[:space:]')
            [ -n "$current_func" ] && func_total["$current_func"]=$(( ${func_total["$current_func"]:-0} + 1 ))
        fi
        if [[ "$line" =~ Result:.*False ]] || [[ "$line" =~ retcode:\ [1-9] ]]; then
            [ -n "$current_func" ] && func_failed["$current_func"]=$(( ${func_failed["$current_func"]:-0} + 1 ))
        fi
    done <<< "$JOB_LIST_CACHE"

    for func in "${!func_total[@]}"; do
        local total=${func_total[$func]}
        local failed=${func_failed[$func]:-0}
        local succeeded=$((total - failed))
        echo "salt_master_new_job_total{function=\"${func}\",state=\"\",success=\"true\"} ${succeeded}"
        echo "salt_master_new_job_total{function=\"${func}\",state=\"\",success=\"false\"} ${failed}"
    done
}

# ---------------------------------------------------------------------------
# ZeroMQ port status
# ---------------------------------------------------------------------------

# Print 1 if `ss -tlnp` shows a listener on ":<port> ", else 0.
get_port_listening() {
    local port="$1"
    if ss -tlnp 2>/dev/null | grep -q ":${port} " 2>/dev/null; then
        echo "1"
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Cache disk usage and inode
# count
# ---------------------------------------------------------------------------

# Print the size of the Salt cache directory in bytes, or 0 when the
# directory is missing or du produces no output.
get_cache_disk_used_bytes() {
    local used=""
    if [ -d "$SALT_CACHE_DIR" ]; then
        used=$(du -sb "$SALT_CACHE_DIR" 2>/dev/null | awk '{print $1}')
    fi
    echo "${used:-0}"
}

# Print the bytes available on the filesystem holding the Salt cache, or 0.
# -P forces one row per filesystem so column 4 is always "Available", even
# when the device name is long enough that df would otherwise wrap the row.
get_cache_disk_available_bytes() {
    local avail=""
    if [ -d "$SALT_CACHE_DIR" ]; then
        avail=$(df -P -B1 "$SALT_CACHE_DIR" 2>/dev/null | awk 'NR == 2 {print $4}')
    fi
    echo "${avail:-0}"
}

# Print the number of filesystem entries under the Salt cache directory
# (the directory itself included), or 0 when it does not exist.
get_cache_inode_count() {
    if [ -d "$SALT_CACHE_DIR" ]; then
        find "$SALT_CACHE_DIR" 2>/dev/null | wc -l
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# File server cache size
# ---------------------------------------------------------------------------

# Print the size of $SALT_CACHE_DIR/file_lists in bytes, or 0.
get_fileserver_cache_size_bytes() {
    local size=""
    if [ -d "$SALT_CACHE_DIR/file_lists" ]; then
        size=$(du -sb "$SALT_CACHE_DIR/file_lists" 2>/dev/null | awk '{print $1}')
    fi
    echo "${size:-0}"
}

# ---------------------------------------------------------------------------
# Configuration values
# ---------------------------------------------------------------------------

# Print the scalar value of the first "KEY:" line in FILE with the key,
# any inline "# comment", surrounding quotes and all whitespace removed.
# Prints nothing when the key is absent or has no inline value.
_config_scalar_from_file() {
    local key="$1" file="$2"
    grep -E "^[[:space:]]*${key}:" "$file" 2>/dev/null | head -1 \
        | sed -e "s/^[[:space:]]*${key}:[[:space:]]*//" \
              -e 's/^#.*$//' \
              -e 's/[[:space:]]\{1,\}#.*$//' \
              -e 's/[[:space:]]*$//' \
              -e 's/^"\(.*\)"$/\1/' \
              -e "s/^'\(.*\)'\$/\1/" \
        | tr -d '[:space:]'
}

# Resolve a master configuration value.
#   $1  key name
#   $2  default printed when the key cannot be resolved
# Lookup order: the main master config, each /etc/salt/master.d/*.conf, then
# `salt-run config.get KEY` (bounded by SALT_CMD_TIMEOUT; "None" counts as
# unset).
get_config_value() {
    local key="$1" default="$2"
    local val f

    val=$(_config_scalar_from_file "$key" "$SALT_MASTER_CONFIG")
    if [ -z "$val" ]; then
        for f in /etc/salt/master.d/*.conf; do
            [ -f "$f" ] || continue
            val=$(_config_scalar_from_file "$key" "$f")
            [ -n "$val" ] && break
        done
    fi
    if [ -z "$val" ]; then
        val=$(timeout "$SALT_CMD_TIMEOUT" salt-run config.get "$key" 2>/dev/null | tr -d '[:space:]')
        if [ "$val" = "None" ]; then
            val=""
        fi
    fi
    echo "${val:-$default}"
}

# Resolve a configuration value and print 1 for true/yes/1 (any case),
# 0 for everything else.
#   $1  key name
#   $2  default passed through to get_config_value
get_config_bool() {
    local val
    val=$(get_config_value "$1" "$2")
    case "${val,,}" in
        true|yes|1) echo "1" ;;
        *) echo "0" ;;
    esac
}

# Print the configured keep_jobs value (default 24).
get_config_keep_jobs() {
    get_config_value "keep_jobs" "24"
}

# Print 1 when master_stats is enabled in the configuration, else 0.
get_config_master_stats_enabled() {
    get_config_bool "master_stats" "false"
}

# ---------------------------------------------------------------------------
# Salt API status
# ---------------------------------------------------------------------------

# Print 1 if any process has "salt-api" in its command line, else 0.
get_salt_api_running() {
    if pgrep -f "salt-api" >/dev/null 2>&1; then
        echo "1"
    else
        echo "0"
    fi
}

# Print the port of the first listening TCP socket that `ss -tlnp` attributes
# to salt-api; prints nothing when there is none.
get_salt_api_port() {
    ss -tlnp 2>/dev/null | grep "salt-api" | awk '{print $4}' | grep -oE '[0-9]+$' | head -1
}

# ---------------------------------------------------------------------------
# File roots size
# ---------------------------------------------------------------------------

# Print, one per line, every directory listed under the top-level mapping
# KEY ($1) in the master config and its master.d/*.conf drop-ins.
# file_roots / pillar_roots are nested mappings (environment -> list of
# directories), so the "- /path" list items below the key are collected until
# the next non-indented, non-comment line. Inline comments and surrounding
# quotes are stripped.
_list_roots_dirs() {
    local key="$1" f
    local -a cfg_files=()

    if [ -f "$SALT_MASTER_CONFIG" ]; then
        cfg_files+=("$SALT_MASTER_CONFIG")
    fi
    for f in /etc/salt/master.d/*.conf; do
        if [ -f "$f" ]; then
            cfg_files+=("$f")
        fi
    done
    if [ "${#cfg_files[@]}" -eq 0 ]; then
        return 0
    fi

    awk -v key="$key" '
        FNR == 1                     { in_block = 0 }
        index($0, key ":") == 1      { in_block = 1; next }
        in_block && /^[^[:space:]#]/ { in_block = 0 }
        in_block && /^[[:space:]]+-[[:space:]]+/ {
            sub(/^[[:space:]]+-[[:space:]]+/, "")
            sub(/[[:space:]]+#.*$/, "")
            sub(/[[:space:]]+$/, "")
            gsub(/^["\047]|["\047]$/, "")
            if ($0 != "") print
        }
    ' "${cfg_files[@]}" 2>/dev/null
}

# Print the combined size in bytes of the existing directories configured
# under KEY ($1); when none is configured or exists, fall back to the
# directory FALLBACK ($2) if present. Prints 0 when nothing can be measured.
_roots_size_bytes() {
    local key="$1" fallback="$2" dir total
    local -a dirs=()

    while IFS= read -r dir; do
        if [ -d "$dir" ]; then
            dirs+=("$dir")
        fi
    done < <(_list_roots_dirs "$key")

    if [ "${#dirs[@]}" -eq 0 ] && [ -d "$fallback" ]; then
        dirs=("$fallback")
    fi
    if [ "${#dirs[@]}" -eq 0 ]; then
        echo "0"
        return
    fi

    # -c appends a grand-total row; that last row is the value reported.
    total=$(du -sbc "${dirs[@]}" 2>/dev/null | tail -n 1 | awk '{print $1}')
    echo "${total:-0}"
}

# Print the total size of the configured file_roots directories in bytes
# (default /srv/salt), or 0.
get_file_roots_size_bytes() {
    _roots_size_bytes "file_roots" "/srv/salt"
}

# ---------------------------------------------------------------------------
# Master log file size
# ---------------------------------------------------------------------------

# Print the size in bytes of the configured log_file
# (default /var/log/salt/master), or 0 when it is not a regular file.
get_master_log_size_bytes() {
    local log_file
    log_file=$(get_config_value "log_file" "/var/log/salt/master")
    if [ -f "$log_file" ]; then
        stat -c %s "$log_file" 2>/dev/null || echo "0"
    else
        echo "0"
    fi
}

# ---------------------------------------------------------------------------
# Pillar roots size
# ---------------------------------------------------------------------------

# Print the total size of the configured pillar_roots directories in bytes
# (default /srv/pillar), or 0.
get_pillar_roots_size_bytes() {
    _roots_size_bytes "pillar_roots" "/srv/pillar"
}

# ---------------------------------------------------------------------------
# Metric generation
+# --------------------------------------------------------------------------- + +generate_metrics() { + local start_time + start_time=$(date +%s) + + local master_pid master_running + master_pid=$(get_master_pid) + if [ -n "$master_pid" ]; then + master_running=1 + else + master_running=0 + fi + + local salt_version + salt_version=$(get_salt_version) + + local keys_accepted keys_denied keys_unaccepted keys_rejected + keys_accepted=$(find "$SALT_PKI_DIR/minions/" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l) + keys_denied=$(find "$SALT_PKI_DIR/minions_denied/" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l) + keys_unaccepted=$(find "$SALT_PKI_DIR/minions_pre/" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l) + keys_rejected=$(find "$SALT_PKI_DIR/minions_rejected/" -maxdepth 1 -mindepth 1 2>/dev/null | wc -l) + local keys_total=$((keys_accepted + keys_denied + keys_unaccepted + keys_rejected)) + + local minions_up minions_down + minions_up=$(get_minions_up) + minions_down=$(get_minions_down) + + local master_uptime master_memory master_cpu master_threads master_fds salt_procs + master_uptime=$(get_master_uptime_seconds) + master_memory=$(get_master_memory_bytes) + master_cpu=$(get_master_cpu_percent) + master_threads=$(get_master_thread_count) + master_fds=$(get_master_open_fds) + salt_procs=$(get_salt_process_count) + + local configured_workers + configured_workers=$(get_configured_workers) + + local job_cache_count job_cache_size active_jobs + job_cache_count=$(get_job_cache_count) + job_cache_size=$(get_job_cache_size_bytes) + active_jobs=$(get_active_jobs) + + local event_pub_socket event_pull_socket + event_pub_socket=$(get_event_pub_socket_exists) + event_pull_socket=$(get_event_pull_socket_exists) + + local minion_cache_count syndic_count + minion_cache_count=$(get_minion_cache_count) + syndic_count=$(get_syndic_count) + + local version_match version_mismatch + version_match=$(get_version_match_count) + version_mismatch=$(get_version_mismatch_count) + + local jobs_1h 
jobs_24h jobs_failed_24h + jobs_1h=$(get_jobs_completed_1h) + jobs_24h=$(get_jobs_completed_24h) + jobs_failed_24h=$(get_jobs_failed_24h) + + local port_4505 port_4506 + port_4505=$(get_port_listening 4505) + port_4506=$(get_port_listening 4506) + + local cache_disk_used cache_disk_avail cache_inode_count + cache_disk_used=$(get_cache_disk_used_bytes) + cache_disk_avail=$(get_cache_disk_available_bytes) + cache_inode_count=$(get_cache_inode_count) + + local fileserver_cache_size + fileserver_cache_size=$(get_fileserver_cache_size_bytes) + + local config_keep_jobs config_master_stats + config_keep_jobs=$(get_config_keep_jobs) + config_master_stats=$(get_config_master_stats_enabled) + + local config_state_events config_presence_events config_timeout config_job_cache + config_state_events=$(get_config_bool "state_events" "false") + config_presence_events=$(get_config_bool "presence_events" "false") + config_timeout=$(get_config_value "timeout" "5") + config_job_cache=$(get_config_bool "job_cache" "true") + + local config_publish_port config_ret_port + config_publish_port=$(get_config_value "publish_port" "4505") + config_ret_port=$(get_config_value "ret_port" "4506") + + local salt_api_running salt_api_port + salt_api_running=$(get_salt_api_running) + salt_api_port=$(get_salt_api_port) + + local file_roots_size pillar_roots_size master_log_size + file_roots_size=$(get_file_roots_size_bytes) + pillar_roots_size=$(get_pillar_roots_size_bytes) + master_log_size=$(get_master_log_size_bytes) + + cat </dev/null || true) + echo "salt_master_highstate_jobs_24h ${hs_count:-0}" + else + echo "salt_master_highstate_jobs_24h 0" + fi + + cat <&2 + + while true; do + { + read -r request + if [[ "$request" =~ ^GET\ /metrics ]]; then + printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\n\r\n" + cache_all_salt_data + cache_log_data + cache_highstate_data + generate_metrics + else + printf "HTTP/1.1 200 OK\r\nContent-Type: text/html; 
charset=utf-8\r\n\r\n" + echo "

Salt Master Exporter v${SCRIPT_VERSION}

Metrics" + fi + } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null + done +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +main() { + parse_args "$@" + + [ "$HTTP_MODE" != true ] && acquire_lock + + if [ "$HTTP_MODE" = true ]; then + run_http_server + elif [ -n "$OUTPUT_FILE" ]; then + cache_all_salt_data + cache_log_data + cache_highstate_data + + mkdir -p "$(dirname "$OUTPUT_FILE")" + + local temp_file + temp_file=$(mktemp /tmp/salt_master_metrics.XXXXXX) + + generate_metrics > "$temp_file" + + rm -f "$OUTPUT_FILE" + mv "$temp_file" "$OUTPUT_FILE" + chmod 644 "$OUTPUT_FILE" + sync + else + cache_all_salt_data + cache_log_data + cache_highstate_data + generate_metrics + fi +} + +main "$@" diff --git a/salt-setup.sh b/salt-setup.sh new file mode 100644 index 0000000..b99b757 --- /dev/null +++ b/salt-setup.sh @@ -0,0 +1,509 @@ +#!/bin/bash + +################################################ +#### Salt Master/Minion Setup Automation #### +#### Install and configure SaltStack #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### Version: 1.00-030526 #### +################################################ + +set -o pipefail + +SCRIPT_NAME=$(basename "$0") +readonly SCRIPT_NAME + +# Default configuration +readonly DEFAULT_SALT_VERSION="latest" +readonly DEFAULT_FILE_ROOTS="/srv/salt" +readonly DEFAULT_PILLAR_ROOTS="/srv/pillar" +readonly DEFAULT_MASTER_INTERFACE="0.0.0.0" +readonly DEFAULT_MASTER_PORT_PUB=4505 +readonly DEFAULT_MASTER_PORT_RET=4506 + +# Configuration variables (can be overridden by environment) +SALT_VERSION=${SALT_VERSION:-$DEFAULT_SALT_VERSION} +FILE_ROOTS=${FILE_ROOTS:-$DEFAULT_FILE_ROOTS} +PILLAR_ROOTS=${PILLAR_ROOTS:-$DEFAULT_PILLAR_ROOTS} +DEBUG=${DEBUG:-} + +# Runtime flags +MODE="" +MASTER_IP="" +MINION_ID="" +AUTO_ACCEPT=false +AUTO_YES=false +PKG_MANAGER="" +OS_FAMILY="" +OS_VERSION="" + 
+# ERR-trap target: report the failing line and exit code on stderr, then exit
+# with that same code.
+#   $1 - exit code of the failed command
+#   $2 - line number where it failed
+handle_error() {
+    local exit_code=$1
+    local line_number=$2
+    echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2
+    exit "$exit_code"
+}
+
+trap 'handle_error $? $LINENO' ERR
+
+# Print a debug message to stderr when DEBUG is set to a non-empty value.
+debug_echo() {
+    # ${DEBUG:-} keeps this safe if the caller never exported DEBUG.
+    if [[ -n "${DEBUG:-}" ]]; then
+        echo "[DEBUG] $*" >&2
+    fi
+}
+
+# Informational message on stdout.
+log_info() {
+    echo "[INFO] $*"
+}
+
+# Warning message on stderr.
+log_warn() {
+    echo "[WARN] $*" >&2
+}
+
+# Error message on stderr (does not exit).
+log_error() {
+    echo "[ERROR] $*" >&2
+}
+
+# Print usage, options, environment variables and examples.
+show_help() {
+    cat << EOF
+Usage: $SCRIPT_NAME [OPTIONS]
+
+Automate Salt master and/or minion installation and configuration.
+
+Supports Ubuntu/Debian and RHEL/AlmaLinux. Adds the Salt Project repository,
+installs packages, configures services, creates directory structure, and
+opens firewall ports.
+
+OPTIONS:
+    --mode master|minion|both   What to install (required)
+    --master-ip ADDRESS         Salt master IP or hostname (required for minion/both)
+    --minion-id NAME            Custom minion ID (default: system hostname)
+    --auto-accept               Enable auto_accept on master (NOT for production)
+    --salt-version VERSION      Pin Salt version (default: latest)
+    --yes                       Skip confirmation prompts
+    --help, -h                  Show this help message
+
+ENVIRONMENT VARIABLES:
+    SALT_VERSION    Salt version to install (default: $DEFAULT_SALT_VERSION)
+    FILE_ROOTS      Master file_roots path (default: $DEFAULT_FILE_ROOTS)
+    PILLAR_ROOTS    Master pillar_roots path (default: $DEFAULT_PILLAR_ROOTS)
+    DEBUG           Enable debug output
+
+EXAMPLES:
+    # Install salt-master
+    sudo $SCRIPT_NAME --mode master --yes
+
+    # Install salt-minion pointing to master
+    sudo $SCRIPT_NAME --mode minion --master-ip 10.0.0.1
+
+    # Install both on the same node
+    sudo $SCRIPT_NAME --mode both --master-ip localhost --yes
+
+    # Install with custom minion ID
+    sudo $SCRIPT_NAME --mode minion --master-ip salt.example.com --minion-id web01
+
+    # Install specific Salt version
+    sudo $SCRIPT_NAME --mode master --salt-version 3006 --yes
+EOF
+}
+
+# Read /etc/os-release and set OS_VERSION, OS_FAMILY and PKG_MANAGER.
+# Sourcing os-release also leaves ID and VERSION_CODENAME set for later use.
+# Exits 1 when the file is missing or the distribution ID is not supported.
+detect_os() {
+    if [[ -f /etc/os-release ]]; then
+        # shellcheck disable=SC1091
+        source /etc/os-release
+        OS_VERSION="$VERSION_ID"
+        case "$ID" in
+            ubuntu|debian)
+                OS_FAMILY="debian"
+                PKG_MANAGER="apt"
+                ;;
+            rhel|centos|rocky|almalinux|ol|fedora)
+                OS_FAMILY="rhel"
+                # Prefer dnf when present, otherwise fall back to yum.
+                if command -v dnf >/dev/null 2>&1; then
+                    PKG_MANAGER="dnf"
+                else
+                    PKG_MANAGER="yum"
+                fi
+                ;;
+            *)
+                log_error "Unsupported OS: $ID"
+                exit 1
+                ;;
+        esac
+    else
+        log_error "Cannot detect OS — /etc/os-release not found"
+        exit 1
+    fi
+    debug_echo "Detected OS: $OS_FAMILY ($PKG_MANAGER) version $OS_VERSION"
+}
+
+# Print the number of CPUs, or 2 when nproc is unavailable.
+get_cpu_count() {
+    nproc 2>/dev/null || echo 2
+}
+
+# Install the Salt Project signing key and APT source for Debian/Ubuntu.
+# Uses ID, OS_VERSION and VERSION_CODENAME as set by detect_os.
+add_salt_repo_debian() {
+    log_info "Adding Salt Project repository (Debian/Ubuntu)..."
+
+    apt-get update -qq
+    apt-get install -y -qq curl gnupg2 >/dev/null
+
+    # detect_os only reaches this family for ID=ubuntu or ID=debian, so ID is
+    # the distribution path component (previously hard-coded to "ubuntu").
+    # NOTE(review): architecture is fixed to amd64, and Salt package hosting
+    # may have moved away from repo.saltproject.io — confirm the URLs resolve.
+    local repo_base="https://repo.saltproject.io/salt/py3/${ID}/${OS_VERSION}/amd64"
+
+    local keyring="/etc/apt/keyrings/salt-archive-keyring.gpg"
+    mkdir -p /etc/apt/keyrings
+    curl -fsSL "${repo_base}/SALT-PROJECT-GPG-PUBKEY-2023.gpg" \
+        -o "$keyring"
+
+    local repo_url="$repo_base"
+    if [[ "$SALT_VERSION" != "latest" ]]; then
+        repo_url="${repo_url}/${SALT_VERSION}"
+    fi
+    echo "deb [signed-by=${keyring}] ${repo_url} ${VERSION_CODENAME} main" \
+        > /etc/apt/sources.list.d/salt.list
+
+    apt-get update -qq
+    log_info "Salt repository added"
+}
+
+# Write /etc/yum.repos.d/salt.repo for RHEL-family systems and expire the
+# package manager cache.
+add_salt_repo_rhel() {
+    log_info "Adding Salt Project repository (RHEL)..."
+
+    # Keep only the major version (text before the first dot).
+    local major_ver="${OS_VERSION%%.*}"
+    local repo_url="https://repo.saltproject.io/salt/py3/redhat/${major_ver}/x86_64"
+    if [[ "$SALT_VERSION" != "latest" ]]; then
+        repo_url="${repo_url}/${SALT_VERSION}"
+    fi
+
+    cat > /etc/yum.repos.d/salt.repo << REPOEOF
+[salt]
+name=Salt Project for RHEL ${major_ver}
+baseurl=${repo_url}
+enabled=1
+gpgcheck=1
+gpgkey=https://repo.saltproject.io/salt/py3/redhat/${major_ver}/x86_64/SALT-PROJECT-GPG-PUBKEY-2023.pub
+REPOEOF
+
+    "$PKG_MANAGER" clean expire-cache -q
+    log_info "Salt repository added"
+}
+
+# Install the salt-master package with the detected package manager.
+install_master() {
+    log_info "Installing salt-master..."
+    case "$PKG_MANAGER" in
+        apt)
+            apt-get install -y -qq salt-master >/dev/null
+            ;;
+        dnf|yum)
+            "$PKG_MANAGER" install -y -q salt-master
+            ;;
+    esac
+    log_info "salt-master installed"
+}
+
+# Install the salt-minion package with the detected package manager.
+install_minion() {
+    log_info "Installing salt-minion..."
+    case "$PKG_MANAGER" in
+        apt)
+            apt-get install -y -qq salt-minion >/dev/null
+            ;;
+        dnf|yum)
+            "$PKG_MANAGER" install -y -q salt-minion
+            ;;
+    esac
+    log_info "salt-minion installed"
+}
+
+# Back up any existing /etc/salt/master, then write a fresh configuration
+# using DEFAULT_MASTER_INTERFACE, FILE_ROOTS, PILLAR_ROOTS and AUTO_ACCEPT.
+configure_master() {
+    log_info "Configuring salt-master..."
+
+    local worker_threads
+    worker_threads=$(get_cpu_count)
+
+    if [[ -f /etc/salt/master ]]; then
+        cp /etc/salt/master /etc/salt/master.bak."$(date +%Y%m%d%H%M%S)"
+        log_info "Backed up existing /etc/salt/master"
+    fi
+
+    cat > /etc/salt/master << MASTEREOF
+##### Salt Master Configuration #####
+##### Managed by salt-setup.sh #####
+
+interface: ${DEFAULT_MASTER_INTERFACE}
+
+file_roots:
+  base:
+    - ${FILE_ROOTS}
+
+pillar_roots:
+  base:
+    - ${PILLAR_ROOTS}
+
+worker_threads: ${worker_threads}
+timeout: 30
+state_events: True
+presence_events: True
+MASTEREOF
+
+    if [[ "$AUTO_ACCEPT" == true ]]; then
+        {
+            echo ""
+            echo "# WARNING: NOT recommended for production"
+            echo "auto_accept: True"
+        } >> /etc/salt/master
+        log_warn "auto_accept enabled — NOT recommended for production"
+    else
+        {
+            echo ""
+            echo "auto_accept: False"
+        } >> /etc/salt/master
+    fi
+
+    log_info "Master configuration written to /etc/salt/master"
+}
+
+# Back up any existing /etc/salt/minion, then write master address and
+# minion id. The id is MINION_ID, else the FQDN, else the short hostname.
+configure_minion() {
+    log_info "Configuring salt-minion..."
+
+    local minion_id
+    minion_id="${MINION_ID:-$(hostname -f 2>/dev/null || hostname)}"
+
+    if [[ -f /etc/salt/minion ]]; then
+        cp /etc/salt/minion /etc/salt/minion.bak."$(date +%Y%m%d%H%M%S)"
+        log_info "Backed up existing /etc/salt/minion"
+    fi
+
+    cat > /etc/salt/minion << MINIONEOF
+##### Salt Minion Configuration #####
+##### Managed by salt-setup.sh #####
+
+master: ${MASTER_IP}
+id: ${minion_id}
+
+# grains:
+#   role: webserver
+#   environment: production
+MINIONEOF
+
+    log_info "Minion configured (id: ${minion_id}, master: ${MASTER_IP})"
+}
+
+# Create FILE_ROOTS and PILLAR_ROOTS and seed each with a placeholder top.sls
+# unless one already exists.
+create_directory_structure() {
+    log_info "Creating Salt directory structure..."
+
+    mkdir -p "${FILE_ROOTS}" "${PILLAR_ROOTS}"
+
+    if [[ ! -f "${FILE_ROOTS}/top.sls" ]]; then
+        cat > "${FILE_ROOTS}/top.sls" << 'TOPEOF'
+base:
+  '*':
+    []
+    # - common
+    # - packages
+TOPEOF
+        log_info "Created ${FILE_ROOTS}/top.sls"
+    fi
+
+    if [[ ! -f "${PILLAR_ROOTS}/top.sls" ]]; then
+        cat > "${PILLAR_ROOTS}/top.sls" << 'PTOPEOF'
+base:
+  '*':
+    []
+    # - common
+PTOPEOF
+        log_info "Created ${PILLAR_ROOTS}/top.sls"
+    fi
+}
+
+# Open the two Salt master TCP ports in ufw or firewalld when one of them is
+# installed and active; otherwise warn so the operator can do it manually.
+open_firewall_ports() {
+    log_info "Configuring firewall for Salt master ports..."
+
+    if command -v ufw >/dev/null 2>&1; then
+        if ufw status | grep -q "Status: active"; then
+            ufw allow "${DEFAULT_MASTER_PORT_PUB}/tcp" >/dev/null
+            ufw allow "${DEFAULT_MASTER_PORT_RET}/tcp" >/dev/null
+            log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in ufw"
+        else
+            debug_echo "ufw not active — skipping"
+        fi
+    elif command -v firewall-cmd >/dev/null 2>&1; then
+        if firewall-cmd --state >/dev/null 2>&1; then
+            firewall-cmd --permanent --add-port="${DEFAULT_MASTER_PORT_PUB}/tcp" >/dev/null
+            firewall-cmd --permanent --add-port="${DEFAULT_MASTER_PORT_RET}/tcp" >/dev/null
+            # --permanent rules only take effect after a reload.
+            firewall-cmd --reload >/dev/null
+            log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in firewalld"
+        else
+            debug_echo "firewalld not running — skipping"
+        fi
+    else
+        log_warn "No supported firewall detected — manually open ports ${DEFAULT_MASTER_PORT_PUB} and ${DEFAULT_MASTER_PORT_RET}"
+    fi
+}
+
+# Enable and restart a systemd unit, then verify it is active.
+#   $1 - service name
+# Returns 1 (after printing the unit status) when the service is not active.
+start_service() {
+    local service="$1"
+    log_info "Enabling and starting ${service}..."
+    systemctl enable "$service" >/dev/null 2>&1
+    systemctl restart "$service"
+    if systemctl is-active "$service" >/dev/null 2>&1; then
+        log_info "${service} is running"
+    else
+        log_error "${service} failed to start"
+        systemctl status "$service" --no-pager
+        return 1
+    fi
+}
+
+# Print a post-install summary for the selected MODE.
+show_summary() {
+    echo ""
+    echo "============================================"
+    echo "  Salt Setup Complete"
+    echo "============================================"
+
+    if [[ "$MODE" == "master" || "$MODE" == "both" ]]; then
+        echo ""
+        echo "  Master:"
+        echo "    Config:       /etc/salt/master"
+        echo "    File roots:   ${FILE_ROOTS}"
+        echo "    Pillar roots: ${PILLAR_ROOTS}"
+        echo "    Ports:        ${DEFAULT_MASTER_PORT_PUB}, ${DEFAULT_MASTER_PORT_RET}"
+        echo ""
+        echo "  Master fingerprint:"
+        # The fallback message covers both a missing key and a failing salt-key.
+        salt-key -F master 2>/dev/null | grep -A1 "master.pub" || echo "    (not yet generated — restart may be needed)"
+        echo ""
+        echo "  Next steps:"
+        echo "    salt-key -L          # List pending keys"
+        echo "    salt-key -a          # Accept a minion key"
+        echo "    salt '*' test.ping   # Test connectivity"
+    fi
+
+    if [[ "$MODE" == "minion" || "$MODE" == "both" ]]; then
+        local minion_id
+        minion_id="${MINION_ID:-$(hostname -f 2>/dev/null || hostname)}"
+        echo ""
+        echo "  Minion:"
+        echo "    Config:    /etc/salt/minion"
+        echo "    Master:    ${MASTER_IP}"
+        echo "    Minion ID: ${minion_id}"
+        echo ""
+        echo "  Next steps:"
+        echo "    salt-call test.ping   # Test master connectivity"
+        if [[ "$AUTO_ACCEPT" != true ]]; then
+            echo "    (on master) salt-key -a ${minion_id}"
+        fi
+    fi
+
+    echo ""
+    echo "============================================"
+}
+
+# Abort with a clear message when an option that takes a value has none.
+#   $1 - option name as typed
+#   $2 - the following argument (empty when absent)
+require_option_value() {
+    if [[ -z "$2" || "$2" == --* ]]; then
+        log_error "Option $1 requires a value"
+        exit 1
+    fi
+}
+
+# Parse command-line options into MODE, MASTER_IP, MINION_ID, AUTO_ACCEPT,
+# SALT_VERSION and AUTO_YES. Exits 0 after --help, 1 on invalid input.
+parse_arguments() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --mode)
+                # Without this check a trailing "--mode" makes "shift 2" fail.
+                require_option_value "$1" "${2:-}"
+                MODE="$2"
+                if [[ "$MODE" != "master" && "$MODE" != "minion" && "$MODE" != "both" ]]; then
+                    log_error "Mode must be 'master', 'minion', or 'both'"
+                    exit 1
+                fi
+                shift 2
+                ;;
+            --master-ip)
+                require_option_value "$1" "${2:-}"
+                MASTER_IP="$2"
+                shift 2
+                ;;
+            --minion-id)
+                require_option_value "$1" "${2:-}"
+                MINION_ID="$2"
+                shift 2
+                ;;
+            --auto-accept)
+                AUTO_ACCEPT=true
+                shift
+                ;;
+            --salt-version)
+                require_option_value "$1" "${2:-}"
+                SALT_VERSION="$2"
+                shift 2
+                ;;
+            --yes)
+                AUTO_YES=true
+                shift
+                ;;
+            --help|-h)
+                show_help
+                exit 0
+                ;;
+            *)
+                log_error "Unknown option: $1"
+                show_help >&2
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Check root privileges and required options, then detect the OS.
+validate_requirements() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "This script must be run as root (use sudo)"
+        exit 1
+    fi
+
+    if [[ -z "$MODE" ]]; then
+        log_error "--mode is required (master, minion, or both)"
+        show_help >&2
+        exit 1
+    fi
+
+    if [[ "$MODE" == "minion" || "$MODE" == "both" ]]; then
+        if [[ -z "$MASTER_IP" ]]; then
+            log_error "--master-ip is required for minion/both modes"
+            exit 1
+        fi
+    fi
+
+    detect_os
+}
+
+# Entry point: parse options, confirm, add the repository, then install,
+# configure and start the components selected by MODE.
+main() {
+    parse_arguments "$@"
+    validate_requirements
+
+    echo "============================================"
+    echo "  Salt Setup"
+    echo "  Mode: $MODE"
+    echo "  OS:   $OS_FAMILY ($PKG_MANAGER)"
+    if [[ -n "$MASTER_IP" ]]; then
+        echo "  Master: $MASTER_IP"
+    fi
+    echo "============================================"
+    echo ""
+
+    if [[ "$AUTO_YES" != true ]]; then
+        echo "Press Enter to continue, or Ctrl+C to abort..."
+        read -r
+    fi
+
+    case "$OS_FAMILY" in
+        debian) add_salt_repo_debian ;;
+        rhel) add_salt_repo_rhel ;;
+    esac
+
+    if [[ "$MODE" == "master" || "$MODE" == "both" ]]; then
+        install_master
+        configure_master
+        create_directory_structure
+        open_firewall_ports
+        start_service salt-master
+    fi
+
+    if [[ "$MODE" == "minion" || "$MODE" == "both" ]]; then
+        install_minion
+        configure_minion
+        start_service salt-minion
+    fi
+
+    show_summary
+
+    debug_echo "Script completed successfully"
+}
+
+# Run main only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
diff --git a/salt-status.ps1 b/salt-status.ps1
new file mode 100644
index 0000000..415cc75
--- /dev/null
+++ b/salt-status.ps1
@@ -0,0 +1,513 @@
+<#
+.SYNOPSIS
+    Monitors Salt Minion service status and exports metrics for Prometheus windows_exporter.
+
+.DESCRIPTION
+    This script checks the status of the Salt Minion service and creates Prometheus-formatted metrics.
+ The metrics are written to a text file that can be consumed by the windows_exporter. + It can also create a scheduled task to run periodically. + +.PARAMETER MetricsFilePath + The path where the Prometheus metrics file will be written. + The value is validated: it must not be null or empty, it must be a valid Windows path + starting with a drive letter, and its parent directory must already exist. + +.PARAMETER InstallScheduledTask + Switch to create a scheduled task for periodic monitoring. + +.PARAMETER TaskIntervalMinutes + The interval in minutes for the scheduled task. Default is 15 minutes. + +.PARAMETER TimeoutSeconds + Timeout in seconds for service status checks. Default is 30 seconds. + +.PARAMETER SaltMasterPort + The port number for the Salt Master. Default is 4505. + +.PARAMETER DryRun + Switch to output metrics to console instead of writing to file. + +.PARAMETER Verbose + Switch to enable verbose debug output for troubleshooting. + +.PARAMETER Quiet + Switch to suppress non-error output (useful for scheduled tasks). + +.PARAMETER NoCron + Switch to skip scheduled task installation. + +.PARAMETER Version + Switch to display script version and exit. + +.NOTES + Version: 3.3.0-20250915 + Author: Phil Connor, contact@mylinux.work + License: MIT + Created: 2025-01-24, loosely based on my salt_status.sh used with the Linux servers. 
+#>
+
+param(
+    [ValidateNotNullOrEmpty()]
+    [ValidateScript({
+        # The parent directory must exist and the path must start with a drive letter.
+        $parentPath = Split-Path $_ -Parent
+        if ($parentPath -and -not (Test-Path $parentPath)) {
+            throw "Directory does not exist: $parentPath"
+        }
+        if ($_ -match '^[A-Za-z]:\\') {
+            return $true
+        }
+        throw "Invalid file path format"
+    })]
+    [string]$MetricsFilePath = "$env:ProgramFiles\windows_exporter\textfile_inputs\salt_status.prom",
+    [switch]$InstallScheduledTask = $false,
+    [ValidateRange(1, 1440)] # Scheduled-task interval must be between 1 and 1440 minutes
+    [int]$TaskIntervalMinutes = 15,
+    [ValidateRange(1, 300)] # Service status check timeout must be between 1 and 300 seconds
+    [int]$TimeoutSeconds = 30,
+    [int]$SaltMasterPort = 4505,
+    [switch]$DryRun = $false,   # Output metrics to console instead of file
+    [switch]$Verbose = $false,  # Enable verbose debug output
+    [switch]$Quiet = $false,    # Suppress non-error output
+    [switch]$NoCron = $false,   # Skip scheduled task installation
+    [switch]$Version = $false   # Show version and exit
+)
+
+# Handle version display
+if ($Version) {
+    Write-Host "Salt Status Monitor PowerShell Script"
+    Write-Host "Version: 3.3.0-20250915"
+    Write-Host "Author: Phil Connor pconnor@ara.com"
+    exit 0
+}
+
+# Set up logging preferences based on Verbose/Quiet flags.
+# Quiet is applied last, so it wins when both switches are given.
+if ($Verbose) {
+    $VerbosePreference = 'Continue'
+    $InformationPreference = 'Continue'
+}
+if ($Quiet) {
+    $VerbosePreference = 'SilentlyContinue'
+    $InformationPreference = 'SilentlyContinue'
+    $WarningPreference = 'SilentlyContinue'
+}
+
+# Logging functions
+
+# Write a timestamped message to the host only when -Verbose was given.
+function Write-VerboseLog {
+    param([string]$Message)
+    if ($Verbose) {
+        Write-Host "[VERBOSE] $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss') $Message" -ForegroundColor Cyan
+    }
+}
+
+# Write a timestamped message to the host unless -Quiet was given.
+function Write-InfoLog {
+    param([string]$Message)
+    if (-not $Quiet) {
+        Write-Host "[INFO] $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss') $Message" -ForegroundColor Green
+    }
+}
+
+# Create a scheduled task that re-runs this script every $TaskIntervalMinutes minutes
+if ($InstallScheduledTask -and -not $NoCron) {
+    $taskName = "SaltMinionStatusCheck"
+    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+
+    if (-not $existingTask) {
+        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$($MyInvocation.MyCommand.Path)`""
+        # Add validation
+        if (-not $TaskIntervalMinutes -or $TaskIntervalMinutes -le 0) {
+            throw "TaskIntervalMinutes must be a positive integer"
+        }
+
+        $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365)
+        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
+
+        try {
+            Write-InfoLog "Creating scheduled task: $taskName"
+            Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Monitors Salt Minion status every $TaskIntervalMinutes minutes"
+
+            # Verify the task was created
+            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+            if (-not $createdTask) {
+                throw "Failed to verify scheduled task creation"
+            }
+            Write-InfoLog "Successfully created scheduled task: $taskName"
+        } catch {
+            Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
+            throw
+        }
+    } else {
+        Write-InfoLog "Scheduled task $taskName already exists. Skipping creation."
+    }
+}
+
+# Function to check if required commands are available.
+# Returns $true when Get-Command resolves the name, otherwise warns and returns $false.
+function Test-CommandAvailability {
+    param([string]$Command)
+
+    try {
+        Get-Command $Command -ErrorAction Stop | Out-Null
+        return $true
+    } catch {
+        Write-Warning "Required command '$Command' is not available"
+        return $false
+    }
+}
+
+# Function to check if the salt-master is connected.
+# Returns $true when any netstat line mentions an address ending in :$SaltMasterPort.
+function Test-Port4505Connection {
+    try {
+        # Use netstat to check for active connections on the salt-master port
+        $portCheck = netstat -an 2>$null | Select-String "\s+[^:]+:$SaltMasterPort\s+"
+
+        # Check if we found any active connections on the port
+        if ($null -ne $portCheck) {
+            Write-VerboseLog "Port $SaltMasterPort is in use and has active connections"
+            return $true
+        } else {
+            Write-VerboseLog "No active connections found on port $SaltMasterPort"
+            return $false
+        }
+    } catch [System.Management.Automation.ActionPreferenceStopException] {
+        # Silently ignore this specific exception when error action is set to Stop.
+        # Return an explicit boolean so callers never receive $null.
+        return $false
+    } catch {
+        # Log any other unexpected errors and return failure status
+        Write-Warning "Failed to check port $SaltMasterPort : $($_.Exception.Message)"
+        return $false
+    }
+}
+
+# Function to check if salt-call test.ping succeeds.
+# Runs salt-call in a background job so the call can be abandoned after
+# $TimeoutSeconds; returns $true only when the output reports True.
+# NOTE(review): the command is run with --local, which presumably does not
+# contact the master — confirm this is the intended check.
+function Test-SaltPing {
+    param(
+        # Defaults to the script-level -TimeoutSeconds value.
+        [int]$TimeoutSeconds = $script:TimeoutSeconds
+    )
+    if (-not (Test-CommandAvailability "salt-call")) {
+        Write-Warning "Salt-call command not found"
+        return $false
+    }
+
+    $job = $null
+    try {
+        $job = Start-Job -ScriptBlock { salt-call test.ping --local 2>$null } -ErrorAction Stop
+        $completed = $job | Wait-Job -Timeout $TimeoutSeconds
+        if (-not $completed) {
+            Write-Warning "Salt-call test.ping timed out after $TimeoutSeconds seconds"
+            return $false
+        }
+        $saltTest = $job | Receive-Job -ErrorAction SilentlyContinue
+        if ($null -eq $saltTest) {
+            # Routed through the verbose logger so -Quiet is honoured.
+            Write-VerboseLog "No response from salt-call test.ping"
+            return $false
+        }
+
+        # Multi-line output arrives as an array; join it so the regex sees one string.
+        if ($saltTest -is [array]) {
+            $saltTest = $saltTest -join "`n"
+        }
+
+        if ($saltTest -match "local:\s*True" -or $saltTest -match "^\s*True\s*$") {
+            Write-VerboseLog "Salt-call test.ping returned True"
+            return $true
+        } else {
+            Write-VerboseLog "Salt-call test.ping failed or returned unexpected output: $saltTest"
+            return $false
+        }
+    } catch {
+        Write-Warning "Salt-Call failed: $($_.Exception.Message)"
+        return $false
+    } finally {
+        # Always stop and remove the job so timed-out runs do not accumulate.
+        if ($null -ne $job) {
+            try {
+                if ($job.State -eq 'Running') {
+                    $job | Stop-Job -Force -ErrorAction SilentlyContinue
+                }
+            } finally {
+                $job | Remove-Job -Force -ErrorAction SilentlyContinue
+            }
+        }
+    }
+}
+
+# Function to check whether a Prometheus metric name is valid
+function Test-PrometheusMetricName {
+    param([string]$MetricName)
+
+    # Prometheus metric names should match: [a-zA-Z_:][a-zA-Z0-9_:]*
+    if ($MetricName -match '^[a-zA-Z_:][a-zA-Z0-9_:]*$') {
+        return $true
+    }
+    return $false
+}
+
+# Function to format and add a metric to the metrics array.
+# Appends HELP, TYPE and sample lines to the array passed by reference;
+# warns and adds nothing when the name is not a valid metric name.
+function Add-PrometheusMetric {
+    param(
+        [string]$Name,
+        [string]$Help,
+        [string]$Type,
+        [object]$Value,
+        [ref]$MetricsArray
+    )
+
+    if (-not (Test-PrometheusMetricName $Name)) {
+        Write-Warning "Invalid metric name: $Name"
+        return
+    }
+
+    $MetricsArray.Value += "# HELP $Name $Help"
+    $MetricsArray.Value += "# TYPE $Name $Type"
+    $MetricsArray.Value += "$Name $Value"
+}
+
+# Function to check Windows service status.
+# Returns 1 when running, 0 when not running or on error, 2 when the service is not found.
+function Test-SaltMinionService {
+    try {
+        $service = Get-Service -Name "salt-minion" -ErrorAction SilentlyContinue
+        if ($null -eq $service) {
+            Write-Warning "Salt-minion service not found"
+            return 2 # Service not found
+        }
+
+        if ($service.Status -eq 'Running') {
+            return 1 # Service is running
+        } else {
+            return 0 # Service is not running
+        }
+    } catch {
+        Write-Warning "Failed to check salt-minion service status: $($_.Exception.Message)"
+        return 0
+    }
+}
+
+# Function to get Salt version.
+# Returns the first "digits.digits" match from `salt-call --version`, or "0".
+function Get-SaltVersion {
+    if (-not (Test-CommandAvailability "salt-call")) {
+        return "0"
+    }
+
+    try {
+        $versionOutput = & salt-call --version 2>$null
+        if ($versionOutput -match "(\d+\.\d+)") {
+            return $matches[1]
+        }
+        return "0"
+    } catch {
+        Write-Warning "Failed to get Salt version: $($_.Exception.Message)"
+        return "0"
+    }
+}
+
+# Function to get Salt-minion memory usage.
+# Returns the summed WorkingSet64 of all salt-minion processes, or 0.
+function Get-SaltMemoryUsage {
+    try {
+        $saltProcesses = Get-Process -Name "salt-minion" -ErrorAction SilentlyContinue
+        if ($null -eq $saltProcesses) {
+            return 0
+        }
+
+        $totalMemory = 0
+        foreach ($process in $saltProcesses) {
+            $totalMemory += $process.WorkingSet64
+        }
+        return $totalMemory
+    } catch {
+        Write-Warning "Failed to get salt-minion memory usage: $($_.Exception.Message)"
+        return 0
+    }
+}
+
+# Function to get last successful communication timestamp.
+# Returns the current Unix time (UTC seconds) when Test-SaltPing succeeds, else 0.
+function Get-LastCommunicationTimestamp {
+    if (-not (Test-CommandAvailability "salt-call")) {
+        return 0
+    }
+
+    try {
+        $pingResult = Test-SaltPing
+        if ($pingResult) {
+            # Culture-independent and always UTC, unlike parsing Get-Date -UFormat %s.
+            return [DateTimeOffset]::UtcNow.ToUnixTimeSeconds()
+        }
+        return 0
+    } catch {
+        Write-Warning "Failed to get last communication timestamp: $($_.Exception.Message)"
+        return 0
+    }
+}
+
+# Function to count recent Salt errors.
+# Counts Application-log error events from the last 24 hours first; when none
+# are returned, counts [ERROR] lines in the last 1000 lines of the minion log.
+function Get-SaltErrorCount {
+    try {
+        $since = (Get-Date).AddHours(-24)
+        # FilterHashtable filters on ProviderName; 'Source' is not an accepted key.
+        $errorEvents = Get-WinEvent -FilterHashtable @{
+            LogName      = 'Application'
+            ProviderName = 'salt-minion'
+            Level        = 2 # Error level
+            StartTime    = $since
+        } -ErrorAction SilentlyContinue
+
+        if ($null -ne $errorEvents) {
+            # @() makes Count reliable when a single event is returned.
+            return @($errorEvents).Count
+        }
+    } catch {
+        Write-VerboseLog "Event log query failed: $($_.Exception.Message)"
+    }
+
+    # Fallback: read the salt log file if it exists. This used to sit in the
+    # catch block, which SilentlyContinue prevented from ever running.
+    $logPath = "${env:ProgramData}\Salt Project\Salt\var\log\salt\minion"
+    if (Test-Path $logPath) {
+        try {
+            $logContent = Get-Content $logPath -Tail 1000 -ErrorAction SilentlyContinue
+            $errorLines = @($logContent | Where-Object { $_ -match "\[ERROR\]" })
+            return $errorLines.Count
+        } catch {
+            return 0
+        }
+    }
+    return 0
+}
+
+# Function to export Prometheus metrics.
+# Runs every check, records failures in $errors, and returns the metric lines
+# as an array of strings. A failed check still emits its metric with value 0.
+function Export-PrometheusMetrics {
+    # Starts the metrics export.
+    $startTime = Get-Date
+    $metrics = @()
+    $errors = @()
+
+    try {
+        # Connection status metric (port 4505)
+        try {
+            if (-not (Test-CommandAvailability "netstat")) {
+                $errors += "netstat command not found"
+                $connectionStatus = 2
+            } else {
+                $connectionStatus = if (Test-Port4505Connection) { 1 } else { 0 }
+            }
+            Add-PrometheusMetric -Name "minion_connection_status" -Help "Shows if Salt-Minion is connected to Salt-Master." -Type "gauge" -Value $connectionStatus -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Port 4505 check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_connection_status" -Help "Shows if Salt-Minion is connected to Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Salt ping metric
+        try {
+            if (-not (Test-CommandAvailability "salt-call")) {
+                $errors += "salt-call command not found"
+                $pingStatus = 2
+            } else {
+                $pingStatus = if (Test-SaltPing) { 1 } else { 0 }
+            }
+            Add-PrometheusMetric -Name "minion_ping_status" -Help "Shows if Salt-Minion is able to ping Salt-Master." -Type "gauge" -Value $pingStatus -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Salt ping check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_ping_status" -Help "Shows if Salt-Minion is able to ping Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Service status metric
+        try {
+            $serviceStatus = Test-SaltMinionService
+            Add-PrometheusMetric -Name "minion_service_status" -Help "Shows if Salt-Minion service is active." -Type "gauge" -Value $serviceStatus -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Service status check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_service_status" -Help "Shows if Salt-Minion service is active." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Last communication timestamp
+        try {
+            $lastComm = Get-LastCommunicationTimestamp
+            Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help "Timestamp of last successful communication with Salt-Master." -Type "gauge" -Value $lastComm -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Last communication check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help "Timestamp of last successful communication with Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Salt version metric
+        try {
+            $version = Get-SaltVersion
+            Add-PrometheusMetric -Name "minion_version" -Help "Salt-Minion version number." -Type "gauge" -Value $version -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Version check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_version" -Help "Salt-Minion version number." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Memory usage metric
+        try {
+            $memoryUsage = Get-SaltMemoryUsage
+            Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help "Salt-Minion process memory usage in bytes." -Type "gauge" -Value $memoryUsage -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Memory usage check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help "Salt-Minion process memory usage in bytes." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Error count metric
+        try {
+            $errorCount = Get-SaltErrorCount
+            Add-PrometheusMetric -Name "minion_error_count" -Help "Number of error entries in Salt-Minion log file." -Type "counter" -Value $errorCount -MetricsArray ([ref]$metrics)
+        } catch {
+            $errors += "Error count check failed: $($_.Exception.Message)"
+            Add-PrometheusMetric -Name "minion_error_count" -Help "Number of error entries in Salt-Minion log file." -Type "counter" -Value 0 -MetricsArray ([ref]$metrics)
+        }
+
+        # Windows-specific: Script execution error count
+        Add-PrometheusMetric -Name "windows_salt_script_errors_total" -Help "Total number of errors during script execution" -Type "counter" -Value $errors.Count -MetricsArray ([ref]$metrics)
+
+        # Windows-specific: Script runtime
+        $scriptRuntime = (Get-Date) - $startTime
+        Add-PrometheusMetric -Name "windows_salt_script_runtime_seconds" -Help "Total script execution time in seconds" -Type "gauge" -Value $scriptRuntime.TotalSeconds -MetricsArray ([ref]$metrics)
+
+    } finally {
+        # Report collected errors regardless of success/failure
+        if ($errors.Count -gt 0) {
+            Write-Warning "Script completed with $($errors.Count) errors"
+        }
+    }
+
+    return $metrics
+}
+
+
+# Output metrics to console or file
+try {
+    # Export metrics as an array of strings
+    $exportedMetrics = Export-PrometheusMetrics
+    if ($null -eq $exportedMetrics) {
+        throw "Export-PrometheusMetrics returned null"
+    }
+
+    if ($DryRun) {
+        # Dry run mode: output to console
+        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsFilePath ===" -ForegroundColor Yellow
+        $exportedMetrics | ForEach-Object { Write-Host $_ }
+        Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
+    } else {
+        # Normal mode: write to file with retry mechanism
+        $retryCount = 0
+        $maxRetries = 3
+        # UTF-8 without a byte-order mark. Out-File -Encoding UTF8 adds a BOM in
+        # Windows PowerShell.
+        # NOTE(review): confirm the textfile collector's BOM handling.
+        $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
+        do {
+            try {
+                # Write the metrics to the file
+                [System.IO.File]::WriteAllLines($MetricsFilePath, [string[]]$exportedMetrics, $utf8NoBom)
+                break
+            } catch [System.IO.IOException] {
+                $retryCount++
+                if ($retryCount -ge $maxRetries) {
+                    throw
+                }
+                # Wait 100ms before retrying
+                Start-Sleep -Milliseconds 100
+            }
+        } while ($retryCount -lt $maxRetries)
+    }
+} catch {
+    Write-Error "Failed to export metrics: $($_.Exception.Message)"
+    exit 1
+}
+
+# Uncomment the following line to write metrics to the console
+# $exportedMetrics = Export-PrometheusMetrics
+
diff --git a/salt-status.sh b/salt-status.sh
new file mode 100755
index 0000000..3a8c80a --- /dev/null +++ b/salt-status.sh @@ -0,0 +1,409 @@ +#!/bin/bash + +##################################################### +### ### +### Description: Expose metrics from salt-minion. ### +### ### +### Phil Connor, contact@mylinux.work ### +### License: MIT ### +### Version 2.28.0.20250915 ### +### ### +##################################################### + +# Exit on any error, treat unset variables as errors, and fail pipes on first failure +set -euo pipefail + +# Parse command line arguments +DRY_RUN=false +VERBOSE=false +QUIET=false +NO_CRON=false +SCRIPT_VERSION="2.28.0.20250915" + +show_version() { + echo "Salt Status Monitor Bash Script" + echo "Version: $SCRIPT_VERSION" + echo "Author: Phil Connor pconnor@ara.com" +} + +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Monitor Salt minion status and export Prometheus metrics" + echo "" + echo "Options:" + echo " --dry-run Output metrics to console instead of file" + echo " --verbose Enable verbose debug output" + echo " --quiet Suppress non-error output" + echo " --no-cron Skip cron job installation" + echo " --timeout N Override timeout seconds (default: varies by operation)" + echo " --version Show version and exit" + echo " --help Show this help message" +} + +# Logging functions +log_verbose() { + [[ "$VERBOSE" == "true" ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1" +} + +log_info() { + [[ "$QUIET" == "false" ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" +} + +while [[ $# -gt 0 ]]; do + case $1 in + --dry-run) + DRY_RUN=true + shift + ;; + --verbose|-v) + VERBOSE=true + shift + ;; + --quiet|-q) + QUIET=true + shift + ;; + --no-cron) + NO_CRON=true + shift + ;; + --timeout) + if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then + TIMEOUT_OVERRIDE="$2" + shift 2 + else + echo "Error: --timeout requires a numeric value" >&2 + exit 1 + fi + ;; + --version) + show_version + exit 0 + ;; + -h|--help) + show_help + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + echo "Use 
--help for usage information" >&2 + exit 1 + ;; + esac +done + +# Get absolute path to this script for cron job installation +readonly SCRIPT_PATH="$(readlink -f "$0")" + +# Configuration with defaults - can be overridden by environment variables +readonly CRONTAB_USER="${CRONTAB_USER:-root}" # User to install cron job under +readonly NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}" # Directory where Prometheus metrics are stored +readonly PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}" # User that owns the metrics directory +readonly LOCK_DIR="${LOCK_DIR:-/var/run}" # Directory for lock files to prevent concurrent runs +readonly UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/10 * * * *}" # Cron schedule - every 10 minutes by default +readonly SALT_MASTER_PORT=4505 # Salt master communication port + +# Status codes used in Prometheus metrics +readonly STATUS_SUCCESS=1 # Service is working correctly +readonly STATUS_FAILURE=0 # Service has failed or is not responding +readonly STATUS_NOT_FOUND=2 # Service/command not found on system + +# Validate that critical environment variables are set +[[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]] && { + echo "ERROR: Required environment variables not set" >&2 + exit 1 +} + +# Error handling function that logs to stderr and exits with specified code +handle_error() { + echo "ERROR: $1" >&2 + exit "${2:-1}" +} + +# Logging function with timestamp and level +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$1] $2" +} + +# Find a command in PATH or fallback directories +# Returns the full path to the executable or exits with error +find_command() { + local cmd="$1" + shift + local fallback_paths=("$@") + + # First try to find command in PATH + if command -v "$cmd" &>/dev/null; then + command -v "$cmd" + return 0 + fi + + # If not in PATH, check fallback directories + for path in "${fallback_paths[@]}"; do + local full_path="$path/$cmd" + [[ -x "$full_path" ]] && { + echo "$full_path" + return 0 + } + done + + # 
Command not found anywhere + handle_error "Could not find '$cmd' executable" +} + +# Install a cron job to run this script periodically +# Only installs if the job doesn't already exist +install_cron_job() { + # Check if cron job already exists + crontab -l 2>/dev/null | grep -q "$SCRIPT_PATH" && return 0 + + # Create temporary file for new crontab + local temp_cron + temp_cron=$(mktemp) + + # Combine existing crontab with new job + { + crontab -l 2>/dev/null || true # Get existing crontab, ignore errors if empty + echo "$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/salt_status.prom 2>&1" + } > "$temp_cron" + + # Install the new crontab + if crontab -u "$CRONTAB_USER" "$temp_cron"; then + log_info "Cron job installed successfully" + else + rm -f "$temp_cron" + handle_error "Failed to install cron job" + fi + + # Clean up temporary file + rm -f "$temp_cron" +} + +# Set up file locking to prevent multiple instances of this script running +# Uses file descriptor 9 for the lock +setup_lock() { + # Ensure lock directory exists + [[ ! 
-d "$LOCK_DIR" ]] && handle_error "Lock directory does not exist: $LOCK_DIR" + + # Clean up old lock files (older than 60 minutes) + find "$LOCK_DIR" -name "salt_status.*" -type f -mmin +60 -delete 2>/dev/null || true + + # Create unique lock file + lockfile=$(mktemp -p "$LOCK_DIR" salt_status.XXXXXX) || handle_error "Failed to create lock file" + + # Open lock file on file descriptor 9 and attempt to lock it + exec 9>"$lockfile" + flock -n 9 || handle_error "Script is already running" + + # Set up cleanup trap to release lock and remove file on exit + trap 'flock -u 9; exec 9>&-; rm -f "$lockfile"' EXIT INT TERM +} + +# Ensure the Node Exporter directory exists and is writable +# Creates the directory if running as root and sets proper ownership +setup_directories() { + # Return early if directory already exists + [[ -d "$NODE_EXPORTER_DIR" ]] && return 0 + + # Create directory if running as root + if [[ "$(id -u)" == "0" ]]; then + mkdir -p "$NODE_EXPORTER_DIR" + # Set ownership to prometheus user, ignore errors if user doesn't exist + chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true + fi + + # Verify the directory is writable + [[ ! 
-w "$NODE_EXPORTER_DIR" ]] && handle_error "$NODE_EXPORTER_DIR is not writable" +} + +# Check if Salt-minion has an active network connection to Salt-master +# Uses ss (socket statistics) to check for established connections on port 4505 +check_salt_connection() { + local ss_path + ss_path=$(find_command ss /bin /usr/bin /usr/sbin) + + log_verbose "Checking for Salt connection on port $SALT_MASTER_PORT" + + # Check for established connections (-nt = numeric, no header, TCP) + if "$ss_path" -nt | grep -q "\b$SALT_MASTER_PORT\b"; then + log_verbose "Found active connection on port $SALT_MASTER_PORT" + echo $STATUS_SUCCESS + else + log_verbose "No active connection found on port $SALT_MASTER_PORT" + echo $STATUS_FAILURE + fi +} + +# Test if Salt-minion can successfully ping the Salt-master +# Uses salt-call test.ping to verify two-way communication +check_salt_ping() { + local salt_call_path + + # Try to find salt-call command, return NOT_FOUND if missing + if ! salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then + echo $STATUS_NOT_FOUND + return + fi + + # Execute ping test and check for True response + if "$salt_call_path" test.ping 2>/dev/null | grep -q '\bTrue\b'; then + echo $STATUS_SUCCESS + else + echo $STATUS_FAILURE + fi +} + +# Check if Salt-minion service is active using systemctl +check_salt_service() { + local systemctl_path + + # Find systemctl command + if ! systemctl_path=$(find_command systemctl /bin /usr/bin /sbin /usr/sbin 2>/dev/null); then + echo $STATUS_NOT_FOUND + return + fi + + # Check if salt-minion service is active + if "$systemctl_path" is-active salt-minion &>/dev/null; then + echo $STATUS_SUCCESS + else + echo $STATUS_FAILURE + fi +} + +# Get timestamp of last successful Salt communication +check_salt_last_communication() { + local salt_call_path + + # Try to find salt-call command, return 0 if missing + if ! 
salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
        echo "0"
        return
    fi

    # Get current timestamp if ping succeeds, otherwise 0
    if "$salt_call_path" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
        date +%s
    else
        echo "0"
    fi
}

# Get Salt-minion version information.
# Prints the first "<digits>.<digits>" token of "salt-call --version"
# (e.g. 3006.1), or 0 when salt-call is missing or prints no such token.
get_salt_version() {
    local salt_call_path

    # Try to find salt-call command, report 0 if missing
    if ! salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
        echo "0"
        return
    fi

    # Keep only the leading major.minor pair so the value is a plain number
    local version
    version=$("$salt_call_path" --version 2>/dev/null | grep -o '[0-9]\+\.[0-9]\+' | head -1)
    echo "${version:-0}"
}

# Get Salt-minion process memory usage in bytes.
# Sums the RSS column (reported by ps in KiB) of every process whose command
# name starts with "salt-minion"; prints 0 when ps is missing or nothing matches.
get_salt_memory_usage() {
    local ps_path

    # Find ps command
    if ! ps_path=$(find_command ps /bin /usr/bin 2>/dev/null); then
        echo "0"
        return
    fi

    # Get RSS memory usage in KB and convert to bytes
    local memory_kb
    memory_kb=$("$ps_path" -eo comm,rss | awk '/^salt-minion/ {sum += $2} END {print sum + 0}' 2>/dev/null)
    if [[ -z "$memory_kb" ]]; then
        memory_kb=0
    fi
    echo "$((memory_kb * 1024))"
}

# Count error entries in the salt-minion log.
# Prints the number of lines containing "[ERROR]" in /var/log/salt/minion, or 0
# when the log is missing or unreadable. The whole file is counted; no time
# window is applied.
count_salt_errors() {
    local log_file="/var/log/salt/minion"

    # Return 0 if log file doesn't exist or isn't readable
    if [[ ! -r "$log_file" ]]; then
        echo "0"
        return
    fi

    # grep -c prints 0 but exits with status 1 when nothing matches; "|| true"
    # keeps an error-free log from looking like a command failure (fatal under
    # "set -e").
    local error_count
    error_count=$(grep -c '\[ERROR\]' "$log_file" 2>/dev/null) || true
    echo "${error_count:-0}"
}

# Output a Prometheus metric in the text exposition format.
# Parameters: metric_name, value, help_text, metric_type
output_metric() {
    local name="$1" value="$2" help="$3" type="$4"

    # Output in Prometheus exposition format
    cat << EOF
# HELP $name $help
# TYPE $name $type
$name $value
EOF
}

# Main function that orchestrates the metric collection process
main() {
    # Skip setup steps in dry-run mode
    if [[ "$DRY_RUN" == "false" ]]; then
        # Set up file locking to prevent concurrent execution
        setup_lock

        # Ensure output directory exists and is writable
        setup_directories

        # Install cron job for periodic execution (only if script file exists and not disabled)
        if [[ -f "$SCRIPT_PATH" && "$NO_CRON" == "false" ]]; then
            install_cron_job
        elif [[ "$NO_CRON" == "true" ]]; then
            log_info "Skipping cron job installation (--no-cron specified)"
        fi
    else
        echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/salt_status.prom ===" >&2
    fi

    # Collect Salt status metrics
    local connection_status ping_status service_status last_comm version memory_usage error_count
    connection_status=$(check_salt_connection)
    ping_status=$(check_salt_ping)
    service_status=$(check_salt_service)
    last_comm=$(check_salt_last_communication)
    version=$(get_salt_version)
    memory_usage=$(get_salt_memory_usage)
    error_count=$(count_salt_errors)

    # Output metrics in Prometheus format
    output_metric "minion_connection_status" "$connection_status" \
        "Shows if Salt-Minion is connected to Salt-Master." "gauge"

    output_metric "minion_ping_status" "$ping_status" \
        "Shows if Salt-Minion is able to ping Salt-Master." "gauge"

    output_metric "minion_service_status" "$service_status" \
        "Shows if Salt-Minion service is active."
"gauge" + + output_metric "minion_last_communication_timestamp" "$last_comm" \ + "Timestamp of last successful communication with Salt-Master." "gauge" + + output_metric "minion_version" "$version" \ + "Salt-Minion version number." "gauge" + + output_metric "minion_memory_usage_bytes" "$memory_usage" \ + "Salt-Minion process memory usage in bytes." "gauge" + + output_metric "minion_error_count" "$error_count" \ + "Number of error entries in Salt-Minion log file." "counter" + + if [[ "$DRY_RUN" == "true" ]]; then + echo "=== END DRY RUN OUTPUT ===" >&2 + fi +} + +# Execute main function with all script arguments +main "$@" diff --git a/setup-iperf3-server.sh b/setup-iperf3-server.sh new file mode 100644 index 0000000..8c0bbd4 --- /dev/null +++ b/setup-iperf3-server.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +############################################################# +#### iperf3 Server Setup #### +#### Install and configure iperf3 as a systemd service #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.0 #### +#### #### +#### Usage: sudo ./setup-iperf3-server.sh [OPTIONS] #### +############################################################# + +set -euo pipefail + +# Default configuration +LISTEN_PORT=9182 +HARDENED=false +UNINSTALL=false + +SERVICE_NAME="iperf3-server" +SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service" + +show_help() { + cat </dev/null 2>&1; then + echo "iperf3 is already installed." + return + fi + + echo "Installing iperf3..." + if command -v apt-get >/dev/null 2>&1; then + apt-get update && apt-get install -y iperf3 + elif command -v dnf >/dev/null 2>&1; then + dnf install -y iperf3 + elif command -v yum >/dev/null 2>&1; then + yum install -y iperf3 + else + echo "ERROR: Cannot install iperf3 automatically. Please install manually." + exit 1 + fi +} + +install_service() { + echo "Installing systemd service..." 
+ + if [[ "$HARDENED" == true ]]; then + echo "Using hardened service configuration (private networks only)." + cat > "$SERVICE_FILE" < "$SERVICE_FILE" </dev/null | awk '{print $1}') -p ${LISTEN_PORT} -t 10" + echo "" + echo "To customize settings, edit:" + echo " ${SERVICE_FILE}" + echo "Then run: sudo systemctl daemon-reload && sudo systemctl restart ${SERVICE_NAME}" +} + +uninstall_service() { + echo "Removing iperf3 server service..." + systemctl stop "${SERVICE_NAME}" 2>/dev/null || true + systemctl disable "${SERVICE_NAME}" 2>/dev/null || true + rm -f "$SERVICE_FILE" + systemctl daemon-reload + echo "iperf3 server service removed." +} + +# --- Main execution --- + +parse_args "$@" + +if [[ "$UNINSTALL" == true ]]; then + uninstall_service +else + echo "Setting up iperf3 server service on port ${LISTEN_PORT}..." + install_iperf3 + install_service +fi diff --git a/speedtest-metrics.sh b/speedtest-metrics.sh new file mode 100755 index 0000000..6b3fae0 --- /dev/null +++ b/speedtest-metrics.sh @@ -0,0 +1,637 @@ +#!/bin/bash + +############################################################# +#### Speedtest Metrics Exporter #### +#### Internet & LAN speed metrics for Prometheus #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 2.1 #### +#### #### +#### Usage: ./speedtest-metrics.sh [OPTIONS] #### +############################################################# + +set -euo pipefail + +######################### +### Output Mode ### +######################### + +LISTEN_PORT="${SPEEDTEST_EXPORTER_PORT:-9196}" +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false + +######################### +### Parse Arguments ### +######################### + +show_help() { + cat <&2 + show_help >&2 + exit 1 + ;; + esac + done +} + +parse_args "$@" + +######################### +### Metrics Collection ### +######################### + +collect_metrics() { + +# Configuration 
TEMP_FILE="/tmp/speedtest_$$"
TEMP_SERVER_FILE=""                            # Per-server result file; created with mktemp inside the loop
IPERF_SERVER="${IPERF_SERVER:-192.168.1.100}"  # Set to your local iperf3 server IP
IPERF_PORT="${IPERF_PORT:-9182}"               # iperf3 port
# Multiple speedtest servers - add/remove server IDs as needed
# Common server IDs for major cities:
# Dallas/DFW: 5029 (AT&T), 12190 (Spectrum), 26847 (Verizon)
# New York: 3737 (Verizon), 11570 (Optimum), 17395 (Spectrum)
SPEEDTEST_SERVERS="${SPEEDTEST_SERVERS:-auto}" # Comma-separated server IDs or "auto"

# Remove temporary files. Runs from the EXIT trap, so a result file left behind
# by an aborted run (set -e, signal) is cleaned up as well.
cleanup() {
    rm -f "$TEMP_FILE"
    if [[ -n "${TEMP_SERVER_FILE:-}" ]]; then
        rm -f "$TEMP_SERVER_FILE"
    fi
}
trap cleanup EXIT

# Record script start time
SCRIPT_START_TIME=$(date +%s.%N)

# Internet Speed Test - Multiple Servers
echo "# Running internet speedtest on multiple servers..." >&2

# Initialize arrays to store results for all servers.
# The arrays are index-aligned: element i of each array belongs to SERVER_IDS[i].
declare -a SERVER_IDS=()
declare -a PING_LATENCIES=()
declare -a PING_JITTERS=()
declare -a PING_LOWS=()
declare -a PING_HIGHS=()
declare -a DOWNLOAD_MBPS=()
declare -a UPLOAD_MBPS=()
declare -a PACKET_LOSSES=()
declare -a EXTERNAL_IPS=()
declare -a TEST_TIMESTAMPS=()
declare -a SERVER_NAMES=()
declare -a SERVER_LOCATIONS=()
declare -a SERVER_COUNTRIES=()
declare -a ISPS=()
declare -a RESULT_URLS=()
declare -a DOWNLOAD_SIZES=()
declare -a UPLOAD_SIZES=()
declare -a SUCCESSES=()

# Convert comma-separated servers to array
IFS=',' read -ra SERVERS <<< "$SPEEDTEST_SERVERS"

# Test each server
for server_id in "${SERVERS[@]}"; do
    server_id=$(echo "$server_id" | xargs) # Trim whitespace

    # A stray comma ("1234,,5678" or a trailing ",") yields an empty entry;
    # there is nothing to test for it.
    if [[ -z "$server_id" ]]; then
        continue
    fi
    echo "# Testing server $server_id..." >&2

    # mktemp gives an unpredictable name created with owner-only permissions.
    # The previous fixed name embedded the unvalidated server ID in a path
    # under /tmp.
    TEMP_SERVER_FILE=$(mktemp /tmp/speedtest_XXXXXX)

    # Handle auto server selection vs specific server ID.
    # The command is built as an array so the server ID is always passed as a
    # single argument instead of being word-split.
    if [[ "$server_id" == "auto" ]]; then
        speedtest_cmd=(speedtest --format=json)
    else
        speedtest_cmd=(speedtest -s "$server_id" --format=json)
    fi

    if "${speedtest_cmd[@]}" --accept-license --accept-gdpr > "$TEMP_SERVER_FILE" 2>/dev/null; then
        echo "# Server $server_id: SUCCESS" >&2

        # Parse results for this server
        ping_latency=$(jq -r '.ping.latency // "0"' "$TEMP_SERVER_FILE")
        ping_jitter=$(jq -r '.ping.jitter // "0"' "$TEMP_SERVER_FILE")
        ping_low=$(jq -r '.ping.low // "0"' "$TEMP_SERVER_FILE")
        ping_high=$(jq -r '.ping.high // "0"' "$TEMP_SERVER_FILE")
        download_bandwidth=$(jq -r '.download.bandwidth // "0"' "$TEMP_SERVER_FILE")
        upload_bandwidth=$(jq -r '.upload.bandwidth // "0"' "$TEMP_SERVER_FILE")
        packet_loss=$(jq -r '.packetLoss // "0"' "$TEMP_SERVER_FILE")
        external_ip=$(jq -r '.interface.externalIp // "unknown"' "$TEMP_SERVER_FILE")

        # Handle timestamp conversion (date string -> epoch seconds, 0 if unparsable)
        test_timestamp_raw=$(jq -r '.timestamp // "0"' "$TEMP_SERVER_FILE")
        if [[ "$test_timestamp_raw" != "0" ]] && [[ "$test_timestamp_raw" != "unknown" ]]; then
            test_timestamp=$(date -d "$test_timestamp_raw" +%s 2>/dev/null || echo "0")
        else
            test_timestamp=0
        fi

        server_name=$(jq -r '.server.name // "unknown"' "$TEMP_SERVER_FILE")
        server_location=$(jq -r '.server.location // "unknown"' "$TEMP_SERVER_FILE")
        server_country=$(jq -r '.server.country // "unknown"' "$TEMP_SERVER_FILE")
        isp=$(jq -r '.isp // "unknown"' "$TEMP_SERVER_FILE")
        result_url=$(jq -r '.result.url // "unknown"' "$TEMP_SERVER_FILE")
        download_size=$(jq -r '.download.bytes // "0"' "$TEMP_SERVER_FILE")
        upload_size=$(jq -r '.upload.bytes // "0"' "$TEMP_SERVER_FILE")

        # Convert bandwidth to Mbps. The divisor 125000 is bytes-per-second per
        # Mbit/s (1,000,000 / 8), i.e. the input is treated as bytes per second.
        # Falls back to awk if bc is unavailable.
        download_mbps=$(echo "scale=2; $download_bandwidth / 125000" | bc -l 2>/dev/null || echo "$download_bandwidth" | awk '{printf "%.2f", $1/125000}')
        upload_mbps=$(echo "scale=2; $upload_bandwidth / 125000" | bc -l 2>/dev/null || echo "$upload_bandwidth" | awk '{printf "%.2f", $1/125000}')

        success=1
    else
        echo "# Server $server_id: FAILED" >&2

        # Set default values for failed test
        ping_latency=0; ping_jitter=0; ping_low=0; ping_high=0
        download_mbps=0; upload_mbps=0; packet_loss=0
        external_ip="unknown"; test_timestamp=0; server_name="unknown"
        server_location="unknown"; server_country="unknown"; isp="unknown"
        result_url="unknown"; download_size=0; upload_size=0
        success=0
    fi

    # Store results in arrays
    SERVER_IDS+=("$server_id")
    PING_LATENCIES+=("$ping_latency")
    PING_JITTERS+=("$ping_jitter")
    PING_LOWS+=("$ping_low")
    PING_HIGHS+=("$ping_high")
    DOWNLOAD_MBPS+=("$download_mbps")
    UPLOAD_MBPS+=("$upload_mbps")
    PACKET_LOSSES+=("$packet_loss")
    EXTERNAL_IPS+=("$external_ip")
    TEST_TIMESTAMPS+=("$test_timestamp")
    SERVER_NAMES+=("$server_name")
    SERVER_LOCATIONS+=("$server_location")
    SERVER_COUNTRIES+=("$server_country")
    ISPS+=("$isp")
    RESULT_URLS+=("$result_url")
    DOWNLOAD_SIZES+=("$download_size")
    UPLOAD_SIZES+=("$upload_size")
    SUCCESSES+=("$success")

    # Cleanup temp file
    rm -f "$TEMP_SERVER_FILE"
done

# Local Network Speed Test (iperf3) - Enhanced with additional metrics
echo "# Testing local network speed..."
>&2 +if command -v iperf3 >/dev/null 2>&1; then + # Test download from local server (we are client) + if local_down=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -J 2>/dev/null); then + local_download_mbps=$(echo "$local_down" | jq -r '.end.sum_received.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}') + local_download_bytes=$(echo "$local_down" | jq -r '.end.sum_received.bytes // "0"') + local_download_retransmits=$(echo "$local_down" | jq -r '.end.sum_sent.retransmits // "0"') + local_download_rtt=$(echo "$local_down" | jq -r '.end.streams[0].sender.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}') # Convert to ms + local_download_rtt_var=$(echo "$local_down" | jq -r '.end.streams[0].sender.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}') + local_download_cpu_local=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.host_total // "0"') + local_download_cpu_remote=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.remote_total // "0"') + local_download_congestion_window=$(echo "$local_down" | jq -r '.end.streams[0].sender.max_snd_cwnd // "0"') + local_download_success=1 + else + local_download_mbps=0; local_download_bytes=0; local_download_retransmits=0 + local_download_rtt=0; local_download_rtt_var=0; local_download_cpu_local=0 + local_download_cpu_remote=0; local_download_congestion_window=0; local_download_success=0 + fi + + # Test upload to local server (we are client, reverse mode) + if local_up=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -R -J 2>/dev/null); then + local_upload_mbps=$(echo "$local_up" | jq -r '.end.sum_sent.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}') + local_upload_bytes=$(echo "$local_up" | jq -r '.end.sum_sent.bytes // "0"') + local_upload_retransmits=$(echo "$local_up" | jq -r '.end.sum_received.retransmits // "0"') + local_upload_rtt=$(echo "$local_up" | jq -r '.end.streams[0].receiver.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}') + 
local_upload_rtt_var=$(echo "$local_up" | jq -r '.end.streams[0].receiver.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}') + local_upload_cpu_local=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.host_total // "0"') + local_upload_cpu_remote=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.remote_total // "0"') + local_upload_congestion_window=$(echo "$local_up" | jq -r '.end.streams[0].receiver.max_snd_cwnd // "0"') + local_upload_success=1 + else + local_upload_mbps=0; local_upload_bytes=0; local_upload_retransmits=0 + local_upload_rtt=0; local_upload_rtt_var=0; local_upload_cpu_local=0 + local_upload_cpu_remote=0; local_upload_congestion_window=0; local_upload_success=0 + fi +else + echo "# iperf3 not installed, skipping local network test" >&2 + local_download_mbps=0; local_upload_mbps=0; local_download_bytes=0; local_upload_bytes=0 + local_download_retransmits=0; local_upload_retransmits=0; local_download_rtt=0; local_upload_rtt=0 + local_download_rtt_var=0; local_upload_rtt_var=0; local_download_cpu_local=0; local_upload_cpu_local=0 + local_download_cpu_remote=0; local_upload_cpu_remote=0; local_download_congestion_window=0; local_upload_congestion_window=0 + local_download_success=0; local_upload_success=0 +fi + +# Calculate script runtime +SCRIPT_END_TIME=$(date +%s.%N) +SCRIPT_RUNTIME=$(echo "$SCRIPT_END_TIME - $SCRIPT_START_TIME" | bc -l 2>/dev/null || echo "$SCRIPT_END_TIME $SCRIPT_START_TIME" | awk '{printf "%.3f", $1-$2}') + +# Output Prometheus metrics +cat < "$tmp_file" + mv "$tmp_file" "$OUTPUT_FILE" + echo "Metrics written to $OUTPUT_FILE" >&2 + else + echo "$metrics" + fi +} + +start_server() { + if ! command -v socat >/dev/null 2>&1; then + echo "socat is required for HTTP mode. Install it first." 
>&2 + exit 1 + fi + echo "Starting Speedtest Metrics Exporter on port $LISTEN_PORT" >&2 + echo "Metrics available at http://localhost:$LISTEN_PORT/metrics" >&2 + while true; do + socat TCP-LISTEN:"$LISTEN_PORT",reuseaddr,fork EXEC:"$0 --handle-request" 2>/dev/null || { + echo "Server error, restarting in 5 seconds..." >&2 + sleep 5 + } + done +} + +# Main execution +if [[ "$HTTP_MODE" == true ]]; then + start_server +elif [[ -n "$OUTPUT_FILE" ]]; then + write_output +else + collect_metrics +fi diff --git a/ssl-cert-deploy.sh b/ssl-cert-deploy.sh new file mode 100644 index 0000000..8bd2e35 --- /dev/null +++ b/ssl-cert-deploy.sh @@ -0,0 +1,682 @@ +#!/bin/bash + +################################################ +#### SSL Certificate Deployer #### +#### Deploy certs to multiple services #### +#### #### +#### Author: Phil Connor #### +#### License: MIT #### +#### Contact: contact@mylinux.work #### +#### Version: 1.00-030326 #### +################################################ + +set -o pipefail + +SCRIPT_NAME=$(basename "$0") +readonly SCRIPT_NAME + +# Runtime variables +CERT_FILE="" +KEY_FILE="" +CA_FILE="" +TARGETS="" +DRY_RUN=false +BACKUP=false +DEBUG=${DEBUG:-} + +handle_error() { + local exit_code=$1 + local line_number=$2 + echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2 + exit "$exit_code" +} + +trap 'handle_error $? $LINENO' ERR + +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" >&2 + fi +} + +info() { + echo "[INFO] $*" +} + +warn() { + echo "[WARN] $*" >&2 +} + +error() { + echo "[ERROR] $*" >&2 +} + +show_help() { + cat << EOF +Usage: $SCRIPT_NAME [OPTIONS] + +Deploy SSL certificates to multiple service targets in a single run. 

OPTIONS:
    --cert FILE       Path to the SSL certificate file (required)
    --key FILE        Path to the SSL private key file (required)
    --ca FILE         Path to the CA bundle file (optional)
    --targets LIST    Comma-separated list of targets (required)
    --dry-run         Show what would be done without making changes
    --backup          Backup existing certificates before overwriting
    --help, -h        Show this help message

SUPPORTED TARGETS:
    nginx             Copy cert+key to /etc/nginx/ssl/, reload nginx
    apache            Copy cert+key to /etc/httpd/ssl/ or /etc/apache2/ssl/, reload
    postfix           Update TLS cert/key in main.cf, reload postfix
    dovecot           Update ssl_cert/ssl_key in dovecot config, reload dovecot
    artifactory       Import cert into Artifactory Java keystore, restart
    bitbucket         Import cert into Bitbucket Java keystore, restart
    jira              Import cert into Jira Java keystore, restart
    haproxy           Concatenate cert+key into PEM at /etc/haproxy/certs/, reload
    system            Update system CA trust store

ENVIRONMENT VARIABLES:
    DEBUG             Enable debug output when set

EXAMPLES:
    $SCRIPT_NAME --cert server.crt --key server.key --targets nginx,haproxy
    $SCRIPT_NAME --cert server.crt --key server.key --ca ca-bundle.crt --targets apache,postfix,dovecot
    $SCRIPT_NAME --cert server.crt --key server.key --targets artifactory,bitbucket,jira --backup
    $SCRIPT_NAME --cert server.crt --key server.key --targets system --dry-run
    DEBUG=1 $SCRIPT_NAME --cert server.crt --key server.key --targets nginx
EOF
}

# Verify that a private key belongs to a certificate.
# Arguments: $1 = certificate file, $2 = private key file.
# Returns 0 when the public key embedded in the certificate equals the public
# key derived from the private key, 1 otherwise (including when either public
# key cannot be extracted).
validate_cert_key_match() {
    local cert="$1"
    local key="$2"
    local cert_pubkey key_pubkey

    # The public keys themselves are compared rather than digests of two
    # command outputs: if both extractions failed, the digests of the two empty
    # outputs would be equal and the pair would be reported as matching.
    if ! cert_pubkey=$(openssl x509 -noout -pubkey -in "$cert" 2>/dev/null) || [[ -z "$cert_pubkey" ]]; then
        error "Could not read public key from certificate: $cert"
        return 1
    fi

    # "openssl pkey" handles any key type openssl can load, not only RSA
    if ! key_pubkey=$(openssl pkey -pubout -in "$key" 2>/dev/null) || [[ -z "$key_pubkey" ]]; then
        error "Could not derive public key from private key: $key"
        return 1
    fi

    if [[ "$cert_pubkey" != "$key_pubkey" ]]; then
        error "Certificate and key do not match (modulus mismatch)"
        debug_echo "Cert public key digest: $(printf '%s' "$cert_pubkey" | openssl md5)"
        debug_echo "Key public key digest: $(printf '%s' "$key_pubkey" | openssl md5)"
        return 1
    fi

    debug_echo "Certificate and key match"
return 0 +} + +backup_file() { + local file="$1" + if [[ -f "$file" ]]; then + local backup_name + backup_name="${file}.bak.$(date +%Y%m%d%H%M%S)" + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would backup $file -> $backup_name" + else + cp -a "$file" "$backup_name" + info "Backed up $file -> $backup_name" + fi + fi +} + +copy_file() { + local src="$1" + local dest="$2" + + if [[ "$BACKUP" == true ]]; then + backup_file "$dest" + fi + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would copy $src -> $dest" + else + cp -a "$src" "$dest" + chmod 600 "$dest" + info "Copied $src -> $dest" + fi +} + +reload_service() { + local service="$1" + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would reload $service" + else + if systemctl is-active --quiet "$service" 2>/dev/null; then + systemctl reload "$service" + info "Reloaded $service" + else + warn "Service $service is not active, skipping reload" + fi + fi +} + +restart_service() { + local service="$1" + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would restart $service" + else + systemctl restart "$service" + info "Restarted $service" + fi +} + +get_keystore_password() { + local password_url="$1" + local storepass="" + + # Try Vault HTTP API first if URL provided + if [[ -n "$password_url" ]]; then + debug_echo "Retrieving keystore password from $password_url" + storepass=$(curl -sf -X GET "$password_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true) + fi + + # Fall back to Vault CLI + if [[ -z "$storepass" ]]; then + debug_echo "Falling back to Vault CLI for keystore password" + storepass=$(vault kv get -field=password secret/keystore 2>/dev/null || true) + fi + + # Fall back to default + if [[ -z "$storepass" ]]; then + debug_echo "Using default keystore password" + storepass="changeit" + fi + + echo "$storepass" +} + +find_java_keystore() { + local -n java_bin_ref=$1 + local -n keystore_ref=$2 + + # Common Java installation paths + local java_paths=( + 
"/opt/jfrog/artifactory/app/third-party/java" + "/mnt/ebs/bitbucket/*/jre" + "/mnt/ebs/jira/jre" + "/usr/lib/jvm/java-*-openjdk" + "/usr/lib/jvm/default-java" + "/opt/java" + "/usr/java/latest" + ) + + # Check JAVA_HOME first + if [[ -n "${JAVA_HOME:-}" && -x "$JAVA_HOME/bin/keytool" ]]; then + java_bin_ref="$JAVA_HOME/bin" + keystore_ref="$JAVA_HOME/lib/security/cacerts" + if [[ -f "$keystore_ref" ]]; then + debug_echo "Found Java via JAVA_HOME: $java_bin_ref" + return 0 + fi + fi + + # Search common paths with glob expansion + for path_pattern in "${java_paths[@]}"; do + for java_dir in $path_pattern; do + if [[ -d "$java_dir" ]]; then + local bin_dir="$java_dir/bin" + local cacerts="$java_dir/lib/security/cacerts" + + if [[ -x "$bin_dir/keytool" && -f "$cacerts" ]]; then + java_bin_ref="$bin_dir" + keystore_ref="$cacerts" + debug_echo "Found Java at: $java_dir" + return 0 + fi + fi + done + done + + # Fallback: try system keytool + if command -v keytool >/dev/null 2>&1; then + java_bin_ref="$(dirname "$(command -v keytool)")" + # Try common system keystore locations + local system_keystores=( + "/etc/ssl/certs/java/cacerts" + "/usr/lib/jvm/default-java/lib/security/cacerts" + "/etc/pki/ca-trust/extracted/java/cacerts" + ) + for ks in "${system_keystores[@]}"; do + if [[ -f "$ks" ]]; then + keystore_ref="$ks" + debug_echo "Found system Java at: $java_bin_ref" + return 0 + fi + done + fi + + return 1 +} + +deploy_java_keystore() { + local keystore="$1" + local java_bin="$2" + local alias_name="$3" + local vault_url="$4" + local service_name="$5" + + local storepass + storepass=$(get_keystore_password "$vault_url") + + if [[ "$BACKUP" == true ]]; then + backup_file "$keystore" + fi + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would delete alias '$alias_name' from keystore $keystore" + info "[DRY RUN] Would import $CERT_FILE into keystore $keystore" + info "[DRY RUN] Would restart $service_name" + else + "$java_bin/keytool" -delete -alias "$alias_name" 
-keystore "$keystore" -storepass "$storepass" 2>/dev/null || true + "$java_bin/keytool" -import -noprompt -alias "$alias_name" -keystore "$keystore" -file "$CERT_FILE" -storepass "$storepass" + info "Imported certificate into $keystore" + restart_service "$service_name" + fi +} + +# ---- Target handlers ---- + +deploy_nginx() { + info "Deploying to nginx..." + local ssl_dir="/etc/nginx/ssl" + + if [[ "$DRY_RUN" != true ]]; then + mkdir -p "$ssl_dir" + fi + + copy_file "$CERT_FILE" "$ssl_dir/server.crt" + copy_file "$KEY_FILE" "$ssl_dir/server.key" + + if [[ -n "$CA_FILE" ]]; then + copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" + fi + + reload_service nginx +} + +deploy_apache() { + info "Deploying to apache..." + local ssl_dir="" + + if [[ -d "/etc/httpd" ]]; then + ssl_dir="/etc/httpd/ssl" + elif [[ -d "/etc/apache2" ]]; then + ssl_dir="/etc/apache2/ssl" + else + error "Could not detect Apache configuration directory" + return 1 + fi + + if [[ "$DRY_RUN" != true ]]; then + mkdir -p "$ssl_dir" + fi + + copy_file "$CERT_FILE" "$ssl_dir/server.crt" + copy_file "$KEY_FILE" "$ssl_dir/server.key" + + if [[ -n "$CA_FILE" ]]; then + copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" + fi + + # Detect and reload the correct service + if systemctl list-units --type=service --all 2>/dev/null | grep -q "httpd.service"; then + reload_service httpd + elif systemctl list-units --type=service --all 2>/dev/null | grep -q "apache2.service"; then + reload_service apache2 + else + warn "Could not detect Apache service name" + fi +} + +deploy_postfix() { + info "Deploying to postfix..." + local main_cf="/etc/postfix/main.cf" + + if [[ ! 
-f "$main_cf" ]]; then
        error "Postfix main.cf not found at $main_cf"
        return 1
    fi

    # The daemon resolves these paths itself, so a path given relative to the
    # caller's working directory must be made absolute before it is written.
    local cert_path key_path
    cert_path=$(readlink -f -- "$CERT_FILE" 2>/dev/null) || cert_path="$CERT_FILE"
    key_path=$(readlink -f -- "$KEY_FILE" 2>/dev/null) || key_path="$KEY_FILE"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would update smtpd_tls_cert_file in $main_cf to $cert_path"
        info "[DRY RUN] Would update smtpd_tls_key_file in $main_cf to $key_path"
        info "[DRY RUN] Would reload postfix"
    else
        if [[ "$BACKUP" == true ]]; then
            backup_file "$main_cf"
        fi

        _set_config_value "$main_cf" "smtpd_tls_cert_file" "$cert_path"
        _set_config_value "$main_cf" "smtpd_tls_key_file" "$key_path"

        info "Updated $main_cf with certificate paths"
        reload_service postfix
    fi
}

# Set "KEY = VALUE" in a configuration file.
# Arguments: $1 = file, $2 = setting name, $3 = value.
# An existing assignment of exactly KEY is rewritten in place; otherwise the
# assignment is appended. The pattern requires "=" right after the name
# (optional whitespace allowed), so settings that merely share the prefix,
# such as ssl_key_password next to ssl_key, are left alone.
_set_config_value() {
    local file="$1" key="$2" value="$3"
    local escaped

    # Escape the characters that are special in a sed replacement when "|" is
    # the delimiter: backslash, ampersand and the delimiter itself.
    escaped=$(printf '%s' "$value" | sed 's/[\\&|]/\\&/g')

    if grep -q "^${key}[[:space:]]*=" "$file"; then
        sed -i "s|^${key}[[:space:]]*=.*|${key} = ${escaped}|" "$file"
    else
        echo "${key} = ${value}" >> "$file"
    fi
}

# dovecot: point ssl_cert / ssl_key at the given files (dovecot's "<path"
# syntax) in conf.d/10-ssl.conf, or dovecot.conf when that file is absent,
# then reload. Returns 1 when no configuration file is found.
deploy_dovecot() {
    info "Deploying to dovecot..."
    local dovecot_conf=""

    if [[ -f "/etc/dovecot/conf.d/10-ssl.conf" ]]; then
        dovecot_conf="/etc/dovecot/conf.d/10-ssl.conf"
    elif [[ -f "/etc/dovecot/dovecot.conf" ]]; then
        dovecot_conf="/etc/dovecot/dovecot.conf"
    else
        error "Could not find dovecot configuration"
        return 1
    fi

    # Write absolute paths; see deploy_postfix for the reason
    local cert_path key_path
    cert_path=$(readlink -f -- "$CERT_FILE" 2>/dev/null) || cert_path="$CERT_FILE"
    key_path=$(readlink -f -- "$KEY_FILE" 2>/dev/null) || key_path="$KEY_FILE"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would update ssl_cert in $dovecot_conf to <$cert_path"
        info "[DRY RUN] Would update ssl_key in $dovecot_conf to <$key_path"
        info "[DRY RUN] Would reload dovecot"
    else
        if [[ "$BACKUP" == true ]]; then
            backup_file "$dovecot_conf"
        fi

        _set_config_value "$dovecot_conf" "ssl_cert" "<$cert_path"
        _set_config_value "$dovecot_conf" "ssl_key" "<$key_path"

        info "Updated $dovecot_conf with certificate paths"
        reload_service dovecot
    fi
}

# Artifactory: import the certificate into the bundled Java keystore (or any
# Java found by find_java_keystore) and restart the "artifactory" service.
deploy_artifactory() {
    info "Deploying to artifactory..."
    local java_bin="/opt/jfrog/artifactory/app/third-party/java/bin"
    local keystore="/opt/jfrog/artifactory/app/third-party/java/lib/security/cacerts"

    if [[ ! -x "$java_bin/keytool" || ! -f "$keystore" ]]; then
        debug_echo "Artifactory default paths not found, searching for Java"
        if ! find_java_keystore java_bin keystore; then
            error "Could not find Java keytool or keystore for Artifactory"
            return 1
        fi
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "artifactory"
}

# Bitbucket: import the certificate into the JRE under /mnt/ebs/bitbucket/*/jre
# (or any Java found by find_java_keystore) and restart "atlbitbucket".
deploy_bitbucket() {
    info "Deploying to bitbucket..."
    local java_bin=""
    local keystore=""
    local bb_dir

    # Check app-specific paths first with glob
    for bb_dir in /mnt/ebs/bitbucket/*/jre; do
        if [[ -d "$bb_dir" && -x "$bb_dir/bin/keytool" && -f "$bb_dir/lib/security/cacerts" ]]; then
            java_bin="$bb_dir/bin"
            keystore="$bb_dir/lib/security/cacerts"
            break
        fi
    done

    if [[ -z "$java_bin" || -z "$keystore" ]]; then
        debug_echo "Bitbucket default paths not found, searching for Java"
        if ! find_java_keystore java_bin keystore; then
            error "Could not find Java keytool or keystore for Bitbucket"
            return 1
        fi
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "atlbitbucket"
}

# Jira: import the certificate into the JRE under /mnt/ebs/jira/jre (or any
# Java found by find_java_keystore) and restart the "jira" service.
deploy_jira() {
    info "Deploying to jira..."
    local java_bin="/mnt/ebs/jira/jre/bin"
    local keystore="/mnt/ebs/jira/jre/lib/security/cacerts"

    if [[ ! -x "$java_bin/keytool" || ! -f "$keystore" ]]; then
        debug_echo "Jira default paths not found, searching for Java"
        if ! find_java_keystore java_bin keystore; then
            error "Could not find Java keytool or keystore for Jira"
            return 1
        fi
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "jira"
}

deploy_haproxy() {
    info "Deploying to haproxy..."
+ local cert_dir="/etc/haproxy/certs" + local pem_file="$cert_dir/server.pem" + + if [[ "$DRY_RUN" != true ]]; then + mkdir -p "$cert_dir" + fi + + if [[ "$BACKUP" == true ]]; then + backup_file "$pem_file" + fi + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would concatenate $CERT_FILE + $KEY_FILE -> $pem_file" + info "[DRY RUN] Would reload haproxy" + else + cat "$CERT_FILE" "$KEY_FILE" > "$pem_file" + chmod 600 "$pem_file" + info "Created combined PEM at $pem_file" + reload_service haproxy + fi +} + +deploy_system() { + info "Deploying to system CA trust store..." + + if [[ -z "$CA_FILE" && -z "$CERT_FILE" ]]; then + error "No certificate or CA bundle provided for system trust store" + return 1 + fi + + local cert_to_install="${CA_FILE:-$CERT_FILE}" + + if command -v update-ca-trust >/dev/null 2>&1; then + # RHEL/CentOS/Fedora/Rocky/Alma + local trust_dir="/etc/pki/ca-trust/source/anchors" + local cert_name + cert_name=$(basename "$cert_to_install") + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would copy $cert_to_install -> $trust_dir/$cert_name" + info "[DRY RUN] Would run update-ca-trust" + else + copy_file "$cert_to_install" "$trust_dir/$cert_name" + update-ca-trust + info "Updated system CA trust store (RHEL-based)" + fi + elif command -v update-ca-certificates >/dev/null 2>&1; then + # Debian/Ubuntu + local trust_dir="/usr/local/share/ca-certificates" + local cert_name + cert_name=$(basename "$cert_to_install") + # Debian requires .crt extension + cert_name="${cert_name%.*}.crt" + + if [[ "$DRY_RUN" == true ]]; then + info "[DRY RUN] Would copy $cert_to_install -> $trust_dir/$cert_name" + info "[DRY RUN] Would run update-ca-certificates" + else + copy_file "$cert_to_install" "$trust_dir/$cert_name" + update-ca-certificates + info "Updated system CA trust store (Debian-based)" + fi + else + error "Could not find update-ca-trust or update-ca-certificates" + return 1 + fi +} + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + 
--cert) + CERT_FILE="$2" + shift 2 + ;; + --key) + KEY_FILE="$2" + shift 2 + ;; + --ca) + CA_FILE="$2" + shift 2 + ;; + --targets) + TARGETS="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --backup) + BACKUP=true + shift + ;; + --help|-h) + show_help + exit 0 + ;; + *) + error "Unknown option: $1" + show_help >&2 + exit 1 + ;; + esac + done +} + +validate_inputs() { + if [[ -z "$CERT_FILE" ]]; then + error "Certificate file is required (--cert)" + exit 1 + fi + + if [[ -z "$KEY_FILE" ]]; then + error "Key file is required (--key)" + exit 1 + fi + + if [[ -z "$TARGETS" ]]; then + error "At least one target is required (--targets)" + exit 1 + fi + + if [[ ! -f "$CERT_FILE" ]]; then + error "Certificate file not found: $CERT_FILE" + exit 1 + fi + + if [[ ! -f "$KEY_FILE" ]]; then + error "Key file not found: $KEY_FILE" + exit 1 + fi + + if [[ -n "$CA_FILE" && ! -f "$CA_FILE" ]]; then + error "CA bundle file not found: $CA_FILE" + exit 1 + fi + + if ! openssl x509 -noout -text -in "$CERT_FILE" >/dev/null 2>&1; then + error "Invalid certificate file: $CERT_FILE" + exit 1 + fi + + if ! openssl rsa -noout -check -in "$KEY_FILE" >/dev/null 2>&1; then + error "Invalid key file: $KEY_FILE" + exit 1 + fi + + if ! 
validate_cert_key_match "$CERT_FILE" "$KEY_FILE"; then + exit 1 + fi +} + +deploy_target() { + local target="$1" + + case "$target" in + nginx) deploy_nginx ;; + apache) deploy_apache ;; + postfix) deploy_postfix ;; + dovecot) deploy_dovecot ;; + artifactory) deploy_artifactory ;; + bitbucket) deploy_bitbucket ;; + jira) deploy_jira ;; + haproxy) deploy_haproxy ;; + system) deploy_system ;; + *) + error "Unknown target: $target" + error "Valid targets: nginx, apache, postfix, dovecot, artifactory, bitbucket, jira, haproxy, system" + return 1 + ;; + esac +} + +main() { + parse_arguments "$@" + validate_inputs + + if [[ "$DRY_RUN" == true ]]; then + info "Running in DRY RUN mode — no changes will be made" + fi + + local failed=0 + local succeeded=0 + + IFS=',' read -ra target_list <<< "$TARGETS" + for target in "${target_list[@]}"; do + # Trim whitespace + target=$(echo "$target" | tr -d '[:space:]') + info "--- Deploying to target: $target ---" + + if deploy_target "$target"; then + ((succeeded++)) + info "Target $target: OK" + else + ((failed++)) + error "Target $target: FAILED" + fi + echo + done + + info "Deployment complete: $succeeded succeeded, $failed failed" + + if [[ $failed -gt 0 ]]; then + return 1 + fi +} + +# Execute main function if script is run directly +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/systemd-service-exporter.sh b/systemd-service-exporter.sh new file mode 100644 index 0000000..be59ea3 --- /dev/null +++ b/systemd-service-exporter.sh @@ -0,0 +1,347 @@ +#!/bin/bash +################################################################################ +# Script Name: systemd-service-exporter.sh +# Version: 1.0 +# Description: Prometheus textfile collector exporter for systemd service status +# Monitors service state, uptime, restart count, and enabled status +# +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +# Date: 2026-03-03 +# +# Prerequisites: +# - 
systemctl command available (systemd) +# - node_exporter with textfile collector enabled +# - /var/lib/node_exporter directory exists +# +# Usage: +# # Configure services via environment variable +# SERVICE_LIST="nginx,sshd,cron" ./systemd-service-exporter.sh +# +# # Configure services via config file +# echo -e "nginx\nsshd\ncron" > /etc/systemd-service-exporter.conf +# ./systemd-service-exporter.sh +# +# # Debug mode +# DEBUG=1 SERVICE_LIST="nginx" ./systemd-service-exporter.sh +# +# # Dry run (output to stdout) +# ./systemd-service-exporter.sh --dry-run +# +# Metrics Exported: +# - linux_systemd_service_state{service,state} - Service state (1=current, 0=other) +# - linux_systemd_service_uptime_seconds{service} - Seconds since service became active +# - linux_systemd_service_restarts_total{service} - Number of times the service restarted +# - linux_systemd_service_enabled{service} - Whether the service is enabled (1/0) +# +# Configuration: +# Environment: SERVICE_LIST (comma-separated) +# Config file: /etc/systemd-service-exporter.conf (one per line) +# Textfile directory: /var/lib/node_exporter +# +################################################################################ + +set -o pipefail + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +readonly VERSION="1.0" +readonly SCRIPT_NAME="${0##*/}" +readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}" +readonly OUTPUT_FILE="${TEXTFILE_DIR}/systemd_services.prom" +readonly CONFIG_FILE="${CONFIG_FILE:-/etc/systemd-service-exporter.conf}" +readonly TMP_FILE="${OUTPUT_FILE}.$$" + +# Runtime flags +DRY_RUN=false +DEBUG=${DEBUG:-} + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +debug_echo() { + if [[ -n "$DEBUG" ]]; then + echo "[DEBUG] $*" 
>&2 + fi +} + +log_error() { + echo "[ERROR] $*" >&2 +} + +cleanup() { + rm -f "$TMP_FILE" +} + +trap cleanup EXIT + +show_help() { + cat </dev/null) || true + echo "${state:-unknown}" +} + +get_service_uptime() { + local service="$1" + local timestamp + timestamp=$(systemctl show "$service" --property=ActiveEnterTimestamp --value 2>/dev/null) || true + + if [[ -z "$timestamp" || "$timestamp" == "" ]]; then + echo "0" + return + fi + + local active_epoch + active_epoch=$(date -d "$timestamp" +%s 2>/dev/null) || true + + if [[ -z "$active_epoch" ]]; then + echo "0" + return + fi + + local now + now=$(date +%s) + local uptime=$((now - active_epoch)) + + if [[ $uptime -lt 0 ]]; then + echo "0" + else + echo "$uptime" + fi +} + +get_restart_count() { + local service="$1" + local count + count=$(systemctl show "$service" --property=NRestarts --value 2>/dev/null) || true + echo "${count:-0}" +} + +get_enabled_status() { + local service="$1" + local status + status=$(systemctl is-enabled "$service" 2>/dev/null) || true + + if [[ "$status" == "enabled" ]]; then + echo "1" + else + echo "0" + fi +} + +state_to_value() { + local current_state="$1" + local check_state="$2" + + if [[ "$current_state" == "$check_state" ]]; then + echo "1" + else + echo "0" + fi +} + +collect_metrics() { + local services=() + while IFS= read -r svc; do + services+=("$svc") + done < <(load_services) + + local output="" + + # Header comments + output+="# HELP linux_systemd_service_state Current state of the systemd service\n" + output+="# TYPE linux_systemd_service_state gauge\n" + + for service in "${services[@]}"; do + local state + state=$(get_service_state "$service") + debug_echo "Service $service: state=$state" + + for s in active inactive failed; do + local val + val=$(state_to_value "$state" "$s") + output+="linux_systemd_service_state{service=\"${service}\",state=\"${s}\"} ${val}\n" + done + done + + output+="# HELP linux_systemd_service_uptime_seconds Time in seconds since the service 
became active\n" + output+="# TYPE linux_systemd_service_uptime_seconds gauge\n" + + for service in "${services[@]}"; do + local uptime + uptime=$(get_service_uptime "$service") + debug_echo "Service $service: uptime=${uptime}s" + output+="linux_systemd_service_uptime_seconds{service=\"${service}\"} ${uptime}\n" + done + + output+="# HELP linux_systemd_service_restarts_total Total number of service restarts\n" + output+="# TYPE linux_systemd_service_restarts_total counter\n" + + for service in "${services[@]}"; do + local restarts + restarts=$(get_restart_count "$service") + debug_echo "Service $service: restarts=$restarts" + output+="linux_systemd_service_restarts_total{service=\"${service}\"} ${restarts}\n" + done + + output+="# HELP linux_systemd_service_enabled Whether the service is enabled to start at boot\n" + output+="# TYPE linux_systemd_service_enabled gauge\n" + + for service in "${services[@]}"; do + local enabled + enabled=$(get_enabled_status "$service") + debug_echo "Service $service: enabled=$enabled" + output+="linux_systemd_service_enabled{service=\"${service}\"} ${enabled}\n" + done + + printf '%b' "$output" +} + +# ============================================================================ +# OUTPUT +# ============================================================================ + +write_metrics() { + local metrics + metrics=$(collect_metrics) + + if [[ "$DRY_RUN" == "true" ]]; then + echo "$metrics" + return + fi + + if [[ ! 
-d "$TEXTFILE_DIR" ]]; then + log_error "Textfile collector directory does not exist: $TEXTFILE_DIR" + exit 1 + fi + + echo "$metrics" > "$TMP_FILE" + mv "$TMP_FILE" "$OUTPUT_FILE" + debug_echo "Metrics written to $OUTPUT_FILE" +} + +# ============================================================================ +# MAIN +# ============================================================================ + +main() { + while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --debug) + DEBUG=1 + shift + ;; + --help|-h) + show_help + ;; + --version|-v) + show_version + ;; + *) + log_error "Unknown option: $1" + echo "Use --help for usage information" >&2 + exit 1 + ;; + esac + done + + if ! command -v systemctl &>/dev/null; then + log_error "systemctl not found — this script requires systemd" + exit 1 + fi + + write_metrics +} + +main "$@" diff --git a/ufw-blocklist-metrics.sh b/ufw-blocklist-metrics.sh new file mode 100755 index 0000000..e651ddb --- /dev/null +++ b/ufw-blocklist-metrics.sh @@ -0,0 +1,542 @@ +#!/bin/bash +################################################################################ +# Script Name: ufw-blocklist-metrics.sh +# Version: 2.3 +# Description: Production Prometheus exporter for UFW Blocklists (OPTIMIZED) +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +# +# Optimizations in v2.1: +# - Single journalctl call with cached output +# - Cached feed config parsing +# - Eliminated redundant file operations +# - 4.5 minutes → ~30 seconds typical runtime +# +# Fixes in v2.2: +# - Fixed typo in script name header (bocklist → blocklist) +# - Fixed ipset member counting to use Members: section +# - Fixed empty journal data producing false grep counts +# - Fixed HTTP response headers missing trailing \r\n +# - Fixed SC2155/SC2126/SC2295 shellcheck warnings +# - Added scrape timestamp metric +# - Used SCRIPT_VERSION variable for version strings +# +# Fixes in v2.3: +# - Fixed 
get_ipset_size using grep -c (exit 1 on 0 matches) causing +# duplicate "0" output lines and arithmetic errors; switched to wc -l +# - Fixed same grep -c || echo 0 bug in ufw_blocklist_enabled and +# ufw_blocklist_total_rules heredoc substitutions +# - Fixed misplaced 2>/dev/null on [ ] test for conntrack and effectiveness +# - Fixed hardcoded v2.1 in usage text; now uses SCRIPT_VERSION +################################################################################ + +CONFIG_DIR="/etc/ufw-threats" +CACHE_DIR="$CONFIG_DIR/cache" +FEEDS_CONFIG="$CONFIG_DIR/feeds.conf" +IPSET_PREFIX="ufw-feed" +WHITELIST_IPSET="ufw-whitelist" +WHITELIST_IPSET_V6="ufw-whitelist-v6" +SCRIPT_VERSION="2.3" + +TEXTFILE_DIR="/var/lib/node_exporter" +OUTPUT_FILE="" +HTTP_MODE=false +HTTP_PORT=9418 +LOCK_FILE="/var/run/ufw-blocklist-metrics.lock" + +# Global cache variables +JOURNAL_1H="" +JOURNAL_24H="" +FEEDS_ARRAY=() + +show_usage() { + cat </dev/null | grep '\[THREAT' || echo "") + JOURNAL_24H=$(timeout 30 journalctl --since "24 hours ago" 2>/dev/null | grep '\[THREAT' || echo "") +} + +# Parse feeds config ONCE into array +cache_feeds_config() { + FEEDS_ARRAY=() + if [ -f "$FEEDS_CONFIG" ]; then + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + FEEDS_ARRAY+=("$enabled|$name|$url|$type|$description") + done < "$FEEDS_CONFIG" + fi +} + +get_ipset_size() { + local ipset_name="$1" + local count + count=$(ipset list "$ipset_name" 2>/dev/null | sed -n '/^Members:$/,$p' | tail -n +2 | wc -l) + echo "${count:-0}" +} + +# Optimized: Use cached journal data +get_feed_blocks() { + local feed="$1" + local period="$2" + local data + + case "$period" in + "1 hour ago") data="$JOURNAL_1H" ;; + "24 hours ago") data="$JOURNAL_24H" ;; + *) echo 0; return ;; + esac + + if [ -z "$data" ]; then echo 0; return; fi + local count + count=$(printf '%s' "$data" | grep -c "\[THREAT:${feed}\]" 2>/dev/null) + echo "${count:-0}" 
+} + +get_feed_blocks_v6() { + local feed="$1" + local period="$2" + local data + + case "$period" in + "1 hour ago") data="$JOURNAL_1H" ;; + "24 hours ago") data="$JOURNAL_24H" ;; + *) echo 0; return ;; + esac + + if [ -z "$data" ]; then echo 0; return; fi + local count + count=$(printf '%s' "$data" | grep -c "\[THREAT-v6:${feed}\]" 2>/dev/null) + echo "${count:-0}" +} + +get_file_timestamp() { + [ -f "$1" ] && stat -c %Y "$1" 2>/dev/null || echo "0" +} + +get_file_size() { + [ -f "$1" ] && stat -c %s "$1" 2>/dev/null || echo "0" +} + +get_cache_age() { + if [ -f "$1" ]; then + echo $(($(date +%s) - $(stat -c %Y "$1" 2>/dev/null || echo 0))) + else + echo "0" + fi +} + +get_conntrack_count() { + if [ -f /proc/sys/net/netfilter/nf_conntrack_count ]; then + cat /proc/sys/net/netfilter/nf_conntrack_count + else + echo "0" + fi +} + +get_conntrack_max() { + if [ -f /proc/sys/net/netfilter/nf_conntrack_max ]; then + cat /proc/sys/net/netfilter/nf_conntrack_max + else + echo "0" + fi +} + +get_ipset_memory() { + local ipset_name="$1" + local mem + mem=$(ipset list "$ipset_name" -t 2>/dev/null | grep "Size in memory:" | awk '{print $4}') + echo "${mem:-0}" +} + +get_cache_disk_usage() { + if [ -d "$CACHE_DIR" ]; then + df -B1 "$CACHE_DIR" 2>/dev/null | tail -1 | awk '{print $3"|"$4"|"$5}' + else + echo "0|0|0%" + fi +} + +get_total_cache_size() { + if [ -d "$CACHE_DIR" ]; then + du -sb "$CACHE_DIR" 2>/dev/null | awk '{print $1}' + else + echo "0" + fi +} + +acquire_lock() { + if [ -f "$LOCK_FILE" ]; then + local pid + pid=$(cat "$LOCK_FILE" 2>/dev/null) + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + echo "ERROR: Another instance is already running (PID: $pid)" >&2 + exit 1 + else + echo "Removing stale lock file" >&2 + rm -f "$LOCK_FILE" + fi + fi + echo $$ > "$LOCK_FILE" + trap cleanup EXIT INT TERM +} + +cleanup() { + rm -f "$LOCK_FILE" +} + +generate_metrics() { + local start_time + start_time=$(date +%s) + + cat </dev/null | grep "^${IPSET_PREFIX}-"); do + 
# Extract feed name and IP version + local feed_name="${ipset_name#"${IPSET_PREFIX}"-}" + local ip_version="4" + + if [[ "$feed_name" =~ -v6$ ]]; then + feed_name="${feed_name%-v6}" + ip_version="6" + fi + + # Only show enabled feeds + if ! printf '%s\n' "${FEEDS_ARRAY[@]}" | grep -q "^1|${feed_name}|" 2>/dev/null; then + continue + fi + + local size + size=$(get_ipset_size "$ipset_name") + echo "ufw_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"enabled\"} $size" + done + + cat </dev/null || echo "0") + else + effectiveness="0" + fi + + echo "ufw_blocklist_effectiveness{feed=\"$name\"} $effectiveness" + done + + cat </dev/null || echo "0") + else + conntrack_usage="0" + fi + + # Cache disk metrics + local disk_info cache_size disk_used disk_avail disk_pct + disk_info=$(get_cache_disk_usage) + cache_size=$(get_total_cache_size) + disk_used=$(echo "$disk_info" | cut -d'|' -f1) + disk_avail=$(echo "$disk_info" | cut -d'|' -f2) + disk_pct=$(echo "$disk_info" | cut -d'|' -f3 | tr -d '%') + + cat </dev/null | sort -u | wc -l) +ufw_blocklist_total_unique_ips{ip_version="6"} $(cat "$CACHE_DIR"/*-v6.parsed 2>/dev/null | sort -u | wc -l) + +# HELP ufw_blocklist_total_rules Total UFW firewall rules +# TYPE ufw_blocklist_total_rules gauge +ufw_blocklist_total_rules $(ufw status numbered 2>/dev/null | grep -c '^\[') + +# HELP ufw_blocklist_scrape_timestamp_seconds Unix timestamp of metric generation +# TYPE ufw_blocklist_scrape_timestamp_seconds gauge +ufw_blocklist_scrape_timestamp_seconds $(date +%s) + +# HELP ufw_blocklist_exporter_duration_seconds Time to generate all metrics +# TYPE ufw_blocklist_exporter_duration_seconds gauge +ufw_blocklist_exporter_duration_seconds $(($(date +%s) - start_time)) +EOF + + echo "" +} + +run_http_server() { + echo "Starting exporter on port $HTTP_PORT..." 
>&2 + + while true; do + { + read -r request + if [[ "$request" =~ ^GET\ /metrics ]]; then + printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\n\r\n" + cache_journal_data + cache_feeds_config + generate_metrics + else + printf "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n" + echo "

UFW Blocklist Exporter v${SCRIPT_VERSION}

Metrics" + fi + } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null + done +} + +main() { + parse_args "$@" + + # Prevent multiple instances (skip for HTTP mode as it should run continuously) + [ "$HTTP_MODE" != true ] && acquire_lock + + if [ "$HTTP_MODE" = true ]; then + run_http_server + elif [ -n "$OUTPUT_FILE" ]; then + # Cache data before generating metrics + cache_journal_data + cache_feeds_config + + # Ensure output directory exists + mkdir -p "$(dirname "$OUTPUT_FILE")" + + # Create temp file in /tmp (not in node_exporter directory!) + local temp_file + temp_file=$(mktemp /tmp/ufw_metrics.XXXXXX) + + # Generate metrics to temp file + generate_metrics > "$temp_file" + + # FORCE NEW INODE: Delete old file first, then move + rm -f "$OUTPUT_FILE" + + # Move temp file to final location + mv "$temp_file" "$OUTPUT_FILE" + + # Ensure node_exporter user can read it + chmod 644 "$OUTPUT_FILE" + + # Force filesystem sync + sync + else + cache_journal_data + cache_feeds_config + generate_metrics + fi +} + +main "$@" diff --git a/ufw-blocklists.sh b/ufw-blocklists.sh new file mode 100755 index 0000000..a92d449 --- /dev/null +++ b/ufw-blocklists.sh @@ -0,0 +1,996 @@ +#!/bin/bash +################################################################################ +# Script Name: ufw-blocklists.sh +# Version: 1.0 +# Description: Per-feed UFW threat intelligence blocking with ipset +# Author: Phil Connor +# Contact: contact@mylinux.work +# Website: https://mylinux.work +# License: MIT +################################################################################ +# Don't use 'set -e' - it causes silent failures when log file has permission issues + +CONFIG_DIR="/etc/ufw-threats" +FEEDS_CONFIG="$CONFIG_DIR/feeds.conf" +CACHE_DIR="$CONFIG_DIR/cache" +LOG_FILE="/var/log/ufw-threats.log" +SSH_PORT="22" +ENABLE_AUTO_UPDATE=true +UPDATE_INTERVAL="daily" +ENABLE_IPV6=true +UFW_RULES_FILE="/etc/ufw/before.rules" +UFW_RULES_V6_FILE="/etc/ufw/before6.rules" +IPSET_PREFIX="ufw-feed" 
+WHITELIST_IPSET="ufw-whitelist" +WHITELIST_IPSET_V6="ufw-whitelist-v6" +MAX_BACKUPS=10 + + +show_usage() { + cat <> "$LOG_FILE" 2>/dev/null || true +} + +# Iterate over enabled feeds in $FEEDS_CONFIG, calling the provided callback +# function with arguments: name url type description +# Usage: for_each_enabled_feed my_callback_function +for_each_enabled_feed() { + local callback="$1" + [ -f "$FEEDS_CONFIG" ] || return 0 + + local enabled name url type description + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + "$callback" "$name" "$url" "$type" "$description" + done < "$FEEDS_CONFIG" +} + +# Iterate over ALL feeds (enabled + disabled), calling the provided callback +# function with arguments: enabled name url type description +for_each_feed() { + local callback="$1" + [ -f "$FEEDS_CONFIG" ] || return 0 + + local enabled name url type description + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + "$callback" "$enabled" "$name" "$url" "$type" "$description" + done < "$FEEDS_CONFIG" +} + +parse_args() { + COMMAND="" + while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) show_usage ;; + -s|--ssh-port) SSH_PORT="$2"; shift 2 ;; + --no-auto-update) ENABLE_AUTO_UPDATE=false; shift ;; + --no-ipv6) ENABLE_IPV6=false; shift ;; + --update-interval) UPDATE_INTERVAL="$2"; shift 2 ;; + install|update|apply-rules|test-rules|list-feeds|show-stats|whitelist-init|whitelist-list|clean-cache) COMMAND="$1"; shift ;; + add-feed) COMMAND="add-feed"; FEED_NAME="$2"; FEED_URL="$3"; shift 3 ;; + remove-feed|enable-feed|disable-feed) COMMAND="$1"; FEED_NAME="$2"; shift 2 ;; + whitelist-add) COMMAND="whitelist-add"; WHITELIST_IP="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac + done + [ -z "$COMMAND" ] && COMMAND="install" +} + +cleanup_old_backups() { + local 
max_keep=${MAX_BACKUPS:-10} + + find "$(dirname "$UFW_RULES_FILE")" -maxdepth 1 -name "$(basename "$UFW_RULES_FILE").backup-*" -printf '%T@ %p\n' 2>/dev/null \ + | sort -rn | tail -n +$((max_keep + 1)) | cut -d' ' -f2- | xargs -r rm -f 2>/dev/null || true + + if [ "$ENABLE_IPV6" = true ]; then + find "$(dirname "$UFW_RULES_V6_FILE")" -maxdepth 1 -name "$(basename "$UFW_RULES_V6_FILE").backup-*" -printf '%T@ %p\n' 2>/dev/null \ + | sort -rn | tail -n +$((max_keep + 1)) | cut -d' ' -f2- | xargs -r rm -f 2>/dev/null || true + fi + + rm -f "${UFW_RULES_FILE}.backup-"*.clean "${UFW_RULES_V6_FILE}.backup-"*.clean 2>/dev/null || true +} + +check_requirements() { + local enable_ufw="${1:-true}" + + [ "$EUID" -ne 0 ] && { echo "Please run as root"; exit 1; } + + if ! command -v ufw >/dev/null 2>&1; then + apt-get update && apt-get install -y ufw ipset curl 2>/dev/null || \ + dnf install -y ufw ipset curl 2>/dev/null || \ + yum install -y ufw ipset curl 2>/dev/null + fi + + command -v ipset >/dev/null 2>&1 || apt-get install -y ipset + command -v curl >/dev/null 2>&1 || { echo "ERROR: curl required"; exit 1; } + + # CRITICAL: Ensure all ipsets referenced by before.rules exist BEFORE enabling UFW. + # If ipsets are missing (e.g., after reboot, failed persistence), UFW enable will fail + # with "Set ufw-feed-XXX doesn't exist" and block ALL traffic including DNS. 
+ ensure_ipsets_exist + + if [ "$enable_ufw" = true ]; then + ufw --force enable + fi + + cleanup_old_backups +} + +_ensure_feed_ipset() { + local name="$1" + + ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1 || \ + ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000 2>/dev/null || true + + if [ "$ENABLE_IPV6" = true ]; then + ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1 || \ + ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000 2>/dev/null || true + fi +} + +ensure_ipsets_exist() { + if [ -f /etc/ipset.conf ]; then + ipset restore -f /etc/ipset.conf 2>/dev/null || true + fi + + ipset list "$WHITELIST_IPSET" >/dev/null 2>&1 || \ + ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000 2>/dev/null || true + + if [ "$ENABLE_IPV6" = true ]; then + ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1 || \ + ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000 2>/dev/null || true + fi + + for_each_enabled_feed _ensure_feed_ipset +} + +validate_feed_name() { + local name="$1" + if [ -z "$name" ]; then + echo "ERROR: Feed name cannot be empty"; return 1 + fi + if [[ ! "$name" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "ERROR: Feed name '$name' contains invalid characters (only a-z, 0-9, _, - allowed)"; return 1 + fi + if [ "${#name}" -gt 20 ]; then + echo "ERROR: Feed name '$name' too long (max 20 chars, ipset name limit)"; return 1 + fi +} + +create_directory_structure() { + mkdir -p "$CONFIG_DIR" "$CACHE_DIR" + touch "$LOG_FILE" + chmod 700 "$CONFIG_DIR" + chmod 600 "$LOG_FILE" +} + +initialize_feeds_config() { + local has_feeds + has_feeds=$(grep -c '^[01]|' "$FEEDS_CONFIG" 2>/dev/null || echo 0) + + if [ -f "$FEEDS_CONFIG" ] && [ "$has_feeds" -gt 0 ]; then + log_message "Feeds configuration already exists with $has_feeds feeds" + return + fi + + log_message "Creating feeds configuration..." 
+ + [ -f "$FEEDS_CONFIG" ] && mv "$FEEDS_CONFIG" "${FEEDS_CONFIG}.old-$(date +%Y%m%d-%H%M%S)" + + cat > "$FEEDS_CONFIG" <<'EOF' +# Threat Intelligence Feeds Configuration +# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION +# +# ENABLED: 1 (enabled) or 0 (disabled) +# NAME: Unique feed identifier +# URL: Feed URL +# TYPE: Format type (plain, cidr, commented, custom) +# DESCRIPTION: Feed description + +1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs +1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers +1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h +0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d +1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous +0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous +0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious +0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List +0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List +1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List +0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs +0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive +0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All +1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks 
+0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist +0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs +0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts +1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist +1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker +0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers +1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20 +0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional) +0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB with confidence score 100 1 day +0|abuseipdb-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB with confidence score 100 3 day +0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB with confidence score 100 7 day +1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB with confidence score 100 14 day +0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB with confidence score 100 30 day + + +# Add custom feeds below this line +EOF + chmod 600 "$FEEDS_CONFIG" +} + +_setup_feed_ipset() { + local name="$1" + + if ! ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000 + log_message " Created ipset: ${IPSET_PREFIX}-${name}" + fi + + if [ "$ENABLE_IPV6" = true ] && ! 
ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then + ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000 + log_message " Created ipset: ${IPSET_PREFIX}-${name}-v6" + fi +} + +setup_ipsets() { + log_message "Setting up ipsets (per-feed mode)..." + + if ! ipset list "$WHITELIST_IPSET" >/dev/null 2>&1; then + ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000 + ipset add "$WHITELIST_IPSET" 127.0.0.1 + fi + + if [ "$ENABLE_IPV6" = true ] && ! ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1; then + ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000 + ipset add "$WHITELIST_IPSET_V6" ::1 + fi + + for_each_enabled_feed _setup_feed_ipset + setup_ipset_persistence +} + +setup_ipset_persistence() { + cat > /etc/systemd/system/ipset-persistent.service <<'EOF' +[Unit] +Description=ipset persistent configuration +Before=network-pre.target ufw.service +Wants=network-pre.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=-/sbin/ipset restore -f /etc/ipset.conf +ExecStop=/sbin/ipset save -f /etc/ipset.conf +StandardOutput=null +StandardError=null + +[Install] +WantedBy=multi-user.target +EOF + + ipset save > /etc/ipset.conf + systemctl enable ipset-persistent.service 2>/dev/null || true +} + +download_feed() { + local url="$1" output="$2" + local http_code + http_code=$(curl -f -s -m 60 --connect-timeout 10 -L \ + -A "ufw-threat-feeds-per-feed/1.0" \ + -w "%{http_code}" -o "$output" "$url" 2>/dev/null) || true + + if [ ! 
-s "$output" ]; then + log_message " Download failed for $url (HTTP $http_code, empty response)" + return 1 + fi + return 0 +} + +parse_feed() { + local file="$1" type="$2" output_v4="$3" output_v6="$4" + + : > "$output_v4" + : > "$output_v6" + + case "$type" in + plain) + grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?$' "$file" >> "$output_v4" 2>/dev/null || true + if [ "$ENABLE_IPV6" = true ]; then + grep -E '^[0-9a-fA-F:]+(/[0-9]+)?$' "$file" | grep ':' >> "$output_v6" 2>/dev/null || true + fi + ;; + cidr) + grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' "$file" \ + | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$output_v4" 2>/dev/null || true + if [ "$ENABLE_IPV6" = true ]; then + grep -E '^[0-9a-fA-F:]+(/[0-9]+)?' "$file" \ + | grep ':' | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$output_v6" 2>/dev/null || true + fi + ;; + commented) + grep -v -E '^[#;]|^$' "$file" \ + | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' >> "$output_v4" 2>/dev/null || true + if [ "$ENABLE_IPV6" = true ]; then + grep -v -E '^[#;]|^$' "$file" \ + | grep -oE '[0-9a-fA-F:]+(/[0-9]+)?' \ + | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$output_v6" 2>/dev/null || true + fi + ;; + esac +} + +_clean_stale_cache() { + local enabled_feeds="$1" + local cleaned=0 + + for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do + [ -f "$cache_file" ] || continue + local bn feed_name + bn=$(basename "$cache_file") + feed_name="${bn%%.raw}" + feed_name="${feed_name%%-v4.parsed}" + feed_name="${feed_name%%-v6.parsed}" + + if ! 
grep -q "^${feed_name}$" <<< "$enabled_feeds"; then + rm -f "$cache_file" && cleaned=$((cleaned + 1)) + fi + done + + [ "$cleaned" -gt 0 ] && log_message " Cleaned $cleaned stale cache files" +} + +_load_ipset_v4() { + local name="$1" v4_file="$2" + { + echo "create ${IPSET_PREFIX}-${name}-tmp hash:net family inet hashsize 4096 maxelem 200000" + while IFS= read -r ip; do + [ -z "$ip" ] && continue + echo "add ${IPSET_PREFIX}-${name}-tmp $ip" + done < "$v4_file" + echo "swap ${IPSET_PREFIX}-${name} ${IPSET_PREFIX}-${name}-tmp" + echo "destroy ${IPSET_PREFIX}-${name}-tmp" + } | ipset restore 2>/dev/null +} + +_load_ipset_v6() { + local name="$1" v6_file="$2" + { + echo "create ${IPSET_PREFIX}-${name}-v6-tmp hash:net family inet6 hashsize 4096 maxelem 200000" + while IFS= read -r ip; do + [ -z "$ip" ] && continue + echo "add ${IPSET_PREFIX}-${name}-v6-tmp $ip" + done < "$v6_file" + echo "swap ${IPSET_PREFIX}-${name}-v6 ${IPSET_PREFIX}-${name}-v6-tmp" + echo "destroy ${IPSET_PREFIX}-${name}-v6-tmp" + } | ipset restore 2>/dev/null +} + +update_feeds() { + log_message "Starting per-feed update..." + + if [ ! -f "$FEEDS_CONFIG" ]; then + echo "ERROR: Feeds config not found: $FEEDS_CONFIG" + echo "Run 'install' command first" + exit 1 + fi + + local enabled_count + enabled_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null || echo 0) + if [ "$enabled_count" -eq 0 ]; then + echo "ERROR: No enabled feeds found in $FEEDS_CONFIG" + echo "Check the config file format" + exit 1 + fi + + log_message "Found $enabled_count enabled feeds" + + local enabled_feeds + enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2) + + # NOTE: Do NOT destroy ipsets for disabled feeds here. The before.rules may still + # reference them (if apply-rules hasn't been re-run). Destroying in-use ipsets causes + # "Set doesn't exist" on next UFW reload, which blocks all traffic. + # Ipset cleanup happens safely in cmd_disable_feed/cmd_remove_feed after rules are regenerated. 
+ _clean_stale_cache "$enabled_feeds" + + local total_feeds=0 + local failed_feeds=0 + + local enabled name url type description + while IFS='|' read -r enabled name url type description; do + [[ "$enabled" =~ ^#.*$ ]] && continue + [[ -z "$enabled" ]] && continue + [ "$enabled" != "1" ] && continue + + total_feeds=$((total_feeds + 1)) + log_message "Processing feed: $name" + + local raw="$CACHE_DIR/${name}.raw" + local v4_file="$CACHE_DIR/${name}-v4.parsed" + local v6_file="$CACHE_DIR/${name}-v6.parsed" + + if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4_file" "$v6_file"; then + local count_v4 count_v6 + count_v4=$(wc -l < "$v4_file" 2>/dev/null || echo 0) + count_v6=0 + [ "$ENABLE_IPV6" = true ] && count_v6=$(wc -l < "$v6_file" 2>/dev/null || echo 0) + + [ "$count_v4" -gt 0 ] && _load_ipset_v4 "$name" "$v4_file" + [ "$ENABLE_IPV6" = true ] && [ "$count_v6" -gt 0 ] && _load_ipset_v6 "$name" "$v6_file" + + log_message " $name: $count_v4 IPv4, $count_v6 IPv6" + else + log_message " FAILED: $name" + failed_feeds=$((failed_feeds + 1)) + fi + done < "$FEEDS_CONFIG" + + ipset save > /etc/ipset.conf + log_message "Updated $total_feeds feeds ($failed_feeds failed)" +} + +# Build iptables rules block for IPv4 or IPv6 +# Args: v4|v6 output_file +_build_rules_block() { + local family="$1" output="$2" + local chain_prefix whitelist_set set_suffix log_tag + + if [ "$family" = "v4" ]; then + chain_prefix="ufw-before-input" + whitelist_set="$WHITELIST_IPSET" + set_suffix="" + log_tag="THREAT" + else + chain_prefix="ufw6-before-input" + whitelist_set="$WHITELIST_IPSET_V6" + set_suffix="-v6" + log_tag="THREAT-v6" + fi + + cat > "$output" <> "$output" <> "$output" +} + +# Insert rules into a UFW template file and validate +# Args: template_file rules_file output_file +# Returns 0 on success, 1 on validation failure +_insert_and_validate_rules() { + local template="$1" rules_file="$2" output="$3" + local insert_line + + insert_line=$(grep -n "^# End required lines" 
"$template" | cut -d: -f1) + if [ -z "$insert_line" ]; then + log_message "ERROR: Could not find '# End required lines' in $template" + return 1 + fi + + head -n "$insert_line" "$template" > "$output" + cat "$rules_file" >> "$output" + tail -n +"$((insert_line + 1))" "$template" >> "$output" + + local filter_count + filter_count=$(grep -c '^\*filter' "$output" 2>/dev/null || echo 0) + if [ "$filter_count" -ne 1 ]; then + log_message "ERROR: Generated rules file has $filter_count *filter blocks (expected 1)" + return 1 + fi + + return 0 +} + +_verify_ipsets_callback() { + local name="$1" + + if ! ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then + log_message "ERROR: Required ipset ${IPSET_PREFIX}-${name} is missing" + _MISSING_SETS=$((_MISSING_SETS + 1)) + fi + if [ "$ENABLE_IPV6" = true ] && ! ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then + log_message "ERROR: Required ipset ${IPSET_PREFIX}-${name}-v6 is missing" + _MISSING_SETS=$((_MISSING_SETS + 1)) + fi +} + +apply_ufw_rules() { + log_message "Applying UFW rules (per-feed)..." + + if [ ! -f /usr/share/ufw/before.rules ]; then + log_message "ERROR: UFW default template /usr/share/ufw/before.rules not found" + return 1 + fi + + local tmpdir + tmpdir=$(mktemp -d) + trap 'rm -rf "$tmpdir"' RETURN + + [ -f "$UFW_RULES_FILE" ] && cp "$UFW_RULES_FILE" "${UFW_RULES_FILE}.backup-$(date +%Y%m%d-%H%M%S)" + [ "$ENABLE_IPV6" = true ] && [ -f "$UFW_RULES_V6_FILE" ] && \ + cp "$UFW_RULES_V6_FILE" "${UFW_RULES_V6_FILE}.backup-$(date +%Y%m%d-%H%M%S)" + + cp /usr/share/ufw/before.rules "$UFW_RULES_FILE" + [ "$ENABLE_IPV6" = true ] && cp /usr/share/ufw/before6.rules "$UFW_RULES_V6_FILE" + + log_message " Starting from clean UFW templates" + + # Build and insert IPv4 rules + local v4_rules="$tmpdir/v4_rules" + local v4_output="$tmpdir/v4_output" + _build_rules_block "v4" "$v4_rules" + + if ! 
_insert_and_validate_rules "$UFW_RULES_FILE" "$v4_rules" "$v4_output"; then + log_message " Aborting to prevent corruption." + return 1 + fi + mv "$v4_output" "$UFW_RULES_FILE" + log_message " IPv4 rules generated and validated" + + # Build and insert IPv6 rules + if [ "$ENABLE_IPV6" = true ]; then + local v6_rules="$tmpdir/v6_rules" + local v6_output="$tmpdir/v6_output" + _build_rules_block "v6" "$v6_rules" + + if _insert_and_validate_rules "$UFW_RULES_V6_FILE" "$v6_rules" "$v6_output"; then + mv "$v6_output" "$UFW_RULES_V6_FILE" + log_message " IPv6 rules generated and validated" + else + log_message " Aborting IPv6 rules. Keeping IPv4 only." + fi + fi + + ufw limit "$SSH_PORT/tcp" 2>/dev/null || ufw allow "$SSH_PORT/tcp" + + # CRITICAL: Ensure all ipsets exist BEFORE reloading UFW + log_message " Verifying ipsets exist..." + ensure_ipsets_exist + setup_ipsets + + _MISSING_SETS=0 + for_each_enabled_feed _verify_ipsets_callback + + if [ "$_MISSING_SETS" -gt 0 ]; then + log_message "ERROR: $_MISSING_SETS required ipsets missing. Aborting UFW reload to prevent lockout." + return 1 + fi + + ipset save > /etc/ipset.conf + + log_message " Reloading UFW..." 
+ if ufw status | grep -q "Status: active"; then + ufw reload + else + ufw --force enable + fi + + cleanup_old_backups + log_message "UFW rules applied and validated successfully" +} + +setup_auto_update() { + [ "$ENABLE_AUTO_UPDATE" = false ] && return + + local script_path + script_path=$(readlink -f "$0") + + cat > /etc/systemd/system/ufw-threat-feeds-update.service < /etc/ipset.conf' +EOF + + cat > /etc/systemd/system/ufw-threat-feeds-update.timer < /usr/local/bin/ufw-whitelist <<'EOF' +#!/bin/bash +[ -z "$1" ] && { echo "Usage: ufw-whitelist "; exit 1; } +if [[ "$1" == *:* ]]; then + ipset add ufw-whitelist-v6 "$1" && echo "Whitelisted IPv6: $1" +else + ipset add ufw-whitelist "$1" && echo "Whitelisted IPv4: $1" +fi +ipset save > /etc/ipset.conf +EOF + + local script_path + script_path=$(readlink -f "$0") + cat > /usr/local/bin/ufw-threat-reload </dev/null | grep -c '^[0-9]' 2>/dev/null) + v4_count=${v4_count:-0} + + v6_count=0 + if [ "$ENABLE_IPV6" = true ]; then + v6_count=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]' 2>/dev/null) + v6_count=${v6_count:-0} + fi + + blocks=$(journalctl --since "1 hour ago" 2>/dev/null | grep -c "\[THREAT:${name}\]" 2>/dev/null) + blocks=${blocks:-0} + + printf "%-25s %10d %10d %12d\n" "$name" "$v4_count" "$v6_count" "$blocks" + done < "$FEEDS_CONFIG" +} + +_list_feed_entry() { + local feed_enabled="$1" name="$2" url="$3" type="$4" description="$5" + local status="DISABLED" + [ "$feed_enabled" = "1" ] && status="ENABLED" + printf "%-10s %-25s %s\n" "$status" "$name" "$description" +} + +cmd_list_feeds() { + printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESCRIPTION" + echo "-------------------------------------------------------------------" + for_each_feed _list_feed_entry +} + +cmd_add_feed() { + validate_feed_name "$FEED_NAME" || exit 1 + grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null && { echo "Feed exists"; exit 1; } + echo "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> 
"$FEEDS_CONFIG" + log_message "Added feed: $FEED_NAME" +} + +cmd_remove_feed() { + validate_feed_name "$FEED_NAME" || exit 1 + sed -i "/^[01]|${FEED_NAME}|/d" "$FEEDS_CONFIG" + log_message "Removed feed: $FEED_NAME" + + log_message "Regenerating UFW rules..." + apply_ufw_rules || return 1 + + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true +} + +cmd_enable_feed() { + validate_feed_name "$FEED_NAME" || exit 1 + sed -i "s/^0|${FEED_NAME}|/1|${FEED_NAME}|/" "$FEEDS_CONFIG" + log_message "Enabled: $FEED_NAME" + + log_message "Regenerating UFW rules..." + apply_ufw_rules +} + +cmd_disable_feed() { + validate_feed_name "$FEED_NAME" || exit 1 + sed -i "s/^1|${FEED_NAME}|/0|${FEED_NAME}|/" "$FEEDS_CONFIG" + log_message "Disabled: $FEED_NAME" + + log_message "Regenerating UFW rules..." + apply_ufw_rules || return 1 + + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true + ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true +} + +cmd_whitelist_add() { + [ -z "$WHITELIST_IP" ] && { echo "Usage: $0 whitelist-add "; exit 1; } + + if [[ "$WHITELIST_IP" == *:* ]]; then + if ipset add "$WHITELIST_IPSET_V6" "$WHITELIST_IP" 2>/dev/null; then + log_message "Added to IPv6 whitelist: $WHITELIST_IP" + else + echo "Failed to add $WHITELIST_IP"; exit 1 + fi + else + if ipset add "$WHITELIST_IPSET" "$WHITELIST_IP" 2>/dev/null; then + log_message "Added to IPv4 whitelist: $WHITELIST_IP" + else + echo "Failed to add $WHITELIST_IP"; exit 1 + fi + fi + + ipset save > /etc/ipset.conf +} + +cmd_whitelist_init() { + log_message "Initializing whitelist with private networks..." + + local private_networks=( + "10.0.0.0/8" + "172.16.0.0/12" + "192.168.0.0/16" + "169.254.0.0/16" + "127.0.0.0/8" + ) + + local private_networks_v6=( + "fc00::/7" + "fe80::/10" + "::1" + ) + + echo "Adding IPv4 private networks to whitelist..." 
+ for net in "${private_networks[@]}"; do + if ipset add "$WHITELIST_IPSET" "$net" 2>/dev/null; then + echo " + $net" + else + echo " - $net (already exists or error)" + fi + done + + if [ "$ENABLE_IPV6" = true ]; then + echo "Adding IPv6 private networks to whitelist..." + for net in "${private_networks_v6[@]}"; do + if ipset add "$WHITELIST_IPSET_V6" "$net" 2>/dev/null; then + echo " + $net" + else + echo " - $net (already exists or error)" + fi + done + fi + + ipset save > /etc/ipset.conf + log_message "Whitelist initialized with RFC1918/private networks" +} + +cmd_whitelist_list() { + echo "==========================================" + echo "IPv4 Whitelist ($WHITELIST_IPSET)" + echo "==========================================" + ipset list "$WHITELIST_IPSET" 2>/dev/null | grep -E '^[0-9]' || echo "No entries" + + if [ "$ENABLE_IPV6" = true ]; then + echo "" + echo "==========================================" + echo "IPv6 Whitelist ($WHITELIST_IPSET_V6)" + echo "==========================================" + ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep -E '^[0-9a-fA-F:]' || echo "No entries" + fi +} + +cmd_clean_cache() { + log_message "Cleaning cache for disabled feeds..." + + local removed=0 + local kept=0 + + local enabled_feeds + enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2) + + for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do + [ -f "$cache_file" ] || continue + + local bn feed_name + bn=$(basename "$cache_file") + feed_name="${bn%%.raw}" + feed_name="${feed_name%%-v4.parsed}" + feed_name="${feed_name%%-v6.parsed}" + + if ! grep -q "^${feed_name}$" <<< "$enabled_feeds"; then + rm -f "$cache_file" + removed=$((removed + 1)) + else + kept=$((kept + 1)) + fi + done + + log_message "Removed $removed cache files, kept $kept active feeds" +} + +cmd_test_rules() { + log_message "Testing UFW rule generation (dry-run mode)..." + + if [ ! 
-f /usr/share/ufw/before.rules ]; then + echo "ERROR: UFW default template /usr/share/ufw/before.rules not found" + return 1 + fi + + local test_dir + test_dir=$(mktemp -d) + trap 'rm -rf "$test_dir"' RETURN + + local test_v4="$test_dir/before.rules.test" + cp /usr/share/ufw/before.rules "$test_v4" + [ "$ENABLE_IPV6" = true ] && cp /usr/share/ufw/before6.rules "$test_dir/before6.rules.test" + + local v4_rules="$test_dir/v4_rules" + local v4_output="$test_dir/v4_output" + + _build_rules_block "v4" "$v4_rules" + + local feed_count + feed_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null || echo 0) + echo "Generated rules for $feed_count enabled feeds" + + if ! _insert_and_validate_rules "$test_v4" "$v4_rules" "$v4_output"; then + echo "VALIDATION FAILED" + return 1 + fi + + echo "Validation passed: exactly 1 *filter block found" + + local total_lines rule_lines + total_lines=$(wc -l < "$v4_output") + rule_lines=$(grep -c "^-A " "$v4_output" 2>/dev/null || echo 0) + + echo "Generated $rule_lines iptables rules in $total_lines total lines" + echo "" + echo "==========================================" + echo "Sample of generated rules:" + echo "==========================================" + grep "# UFW THREAT FEEDS" -A 10 "$v4_output" | head -15 + echo "..." + echo "" + echo "==========================================" + echo "Test passed - rules would be generated safely" + echo " To apply these rules, run: $0 apply-rules" + echo "==========================================" +} + +cmd_install() { + log_message "Installing per-feed threat blocking..." 
+ check_requirements + create_directory_structure + initialize_feeds_config + setup_ipsets + update_feeds + apply_ufw_rules + setup_auto_update + create_management_commands + + echo "" + echo "==========================================" + echo "Per-Feed Installation Complete" + echo "==========================================" + echo "Mode: Per-feed ipsets (detailed tracking)" + echo "Feeds: $(grep -c '^1|' "$FEEDS_CONFIG")" + echo "IPv6: $ENABLE_IPV6" + echo "Auto-update: $ENABLE_AUTO_UPDATE ($UPDATE_INTERVAL)" + echo "" + echo "Commands:" + echo " $0 show-stats # View per-feed statistics" + echo " $0 update # Update all feeds" + echo " ufw-whitelist IP # Whitelist an IP" + echo "" + echo "Logs: grep 'THREAT:' /var/log/syslog" + echo "==========================================" +} + +main() { + parse_args "$@" + case "$COMMAND" in + install) cmd_install ;; + update) + check_requirements false + create_directory_structure + update_feeds + ;; + apply-rules) + check_requirements + apply_ufw_rules + ;; + test-rules) cmd_test_rules ;; + list-feeds) cmd_list_feeds ;; + show-stats) cmd_show_stats ;; + add-feed) cmd_add_feed ;; + remove-feed) cmd_remove_feed ;; + enable-feed) cmd_enable_feed ;; + disable-feed) cmd_disable_feed ;; + whitelist-add) cmd_whitelist_add ;; + whitelist-init) cmd_whitelist_init ;; + whitelist-list) cmd_whitelist_list ;; + clean-cache) cmd_clean_cache ;; + *) show_usage ;; + esac +} + +main "$@" diff --git a/update-code-server.sh b/update-code-server.sh new file mode 100644 index 0000000..1d43e90 --- /dev/null +++ b/update-code-server.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +#################################################################### +#### Code-Server Update Script #### +#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu #### +#### #### +#### Author: Phil Connor #### +#### Contact: contact@mylinux.work #### +#### License: MIT #### +#### Version: 1.2 #### +#### #### +#### Usage: sudo ./update-code-server.sh #### 
+#################################################################### + +############################# +#### User Configurations #### +############################# +SERVDIR=/usr/local/code-server # where you want the code-server installed + +######################## +#### System Configs #### +######################## +OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]') +OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}') +CSVER=$(code-server --version | awk '{print $1}') + +########################################################### +#### Detect Package Manger from OS and OSVer Variables #### +########################################################### +if [ "${OS}" = ubuntu ]; then + PAKMGR="apt-get -y" +elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then + if [ "${OSVER}" = 7 ]; then + PAKMGR="yum -y" + fi + if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then + PAKMGR="dnf -y" + fi +fi + +################### +#### Update OS #### +################### +function update_os() { + { + if [ "${OS}" = ubuntu ]; then + ${PAKMGR} update + ${PAKMGR} upgrade + else + ${PAKMGR} update + fi + } +} +############################################### +#### Get the latest version of Code Server #### +############################################### +get_latest_version() { + { + version="$(curl -fsSLI -o /dev/null -w "%{url_effective}" https://github.com/coder/code-server/releases/latest)" + version="${version#https://github.com/coder/code-server/releases/tag/}" + version="${version#v}" + echo "$version" + #### Compare Code-Server versions #### + if [[ "$version" != "$CSVER" ]] && [[ "$(printf '%s\n' "$CSVER" "$version" | sort -V | tail -1)" == "$version" ]]; then + compare=1 + else + compare=0 + fi + } +} + +######################################### +#### Download and Update Codeserver #### 
+######################################### +install_codeserver() { + { + if [ $compare = 1 ]; then + systemctl stop code-server + # check if command wget exists + if ! command -v wget >/dev/null 2>&1; then + ${PAKMGR} install wget + fi + cd ~/ || exit + wget "https://github.com/coder/code-server/releases/download/v$version/code-server-$version-linux-amd64.tar.gz" + tar xvf "code-server-$version-linux-amd64.tar.gz" + cp -r ~/code-server-"$version"-linux-amd64/* ${SERVDIR} + rm -f ~/code-server-"$version"-linux-amd64.tar.gz + rm -rf ~/code-server-"$version"-linux-amd64 + systemctl start code-server + fi + } +} + +#update_os +get_latest_version +install_codeserver diff --git a/update-prometheus-stack.sh b/update-prometheus-stack.sh new file mode 100755 index 0000000..58b5431 --- /dev/null +++ b/update-prometheus-stack.sh @@ -0,0 +1,570 @@ +#!/bin/bash + +set -euo pipefail + +########################################################################## +## Prometheus Stack Updater ## +## ## +## Updates installed Prometheus ecosystem binaries to latest release ## +## from GitHub. Only touches components that are already installed. 
## +## ## +## Supported components: ## +## prometheus, node_exporter, blackbox_exporter, ## +## alertmanager, mysqld_exporter, promtool, amtool, ## +## loki, promtail, alloy, grafana ## +## ## +## Usage: ## +## ./update-prometheus-stack.sh [OPTIONS] ## +## ## +## Options: ## +## --check Show what would be updated (no changes) ## +## --all Update all installed components ## +## --prometheus Update only Prometheus ## +## --node-exporter Update only node_exporter ## +## --blackbox Update only blackbox_exporter ## +## --alertmanager Update only AlertManager ## +## --mysql-exporter Update only mysqld_exporter ## +## --loki Update only Loki ## +## --promtail Update only Promtail ## +## --alloy Update only Alloy ## +## --grafana Update only Grafana (via package manager) ## +## --force Update even if already at latest version ## +## --arch Override architecture (default: auto-detect) ## +## --backup-only Backup configs only (no updates) ## +## --help Show this help message ## +## ## +## Author: Phil Connor ## +## Contact: pconnor@ara.com ## +########################################################################## + +BINDIR="/usr/local/bin" +PROMDIR="/etc/prometheus" +BACKUPDIR="${PROMDIR}/backups" +LOGFILE="/var/log/prometheus-update.log" +TMPDIR_BASE="/tmp/prometheus-update-$$" +CHECK_ONLY=false +BACKUP_ONLY=false +FORCE=false +ARCH="" +UPDATED=0 +SKIPPED=0 +FAILED=0 +COMPONENTS_REQUESTED=() + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +log() { + local msg + msg="[$(date '+%Y-%m-%d %H:%M:%S')] $1" + echo -e "$msg" | tee -a "$LOGFILE" 2>/dev/null || echo -e "$msg" +} + +log_ok() { log "${GREEN}✓${NC} $1"; } +log_warn() { log "${YELLOW}⚠${NC} $1"; } +log_err() { log "${RED}✗${NC} $1" >&2; } +log_info() { log "${CYAN}→${NC} $1"; } + +# shellcheck disable=SC2329 +cleanup() { + # shellcheck disable=SC2317 + [[ -d "$TMPDIR_BASE" ]] && rm -rf "$TMPDIR_BASE" +} +trap cleanup EXIT + +show_help() { + sed -n '/^## Usage:/,/^####/{ 
/^####/d; s/^## //; s/^##$//; p }' "$0" + exit 0 +} + +detect_arch() { + if [[ -n "$ARCH" ]]; then + echo "$ARCH" + return + fi + local machine + machine=$(uname -m) + case "$machine" in + x86_64) echo "amd64" ;; + aarch64) echo "arm64" ;; + armv7l) echo "armv7" ;; + armv6l) echo "armv6" ;; + *) echo "amd64" ;; + esac +} + +get_installed_version() { + local binary="$1" + local path="${BINDIR}/${binary}" + if [[ ! -x "$path" ]]; then + echo "not_installed" + return + fi + case "$binary" in + prometheus|promtool) + "$path" --version 2>&1 | head -1 | grep -oP 'version \K[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + ;; + node_exporter|blackbox_exporter|mysqld_exporter) + "$path" --version 2>&1 | head -1 | grep -oP 'version \K[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + ;; + alertmanager|amtool) + "$path" --version 2>&1 | head -1 | grep -oP 'version \K[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + ;; + loki|promtail) + "$path" --version 2>&1 | head -1 | grep -oP 'version \K[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + ;; + alloy) + "$path" --version 2>&1 | head -1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + ;; + *) + echo "unknown" + ;; + esac +} + +get_latest_version() { + local repo="$1" + local version="" + + case "$repo" in + prometheus/*) + local component="${repo#prometheus/}" + version=$(curl -sf "https://prometheus.io/download/" | \ + grep -oP "${component}-\K[0-9]+\.[0-9]+\.[0-9]+" | head -1 || echo "") + ;; + grafana/*) + version=$(curl -sfL "https://github.com/${repo}/releases/latest" | \ + grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "") + ;; + esac + + if [[ -z "$version" ]]; then + log_err "Failed to query latest version for ${repo}" + return 1 + fi + echo "$version" +} + +get_download_url() { + local repo="$1" + local version="$2" + local pattern="$3" + local component="${repo#*/}" + + case "$repo" in + prometheus/*) + echo "https://github.com/${repo}/releases/download/v${version}/${component}-${version}.${pattern}" + ;; + 
grafana/*) + echo "https://github.com/${repo}/releases/download/v${version}/${pattern}" + ;; + esac +} + +download_and_extract() { + local url="$1" + local workdir="$2" + mkdir -p "$workdir" + local filename + filename=$(basename "$url") + log_info "Downloading ${filename}" + if ! curl -sfL -o "${workdir}/${filename}" "$url"; then + log_err "Download failed: ${url}" + return 1 + fi + cd "$workdir" + case "$filename" in + *.tar.gz|*.tgz) + tar -xzf "$filename" + ;; + *.zip) + unzip -q "$filename" + ;; + *) + chmod +x "$filename" + ;; + esac +} + +stop_service() { + local service="$1" + if systemctl is-active --quiet "$service" 2>/dev/null; then + log_info "Stopping ${service}" + systemctl stop "$service" + return 0 + fi + return 1 +} + +start_service() { + local service="$1" + if systemctl is-enabled --quiet "$service" 2>/dev/null; then + log_info "Starting ${service}" + systemctl daemon-reload + systemctl start "$service" + fi +} + +backup_binary() { + local binary="$1" + local path="${BINDIR}/${binary}" + if [[ -f "$path" ]]; then + local backup + backup="${path}.backup.$(date +%Y%m%d_%H%M%S)" + cp "$path" "$backup" + log_info "Backed up ${path} → ${backup}" + fi +} + +backup_configs() { + local name="$1" + local config_files="$2" + if [[ -z "$config_files" ]]; then + return 0 + fi + mkdir -p "$BACKUPDIR" + local timestamp + timestamp=$(date +%Y%m%d_%H%M%S) + for cfg in $config_files; do + if [[ -f "$cfg" ]]; then + local filename + filename=$(basename "$cfg") + cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}" + log_info "Config backed up: ${cfg} → ${BACKUPDIR}/${filename}.${timestamp}" + fi + done +} + +update_component() { + local name="$1" + local repo="$2" + local service_name="$3" + local binaries="$4" + local file_pattern="$5" + local owner="${6:-prometheus}" + local config_files="${7:-}" + + local hw + hw=$(detect_arch) + + local installed + installed=$(get_installed_version "${binaries%% *}") + + if [[ "$installed" == "not_installed" ]]; then + return 0 + 
fi + + local latest + latest=$(get_latest_version "$repo") || { ((FAILED++)) || true; return 1; } + + echo "" + log " ${CYAN}${name}${NC}: installed=${installed} latest=${latest}" + + if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then + log_ok "Already at latest version" + ((SKIPPED++)) || true + return 0 + fi + + if [[ "$CHECK_ONLY" == "true" ]]; then + if [[ "$installed" != "$latest" ]]; then + log_warn "Update available: ${installed} → ${latest}" + fi + return 0 + fi + + local pattern="${file_pattern//ARCH/${hw}}" + local url + url=$(get_download_url "$repo" "$latest" "$pattern") + if [[ -z "$url" ]]; then + log_err "Could not find download URL for ${name} (pattern: ${pattern})" + ((FAILED++)) || true + return 1 + fi + + local workdir="${TMPDIR_BASE}/${name}" + download_and_extract "$url" "$workdir" || { ((FAILED++)) || true; return 1; } + + backup_configs "$name" "$config_files" + + local was_running=false + if stop_service "$service_name"; then + was_running=true + fi + + for bin in $binaries; do + local found + found=$(find "$workdir" \( -name "$bin" -o -name "${bin}-*" \) -type f 2>/dev/null | head -1) + if [[ -n "$found" ]]; then + backup_binary "$bin" + mv "$found" "${BINDIR}/${bin}" + chown "${owner}:${owner}" "${BINDIR}/${bin}" 2>/dev/null || \ + chown "${owner}." "${BINDIR}/${bin}" 2>/dev/null || true + chmod 755 "${BINDIR}/${bin}" + log_ok "Updated ${bin}" + else + log_warn "Binary ${bin} not found in download" + fi + done + + if [[ "$was_running" == "true" ]]; then + start_service "$service_name" + fi + + local new_ver + new_ver=$(get_installed_version "${binaries%% *}") + log_ok "${name} updated: ${installed} → ${new_ver}" + ((UPDATED++)) || true +} + +is_pkg_installed() { + local pkg="$1" + if command -v rpm >/dev/null 2>&1; then + rpm -q "$pkg" >/dev/null 2>&1 + elif command -v dpkg >/dev/null 2>&1; then + dpkg -l "$pkg" 2>/dev/null | grep -q "^ii" + else + return 1 + fi +} + +update_alloy() { + if ! 
command -v alloy >/dev/null 2>&1 && [[ ! -x "${BINDIR}/alloy" ]]; then + return 0 + fi + + if is_pkg_installed "alloy"; then + log_info "Alloy installed via package manager — updating with dnf/apt" + update_alloy_pkg + else + log_info "Alloy installed as standalone binary — updating from GitHub" + update_component "Alloy" "grafana/alloy" "alloy" "alloy" "alloy-linux-ARCH.zip" "root" "/etc/alloy/config.alloy" + fi +} + +update_alloy_pkg() { + local alloy_bin="alloy" + command -v alloy >/dev/null 2>&1 || alloy_bin="${BINDIR}/alloy" + + local installed + installed=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown") + + local latest + latest=$(curl -sfL "https://github.com/grafana/alloy/releases/latest" | \ + grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "") + if [[ -z "$latest" ]]; then + log_err "Failed to query latest version for Alloy" + ((FAILED++)) || true + return 1 + fi + + echo "" + log " ${CYAN}Alloy${NC}: installed=${installed} latest=${latest}" + + if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then + log_ok "Already at latest version" + ((SKIPPED++)) || true + return 0 + fi + + if [[ "$CHECK_ONLY" == "true" ]]; then + if [[ "$installed" != "$latest" ]]; then + log_warn "Update available: ${installed} → ${latest}" + fi + return 0 + fi + + backup_configs "Alloy" "/etc/alloy/config.alloy" + + if command -v apt-get >/dev/null 2>&1; then + apt-get -y update && apt-get -y install --only-upgrade alloy + elif command -v dnf >/dev/null 2>&1; then + dnf -y upgrade alloy + elif command -v yum >/dev/null 2>&1; then + yum -y update alloy + fi + + systemctl daemon-reload + systemctl restart alloy + + local new_ver + new_ver=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown") + log_ok "Alloy updated: ${installed} → ${new_ver}" + ((UPDATED++)) || true +} + +update_grafana() { + if ! 
command -v grafana-server >/dev/null 2>&1; then + return 0 + fi + + local installed + installed=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown") + + local latest + latest=$(curl -sfL "https://github.com/grafana/grafana/releases/latest" | \ + grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "") + if [[ -z "$latest" ]]; then + log_err "Failed to query latest version for Grafana" + ((FAILED++)) || true + return 1 + fi + + echo "" + log " ${CYAN}Grafana${NC}: installed=${installed} latest=${latest}" + + if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then + log_ok "Already at latest version" + ((SKIPPED++)) || true + return 0 + fi + + if [[ "$CHECK_ONLY" == "true" ]]; then + if [[ "$installed" != "$latest" ]]; then + log_warn "Update available: ${installed} → ${latest}" + fi + return 0 + fi + + backup_configs "Grafana" "/etc/grafana/grafana.ini /etc/grafana/ldap.toml" + + log_info "Updating Grafana via package manager" + if command -v apt-get >/dev/null 2>&1; then + apt-get -y update && apt-get -y install --only-upgrade grafana + elif command -v dnf >/dev/null 2>&1; then + dnf -y upgrade grafana + elif command -v yum >/dev/null 2>&1; then + yum -y update grafana + else + log_err "No supported package manager found for Grafana update" + ((FAILED++)) || true + return 1 + fi + + systemctl daemon-reload + systemctl restart grafana-server + + local new_ver + new_ver=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown") + log_ok "Grafana updated: ${installed} → ${new_ver}" + ((UPDATED++)) || true +} + +should_update() { + local component="$1" + if [[ ${#COMPONENTS_REQUESTED[@]} -eq 0 ]]; then + return 0 + fi + for c in "${COMPONENTS_REQUESTED[@]}"; do + [[ "$c" == "$component" ]] && return 0 + done + return 1 +} + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case "$1" in + --check) CHECK_ONLY=true; shift ;; + --backup-only) BACKUP_ONLY=true; shift ;; + 
--force) FORCE=true; shift ;; + --all) COMPONENTS_REQUESTED=(); shift ;; + --prometheus) COMPONENTS_REQUESTED+=("prometheus"); shift ;; + --node-exporter) COMPONENTS_REQUESTED+=("node_exporter"); shift ;; + --blackbox) COMPONENTS_REQUESTED+=("blackbox"); shift ;; + --alertmanager) COMPONENTS_REQUESTED+=("alertmanager"); shift ;; + --mysql-exporter) COMPONENTS_REQUESTED+=("mysql_exporter"); shift ;; + --loki) COMPONENTS_REQUESTED+=("loki"); shift ;; + --promtail) COMPONENTS_REQUESTED+=("promtail"); shift ;; + --alloy) COMPONENTS_REQUESTED+=("alloy"); shift ;; + --grafana) COMPONENTS_REQUESTED+=("grafana"); shift ;; + --arch) ARCH="$2"; shift 2 ;; + --help) show_help ;; + *) + log_err "Unknown option: $1" + show_help + ;; + esac + done +} + +main() { + parse_arguments "$@" + + if [[ $EUID -ne 0 ]]; then + log_err "This script must be run as root" + exit 1 + fi + + mkdir -p "$TMPDIR_BASE" "$(dirname "$LOGFILE")" + touch "$LOGFILE" + + local mode="UPDATE" + [[ "$CHECK_ONLY" == "true" ]] && mode="CHECK" + [[ "$BACKUP_ONLY" == "true" ]] && mode="BACKUP" + + echo "" + echo "==============================================" + echo " Prometheus Stack Updater [${mode}]" + echo " $(date '+%Y-%m-%d %H:%M:%S')" + echo " Architecture: $(detect_arch)" + echo "==============================================" + + if [[ "$BACKUP_ONLY" == "true" ]]; then + local configs=( + "$PROMDIR/prometheus.yml" + "$PROMDIR/blackbox.yml" + "$PROMDIR/alertmanager.yml" + "/etc/.mysqld_exporter.cnf" + "/etc/loki/loki-config.yml" + "/etc/promtail/promtail-config.yml" + "/etc/alloy/config.alloy" + "/etc/grafana/grafana.ini" + "/etc/grafana/ldap.toml" + ) + local backed_up=0 + mkdir -p "$BACKUPDIR" + local timestamp + timestamp=$(date +%Y%m%d_%H%M%S) + for cfg in "${configs[@]}"; do + if [[ -f "$cfg" ]]; then + local filename + filename=$(basename "$cfg") + cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}" + log_ok "Backed up ${cfg} → ${BACKUPDIR}/${filename}.${timestamp}" + ((backed_up++)) + fi + done + 
echo "" + log "Backed up ${backed_up} config file(s) to ${BACKUPDIR}" + exit 0 + fi + + # Name Repo Service Binaries File Pattern Owner Config Files + should_update "prometheus" && update_component "Prometheus" "prometheus/prometheus" "prometheus" "prometheus promtool" "linux-ARCH.tar.gz" "prometheus" "$PROMDIR/prometheus.yml" + should_update "node_exporter" && update_component "Node Exporter" "prometheus/node_exporter" "node_exporter" "node_exporter" "linux-ARCH.tar.gz" "root" "" + should_update "blackbox" && update_component "Blackbox Exporter" "prometheus/blackbox_exporter" "blackbox_exporter" "blackbox_exporter" "linux-ARCH.tar.gz" "prometheus" "$PROMDIR/blackbox.yml" + should_update "alertmanager" && update_component "AlertManager" "prometheus/alertmanager" "alertmanager" "alertmanager amtool" "linux-ARCH.tar.gz" "alertmanager" "$PROMDIR/alertmanager.yml" + should_update "mysql_exporter" && update_component "MySQL Exporter" "prometheus/mysqld_exporter" "mysqld_exporter" "mysqld_exporter" "linux-ARCH.tar.gz" "prometheus" "/etc/.mysqld_exporter.cnf" + should_update "loki" && update_component "Loki" "grafana/loki" "loki" "loki" "loki-linux-ARCH.zip" "loki" "/etc/loki/loki-config.yml" + should_update "promtail" && update_component "Promtail" "grafana/loki" "promtail" "promtail" "promtail-linux-ARCH.zip" "promtail" "/etc/promtail/promtail-config.yml" + should_update "alloy" && update_alloy + should_update "grafana" && update_grafana + + echo "" + echo "==============================================" + echo -e " Results: ${GREEN}${UPDATED} updated${NC} ${YELLOW}${SKIPPED} current${NC} ${RED}${FAILED} failed${NC}" + echo "==============================================" + echo "" + + if [[ "$CHECK_ONLY" == "false" ]]; then + log "Log saved to ${LOGFILE}" + fi + + [[ $FAILED -gt 0 ]] && exit 1 + exit 0 +} + +main "$@" diff --git a/users-logged-in.ps1 b/users-logged-in.ps1 new file mode 100644 index 0000000..a8849f9 --- /dev/null +++ b/users-logged-in.ps1 @@ -0,0 +1,692 
@@ +<# +.SYNOPSIS + Monitors RDP user sessions and exports metrics for Prometheus windows_exporter. + +.DESCRIPTION + This script monitors the number of active RDP user sessions and creates Prometheus-formatted metrics. + The metrics are written to a text file that can be consumed by the windows_exporter. + It can also run periodically. + +.PARAMETER MetricsPath + The path where the Prometheus metrics file will be written. + +.PARAMETER IntervalSeconds + The interval in seconds for the scheduled task. Default is 60 seconds. + +.Parameter RunOnce + Switch to run the script once and exit instead of creating a scheduled task. + +.PARAMETER Debug + Switch to run the script in debug mode. + +.PARAMETER RunOnce + Switch to run the script once and exit instead of creating a scheduled task. + +.PARAMETER DryRun + Switch to output metrics to console instead of writing to file. + +.PARAMETER Verbose + Switch to enable verbose debug output. + +.PARAMETER Quiet + Switch to suppress non-error output. + +.PARAMETER NoSchedule + Switch to skip scheduled task creation. + +.PARAMETER Version + Switch to display script version and exit. + +.NOTES + Version: 1.1.2-20251002 + Author: Phil Connor contact@mylinux.work + + Features: + - Monitors active RDP user sessions using quser command + - Captures username, session name, session ID, state (Active/Disconnected), idle time, and logon time + - Attempts to correlate session IDs with client IP addresses using qwinsta + - Writes metrics to a text file for consumption by windows_exporter. + - Reads last 10 PowerShell commands from each user's PSReadline history file. 
# Handle version display.
# The literal printed here must agree with the version recorded in the
# comment-based help (.NOTES: 1.1.2-20251002). The previous text still
# announced 1.1.0-20250915, so `-Version` reported a stale release.
# NOTE(review): assumes the .NOTES entry is the current one - confirm.
if ($Version) {
    Write-Host "Windows RDP User Monitor PowerShell Script"
    Write-Host "Version: 1.1.2-20251002"
    Write-Host "Author: Phil Connor contact@mylinux.work"
    exit 0
}
# Make an arbitrary string safe to embed as a Prometheus label value.
#   -Value : text to sanitize (may be empty).
# Returns "" for null/empty input. Otherwise returns the text with double
# quotes, backslashes, newlines, carriage returns, tabs and '>' replaced by
# '_'. Results longer than 200 characters are cut at 200 and suffixed "...".
function ConvertTo-MetricLabel {
    param([AllowEmptyString()][string]$Value)

    $maxLength = 200

    if ([string]::IsNullOrEmpty($Value)) {
        return ""
    }

    # Same character class as before; it holds no letters, so case
    # sensitivity of the regex call is irrelevant.
    $clean = [regex]::Replace($Value, '["\\\n\r\t>]', '_')

    if ($clean.Length -le $maxLength) {
        return $clean
    }
    return $clean.Substring(0, $maxLength) + "..."
}
# Build a session-ID -> client-IP lookup from `qwinsta` output.
# Returns a hashtable keyed by session ID (string). Lines without a dotted
# IPv4 address are ignored. Any failure is downgraded to a warning and an
# empty hashtable, so IP detection never aborts the caller.
function Get-SessionIPAddresses {
    # Candidate patterns for locating the session ID, tried in order.
    # Group is the capture index that holds the ID.
    $idPatterns = @(
        @{ Regex = '^\s*(\S+)\s+(\S+)?\s+(\d+)\s+'; Group = 3 },   # ID as 3rd column
        @{ Regex = 'rdp-tcp#\d+.*?\s(\d+)\s+';      Group = 1 },   # RDP session format
        @{ Regex = '\s(\d+)\s+\w+';                 Group = 1 }    # any space-delimited number
    )

    try {
        $ipBySession = @{}
        $rawLines = qwinsta 2>$null

        if ($rawLines) {
            Write-Verbose "Raw qwinsta output:"
            foreach ($raw in $rawLines) { Write-Verbose " $raw" }

            foreach ($row in $rawLines) {
                # Skip blank lines and the column header
                if ([string]::IsNullOrWhiteSpace($row)) { continue }
                if ($row -match '^\s*SESSIONNAME') { continue }

                Write-Verbose "Processing qwinsta line: '$row'"

                # Only rows that carry an IPv4 address are of interest
                if (-not ($row -match '(\d+\.\d+\.\d+\.\d+)')) { continue }
                $ip = $matches[1]

                # First pattern that matches decides the session ID
                $id = $null
                foreach ($candidate in $idPatterns) {
                    if ($row -match $candidate.Regex) {
                        $id = $matches[$candidate.Group]
                        break
                    }
                }

                if ($id) {
                    $ipBySession[$id] = $ip
                    Write-Verbose "Mapped session ID $id to IP $ip"
                }
                else {
                    Write-Verbose "Found IP $ip but could not determine session ID"
                }
            }
        }

        Write-Verbose "Final session IP mapping: $($ipBySession | ConvertTo-Json -Compress)"
        return $ipBySession
    }
    catch {
        # Don't fail the entire script if IP detection fails
        Write-Warning "Failed to get session IP addresses: $($_.Exception.Message)"
        return @{}
    }
}
# Parse one data line of `quser` output into a structured object.
#   -Line       : a single non-header line of quser output.
#   -SessionIPs : optional session-ID -> IP hashtable (see Get-SessionIPAddresses).
# Returns a PSCustomObject with UserName, SessionName, ID, State, IdleTime,
# LogonTime, ClientLocation and IPAddress, or $null when the line has too few
# columns to be a session row.
#
# quser prints USERNAME, SESSIONNAME, ID, STATE, IDLE TIME, LOGON TIME.
# Two properties of that layout matter here:
#   * SESSIONNAME is blank for disconnected sessions, so a plain whitespace
#     split shifts every later column one place to the left.
#   * LOGON TIME is the last column and spans several tokens
#     (date, time, optional AM/PM); there is no client-location column.
function ConvertFrom-QUserLine {
    param(
        [ValidateNotNullOrEmpty()][string]$Line,
        [hashtable]$SessionIPs = @{}
    )

    # Split the line into fields, normalizing whitespace
    $fields = @(($Line.Trim() -replace '\s+', ' ') -split ' ')

    # A purely numeric second token is the session ID, meaning SESSIONNAME was
    # blank. Re-insert an empty placeholder so the column indexes line up.
    if ($fields.Length -ge 2 -and $fields[1] -match '^\d+$') {
        $fields = @($fields[0], '') + @($fields[1..($fields.Length - 1)])
    }

    # Validate minimum expected field count (5 fixed columns + logon time)
    if ($fields.Length -lt 6) { return $null }

    $cols = $script:Config.COLUMNS
    $sessionId = $fields[$cols.ID]
    $sessionName = $fields[$cols.SESSION]

    # Look up IP address for this session if available
    $ipAddress = if ($SessionIPs.ContainsKey($sessionId)) { $SessionIPs[$sessionId] } else { "unknown" }

    # Everything from LOGON_START to the end of the line is the logon time.
    # The old code dropped the final token and reported it as ClientLocation.
    $logonTime = $fields[$cols.LOGON_START..($fields.Length - 1)] -join ' '

    # quser carries no location column; derive a coarse value from the session
    # name so the property stays populated for existing consumers.
    $clientLocation = if ([string]::IsNullOrEmpty($sessionName)) { "unknown" }
                      elseif ($sessionName -eq 'console') { "local" }
                      else { "remote" }

    # Leading ">" marks the session that ran quser; it is not part of the name
    $cleanUserName = $fields[$cols.USERNAME] -replace '^>', ''

    return [PSCustomObject]@{
        UserName       = $cleanUserName
        SessionName    = $sessionName
        ID             = $sessionId
        State          = $fields[$cols.STATE]
        IdleTime       = $fields[$cols.IDLE]
        LogonTime      = $logonTime
        ClientLocation = $clientLocation
        IPAddress      = $ipAddress
    }
}
# Collect the logged-in RDP/console sessions as structured objects.
# Reads quser data (Get-QUserData) and the session->IP map
# (Get-SessionIPAddresses), parses every non-header line, attaches each
# user's command history, and keeps only sessions whose name matches
# RDP_SESSION_PATTERN and whose user name, session name and state are set.
# Any failure is re-thrown as "Failed to collect user data: ...".
function Get-RDPUsers {
    try {
        $rawSessions = Get-QUserData
        $ipLookup = Get-SessionIPAddresses
        Write-Verbose "Found $($rawSessions.Count) total user sessions"
        Write-Verbose "Found $($ipLookup.Count) session IP addresses"

        # First line is the quser header; everything after it is data
        $parsed = foreach ($row in ($rawSessions | Select-Object -Skip 1)) {
            $entry = ConvertFrom-QUserLine $row $ipLookup
            if ($null -eq $entry) {
                Write-Warning "Skipping malformed quser output: $row"
                continue
            }

            # Attach recent command history to the session object
            $history = Get-UserCommandHistory -UserName $entry.UserName -SessionId $entry.ID
            $entry | Add-Member -NotePropertyName "CommandHistory" -NotePropertyValue $history

            $entry
        }
        $allUsers = $parsed | Where-Object { $_ }

        # Keep only RDP/console sessions with the mandatory fields present
        $pattern = $script:Config.RDP_SESSION_PATTERN
        $rdpUsers = $allUsers | Where-Object {
            ($_.SessionName -match $pattern) -and
            -not [string]::IsNullOrEmpty($_.UserName) -and
            -not [string]::IsNullOrEmpty($_.SessionName) -and
            -not [string]::IsNullOrEmpty($_.State)
        }

        Write-Verbose "Processed $($allUsers.Count) valid user sessions"
        Write-Verbose "Filtered to $($rdpUsers.Count) RDP sessions"

        return $rdpUsers
    }
    catch {
        throw "Failed to collect user data: $($_.Exception.Message)"
    }
}
# Write metrics content to a file via a temp file plus rename, so a reader
# never sees a half-written file.
#   -Content : a string or an array of lines; arrays are joined with LF.
#   -Path    : destination file. When empty, nothing is written and the
#              content is returned to the caller unchanged.
# Failures are reported with Write-Error and the temp file is removed.
function Write-MetricsFile {
    param(
        [ValidateNotNull()]$Content,
        [string]$Path
    )

    if (-not $Path) {
        return $Content
    }

    # Ensure the directory exists
    $directory = Split-Path $Path -Parent
    if ($directory -and -not (Test-Path $directory)) {
        try {
            # -ErrorAction Stop: New-Item errors are non-terminating by
            # default and would otherwise bypass this catch block.
            New-Item -Path $directory -ItemType Directory -Force -ErrorAction Stop | Out-Null
            Write-Verbose "Created directory: $directory"
        }
        catch {
            Write-Error "Failed to create directory '$directory': $($_.Exception.Message)"
            return
        }
    }

    $tempPath = "$Path.tmp"
    try {
        $text = if ($Content -is [array]) { $Content -join "`n" } else { $Content }

        # -ErrorAction Stop: without it a failed write (disk full, access
        # denied) never reaches the catch block below.
        $text | Out-File -FilePath $tempPath -Encoding UTF8 -ErrorAction Stop
        Move-Item -Path $tempPath -Destination $Path -Force -ErrorAction Stop
    }
    catch {
        Write-Error "Failed to write metrics file: $($_.Exception.Message)"
        if (Test-Path $tempPath) {
            Remove-Item $tempPath -Force -ErrorAction SilentlyContinue
        }
    }
}
applicable + if ($DryRun) { + Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsPath ===" -ForegroundColor Yellow + } + + try { + # Collect RDP user session data + Write-VerboseLog "Collecting RDP user session data..." + $rdpUsers = Get-RDPUsers + if ($null -eq $rdpUsers) { + throw "Get-RDPUsers returned null" + } + + Write-VerboseLog "Found $($rdpUsers.Count) RDP users" + + # Convert user data to Prometheus metrics + $metrics = New-UserMetrics -Users $rdpUsers + if ($null -eq $metrics) { + throw "New-UserMetrics returned null" + } + + # Collect failed login attempts + Write-VerboseLog "Collecting failed login data..." + $failedLoginMetrics = Get-FailedLogins + + # Calculate script execution time for performance monitoring + $endTime = Get-Date + $executionTimeMs = [math]::Round(($endTime - $startTime).TotalMilliseconds, 2) + + # Add execution time metric for monitoring script performance + $executionMetric = @{ + Labels = '{metric="execution_time_ms"}' + Value = $executionTimeMs + } + $metrics += $executionMetric + + # Split metrics into different types + $userMetrics = $metrics | Where-Object { $_.Labels -notmatch 'command=' } + $commandMetrics = $metrics | Where-Object { $_.Labels -match 'command=' } + + # Generate Prometheus-formatted output + $output = @() + $output += Write-PrometheusMetric -Name $script:Config.METRIC_NAME -Help "Number of RDP users currently logged in" -Type "gauge" -Metrics $userMetrics + + # Add command history metrics as a separate metric family + if ($commandMetrics.Count -gt 0) { + $output += Write-PrometheusMetric -Name "windows_rdp_user_command_history" -Help "Recent command history for RDP users" -Type "gauge" -Metrics $commandMetrics + } + + # Add failed login metrics + if ($failedLoginMetrics.Count -gt 0) { + $output += Write-PrometheusMetric -Name "windows_user_failed_logins" -Help "Failed login attempts from Windows Event Log" -Type "counter" -Metrics $failedLoginMetrics + } + + if ($null -eq $output) { + 
# Create the scheduled task that re-runs this script periodically as SYSTEM.
#   -IntervalSeconds : repetition interval for the task trigger (default 60).
#   -TaskName        : scheduled-task name (default "PrometheusRDPMetrics").
# Does nothing when a task with that name already exists. The task invokes
# this script with -RunOnce and the script-level $MetricsPath.
# Failures are reported with Write-Error; nothing is written to the pipeline.
function New-MetricsScheduledTask {
    param(
        [int]$IntervalSeconds = 60,
        [string]$TaskName = "PrometheusRDPMetrics"
    )

    try {
        # Check if scheduled task already exists
        if (Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue) {
            Write-InfoLog "Scheduled task '$TaskName' already exists. Skipping creation."
            return
        }

        # Inside a function $MyInvocation.MyCommand describes the function
        # itself and has no Path, so the task used to be registered with
        # -File "". $PSCommandPath always holds the running script's path.
        $scriptPath = $PSCommandPath
        if ([string]::IsNullOrEmpty($scriptPath)) {
            throw "Cannot determine script path; run this from a saved .ps1 file"
        }

        $principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
        $action = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$scriptPath`" -MetricsPath `"$MetricsPath`" -RunOnce"
        $trigger = New-ScheduledTaskTrigger -Once -At (Get-Date) -RepetitionInterval (New-TimeSpan -Seconds $IntervalSeconds)
        $settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable

        # -ErrorAction Stop routes registration failures into the catch block.
        # Out-Null keeps the returned task object out of the script's output.
        Register-ScheduledTask -TaskName $TaskName -Action $action -Trigger $trigger -Principal $principal -Settings $settings -Force -ErrorAction Stop | Out-Null
        Write-InfoLog "Scheduled task '$TaskName' created successfully with $IntervalSeconds second interval"
    }
    catch {
        Write-Error "Failed to create scheduled task: $($_.Exception.Message)"
    }
}
# Main execution logic - picks the run mode from the script parameters.
#   -Debug              : only exercise the qwinsta/quser parsing helpers.
#   -RunOnce or -DryRun : collect metrics once, without touching the schedule.
#   otherwise           : register the scheduled task (unless -NoSchedule),
#                         then collect metrics immediately.
if ($Debug) {
    Test-QwinstaOutput
}
else {
    $singleRun = $RunOnce -or $DryRun

    if (-not $singleRun) {
        if ($NoSchedule) {
            Write-InfoLog "Skipping scheduled task creation (-NoSchedule specified)"
        }
        else {
            New-MetricsScheduledTask -IntervalSeconds $IntervalSeconds
        }
    }

    # Every non-debug mode ends with one collection pass
    Invoke-MetricsCollection
}
# Enhanced logging functions
#
# Both helpers always return 0. The previous `[[ cond ]] && echo ...` form
# returned 1 whenever the message was suppressed, and because this script runs
# under `set -e`, a bare call such as `log_info "..."` with --quiet (or
# `log_verbose "..."` without --verbose) terminated the whole script.

# log_verbose MESSAGE - print a timestamped [VERBOSE] line when VERBOSE=true.
log_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] ${1:-}"
    fi
    return 0
}

# log_info MESSAGE - print a timestamped [INFO] line unless QUIET is set.
log_info() {
    if [[ "$QUIET" == "false" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] ${1:-}"
    fi
    return 0
}
# Setup lockfile - Prevent multiple script instances from running simultaneously
#
# Creates $LOCKFILE (mode 600, containing this process's PID) or aborts via
# handle_error. Creation uses noclobber, so "does it exist?" and "create it"
# are a single atomic step. The previous test-then-touch sequence let two
# instances started at the same moment both pass the check.
setup_lockfile() {
    # Remove stale lockfiles older than 60 minutes
    find "$LOCKFILE" -mmin +60 -delete 2>/dev/null || true

    # Subshell keeps noclobber and umask from leaking into the rest of the script
    if ! ( set -o noclobber; umask 077; echo "$$" > "$LOCKFILE" ) 2>/dev/null; then
        if [[ -e "$LOCKFILE" ]]; then
            handle_error "Script is already running"
        fi
        handle_error "Cannot create lockfile $LOCKFILE"
    fi
}
# Get last user commands - Extract recent shell history for one user
#
# Arguments: $1 - username (required)
# Output:    up to 10 `node_user_last_commands{...} 1` lines on stdout
# Returns:   1 when no username is given, otherwise 0 (also when the user has
#            no readable history file - that simply produces no output).
#
# Fixes over the previous version:
#   * history_file is initialised; `local history_file` alone leaves it unset
#     and the later test aborted the script under `set -u` whenever no history
#     file was found.
#   * /root/.bash_history is only consulted for root, so root's commands are
#     no longer reported under another user's name.
#   * backslashes are really doubled: "\\\\" as a gsub replacement yields one
#     backslash (a no-op), whereas "&&" repeats the matched character.
get_last_user_commands() {
    local username="${1:-}"
    local history_file=""
    local hist_path
    local -a candidates

    if [[ -z "$username" ]]; then
        return 1
    fi

    # Candidate history locations, most specific first
    candidates=("/home/${username}/.bash_history" "/home/${username}/.history")
    if [[ "$username" == "root" ]]; then
        candidates+=("/root/.bash_history")
    fi

    for hist_path in "${candidates[@]}"; do
        if [[ -r "$hist_path" ]]; then
            history_file="$hist_path"
            break
        fi
    done

    # Extract last 10 commands and format as Prometheus metrics
    if [[ -n "$history_file" ]]; then
        tail -n 10 "$history_file" 2>/dev/null | \
        "${CMD_PATHS[awk]}" -v user="$username" '{
            gsub(/\\/, "&&", $0)         # Escape backslashes first (before other escaping)
            gsub(/"/, "\\\"", $0)        # Escape double quotes in commands
            gsub(/'\''/, "", $0)         # Remove single quotes (problematic for Prometheus)
            print "node_user_last_commands{username=\"" user "\", command_number=\"" NR "\", command=\"" $0 "\"} 1"
        }'
    fi
    return 0
}
"${CMD_PATHS[awk]}" -F'COMMAND=' -v user="$clean_username" '{ + if (NF >= 2) { + cmd = $2 + gsub(/^[ \t]+|[ \t]+$/, "", cmd) # Trim whitespace + gsub(/\\/, "\\\\", cmd) # Escape backslashes first (before other escaping) + gsub(/"/, "\\\"", cmd) # Escape double quotes + gsub(/'\''/, "", cmd) # Remove single quotes (problematic for Prometheus) + if (cmd != "" && length(cmd) > 0) { + print user "|||" cmd # Use delimiter for deduplication + } + } + }') + fi + + # If we found commands, break (prefer secure over auth.log for RHEL) + if [[ -n "$commands_found" ]]; then + break + fi + fi + done + + # Deduplicate and format as proper metrics + if [[ -n "$commands_found" ]]; then + echo "$commands_found" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \ + "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{ + print "node_user_sudo_commands{username=\"" $1 "\", command=\"" $2 "\"} 1" + }' + fi +} + +# Get session events - Extract login/logout events from auth logs +get_session_events() { + local username="$1" + + if [[ -z "$username" ]]; then + return 1 + fi + + # Strip domain prefixes for comparison + local clean_username="${username#US\\}" + clean_username="${clean_username%@*}" + + # Check both log files for session events + local auth_logs=("/var/log/secure" "/var/log/auth.log") + local session_events="" + + for log_file in "${auth_logs[@]}"; do + if [[ -r "$log_file" ]]; then + # Get recent session events (last 24 hours worth) + session_events=$(grep -E "(session opened|session closed|Accepted)" "$log_file" 2>/dev/null | \ + grep -E "(US\\\\$clean_username|$clean_username|$username)" | \ + tail -20 | \ + "${CMD_PATHS[awk]}" -v user="$clean_username" '{ + if ($0 ~ /session opened/) { + method = "ssh" + if ($0 ~ /sudo/) method = "sudo" + print user "|||login|||" method # Use delimiter for deduplication + } + else if ($0 ~ /session closed/) { + method = "ssh" + if ($0 ~ /sudo/) method = "sudo" + print user "|||logout|||" method # Use delimiter for deduplication + } + else if ($0 ~ 
/Accepted/) { + method = "ssh" + if ($0 ~ /publickey/) method = "ssh-key" + else if ($0 ~ /password/) method = "ssh-password" + print user "|||login|||" method # Use delimiter for deduplication + } + }') + + if [[ -n "$session_events" ]]; then + break + fi + fi + done + + # Deduplicate and format as proper metrics + if [[ -n "$session_events" ]]; then + echo "$session_events" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \ + "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{ + print "node_user_session_events{username=\"" $1 "\", event=\"" $2 "\", method=\"" $3 "\"} 1" + }' + fi +} + +# Get failed login attempts - Track security events +get_failed_logins() { + # Check both log files for failed authentication attempts + local auth_logs=("/var/log/secure" "/var/log/auth.log") + local failed_logins="" + + for log_file in "${auth_logs[@]}"; do + if [[ -r "$log_file" ]]; then + # Get failed login attempts from last 24 hours + failed_logins=$(grep -E "(Failed password|authentication failure|Invalid user)" "$log_file" 2>/dev/null | \ + tail -50 | \ + "${CMD_PATHS[awk]}" '{ + username = "unknown" + source_ip = "unknown" + + # Extract username - handle various formats + if ($0 ~ /for [a-zA-Z0-9_]+/) { + match($0, /for ([a-zA-Z0-9_\\]+)/, arr) + if (arr[1]) { + username = arr[1] + gsub(/US\\/, "", username) # Clean domain prefix + } + } + + # Extract source IP + if ($0 ~ /from [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) { + match($0, /from ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/, arr) + if (arr[1]) source_ip = arr[1] + } + + failure_type = "password" + if ($0 ~ /Invalid user/) failure_type = "invalid_user" + else if ($0 ~ /authentication failure/) failure_type = "auth_failure" + + print username "|||" source_ip "|||" failure_type # Use delimiter for deduplication + }') + + if [[ -n "$failed_logins" ]]; then + break + fi + fi + done + + # Deduplicate and format as proper metrics + if [[ -n "$failed_logins" ]]; then + echo "$failed_logins" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \ + 
"${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{ + print "node_user_failed_logins{username=\"" $1 "\", source_ip=\"" $2 "\", failure_type=\"" $3 "\"} 1" + }' + fi +} + +# Get active session durations - Calculate how long users have been logged in +get_session_durations() { + local current_time + current_time=$(date +%s) + + "${CMD_PATHS[who]}" -u | "${CMD_PATHS[awk]}" -v current_time="$current_time" '{ + if (NF >= 5) { + username = $1 + gsub(/US\\|@us\.[^.]+\.net/, "", username) # Clean username (US\ or @us.*.net) + + # Parse login time (format: Oct 15 14:30 or 14:30) + login_time = "" + if ($3 ~ /:/) { + # Today format: 14:30 + login_time = $3 + login_date = strftime("%Y-%m-%d", current_time) + } else if ($4 ~ /:/) { + # Date format: Oct 15 14:30 + login_date = strftime("%Y", current_time) "-" $3 "-" $4 + login_time = $5 + } + + if (login_time != "" && login_date != "") { + # Convert to epoch (approximate) + split(login_time, time_parts, ":") + hours = time_parts[1] + minutes = time_parts[2] + + # Simple duration calculation (today only) + login_seconds = (hours * 3600) + (minutes * 60) + current_seconds = strftime("%H", current_time) * 3600 + strftime("%M", current_time) * 60 + + if (current_seconds >= login_seconds) { + duration = current_seconds - login_seconds + } else { + duration = (86400 - login_seconds) + current_seconds # Cross midnight + } + + print username "|||" duration # Use delimiter for deduplication + } + } + }' | "${CMD_PATHS[sort]}" -k1,1 | \ + "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{ + # Keep the latest/highest duration for each username + if ($1 != prev_user) { + if (prev_user != "") { + print "node_user_session_duration_seconds{username=\"" prev_user "\"} " max_duration + } + prev_user = $1 + max_duration = $2 + } else if ($2 > max_duration) { + max_duration = $2 + } + } END { + if (prev_user != "") { + print "node_user_session_duration_seconds{username=\"" prev_user "\"} " max_duration + } + }' +} + +# Output metric - Format and display Prometheus metric 
with help text and type +output_metric() { + local metric_name="$1" + local help_text="$2" + local metric_type="$3" + local metric_value="$4" + local default_value="$5" + + echo "# HELP $metric_name $help_text" + echo "# TYPE $metric_name $metric_type" + echo "${metric_value:-$default_value}" +} + +# Main function - Orchestrate the entire monitoring process +main() { + # Parse command line arguments first + parse_arguments "$@" + + # Record script start time for runtime metric + local script_start_time + script_start_time=$(date +%s.%N) + + # Add dry-run header if applicable + if [[ "$DRY_RUN" == "true" ]]; then + echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/usrlogins.prom ===" >&2 + fi + + trap cleanup EXIT # Ensure cleanup runs when script exits + + # Initialize environment and commands + find_commands + + # Skip setup in dry-run mode + if [[ "$DRY_RUN" == "false" ]]; then + setup_directory + setup_lockfile + install_cron_job + fi + + # Generate and output all Prometheus metrics + + # Metric 1: Individual user sessions with details + local users + users=$(get_logged_users) + output_metric "node_logged_in_usrs" "Currently Logged in Users" "gauge" \ + "$users" 'node_logged_in_usrs{name="", location=""} 0' + + # Metric 2: Terminal count per user + local user_terminals + user_terminals=$(get_user_terminal_count) + output_metric "node_logged_in_usr_terminals" "Total of open sessions per user" "gauge" \ + "$user_terminals" 'node_logged_in_usr_terminals{username=""} 0' + + # Metric 3: Total user count system-wide + local total_count + total_count=$(get_total_user_count) + output_metric "node_logged_in_total" "Total of open sessions on the system" "gauge" \ + "node_logged_in_total ${total_count:-0}" "node_logged_in_total 0" + + # Metric 4: Last 10 commands for each logged in user + local logged_users + logged_users=$("${CMD_PATHS[who]}" | "${CMD_PATHS[awk]}" '{gsub(/US\\|@us\.[^.]+\.net/, "", $1); print $1}' | "${CMD_PATHS[sort]}" | 
"${CMD_PATHS[uniq]}") + + local user_commands="" + while IFS= read -r user; do + if [[ -n "$user" ]]; then + local commands + commands=$(get_last_user_commands "$user") + if [[ -n "$commands" ]]; then + user_commands+="$commands"$'\n' + fi + fi + done <<< "$logged_users" + + output_metric "node_user_last_commands" "Last 10 commands executed by logged in users" "gauge" \ + "$user_commands" 'node_user_last_commands{username="", command_number="", command=""} 0' + + # Metric 5: Recent sudo commands for each logged in user + local sudo_commands="" + while IFS= read -r user; do + if [[ -n "$user" ]]; then + local sudo_cmds + sudo_cmds=$(get_sudo_commands "$user") + if [[ -n "$sudo_cmds" ]]; then + sudo_commands+="$sudo_cmds"$'\n' + fi + fi + done <<< "$logged_users" + + output_metric "node_user_sudo_commands" "Recent sudo commands executed by logged in users" "gauge" \ + "$sudo_commands" 'node_user_sudo_commands{username="", command=""} 0' + + # Metric 6: Session events (login/logout) for each logged in user + local session_events="" + while IFS= read -r user; do + if [[ -n "$user" ]]; then + local events + events=$(get_session_events "$user") + if [[ -n "$events" ]]; then + session_events+="$events"$'\n' + fi + fi + done <<< "$logged_users" + + output_metric "node_user_session_events" "Login and logout events for users" "gauge" \ + "$session_events" 'node_user_session_events{username="", event="", method=""} 0' + + # Metric 7: Active session durations + local session_durations + session_durations=$(get_session_durations) + output_metric "node_user_session_duration_seconds" "Duration of active user sessions in seconds" "gauge" \ + "$session_durations" 'node_user_session_duration_seconds{username=""} 0' + + # Metric 8: Failed login attempts (security monitoring) + local failed_logins + failed_logins=$(get_failed_logins) + output_metric "node_user_failed_logins" "Failed login attempts by username and source IP" "counter" \ + "$failed_logins" 
'node_user_failed_logins{username="", source_ip="", failure_type=""} 0' + + # Metric 9: Script runtime + local script_end_time script_runtime + script_end_time=$(date +%s.%N) + script_runtime=$(echo "$script_end_time - $script_start_time" | bc -l 2>/dev/null || echo "0") + output_metric "node_user_monitor_runtime_seconds" "Script execution time in seconds" "gauge" \ + "node_user_monitor_runtime_seconds $script_runtime" "node_user_monitor_runtime_seconds 0" + + if [[ "$DRY_RUN" == "true" ]]; then + echo "=== END DRY RUN OUTPUT ===" >&2 + fi +} + +# Script entry point +main "$@" + +# 2025-09-23 +# Fixed: Prometheus parsing errors with single quotes (\' sequences) +# Fixed: Prometheus parsing errors with backslash escapes (\u, \x, etc.) +# Improved: Domain regex pattern now handles any us.*.net domain instead of just us.calormen.net