diff --git a/PdnsInstall.sh b/PdnsInstall.sh
index 1a239f7..bece42c 100644
--- a/PdnsInstall.sh
+++ b/PdnsInstall.sh
@@ -2,7 +2,7 @@
set -euo pipefail
######################################################################################
-#### Version 3.0 ####
+#### Version 3.1 ####
#### For questions or comments contact@mylinux.work ####
#### Author : Phil Connor ####
#### ####
@@ -36,8 +36,8 @@ HTTP=nginx # <-- Choose apache or nginx --> The apa
##########################
ip4=$(ip -o -4 route get 8.8.8.8 | awk '{print $7; exit}')
host=$(hostname -f)
-OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
-OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
+OS=$({ grep PRETTY_NAME /etc/os-release || true; } | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
+OSVER=$({ grep VERSION_ID /etc/os-release || true; } | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
# OS family: debian (ubuntu, debian), rhel (centos, red, oracle, rocky, alma), fedora, suse (opensuse)
OS_FAMILY=""
diff --git a/ad-health-exporter.ps1 b/ad-health-exporter.ps1
new file mode 100644
index 0000000..b024710
--- /dev/null
+++ b/ad-health-exporter.ps1
@@ -0,0 +1,602 @@
+<#
+.SYNOPSIS
+ Active Directory Health Prometheus Metrics Exporter
+.DESCRIPTION
+ Prometheus exporter for Active Directory health. Monitors replication,
+ FSMO roles, account hygiene, dcdiag tests, DNS SRV records, SYSVOL/NETLOGON
+ accessibility, and domain controller metadata. Exports metrics as
+ Prometheus-compatible text format for windows_exporter textfile collector.
+.PARAMETER Mode
+ Output mode: 'stdout' (default), 'textfile', or 'http'
+.PARAMETER Port
+ HTTP port for http mode (default: 9198)
+.PARAMETER TextfileDir
+ Directory for textfile collector output (default: C:\ProgramData\node_exporter)
+.PARAMETER OutputFile
+ Custom output file path
+.PARAMETER InstallScheduledTask
+ Switch to register a scheduled task (running as SYSTEM) that re-runs this script in textfile mode every TaskIntervalMinutes minutes
+.PARAMETER TaskIntervalMinutes
+ Interval in minutes for the scheduled task (default: 5)
+.NOTES
+ Author: Phil Connor
+ Contact: contact@mylinux.work
+ Website: https://mylinux.work
+ License: MIT
+ Version: 1.0
+
+ Metrics Exported:
+ Core Status:
+ - windows_ad_up
+ - windows_ad_exporter_info{version}
+
+ Replication:
+ - windows_ad_replication_failure_total{partner}
+ - windows_ad_replication_last_success_timestamp{partner}
+ - windows_ad_replication_pending_objects{partner}
+
+ FSMO Roles:
+ - windows_ad_fsmo_role_holder{role}
+
+ Account Health:
+ - windows_ad_account_lockout_total
+ - windows_ad_account_disabled_total
+ - windows_ad_account_expired_total
+ - windows_ad_account_password_expired_total
+ - windows_ad_account_inactive_total
+
+ Computer Health:
+ - windows_ad_computer_stale_total
+
+ Group Health:
+ - windows_ad_group_empty_total
+
+ DCDiag:
+ - windows_ad_dcdiag_test_result{test}
+
+ DNS and Shares:
+ - windows_ad_dns_srv_record_status
+ - windows_ad_sysvol_accessible
+ - windows_ad_netlogon_accessible
+
+ Domain Controller Info:
+ - windows_ad_domain_controller_info{domain,site,gc}
+
+ Exporter:
+ - windows_ad_exporter_duration_seconds
+ - windows_ad_exporter_last_run_timestamp
+#>
+
+param(
+ [ValidateSet('stdout', 'textfile', 'http')]
+ [string]$Mode = 'stdout',
+
+ [int]$Port = 9198,
+
+ [string]$TextfileDir = 'C:\ProgramData\node_exporter',
+
+ [string]$OutputFile,
+
+ [switch]$InstallScheduledTask,
+
+ [int]$TaskIntervalMinutes = 5
+)
+
+# Optionally register a scheduled task that re-runs this script in textfile
+# mode every $TaskIntervalMinutes minutes. The task runs as SYSTEM. Its trigger
+# fires once, one minute after registration, and then repeats for 365 days; it
+# is not a boot/startup trigger.
+if ($InstallScheduledTask) {
+    $taskName = "ADHealthExporter"
+    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+
+    if (-not $existingTask) {
+        # Validate before anything is built from the interval.
+        if ($TaskIntervalMinutes -le 0) {
+            throw "TaskIntervalMinutes must be a positive integer"
+        }
+
+        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$($MyInvocation.MyCommand.Path)`" -Mode textfile"
+        $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365)
+        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
+
+        try {
+            Write-Host "Creating scheduled task: $taskName"
+            # -ErrorAction Stop turns a registration failure into a terminating
+            # error so the catch below reports the real cause. Out-Null keeps the
+            # returned task object out of the script's output stream.
+            Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Exports Active Directory health metrics for Prometheus every $TaskIntervalMinutes minutes" -ErrorAction Stop | Out-Null
+
+            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+            if (-not $createdTask) {
+                throw "Failed to verify scheduled task creation"
+            }
+            Write-Host "Successfully created scheduled task: $taskName" -ForegroundColor Green
+        } catch {
+            Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
+            throw
+        }
+    } else {
+        Write-Host "Scheduled task '$taskName' already exists, skipping creation"
+    }
+}
+
+$ErrorActionPreference = 'SilentlyContinue'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+function Get-UnixTimestamp {
+    <#
+    .SYNOPSIS
+        Returns the current time as whole seconds since the Unix epoch (UTC).
+    .OUTPUTS
+        [long]
+    #>
+    # Taken from the UTC clock directly instead of parsing the text produced by
+    # Get-Date -UFormat '%s': Windows PowerShell 5.1 derives that value from
+    # local time, and the old [int] cast would overflow in 2038.
+    [DateTimeOffset]::UtcNow.ToUnixTimeSeconds()
+}
+
+function Format-MetricValue {
+    # Rounds $Value to $Decimals fractional digits (2 unless specified) using
+    # [math]::Round and returns the rounded number.
+    param([double]$Value, [int]$Decimals = 2)
+    $rounded = [math]::Round($Value, $Decimals)
+    return $rounded
+}
+
+function Sanitize-LabelValue {
+    <#
+    .SYNOPSIS
+        Escapes a string for use inside a double-quoted Prometheus label value.
+    .PARAMETER Value
+        Raw label text.
+    .OUTPUTS
+        [string] with backslash -> \\, double quote -> \", newline -> \n.
+    #>
+    param([string]$Value)
+    # Literal String.Replace is used on purpose: in a -replace replacement
+    # string the backslash is NOT an escape character, so '\\\\' would insert
+    # four backslashes and '\\"' would emit an unescaped quote.
+    # Backslashes are escaped first so the ones added for quotes and newlines
+    # are not doubled again.
+    $Value.Replace('\', '\\').Replace('"', '\"').Replace("`n", '\n')
+}
+
+# ============================================================================
+# REPLICATION METRICS
+# ============================================================================
+
+function Get-ReplicationMetrics {
+    <#
+    .SYNOPSIS
+        Builds the Prometheus text for the per-partner replication metrics.
+    .OUTPUTS
+        [string] HELP/TYPE headers and one sample per partner for the failure
+        count, last-success timestamp and pending-object metrics. If the
+        replication query throws, a warning is written and only the
+        failure-metric header is appended.
+    #>
+    $sb = [System.Text.StringBuilder]::new()
+
+    try {
+        $partners = Get-ADReplicationPartnerMetadata -Target * -ErrorAction Stop
+        # Hoisted: the epoch is the same for every partner.
+        $epoch = [datetime]'1970-01-01'
+
+        [void]$sb.AppendLine('# HELP windows_ad_replication_failure_total Replication failures per partner')
+        [void]$sb.AppendLine('# TYPE windows_ad_replication_failure_total gauge')
+        foreach ($p in $partners) {
+            # Reduce the NTDS Settings DN to the first CN after it.
+            $partnerName = Sanitize-LabelValue ($p.Partner -replace '^CN=NTDS Settings,CN=', '' -replace ',.*$', '')
+            $failures = if ($p.ConsecutiveReplicationFailures) { $p.ConsecutiveReplicationFailures } else { 0 }
+            [void]$sb.AppendLine("windows_ad_replication_failure_total{partner=`"$partnerName`"} $failures")
+        }
+        [void]$sb.AppendLine('')
+
+        [void]$sb.AppendLine('# HELP windows_ad_replication_last_success_timestamp Last successful replication per partner (unix timestamp)')
+        [void]$sb.AppendLine('# TYPE windows_ad_replication_last_success_timestamp gauge')
+        foreach ($p in $partners) {
+            $partnerName = Sanitize-LabelValue ($p.Partner -replace '^CN=NTDS Settings,CN=', '' -replace ',.*$', '')
+            $ts = 0
+            if ($p.LastReplicationSuccess) {
+                # [long] rather than [int]: a 32-bit cast throws once the epoch
+                # value passes 2147483647 (January 2038), which would send the
+                # whole section to the catch block.
+                $ts = [long]($p.LastReplicationSuccess.ToUniversalTime() - $epoch).TotalSeconds
+            }
+            [void]$sb.AppendLine("windows_ad_replication_last_success_timestamp{partner=`"$partnerName`"} $ts")
+        }
+        [void]$sb.AppendLine('')
+
+        [void]$sb.AppendLine('# HELP windows_ad_replication_pending_objects Pending replication objects per partner')
+        [void]$sb.AppendLine('# TYPE windows_ad_replication_pending_objects gauge')
+        foreach ($p in $partners) {
+            $partnerName = Sanitize-LabelValue ($p.Partner -replace '^CN=NTDS Settings,CN=', '' -replace ',.*$', '')
+            # NOTE(review): confirm the partner-metadata object actually exposes
+            # InboundNeighbors; if it does not, this metric is always 0.
+            $pending = if ($p.InboundNeighbors) {
+                ($p.InboundNeighbors | Measure-Object -Property EstimatedChanges -Sum).Sum
+            } else { 0 }
+            if (-not $pending) { $pending = 0 }
+            [void]$sb.AppendLine("windows_ad_replication_pending_objects{partner=`"$partnerName`"} $pending")
+        }
+        [void]$sb.AppendLine('')
+    }
+    catch {
+        Write-Warning "Failed to collect replication metrics: $_"
+        [void]$sb.AppendLine('# HELP windows_ad_replication_failure_total Replication failures per partner')
+        [void]$sb.AppendLine('# TYPE windows_ad_replication_failure_total gauge')
+        [void]$sb.AppendLine('')
+    }
+
+    $sb.ToString()
+}
+
+# ============================================================================
+# FSMO ROLE METRICS
+# ============================================================================
+
+function Get-FsmoRoleMetrics {
+    <#
+    .SYNOPSIS
+        Emits windows_ad_fsmo_role_holder{role} = 1 when this computer holds the
+        role, otherwise 0, for the five FSMO roles.
+    .OUTPUTS
+        [string] Prometheus text; only the HELP/TYPE header if the domain or
+        forest query throws.
+    #>
+    $sb = [System.Text.StringBuilder]::new()
+
+    try {
+        $domain = Get-ADDomain -ErrorAction Stop
+        $forest = Get-ADForest -ErrorAction Stop
+        $localDC = $env:COMPUTERNAME
+
+        # [ordered] keeps the sample order identical from run to run; a plain
+        # hashtable enumerates in an unspecified order.
+        $fsmoRoles = [ordered]@{
+            'PDCEmulator'          = $domain.PDCEmulator
+            'RIDMaster'            = $domain.RIDMaster
+            'InfrastructureMaster' = $domain.InfrastructureMaster
+            'SchemaMaster'         = $forest.SchemaMaster
+            'DomainNamingMaster'   = $forest.DomainNamingMaster
+        }
+
+        # Match the local name as the whole holder name or as its first DNS
+        # label. The name is escaped so it is always compared literally.
+        $holderPattern = '^' + [regex]::Escape($localDC) + '(\.|$)'
+
+        [void]$sb.AppendLine('# HELP windows_ad_fsmo_role_holder FSMO role holder (1 if this DC holds the role)')
+        [void]$sb.AppendLine('# TYPE windows_ad_fsmo_role_holder gauge')
+        foreach ($role in $fsmoRoles.GetEnumerator()) {
+            $holdsRole = if ($role.Value -match $holderPattern) { 1 } else { 0 }
+            [void]$sb.AppendLine("windows_ad_fsmo_role_holder{role=`"$($role.Key)`"} $holdsRole")
+        }
+        [void]$sb.AppendLine('')
+    }
+    catch {
+        Write-Warning "Failed to collect FSMO role metrics: $_"
+        [void]$sb.AppendLine('# HELP windows_ad_fsmo_role_holder FSMO role holder (1 if this DC holds the role)')
+        [void]$sb.AppendLine('# TYPE windows_ad_fsmo_role_holder gauge')
+        [void]$sb.AppendLine('')
+    }
+
+    $sb.ToString()
+}
+
+# ============================================================================
+# ACCOUNT HEALTH METRICS
+# ============================================================================
+
+function Get-AccountHealthMetrics {
+    # Counts accounts in five Search-ADAccount categories and renders each
+    # count as a gauge. The checks run in a fixed order inside one try block,
+    # so the first failing query stops the remaining ones; whatever was
+    # rendered before the failure is still returned.
+    $out = [System.Text.StringBuilder]::new()
+
+    try {
+        $checks = @(
+            @{ Metric = 'windows_ad_account_lockout_total'
+               Help   = 'Number of locked out accounts'
+               Query  = @{ LockedOut = $true } }
+            @{ Metric = 'windows_ad_account_disabled_total'
+               Help   = 'Number of disabled accounts'
+               Query  = @{ AccountDisabled = $true } }
+            @{ Metric = 'windows_ad_account_expired_total'
+               Help   = 'Number of expired accounts'
+               Query  = @{ AccountExpired = $true } }
+            @{ Metric = 'windows_ad_account_password_expired_total'
+               Help   = 'Accounts with expired passwords'
+               Query  = @{ PasswordExpired = $true } }
+            # Users with no logon in the last 90 days
+            @{ Metric = 'windows_ad_account_inactive_total'
+               Help   = 'Accounts inactive for more than 90 days'
+               Query  = @{ AccountInactive = $true; TimeSpan = (New-TimeSpan -Days 90); UsersOnly = $true } }
+        )
+
+        foreach ($check in $checks) {
+            $query = $check.Query
+            $count = @(Search-ADAccount @query -ErrorAction Stop).Count
+            [void]$out.AppendLine("# HELP $($check.Metric) $($check.Help)")
+            [void]$out.AppendLine("# TYPE $($check.Metric) gauge")
+            [void]$out.AppendLine("$($check.Metric) $count")
+            [void]$out.AppendLine('')
+        }
+    }
+    catch {
+        Write-Warning "Failed to collect account health metrics: $_"
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# COMPUTER HEALTH METRICS
+# ============================================================================
+
+function Get-ComputerHealthMetrics {
+    # Renders the number of computer accounts that Search-ADAccount reports as
+    # inactive for 90 days. Returns an empty string (after a warning) when the
+    # query throws.
+    $out = [System.Text.StringBuilder]::new()
+
+    try {
+        $window = New-TimeSpan -Days 90
+        $inactiveComputers = Search-ADAccount -AccountInactive -TimeSpan $window -ComputersOnly -ErrorAction Stop
+        $staleCount = @($inactiveComputers).Count
+
+        foreach ($line in @(
+                '# HELP windows_ad_computer_stale_total Computers not logged in for more than 90 days',
+                '# TYPE windows_ad_computer_stale_total gauge',
+                "windows_ad_computer_stale_total $staleCount",
+                '')) {
+            [void]$out.AppendLine($line)
+        }
+    }
+    catch {
+        Write-Warning "Failed to collect computer health metrics: $_"
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# GROUP HEALTH METRICS
+# ============================================================================
+
+function Get-GroupHealthMetrics {
+    <#
+    .SYNOPSIS
+        Emits windows_ad_group_empty_total: the number of security groups whose
+        member attribute is empty.
+    .OUTPUTS
+        [string] Prometheus text; empty (after a warning) if the query throws.
+    #>
+    $sb = [System.Text.StringBuilder]::new()
+
+    try {
+        # Let the directory do the filtering instead of downloading the full
+        # member list of every security group just to test it for emptiness:
+        #   groupType bit 0x80000000 (2147483648) set  -> security group
+        #   (!(member=*))                              -> no member values
+        $emptyGroupFilter = '(&(groupType:1.2.840.113556.1.4.803:=2147483648)(!(member=*)))'
+        $emptyGroups = @(Get-ADGroup -LDAPFilter $emptyGroupFilter -ErrorAction Stop).Count
+        [void]$sb.AppendLine('# HELP windows_ad_group_empty_total Empty security groups')
+        [void]$sb.AppendLine('# TYPE windows_ad_group_empty_total gauge')
+        [void]$sb.AppendLine("windows_ad_group_empty_total $emptyGroups")
+        [void]$sb.AppendLine('')
+    }
+    catch {
+        Write-Warning "Failed to collect group health metrics: $_"
+    }
+
+    $sb.ToString()
+}
+
+# ============================================================================
+# DCDIAG METRICS
+# ============================================================================
+
+function Get-DcdiagMetrics {
+    # Runs a fixed list of dcdiag tests, one invocation per test, and reports
+    # 1 when the captured output contains "passed test <name>", otherwise 0.
+    # A test whose invocation throws is reported as 0.
+    $out = [System.Text.StringBuilder]::new()
+    $helpLine = '# HELP windows_ad_dcdiag_test_result DCDiag test result (1=pass, 0=fail)'
+    $typeLine = '# TYPE windows_ad_dcdiag_test_result gauge'
+
+    try {
+        $testNames = 'Connectivity', 'Replications', 'DNS', 'Services', 'Advertising', 'FrsEvent', 'KccEvent'
+
+        [void]$out.AppendLine($helpLine)
+        [void]$out.AppendLine($typeLine)
+
+        foreach ($testName in $testNames) {
+            $result = 0
+            try {
+                $report = dcdiag /test:$testName 2>&1 | Out-String
+                if ($report -match "passed test $testName") {
+                    $result = 1
+                }
+            }
+            catch {
+                $result = 0
+            }
+            [void]$out.AppendLine("windows_ad_dcdiag_test_result{test=`"$testName`"} $result")
+        }
+        [void]$out.AppendLine('')
+    }
+    catch {
+        Write-Warning "Failed to collect dcdiag metrics: $_"
+        [void]$out.AppendLine($helpLine)
+        [void]$out.AppendLine($typeLine)
+        [void]$out.AppendLine('')
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# DNS AND SHARE ACCESSIBILITY METRICS
+# ============================================================================
+
+function Get-DnsAndShareMetrics {
+    # Renders three gauges: whether the _ldap._tcp.dc._msdcs SRV record for the
+    # domain resolves, and whether \\<this computer>\SYSVOL and
+    # \\<this computer>\NETLOGON pass Test-Path. Every failure maps to 0.
+    $out = [System.Text.StringBuilder]::new()
+
+    # --- DNS SRV record ---
+    $srvStatus = 0
+    try {
+        $dnsRoot = (Get-ADDomain -ErrorAction Stop).DNSRoot
+        try {
+            $records = Resolve-DnsName -Name "_ldap._tcp.dc._msdcs.$dnsRoot" -Type SRV -ErrorAction Stop
+            if ($records) { $srvStatus = 1 }
+        } catch {}
+    }
+    catch {
+        Write-Warning "Failed to check DNS SRV records: $_"
+        $srvStatus = 0
+    }
+    [void]$out.AppendLine('# HELP windows_ad_dns_srv_record_status DNS SRV record health (1=OK, 0=missing)')
+    [void]$out.AppendLine('# TYPE windows_ad_dns_srv_record_status gauge')
+    [void]$out.AppendLine("windows_ad_dns_srv_record_status $srvStatus")
+    [void]$out.AppendLine('')
+
+    # --- SYSVOL, then NETLOGON ---
+    $dcName = $env:COMPUTERNAME
+    foreach ($share in 'SYSVOL', 'NETLOGON') {
+        $metric = "windows_ad_$($share.ToLowerInvariant())_accessible"
+        try {
+            $reachable = if (Test-Path "\\$dcName\$share") { 1 } else { 0 }
+        }
+        catch {
+            $reachable = 0
+        }
+        [void]$out.AppendLine("# HELP $metric $share share accessibility (1=OK, 0=fail)")
+        [void]$out.AppendLine("# TYPE $metric gauge")
+        [void]$out.AppendLine("$metric $reachable")
+        [void]$out.AppendLine('')
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# DOMAIN CONTROLLER INFO
+# ============================================================================
+
+function Get-DomainControllerInfoMetrics {
+    # Renders one info-style sample (value 1) whose labels carry the domain,
+    # site and global-catalog flag returned by Get-ADDomainController.
+    # Only the HELP/TYPE header is returned when that call throws.
+    $out = [System.Text.StringBuilder]::new()
+    $header = @(
+        '# HELP windows_ad_domain_controller_info Domain controller metadata',
+        '# TYPE windows_ad_domain_controller_info gauge'
+    )
+
+    try {
+        $controller = Get-ADDomainController -ErrorAction Stop
+        $domainLabel = Sanitize-LabelValue $controller.Domain
+        $siteLabel = Sanitize-LabelValue $controller.Site
+        $gcLabel = "false"
+        if ($controller.IsGlobalCatalog) { $gcLabel = "true" }
+
+        foreach ($line in $header) { [void]$out.AppendLine($line) }
+        [void]$out.AppendLine("windows_ad_domain_controller_info{domain=`"$domainLabel`",site=`"$siteLabel`",gc=`"$gcLabel`"} 1")
+        [void]$out.AppendLine('')
+    }
+    catch {
+        Write-Warning "Failed to collect domain controller info: $_"
+        foreach ($line in $header) { [void]$out.AppendLine($line) }
+        [void]$out.AppendLine('')
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# COLLECT ALL METRICS
+# ============================================================================
+
+function Get-AllMetrics {
+    # Assembles the full exposition text: the static up/info samples, then the
+    # output of each section collector in a fixed order, then the elapsed time
+    # of this run and the timestamp at which it finished.
+    $startedAt = Get-Date
+    $out = [System.Text.StringBuilder]::new()
+
+    # Static exporter samples
+    foreach ($line in @(
+            '# HELP windows_ad_up Exporter status (1=up, 0=down)',
+            '# TYPE windows_ad_up gauge',
+            'windows_ad_up 1',
+            '',
+            '# HELP windows_ad_exporter_info Exporter version information',
+            '# TYPE windows_ad_exporter_info gauge',
+            'windows_ad_exporter_info{version="1.0"} 1',
+            '')) {
+        [void]$out.AppendLine($line)
+    }
+
+    # Section collectors, appended in this order
+    $collectors = @(
+        'Get-ReplicationMetrics',
+        'Get-FsmoRoleMetrics',
+        'Get-AccountHealthMetrics',
+        'Get-ComputerHealthMetrics',
+        'Get-GroupHealthMetrics',
+        'Get-DcdiagMetrics',
+        'Get-DnsAndShareMetrics',
+        'Get-DomainControllerInfoMetrics'
+    )
+    foreach ($collector in $collectors) {
+        [void]$out.Append((& $collector))
+    }
+
+    # Run statistics
+    $elapsed = (Get-Date) - $startedAt
+    $duration = Format-MetricValue $elapsed.TotalSeconds
+    $timestamp = Get-UnixTimestamp
+
+    foreach ($line in @(
+            '# HELP windows_ad_exporter_duration_seconds Time to generate all metrics',
+            '# TYPE windows_ad_exporter_duration_seconds gauge',
+            "windows_ad_exporter_duration_seconds $duration",
+            '',
+            '# HELP windows_ad_exporter_last_run_timestamp Unix timestamp of last successful run',
+            '# TYPE windows_ad_exporter_last_run_timestamp gauge',
+            "windows_ad_exporter_last_run_timestamp $timestamp",
+            '')) {
+        [void]$out.AppendLine($line)
+    }
+
+    $out.ToString()
+}
+
+# ============================================================================
+# HTTP SERVER MODE
+# ============================================================================
+
+# Serves the exporter over HTTP using System.Net.HttpListener.
+# Parameters:
+#   ListenPort - TCP port used in the listener prefix http://+:<port>/
+# Behaviour: requests are handled one at a time in a blocking loop. A request
+# for /metrics gets a fresh Get-AllMetrics result as text/plain; any other path
+# gets the HTML landing page held in the here-string below.
+# NOTE(review): the try block wraps the whole loop, so any exception raised
+# while serving a single request ends the loop and stops the listener.
+# NOTE(review): the landing-page here-string in this view appears to have lost
+# its HTML markup; confirm against the real file before editing it.
+function Start-HttpServer {
+ param([int]$ListenPort)
+
+ $prefix = "http://+:$ListenPort/"
+ $listener = [System.Net.HttpListener]::new()
+ $listener.Prefixes.Add($prefix)
+
+ try {
+ $listener.Start()
+ Write-Host "Starting Active Directory health exporter on port $ListenPort..." -ForegroundColor Green
+ Write-Host "Metrics available at http://localhost:$ListenPort/metrics"
+
+ while ($listener.IsListening) {
+ # Blocks until the next request arrives
+ $context = $listener.GetContext()
+ $request = $context.Request
+ $response = $context.Response
+
+ if ($request.Url.AbsolutePath -eq '/metrics') {
+ # Metrics are collected per request, not cached
+ $metrics = Get-AllMetrics
+ $buffer = [System.Text.Encoding]::UTF8.GetBytes($metrics)
+ $response.ContentType = 'text/plain; version=0.0.4; charset=utf-8'
+ }
+ else {
+ $html = @"
+
+
+
AD Health Exporter v1.0
+
+Active Directory Health Exporter v1.0
+Metrics
+Sections
+
+- Replication health (failures, last success, pending objects)
+- FSMO role holders
+- Account health (locked, disabled, expired, inactive)
+- Computer health (stale computers)
+- Group health (empty security groups)
+- DCDiag test results
+- DNS SRV records and share accessibility
+- Domain controller metadata
+
+
+
+"@
+ $buffer = [System.Text.Encoding]::UTF8.GetBytes($html)
+ $response.ContentType = 'text/html; charset=utf-8'
+ }
+
+ $response.ContentLength64 = $buffer.Length
+ $response.OutputStream.Write($buffer, 0, $buffer.Length)
+ $response.OutputStream.Close()
+ }
+ }
+ catch {
+ Write-Error "HTTP server error: $_"
+ Write-Error "If access denied, run: netsh http add urlacl url=http://+:$ListenPort/ user=Everyone"
+ }
+ finally {
+ # Stop the listener on every exit path
+ if ($listener.IsListening) {
+ $listener.Stop()
+ }
+ }
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Dispatch on -Mode:
+#   http     - serve metrics from the built-in listener
+#   textfile - write metrics to a .prom file via a temp file + rename
+#   default  - print metrics to the output stream (stdout mode)
+switch ($Mode) {
+    'http' {
+        Start-HttpServer -ListenPort $Port
+    }
+    'textfile' {
+        if (-not $OutputFile) {
+            $OutputFile = Join-Path $TextfileDir 'windows_ad_health.prom'
+        }
+
+        $outputDir = Split-Path $OutputFile -Parent
+        # A bare file name has no parent component; use the current location
+        # rather than passing an empty path to Test-Path / Join-Path.
+        if (-not $outputDir) {
+            $outputDir = '.'
+        }
+
+        $tempFile = Join-Path $outputDir ".windows_ad_health_metrics.$PID.tmp"
+
+        try {
+            # $ErrorActionPreference is 'SilentlyContinue' for the collectors,
+            # so every file operation below asks for -ErrorAction Stop
+            # explicitly; otherwise a failed write or move would still be
+            # reported as success.
+            if (-not (Test-Path $outputDir)) {
+                New-Item -Path $outputDir -ItemType Directory -Force -ErrorAction Stop | Out-Null
+            }
+
+            $metrics = Get-AllMetrics
+
+            # Sanity check before touching the disk: an implausibly short
+            # result must not replace the previous file.
+            $lineCount = ($metrics -split "`n").Count
+            if ($lineCount -lt 10) {
+                Write-Error "Metrics file too small ($lineCount lines), keeping previous"
+                exit 1
+            }
+
+            # NOTE(review): on Windows PowerShell 5.1 '-Encoding utf8' writes a
+            # BOM; confirm the textfile collector in use accepts it.
+            $metrics | Out-File -FilePath $tempFile -Encoding utf8 -NoNewline -ErrorAction Stop
+
+            # Swap the finished temp file into place.
+            Move-Item -Path $tempFile -Destination $OutputFile -Force -ErrorAction Stop
+            Write-Host "Metrics written to $OutputFile ($lineCount lines)" -ForegroundColor Green
+        }
+        catch {
+            Remove-Item -Path $tempFile -Force -ErrorAction SilentlyContinue
+            Write-Error "Failed to generate metrics: $_"
+            exit 1
+        }
+    }
+    default {
+        Get-AllMetrics | Write-Output
+    }
+}
diff --git a/add-apache-bot-block.sh b/add-apache-bot-block.sh
new file mode 100755
index 0000000..5686164
--- /dev/null
+++ b/add-apache-bot-block.sh
@@ -0,0 +1,374 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-apache-bot-block.sh
+# Version: 1.1
+# Description: Automate AI scraper, SEO bot, vulnerability scanner, and
+# scraping framework blocking on standard Apache servers.
+# Creates mod_rewrite rules server-wide or per-directory via
+# .htaccess.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - Apache installed (apache2 on Debian/Ubuntu or httpd on RHEL/CentOS)
+# - Root access
+# - mod_rewrite available
+#
+# Usage:
+# sudo ./add-apache-bot-block.sh
+# sudo ./add-apache-bot-block.sh --dry-run
+# sudo ./add-apache-bot-block.sh --remove
+# sudo ./add-apache-bot-block.sh --htaccess /var/www/html
+# sudo ./add-apache-bot-block.sh --htaccess /var/www/html --remove
+#
+# Changelog:
+# 1.1 — 2026-05-04: Removed OAI-SearchBot from blocklist. User-facing fetcher
+# bot, not a training crawler. Blocking it prevents your content from
+# being cited in AI search answers.
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Configuration ---
+DRY_RUN=false
+REMOVE=false
+HTACCESS_PATH=""
+DISTRO="" # debian or rhel
+CONF_FILE="" # set after distro detection
+APACHE_SVC="" # apache2 or httpd
+
+# --- Colors ---
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+info() { echo -e "${GREEN}[OK]${NC} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+usage() {
+ cat <&2
+ exit 1
+fi
+
+# --- Distro detection ---
+detect_distro() {
+ if [[ -f /etc/debian_version ]]; then
+ DISTRO="debian"
+ CONF_FILE="/etc/apache2/conf-available/bot-block.conf"
+ APACHE_SVC="apache2"
+ elif [[ -f /etc/redhat-release ]]; then
+ DISTRO="rhel"
+ CONF_FILE="/etc/httpd/conf.d/bot-block.conf"
+ APACHE_SVC="httpd"
+ else
+ echo -e "${RED}Error: Unsupported distribution (neither Debian/Ubuntu nor RHEL/CentOS)${NC}" >&2
+ exit 1
+ fi
+}
+
+detect_distro
+
+# --- Apache check ---
+if ! command -v apachectl &>/dev/null; then
+ echo -e "${RED}Error: Apache (${APACHE_SVC}) not found${NC}" >&2
+ exit 1
+fi
+
+# --- Bot-block rules content ---
+MANAGED_START="# bot-block-managed-start"
+MANAGED_END="# bot-block-managed-end"
+
+generate_rules() {
+ cat <<'RULES'
+# bot-block-managed-start
+# Bot-blocking rules for Apache — generated by add-apache-bot-block.sh
+# https://mylinux.work
+
+RewriteEngine On
+
+# AI scrapers
+RewriteCond %{HTTP_USER_AGENT} ABEvalBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} GPTBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} ClaudeBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} anthropic-ai [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} CCBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Bytespider [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} TikTokSpider [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} cohere-ai [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} PerplexityBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Diffbot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} MistralBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} YandexGPTBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} meta-externalagent [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Meta-ExternalFetcher [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} meta-webindexer [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} PetalBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Amazonbot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Amzn-SearchBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} AI2Bot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Timpibot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} img2dataset [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} YouBot [NC,OR]
+
+# SEO scrapers
+RewriteCond %{HTTP_USER_AGENT} MJ12bot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} SemrushBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} AhrefsBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} DotBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} DataForSeoBot [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} SERanking [NC,OR]
+
+# Vulnerability scanners
+RewriteCond %{HTTP_USER_AGENT} Nikto [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} sqlmap [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Nmap [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} masscan [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} ZmEu [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Morpheus [NC,OR]
+
+# Scraping frameworks
+RewriteCond %{HTTP_USER_AGENT} Scrapy [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} python-requests [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Go-http-client [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} Java/ [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} libwww-perl [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} trafilatura [NC]
+RewriteRule .* - [F]
+
+# Block broken srcset scrapers
+RewriteCond %{REQUEST_URI} %20[0-9]+w,https?:// [NC]
+RewriteRule .* - [F]
+# bot-block-managed-end
+RULES
+}
+
+# =====================================================
+# --remove mode
+# =====================================================
+if [[ "$REMOVE" == "true" ]]; then
+
+ # --- Remove from .htaccess ---
+ if [[ -n "$HTACCESS_PATH" ]]; then
+ HTACCESS_FILE="${HTACCESS_PATH%/}/.htaccess"
+ step "Removing bot-block rules from ${HTACCESS_FILE}"
+
+ if [[ ! -f "$HTACCESS_FILE" ]]; then
+ warn "File not found: ${HTACCESS_FILE} — nothing to remove"
+ exit 0
+ fi
+
+ if ! grep -q "$MANAGED_START" "$HTACCESS_FILE"; then
+ warn "No managed bot-block block found in ${HTACCESS_FILE}"
+ exit 0
+ fi
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo " Would strip managed block from ${HTACCESS_FILE}"
+ else
+ cp "$HTACCESS_FILE" "${HTACCESS_FILE}.bak.$(date +%s)"
+ warn "Backup created"
+ sed -i "/${MANAGED_START}/,/${MANAGED_END}/d" "$HTACCESS_FILE"
+ info "Managed block removed from ${HTACCESS_FILE}"
+ fi
+ exit 0
+ fi
+
+ # --- Remove server-wide conf ---
+ step "Removing bot-block configuration"
+
+ if [[ ! -f "$CONF_FILE" ]]; then
+ warn "Config not found: ${CONF_FILE} — nothing to remove"
+ exit 0
+ fi
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ if [[ "$DISTRO" == "debian" ]]; then
+ echo " Would run: a2disconf bot-block"
+ fi
+ echo " Would remove: ${CONF_FILE}"
+ echo " Would reload: ${APACHE_SVC}"
+ else
+ if [[ "$DISTRO" == "debian" ]]; then
+ a2disconf bot-block 2>/dev/null || true
+ info "Conf disabled (a2disconf)"
+ fi
+ rm -f "$CONF_FILE"
+ info "Removed ${CONF_FILE}"
+
+ step "Reloading ${APACHE_SVC}"
+ systemctl reload "$APACHE_SVC"
+ info "${APACHE_SVC} reloaded"
+ fi
+
+ echo ""
+ echo -e "${BOLD}Bot-block configuration removed.${NC}"
+ exit 0
+fi
+
+# =====================================================
+# --htaccess mode (install)
+# =====================================================
+if [[ -n "$HTACCESS_PATH" ]]; then
+ HTACCESS_FILE="${HTACCESS_PATH%/}/.htaccess"
+ step "Writing bot-block rules to ${HTACCESS_FILE}"
+
+ if [[ ! -d "$HTACCESS_PATH" ]]; then
+ echo -e "${RED}Error: Directory not found: ${HTACCESS_PATH}${NC}" >&2
+ exit 1
+ fi
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo " Would write managed block to ${HTACCESS_FILE}"
+ echo ""
+ echo -e "${BOLD}Dry-run complete — no changes made.${NC}"
+ exit 0
+ fi
+
+ # Back up existing .htaccess if it exists
+ if [[ -f "$HTACCESS_FILE" ]]; then
+ # Remove old managed block if present
+ if grep -q "$MANAGED_START" "$HTACCESS_FILE"; then
+ cp "$HTACCESS_FILE" "${HTACCESS_FILE}.bak.$(date +%s)"
+ warn "Backup created"
+ sed -i "/${MANAGED_START}/,/${MANAGED_END}/d" "$HTACCESS_FILE"
+ warn "Old managed block removed"
+ else
+ cp "$HTACCESS_FILE" "${HTACCESS_FILE}.bak.$(date +%s)"
+ warn "Existing .htaccess backed up"
+ fi
+ fi
+
+ # Append rules
+ generate_rules >> "$HTACCESS_FILE"
+ info "Bot-block rules written to ${HTACCESS_FILE}"
+
+ echo ""
+ echo -e "${BOLD}Done.${NC}"
+ echo ""
+ echo " File: ${HTACCESS_FILE}"
+ echo " Verify: curl -A 'GPTBot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+ echo " Expected: 403"
+ exit 0
+fi
+
+# =====================================================
+# Server-wide install (default)
+# =====================================================
+
+# Path of the backup taken in step 2; stays empty when there was no previous
+# conf. Step 4 uses it to undo the change if validation fails.
+CONF_BACKUP=""
+
+# --- Step 1: Enable mod_rewrite (Debian) ---
+if [[ "$DISTRO" == "debian" ]]; then
+    step "Enabling mod_rewrite"
+    if [[ "$DRY_RUN" == "true" ]]; then
+        echo " Would run: a2enmod rewrite"
+    else
+        # Best effort; a real problem surfaces in the configtest below
+        a2enmod rewrite 2>/dev/null || true
+        info "mod_rewrite enabled"
+    fi
+fi
+
+# --- Step 2: Write conf file ---
+step "Creating bot-block conf at ${CONF_FILE}"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+    echo " Would create: ${CONF_FILE}"
+else
+    if [[ -f "$CONF_FILE" ]]; then
+        CONF_BACKUP="${CONF_FILE}.bak.$(date +%s)"
+        cp "$CONF_FILE" "$CONF_BACKUP"
+        warn "Existing config backed up"
+    fi
+    generate_rules > "$CONF_FILE"
+    info "Config created: ${CONF_FILE}"
+fi
+
+# --- Step 3: Enable conf (Debian) ---
+if [[ "$DISTRO" == "debian" ]]; then
+    step "Enabling bot-block conf"
+    if [[ "$DRY_RUN" == "true" ]]; then
+        echo " Would run: a2enconf bot-block"
+    else
+        a2enconf bot-block 2>/dev/null || true
+        info "Conf enabled (a2enconf)"
+    fi
+fi
+
+# --- Step 4: Validate config ---
+step "Testing Apache configuration"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+    echo " Would run: apachectl configtest"
+else
+    if apachectl configtest 2>&1; then
+        info "Apache config valid"
+    else
+        echo -e "${RED}[ERROR] Apache config test failed${NC}" >&2
+        # Undo step 2/3 so a config that failed validation is not left in
+        # place for the next Apache restart to trip over.
+        if [[ -n "$CONF_BACKUP" ]]; then
+            cp "$CONF_BACKUP" "$CONF_FILE"
+            echo " Restored previous config from ${CONF_BACKUP}" >&2
+        else
+            if [[ "$DISTRO" == "debian" ]]; then
+                a2disconf bot-block 2>/dev/null || true
+            fi
+            rm -f -- "$CONF_FILE"
+            echo " Removed ${CONF_FILE} (no previous config existed)" >&2
+        fi
+        exit 1
+    fi
+fi
+
+# --- Step 5: Reload Apache ---
+step "Reloading ${APACHE_SVC}"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+    echo " Would run: systemctl reload ${APACHE_SVC}"
+else
+    systemctl reload "$APACHE_SVC"
+    info "${APACHE_SVC} reloaded"
+fi
+
+# =====================================================
+# Summary
+# =====================================================
+echo ""
+echo -e "${BOLD}Done.${NC}"
+echo ""
+echo " Config: ${CONF_FILE}"
+echo " Distro: ${DISTRO}"
+echo ""
+echo " Verify: curl -A 'GPTBot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+echo " Expected: 403"
+echo ""
+echo " Remove: sudo $(basename "$0") --remove"
diff --git a/add-fail2ban-ai-bots.sh b/add-fail2ban-ai-bots.sh
new file mode 100755
index 0000000..2ed0e67
--- /dev/null
+++ b/add-fail2ban-ai-bots.sh
@@ -0,0 +1,395 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-fail2ban-ai-bots.sh
+# Version: 1.1
+# Description: Adds a Fail2ban jail to block AI scrapers and unwanted bots
+# that ignore robots.txt. Installs filter + jail config and
+# reloads Fail2ban.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./add-fail2ban-ai-bots.sh
+# sudo ./add-fail2ban-ai-bots.sh --logpath /var/log/nginx/access.log
+# sudo ./add-fail2ban-ai-bots.sh --bantime 604800
+# sudo ./add-fail2ban-ai-bots.sh --dry-run
+#
+# Changelog:
+# 1.1 — 2026-05-04: Removed Claude-Web, Perplexity-User, ChatGPT-User, and
+# OAI-SearchBot from blocklist. These are user-facing fetcher bots that
+# retrieve content when someone pastes a URL into an AI chat or search.
+# Blocking them prevents your content from being cited in AI answers.
+# Training crawlers (ClaudeBot, PerplexityBot, GPTBot) remain blocked.
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+readonly VERSION="1.1"
+readonly SCRIPT_NAME="${0##*/}"
+
+LOGPATH="auto"
+BANTIME="86400"
+MAXRETRY="1"
+DRY_RUN=false
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+# NOTE(review): this region looks damaged. The here-document show_usage
+# printed, and everything up to a fail2ban-client presence check (only its
+# ">/dev/null; then" tail is visible), appears to be missing. main() calls
+# parse_args, check_root and check_fail2ban, none of which are defined in what
+# remains. Restore the lost text from the original script.
+show_usage() {
+ cat </dev/null; then
+ log_error "Fail2ban is not installed"
+ log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
+ exit 1
+ fi
+
+ # The service must already be running; the script does not start it.
+ if ! systemctl is-active --quiet fail2ban; then
+ log_error "Fail2ban is not running"
+ exit 1
+ fi
+
+ log_info "Fail2ban is installed and running"
+}
+
+detect_logpath() {
+ if [[ "$LOGPATH" != "auto" ]]; then
+ # Support glob patterns (e.g. /var/log/apache2/domains/*.log)
+ # shellcheck disable=SC2086,SC2206
+ local matches=( $LOGPATH )
+ if [[ ${#matches[@]} -eq 0 || ! -f "${matches[0]}" ]]; then
+ log_error "Log file not found: $LOGPATH"
+ exit 1
+ fi
+ log_info "Using specified log path: $LOGPATH (${#matches[@]} file(s))"
+ return
+ fi
+
+ log_step "Auto-detecting web server access log..."
+
+ # HestiaCP / VestaCP — apache domains (check first: has full access logs with user agents)
+ local hestia_apache=( /var/log/apache2/domains/*.log )
+ if [[ -f "${hestia_apache[0]:-}" ]]; then
+ LOGPATH="/var/log/apache2/domains/*.log"
+ log_info "Detected HestiaCP/VestaCP apache: $LOGPATH (${#hestia_apache[@]} file(s))"
+ return
+ fi
+
+ # HestiaCP / VestaCP — nginx domains (proxy logs only in nginx+apache mode)
+ local hestia_nginx=( /var/log/nginx/domains/*.log )
+ if [[ -f "${hestia_nginx[0]:-}" ]]; then
+ LOGPATH="/var/log/nginx/domains/*.log"
+ log_info "Detected HestiaCP/VestaCP nginx: $LOGPATH (${#hestia_nginx[@]} file(s))"
+ return
+ fi
+
+ # Nginx (standard)
+ if [[ -f /var/log/nginx/access.log ]]; then
+ LOGPATH="/var/log/nginx/access.log"
+ log_info "Detected nginx: $LOGPATH"
+ return
+ fi
+
+ # Apache (Debian/Ubuntu)
+ if [[ -f /var/log/apache2/access.log ]]; then
+ LOGPATH="/var/log/apache2/access.log"
+ log_info "Detected apache2: $LOGPATH"
+ return
+ fi
+
+ # Apache (RHEL/Rocky)
+ if [[ -f /var/log/httpd/access_log ]]; then
+ LOGPATH="/var/log/httpd/access_log"
+ log_info "Detected httpd: $LOGPATH"
+ return
+ fi
+
+ log_error "Could not auto-detect access log. Use --logpath to specify."
+ exit 1
+}
+
+# ============================================================================
+# INSTALL FILTER
+# ============================================================================
+
+install_filter() {
+ local filter_file="/etc/fail2ban/filter.d/ai-bots.conf"
+
+ log_step "Installing filter: $filter_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $filter_file"
+ echo ""
+ generate_filter
+ echo ""
+ return
+ fi
+
+ if [[ -f "$filter_file" ]]; then
+ log_warn "Filter already exists — backing up to ${filter_file}.bak"
+ cp "$filter_file" "${filter_file}.bak"
+ fi
+
+ generate_filter > "$filter_file"
+ log_info "Filter installed: $filter_file"
+}
+
+# Print the ai-bots filter definition to stdout.
+# The here-document is quoted, so the regex is emitted literally. The failregex
+# must start with fail2ban's <HOST> tag: that is how fail2ban extracts the
+# client address to ban, and without it the pattern (which began "^ ") could
+# never match a log line that starts with an IP address.
+generate_filter() {
+    cat <<'EOF'
+# Fail2ban filter to block AI scrapers and unwanted bots
+# https://mylinux.work
+#
+# Matches common AI crawler user agents in web server access logs.
+# These bots scrape content for AI model training and typically
+# ignore robots.txt directives.
+
+[Definition]
+
+# Match AI and unwanted bot user agents in access logs
+# Supports both combined and common log formats
+failregex = ^<HOST> \S+ \S+ \[.*\] "\S+ \S+ \S+" \d+ \d+ "\S+" ".*(?:ABEvalBot|GPTBot|CCBot|ClaudeBot|anthropic-ai|Bytespider|TikTokSpider|cohere-ai|meta-externalagent|Meta-ExternalFetcher|PetalBot|Amazonbot|AI2Bot|Ai2Bot-Dolma|YouBot|PerplexityBot|Diffbot|Applebot-Extended|Google-Extended|MistralBot|YandexGPTBot|MJ12bot|Scrapy|DataForSeoBot|Timpibot|img2dataset|HanaleiBot|SemrushBot|AhrefsBot|DotBot|SERanking|trafilatura).*"
+
+ignoreregex =
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# ============================================================================
+# INSTALL JAIL
+# ============================================================================
+
+install_jail() {
+ local jail_file="/etc/fail2ban/jail.d/ai-bots.conf"
+
+ log_step "Installing jail: $jail_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $jail_file"
+ echo ""
+ generate_jail
+ echo ""
+ return
+ fi
+
+ if [[ -f "$jail_file" ]]; then
+ log_warn "Jail config already exists — backing up to ${jail_file}.bak"
+ cp "$jail_file" "${jail_file}.bak"
+ fi
+
+ generate_jail > "$jail_file"
+ log_info "Jail config installed: $jail_file"
+}
+
+# NOTE(review): this region looks damaged. The here-document generate_jail
+# printed and the opening of the following function appear to be missing; the
+# remainder reads like the tail of reload_fail2ban, which main() calls but
+# which is not defined in what remains. Restore the lost text from the
+# original script.
+generate_jail() {
+ cat </dev/null; then
+ log_warn "Config test not available — reloading directly"
+ fi
+
+ fail2ban-client reload
+ # Pause before checking the service state (presumably to let the reload settle).
+ sleep 2
+
+ if systemctl is-active --quiet fail2ban; then
+ log_info "Fail2ban reloaded successfully"
+ else
+ log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
+ exit 1
+ fi
+}
+
+# Confirm that the ai-bots jail is known to fail2ban.
+# Dry-run mode only announces the check. Otherwise the jail status is printed
+# and the script exits 1 when fail2ban-client cannot report it.
+verify_jail() {
+    log_step "Verifying ai-bots jail..."
+
+    if $DRY_RUN; then
+        log_info "[DRY RUN] Would verify jail status"
+        return
+    fi
+
+    echo ""
+    if ! fail2ban-client status ai-bots 2>/dev/null; then
+        log_error "Jail 'ai-bots' is not running — check: fail2ban-client status"
+        log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/ai-bots.conf"
+        exit 1
+    fi
+
+    echo ""
+    log_info "AI bots jail is active and monitoring $LOGPATH"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, print the banner, run the install steps in
+# order, then print a summary with follow-up commands.
+# Arguments: "$@" - command-line options, forwarded to parse_args.
+main() {
+    parse_args "$@"
+
+    echo ""
+    echo "============================================"
+    echo " Fail2ban AI Bot Blocker v${VERSION}"
+    echo " https://mylinux.work"
+    echo "============================================"
+    echo ""
+
+    check_root
+    check_fail2ban
+    detect_logpath
+    install_filter
+    install_jail
+    reload_fail2ban
+    verify_jail
+
+    echo ""
+    echo "============================================"
+    echo " Setup Complete"
+    echo "============================================"
+    echo ""
+    echo " Jail: ai-bots"
+    echo " Log: $LOGPATH"
+    echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
+    echo " Max retry: $MAXRETRY"
+    echo ""
+    echo " Useful commands:"
+    echo " fail2ban-client status ai-bots"
+    # The unban hint needs its address placeholder to be a usable command.
+    echo " fail2ban-client set ai-bots unbanip <IP>"
+    echo " fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/ai-bots.conf"
+    echo ""
+}
+
+main "$@"
diff --git a/add-fail2ban-head-crawler.sh b/add-fail2ban-head-crawler.sh
new file mode 100755
index 0000000..5a87b39
--- /dev/null
+++ b/add-fail2ban-head-crawler.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-fail2ban-head-crawler.sh
+# Version: 1.0
+# Description: Adds a Fail2ban jail to block HEAD-only crawlers — bots that
+# systematically send HEAD requests with no referer to probe or
+# index your site while spoofing real browser user agents.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./add-fail2ban-head-crawler.sh
+# sudo ./add-fail2ban-head-crawler.sh --logpath /var/log/nginx/access.log
+# sudo ./add-fail2ban-head-crawler.sh --maxretry 10
+# sudo ./add-fail2ban-head-crawler.sh --dry-run
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="${0##*/}"
+
+LOGPATH="auto"
+BANTIME="86400"
+MAXRETRY="5"
+FINDTIME="300"
+DRY_RUN=false
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+# NOTE(review): this region looks damaged. The here-document show_usage
+# printed, and everything up to a fail2ban-client presence check (only its
+# ">/dev/null; then" tail is visible), appears to be missing. main() calls
+# parse_args, check_root and check_fail2ban, none of which are defined in what
+# remains. Restore the lost text from the original script.
+show_usage() {
+ cat </dev/null; then
+ log_error "Fail2ban is not installed"
+ log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
+ exit 1
+ fi
+
+ # The service must already be running; the script does not start it.
+ if ! systemctl is-active --quiet fail2ban; then
+ log_error "Fail2ban is not running"
+ exit 1
+ fi
+
+ log_info "Fail2ban is installed and running"
+}
+
+detect_logpath() {
+ if [[ "$LOGPATH" != "auto" ]]; then
+ # shellcheck disable=SC2086
+ local matches=( $LOGPATH )
+ if [[ ${#matches[@]} -eq 0 || ! -f "${matches[0]}" ]]; then
+ log_error "Log file not found: $LOGPATH"
+ exit 1
+ fi
+ log_info "Using specified log path: $LOGPATH"
+ return
+ fi
+
+ log_step "Auto-detecting web server access log..."
+
+ # HestiaCP — apache domains
+ local hestia_apache=( /var/log/apache2/domains/*.log )
+ if [[ -f "${hestia_apache[0]:-}" ]]; then
+ LOGPATH="/var/log/apache2/domains/*.log"
+ log_info "Detected HestiaCP apache: $LOGPATH"
+ return
+ fi
+
+ # HestiaCP — nginx domains
+ local hestia_nginx=( /var/log/nginx/domains/*.log )
+ if [[ -f "${hestia_nginx[0]:-}" ]]; then
+ LOGPATH="/var/log/nginx/domains/*.log"
+ log_info "Detected HestiaCP nginx: $LOGPATH"
+ return
+ fi
+
+ # Nginx (standard)
+ if [[ -f /var/log/nginx/access.log ]]; then
+ LOGPATH="/var/log/nginx/access.log"
+ log_info "Detected nginx: $LOGPATH"
+ return
+ fi
+
+ # Apache (Debian/Ubuntu)
+ if [[ -f /var/log/apache2/access.log ]]; then
+ LOGPATH="/var/log/apache2/access.log"
+ log_info "Detected apache2: $LOGPATH"
+ return
+ fi
+
+ # Apache (RHEL/Rocky)
+ if [[ -f /var/log/httpd/access_log ]]; then
+ LOGPATH="/var/log/httpd/access_log"
+ log_info "Detected httpd: $LOGPATH"
+ return
+ fi
+
+ log_error "Could not auto-detect access log. Use --logpath to specify."
+ exit 1
+}
+
+# ============================================================================
+# REMOVE
+# ============================================================================
+
+do_remove() {
+ local filter_file="/etc/fail2ban/filter.d/head-crawler.conf"
+ local jail_file="/etc/fail2ban/jail.d/head-crawler.conf"
+
+ log_step "Removing HEAD crawler jail..."
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would remove $filter_file"
+ log_info "[DRY RUN] Would remove $jail_file"
+ log_info "[DRY RUN] Would reload fail2ban"
+ return
+ fi
+
+ if [[ -f "$jail_file" ]]; then
+ rm -f "$jail_file"
+ log_info "Removed: $jail_file"
+ else
+ log_warn "Jail config not found: $jail_file"
+ fi
+
+ if [[ -f "$filter_file" ]]; then
+ rm -f "$filter_file"
+ log_info "Removed: $filter_file"
+ else
+ log_warn "Filter not found: $filter_file"
+ fi
+
+ fail2ban-client reload
+ sleep 2
+ log_info "Fail2ban reloaded — head-crawler jail removed"
+ exit 0
+}
+
+# ============================================================================
+# INSTALL FILTER
+# ============================================================================
+
+install_filter() {
+ local filter_file="/etc/fail2ban/filter.d/head-crawler.conf"
+
+ log_step "Installing filter: $filter_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $filter_file"
+ echo ""
+ generate_filter
+ echo ""
+ return
+ fi
+
+ if [[ -f "$filter_file" ]]; then
+ log_warn "Filter already exists — backing up to ${filter_file}.bak"
+ cp "$filter_file" "${filter_file}.bak"
+ fi
+
+ generate_filter > "$filter_file"
+ log_info "Filter installed: $filter_file"
+}
+
+# Print the head-crawler filter definition to stdout.
+# The here-document is quoted, so the regex is emitted literally. The failregex
+# must start with fail2ban's <HOST> tag: that is how fail2ban extracts the
+# client address to ban, and without it the pattern (which began "^ ") could
+# never match a log line that starts with an IP address.
+generate_filter() {
+    cat <<'EOF'
+# Fail2ban filter to block HEAD-only crawlers
+# https://mylinux.work
+#
+# Catches bots that send HEAD requests with no referer. These are typically
+# scrapers, SEO tools, or reconnaissance bots that spoof real browser user
+# agents and rotate through cloud IPs to avoid detection.
+#
+# The filter matches:
+# - HTTP HEAD method
+# - No referer (logged as "-")
+# - Any user agent (spoofed or otherwise)
+#
+# Combined with a low maxretry (default: 5 in 5 min), this catches
+# systematic crawlers while ignoring occasional legitimate HEAD requests
+# (browser prefetch, monitoring probes).
+
+[Definition]
+
+# HEAD request with no referer — combined log format
+# Format: IP - - [date] "HEAD /path HTTP/x.x" status size "-" "user agent"
+failregex = ^<HOST> \S+ \S+ \[.*\] "HEAD \S+ \S+" \d+ \d+ "-" ".*"
+
+ignoreregex =
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# ============================================================================
+# INSTALL JAIL
+# ============================================================================
+
+install_jail() {
+ local jail_file="/etc/fail2ban/jail.d/head-crawler.conf"
+
+ log_step "Installing jail: $jail_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $jail_file"
+ echo ""
+ generate_jail
+ echo ""
+ return
+ fi
+
+ if [[ -f "$jail_file" ]]; then
+ log_warn "Jail config already exists — backing up to ${jail_file}.bak"
+ cp "$jail_file" "${jail_file}.bak"
+ fi
+
+ generate_jail > "$jail_file"
+ log_info "Jail config installed: $jail_file"
+}
+
+# NOTE(review): this region looks damaged. The here-document generate_jail
+# printed and the opening of the following function appear to be missing; the
+# remainder reads like the tail of reload_fail2ban, which main() calls but
+# which is not defined in what remains. Restore the lost text from the
+# original script.
+generate_jail() {
+ cat </dev/null; then
+ log_warn "Config test not available — reloading directly"
+ fi
+
+ fail2ban-client reload
+ # Pause before checking the service state (presumably to let the reload settle).
+ sleep 2
+
+ if systemctl is-active --quiet fail2ban; then
+ log_info "Fail2ban reloaded successfully"
+ else
+ log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
+ exit 1
+ fi
+}
+
+# Confirm that the head-crawler jail is known to fail2ban.
+# Dry-run mode only announces the check. Otherwise the jail status is printed
+# and the script exits 1 when fail2ban-client cannot report it.
+verify_jail() {
+    log_step "Verifying head-crawler jail..."
+
+    if $DRY_RUN; then
+        log_info "[DRY RUN] Would verify jail status"
+        return
+    fi
+
+    echo ""
+    if ! fail2ban-client status head-crawler 2>/dev/null; then
+        log_error "Jail 'head-crawler' is not running — check: fail2ban-client status"
+        log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/head-crawler.conf"
+        exit 1
+    fi
+
+    echo ""
+    log_info "HEAD crawler jail is active and monitoring $LOGPATH"
+}
+
+# Dry-run helper: run fail2ban-regex with the HEAD-crawler pattern against the
+# first file matched by LOGPATH and show the last five lines of its report.
+# Does nothing outside dry-run mode or when that file does not exist.
+test_against_logs() {
+    if ! $DRY_RUN; then
+        return 0
+    fi
+
+    # Left unquoted on purpose so a glob in LOGPATH expands.
+    # shellcheck disable=SC2086,SC2206
+    local matches=( $LOGPATH )
+    if [[ -f "${matches[0]:-}" ]]; then
+        log_step "Testing filter against existing logs..."
+        echo ""
+        # The pattern must carry <HOST>: fail2ban uses it to extract the client
+        # address, and without it no line starting with an IP can match.
+        fail2ban-regex "${matches[0]}" /dev/stdin <<'FILTER' 2>&1 | tail -5
+[Definition]
+failregex = ^<HOST> \S+ \S+ \[.*\] "HEAD \S+ \S+" \d+ \d+ "-" ".*"
+ignoreregex =
+FILTER
+        echo ""
+    fi
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, print the banner, then either remove the jail or
+# run the install steps in order and print a summary with follow-up commands.
+# Arguments: "$@" - command-line options, forwarded to parse_args.
+main() {
+    parse_args "$@"
+
+    echo ""
+    echo "============================================"
+    echo " Fail2ban HEAD Crawler Blocker v${VERSION}"
+    echo " https://mylinux.work"
+    echo "============================================"
+    echo ""
+
+    check_root
+    check_fail2ban
+
+    # REMOVE has no entry in the DEFAULTS section; fall back to false so an
+    # unset value cannot abort the script under `set -u`.
+    if ${REMOVE:-false}; then
+        do_remove
+    fi
+
+    detect_logpath
+    test_against_logs
+    install_filter
+    install_jail
+    reload_fail2ban
+    verify_jail
+
+    echo ""
+    echo "============================================"
+    echo " Setup Complete"
+    echo "============================================"
+    echo ""
+    echo " Jail: head-crawler"
+    echo " Log: $LOGPATH"
+    echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
+    echo " Max retry: $MAXRETRY (HEAD requests before ban)"
+    echo " Find time: ${FINDTIME}s ($(( FINDTIME / 60 ))m window)"
+    echo ""
+    echo " Useful commands:"
+    echo " fail2ban-client status head-crawler"
+    # The unban hint needs its address placeholder to be a usable command.
+    echo " fail2ban-client set head-crawler unbanip <IP>"
+    echo " fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/head-crawler.conf"
+    echo ""
+}
+
+main "$@"
diff --git a/add-fail2ban-image-scraper.sh b/add-fail2ban-image-scraper.sh
new file mode 100755
index 0000000..d3c5c98
--- /dev/null
+++ b/add-fail2ban-image-scraper.sh
@@ -0,0 +1,474 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-fail2ban-image-scraper.sh
+# Version: 1.1
+# Description: Adds a Fail2ban jail to block image scrapers — bots that
+# directly request image files with no referer. Real browsers
+# always send a referer when loading images (the page containing
+# the <img> tag). Direct image requests with no referer are
+# almost always scrapers harvesting images for AI training
+# datasets or content theft.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./add-fail2ban-image-scraper.sh
+# sudo ./add-fail2ban-image-scraper.sh --logpath /var/log/nginx/access.log
+# sudo ./add-fail2ban-image-scraper.sh --maxretry 3
+# sudo ./add-fail2ban-image-scraper.sh --dry-run
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+readonly VERSION="1.1"
+readonly SCRIPT_NAME="${0##*/}"
+
+LOGPATH="auto"
+BANTIME="86400"
+MAXRETRY="5"
+FINDTIME="300"
+IGNOREIP=""
+DRY_RUN=false
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+# NOTE(review): this region looks damaged. The here-document show_usage
+# printed, and everything up to a fail2ban-client presence check (only its
+# ">/dev/null; then" tail is visible), appears to be missing. main() calls
+# parse_args, check_root and check_fail2ban, none of which are defined in what
+# remains. Restore the lost text from the original script.
+show_usage() {
+ cat </dev/null; then
+ log_error "Fail2ban is not installed"
+ log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
+ exit 1
+ fi
+
+ # The service must already be running; the script does not start it.
+ if ! systemctl is-active --quiet fail2ban; then
+ log_error "Fail2ban is not running"
+ exit 1
+ fi
+
+ log_info "Fail2ban is installed and running"
+}
+
+detect_logpath() {
+ if [[ "$LOGPATH" != "auto" ]]; then
+ # shellcheck disable=SC2086
+ local matches=( $LOGPATH )
+ if [[ ${#matches[@]} -eq 0 || ! -f "${matches[0]}" ]]; then
+ log_error "Log file not found: $LOGPATH"
+ exit 1
+ fi
+ log_info "Using specified log path: $LOGPATH"
+ return
+ fi
+
+ log_step "Auto-detecting web server access log..."
+
+ # HestiaCP — apache domains
+ local hestia_apache=( /var/log/apache2/domains/*.log )
+ if [[ -f "${hestia_apache[0]:-}" ]]; then
+ LOGPATH="/var/log/apache2/domains/*.log"
+ log_info "Detected HestiaCP apache: $LOGPATH"
+ return
+ fi
+
+ # HestiaCP — nginx domains
+ local hestia_nginx=( /var/log/nginx/domains/*.log )
+ if [[ -f "${hestia_nginx[0]:-}" ]]; then
+ LOGPATH="/var/log/nginx/domains/*.log"
+ log_info "Detected HestiaCP nginx: $LOGPATH"
+ return
+ fi
+
+ # Nginx (standard)
+ if [[ -f /var/log/nginx/access.log ]]; then
+ LOGPATH="/var/log/nginx/access.log"
+ log_info "Detected nginx: $LOGPATH"
+ return
+ fi
+
+ # Apache (Debian/Ubuntu)
+ if [[ -f /var/log/apache2/access.log ]]; then
+ LOGPATH="/var/log/apache2/access.log"
+ log_info "Detected apache2: $LOGPATH"
+ return
+ fi
+
+ # Apache (RHEL/Rocky)
+ if [[ -f /var/log/httpd/access_log ]]; then
+ LOGPATH="/var/log/httpd/access_log"
+ log_info "Detected httpd: $LOGPATH"
+ return
+ fi
+
+ log_error "Could not auto-detect access log. Use --logpath to specify."
+ exit 1
+}
+
+# ============================================================================
+# REMOVE
+# ============================================================================
+
+do_remove() {
+ local filter_file="/etc/fail2ban/filter.d/image-scraper.conf"
+ local jail_file="/etc/fail2ban/jail.d/image-scraper.conf"
+
+ log_step "Removing image scraper jail..."
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would remove $filter_file"
+ log_info "[DRY RUN] Would remove $jail_file"
+ log_info "[DRY RUN] Would reload fail2ban"
+ return
+ fi
+
+ if [[ -f "$jail_file" ]]; then
+ rm -f "$jail_file"
+ log_info "Removed: $jail_file"
+ else
+ log_warn "Jail config not found: $jail_file"
+ fi
+
+ if [[ -f "$filter_file" ]]; then
+ rm -f "$filter_file"
+ log_info "Removed: $filter_file"
+ else
+ log_warn "Filter not found: $filter_file"
+ fi
+
+ fail2ban-client reload
+ sleep 2
+ log_info "Fail2ban reloaded — image-scraper jail removed"
+ exit 0
+}
+
+# ============================================================================
+# INSTALL FILTER
+# ============================================================================
+
+install_filter() {
+ local filter_file="/etc/fail2ban/filter.d/image-scraper.conf"
+
+ log_step "Installing filter: $filter_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $filter_file"
+ echo ""
+ generate_filter
+ echo ""
+ return
+ fi
+
+ if [[ -f "$filter_file" ]]; then
+ log_warn "Filter already exists — backing up to ${filter_file}.bak"
+ cp "$filter_file" "${filter_file}.bak"
+ fi
+
+ generate_filter > "$filter_file"
+ log_info "Filter installed: $filter_file"
+}
+
+# Print the image-scraper filter definition to stdout.
+# The here-document is quoted, so the regex is emitted literally. The failregex
+# must start with fail2ban's <HOST> tag: that is how fail2ban extracts the
+# client address to ban, and without it the pattern (which began "^ ") could
+# never match a log line that starts with an IP address.
+generate_filter() {
+    cat <<'EOF'
+# Fail2ban filter to block image scrapers
+# https://mylinux.work
+#
+# Catches bots that directly request image files with no referer.
+# When a real browser loads an image from a web page, it sends the page
+# URL as the referer header. Direct image requests with no referer
+# indicate scraping — typically for AI training datasets or content theft.
+#
+# Matches: GET requests for .png, .jpg, .jpeg, .gif, .webp, .svg, .avif
+# with referer logged as "-" (absent/empty).
+#
+# Does NOT match .ico (favicons are legitimately requested without referer).
+
+[Definition]
+
+# Direct image request with no referer — combined log format
+# Format: IP - - [date] "GET /path/image.png HTTP/x.x" status size "-" "user agent"
+failregex = ^<HOST> \S+ \S+ \[.*\] "GET \S+\.(?:png|jpe?g|gif|webp|svg|avif) \S+" \d+ \d+ "-" ".*"
+
+ignoreregex =
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# ============================================================================
+# INSTALL JAIL
+# ============================================================================
+
+install_jail() {
+ local jail_file="/etc/fail2ban/jail.d/image-scraper.conf"
+
+ log_step "Installing jail: $jail_file"
+
+ if $DRY_RUN; then
+ log_info "[DRY RUN] Would create $jail_file"
+ echo ""
+ generate_jail
+ echo ""
+ return
+ fi
+
+ if [[ -f "$jail_file" ]]; then
+ log_warn "Jail config already exists — backing up to ${jail_file}.bak"
+ cp "$jail_file" "${jail_file}.bak"
+ fi
+
+ generate_jail > "$jail_file"
+ log_info "Jail config installed: $jail_file"
+}
+
+# NOTE(review): this region looks damaged. The here-document generate_jail
+# printed and the opening of the following function appear to be missing; the
+# remainder reads like the tail of reload_fail2ban, which main() calls but
+# which is not defined in what remains. Restore the lost text from the
+# original script.
+generate_jail() {
+ cat </dev/null; then
+ log_warn "Config test not available — reloading directly"
+ fi
+
+ fail2ban-client reload
+ # Pause before checking the service state (presumably to let the reload settle).
+ sleep 2
+
+ if systemctl is-active --quiet fail2ban; then
+ log_info "Fail2ban reloaded successfully"
+ else
+ log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
+ exit 1
+ fi
+}
+
+# Confirm that the image-scraper jail is known to fail2ban.
+# Dry-run mode only announces the check. Otherwise the jail status is printed
+# and the script exits 1 when fail2ban-client cannot report it.
+verify_jail() {
+    log_step "Verifying image-scraper jail..."
+
+    if $DRY_RUN; then
+        log_info "[DRY RUN] Would verify jail status"
+        return
+    fi
+
+    echo ""
+    if ! fail2ban-client status image-scraper 2>/dev/null; then
+        log_error "Jail 'image-scraper' is not running — check: fail2ban-client status"
+        log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/image-scraper.conf"
+        exit 1
+    fi
+
+    echo ""
+    log_info "Image scraper jail is active and monitoring $LOGPATH"
+}
+
+# Dry-run helper: run fail2ban-regex with the image-scraper pattern against the
+# first file matched by LOGPATH and show the last five lines of its report.
+# Does nothing outside dry-run mode or when that file does not exist.
+test_against_logs() {
+    if ! $DRY_RUN; then
+        return 0
+    fi
+
+    # Left unquoted on purpose so a glob in LOGPATH expands.
+    # shellcheck disable=SC2086,SC2206
+    local matches=( $LOGPATH )
+    if [[ -f "${matches[0]:-}" ]]; then
+        log_step "Testing filter against existing logs..."
+        echo ""
+        # The pattern must carry <HOST>: fail2ban uses it to extract the client
+        # address, and without it no line starting with an IP can match.
+        fail2ban-regex "${matches[0]}" /dev/stdin <<'FILTER' 2>&1 | tail -5
+[Definition]
+failregex = ^<HOST> \S+ \S+ \[.*\] "GET \S+\.(?:png|jpe?g|gif|webp|svg|avif) \S+" \d+ \d+ "-" ".*"
+ignoreregex =
+FILTER
+        echo ""
+    fi
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, print the banner, then either remove the jail or
+# run the install steps in order and print a summary with follow-up commands.
+# Arguments: "$@" - command-line options, forwarded to parse_args.
+main() {
+    parse_args "$@"
+
+    echo ""
+    echo "============================================"
+    echo " Fail2ban Image Scraper Blocker v${VERSION}"
+    echo " https://mylinux.work"
+    echo "============================================"
+    echo ""
+
+    check_root
+    check_fail2ban
+
+    # REMOVE has no entry in the DEFAULTS section; fall back to false so an
+    # unset value cannot abort the script under `set -u`.
+    if ${REMOVE:-false}; then
+        do_remove
+    fi
+
+    detect_logpath
+    test_against_logs
+    install_filter
+    install_jail
+    reload_fail2ban
+    verify_jail
+
+    echo ""
+    echo "============================================"
+    echo " Setup Complete"
+    echo "============================================"
+    echo ""
+    echo " Jail: image-scraper"
+    echo " Log: $LOGPATH"
+    echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
+    echo " Max retry: $MAXRETRY (direct image requests before ban)"
+    echo " Find time: ${FINDTIME}s ($(( FINDTIME / 60 ))m window)"
+    if [[ -n "$IGNOREIP" ]]; then
+        echo " Ignore: $IGNOREIP"
+    fi
+    echo ""
+    echo " Useful commands:"
+    echo " fail2ban-client status image-scraper"
+    # The unban hint needs its address placeholder to be a usable command.
+    echo " fail2ban-client set image-scraper unbanip <IP>"
+    echo " fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/image-scraper.conf"
+    echo ""
+}
+
+main "$@"
diff --git a/add-fail2ban-nginx-hardening.sh b/add-fail2ban-nginx-hardening.sh
new file mode 100755
index 0000000..4ba8d54
--- /dev/null
+++ b/add-fail2ban-nginx-hardening.sh
@@ -0,0 +1,558 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-fail2ban-nginx-hardening.sh
+# Version: 1.0
+# Description: Adds custom Fail2ban jails to block vulnerability scanners,
+# script probes, and path enumeration attacks on nginx servers.
+# Installs filters + jails and reloads Fail2ban.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./add-fail2ban-nginx-hardening.sh
+# sudo ./add-fail2ban-nginx-hardening.sh --logpath /var/log/nginx/access.log
+# sudo ./add-fail2ban-nginx-hardening.sh --bantime 604800
+# sudo ./add-fail2ban-nginx-hardening.sh --dry-run
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="${0##*/}"
+
+ACCESS_LOGPATH="auto"
+ERROR_LOGPATH="auto"
+BANTIME="86400"
+DRY_RUN=false
+SKIP_JAILS=""
+ALLOW_EXTENSIONS=""
+ALLOW_PATHS=""
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+# NOTE(review): this region looks damaged. The here-document show_usage
+# printed, and everything up to a fail2ban-client presence check (only its
+# ">/dev/null; then" tail is visible), appears to be missing. main() calls
+# parse_args, check_root and check_fail2ban, none of which are defined in what
+# remains. Restore the lost text from the original script.
+show_usage() {
+ cat </dev/null; then
+ log_error "Fail2ban is not installed"
+ log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
+ exit 1
+ fi
+
+ # The service must already be running; the script does not start it.
+ if ! systemctl is-active --quiet fail2ban; then
+ log_error "Fail2ban is not running"
+ exit 1
+ fi
+
+ log_info "Fail2ban is installed and running"
+}
+
+detect_logpaths() {
+ # Access log
+ if [[ "$ACCESS_LOGPATH" == "auto" ]]; then
+ if [[ -f /var/log/nginx/access.log ]]; then
+ ACCESS_LOGPATH="/var/log/nginx/access.log"
+ else
+ log_error "Could not find nginx access log. Use --logpath to specify."
+ exit 1
+ fi
+ elif [[ ! -f "$ACCESS_LOGPATH" ]]; then
+ log_error "Access log not found: $ACCESS_LOGPATH"
+ exit 1
+ fi
+ log_info "Access log: $ACCESS_LOGPATH"
+
+ # Error log
+ if [[ "$ERROR_LOGPATH" == "auto" ]]; then
+ if [[ -f /var/log/nginx/error.log ]]; then
+ ERROR_LOGPATH="/var/log/nginx/error.log"
+ else
+ ERROR_LOGPATH="$ACCESS_LOGPATH"
+ log_warn "Error log not found — using access log for all jails"
+ fi
+ fi
+ log_info "Error log: $ERROR_LOGPATH"
+}
+
+should_skip() {
+ local jail="$1"
+ [[ ",$SKIP_JAILS," == *",$jail,"* ]]
+}
+
+# Build extension regex excluding allowed extensions
+build_extension_regex() {
+ local all_exts="php|asp|aspx|jsp|cgi|exe|pl"
+ if [[ -n "$ALLOW_EXTENSIONS" ]]; then
+ local result=""
+ IFS='|' read -ra EXT_ARRAY <<< "${all_exts}"
+ for ext in "${EXT_ARRAY[@]}"; do
+ if [[ ",$ALLOW_EXTENSIONS," != *",$ext,"* ]]; then
+ [[ -n "$result" ]] && result="${result}|"
+ result="${result}${ext}"
+ fi
+ done
+ if [[ -z "$result" ]]; then
+ log_warn "All extensions whitelisted — skipping nginx-noscript"
+ return 1
+ fi
+ echo "$result"
+ else
+ echo "$all_exts"
+ fi
+ return 0
+}
+
+# Build path ignore regex from allowed paths
+build_path_ignoreregex() {
+ if [[ -z "$ALLOW_PATHS" ]]; then
+ echo ""
+ return
+ fi
+ local ignore_parts=""
+ IFS=',' read -ra PATH_ARRAY <<< "$ALLOW_PATHS"
+ for p in "${PATH_ARRAY[@]}"; do
+ p=$(echo "$p" | xargs | sed 's|^/||')
+ [[ -n "$ignore_parts" ]] && ignore_parts="${ignore_parts}|"
+ ignore_parts="${ignore_parts}/${p}"
+ done
+ echo "ignoreregex = ^ \\S+ \\S+ \\[.*\\] \"\\S+ [^\"]*($ignore_parts)[^\"]*HTTP"
+}
+
+# ============================================================================
+# HELPER — write file with backup
+# ============================================================================
+
+# Install a config file whose content arrives on stdin.
+# Arguments: $1 - destination path
+#            $2 - human-readable label used in log messages
+# In dry-run mode stdin is echoed to stdout and nothing is written. Otherwise
+# an existing file is first copied to <file>.bak.
+write_config() {
+    local dest="$1"
+    local what="$2"
+
+    if ! $DRY_RUN; then
+        if [[ -f "$dest" ]]; then
+            log_warn "$what already exists — backing up to ${dest}.bak"
+            cp "$dest" "${dest}.bak"
+        fi
+        cat > "$dest"
+        log_info "$what installed: $dest"
+    else
+        log_info "[DRY RUN] Would create $dest"
+        echo ""
+        cat
+        echo ""
+    fi
+}
+
+# ============================================================================
+# JAIL 1: nginx-noscript
+# ============================================================================
+
+# Install the nginx-noscript filter and jail unless the jail is skipped or
+# build_extension_regex reports that nothing is left to block.
+install_noscript() {
+    if should_skip "noscript"; then
+        log_info "Skipping nginx-noscript (--skip)"
+        return
+    fi
+
+    # A failing build_extension_regex means nothing to do, not an error.
+    local blocked_exts
+    if ! blocked_exts=$(build_extension_regex); then
+        return 0
+    fi
+
+    log_step "Installing nginx-noscript filter and jail..."
+    if [[ -n "$ALLOW_EXTENSIONS" ]]; then
+        log_info "Whitelisted extensions: $ALLOW_EXTENSIONS"
+    fi
+    log_info "Blocking extensions: $blocked_exts"
+
+    # Filter first, then the jail that refers to it.
+    generate_noscript_filter "$blocked_exts" \
+        | write_config /etc/fail2ban/filter.d/nginx-noscript.conf "nginx-noscript filter"
+
+    generate_noscript_jail \
+        | write_config /etc/fail2ban/jail.d/nginx-noscript.conf "nginx-noscript jail"
+}
+
+# Print the nginx-noscript filter definition to stdout.
+# Arguments: $1 - "|"-joined list of script extensions to block.
+# NOTE(review): the start of this here-document was missing (its opener, any
+# header comments, the [Definition] section header and the "failregex =
+# ^<HOST>" prefix). It has been rebuilt minimally around the surviving
+# pattern; compare against the original script.
+generate_noscript_filter() {
+    local ext_regex="$1"
+    # Unquoted here-document: $ext_regex is expanded, while the regex
+    # backslashes are left untouched by the shell.
+    cat <<EOF
+# Fail2ban filter for nginx-noscript
+# https://mylinux.work
+
+[Definition]
+
+failregex = ^<HOST> \S+ \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|OPTIONS) [^"]*\.($ext_regex)(\?[^\"]*)? HTTP[^"]*" (400|403|404|444)
+
+ignoreregex =
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# NOTE(review): this region looks damaged. The here-document
+# generate_noscript_jail printed appears to be missing, together with whatever
+# followed it up to the pattern below, which looks like the tail of a
+# path-scan filter (main() calls install_pathscan, which is not defined in
+# what remains). The pattern has probably also lost its leading
+# "failregex = ^<HOST>". Restore the lost text from the original script.
+generate_noscript_jail() {
+ cat < \S+ \S+ \[.*\] "(GET|POST|HEAD) [^"]*(/\.env|/\.git|/\.svn|/\.hg|/\.htaccess|/\.htpasswd|/\.aws|/\.docker|/\.ssh|/\.kube|/\.config|/wp-admin|/wp-login|/wp-config|/wp-content/uploads|/wp-includes|/xmlrpc\.php|/administrator|/admin/config|/phpmyadmin|/pma|/myadmin|/dbadmin|/mysql|/phpinfo|/info\.php|/server-status|/server-info|/cgi-bin|/shell|/cmd|/command|/console|/config\.json|/config\.yml|/config\.yaml|/config\.xml|/database\.yml|/backup|/dump|/db\.sql|/\.sql|/api/v1/debug|/debug|/trace|/actuator|/swagger|/graphql|/solr|/elasticsearch|/_cat|/_cluster)[^"]*HTTP[^"]*" (400|403|404|444)
+
+$ignore_line
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# NOTE(review): this region looks damaged. The here-document
+# generate_pathscan_jail printed appears to be missing, together with whatever
+# followed it up to the pattern below, which looks like the tail of a 4xx
+# filter (main() calls install_4xx_flood, which is not defined in what
+# remains). The pattern has probably also lost its leading
+# "failregex = ^<HOST>". Restore the lost text from the original script.
+generate_pathscan_jail() {
+ cat < \S+ \S+ \[.*\] "\S+ \S+ \S+" (400|401|403|404|405|444)
+
+ignoreregex =
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# NOTE(review): this region looks damaged. The here-document generate_4xx_jail
+# printed and the start of the following code appear to be missing; the
+# remainder reads like the tail of a per-jail verification loop (main() calls
+# reload_fail2ban and verify_jails, neither defined in what remains). The
+# debug hint ending in "filter.d/.conf" also looks as if it lost a
+# placeholder. Restore the lost text from the original script.
+generate_4xx_jail() {
+ cat </dev/null; then
+ log_info " ✓ $full_jail is active"
+ else
+ log_error " ✗ $full_jail failed to start"
+ all_ok=false
+ fi
+ done
+ echo ""
+
+ # Any jail that failed the check above makes the whole run fail.
+ if ! $all_ok; then
+ log_error "Some jails failed — debug with:"
+ log_error " fail2ban-client status"
+ log_error " fail2ban-regex $ACCESS_LOGPATH /etc/fail2ban/filter.d/.conf"
+ exit 1
+ fi
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, print the banner, install the three jails,
+# reload and verify fail2ban, then print a summary of the jails not skipped.
+# Arguments: "$@" - command-line options, forwarded to parse_args.
+main() {
+    parse_args "$@"
+
+    echo ""
+    echo "============================================"
+    echo " Fail2ban Nginx Hardening v${VERSION}"
+    echo " https://mylinux.work"
+    echo "============================================"
+    echo ""
+
+    check_root
+    check_fail2ban
+    detect_logpaths
+
+    install_noscript
+    install_pathscan
+    install_4xx_flood
+
+    reload_fail2ban
+    verify_jails
+
+    # Count the jails that were not excluded through SKIP_JAILS.
+    local installed=0
+    should_skip "noscript" || installed=$((installed + 1))
+    should_skip "pathscan" || installed=$((installed + 1))
+    should_skip "4xx-flood" || installed=$((installed + 1))
+
+    echo "============================================"
+    echo " Setup Complete — ${installed} Jail(s) Installed"
+    echo "============================================"
+    echo ""
+    echo " Jails:"
+    should_skip "noscript" || echo " nginx-noscript Ban after 2 script requests (.php, .asp, etc.)"
+    should_skip "pathscan" || echo " nginx-pathscan Ban on first sensitive path probe (.env, .git, wp-admin)"
+    should_skip "4xx-flood" || echo " nginx-4xx-flood Ban after 20 errors in 5 minutes"
+    echo ""
+    echo " Log: $ACCESS_LOGPATH"
+    echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
+    echo ""
+    echo " Useful commands:"
+    echo " fail2ban-client status nginx-noscript"
+    echo " fail2ban-client status nginx-pathscan"
+    echo " fail2ban-client status nginx-4xx-flood"
+    # The unban hint needs its jail and address placeholders to be usable.
+    echo " fail2ban-client set <jail> unbanip <IP>"
+    echo " fail2ban-regex $ACCESS_LOGPATH /etc/fail2ban/filter.d/nginx-pathscan.conf"
+    echo ""
+}
+
+main "$@"
diff --git a/add-fail2ban-scraper-detect.sh b/add-fail2ban-scraper-detect.sh
new file mode 100755
index 0000000..0c5d99e
--- /dev/null
+++ b/add-fail2ban-scraper-detect.sh
@@ -0,0 +1,504 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-fail2ban-scraper-detect.sh
+# Version: 1.1
+# Description: Adds a Fail2ban jail to detect and ban headless Chrome scrapers
+# that pass JavaScript challenges but exhibit bot behavior —
+# rapid 499 responses (connection abandoned mid-download),
+# high-frequency 404s (probing non-existent URLs), and
+# HeadlessChrome user agent strings (no real user). Complements
+# add-fail2ban-image-scraper.sh which catches no-referer image
+# grabs. This filter catches the next tier: bots running real
+# browsers (Puppeteer/Playwright) that execute JS, accept cookies,
+# and send proper referers but still behave differently from humans.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./add-fail2ban-scraper-detect.sh
+# sudo ./add-fail2ban-scraper-detect.sh --logpath /var/log/nginx/access.log
+# sudo ./add-fail2ban-scraper-detect.sh --maxretry 5
+# sudo ./add-fail2ban-scraper-detect.sh --dry-run
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+readonly VERSION="1.1"
+readonly SCRIPT_NAME="${0##*/}"
+
+# Option defaults.
+LOGPATH="auto"      # "auto" makes detect_logpath probe well-known log locations
+BANTIME="86400"     # seconds (the summary prints it as seconds and hours)
+MAXRETRY="3"        # 499/404 hits tolerated before a ban
+FINDTIME="300"      # seconds (the summary prints it as seconds and minutes)
+IGNOREIP=""
+DRY_RUN=false
+# main() tests $REMOVE while `set -u` is active; give it a default like every
+# other option so a run without --remove cannot die on an unbound variable.
+REMOVE=false
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+# Coloured, tagged log lines. Only log_error writes to stderr.
+log_info() {
+    local msg="$*"
+    echo -e "${GREEN}[INFO]${NC} ${msg}"
+}
+
+log_warn() {
+    local msg="$*"
+    echo -e "${YELLOW}[WARN]${NC} ${msg}"
+}
+
+log_error() {
+    local msg="$*"
+    echo -e "${RED}[ERROR]${NC} ${msg}" >&2
+}
+
+log_step() {
+    local msg="$*"
+    echo -e "${CYAN}[STEP]${NC} ${msg}"
+}
+
+show_usage() {
+ cat </dev/null; then
+ log_error "Fail2ban is not installed"
+ log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
+ exit 1
+ fi
+
+ if ! systemctl is-active --quiet fail2ban; then
+ log_error "Fail2ban is not running"
+ exit 1
+ fi
+
+ log_info "Fail2ban is installed and running"
+}
+
+# Resolve LOGPATH: validate a user-supplied value, or probe known locations
+# in priority order and keep the first one that exists.
+# Globals: LOGPATH (read; overwritten when auto-detection succeeds)
+# Exits 1 when the supplied path does not exist or nothing is detected.
+detect_logpath() {
+    local found
+
+    if [[ "$LOGPATH" != "auto" ]]; then
+        # Left unquoted on purpose so a glob in LOGPATH is expanded.
+        # shellcheck disable=SC2086
+        found=( $LOGPATH )
+        if [[ ${#found[@]} -eq 0 || ! -f "${found[0]}" ]]; then
+            log_error "Log file not found: $LOGPATH"
+            exit 1
+        fi
+        log_info "Using specified log path: $LOGPATH"
+        return
+    fi
+
+    log_step "Auto-detecting web server access log..."
+
+    # Candidates in priority order; labels and patterns are index-aligned.
+    local -a labels=(
+        "HestiaCP apache"
+        "HestiaCP nginx"
+        "nginx"
+        "apache2"
+        "httpd"
+    )
+    local -a patterns=(
+        "/var/log/apache2/domains/*.log"
+        "/var/log/nginx/domains/*.log"
+        "/var/log/nginx/access.log"
+        "/var/log/apache2/access.log"
+        "/var/log/httpd/access_log"
+    )
+
+    local idx
+    for idx in "${!patterns[@]}"; do
+        # Expand the pattern; only the first match needs to be a regular file.
+        # shellcheck disable=SC2206
+        found=( ${patterns[idx]} )
+        if [[ -f "${found[0]:-}" ]]; then
+            LOGPATH="${patterns[idx]}"
+            log_info "Detected ${labels[idx]}: $LOGPATH"
+            return
+        fi
+    done
+
+    log_error "Could not auto-detect access log. Use --logpath to specify."
+    exit 1
+}
+
+# ============================================================================
+# REMOVE
+# ============================================================================
+
+# Remove the scraper-detect filter and jail files, reload Fail2ban, and end
+# the script.
+# Globals: DRY_RUN (read)
+# On success this function exits with status 0 in both the dry-run and the
+# real path, so main() never continues into the install steps after --remove.
+do_remove() {
+    local filter_file="/etc/fail2ban/filter.d/scraper-detect.conf"
+    local jail_file="/etc/fail2ban/jail.d/scraper-detect.conf"
+
+    log_step "Removing scraper-detect jail..."
+
+    if $DRY_RUN; then
+        log_info "[DRY RUN] Would remove $filter_file"
+        log_info "[DRY RUN] Would remove $jail_file"
+        log_info "[DRY RUN] Would reload fail2ban"
+        # Exit rather than return: returning would let main() fall through
+        # and print the install dry-run for a removal request.
+        exit 0
+    fi
+
+    if [[ -f "$jail_file" ]]; then
+        rm -f "$jail_file"
+        log_info "Removed: $jail_file"
+    else
+        log_warn "Jail config not found: $jail_file"
+    fi
+
+    if [[ -f "$filter_file" ]]; then
+        rm -f "$filter_file"
+        log_info "Removed: $filter_file"
+    else
+        log_warn "Filter not found: $filter_file"
+    fi
+
+    fail2ban-client reload
+    sleep 2
+    log_info "Fail2ban reloaded — scraper-detect jail removed"
+    exit 0
+}
+
+# ============================================================================
+# INSTALL FILTER
+# ============================================================================
+
+# Write the scraper-detect filter definition, keeping a .bak copy of any
+# existing file. In dry-run mode the filter is only printed.
+# Globals: DRY_RUN (read)
+install_filter() {
+    local target="/etc/fail2ban/filter.d/scraper-detect.conf"
+    local backup="${target}.bak"
+
+    log_step "Installing filter: $target"
+
+    if ! $DRY_RUN; then
+        if [[ -f "$target" ]]; then
+            log_warn "Filter already exists — backing up to ${backup}"
+            cp "$target" "$backup"
+        fi
+        generate_filter > "$target"
+        log_info "Filter installed: $target"
+        return
+    fi
+
+    # Dry run: show what would be written, framed by blank lines.
+    log_info "[DRY RUN] Would create $target"
+    echo ""
+    generate_filter
+    echo ""
+}
+
+# Print the Fail2ban filter definition for the scraper-detect jail on stdout.
+# The heredoc is quoted, so nothing inside it is expanded by the shell.
+# Every failregex carries the host placeholder: Fail2ban needs it to know
+# which address to ban and rejects a failregex without one.
+generate_filter() {
+    cat <<'EOF'
+# Fail2ban filter to detect headless Chrome scrapers
+# https://mylinux.work
+#
+# Catches three patterns that indicate automated scraping:
+#
+# 1. HTTP 499 — nginx-specific status meaning "client closed connection
+# before the server responded." Scrapers fire requests then drop them
+# once they've grabbed the HTML. Real users rarely trigger this.
+#
+# 2. HTTP 404 — not found. A single 404 is normal (mistyped URL). Many
+# 404s in a short window indicate URL probing or stale scraper runs.
+#
+# 3. HeadlessChrome — Puppeteer/Playwright bots that don't bother hiding
+# the headless user agent. No legitimate browser sends this string.
+# Matched on any status code — headless Chrome is never a real user.
+#
+# Combined with maxretry in the jail, this catches bots that generate
+# multiple errors quickly while ignoring the occasional human mistake.
+# HeadlessChrome matches are instant (maxretry 1 would suffice) but
+# the jail threshold still applies — a few hits trigger the ban.
+
+[Definition]
+
+# Match 499 (client dropped), 404 (not found), and HeadlessChrome UA
+# Works with combined, common, and enriched (GeoIP) log formats
+failregex = ^<HOST> \S+ \S+ \[.*\] "\S+ \S+ \S+" 499
+            ^<HOST> \S+ \S+ \[.*\] "\S+ \S+ \S+" 404
+            ^<HOST> .* ".*HeadlessChrome.*"
+
+# Whitelist legitimate bots and monitoring tools
+ignoreregex = ^<HOST> .* ".*(?:Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot|Slurp|facebookexternalhit|Twitterbot|LinkedInBot|Applebot|Blackbox-Exporter|UptimeRobot|Pingdom|CensysInspect|Feedfetcher|Mediapartners-Google).*"
+
+# Author: Phil Connor — https://mylinux.work
+EOF
+}
+
+# ============================================================================
+# INSTALL JAIL
+# ============================================================================
+
+# Write the scraper-detect jail configuration, keeping a .bak copy of any
+# existing file. In dry-run mode the configuration is only printed.
+# Globals: DRY_RUN (read)
+install_jail() {
+    local target="/etc/fail2ban/jail.d/scraper-detect.conf"
+    local backup="${target}.bak"
+
+    log_step "Installing jail: $target"
+
+    if ! $DRY_RUN; then
+        if [[ -f "$target" ]]; then
+            log_warn "Jail config already exists — backing up to ${backup}"
+            cp "$target" "$backup"
+        fi
+        generate_jail > "$target"
+        log_info "Jail config installed: $target"
+        return
+    fi
+
+    # Dry run: show what would be written, framed by blank lines.
+    log_info "[DRY RUN] Would create $target"
+    echo ""
+    generate_jail
+    echo ""
+}
+
+generate_jail() {
+ cat </dev/null; then
+ log_warn "Config test not available — reloading directly"
+ fi
+
+ fail2ban-client reload
+ sleep 2
+
+ if systemctl is-active --quiet fail2ban; then
+ log_info "Fail2ban reloaded successfully"
+ else
+ log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
+ exit 1
+ fi
+}
+
+# Confirm the scraper-detect jail is known to the running Fail2ban.
+# Globals: DRY_RUN, LOGPATH (read)
+# Exits 1 when `fail2ban-client status scraper-detect` fails.
+verify_jail() {
+    log_step "Verifying scraper-detect jail..."
+
+    if $DRY_RUN; then
+        log_info "[DRY RUN] Would verify jail status"
+        return
+    fi
+
+    echo ""
+    # The status output itself is shown to the user; only stderr is hidden.
+    if ! fail2ban-client status scraper-detect 2>/dev/null; then
+        log_error "Jail 'scraper-detect' is not running — check: fail2ban-client status"
+        log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/scraper-detect.conf"
+        exit 1
+    fi
+
+    echo ""
+    log_info "Scraper-detect jail is active and monitoring $LOGPATH"
+}
+
+# Dry-run preview: run the generated filter against the first existing log
+# file and show the tail of the fail2ban-regex report.
+# Globals: DRY_RUN, LOGPATH (read)
+# Returns: 0 when not in dry-run mode or when no log file matches; otherwise
+#          the status of the fail2ban-regex pipeline.
+test_against_logs() {
+    $DRY_RUN || return 0
+
+    # Left unquoted on purpose so a glob in LOGPATH is expanded.
+    # shellcheck disable=SC2086
+    local matches=( $LOGPATH )
+    # ":-" keeps `set -u` quiet if the expansion produced no words at all.
+    [[ -f "${matches[0]:-}" ]] || return 0
+
+    log_step "Testing filter against existing logs..."
+    local tmp_filter
+    tmp_filter=$(mktemp /tmp/scraper-detect-filter.XXXXXX)
+    generate_filter > "$tmp_filter"
+    echo ""
+
+    # Capture the status instead of letting `set -e`/pipefail abort here, so
+    # the temporary filter is always removed before the failure propagates.
+    local rc=0
+    fail2ban-regex "${matches[0]}" "$tmp_filter" 2>&1 | tail -8 || rc=$?
+    rm -f "$tmp_filter"
+    echo ""
+    return "$rc"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, run the pre-flight checks, then either remove
+# the jail or install and verify it, and print a summary.
+# Arguments: "$@" — forwarded unchanged to parse_args.
+# Globals:   VERSION, REMOVE, LOGPATH, BANTIME, MAXRETRY, FINDTIME, IGNOREIP (read)
+main() {
+    parse_args "$@"
+
+    echo ""
+    echo "============================================"
+    echo " Fail2ban Scraper Detect v${VERSION}"
+    echo " https://mylinux.work"
+    echo "============================================"
+    echo ""
+
+    check_root
+    check_fail2ban
+
+    if $REMOVE; then
+        do_remove
+    fi
+
+    detect_logpath
+    test_against_logs
+    install_filter
+    install_jail
+    reload_fail2ban
+    verify_jail
+
+    echo ""
+    echo "============================================"
+    echo " Setup Complete"
+    echo "============================================"
+    echo ""
+    echo " Jail: scraper-detect"
+    echo " Log: $LOGPATH"
+    echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
+    echo " Max retry: $MAXRETRY (499/404 errors before ban)"
+    echo " Find time: ${FINDTIME}s ($(( FINDTIME / 60 ))m window)"
+    if [[ -n "$IGNOREIP" ]]; then
+        echo " Ignore: $IGNOREIP"
+    fi
+    echo ""
+    echo " Useful commands:"
+    echo " fail2ban-client status scraper-detect"
+    # unbanip needs the address to release; show it as a placeholder.
+    echo " fail2ban-client set scraper-detect unbanip <ip>"
+    echo " fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/scraper-detect.conf"
+    echo ""
+}
+
+main "$@"
diff --git a/add-nginx-block-head.sh b/add-nginx-block-head.sh
new file mode 100755
index 0000000..074e9e0
--- /dev/null
+++ b/add-nginx-block-head.sh
@@ -0,0 +1,257 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### add-nginx-block-head.sh — Block HEAD requests in Nginx (HestiaCP compatible) ####
+#### Adds a 444 drop rule for HEAD method crawlers/scrapers. ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### sudo ./add-nginx-block-head.sh ####
+#### sudo ./add-nginx-block-head.sh --dry-run ####
+#### sudo ./add-nginx-block-head.sh --remove ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+DRY_RUN=false
+REMOVE=false
+SNIPPET_NAME="nginx.conf_block_head"
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Start with colour disabled; switch the ANSI codes on only for a terminal.
+RED="" GREEN="" YELLOW="" BOLD="" RESET=""
+if [[ -t 1 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BOLD='\033[1m'
+    RESET='\033[0m'
+fi
+
+# Tagged log lines. Only err writes to stderr.
+log() {
+    echo -e "${GREEN}[OK]${RESET} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[WARN]${RESET} $*"
+}
+
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+info() {
+    echo -e "${BOLD}[INFO]${RESET} $*"
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat </dev/null; then
+ err "Nginx not found"
+ exit 1
+fi
+
+# HestiaCP is detected through its v-list-users CLI; stop when it is absent.
+command -v v-list-users &>/dev/null || {
+    err "HestiaCP not found (v-list-users missing)"
+    exit 1
+}
+
+# ── Snippet content ──────────────────────────────────────────────────
+SNIPPET_CONTENT='# Block HEAD request crawlers/scrapers
+# Added by add-nginx-block-head.sh
+# Returns 444 (drop connection) — no response sent to bot
+if ($request_method = HEAD) {
+ return 444;
+}'
+
+# ── Find all HestiaCP domains ────────────────────────────────────────
+# Print, one per line, every web-domain config directory that contains an
+# nginx.conf, for each user reported by `v-list-users plain`.
+# Outputs: directory paths under /home/USER/conf/web on stdout.
+# All working variables are local so nothing leaks into the caller's scope.
+get_all_domain_dirs() {
+    local users user user_conf nginx_conf
+    # First tab-separated field of each line is taken as the user name.
+    users=$(v-list-users plain 2>/dev/null | cut -f1)
+
+    # Read line by line instead of word-splitting an unquoted variable, so
+    # the names are never glob-expanded.
+    while IFS= read -r user; do
+        [[ -n "$user" ]] || continue
+        user_conf="/home/${user}/conf/web"
+        [[ -d "$user_conf" ]] || continue
+
+        # Find domain directories by looking for nginx.conf files
+        for nginx_conf in "${user_conf}"/*/nginx.conf; do
+            [[ -f "$nginx_conf" ]] || continue
+            dirname "$nginx_conf"
+        done
+    done <<< "$users"
+}
+
+# ── Remove mode ───────────────────────────────────────────────────────
+# --remove: delete the HTTP and HTTPS block-head snippets of every domain,
+# then test and reload Nginx. This branch always ends the script.
+if [[ "$REMOVE" == "true" ]]; then
+    removed=0
+
+    while IFS= read -r domain_dir; do
+        snippet="${domain_dir}/${SNIPPET_NAME}"
+        ssl_snippet="${domain_dir}/nginx.ssl.conf_block_head"
+
+        for f in "$snippet" "$ssl_snippet"; do
+            [[ -f "$f" ]] || continue
+            case "$DRY_RUN" in
+                true)
+                    info "Would remove: ${f}"
+                    ;;
+                *)
+                    rm -f "$f"
+                    log "Removed ${f}"
+                    ;;
+            esac
+            removed=$((removed + 1))
+        done
+    done < <(get_all_domain_dirs)
+
+    if (( removed == 0 )); then
+        info "No block-head snippets found — nothing to remove"
+        exit 0
+    fi
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        info "Would test and reload Nginx"
+        exit 0
+    fi
+
+    # Reload only when the remaining configuration still parses.
+    if ! nginx -t 2>/dev/null; then
+        err "Nginx config test failed after removal — check your config"
+        exit 1
+    fi
+    systemctl reload nginx
+    log "Nginx reloaded — HEAD requests are now allowed"
+    exit 0
+fi
+
+# ── Install mode ──────────────────────────────────────────────────────
+# Collect every domain config directory up front; the count is reused in
+# the progress and success messages below.
+domain_dirs=()
+while IFS= read -r dir; do
+ domain_dirs+=("$dir")
+done < <(get_all_domain_dirs)
+
+if [[ ${#domain_dirs[@]} -eq 0 ]]; then
+ err "No HestiaCP web domains found"
+ exit 1
+fi
+
+info "Found ${#domain_dirs[@]} domain config(s)"
+echo ""
+
+# created/skipped drive the summary; created_files records exactly what this
+# run wrote so a failed config test can roll back only those files.
+created=0
+skipped=0
+created_files=()
+
+for domain_dir in "${domain_dirs[@]}"; do
+ domain_name=$(basename "$domain_dir")
+
+ # Add snippet for both HTTP and HTTPS server blocks
+ for conf_type in "" ".ssl"; do
+ if [[ -n "$conf_type" ]]; then
+ snippet="${domain_dir}/nginx${conf_type}.conf_block_head"
+ else
+ snippet="${domain_dir}/${SNIPPET_NAME}"
+ fi
+
+ # Check the main config exists for this type
+ main_conf="${domain_dir}/nginx${conf_type}.conf"
+ [[ -f "$main_conf" ]] || continue
+
+ # Never overwrite an existing snippet; count it as skipped instead.
+ if [[ -f "$snippet" ]]; then
+ info "Already exists: ${snippet}"
+ ((skipped++)) || true
+ continue
+ fi
+
+ # "|| true" below: ((x++)) returns non-zero when x was 0, which would
+ # otherwise trip `set -e`.
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: ${snippet}"
+ ((created++)) || true
+ else
+ echo "$SNIPPET_CONTENT" > "$snippet"
+ created_files+=("$snippet")
+ log "Created ${snippet}"
+ ((created++)) || true
+ fi
+ done
+done
+
+echo ""
+
+# Nothing new was written and at least one snippet was already present.
+if [[ $created -eq 0 && $skipped -gt 0 ]]; then
+ info "HEAD requests are already blocked on all domains"
+ exit 0
+fi
+
+# Dry run: show the snippet that would be written and stop before touching Nginx.
+if [[ "$DRY_RUN" == "true" ]]; then
+ echo ""
+ echo "$SNIPPET_CONTENT"
+ echo ""
+ info "Would create ${created} snippet(s) (${skipped} already exist)"
+ info "Would test Nginx config and reload"
+ exit 0
+fi
+
+# Test Nginx config
+info "Testing Nginx configuration..."
+if nginx -t 2>&1; then
+ echo ""
+ log "Config test passed"
+ systemctl reload nginx
+ log "Nginx reloaded — HEAD requests blocked on ${#domain_dirs[@]} domain(s) (444 drop)"
+else
+ echo ""
+ err "Config test FAILED — rolling back all changes"
+ # Remove only the files created by this run; existing snippets stay.
+ for f in "${created_files[@]}"; do
+ rm -f "$f"
+ err "Removed ${f}"
+ done
+ err "Nginx was NOT reloaded — your site is unaffected"
+ exit 1
+fi
+
+echo ""
+info "Verify with: curl -I https://your-site.com"
+info "Expected: curl returns empty reply (connection dropped)"
+info "To undo: $(basename "$0") --remove"
diff --git a/add-nginx-bot-block.sh b/add-nginx-bot-block.sh
new file mode 100755
index 0000000..7fc8ab0
--- /dev/null
+++ b/add-nginx-bot-block.sh
@@ -0,0 +1,443 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-nginx-bot-block.sh
+# Version: 1.3
+# Description: Configure AI scraper and bot blocking on standard nginx servers.
+# Creates an nginx map in conf.d and injects bot-blocking rules
+# into server blocks found in sites-enabled and conf.d.
+# For HestiaCP / VestaCP / myVesta servers, use hestia-bot-block.sh instead.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - nginx installed and running
+# - Root access
+#
+# Usage:
+# sudo ./add-nginx-bot-block.sh
+# sudo ./add-nginx-bot-block.sh --dry-run
+# sudo ./add-nginx-bot-block.sh --conf /etc/nginx/sites-enabled/mysite.conf
+# sudo ./add-nginx-bot-block.sh --status-code 403
+# sudo ./add-nginx-bot-block.sh --remove
+#
+# Changelog:
+# 1.3 — 2026-05-11: Added fragment-in-referer blocking (real browsers strip
+# URI fragments from the Referer header). Added request method blocking
+# (only GET/HEAD allowed — static sites never need POST/PUT/DELETE).
+# Added ospa-radar (lead-gen/business intelligence crawler) to blocklist.
+# 1.2 — 2026-05-08: Added Exabot (defunct Dassault/Exalead crawler, now
+# spoofed) and Sogou (Tencent Chinese search crawler) to blocklist.
+# 1.1 — 2026-05-04: Removed Claude-Web and OAI-SearchBot from blocklist.
+# These are user-facing fetcher bots, not training crawlers. Blocking
+# them prevents your content from being cited in AI answers.
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Configuration ---
+# Directories scanned for server blocks, and the map file this script owns.
+CONF_DIR="/etc/nginx/conf.d"
+SITES_DIR="/etc/nginx/sites-enabled"
+MAP_FILE="${CONF_DIR}/bot-block.conf"
+# Option defaults.
+DRY_RUN=false
+REMOVE=false
+SINGLE_CONF=""
+# Status returned by the injected rules.
+STATUS_CODE="444"
+# Epoch seconds; used as the suffix of the .bak copies made in this run.
+TIMESTAMP=$(date +%s)
+# Comment markers that delimit the injected rules; remove mode deletes
+# everything between them.
+MARKER_START="# bot-block-managed-start"
+MARKER_END="# bot-block-managed-end"
+
+# --- Colors ---
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+# Tagged log lines, all written to stdout.
+info() { echo -e "${GREEN}[OK]${NC} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+usage() {
+ cat <&2
+ exit 1
+fi
+
+# Nothing to configure without an nginx binary on PATH.
+command -v nginx &>/dev/null || {
+    echo -e "${RED}Error: nginx not found${NC}" >&2
+    exit 1
+}
+
+# =====================================================
+# Collect config files to process
+# =====================================================
+# Print, one per line, the config files that contain a `server {` block.
+# Globals: SINGLE_CONF, SITES_DIR, CONF_DIR, MAP_FILE, RED, NC (read)
+# Outputs: file paths on stdout; nothing at all when no file qualifies, so
+#          callers using `mapfile` see an empty array, not one blank entry.
+# Exits 1 when SINGLE_CONF is set but is not a regular file.
+# NOTE(review): callers run this inside a process substitution, where that
+# exit ends only the subshell — confirm they handle the empty result.
+collect_configs() {
+    local -a configs=() server_configs=()
+    local f
+
+    if [[ -n "$SINGLE_CONF" ]]; then
+        if [[ ! -f "$SINGLE_CONF" ]]; then
+            echo -e "${RED}Error: Config file not found: ${SINGLE_CONF}${NC}" >&2
+            exit 1
+        fi
+        configs+=("$SINGLE_CONF")
+    else
+        # Scan sites-enabled
+        if [[ -d "$SITES_DIR" ]]; then
+            for f in "$SITES_DIR"/*; do
+                [[ -f "$f" ]] && configs+=("$f")
+            done
+        fi
+        # Scan conf.d (skip bot-block.conf itself)
+        if [[ -d "$CONF_DIR" ]]; then
+            for f in "$CONF_DIR"/*.conf; do
+                [[ -f "$f" ]] || continue
+                [[ "$f" == "$MAP_FILE" ]] && continue
+                configs+=("$f")
+            done
+        fi
+    fi
+
+    # Filter to only files containing a server block. The "+" expansion
+    # keeps an empty array safe under `set -u` on older bash releases.
+    for f in ${configs[@]+"${configs[@]}"}; do
+        if grep -qE '^[[:space:]]*server[[:space:]]*\{' "$f" 2>/dev/null; then
+            server_configs+=("$f")
+        fi
+    done
+
+    # printf with no arguments would still emit one empty line.
+    if (( ${#server_configs[@]} > 0 )); then
+        printf '%s\n' "${server_configs[@]}"
+    fi
+}
+
+# =====================================================
+# REMOVE MODE
+# =====================================================
+# --remove: delete the map file, strip the managed rule block from every
+# server config, then validate and reload nginx. Always ends the script.
+if [[ "$REMOVE" == "true" ]]; then
+    step "Removing bot-block configuration"
+
+    # Remove map file
+    if [[ -f "$MAP_FILE" ]]; then
+        if [[ "$DRY_RUN" == "true" ]]; then
+            echo " Would remove: ${MAP_FILE}"
+        else
+            rm -f "$MAP_FILE"
+            info "Removed: ${MAP_FILE}"
+        fi
+    else
+        warn "Map file not found: ${MAP_FILE} (already removed?)"
+    fi
+
+    # Strip managed blocks from config files
+    step "Scanning for injected bot-block rules"
+
+    mapfile -t configs < <(collect_configs)
+    # A lone blank line from collect_configs means "nothing found"; without
+    # this the warning below could never be reached.
+    if [[ ${#configs[@]} -eq 1 && -z "${configs[0]}" ]]; then
+        configs=()
+    fi
+
+    if [[ ${#configs[@]} -eq 0 ]]; then
+        warn "No server block config files found"
+    else
+        for conf in "${configs[@]}"; do
+            if grep -q "$MARKER_START" "$conf" 2>/dev/null; then
+                if [[ "$DRY_RUN" == "true" ]]; then
+                    echo " Would clean: ${conf}"
+                else
+                    cp "$conf" "${conf}.bak.${TIMESTAMP}"
+                    # sites-enabled entries are usually symlinks; plain
+                    # `sed -i` would replace the link with a regular file.
+                    sed -i --follow-symlinks "/${MARKER_START}/,/${MARKER_END}/d" "$conf"
+                    info "Cleaned: ${conf}"
+                fi
+            fi
+        done
+    fi
+
+    # Validate and reload
+    if [[ "$DRY_RUN" == "true" ]]; then
+        echo " Would run: nginx -t"
+        echo " Would run: systemctl reload nginx"
+    else
+        step "Testing nginx configuration"
+        if nginx -t 2>&1; then
+            info "nginx config valid"
+        else
+            echo -e "${RED}[ERROR] nginx config test failed — restore .bak files${NC}" >&2
+            exit 1
+        fi
+
+        step "Reloading nginx"
+        systemctl reload nginx
+        info "nginx reloaded"
+    fi
+
+    echo ""
+    echo -e "${BOLD}Bot-block rules removed.${NC}"
+    exit 0
+fi
+
+# =====================================================
+# INSTALL MODE
+# =====================================================
+
+# Step 1: Create nginx map
+# =====================================================
+step "Creating bot-block map at ${MAP_FILE}"
+
+MAP_CONTENT='# Bot-blocking map for AI scrapers, SEO bots, and vulnerability scanners
+# Generated by add-nginx-bot-block.sh — https://mylinux.work
+
+map $http_user_agent $is_bad_bot {
+ default 0;
+
+ # AI scrapers
+ ~*ABEvalBot 1;
+ ~*GPTBot 1;
+ ~*ClaudeBot 1;
+ ~*anthropic-ai 1;
+ ~*CCBot 1;
+ ~*Bytespider 1;
+ ~*TikTokSpider 1;
+ ~*cohere-ai 1;
+ ~*PerplexityBot 1;
+ ~*Diffbot 1;
+ ~*MistralBot 1;
+ ~*YandexGPTBot 1;
+ ~*meta-externalagent 1;
+ ~*Meta-ExternalFetcher 1;
+ ~*meta-webindexer 1;
+ ~*PetalBot 1;
+ ~*Amazonbot 1;
+ ~*Amzn-SearchBot 1;
+ ~*AI2Bot 1;
+ ~*Timpibot 1;
+ ~*img2dataset 1;
+ ~*YouBot 1;
+ ~*HanaleiBot 1;
+
+ # Defunct crawlers (spoofed user agents)
+ ~*Exabot 1;
+ ~*Sogou 1;
+
+ # SEO scrapers
+ ~*MJ12bot 1;
+ ~*SemrushBot 1;
+ ~*AhrefsBot 1;
+ ~*DotBot 1;
+ ~*DataForSeoBot 1;
+ ~*SERanking 1;
+
+ # Vulnerability scanners
+ ~*Nikto 1;
+ ~*sqlmap 1;
+ ~*Nmap 1;
+ ~*masscan 1;
+ ~*ZmEu 1;
+ ~*Morpheus 1;
+
+ # Lead-gen / business intelligence bots
+ ~*ospa-radar 1;
+ ~*HubSeedsBot 1;
+
+ # AI scrapers / research bots
+ ~*Aranet-SearchBot 1;
+ ~*AzureAI-SearchBot 1;
+ ~*MINERVA-DeepResearch 1;
+ ~*NagetBot 1;
+ ~*LAIABot 1;
+ ~*pi-coding-agent 1;
+
+ # Probe / monitoring bots
+ ~*CMS-Checker 1;
+ ~*NexoFaviconBot 1;
+ ~*AwarioBot 1;
+ ~*AwarioSmartBot 1;
+ ~*CopyousBot 1;
+ ~*SurdotlyBot 1;
+ ~*trendictionbot 1;
+ ~*wpbot 1;
+ ~*WebFetchTool 1;
+ ~*YisouSpider 1;
+
+ # Scraping frameworks
+ ~*Scrapy 1;
+ ~*python-requests 1;
+ ~*Go-http-client 1;
+ ~*Java/ 1;
+ ~*libwww-perl 1;
+ ~*trafilatura 1;
+ ~*node-fetch 1;
+
+ # Outdated browsers (Chrome < 115 — almost certainly bots)
+ ~*Chrome/([1-9][0-9]?|10[0-9]|11[0-4])\. 1;
+
+ # Empty / missing user agent
+ "" 1;
+ "-" 1;
+}'
+
+# Write the map file, keeping a timestamped copy of any previous version.
+if [[ "$DRY_RUN" != "true" ]]; then
+    if [[ -f "$MAP_FILE" ]]; then
+        cp "$MAP_FILE" "${MAP_FILE}.bak.${TIMESTAMP}"
+        warn "Existing map backed up"
+    fi
+    echo "$MAP_CONTENT" > "$MAP_FILE"
+    info "Map created: ${MAP_FILE}"
+else
+    echo " Would create: ${MAP_FILE}"
+fi
+
+# =====================================================
+# Step 2: Inject bot-blocking rule into server blocks
+# =====================================================
+step "Scanning for server blocks to inject bot-blocking rule"
+
+mapfile -t configs < <(collect_configs)
+# A lone blank line from collect_configs means "nothing found"; without this
+# the loop below would try to back up a file named "".
+if [[ ${#configs[@]} -eq 1 && -z "${configs[0]}" ]]; then
+    configs=()
+fi
+
+if [[ ${#configs[@]} -eq 0 ]]; then
+    warn "No server block config files found in ${SITES_DIR} or ${CONF_DIR}"
+else
+    # Rules placed before the first location directive that follows each
+    # `server {` line. This is a plain multi-line string: awk prints it
+    # verbatim, so no sed-style trailing backslashes are wanted.
+    BOT_BLOCK="    ${MARKER_START}
+    if (\$is_bad_bot) {
+        return ${STATUS_CODE};
+    }
+    # Block broken srcset scrapers
+    if (\$request_uri ~* \"%20[0-9]+w,https?://\") {
+        return ${STATUS_CODE};
+    }
+    # Block spoofed referers with fragment identifiers (real browsers strip these)
+    if (\$http_referer ~* \"#\") {
+        return ${STATUS_CODE};
+    }
+    # Block non-GET/HEAD methods (static sites never need POST/PUT/DELETE)
+    if (\$request_method !~ ^(GET|HEAD)\$ ) {
+        return ${STATUS_CODE};
+    }
+    ${MARKER_END}"
+
+    MODIFIED=0
+    for conf in "${configs[@]}"; do
+        # Skip if already managed
+        if grep -q "$MARKER_START" "$conf" 2>/dev/null; then
+            warn "Already managed: ${conf} — skipping"
+            continue
+        fi
+
+        if [[ "$DRY_RUN" == "true" ]]; then
+            echo " Would inject into: ${conf}"
+            MODIFIED=$((MODIFIED + 1))
+            continue
+        fi
+
+        # Backup
+        # NOTE(review): a backup created inside sites-enabled may itself be
+        # picked up by an `include sites-enabled/*` — confirm the include glob.
+        cp "$conf" "${conf}.bak.${TIMESTAMP}"
+
+        # The block travels through the environment, not `awk -v`, because
+        # -v applies backslash-escape processing to the value. [ \t] replaces
+        # \s, which not every awk implementation understands.
+        BOT_BLOCK="$BOT_BLOCK" awk '
+            /^[ \t]*server[ \t]*\{/ { in_server = 1; injected = 0 }
+            in_server && !injected && /^[ \t]*location[ \t]/ {
+                print ENVIRON["BOT_BLOCK"]
+                print ""
+                injected = 1
+            }
+            { print }
+        ' "$conf" > "${conf}.tmp"
+
+        # Write through the original path rather than mv-ing over it, so a
+        # symlink keeps pointing at its target file.
+        cat "${conf}.tmp" > "$conf"
+        rm -f "${conf}.tmp"
+
+        info "Injected into: ${conf}"
+        MODIFIED=$((MODIFIED + 1))
+    done
+
+    if [[ $MODIFIED -eq 0 ]]; then
+        warn "No files modified (all already managed)"
+    fi
+fi
+
+# =====================================================
+# Step 3: Validate nginx config
+# =====================================================
+step "Testing nginx configuration"
+
+if [[ "$DRY_RUN" != "true" ]]; then
+    # Stop before reloading when the new configuration does not parse.
+    if ! nginx -t 2>&1; then
+        echo -e "${RED}[ERROR] nginx config test failed${NC}" >&2
+        echo " Restore backups (.bak.${TIMESTAMP}) from ${SITES_DIR} and ${CONF_DIR}" >&2
+        exit 1
+    fi
+    info "nginx config valid"
+else
+    echo " Would run: nginx -t"
+fi
+
+# =====================================================
+# Step 4: Reload nginx
+# =====================================================
+step "Reloading nginx"
+
+if [[ "$DRY_RUN" != "true" ]]; then
+    systemctl reload nginx
+    info "nginx reloaded"
+else
+    echo " Would run: systemctl reload nginx"
+fi
+
+# =====================================================
+# Summary
+# =====================================================
+# Describe what was processed: the single file given, or both scan locations.
+if [[ -z "$SINGLE_CONF" ]]; then
+    scope_line=" Scanned: ${SITES_DIR}/ and ${CONF_DIR}/*.conf"
+else
+    scope_line=" Config: ${SINGLE_CONF}"
+fi
+
+echo ""
+echo -e "${BOLD}Done.${NC}"
+echo ""
+echo " Map: ${MAP_FILE}"
+echo " Status code: ${STATUS_CODE}"
+echo "$scope_line"
+echo ""
+echo " To remove: sudo $(basename "$0") --remove"
+echo ""
+echo " Verify: curl -A 'GPTBot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+echo " Expected: 444 (connection dropped) or 000 (no response)"
diff --git a/add-nginx-js-challenge.sh b/add-nginx-js-challenge.sh
new file mode 100644
index 0000000..d00fd97
--- /dev/null
+++ b/add-nginx-js-challenge.sh
@@ -0,0 +1,582 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-nginx-js-challenge.sh
+# Version: 3.1
+# Description: Adds a lightweight JavaScript cookie challenge to nginx.
+# Bots that don't execute JavaScript are silently dropped.
+# Legitimate search engine crawlers are whitelisted by user agent.
+# Headless Chrome bots from suspect GeoIP regions with no external
+# referrer are tarpitted (served at 50 bytes/sec).
+# Works alongside bot-block.conf (run add-nginx-bot-block.sh first).
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - nginx installed and running
+# - Root access
+#
+# Usage:
+# sudo ./add-nginx-js-challenge.sh
+# sudo ./add-nginx-js-challenge.sh --dry-run
+# sudo ./add-nginx-js-challenge.sh --remove
+#
+# How it works:
+# 1. Whitelisted bot UAs (Googlebot, Bingbot, etc.) bypass the check entirely
+# 2. All other visitors must have a cookie with a randomized name and token
+# 3. First-time visitors get a brief redirect to a challenge page that sets
+# the cookie via JS and bounces them back — takes < 100ms
+# 4. Bots that don't run JS never get the cookie and get 444'd
+# 5. Cookie name and token are randomized per installation — re-running the
+# script rotates them, immediately invalidating old pre-set cookies
+#
+# Changelog:
+# 3.1 — 2026-05-21: Challenge endpoint rate limiting. Headless Chrome bot farms
+# were passing the JS challenge on every request by spawning fresh browser
+# instances without persistent cookies. Added limit_req_zone on the
+# challenge endpoint: 3 requests allowed (burst), then 1/min sustained.
+# Excess requests get 444. Added --challenge-burst and --challenge-rate.
+# Fixed geoip2 variable name ($geoip2_country_code to match standard
+# geoip2.conf). Conditional geoip2 block — only added if no existing
+# mmdb is loaded elsewhere in nginx config. Challenge JS now treats
+# same-domain referrers as "direct" for tarpit purposes.
+# 3.0 — 2026-05-20: Referrer tracking through challenge redirect. Original
+# HTTP Referer is passed as &ref= param in the 302 redirect. Challenge
+# JS stores it in a _bc_ref cookie. Tarpit map: visitors from suspect
+# GeoIP countries (CN by default) with no external referrer are served
+# at 50 bytes/sec via limit_rate, draining headless Chrome resources.
+# Requires ngx_http_geoip2_module for GeoIP-based tarpitting.
+# Added --tarpit-countries option (default: CN).
+# Added --tarpit-rate option (default: 50 bytes/sec).
+# 2.0 — 2026-05-19: Randomized cookie name and token per installation.
+# Cookie name is now "_" plus two random hex characters (e.g. _a7, _3f).
+# Cookie value is now a 32-char hex token instead of static "verified".
+# Values persist in /etc/nginx/js-challenge.env for future reference.
+# Re-running rotates credentials and invalidates old bot bypass cookies.
+# Added no-cache headers on challenge page to prevent stale HTML after
+# rotation. Fixed challenge page Secure flag to be conditional on HTTPS.
+# Fixed challenge location — removed incorrect 'internal' directive.
+# 1.0 — 2026-05-11: Initial release
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Configuration ---
+# Fixed paths used by the installer.
+CONF_DIR="/etc/nginx/conf.d"
+CHALLENGE_DIR="/var/www/js-challenge"
+STATE_FILE="/etc/nginx/js-challenge.env"
+CHALLENGE_PATH="/_bc"
+CHALLENGE_MAP="${CONF_DIR}/js-challenge.conf"
+CHALLENGE_HTML="${CHALLENGE_DIR}/challenge.html"
+
+# Mode flags and cookie lifetime.
+DRY_RUN=false
+REMOVE=false
+COOKIE_MAX_AGE=86400 # 24 hours
+
+# Tunables: a non-empty value already present in the environment wins.
+: "${TARPIT_COUNTRIES:=CN}" # GeoIP country codes to tarpit (space-separated)
+: "${TARPIT_RATE:=50}" # bytes/sec for tarpitted responses
+: "${CHALLENGE_RATE:=1}" # sustained challenge requests per minute per IP
+: "${CHALLENGE_BURST:=3}" # initial burst of challenge requests allowed
+
+# Epoch seconds; used as the suffix of backup files made during this run.
+TIMESTAMP=$(date +%s)
+
+# --- Colors ---
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+info() { echo -e "${GREEN}[OK]${NC} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+step() { echo -e "${CYAN}[STEP]${NC} $*"; }
+
+usage() {
+ cat <&2
+ exit 1
+fi
+
+# =====================================================
+# Generate or load cookie credentials
+# =====================================================
+
+generate_credentials() {
+ COOKIE_NAME="_$(openssl rand -hex 1)"
+ COOKIE_VALUE="$(openssl rand -hex 16)"
+}
+
+save_credentials() {
+ if [[ "$DRY_RUN" != "true" ]]; then
+ cat > "$STATE_FILE" <&1; then
+ systemctl reload nginx
+ info "nginx reloaded"
+ else
+ echo -e "${RED}[ERROR] nginx config test failed after removal${NC}" >&2
+ exit 1
+ fi
+ fi
+
+ echo ""
+ echo -e "${BOLD}JS challenge removed.${NC}"
+ echo ""
+ echo " Note: You may also need to remove the js-challenge location blocks"
+ echo " from your server block configs (look for 'js-challenge-managed')."
+ exit 0
+fi
+
+# =====================================================
+# Step 1: Create the challenge HTML page
+# =====================================================
+step "Creating challenge page at ${CHALLENGE_HTML}"
+
+CHALLENGE_CONTENT='
+
+Verifying
+
+
+
+Cookies must be enabled to access this site.
+
+
+'
+
+if [[ "$DRY_RUN" == "true" ]]; then
+ echo " Would create: ${CHALLENGE_DIR}/"
+ echo " Would create: ${CHALLENGE_HTML}"
+else
+ mkdir -p "$CHALLENGE_DIR"
+ echo "$CHALLENGE_CONTENT" > "$CHALLENGE_HTML"
+ info "Challenge page created: ${CHALLENGE_HTML}"
+fi
+
+# Save credentials
+save_credentials
+
+# =====================================================
+# Step 2: Create nginx map config
+# =====================================================
+step "Creating JS challenge map at ${CHALLENGE_MAP}"
+
+# Build the cookie variable name for nginx (e.g. _v7 → $cookie__v7)
+NGINX_COOKIE_VAR="\$cookie_${COOKIE_NAME}"
+
+# Check if a geoip2 block already loads an mmdb anywhere in nginx config.
+# If so, $geoip2_country_code should already be defined — don't duplicate.
+GEOIP2_BLOCK=""
+if ! grep -r 'geoip2.*\.mmdb' /etc/nginx/ \
+ --include='*.conf' --exclude='js-challenge.conf' --exclude='*.bak.*' \
+ -q 2>/dev/null; then
+ GEOIP2_BLOCK='
+# ── GeoIP2: country lookup for tarpit decisions ──────────────────────
+# Uses the City database (superset of Country). Adjust path if needed.
+geoip2 /usr/share/GeoIP/GeoLite2-City.mmdb {
+ $geoip2_country_code country iso_code;
+}
+'
+ step "No existing geoip2 country_code config found — adding to map config"
+fi
+
+ # Collect server_name values from nginx configs to build the same-site
+ # referer map: one "~^1:https?://<host> 1;" line per distinct hostname.
+ # This code runs at top level, so plain assignments are used: `local` is
+ # only valid inside a function and its failure would abort under `set -e`.
+ REFERER_ENTRIES=""
+ _jsc_domain_seen=()
+ for _conf in /etc/nginx/conf.d/*.conf /etc/nginx/sites-enabled/*; do
+     [[ -f "$_conf" ]] || continue
+     while read -r _sn; do
+         # Split into an array rather than an unquoted expansion so wildcard
+         # names such as *.example.com are not glob-expanded against $PWD.
+         read -r -a _jsc_names <<< "$_sn" || true
+         for _d in "${_jsc_names[@]:-}"; do
+             # Strip the terminating ";" first so ";" and "_;" are recognised.
+             _d="${_d%;}"
+             # Skip empties, the catch-all "_", IP literals, and regex names
+             # ("~..."), which cannot be turned into a referer pattern safely.
+             [[ -z "$_d" || "$_d" == "server_name" || "$_d" == "_" || "$_d" =~ ^[0-9] || "$_d" == "~"* ]] && continue
+             [[ " ${_jsc_domain_seen[*]:-} " == *" $_d "* ]] && continue
+             _jsc_domain_seen+=("$_d")
+             _d_escaped="${_d//./\\.}"
+             # A real newline is required: the map text is written out
+             # verbatim, so a literal "\n" would end up in the nginx config.
+             REFERER_ENTRIES+=" ~^1:https?://${_d_escaped} 1;"$'\n'
+         done
+     done < <(grep -oP '^\s*server_name\s+\K[^;]+;?' "$_conf" 2>/dev/null)
+ done
+
+ if [[ -z "$REFERER_ENTRIES" ]]; then
+     warn "No server_name values found — same-site image bypass will not work"
+     warn "Images behind the challenge may cause redirect loops for browsers"
+ fi
+
+MAP_CONTENT='# JS cookie challenge — allowed bots and cookie check
+# Generated by add-nginx-js-challenge.sh — https://mylinux.work
+# Cookie: '"${COOKIE_NAME}"' Token: '"${COOKIE_VALUE:0:8}"'...
+# Generated: '"$(date -Iseconds)"'
+
+# ── Rate limit: challenge endpoint ───────────────────────────────────
+# Real users hit the challenge once and keep the cookie. Headless bot farms
+# spawn fresh browsers per request, hitting the challenge every time.
+# Rate: '"${CHALLENGE_RATE}"'r/m with burst of '"${CHALLENGE_BURST}"' — excess gets 444.
+limit_req_zone $binary_remote_addr zone=jschallenge:10m rate='"${CHALLENGE_RATE}"'r/m;
+
+# Bots that legitimately identify themselves and should bypass the JS check
+map $http_user_agent $is_allowed_bot {
+ default 0;
+
+ # Search engines
+ ~*Googlebot 1;
+ ~*bingbot 1;
+ ~*Slurp 1;
+ ~*DuckDuckBot 1;
+ ~*DuckAssistBot 1;
+ ~*Baiduspider 1;
+ ~*YandexBot 1;
+ ~*YandexFavicons 1;
+ ~*Applebot 1;
+ ~*Qwantbot 1;
+ ~*Qwantify 1;
+ ~*Bravebot 1;
+ ~*kagi-fetcher 1;
+ ~*Kagibot 1;
+ ~*Yahoo! 1;
+ ~*Yeti 1;
+
+ # Social media / link previews
+ ~*facebookexternalhit 1;
+ ~*Facebot 1;
+ ~*Twitterbot 1;
+ ~*LinkedInBot 1;
+ ~*Slackbot 1;
+ ~*Slack-ImgProxy 1;
+ ~*Discordbot 1;
+ ~*TelegramBot 1;
+ ~*WhatsApp 1;
+ ~*redditbot 1;
+ ~*ArenaUnfurlBot 1;
+
+ # Feed readers
+ ~*Feedly 1;
+ ~*Miniflux 1;
+ ~*FreshRSS 1;
+ ~*NewsBlur 1;
+ ~*Tiny\ Tiny\ RSS 1;
+ ~*Inoreader 1;
+ ~*NetNewsWire 1;
+
+ # Monitoring / uptime
+ ~*UptimeRobot 1;
+ ~*Pingdom 1;
+ ~*StatusCake 1;
+ ~*Blackbox-Exporter 1;
+
+ # AI answer bots (user-facing, not training crawlers)
+ ~*OAI-SearchBot 1;
+ ~*ChatGPT-User 1;
+ ~*Claude-Web 1;
+ ~*Claude-User 1;
+ ~*MistralAI-User 1;
+
+ # Archive / research
+ ~*archive\.org_bot 1;
+
+ # Apple Safari prefetch
+ ~*safarifetcherd 1;
+
+ # Link checkers / validators
+ ~*W3C_Validator 1;
+ ~*W3C-checklink 1;
+ ~*LinkChecker 1;
+ ~*link-check 1;
+
+ # Decentralized search
+ ~*yacybot 1;
+
+ # Add your own allowed bots below
+}
+
+# Validate the challenge cookie — exact token match
+map '"${NGINX_COOKIE_VAR}"' $js_cookie_valid {
+ default 0;
+ "'"${COOKIE_VALUE}"'" 1;
+}
+
+# Detect requests to the challenge page and download paths (prevent redirect loops)
+map $uri $is_challenge_uri {
+ default 0;
+ "'"${CHALLENGE_PATH}"'" 1;
+ ~^/downloads/ 1;
+ ~*\.(css|js|woff2?)$ 1;
+ ~*favicon 1;
+ ~*apple-touch-icon 1;
+}
+
+# Detect image sub-resource requests with same-site referer (browser <img> loads)
+# These bypass the challenge because: (a) images cannot execute JS challenges,
+# and (b) the same-site referer proves the browser loaded a page from this domain.
+# Direct image requests from scrapers (no referer or external referer) still get challenged.
+map $uri $is_image_request {
+ default 0;
+ ~*\.(png|jpe?g|gif|svg|webp|ico|avif)$ 1;
+}
+map "$is_image_request:$http_referer" $is_samesite_image {
+ default 0;
+'"${REFERER_ENTRIES}"'}
+
+# Combined check: need challenge if not allowed bot, no valid cookie, and not the challenge page
+map "$is_allowed_bot:$js_cookie_valid:$is_challenge_uri:$is_samesite_image" $needs_js_challenge {
+ default 1;
+ "1:0:0:0" 0;
+ "1:0:0:1" 0;
+ "1:0:1:0" 0;
+ "1:0:1:1" 0;
+ "1:1:0:0" 0;
+ "1:1:0:1" 0;
+ "1:1:1:0" 0;
+ "1:1:1:1" 0;
+ "0:1:0:0" 0;
+ "0:1:0:1" 0;
+ "0:1:1:0" 0;
+ "0:1:1:1" 0;
+ "0:0:1:0" 0;
+ "0:0:1:1" 0;
+ "0:0:0:1" 0;
+}
+'"${GEOIP2_BLOCK}"'
+# ── Tarpit: headless Chrome bots from suspect regions ─────────────────
+# Visitors from tarpit countries with no external referrer (passed through
+# the challenge redirect as the _bc_ref cookie) are served at a crawl.
+# This drains headless Chrome resources (~200-500 MB RAM per instance)
+# without giving the bot a clear "blocked" signal to adapt to.
+#
+# The _bc_ref cookie is set by the challenge page JS from the &ref= param.
+# It contains the original HTTP Referer before the 302 redirect destroyed it.
+# "direct" = no external referrer (typed URL or bot). Cookie expires in 120s.
+
+# Check if visitor is from a tarpit country (requires geoip2 module)
+map $geoip2_country_code $is_tarpit_country {
+ default 0;
+'"$(for cc in $TARPIT_COUNTRIES; do echo " \"${cc}\" 1;"; done)"'
+}
+
+# Tarpit only if: tarpit country + no external referrer + passed JS challenge
+map "$is_tarpit_country:$cookie__bc_ref" $tarpit_client {
+ default 0;
+ "1:direct" 1;
+ "1:" 1;
+}
+
+# Serve the challenge page
+server {
+ listen 127.0.0.1:18444;
+ server_name _;
+ root /var/www/js-challenge;
+
+ location / {
+ add_header Cache-Control "no-store, no-cache, must-revalidate" always;
+ add_header Pragma "no-cache" always;
+ try_files /challenge.html =404;
+ }
+}'
+
+# Write the generated map; an existing file is first copied to a
+# timestamped .bak. A dry run only reports the target path.
+if [[ "$DRY_RUN" != "true" ]]; then
+    if [[ -f "$CHALLENGE_MAP" ]]; then
+        cp "$CHALLENGE_MAP" "${CHALLENGE_MAP}.bak.${TIMESTAMP}"
+        warn "Existing config backed up"
+    fi
+    printf '%s\n' "$MAP_CONTENT" > "$CHALLENGE_MAP"
+    info "Map config created: ${CHALLENGE_MAP}"
+else
+    echo " Would create: ${CHALLENGE_MAP}"
+fi
+
+# =====================================================
+# Step 3: Show injection instructions
+# =====================================================
+# Prints the snippet to paste into each server block. The snippet is shown
+# in cyan: the colour is switched on by the first %b line and reset by the
+# last one. nginx variables are single-quoted so the shell leaves them alone.
+step "Server block configuration"
+
+printf '%s\n' \
+    "" \
+    " Add the following inside each server block (after your bot-block rules):" \
+    ""
+printf '%b\n' "${CYAN} # js-challenge-managed-start"
+printf '%s\n' \
+    " location = ${CHALLENGE_PATH} {" \
+    " limit_req zone=jschallenge burst=${CHALLENGE_BURST} nodelay;" \
+    " limit_req_status 444;" \
+    " proxy_pass http://127.0.0.1:18444/;" \
+    " }" \
+    "" \
+    " # JS cookie challenge — redirect non-JS visitors" \
+    ' if ($needs_js_challenge) {' \
+    " return 302 ${CHALLENGE_PATH}"'?r=$request_uri&ref=$http_referer;' \
+    " }" \
+    "" \
+    " # Tarpit headless Chrome bots from suspect GeoIP regions" \
+    ' if ($tarpit_client) {' \
+    ' set $limit_rate '"${TARPIT_RATE};" \
+    " }"
+printf '%b\n' " # js-challenge-managed-end${NC}"
+printf '%s\n' \
+    "" \
+    " Or re-run add-nginx-bot-block.sh to have it injected automatically" \
+    " (if supported in your version)." \
+    ""
+
+# =====================================================
+# Step 4: Validate nginx config
+# =====================================================
+step "Testing nginx configuration"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+ echo " Would run: nginx -t"
+else
+ if nginx -t 2>&1; then
+ info "nginx config valid"
+ else
+ echo -e "${RED}[ERROR] nginx config test failed${NC}" >&2
+ echo " Restore backup: ${CHALLENGE_MAP}.bak.${TIMESTAMP}" >&2
+ exit 1
+ fi
+fi
+
+# =====================================================
+# Step 5: Reload nginx
+# =====================================================
+# Applies the validated config; a dry run only prints the command.
+step "Reloading nginx"
+
+if [[ "$DRY_RUN" != "true" ]]; then
+    systemctl reload nginx
+    info "nginx reloaded"
+else
+    echo " Would run: systemctl reload nginx"
+fi
+
+# =====================================================
+# Summary
+# =====================================================
+echo ""
+echo -e "${BOLD}Done.${NC}"
+echo ""
+echo " Challenge map: ${CHALLENGE_MAP}"
+echo " Challenge page: ${CHALLENGE_HTML}"
+echo " State file: ${STATE_FILE}"
+echo " Cookie name: ${COOKIE_NAME}"
+echo " Cookie token: ${COOKIE_VALUE:0:8}... (32 hex chars)"
+echo " Cookie TTL: ${COOKIE_MAX_AGE}s"
+echo " Tarpit countries: ${TARPIT_COUNTRIES}"
+echo " Tarpit rate: ${TARPIT_RATE} bytes/sec"
+echo " Challenge rate: ${CHALLENGE_RATE}r/m (burst: ${CHALLENGE_BURST})"
+echo ""
+echo " To rotate credentials (invalidate bot-cached cookies):"
+echo " sudo $(basename "$0")"
+echo ""
+echo " To remove: sudo $(basename "$0") --remove"
+echo ""
+echo " Test (bot without cookie gets redirected to challenge):"
+echo " curl -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+echo " Expected: 302"
+echo ""
+echo " Test (browser completes challenge — 302 → 200):"
+echo " Open https://yourdomain.com in a browser"
+echo " Expected: brief redirect then page loads normally"
+echo ""
+echo " Test (old static bypass no longer works):"
+echo " curl -b '_bc=verified' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+echo " Expected: 302 (not 200 — old cookie is invalid)"
+echo ""
+echo " Test (rate limit on challenge endpoint):"
+echo " for i in 1 2 3 4 5; do curl -o /dev/null -s -w \"\$i: %{http_code}\n\" https://yourdomain.com${CHALLENGE_PATH}; done"
+echo " Expected: first 3 return 200, then 444 (rate limited)"
+echo ""
+echo " Test (allowed bot bypasses challenge):"
+echo " curl -A 'Googlebot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
+echo " Expected: 200"
diff --git a/add-prometheus-tls.sh b/add-prometheus-tls.sh
new file mode 100644
index 0000000..1ed1d95
--- /dev/null
+++ b/add-prometheus-tls.sh
@@ -0,0 +1,1234 @@
+#!/bin/bash
+################################################################################
+# Script Name: add-prometheus-tls.sh
+# Version: 1.01
+# Description: Add TLS encryption to Prometheus and node_exporter
+# Auto-detects whether this is the Prometheus server (generates
+# a CA + server cert) or a target node (configures node_exporter
+# with a provided or generated cert signed by the Prometheus CA).
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Role Detection:
+# - "server" — Prometheus is installed: generates CA, server cert,
+# configures prometheus.yml for TLS scraping, and optionally
+# configures the local node_exporter too.
+# - "node" — Only node_exporter found: configures node_exporter with
+# TLS using a cert signed by the Prometheus CA (CA cert must
+# be provided or copied from the server).
+#
+# Usage:
+# sudo ./add-prometheus-tls.sh # Auto-detect role
+# sudo ./add-prometheus-tls.sh --role server # Force server mode
+# sudo ./add-prometheus-tls.sh --role node # Force node mode
+# sudo ./add-prometheus-tls.sh --role node --ca-cert /path/to/ca.crt --ca-key /path/to/ca.key
+# sudo ./add-prometheus-tls.sh --deploy host1,host2 # Push TLS to remote nodes
+# sudo ./add-prometheus-tls.sh --deploy-file hosts.txt # Push TLS to nodes from file
+# sudo ./add-prometheus-tls.sh --status # Show TLS status
+# sudo ./add-prometheus-tls.sh --remove # Remove TLS config
+#
+################################################################################
+
+set -euo pipefail
+
+SCRIPT_VERSION="1.0"
+
+# Paths
+PROM_DIR="/etc/prometheus"
+PROM_TLS_DIR="${PROM_DIR}/tls"
+NODE_EXPORTER_DIR="/etc/node_exporter"
+NODE_EXPORTER_TLS_DIR="${NODE_EXPORTER_DIR}/tls"
+BACKUP_DIR="/var/backups/prometheus-tls"
+
+# CA defaults
+CA_DAYS=3650
+CERT_DAYS=825
+KEY_BITS=4096
+
+# Runtime
+ROLE="" # "server" or "node"
+CA_CERT="" # path to existing CA cert (node mode)
+CA_KEY="" # path to existing CA key (node mode)
+PROM_USER="prometheus"
+NODE_USER="node_exporter"
+HOSTNAME_FQDN=""
+DEPLOY_TARGETS="" # comma-separated hosts for --deploy
+DEPLOY_FILE="" # file containing hosts for --deploy-file
+SSH_USER="root" # SSH user for deploy
+SSH_KEY="" # optional SSH key path
+DRY_RUN=false
+DEBUG=${DEBUG:-}
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2
+ exit 1
+}
+
+warn() {
+ echo "WARNING: $1" >&2
+}
+
+info() {
+ echo "[INFO] $1"
+}
+
+debug_echo() {
+ if [[ -n "$DEBUG" ]]; then
+ echo "[DEBUG] $*" >&2
+ fi
+}
+
+backup_file() {
+ local file="$1"
+ if [[ ! -f "$file" ]]; then
+ return 0
+ fi
+ local timestamp
+ timestamp=$(date +%F_%H%M%S)
+ local backup_path="${BACKUP_DIR}/${timestamp}"
+ mkdir -p "$backup_path"
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would backup $file -> ${backup_path}/$(basename "$file")"
+ else
+ cp -a "$file" "${backup_path}/"
+ info "Backed up $file -> ${backup_path}/$(basename "$file")"
+ fi
+}
+
+# ============================================================================
+# ROLE DETECTION
+# ============================================================================
+
+detect_role() {
+ local has_prometheus=false
+ local has_node_exporter=false
+
+ if systemctl list-unit-files prometheus.service &>/dev/null && \
+ systemctl cat prometheus.service &>/dev/null; then
+ has_prometheus=true
+ fi
+
+ if systemctl list-unit-files node_exporter.service &>/dev/null && \
+ systemctl cat node_exporter.service &>/dev/null; then
+ has_node_exporter=true
+ fi
+
+ if [[ "$has_prometheus" == true ]]; then
+ ROLE="server"
+ info "Detected role: server (Prometheus installed)"
+ elif [[ "$has_node_exporter" == true ]]; then
+ ROLE="node"
+ info "Detected role: node (node_exporter only)"
+ else
+ die "Neither Prometheus nor node_exporter detected. Install them first."
+ fi
+}
+
+detect_hostname() {
+ if [[ -n "$HOSTNAME_FQDN" ]]; then
+ return 0
+ fi
+
+ HOSTNAME_FQDN=$(hostname -f 2>/dev/null || hostname)
+ info "Using hostname: ${HOSTNAME_FQDN}"
+}
+
+# ============================================================================
+# CERTIFICATE GENERATION
+# ============================================================================
+
+generate_ca() {
+ local ca_dir="${PROM_TLS_DIR}"
+ local ca_cert="${ca_dir}/ca.crt"
+ local ca_key="${ca_dir}/ca.key"
+
+ if [[ -f "$ca_cert" && -f "$ca_key" ]]; then
+ echo ""
+ echo " CA certificate already exists at ${ca_cert}"
+ read -r -p " Regenerate CA? (will invalidate all existing certs) [y/N]: " confirm
+ if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
+ info "Keeping existing CA"
+ CA_CERT="$ca_cert"
+ CA_KEY="$ca_key"
+ return 0
+ fi
+ backup_file "$ca_cert"
+ backup_file "$ca_key"
+ fi
+
+ info "Generating Certificate Authority..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would generate CA cert at ${ca_cert}"
+ CA_CERT="$ca_cert"
+ CA_KEY="$ca_key"
+ return 0
+ fi
+
+ mkdir -p "$ca_dir"
+
+ openssl genrsa -out "$ca_key" "$KEY_BITS" 2>/dev/null
+ openssl req -x509 -new -nodes \
+ -key "$ca_key" \
+ -sha256 \
+ -days "$CA_DAYS" \
+ -out "$ca_cert" \
+ -subj "/CN=Prometheus CA/O=Prometheus/OU=Monitoring" \
+ 2>/dev/null
+
+ chmod 644 "$ca_cert"
+ chmod 600 "$ca_key"
+
+ CA_CERT="$ca_cert"
+ CA_KEY="$ca_key"
+
+ info "CA certificate created: ${ca_cert}"
+ info "CA key created: ${ca_key} (keep this safe!)"
+}
+
+generate_cert() {
+ local name="$1" # e.g., "prometheus" or "node_exporter"
+ local cert_dir="$2" # where to put the cert
+ local owner="$3" # file owner user
+
+ local cert_file="${cert_dir}/${name}.crt"
+ local key_file="${cert_dir}/${name}.key"
+
+ if [[ -f "$cert_file" && -f "$key_file" ]]; then
+ echo ""
+ echo " Certificate for ${name} already exists."
+ read -r -p " Regenerate? [y/N]: " confirm
+ if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
+ info "Keeping existing ${name} certificate"
+ return 0
+ fi
+ backup_file "$cert_file"
+ backup_file "$key_file"
+ fi
+
+ info "Generating certificate for ${name}..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would generate cert at ${cert_file}"
+ return 0
+ fi
+
+ mkdir -p "$cert_dir"
+
+ # Create CSR config with SANs
+ local csr_conf
+ csr_conf=$(mktemp)
+ cat > "$csr_conf" </dev/null | awk '{print $1}' || echo "127.0.0.1")
+CSREOF
+
+ # Create ext config for signing
+ local ext_conf
+ ext_conf=$(mktemp)
+ cat > "$ext_conf" </dev/null | awk '{print $1}' || echo "127.0.0.1")
+EXTEOF
+
+ # Generate key
+ openssl genrsa -out "$key_file" "$KEY_BITS" 2>/dev/null
+
+ # Generate CSR
+ local csr_file
+ csr_file=$(mktemp)
+ openssl req -new \
+ -key "$key_file" \
+ -out "$csr_file" \
+ -config "$csr_conf" \
+ 2>/dev/null
+
+ # Sign with CA
+ openssl x509 -req \
+ -in "$csr_file" \
+ -CA "$CA_CERT" \
+ -CAkey "$CA_KEY" \
+ -CAcreateserial \
+ -out "$cert_file" \
+ -days "$CERT_DAYS" \
+ -sha256 \
+ -extfile "$ext_conf" \
+ 2>/dev/null
+
+ # Set ownership
+ chmod 644 "$cert_file"
+ chmod 600 "$key_file"
+ if id "$owner" &>/dev/null; then
+ chown "${owner}:${owner}" "$cert_file" "$key_file"
+ fi
+
+ # Cleanup temp files
+ rm -f "$csr_conf" "$ext_conf" "$csr_file"
+
+ info "Certificate created: ${cert_file}"
+ info "Key created: ${key_file}"
+}
+
+# ============================================================================
+# PROMETHEUS SERVER CONFIGURATION
+# ============================================================================
+
+configure_prometheus_tls() {
+ local web_config="${PROM_DIR}/web.yml"
+
+ if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
+ echo ""
+ echo " Prometheus web.yml already has TLS config."
+ read -r -p " Overwrite? [y/N]: " confirm
+ if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
+ info "Keeping existing Prometheus TLS config"
+ return 0
+ fi
+ backup_file "$web_config"
+ fi
+
+ info "Configuring Prometheus TLS (web.yml)..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would create ${web_config}"
+ return 0
+ fi
+
+ cat > "$web_config" </dev/null; then
+ chown "${PROM_USER}:${PROM_USER}" "$web_config"
+ fi
+ chmod 644 "$web_config"
+
+ # Ensure --web.config.file is in the systemd unit
+ update_prometheus_service
+
+ info "Prometheus web.yml created: ${web_config}"
+}
+
+update_prometheus_service() {
+ local service_file
+ service_file=$(systemctl show -p FragmentPath prometheus.service 2>/dev/null | cut -d= -f2)
+
+ if [[ -z "$service_file" || ! -f "$service_file" ]]; then
+ warn "Could not find prometheus.service unit file"
+ warn "Manually add '--web.config.file=${PROM_DIR}/web.yml' to Prometheus startup"
+ return 0
+ fi
+
+ if grep -q "web.config.file" "$service_file" 2>/dev/null; then
+ debug_echo "Prometheus service already has --web.config.file flag"
+ return 0
+ fi
+
+ info "Updating Prometheus systemd service to use web.yml..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would add --web.config.file to ${service_file}"
+ return 0
+ fi
+
+ backup_file "$service_file"
+
+ # Add --web.config.file to the ExecStart line
+ if grep -qE '^ExecStart=.*prometheus' "$service_file"; then
+ sed -i '/^ExecStart=.*prometheus/ s|$| \\\n --web.config.file='"${PROM_DIR}"'/web.yml|' "$service_file"
+ systemctl daemon-reload
+ info "Added --web.config.file to Prometheus service"
+ else
+ warn "Could not auto-patch service file. Add manually:"
+ warn " --web.config.file=${PROM_DIR}/web.yml"
+ fi
+}
+
+update_prometheus_scrape_configs() {
+ local prom_config="${PROM_DIR}/prometheus.yml"
+
+ if [[ ! -f "$prom_config" ]]; then
+ warn "prometheus.yml not found at ${prom_config} — skipping scrape config update"
+ return 0
+ fi
+
+ info "Updating prometheus.yml scrape configs for TLS..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would update scrape configs in ${prom_config}"
+ return 0
+ fi
+
+ backup_file "$prom_config"
+
+ # Check if tls_config already exists for node targets
+ if grep -q "tls_config" "$prom_config" 2>/dev/null; then
+ info "prometheus.yml already contains tls_config entries"
+ echo " Review ${prom_config} to ensure all scrape jobs use TLS."
+ return 0
+ fi
+
+ # Create a TLS snippet file that can be included
+ local tls_snippet="${PROM_DIR}/tls_scrape.yml"
+ cat > "$tls_snippet" </dev/null; then
+ chown "${PROM_USER}:${PROM_USER}" "$tls_snippet"
+ fi
+
+ # Auto-patch: update scheme and add tls_config to existing jobs
+ # Update scheme: http -> https for node jobs
+ local tmpfile
+ tmpfile=$(mktemp)
+ local in_job=false
+ local job_patched=false
+
+ while IFS= read -r line; do
+ echo "$line" >> "$tmpfile"
+
+ # Detect job_name lines
+ if [[ "$line" =~ ^[[:space:]]*-[[:space:]]*job_name: ]]; then
+ in_job=true
+ job_patched=false
+ fi
+
+ # If we're in a job block and find scheme: http (not https), patch it
+ if [[ "$in_job" == true && "$job_patched" == false ]]; then
+ if [[ "$line" =~ ^[[:space:]]*scheme:[[:space:]]*http[[:space:]]*$ ]]; then
+ # Replace this line with https + tls_config
+ sed -i '$ s|scheme: http|scheme: https|' "$tmpfile"
+ # Determine indentation
+ local indent
+ indent=$(echo "$line" | sed 's/\(^[[:space:]]*\).*/\1/')
+ echo "${indent}tls_config:" >> "$tmpfile"
+ echo "${indent} ca_file: ${PROM_TLS_DIR}/ca.crt" >> "$tmpfile"
+ job_patched=true
+ fi
+ fi
+ done < "$prom_config"
+
+ # If no scheme: lines were found, add a note
+ if ! grep -q "scheme: https" "$tmpfile" 2>/dev/null; then
+ info "No 'scheme: http' lines found to auto-patch."
+ info "Reference TLS snippet created at: ${tls_snippet}"
+ info "Manually update your scrape jobs to use scheme: https with tls_config."
+ rm -f "$tmpfile"
+ return 0
+ fi
+
+ cp "$tmpfile" "$prom_config"
+ rm -f "$tmpfile"
+
+ if id "$PROM_USER" &>/dev/null; then
+ chown "${PROM_USER}:${PROM_USER}" "$prom_config"
+ fi
+
+ info "Updated scrape configs in ${prom_config}"
+ info "TLS reference snippet saved to: ${tls_snippet}"
+}
+
+# ============================================================================
+# NODE EXPORTER CONFIGURATION
+# ============================================================================
+
+configure_node_exporter_tls() {
+ local tls_dir="$NODE_EXPORTER_TLS_DIR"
+ local web_config="${NODE_EXPORTER_DIR}/web.yml"
+
+ mkdir -p "$tls_dir" "$NODE_EXPORTER_DIR"
+
+ # Generate cert for this node
+ generate_cert "node_exporter" "$tls_dir" "$NODE_USER"
+
+ # Copy CA cert to node_exporter dir for reference
+ if [[ "$DRY_RUN" != true && -f "$CA_CERT" ]]; then
+ cp -a "$CA_CERT" "${tls_dir}/ca.crt"
+ if id "$NODE_USER" &>/dev/null; then
+ chown "${NODE_USER}:${NODE_USER}" "${tls_dir}/ca.crt"
+ fi
+ fi
+
+ if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
+ echo ""
+ echo " node_exporter web.yml already has TLS config."
+ read -r -p " Overwrite? [y/N]: " confirm
+ if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
+ info "Keeping existing node_exporter TLS config"
+ update_node_exporter_service
+ return 0
+ fi
+ backup_file "$web_config"
+ fi
+
+ info "Configuring node_exporter TLS (web.yml)..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would create ${web_config}"
+ return 0
+ fi
+
+ cat > "$web_config" </dev/null; then
+ chown "${NODE_USER}:${NODE_USER}" "$web_config"
+ fi
+ chmod 644 "$web_config"
+
+ update_node_exporter_service
+
+ info "node_exporter web.yml created: ${web_config}"
+}
+
+update_node_exporter_service() {
+ local service_file
+ service_file=$(systemctl show -p FragmentPath node_exporter.service 2>/dev/null | cut -d= -f2)
+
+ if [[ -z "$service_file" || ! -f "$service_file" ]]; then
+ warn "Could not find node_exporter.service unit file"
+ warn "Manually add '--web.config.file=${NODE_EXPORTER_DIR}/web.yml' to node_exporter startup"
+ return 0
+ fi
+
+ if grep -q "web.config.file" "$service_file" 2>/dev/null; then
+ debug_echo "node_exporter service already has --web.config.file flag"
+ return 0
+ fi
+
+ info "Updating node_exporter systemd service to use web.yml..."
+ if [[ "$DRY_RUN" == true ]]; then
+ info "[DRY RUN] Would add --web.config.file to ${service_file}"
+ return 0
+ fi
+
+ backup_file "$service_file"
+
+ if grep -qE '^ExecStart=.*node_exporter' "$service_file"; then
+ sed -i '/^ExecStart=.*node_exporter/ s|$| \\\n --web.config.file='"${NODE_EXPORTER_DIR}"'/web.yml|' "$service_file"
+ systemctl daemon-reload
+ info "Added --web.config.file to node_exporter service"
+ else
+ warn "Could not auto-patch service file. Add manually:"
+ warn " --web.config.file=${NODE_EXPORTER_DIR}/web.yml"
+ fi
+}
+
+# ============================================================================
+# STATUS
+# ============================================================================
+
+# Print a read-only TLS status report for Prometheus and node_exporter.
+#
+# For each service whose systemd unit can be found, reports whether it is
+# running, whether its web.yml contains a tls_server_config section, when the
+# certificates expire, and whether the local endpoint answers over HTTPS or
+# plain HTTP.
+#
+# Globals read: SCRIPT_VERSION, PROM_DIR, PROM_TLS_DIR, NODE_EXPORTER_DIR,
+#               NODE_EXPORTER_TLS_DIR
+# Outputs:      the report on stdout
+show_status() {
+    local svc_state not_after
+
+    printf '\n%s\n%s\n%s\n\n' \
+        "==========================================" \
+        "Prometheus TLS Status (v${SCRIPT_VERSION})" \
+        "=========================================="
+
+    # ---- Prometheus server ----
+    printf '%s\n' "--- Prometheus Server ---"
+    if ! systemctl cat prometheus.service >/dev/null 2>&1; then
+        printf '%s\n' "  Not installed"
+    else
+        svc_state="installed"
+        if systemctl is-active --quiet prometheus 2>/dev/null; then
+            svc_state="running"
+        fi
+        printf '%s\n' "  Service:     ${svc_state}"
+
+        if [[ ! -f "${PROM_DIR}/web.yml" ]] || ! grep -q "tls_server_config" "${PROM_DIR}/web.yml" 2>/dev/null; then
+            printf '%s\n' "  TLS:         ✗ not configured"
+        else
+            printf '%s\n' "  TLS:         ✓ enabled (web.yml)"
+        fi
+
+        if [[ ! -f "${PROM_TLS_DIR}/ca.crt" ]]; then
+            printf '%s\n' "  CA cert:     ✗ not found"
+        else
+            not_after=$(openssl x509 -enddate -noout -in "${PROM_TLS_DIR}/ca.crt" 2>/dev/null | cut -d= -f2)
+            printf '%s\n' "  CA cert:     ✓ present (expires: ${not_after})"
+        fi
+
+        if [[ ! -f "${PROM_TLS_DIR}/prometheus.crt" ]]; then
+            printf '%s\n' "  Server cert: ✗ not found"
+        else
+            not_after=$(openssl x509 -enddate -noout -in "${PROM_TLS_DIR}/prometheus.crt" 2>/dev/null | cut -d= -f2)
+            printf '%s\n' "  Server cert: ✓ present (expires: ${not_after})"
+        fi
+
+        # Probe the live endpoint: HTTPS first, then fall back to plain HTTP.
+        if curl -sk --max-time 3 "https://localhost:9090/-/healthy" >/dev/null 2>&1; then
+            printf '%s\n' "  HTTPS:       ✓ responding on https://localhost:9090"
+        elif curl -s --max-time 3 "http://localhost:9090/-/healthy" >/dev/null 2>&1; then
+            printf '%s\n' "  HTTPS:       ✗ still serving plain HTTP"
+        else
+            printf '%s\n' "  HTTPS:       ? could not connect"
+        fi
+    fi
+
+    printf '\n'
+
+    # ---- node_exporter ----
+    printf '%s\n' "--- node_exporter ---"
+    if ! systemctl cat node_exporter.service >/dev/null 2>&1; then
+        printf '%s\n' "  Not installed"
+    else
+        svc_state="installed"
+        if systemctl is-active --quiet node_exporter 2>/dev/null; then
+            svc_state="running"
+        fi
+        printf '%s\n' "  Service:     ${svc_state}"
+
+        if [[ ! -f "${NODE_EXPORTER_DIR}/web.yml" ]] || ! grep -q "tls_server_config" "${NODE_EXPORTER_DIR}/web.yml" 2>/dev/null; then
+            printf '%s\n' "  TLS:         ✗ not configured"
+        else
+            printf '%s\n' "  TLS:         ✓ enabled (web.yml)"
+        fi
+
+        if [[ ! -f "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt" ]]; then
+            printf '%s\n' "  Cert:        ✗ not found"
+        else
+            not_after=$(openssl x509 -enddate -noout -in "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt" 2>/dev/null | cut -d= -f2)
+            printf '%s\n' "  Cert:        ✓ present (expires: ${not_after})"
+        fi
+
+        # Probe the live endpoint: HTTPS first, then fall back to plain HTTP.
+        if curl -sk --max-time 3 "https://localhost:9100/metrics" >/dev/null 2>&1; then
+            printf '%s\n' "  HTTPS:       ✓ responding on https://localhost:9100"
+        elif curl -s --max-time 3 "http://localhost:9100/metrics" >/dev/null 2>&1; then
+            printf '%s\n' "  HTTPS:       ✗ still serving plain HTTP"
+        else
+            printf '%s\n' "  HTTPS:       ? could not connect"
+        fi
+    fi
+
+    printf '\n'
+}
+
+# ============================================================================
+# REMOVE
+# ============================================================================
+
+do_remove() {
+ echo ""
+ echo "=========================================="
+ echo "Remove Prometheus TLS Configuration"
+ echo "=========================================="
+ echo ""
+
+ # Remove Prometheus TLS
+ if [[ -f "${PROM_DIR}/web.yml" ]]; then
+ backup_file "${PROM_DIR}/web.yml"
+ if [[ "$DRY_RUN" != true ]]; then
+ rm -f "${PROM_DIR}/web.yml"
+ fi
+ info "Removed Prometheus web.yml"
+
+ # Remove --web.config.file from service
+ local prom_service
+ prom_service=$(systemctl show -p FragmentPath prometheus.service 2>/dev/null | cut -d= -f2)
+ if [[ -n "$prom_service" && -f "$prom_service" ]] && grep -q "web.config.file" "$prom_service"; then
+ backup_file "$prom_service"
+ if [[ "$DRY_RUN" != true ]]; then
+ sed -i '/--web.config.file/d' "$prom_service"
+ # Clean up trailing backslash if left dangling
+ sed -i '${/^[[:space:]]*\\[[:space:]]*$/d}' "$prom_service"
+ systemctl daemon-reload
+ fi
+ info "Removed --web.config.file from prometheus.service"
+ fi
+
+ if [[ "$DRY_RUN" != true ]]; then
+ systemctl restart prometheus 2>/dev/null || warn "Could not restart Prometheus"
+ fi
+ fi
+
+ # Remove node_exporter TLS
+ if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]]; then
+ backup_file "${NODE_EXPORTER_DIR}/web.yml"
+ if [[ "$DRY_RUN" != true ]]; then
+ rm -f "${NODE_EXPORTER_DIR}/web.yml"
+ fi
+ info "Removed node_exporter web.yml"
+
+ local node_service
+ node_service=$(systemctl show -p FragmentPath node_exporter.service 2>/dev/null | cut -d= -f2)
+ if [[ -n "$node_service" && -f "$node_service" ]] && grep -q "web.config.file" "$node_service"; then
+ backup_file "$node_service"
+ if [[ "$DRY_RUN" != true ]]; then
+ sed -i '/--web.config.file/d' "$node_service"
+ sed -i '${/^[[:space:]]*\\[[:space:]]*$/d}' "$node_service"
+ systemctl daemon-reload
+ fi
+ info "Removed --web.config.file from node_exporter.service"
+ fi
+
+ if [[ "$DRY_RUN" != true ]]; then
+ systemctl restart node_exporter 2>/dev/null || warn "Could not restart node_exporter"
+ fi
+ fi
+
+ echo ""
+ info "TLS configuration removed. Backups saved to: ${BACKUP_DIR}"
+ info "Note: Certificate files in ${PROM_TLS_DIR} and ${NODE_EXPORTER_TLS_DIR} were NOT deleted."
+ info "Remove them manually if no longer needed."
+}
+
+# ============================================================================
+# SERVER SETUP
+# ============================================================================
+
+# Run the complete TLS setup on a Prometheus server.
+#
+# Steps: detect the hostname, generate the CA and the Prometheus server
+# certificate, write the Prometheus TLS config, update the scrape configs,
+# optionally configure a local node_exporter, restart the services and print
+# a summary with instructions for remote nodes.
+#
+# Globals read: SCRIPT_VERSION, PROM_TLS_DIR, BACKUP_DIR, PROM_USER,
+#               NODE_EXPORTER_DIR, DRY_RUN
+# Input:        may prompt on stdin when a local node_exporter unit exists
+setup_server() {
+    local configure_node=""
+
+    echo ""
+    echo "=========================================="
+    echo "Prometheus Server TLS Setup"
+    echo "Version: ${SCRIPT_VERSION}"
+    echo "=========================================="
+    echo ""
+
+    detect_hostname
+    mkdir -p "$PROM_TLS_DIR" "$BACKUP_DIR"
+
+    # Step 1: Generate CA
+    echo ""
+    echo "=== Step 1: Certificate Authority ==="
+    generate_ca
+
+    # Step 2: Generate Prometheus server cert
+    echo ""
+    echo "=== Step 2: Prometheus Server Certificate ==="
+    generate_cert "prometheus" "$PROM_TLS_DIR" "$PROM_USER"
+
+    # Step 3: Configure Prometheus web.yml
+    echo ""
+    echo "=== Step 3: Prometheus TLS Configuration ==="
+    configure_prometheus_tls
+
+    # Step 4: Update scrape configs
+    echo ""
+    echo "=== Step 4: Scrape Configuration ==="
+    update_prometheus_scrape_configs
+
+    # Step 5: Optionally configure local node_exporter
+    if systemctl cat node_exporter.service &>/dev/null; then
+        echo ""
+        echo "=== Step 5: Local node_exporter ==="
+        echo "  node_exporter detected on this server."
+        # EOF on stdin (non-interactive run) must not abort the setup; an
+        # empty answer selects the advertised default, "Y".
+        read -r -p "  Configure TLS for local node_exporter too? [Y/n]: " configure_node || true
+        if [[ ! "$configure_node" =~ ^[Nn]$ ]]; then
+            configure_node_exporter_tls
+        fi
+    fi
+
+    # Step 6: Restart services
+    echo ""
+    echo "=== Restarting Services ==="
+    if [[ "$DRY_RUN" != true ]]; then
+        info "Restarting Prometheus..."
+        # The restart is part of the condition so that a failing restart is
+        # reported below instead of terminating the script under errexit.
+        if systemctl restart prometheus && systemctl is-active --quiet prometheus; then
+            info "Prometheus restarted successfully"
+        else
+            warn "Prometheus failed to start — check: journalctl -u prometheus"
+        fi
+
+        if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]]; then
+            info "Restarting node_exporter..."
+            if systemctl restart node_exporter && systemctl is-active --quiet node_exporter; then
+                info "node_exporter restarted successfully"
+            else
+                warn "node_exporter failed to start — check: journalctl -u node_exporter"
+            fi
+        fi
+    else
+        info "[DRY RUN] Would restart prometheus and node_exporter"
+    fi
+
+    # Summary
+    echo ""
+    echo "=========================================="
+    echo "TLS Setup Complete!"
+    echo "=========================================="
+    echo ""
+    echo "CA Certificate:     ${PROM_TLS_DIR}/ca.crt"
+    echo "CA Key:             ${PROM_TLS_DIR}/ca.key"
+    echo "Server Certificate: ${PROM_TLS_DIR}/prometheus.crt"
+    echo "Backups:            ${BACKUP_DIR}"
+    echo ""
+    echo "To configure remote nodes, copy the CA cert and key to each node:"
+    echo ""
+    echo "  scp ${PROM_TLS_DIR}/ca.crt ${PROM_TLS_DIR}/ca.key user@node:/tmp/"
+    echo "  ssh user@node 'sudo ./add-prometheus-tls.sh --role node --ca-cert /tmp/ca.crt --ca-key /tmp/ca.key'"
+    echo ""
+    echo "To verify: curl -s --cacert ${PROM_TLS_DIR}/ca.crt https://localhost:9090/-/healthy"
+    echo ""
+}
+
+# ============================================================================
+# NODE SETUP
+# ============================================================================
+
+setup_node() {
+ echo ""
+ echo "=========================================="
+ echo "Node Exporter TLS Setup"
+ echo "Version: ${SCRIPT_VERSION}"
+ echo "=========================================="
+ echo ""
+
+ detect_hostname
+ mkdir -p "$NODE_EXPORTER_TLS_DIR" "$BACKUP_DIR"
+
+ # Check for CA cert/key
+ if [[ -z "$CA_CERT" || -z "$CA_KEY" ]]; then
+ # Check if they exist locally (maybe copied from server)
+ if [[ -f "${NODE_EXPORTER_TLS_DIR}/ca.crt" && -f "${NODE_EXPORTER_TLS_DIR}/ca.key" ]]; then
+ CA_CERT="${NODE_EXPORTER_TLS_DIR}/ca.crt"
+ CA_KEY="${NODE_EXPORTER_TLS_DIR}/ca.key"
+ info "Found existing CA files in ${NODE_EXPORTER_TLS_DIR}"
+ elif [[ -f "${PROM_TLS_DIR}/ca.crt" && -f "${PROM_TLS_DIR}/ca.key" ]]; then
+ CA_CERT="${PROM_TLS_DIR}/ca.crt"
+ CA_KEY="${PROM_TLS_DIR}/ca.key"
+ info "Found existing CA files in ${PROM_TLS_DIR}"
+ else
+ echo " No CA certificate found. You need the CA cert and key from your"
+ echo " Prometheus server to sign this node's certificate."
+ echo ""
+ echo " Copy them from the Prometheus server:"
+ echo " scp prometheus-server:${PROM_TLS_DIR}/ca.crt /tmp/"
+ echo " scp prometheus-server:${PROM_TLS_DIR}/ca.key /tmp/"
+ echo ""
+ read -r -p " Path to CA certificate: " CA_CERT
+ read -r -p " Path to CA key: " CA_KEY
+
+ if [[ ! -f "$CA_CERT" ]]; then
+ die "CA certificate not found: ${CA_CERT}"
+ fi
+ if [[ ! -f "$CA_KEY" ]]; then
+ die "CA key not found: ${CA_KEY}"
+ fi
+ fi
+ else
+ # Validate provided paths
+ if [[ ! -f "$CA_CERT" ]]; then
+ die "CA certificate not found: ${CA_CERT}"
+ fi
+ if [[ ! -f "$CA_KEY" ]]; then
+ die "CA key not found: ${CA_KEY}"
+ fi
+ fi
+
+ # Copy CA files to node_exporter tls dir
+ if [[ "$DRY_RUN" != true ]]; then
+ cp -a "$CA_CERT" "${NODE_EXPORTER_TLS_DIR}/ca.crt"
+ cp -a "$CA_KEY" "${NODE_EXPORTER_TLS_DIR}/ca.key"
+ chmod 644 "${NODE_EXPORTER_TLS_DIR}/ca.crt"
+ chmod 600 "${NODE_EXPORTER_TLS_DIR}/ca.key"
+ fi
+
+ # Generate cert and configure
+ echo ""
+ echo "=== Generating node_exporter Certificate ==="
+ configure_node_exporter_tls
+
+ # Restart
+ echo ""
+ echo "=== Restarting node_exporter ==="
+ if [[ "$DRY_RUN" != true ]]; then
+ systemctl restart node_exporter
+ if systemctl is-active --quiet node_exporter; then
+ info "node_exporter restarted successfully"
+ else
+ warn "node_exporter failed to start — check: journalctl -u node_exporter"
+ fi
+ else
+ info "[DRY RUN] Would restart node_exporter"
+ fi
+
+ # Summary
+ echo ""
+ echo "=========================================="
+ echo "node_exporter TLS Setup Complete!"
+ echo "=========================================="
+ echo ""
+ echo "Certificate: ${NODE_EXPORTER_TLS_DIR}/node_exporter.crt"
+ echo "Key: ${NODE_EXPORTER_TLS_DIR}/node_exporter.key"
+ echo "Backups: ${BACKUP_DIR}"
+ echo ""
+ echo "Add this node to your Prometheus server's prometheus.yml:"
+ echo ""
+ echo " - job_name: 'node'"
+ echo " scheme: https"
+ echo " tls_config:"
+ echo " ca_file: ${PROM_TLS_DIR}/ca.crt"
+ echo " static_configs:"
+ echo " - targets: ['${HOSTNAME_FQDN}:9100']"
+ echo ""
+ echo "To verify: curl -s --cacert ${NODE_EXPORTER_TLS_DIR}/ca.crt https://localhost:9100/metrics | head"
+ echo ""
+}
+
+# ============================================================================
+# REMOTE DEPLOY
+# ============================================================================
+
+build_ssh_cmd() {
+ local ssh_opts="-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10"
+ if [[ -n "$SSH_KEY" ]]; then
+ ssh_opts+=" -i ${SSH_KEY}"
+ fi
+ echo "ssh ${ssh_opts}"
+}
+
+build_scp_cmd() {
+ local scp_opts="-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10"
+ if [[ -n "$SSH_KEY" ]]; then
+ scp_opts+=" -i ${SSH_KEY}"
+ fi
+ echo "scp ${scp_opts}"
+}
+
+# Push the CA and this script to a single host and run it there in node mode.
+#
+# Every remote step is used as a condition so that a failure marks only this
+# host as failed instead of terminating the whole deployment under errexit.
+# The remote temp directory holds a copy of the CA private key, so it is
+# removed on every path once it has been created.
+#
+# Arguments: $1 - target host
+#            $2 - ssh command prefix (word-split on purpose)
+#            $3 - scp command prefix (word-split on purpose)
+#            $4 - absolute path of this script
+# Returns:   0 when the remote setup succeeded, 1 otherwise
+_deploy_one_node() {
+    local host="$1" ssh_cmd="$2" scp_cmd="$3" script_path="$4"
+    local remote="${SSH_USER}@${host}"
+    local remote_tmp="" script_name rc=0
+
+    script_name=$(basename "$script_path")
+
+    # Test SSH connectivity
+    # shellcheck disable=SC2086  # option strings are meant to be split
+    if ! $ssh_cmd "$remote" "echo ok" &>/dev/null; then
+        warn "Cannot connect to ${host} — skipping"
+        return 1
+    fi
+
+    # Create temp dir on remote; an empty answer would make us copy to "/".
+    # shellcheck disable=SC2086
+    if ! remote_tmp=$($ssh_cmd "$remote" "mktemp -d /tmp/prom-tls-XXXXXX") || [[ -z "$remote_tmp" ]]; then
+        warn "Could not create a temp directory on ${host} — skipping"
+        return 1
+    fi
+
+    # Copy CA cert, CA key, and this script
+    # shellcheck disable=SC2086
+    if ! $scp_cmd "${PROM_TLS_DIR}/ca.crt" "${PROM_TLS_DIR}/ca.key" "$script_path" \
+        "${remote}:${remote_tmp}/" 2>/dev/null; then
+        warn "Failed to copy files to ${host} — skipping"
+        rc=1
+    else
+        # Run the script in node mode on the remote host
+        info "Running node setup on ${host}..."
+        # shellcheck disable=SC2086
+        if $ssh_cmd "$remote" \
+            "chmod +x ${remote_tmp}/${script_name} && ${remote_tmp}/${script_name} --role node --ca-cert ${remote_tmp}/ca.crt --ca-key ${remote_tmp}/ca.key"; then
+            info "${host}: TLS configured successfully"
+        else
+            warn "${host}: Setup failed — check logs on that host"
+            rc=1
+        fi
+    fi
+
+    # Cleanup temp files on remote
+    # shellcheck disable=SC2086
+    $ssh_cmd "$remote" "rm -rf ${remote_tmp}" 2>/dev/null \
+        || warn "${host}: could not remove ${remote_tmp} (it contains a copy of the CA key)"
+
+    return "$rc"
+}
+
+# Deploy node_exporter TLS to remote hosts from the Prometheus server.
+#
+# The host list is the union of the comma-separated DEPLOY_TARGETS and the
+# lines of DEPLOY_FILE (blank lines and '#' comments ignored). Surrounding
+# whitespace is trimmed and empty entries are dropped. Prints a summary and a
+# prometheus.yml snippet listing the hosts that succeeded.
+#
+# Globals read: DEPLOY_TARGETS, DEPLOY_FILE, PROM_TLS_DIR, SSH_USER, DRY_RUN
+# Returns:      0 when every host succeeded, 1 when at least one failed
+# Exits (via die) when no hosts are given, the deploy file is missing, or the
+# CA has not been generated yet.
+deploy_to_nodes() {
+    local hosts=() target_hosts=() ok_hosts=() failed_hosts=()
+    local entry line host
+
+    # Build host list from --deploy and/or --deploy-file
+    if [[ -n "$DEPLOY_TARGETS" ]]; then
+        IFS=',' read -ra target_hosts <<< "$DEPLOY_TARGETS"
+        for entry in "${target_hosts[@]}"; do
+            # Trim surrounding whitespace so "a, b" works like "a,b".
+            entry="${entry#"${entry%%[![:space:]]*}"}"
+            entry="${entry%"${entry##*[![:space:]]}"}"
+            if [[ -n "$entry" ]]; then
+                hosts+=("$entry")
+            fi
+        done
+    fi
+
+    if [[ -n "$DEPLOY_FILE" ]]; then
+        if [[ ! -f "$DEPLOY_FILE" ]]; then
+            die "Deploy file not found: ${DEPLOY_FILE}"
+        fi
+        # "|| [[ -n ... ]]" keeps a last line that lacks a trailing newline.
+        while IFS= read -r line || [[ -n "$line" ]]; do
+            # Skip blank lines and comments
+            line="${line%%#*}"
+            line="${line#"${line%%[![:space:]]*}"}"
+            line="${line%"${line##*[![:space:]]}"}"
+            if [[ -n "$line" ]]; then
+                hosts+=("$line")
+            fi
+        done < "$DEPLOY_FILE"
+    fi
+
+    if [[ ${#hosts[@]} -eq 0 ]]; then
+        die "No target hosts specified"
+    fi
+
+    # Verify CA exists (must run server setup first)
+    if [[ ! -f "${PROM_TLS_DIR}/ca.crt" || ! -f "${PROM_TLS_DIR}/ca.key" ]]; then
+        die "CA not found at ${PROM_TLS_DIR}/. Run server setup first: $0 --role server"
+    fi
+
+    local script_path ssh_cmd scp_cmd
+    script_path=$(readlink -f "$0")
+    ssh_cmd=$(build_ssh_cmd)
+    scp_cmd=$(build_scp_cmd)
+
+    echo ""
+    echo "=========================================="
+    echo "Deploy TLS to Remote Nodes"
+    echo "=========================================="
+    echo ""
+    echo "  CA:       ${PROM_TLS_DIR}/ca.crt"
+    echo "  SSH user: ${SSH_USER}"
+    echo "  Targets:  ${hosts[*]}"
+    echo ""
+
+    for host in "${hosts[@]}"; do
+        echo "--- ${host} ---"
+
+        if [[ "$DRY_RUN" == true ]]; then
+            info "[DRY RUN] Would deploy TLS to ${host}"
+            ok_hosts+=("$host")
+            continue
+        fi
+
+        if _deploy_one_node "$host" "$ssh_cmd" "$scp_cmd" "$script_path"; then
+            ok_hosts+=("$host")
+        else
+            failed_hosts+=("$host")
+        fi
+
+        echo ""
+    done
+
+    local succeeded=${#ok_hosts[@]}
+    local failed=${#failed_hosts[@]}
+
+    # Summary
+    echo "=========================================="
+    echo "Deploy Complete"
+    echo "=========================================="
+    echo ""
+    echo "  Succeeded: ${succeeded}"
+    echo "  Failed:    ${failed}"
+
+    if [[ $failed -gt 0 ]]; then
+        echo "  Failed hosts: ${failed_hosts[*]}"
+    fi
+
+    # Print prometheus.yml snippet for all successful hosts
+    if [[ $succeeded -gt 0 ]]; then
+        local targets=""
+        for host in "${ok_hosts[@]}"; do
+            targets+="${targets:+, }'${host}:9100'"
+        done
+
+        echo ""
+        echo "Add these targets to your prometheus.yml:"
+        echo ""
+        echo "  - job_name: 'node'"
+        echo "    scheme: https"
+        echo "    tls_config:"
+        echo "      ca_file: ${PROM_TLS_DIR}/ca.crt"
+        echo "    static_configs:"
+        echo "      - targets: [${targets}]"
+    fi
+    echo ""
+
+    if [[ $failed -gt 0 ]]; then
+        return 1
+    fi
+    return 0
+}
+
+# ============================================================================
+# ARGUMENT PARSING
+# ============================================================================
+
+parse_arguments() {
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --role)
+ ROLE="$2"
+ if [[ "$ROLE" != "server" && "$ROLE" != "node" ]]; then
+ die "Invalid role: ${ROLE}. Must be 'server' or 'node'"
+ fi
+ shift 2
+ ;;
+ --ca-cert)
+ CA_CERT="$2"
+ shift 2
+ ;;
+ --ca-key)
+ CA_KEY="$2"
+ shift 2
+ ;;
+ --hostname)
+ HOSTNAME_FQDN="$2"
+ shift 2
+ ;;
+ --deploy)
+ DEPLOY_TARGETS="$2"
+ shift 2
+ ;;
+ --deploy-file)
+ DEPLOY_FILE="$2"
+ shift 2
+ ;;
+ --ssh-user)
+ SSH_USER="$2"
+ shift 2
+ ;;
+ --ssh-key)
+ SSH_KEY="$2"
+ shift 2
+ ;;
+ --dry-run)
+ DRY_RUN=true
+ shift
+ ;;
+ --status)
+ show_status
+ exit 0
+ ;;
+ --remove)
+ do_remove
+ exit 0
+ ;;
+ -h|--help)
+ show_usage
+ ;;
+ *)
+ die "Unknown option: $1. Use --help for usage."
+ ;;
+ esac
+ done
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+main() {
+ if [[ $EUID -ne 0 ]]; then
+ die "This script must be run as root"
+ fi
+
+ parse_arguments "$@"
+
+ # Check openssl is available
+ if ! command -v openssl &>/dev/null; then
+ die "openssl is required but not installed"
+ fi
+
+ # Deploy mode — push TLS to remote nodes from the Prometheus server
+ if [[ -n "$DEPLOY_TARGETS" || -n "$DEPLOY_FILE" ]]; then
+ deploy_to_nodes
+ exit $?
+ fi
+
+ # Auto-detect role if not specified
+ if [[ -z "$ROLE" ]]; then
+ detect_role
+ fi
+
+ case "$ROLE" in
+ server) setup_server ;;
+ node) setup_node ;;
+ esac
+}
+
+main "$@"
diff --git a/alb-health-reporter.sh b/alb-health-reporter.sh
new file mode 100755
index 0000000..c63701f
--- /dev/null
+++ b/alb-health-reporter.sh
@@ -0,0 +1,574 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### alb-health-reporter.sh — Check AWS ALB/NLB target group health and alert ####
+#### Reports unhealthy targets, CloudWatch metrics, and sends SNS alerts ####
+#### Requires: bash 4+, aws-cli v2, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./alb-health-reporter.sh --check ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+AWS_REGION="${AWS_REGION:-}"
+ALB_NAME="${ALB_NAME:-}"
+TARGET_GROUP="${TARGET_GROUP:-}"
+SNS_TOPIC_ARN="${SNS_TOPIC_ARN:-}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+RUN_MODE=""
+START_TIME=""
+UNHEALTHY_COUNT=0
+HEALTHY_COUNT=0
+DRAINING_COUNT=0
+TOTAL_TARGETS=0
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+die() { err "$*"; exit 1; }
+
+# Print the number of whole seconds since START_TIME, suffixed with "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── AWS CLI wrapper ───────────────────────────────────────────────────
+# Run the AWS CLI with the given arguments, appending --region when
+# AWS_REGION is set.
+#
+# Arguments: the aws sub-command and its options
+# Outputs:   whatever the AWS CLI prints; the debug trace goes to stderr
+# Returns:   the exit status of the AWS CLI
+aws_cmd() {
+    local args=("$@")
+    if [[ -n "$AWS_REGION" ]]; then
+        args+=(--region "$AWS_REGION")
+    fi
+    # Callers capture this function's stdout (JSON or text), so the trace
+    # must not be written there or it would end up inside the captured data.
+    verbose "aws ${args[*]}" >&2
+    aws "${args[@]}"
+}
+
+# ── Dependency check ──────────────────────────────────────────────────
+check_deps() {
+ for cmd in aws jq; do
+ if ! command -v "$cmd" &>/dev/null; then
+ die "${cmd} is required but not installed"
+ fi
+ done
+
+ if ! aws sts get-caller-identity &>/dev/null; then
+ die "AWS credentials not configured or expired"
+ fi
+
+ if [[ -z "$AWS_REGION" ]]; then
+ AWS_REGION=$(aws configure get region 2>/dev/null || echo "")
+ if [[ -z "$AWS_REGION" ]]; then
+ die "AWS_REGION is required"
+ fi
+ fi
+ verbose "Using region: ${AWS_REGION}"
+}
+
+# ── Get load balancers ────────────────────────────────────────────────
+# Print a JSON array describing the load balancers (ARN, Name, Type, State,
+# DNSName), limited to ALB_NAME when that is set. AWS CLI errors are hidden.
+get_load_balancers() {
+    local -a cli=(elbv2 describe-load-balancers)
+
+    [[ -z "$ALB_NAME" ]] || cli+=(--names "$ALB_NAME")
+
+    cli+=(
+        --query 'LoadBalancers[*].{ARN:LoadBalancerArn,Name:LoadBalancerName,Type:Type,State:State.Code,DNSName:DNSName}'
+        --output json
+    )
+
+    aws_cmd "${cli[@]}" 2>/dev/null
+}
+
+# ── Get target groups for a load balancer ─────────────────────────────
+get_target_groups() {
+ local lb_arn="$1"
+
+ if [[ -n "$TARGET_GROUP" ]]; then
+ aws_cmd elbv2 describe-target-groups \
+ --target-group-arns "$TARGET_GROUP" \
+ --query 'TargetGroups[*].{ARN:TargetGroupArn,Name:TargetGroupName,Protocol:Protocol,Port:Port,HealthPath:HealthCheckPath}' \
+ --output json 2>/dev/null
+ return
+ fi
+
+ aws_cmd elbv2 describe-target-groups \
+ --load-balancer-arn "$lb_arn" \
+ --query 'TargetGroups[*].{ARN:TargetGroupArn,Name:TargetGroupName,Protocol:Protocol,Port:Port,HealthPath:HealthCheckPath}' \
+ --output json 2>/dev/null
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# CHECK MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_check() {
+ log "Checking target group health..."
+
+ local lbs_json
+ lbs_json=$(get_load_balancers)
+
+ local lb_count
+ lb_count=$(echo "$lbs_json" | jq 'length')
+
+ if [[ "$lb_count" -eq 0 ]]; then
+ log "No load balancers found"
+ return
+ fi
+
+ log "Found ${lb_count} load balancer(s)"
+
+ echo "$lbs_json" | jq -c '.[]' | while IFS= read -r lb; do
+ local lb_arn lb_name lb_type lb_state
+ lb_arn=$(echo "$lb" | jq -r '.ARN')
+ lb_name=$(echo "$lb" | jq -r '.Name')
+ lb_type=$(echo "$lb" | jq -r '.Type')
+ lb_state=$(echo "$lb" | jq -r '.State')
+
+ echo ""
+ echo -e " ${BOLD}${lb_name}${RESET} (${lb_type}, ${lb_state})"
+
+ local tgs_json
+ tgs_json=$(get_target_groups "$lb_arn")
+
+ local tg_count
+ tg_count=$(echo "$tgs_json" | jq 'length')
+
+ if [[ "$tg_count" -eq 0 ]]; then
+ echo " No target groups"
+ continue
+ fi
+
+ echo "$tgs_json" | jq -c '.[]' | while IFS= read -r tg; do
+ local tg_arn tg_name tg_proto tg_port
+ tg_arn=$(echo "$tg" | jq -r '.ARN')
+ tg_name=$(echo "$tg" | jq -r '.Name')
+ tg_proto=$(echo "$tg" | jq -r '.Protocol')
+ tg_port=$(echo "$tg" | jq -r '.Port')
+
+ echo ""
+ echo -e " ${BOLD}Target Group: ${tg_name}${RESET} (${tg_proto}:${tg_port})"
+
+ local health_json
+ health_json=$(aws_cmd elbv2 describe-target-health \
+ --target-group-arn "$tg_arn" \
+ --query 'TargetHealthDescriptions[*].{Id:Target.Id,Port:Target.Port,State:TargetHealth.State,Reason:TargetHealth.Reason,Desc:TargetHealth.Description}' \
+ --output json 2>/dev/null)
+
+ local target_count
+ target_count=$(echo "$health_json" | jq 'length')
+ TOTAL_TARGETS=$((TOTAL_TARGETS + target_count))
+
+ if [[ "$target_count" -eq 0 ]]; then
+ echo -e " ${YELLOW}No registered targets${RESET}"
+ continue
+ fi
+
+ printf " ${BOLD}%-22s %-8s %-12s %s${RESET}\n" "TARGET" "PORT" "STATE" "REASON"
+ printf " %s\n" "$(printf '%.0s─' {1..60})"
+
+ echo "$health_json" | jq -c '.[]' | while IFS= read -r target; do
+ local tid tport tstate treason
+ tid=$(echo "$target" | jq -r '.Id')
+ tport=$(echo "$target" | jq -r '.Port')
+ tstate=$(echo "$target" | jq -r '.State')
+ treason=$(echo "$target" | jq -r '.Reason // "-"')
+
+ local icon color
+ case "$tstate" in
+ healthy)
+ icon="${GREEN}✓${RESET}"
+ color="$GREEN"
+ HEALTHY_COUNT=$((HEALTHY_COUNT + 1))
+ ;;
+ unhealthy)
+ icon="${RED}✗${RESET}"
+ color="$RED"
+ UNHEALTHY_COUNT=$((UNHEALTHY_COUNT + 1))
+ ;;
+ draining)
+ icon="${YELLOW}⊘${RESET}"
+ color="$YELLOW"
+ DRAINING_COUNT=$((DRAINING_COUNT + 1))
+ ;;
+ *)
+ icon="${DIM}?${RESET}"
+ color="$DIM"
+ ;;
+ esac
+
+ printf " ${icon} %-20s %-8s ${color}%-12s${RESET} %s\n" "$tid" "$tport" "$tstate" "$treason"
+ done
+ done
+ done
+
+ echo ""
+ echo -e " ${BOLD}Summary${RESET}"
+ echo " Total targets: ${TOTAL_TARGETS}"
+ echo -e " Healthy: ${GREEN}${HEALTHY_COUNT}${RESET}"
+ [[ "$UNHEALTHY_COUNT" -gt 0 ]] && echo -e " Unhealthy: ${RED}${UNHEALTHY_COUNT}${RESET}" || echo " Unhealthy: 0"
+ [[ "$DRAINING_COUNT" -gt 0 ]] && echo -e " Draining: ${YELLOW}${DRAINING_COUNT}${RESET}" || echo " Draining: 0"
+ log "Completed in $(elapsed)"
+
+ if [[ "$UNHEALTHY_COUNT" -gt 0 ]]; then
+ return 2
+ elif [[ "$DRAINING_COUNT" -gt 0 ]]; then
+ return 1
+ fi
+ return 0
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_list() {
+ log "Listing load balancers..."
+
+ local lbs_json
+ lbs_json=$(get_load_balancers)
+
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ echo "$lbs_json" | jq '.'
+ return
+ fi
+
+ echo ""
+ printf " ${BOLD}%-30s %-12s %-10s %s${RESET}\n" "NAME" "TYPE" "STATE" "DNS NAME"
+ printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+ echo "$lbs_json" | jq -c '.[]' | while IFS= read -r lb; do
+ local lb_name lb_type lb_state dns_name
+ lb_name=$(echo "$lb" | jq -r '.Name')
+ lb_type=$(echo "$lb" | jq -r '.Type')
+ lb_state=$(echo "$lb" | jq -r '.State')
+ dns_name=$(echo "$lb" | jq -r '.DNSName')
+
+ local color="$GREEN"
+ [[ "$lb_state" != "active" ]] && color="$YELLOW"
+
+ printf " %-30s %-12s ${color}%-10s${RESET} %s\n" \
+ "${lb_name:0:30}" "$lb_type" "$lb_state" "${dns_name:0:50}"
+ done
+
+ local count
+ count=$(echo "$lbs_json" | jq 'length')
+ echo ""
+ log "Total: ${count} load balancer(s)"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# METRICS MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_metrics() {
+ log "Fetching CloudWatch metrics (last 1 hour)..."
+
+ local lbs_json
+ lbs_json=$(get_load_balancers)
+
+ local now
+ now=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+ local one_hour_ago
+ one_hour_ago=$(date -u -d "-1 hour" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null) || \
+ one_hour_ago=$(date -u -v-1H +%Y-%m-%dT%H:%M:%SZ 2>/dev/null) || \
+ one_hour_ago="$now"
+
+ echo "$lbs_json" | jq -c '.[]' | while IFS= read -r lb; do
+ local lb_arn lb_name lb_type
+ lb_arn=$(echo "$lb" | jq -r '.ARN')
+ lb_name=$(echo "$lb" | jq -r '.Name')
+ lb_type=$(echo "$lb" | jq -r '.Type')
+
+ # Extract the ALB suffix for CloudWatch dimension
+ local lb_suffix
+ lb_suffix=${lb_arn##*loadbalancer/}
+
+ echo ""
+ echo -e " ${BOLD}${lb_name}${RESET}"
+
+ local namespace="AWS/ApplicationELB"
+ [[ "$lb_type" == "network" ]] && namespace="AWS/NetworkELB"
+
+ # Request count
+ local req_count
+ req_count=$(aws_cmd cloudwatch get-metric-statistics \
+ --namespace "$namespace" \
+ --metric-name "RequestCount" \
+ --dimensions "Name=LoadBalancer,Value=${lb_suffix}" \
+ --start-time "$one_hour_ago" \
+ --end-time "$now" \
+ --period 3600 \
+ --statistics Sum \
+ --query 'Datapoints[0].Sum' \
+ --output text 2>/dev/null) || req_count="N/A"
+ [[ "$req_count" == "None" ]] && req_count="0"
+
+ echo " Request count (1h): ${req_count}"
+
+ # 5xx errors
+ local err_5xx
+ err_5xx=$(aws_cmd cloudwatch get-metric-statistics \
+ --namespace "$namespace" \
+ --metric-name "HTTPCode_Target_5XX_Count" \
+ --dimensions "Name=LoadBalancer,Value=${lb_suffix}" \
+ --start-time "$one_hour_ago" \
+ --end-time "$now" \
+ --period 3600 \
+ --statistics Sum \
+ --query 'Datapoints[0].Sum' \
+ --output text 2>/dev/null) || err_5xx="N/A"
+ [[ "$err_5xx" == "None" ]] && err_5xx="0"
+
+ if [[ "$err_5xx" != "0" && "$err_5xx" != "N/A" ]]; then
+ echo -e " 5XX errors (1h): ${RED}${err_5xx}${RESET}"
+ else
+ echo " 5XX errors (1h): ${err_5xx}"
+ fi
+
+ # Response time
+ local resp_time
+ resp_time=$(aws_cmd cloudwatch get-metric-statistics \
+ --namespace "$namespace" \
+ --metric-name "TargetResponseTime" \
+ --dimensions "Name=LoadBalancer,Value=${lb_suffix}" \
+ --start-time "$one_hour_ago" \
+ --end-time "$now" \
+ --period 3600 \
+ --statistics Average \
+ --query 'Datapoints[0].Average' \
+ --output text 2>/dev/null) || resp_time="N/A"
+ [[ "$resp_time" == "None" ]] && resp_time="N/A"
+
+ if [[ "$resp_time" != "N/A" ]]; then
+ local resp_ms
+ resp_ms=$(awk "BEGIN { printf \"%.1f\", $resp_time * 1000 }" 2>/dev/null || echo "$resp_time")
+ echo " Avg response time: ${resp_ms}ms"
+ else
+ echo " Avg response time: N/A"
+ fi
+ done
+
+ echo ""
+ log "Metrics collected in $(elapsed)"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ALERT MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_alert() {
+ local check_exit=0
+ do_check || check_exit=$?
+
+ if [[ "$check_exit" -eq 2 && -n "$SNS_TOPIC_ARN" ]]; then
+ log "Sending SNS alert for ${UNHEALTHY_COUNT} unhealthy target(s)..."
+
+ local subject="ALB Health Alert: ${UNHEALTHY_COUNT} unhealthy target(s) in ${AWS_REGION}"
+ local message
+ message="ALB Health Reporter Alert
+
+Region: ${AWS_REGION}
+Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)
+Hostname: $(hostname -f 2>/dev/null || hostname)
+
+Summary:
+ Total targets: ${TOTAL_TARGETS}
+ Healthy: ${HEALTHY_COUNT}
+ Unhealthy: ${UNHEALTHY_COUNT}
+ Draining: ${DRAINING_COUNT}
+
+Action required: ${UNHEALTHY_COUNT} target(s) are unhealthy.
+Run: alb-health-reporter.sh --check for details."
+
+ if aws_cmd sns publish \
+ --topic-arn "$SNS_TOPIC_ARN" \
+ --subject "${subject:0:100}" \
+ --message "$message" \
+ --output text &>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} SNS alert sent to ${SNS_TOPIC_ARN}"
+ else
+ warn "Failed to send SNS alert"
+ fi
+ elif [[ "$check_exit" -eq 2 && -z "$SNS_TOPIC_ARN" ]]; then
+ warn "Unhealthy targets found but no --sns-topic specified"
+ fi
+
+ exit "$check_exit"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PROMETHEUS OUTPUT
+# ══════════════════════════════════════════════════════════════════════
+
+print_prometheus() {
+ # Run check silently to collect counts
+ do_check > /dev/null 2>&1 || true
+
+ local ts
+ ts=$(date +%s)
+ cat </dev/null || echo 'default')}"
+ echo "Mode: ${RUN_MODE}"
+ echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+ echo ""
+
+ check_deps
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ print_prometheus
+ return
+ fi
+
+ case "$RUN_MODE" in
+ check) do_check ;;
+ list) do_list ;;
+ metrics) do_metrics ;;
+ alert) do_alert ;;
+ esac
+}
+
+main "$@"
diff --git a/alertmanager-exporter.sh b/alertmanager-exporter.sh
new file mode 100644
index 0000000..b3b49d1
--- /dev/null
+++ b/alertmanager-exporter.sh
@@ -0,0 +1,683 @@
+#!/bin/bash
+################################################################################
+# Script Name: alertmanager-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Alertmanager operational overview.
+# Queries the Alertmanager API for active alerts, silences,
+# cluster health, and config status. Complements the built-in
+# /metrics endpoint with higher-level operational metrics.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - curl
+# - jq
+# - Alertmanager running and accessible
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# ./alertmanager-exporter.sh
+#
+# # HTTP server mode
+# ./alertmanager-exporter.sh --http -p 9094
+#
+# # Textfile collector mode
+# ./alertmanager-exporter.sh --textfile
+#
+# Metrics Exported:
+# - alertmanager_overview_up - Exporter status (1=up, 0=down)
+# - alertmanager_overview_info - Alertmanager version info
+# - alertmanager_overview_alerts_active_total - Total active alerts
+# - alertmanager_overview_alerts_by_state - Alerts by state
+# - alertmanager_overview_alerts_by_severity - Alerts by severity
+# - alertmanager_overview_alerts_by_receiver - Alerts by receiver
+# - alertmanager_overview_alert_groups_total - Alert group count
+# - alertmanager_overview_silences_active - Active silences
+# - alertmanager_overview_silences_pending - Pending silences
+# - alertmanager_overview_silences_expired - Expired silences
+# - alertmanager_overview_silence_coverage_ratio - Silence coverage
+# - alertmanager_overview_cluster_peers - Peer count
+# - alertmanager_overview_cluster_peer_healthy - Per-peer health
+# - alertmanager_overview_config_hash - Config hash for drift detection
+# - alertmanager_overview_uptime_seconds - Uptime
+# - alertmanager_overview_last_config_reload_timestamp - Last reload
+# - alertmanager_overview_exporter_duration_seconds - Script duration
+# - alertmanager_overview_exporter_last_run_timestamp - Last run time
+#
+# Configuration:
+# Default HTTP port: 9094
+# Textfile directory: /var/lib/node_exporter
+# Alertmanager URL: http://localhost:9093
+#
+################################################################################
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9094
+AM_URL="http://localhost:9093"
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Check prerequisites
+check_requirements() {
+ local missing=0
+
+ if ! command -v curl >/dev/null 2>&1; then
+ echo "ERROR: curl not found" >&2
+ missing=1
+ fi
+
+ if ! command -v jq >/dev/null 2>&1; then
+ echo "ERROR: jq not found" >&2
+ missing=1
+ fi
+
+ return $missing
+}
+
+# Fetch one endpoint of the Alertmanager HTTP API.
+# Args: $1 - request path appended to AM_URL (e.g., /api/v2/alerts)
+# Outputs: the response body on stdout; nothing when curl fails
+# Returns: curl's exit status
+am_api() {
+    local path="$1"
+    local url="${AM_URL}${path}"
+
+    # -f makes HTTP errors a failure; stderr is dropped so callers only see the body
+    curl -sf --connect-timeout 5 --max-time 10 "$url" 2>/dev/null
+}
+
+# ============================================================================
+# METRIC COLLECTION FUNCTIONS
+# ============================================================================
+
+# Get alert counts by state
+# Populates global variables: ALERTS_ACTIVE, ALERTS_SUPPRESSED, ALERTS_UNPROCESSED
+collect_alerts() {
+ local alerts_json
+ alerts_json=$(am_api "/api/v2/alerts")
+
+ if [ -z "$alerts_json" ]; then
+ ALERTS_TOTAL=0
+ ALERTS_ACTIVE=0
+ ALERTS_SUPPRESSED=0
+ ALERTS_UNPROCESSED=0
+ ALERTS_JSON="[]"
+ return 1
+ fi
+
+ ALERTS_JSON="$alerts_json"
+ ALERTS_TOTAL=$(echo "$alerts_json" | jq 'length')
+ ALERTS_ACTIVE=$(echo "$alerts_json" | jq '[.[] | select(.status.state == "active")] | length')
+ ALERTS_SUPPRESSED=$(echo "$alerts_json" | jq '[.[] | select(.status.state == "suppressed")] | length')
+ ALERTS_UNPROCESSED=$(echo "$alerts_json" | jq '[.[] | select(.status.state == "unprocessed")] | length')
+}
+
+# Get alert counts by severity label
+# Returns: metrics lines for each severity
+collect_alerts_by_severity() {
+ local severity count
+
+ for severity in critical warning info; do
+ count=$(echo "$ALERTS_JSON" | jq --arg sev "$severity" \
+ '[.[] | select(.labels.severity == $sev)] | length')
+ echo "alertmanager_overview_alerts_by_severity{severity=\"$severity\"} ${count:-0}"
+ done
+
+ # Count alerts with no severity or other severity values
+ count=$(echo "$ALERTS_JSON" | jq \
+ '[.[] | select(.labels.severity != "critical" and .labels.severity != "warning" and .labels.severity != "info")] | length')
+ if [ "$count" -gt 0 ]; then
+ echo "alertmanager_overview_alerts_by_severity{severity=\"other\"} $count"
+ fi
+}
+
+# Get alert counts by receiver
+collect_alerts_by_receiver() {
+ echo "$ALERTS_JSON" | jq -r '
+ [.[] | .receivers[]?.name // "unknown"] |
+ group_by(.) |
+ map({receiver: .[0], count: length}) |
+ .[] |
+ "alertmanager_overview_alerts_by_receiver{receiver=\"\(.receiver)\"} \(.count)"
+ ' 2>/dev/null
+}
+
+# Print the number of alert groups reported by Alertmanager.
+# Outputs: the group count on stdout, or "0" when the API yields no body
+collect_alert_groups() {
+    local payload
+    payload=$(am_api "/api/v2/alerts/groups")
+
+    if [ -n "$payload" ]; then
+        echo "$payload" | jq 'length'
+    else
+        echo "0"
+    fi
+}
+
+# Get silence counts by state
+collect_silences() {
+ local silences_json
+ silences_json=$(am_api "/api/v2/silences")
+
+ if [ -z "$silences_json" ]; then
+ SILENCES_ACTIVE=0
+ SILENCES_PENDING=0
+ SILENCES_EXPIRED=0
+ return 1
+ fi
+
+ SILENCES_ACTIVE=$(echo "$silences_json" | jq '[.[] | select(.status.state == "active")] | length')
+ SILENCES_PENDING=$(echo "$silences_json" | jq '[.[] | select(.status.state == "pending")] | length')
+ SILENCES_EXPIRED=$(echo "$silences_json" | jq '[.[] | select(.status.state == "expired")] | length')
+}
+
+# Calculate the silence coverage ratio (suppressed alerts / total alerts).
+# Globals read: ALERTS_TOTAL, ALERTS_SUPPRESSED (each treated as 0 when unset)
+# Outputs: the ratio with four decimals (e.g. 0.2500), or "0" when there are
+#          no alerts
+calculate_silence_coverage() {
+    local total="${ALERTS_TOTAL:-0}"
+    local suppressed="${ALERTS_SUPPRESSED:-0}"
+
+    if [ "$total" -gt 0 ] 2>/dev/null; then
+        # Pass the counters with -v rather than splicing them into the awk program
+        awk -v s="$suppressed" -v t="$total" 'BEGIN { printf "%.4f", s / t }'
+    else
+        echo "0"
+    fi
+}
+
+# Get cluster status
+collect_cluster_status() {
+ local status_json
+ status_json=$(am_api "/api/v2/status")
+
+ if [ -z "$status_json" ]; then
+ CLUSTER_PEERS=0
+ CLUSTER_STATUS="unknown"
+ AM_VERSION="unknown"
+ AM_UPTIME_SECONDS=0
+ CONFIG_HASH="0"
+ LAST_RELOAD=0
+ return 1
+ fi
+
+ AM_VERSION=$(echo "$status_json" | jq -r '.versionInfo.version // "unknown"')
+
+ # Cluster info
+ # shellcheck disable=SC2034 # reserved for future use
+ CLUSTER_STATUS=$(echo "$status_json" | jq -r '.cluster.status // "disabled"')
+ CLUSTER_PEERS=$(echo "$status_json" | jq '.cluster.peers // [] | length')
+
+ # Peer details (for per-peer health metrics)
+ CLUSTER_PEERS_JSON=$(echo "$status_json" | jq '.cluster.peers // []')
+
+ # Uptime from start time
+ local start_time
+ start_time=$(echo "$status_json" | jq -r '.uptime // empty' 2>/dev/null)
+ if [ -n "$start_time" ]; then
+ local start_epoch now_epoch
+ start_epoch=$(date -d "$start_time" +%s 2>/dev/null || echo 0)
+ now_epoch=$(date +%s)
+ if [ "$start_epoch" -gt 0 ]; then
+ AM_UPTIME_SECONDS=$((now_epoch - start_epoch))
+ else
+ AM_UPTIME_SECONDS=0
+ fi
+ else
+ AM_UPTIME_SECONDS=0
+ fi
+
+ # Config hash — hash the config JSON for drift detection
+ local config_json
+ config_json=$(echo "$status_json" | jq -r '.config.original // ""')
+ if [ -n "$config_json" ]; then
+ CONFIG_HASH=$(echo "$config_json" | sha256sum | awk '{print $1}' | head -c 16)
+ else
+ CONFIG_HASH="0"
+ fi
+
+ # Last config reload — not directly available from /api/v2/status
+ # We'll pull this from the built-in /metrics if reachable
+ local reload_ts
+ reload_ts=$(curl -sf "${AM_URL}/metrics" 2>/dev/null | \
+ grep "^alertmanager_config_last_reload_success_timestamp_seconds" | \
+ awk '{print $2}' | head -1)
+ LAST_RELOAD=${reload_ts:-0}
+}
+
+# Output per-peer health metrics
+output_peer_metrics() {
+ if [ "$CLUSTER_PEERS" -eq 0 ] || [ -z "$CLUSTER_PEERS_JSON" ]; then
+ return
+ fi
+
+ echo "$CLUSTER_PEERS_JSON" | jq -r '
+ .[] |
+ "alertmanager_overview_cluster_peer_healthy{peer=\"\(.address // "unknown")\"} 1"
+ ' 2>/dev/null
+}
+
+# Get notification metrics from built-in /metrics endpoint
+collect_notification_metrics() {
+ local metrics_raw
+ metrics_raw=$(curl -sf "${AM_URL}/metrics" 2>/dev/null)
+
+ if [ -z "$metrics_raw" ]; then
+ return 1
+ fi
+
+ NOTIFICATION_METRICS="$metrics_raw"
+}
+
+# Output notification rate per receiver (from built-in metrics)
+output_notification_rates() {
+ if [ -z "$NOTIFICATION_METRICS" ]; then
+ return
+ fi
+
+ echo "$NOTIFICATION_METRICS" | \
+ grep "^alertmanager_notifications_total{" | \
+ sed 's/alertmanager_notifications_total/alertmanager_overview_notification_rate/' 2>/dev/null
+}
+
+# Output notification failures per receiver (from built-in metrics)
+output_notification_failures() {
+ if [ -z "$NOTIFICATION_METRICS" ]; then
+ return
+ fi
+
+ echo "$NOTIFICATION_METRICS" | \
+ grep "^alertmanager_notifications_failed_total{" | \
+ sed 's/alertmanager_notifications_failed_total/alertmanager_overview_notification_failures/' 2>/dev/null
+}
+
+# Output notification latency per receiver (from built-in metrics)
+output_notification_latency() {
+ if [ -z "$NOTIFICATION_METRICS" ]; then
+ return
+ fi
+
+ # Use the _sum and _count to compute average latency per integration
+ echo "$NOTIFICATION_METRICS" | \
+ grep "^alertmanager_notification_latency_seconds_sum{" | \
+ sed 's/alertmanager_notification_latency_seconds_sum/alertmanager_overview_notification_latency_seconds/' 2>/dev/null
+}
+
+# ============================================================================
+# METRIC OUTPUT
+# ============================================================================
+
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check if Alertmanager is reachable
+ local am_up=1
+ if ! am_api "/api/v2/status" >/dev/null 2>&1; then
+ am_up=0
+ fi
+
+ cat <&2
+ echo "Alertmanager URL: $AM_URL" >&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ while true; do
+ {
+ read -r request
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+Alertmanager Overview Exporter
+
+Alertmanager Overview Exporter v1.0
+Alertmanager URL: $AM_URL
+Metrics
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Entry point: parse arguments, check prerequisites, then run in one of
+# three modes:
+#   - HTTP_MODE=true        -> serve metrics via run_http_server
+#   - OUTPUT_FILE non-empty -> write metrics to that file atomically
+#   - otherwise             -> print metrics to stdout
+# Exits 1 on missing prerequisites or any failure while writing the file.
+main() {
+    parse_args "$@"
+
+    if ! check_requirements; then
+        exit 1
+    fi
+
+    if [ "$HTTP_MODE" = true ]; then
+        run_http_server
+    elif [ -n "$OUTPUT_FILE" ]; then
+        local output_dir temp_file file_lines
+        output_dir="$(dirname "$OUTPUT_FILE")"
+
+        if ! mkdir -p "$output_dir" 2>/dev/null; then
+            echo "ERROR: Cannot create output directory $output_dir" >&2
+            exit 1
+        fi
+
+        # Write to a temp file in the same directory so the final mv is a rename
+        if ! temp_file=$(mktemp "${output_dir}/.alertmanager_overview.XXXXXX" 2>/dev/null); then
+            echo "ERROR: Cannot create temporary file in $output_dir" >&2
+            exit 1
+        fi
+
+        if ! generate_metrics > "$temp_file" 2>/dev/null; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+
+        # Sanity check: refuse to replace the previous file with a near-empty one
+        if [ "$file_lines" -lt 5 ]; then
+            rm -f "$temp_file"
+            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+            exit 1
+        fi
+
+        chmod 644 "$temp_file"
+        if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
+            rm -f "$temp_file"
+            echo "ERROR: Cannot write $OUTPUT_FILE" >&2
+            exit 1
+        fi
+
+        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+    else
+        generate_metrics
+    fi
+}
+
+main "$@"
diff --git a/alertmanager-silence-manager.sh b/alertmanager-silence-manager.sh
new file mode 100644
index 0000000..d238c34
--- /dev/null
+++ b/alertmanager-silence-manager.sh
@@ -0,0 +1,848 @@
+#!/bin/bash
+################################################################################
+# Script Name: alertmanager-silence-manager.sh
+# Version: 1.0
+# Description: CLI tool for managing Prometheus Alertmanager silences.
+# Create, bulk-create, extend, expire, list, audit, and export
+# silences via the Alertmanager API v2. Supports dry-run mode,
+# pattern-based operations, and YAML bulk silence files.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - curl
+# - jq
+# - Alertmanager running and accessible
+#
+# Usage:
+# # Create a single silence
+# ./alertmanager-silence-manager.sh create --matcher 'alertname=HighCPU' --duration 2h --comment "Maintenance"
+#
+# # Bulk create from YAML
+# ./alertmanager-silence-manager.sh bulk-create --file maintenance.yaml
+#
+# # List active silences
+# ./alertmanager-silence-manager.sh list --state active
+#
+# # Extend a silence
+# ./alertmanager-silence-manager.sh extend --id abc12345 --duration 1h
+#
+# # Expire a silence
+# ./alertmanager-silence-manager.sh expire --id abc12345
+#
+# # Export active silences to YAML
+# ./alertmanager-silence-manager.sh export --output silences.yaml
+#
+# # Audit silences
+# ./alertmanager-silence-manager.sh audit
+#
+# Configuration:
+# ALERTMANAGER_URL Alertmanager base URL (default: http://localhost:9093)
+# SILENCE_AUTHOR Author name for silences (default: current user)
+# SILENCE_COMMENT_PREFIX Prefix for all silence comments (default: none)
+#
+################################################################################
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+AM_URL="${ALERTMANAGER_URL:-http://localhost:9093}"
+AUTHOR="${SILENCE_AUTHOR:-$(whoami)}"
+COMMENT_PREFIX="${SILENCE_COMMENT_PREFIX:-}"
+DRY_RUN=false
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat < [OPTIONS]
+
+Manage Prometheus Alertmanager silences via the API.
+
+COMMANDS:
+ create Create a single silence
+ bulk-create Create silences from a YAML file
+ list List silences in table format
+ extend Extend a silence by duration
+ expire Expire silences by ID or pattern
+ export Export active silences to YAML
+ audit Show detailed silence audit info
+
+CREATE OPTIONS:
+ --matcher STR Label matcher (e.g., 'alertname=HighCPU'), repeatable
+ --duration STR Duration (e.g., 2h, 30m, 1d)
+ --comment STR Silence comment/reason
+ --dry-run Preview without creating
+
+BULK-CREATE OPTIONS:
+ --file PATH Path to YAML silence definitions
+ --dry-run Preview without creating
+
+LIST OPTIONS:
+ --state STR Filter: active, pending, expired, all (default: active)
+
+EXTEND OPTIONS:
+ --id ID Silence ID to extend
+ --duration STR Additional duration (e.g., 1h)
+ --dry-run Preview without extending
+
+EXPIRE OPTIONS:
+ --id ID Silence ID to expire
+ --match STR Pattern match on comment (e.g., 'comment=~maintenance.*')
+ --dry-run Preview without expiring
+
+EXPORT OPTIONS:
+ --output PATH Output file (default: stdout)
+
+ENVIRONMENT VARIABLES:
+ ALERTMANAGER_URL Base URL (default: http://localhost:9093)
+ SILENCE_AUTHOR Author name (default: current user)
+ SILENCE_COMMENT_PREFIX Prefix for comments
+
+EXAMPLES:
+ $0 create --matcher 'alertname=HighCPU' --matcher 'instance=web-01' --duration 2h --comment "Maintenance"
+ $0 bulk-create --file maintenance-window.yaml --dry-run
+ $0 list --state active
+ $0 extend --id 4a2f8c3e --duration 1h
+ $0 expire --match 'comment=~maintenance.*'
+ $0 export --output silences-backup.yaml
+ $0 audit
+
+EOF
+ exit 0
+}
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_dry_run() { echo -e "${YELLOW}[DRY RUN]${NC} $*"; }
+
+check_requirements() {
+ local missing=0
+
+ if ! command -v curl >/dev/null 2>&1; then
+ log_error "curl not found"
+ missing=1
+ fi
+
+ if ! command -v jq >/dev/null 2>&1; then
+ log_error "jq not found"
+ missing=1
+ fi
+
+ return $missing
+}
+
+check_connectivity() {
+ local status
+ status=$(curl -sf --connect-timeout 5 --max-time 10 -o /dev/null -w "%{http_code}" "${AM_URL}/api/v2/status" 2>/dev/null)
+
+ if [ "$status" != "200" ]; then
+ log_error "Cannot reach Alertmanager at ${AM_URL} (HTTP $status)"
+ exit 1
+ fi
+}
+
+# Call the Alertmanager API.
+# Args: $1 - HTTP method
+#       $2 - endpoint path appended to AM_URL
+#       $3 - request body (optional); when non-empty it is sent as JSON
+# Outputs: the response body on stdout; nothing when curl fails
+# Returns: curl's exit status
+am_api() {
+    local method="$1" endpoint="$2" data="$3"
+    local -a request=(-sf --connect-timeout 5 --max-time 15 -X "$method")
+
+    if [ -n "$data" ]; then
+        request+=(-H "Content-Type: application/json" -d "$data")
+    fi
+
+    curl "${request[@]}" "${AM_URL}${endpoint}" 2>/dev/null
+}
+
+# Parse duration string to seconds
+# Supports: 30s, 5m, 2h, 1d
+parse_duration() {
+ local input="$1"
+ local num unit
+
+ num=$(echo "$input" | sed 's/[^0-9]//g')
+ unit=$(echo "$input" | sed 's/[0-9]//g')
+
+ case "$unit" in
+ s) echo "$num" ;;
+ m) echo $((num * 60)) ;;
+ h) echo $((num * 3600)) ;;
+ d) echo $((num * 86400)) ;;
+ *) log_error "Invalid duration unit: $unit (use s/m/h/d)"; return 1 ;;
+ esac
+}
+
+# Print the UTC time "now + offset" as an ISO 8601 timestamp with a fixed
+# ".000Z" suffix.
+# Args: $1 - offset in seconds (default: 0)
+# Tries the "date -d" form first and falls back to the "date -v" form when
+# that fails.
+iso_timestamp() {
+    local offset="${1:-0}"
+    local format="+%Y-%m-%dT%H:%M:%S.000Z"
+
+    if ! date -u -d "+${offset} seconds" "$format" 2>/dev/null; then
+        date -u -v "+${offset}S" "$format" 2>/dev/null
+    fi
+}
+
+# Parse a matcher string like 'alertname=HighCPU' or 'instance=~web-0[1-3]'
+# Returns JSON object
+parse_matcher() {
+ local input="$1"
+ local name value is_regex is_equal
+
+ if [[ "$input" == *"=~"* ]]; then
+ name="${input%%=~*}"
+ value="${input#*=~}"
+ is_regex="true"
+ is_equal="true"
+ elif [[ "$input" == *"!~"* ]]; then
+ name="${input%%!~*}"
+ value="${input#*!~}"
+ is_regex="true"
+ is_equal="false"
+ elif [[ "$input" == *"!="* ]]; then
+ name="${input%%!=*}"
+ value="${input#*!=}"
+ is_regex="false"
+ is_equal="false"
+ elif [[ "$input" == *"="* ]]; then
+ name="${input%%=*}"
+ value="${input#*=}"
+ is_regex="false"
+ is_equal="true"
+ else
+ log_error "Invalid matcher format: $input"
+ return 1
+ fi
+
+ jq -n --arg n "$name" --arg v "$value" \
+ --argjson r "$is_regex" --argjson e "$is_equal" \
+ '{name: $n, value: $v, isRegex: $r, isEqual: $e}'
+}
+
+# Truncate string to max length
+truncate() {
+ local str="$1" max="${2:-30}"
+ if [ ${#str} -gt "$max" ]; then
+ echo "${str:0:$((max-2))}.."
+ else
+ echo "$str"
+ fi
+}
+
+# ============================================================================
+# CREATE COMMAND
+# ============================================================================
+
+cmd_create() {
+ local matchers_json="[]"
+ local duration=""
+ local comment=""
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --matcher)
+ local m
+ m=$(parse_matcher "$2") || exit 1
+ matchers_json=$(echo "$matchers_json" | jq --argjson m "$m" '. + [$m]')
+ shift 2 ;;
+ --duration) duration="$2"; shift 2 ;;
+ --comment) comment="$2"; shift 2 ;;
+ --dry-run) DRY_RUN=true; shift ;;
+ *) log_error "Unknown option for create: $1"; exit 1 ;;
+ esac
+ done
+
+ if [ "$(echo "$matchers_json" | jq 'length')" -eq 0 ]; then
+ log_error "At least one --matcher is required"
+ exit 1
+ fi
+
+ if [ -z "$duration" ]; then
+ log_error "--duration is required"
+ exit 1
+ fi
+
+ if [ -z "$comment" ]; then
+ log_error "--comment is required"
+ exit 1
+ fi
+
+ local duration_secs
+ duration_secs=$(parse_duration "$duration") || exit 1
+
+ local starts_at ends_at
+ starts_at=$(iso_timestamp 0)
+ ends_at=$(iso_timestamp "$duration_secs")
+
+ local full_comment="${COMMENT_PREFIX}${comment}"
+
+ local payload
+ payload=$(jq -n \
+ --argjson matchers "$matchers_json" \
+ --arg startsAt "$starts_at" \
+ --arg endsAt "$ends_at" \
+ --arg createdBy "$AUTHOR" \
+ --arg comment "$full_comment" \
+ '{matchers: $matchers, startsAt: $startsAt, endsAt: $endsAt, createdBy: $createdBy, comment: $comment}')
+
+ if [ "$DRY_RUN" = true ]; then
+ log_dry_run "Would create silence:"
+ echo "$payload" | jq .
+ return
+ fi
+
+ local response
+ response=$(am_api POST "/api/v2/silences" "$payload")
+
+ if [ $? -eq 0 ] && [ -n "$response" ]; then
+ local sid
+ sid=$(echo "$response" | jq -r '.silenceID // empty')
+ if [ -n "$sid" ]; then
+ log_info "Silence created: ${sid}"
+ else
+ log_info "Silence created"
+ fi
+ else
+ log_error "Failed to create silence"
+ exit 1
+ fi
+}
+
+# ============================================================================
+# BULK-CREATE COMMAND
+# ============================================================================
+
+cmd_bulk_create() {
+ local file=""
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --file) file="$2"; shift 2 ;;
+ --dry-run) DRY_RUN=true; shift ;;
+ *) log_error "Unknown option for bulk-create: $1"; exit 1 ;;
+ esac
+ done
+
+ if [ -z "$file" ] || [ ! -f "$file" ]; then
+ log_error "Valid --file is required"
+ exit 1
+ fi
+
+ local success=0 failed=0
+ local in_silence=false
+ local in_matchers=false
+ local matchers_json="[]"
+ local duration="" comment=""
+ local current_matcher_name="" current_matcher_value="" current_matcher_regex="false"
+
+ flush_silence() {
+ if [ -z "$duration" ] || [ "$(echo "$matchers_json" | jq 'length')" -eq 0 ]; then
+ return
+ fi
+
+ local duration_secs starts_at ends_at full_comment payload
+ duration_secs=$(parse_duration "$duration") || { ((failed++)); return; }
+ starts_at=$(iso_timestamp 0)
+ ends_at=$(iso_timestamp "$duration_secs")
+ full_comment="${COMMENT_PREFIX}${comment}"
+
+ payload=$(jq -n \
+ --argjson matchers "$matchers_json" \
+ --arg startsAt "$starts_at" \
+ --arg endsAt "$ends_at" \
+ --arg createdBy "$AUTHOR" \
+ --arg comment "$full_comment" \
+ '{matchers: $matchers, startsAt: $startsAt, endsAt: $endsAt, createdBy: $createdBy, comment: $comment}')
+
+ if [ "$DRY_RUN" = true ]; then
+ log_dry_run "Would create silence:"
+ echo "$payload" | jq .
+ echo ""
+ ((success++))
+ return
+ fi
+
+ local response
+ response=$(am_api POST "/api/v2/silences" "$payload")
+
+ if [ $? -eq 0 ] && [ -n "$response" ]; then
+ local sid
+ sid=$(echo "$response" | jq -r '.silenceID // empty')
+ log_info "Silence created: ${sid:-ok} (${comment})"
+ ((success++))
+ else
+ log_error "Failed to create silence: ${comment}"
+ ((failed++))
+ fi
+ }
+
+ flush_matcher() {
+ if [ -n "$current_matcher_name" ]; then
+ local m
+ m=$(jq -n --arg n "$current_matcher_name" --arg v "$current_matcher_value" \
+ --argjson r "$current_matcher_regex" \
+ '{name: $n, value: $v, isRegex: $r, isEqual: true}')
+ matchers_json=$(echo "$matchers_json" | jq --argjson m "$m" '. + [$m]')
+ current_matcher_name=""
+ current_matcher_value=""
+ current_matcher_regex="false"
+ fi
+ }
+
+ while IFS= read -r line || [ -n "$line" ]; do
+ # Strip leading/trailing whitespace for comparison
+ local trimmed
+ trimmed=$(echo "$line" | sed 's/^[[:space:]]*//' | sed 's/[[:space:]]*$//')
+
+ # Skip comments and empty lines
+ [[ "$trimmed" =~ ^# ]] && continue
+ [ -z "$trimmed" ] && continue
+
+ # New silence block
+ if [[ "$trimmed" == "- matchers:" ]]; then
+ flush_matcher
+ if [ "$in_silence" = true ]; then
+ flush_silence
+ fi
+ in_silence=true
+ in_matchers=true
+ matchers_json="[]"
+ duration=""
+ comment=""
+ continue
+ fi
+
+ # Matcher entry
+ if [[ "$trimmed" == "- name:"* ]] && [ "$in_matchers" = true ]; then
+ flush_matcher
+ current_matcher_name=$(echo "$trimmed" | sed 's/^- name:[[:space:]]*//')
+ continue
+ fi
+
+ if [[ "$trimmed" == "value:"* ]] && [ "$in_matchers" = true ]; then
+ current_matcher_value=$(echo "$trimmed" | sed 's/^value:[[:space:]]*//')
+ continue
+ fi
+
+ if [[ "$trimmed" == "isRegex:"* ]] && [ "$in_matchers" = true ]; then
+ current_matcher_regex=$(echo "$trimmed" | sed 's/^isRegex:[[:space:]]*//')
+ continue
+ fi
+
+ # Duration
+ if [[ "$trimmed" == "duration:"* ]]; then
+ flush_matcher
+ in_matchers=false
+ duration=$(echo "$trimmed" | sed 's/^duration:[[:space:]]*//')
+ continue
+ fi
+
+ # Comment
+ if [[ "$trimmed" == "comment:"* ]]; then
+ comment=$(echo "$trimmed" | sed 's/^comment:[[:space:]]*//' | sed 's/^"//' | sed 's/"$//')
+ continue
+ fi
+
+ done < "$file"
+
+ # Flush last silence
+ flush_matcher
+ if [ "$in_silence" = true ]; then
+ flush_silence
+ fi
+
+ echo ""
+ log_info "Bulk create complete: ${success} succeeded, ${failed} failed"
+}
+
+# ============================================================================
+# LIST COMMAND
+# ============================================================================
+
+cmd_list() {
+ local state="active"
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --state) state="$2"; shift 2 ;;
+ *) log_error "Unknown option for list: $1"; exit 1 ;;
+ esac
+ done
+
+ local silences_json
+ silences_json=$(am_api GET "/api/v2/silences")
+
+ if [ -z "$silences_json" ]; then
+ log_error "Failed to fetch silences"
+ exit 1
+ fi
+
+ # Filter by state
+ local filtered
+ if [ "$state" = "all" ]; then
+ filtered="$silences_json"
+ else
+ filtered=$(echo "$silences_json" | jq --arg s "$state" '[.[] | select(.status.state == $s)]')
+ fi
+
+ local count
+ count=$(echo "$filtered" | jq 'length')
+
+ if [ "$count" -eq 0 ]; then
+ log_info "No ${state} silences found"
+ return
+ fi
+
+ printf "${BLUE}%-10s %-10s %-12s %-30s %-20s %-20s %s${NC}\n" \
+ "ID" "STATE" "AUTHOR" "MATCHERS" "STARTS" "ENDS" "COMMENT"
+ printf '%.0s-' {1..120}
+ echo ""
+
+ echo "$filtered" | jq -r '.[] | [
+ .id[0:8],
+ .status.state,
+ .createdBy,
+ ([.matchers[] | "\(.name)=\(.value)"] | join(", ")),
+ .startsAt[0:19],
+ .endsAt[0:19],
+ .comment
+ ] | @tsv' | while IFS=$'\t' read -r id st author matchers starts ends comment; do
+ printf "%-10s %-10s %-12s %-30s %-20s %-20s %s\n" \
+ "$id" "$st" "$(truncate "$author" 12)" "$(truncate "$matchers" 30)" \
+ "$starts" "$ends" "$(truncate "$comment" 40)"
+ done
+
+ echo ""
+ log_info "${count} silence(s) found"
+}
+
+# ============================================================================
+# EXTEND COMMAND
+# ============================================================================
+
+cmd_extend() {
+ local silence_id="" duration=""
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --id) silence_id="$2"; shift 2 ;;
+ --duration) duration="$2"; shift 2 ;;
+ --dry-run) DRY_RUN=true; shift ;;
+ *) log_error "Unknown option for extend: $1"; exit 1 ;;
+ esac
+ done
+
+ if [ -z "$silence_id" ]; then
+ log_error "--id is required"
+ exit 1
+ fi
+
+ if [ -z "$duration" ]; then
+ log_error "--duration is required"
+ exit 1
+ fi
+
+ local duration_secs
+ duration_secs=$(parse_duration "$duration") || exit 1
+
+ # Find the silence (match by prefix)
+ local silences_json silence
+ silences_json=$(am_api GET "/api/v2/silences")
+ silence=$(echo "$silences_json" | jq --arg id "$silence_id" '[.[] | select(.id | startswith($id)) | select(.status.state == "active")] | first // empty')
+
+ if [ -z "$silence" ] || [ "$silence" = "null" ]; then
+ log_error "Active silence not found matching ID: ${silence_id}"
+ exit 1
+ fi
+
+ local full_id
+ full_id=$(echo "$silence" | jq -r '.id')
+
+ # Build new silence with extended endsAt
+ local new_ends_at
+ new_ends_at=$(iso_timestamp "$duration_secs")
+
+ local payload
+ payload=$(echo "$silence" | jq --arg endsAt "$new_ends_at" \
+ 'del(.id, .status, .updatedAt) | .endsAt = $endsAt')
+
+ if [ "$DRY_RUN" = true ]; then
+ log_dry_run "Would extend silence ${full_id} to ${new_ends_at}:"
+ echo "$payload" | jq .
+ return
+ fi
+
+ local response
+ response=$(am_api POST "/api/v2/silences" "$payload")
+
+ if [ $? -eq 0 ] && [ -n "$response" ]; then
+ local new_id
+ new_id=$(echo "$response" | jq -r '.silenceID // empty')
+ log_info "Silence extended: ${new_id:-ok} (new end: ${new_ends_at})"
+ else
+ log_error "Failed to extend silence"
+ exit 1
+ fi
+}
+
+# ============================================================================
+# EXPIRE COMMAND
+# ============================================================================
+
+cmd_expire() {
+ local silence_id="" match_pattern=""
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --id) silence_id="$2"; shift 2 ;;
+ --match) match_pattern="$2"; shift 2 ;;
+ --dry-run) DRY_RUN=true; shift ;;
+ *) log_error "Unknown option for expire: $1"; exit 1 ;;
+ esac
+ done
+
+ if [ -z "$silence_id" ] && [ -z "$match_pattern" ]; then
+ log_error "Either --id or --match is required"
+ exit 1
+ fi
+
+ local silences_json
+ silences_json=$(am_api GET "/api/v2/silences")
+
+ if [ -z "$silences_json" ]; then
+ log_error "Failed to fetch silences"
+ exit 1
+ fi
+
+ local ids_to_expire=()
+
+ if [ -n "$silence_id" ]; then
+ # Match by ID prefix
+ local matched
+ matched=$(echo "$silences_json" | jq -r --arg id "$silence_id" \
+ '.[] | select(.id | startswith($id)) | select(.status.state == "active") | .id')
+ while IFS= read -r id; do
+ [ -n "$id" ] && ids_to_expire+=("$id")
+ done <<< "$matched"
+ fi
+
+ if [ -n "$match_pattern" ]; then
+ # Parse pattern: 'comment=~regex'
+ local field pattern
+ if [[ "$match_pattern" == *"=~"* ]]; then
+ field="${match_pattern%%=~*}"
+ pattern="${match_pattern#*=~}"
+ else
+ log_error "Match pattern must use =~ syntax (e.g., 'comment=~maintenance.*')"
+ exit 1
+ fi
+
+ local matched
+ if [ "$field" = "comment" ]; then
+ matched=$(echo "$silences_json" | jq -r --arg p "$pattern" \
+ '.[] | select(.status.state == "active") | select(.comment | test($p)) | .id')
+ else
+ matched=$(echo "$silences_json" | jq -r --arg f "$field" --arg p "$pattern" \
+ '.[] | select(.status.state == "active") | select(.matchers[] | select(.name == $f) | .value | test($p)) | .id')
+ fi
+
+ while IFS= read -r id; do
+ [ -n "$id" ] && ids_to_expire+=("$id")
+ done <<< "$matched"
+ fi
+
+ if [ ${#ids_to_expire[@]} -eq 0 ]; then
+ log_warn "No matching active silences found"
+ return
+ fi
+
+ local success=0 failed=0
+
+ for id in "${ids_to_expire[@]}"; do
+ if [ "$DRY_RUN" = true ]; then
+ log_dry_run "Would expire silence: ${id}"
+ ((success++))
+ continue
+ fi
+
+ if curl -sf --connect-timeout 5 --max-time 10 \
+ -X DELETE "${AM_URL}/api/v2/silence/${id}" >/dev/null 2>&1; then
+ log_info "Expired silence: ${id}"
+ ((success++))
+ else
+ log_error "Failed to expire silence: ${id}"
+ ((failed++))
+ fi
+ done
+
+ echo ""
+ log_info "Expire complete: ${success} succeeded, ${failed} failed"
+}
+
+# ============================================================================
+# EXPORT COMMAND
+# ============================================================================
+
+cmd_export() {
+ local output=""
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --output) output="$2"; shift 2 ;;
+ *) log_error "Unknown option for export: $1"; exit 1 ;;
+ esac
+ done
+
+ local silences_json
+ silences_json=$(am_api GET "/api/v2/silences")
+
+ if [ -z "$silences_json" ]; then
+ log_error "Failed to fetch silences"
+ exit 1
+ fi
+
+ local active
+ active=$(echo "$silences_json" | jq '[.[] | select(.status.state == "active")]')
+ local count
+ count=$(echo "$active" | jq 'length')
+
+ if [ "$count" -eq 0 ]; then
+ log_warn "No active silences to export"
+ return
+ fi
+
+ local yaml_output
+ yaml_output=$(echo "$active" | jq -r '
+ "silences:",
+ (.[] |
+ " - matchers:",
+ (.matchers[] |
+ " - name: \(.name)",
+ " value: \(.value)",
+ " isRegex: \(.isRegex)"
+ ),
+ " duration: \(
+ ((.endsAt | fromdateiso8601) - (.startsAt | fromdateiso8601)) |
+ if . >= 86400 then "\(. / 86400 | floor)d"
+ elif . >= 3600 then "\(. / 3600 | floor)h"
+ elif . >= 60 then "\(. / 60 | floor)m"
+ else "\(.)s"
+ end
+ )",
+ " comment: \"\(.comment)\""
+ )
+ ')
+
+ if [ -n "$output" ]; then
+ echo "$yaml_output" > "$output"
+ log_info "Exported ${count} silence(s) to ${output}"
+ else
+ echo "$yaml_output"
+ fi
+}
+
+# ============================================================================
+# AUDIT COMMAND
+# ============================================================================
+
+# audit: print a table of all active silences with author, start/end time,
+# total duration, matchers and comment.
+# Exits 1 when silences cannot be fetched.
+cmd_audit() {
+    local silences_json
+    silences_json=$(am_api GET "/api/v2/silences")
+
+    if [ -z "$silences_json" ]; then
+        log_error "Failed to fetch silences"
+        exit 1
+    fi
+
+    local active
+    active=$(echo "$silences_json" | jq '[.[] | select(.status.state == "active")]')
+    local count
+    count=$(echo "$active" | jq 'length')
+
+    if [ "${count:-0}" -eq 0 ]; then
+        log_info "No active silences"
+        return
+    fi
+
+    printf "${BLUE}%-10s %-12s %-20s %-20s %-10s %-28s %s${NC}\n" \
+        "ID" "AUTHOR" "CREATED" "EXPIRES" "DURATION" "MATCHERS" "COMMENT"
+    printf '%.0s-' {1..130}
+    echo ""
+
+    # - epoch strips fractional seconds first: fromdateiso8601 rejects them
+    # - empty/null fields become "-" because "read" collapses consecutive
+    #   tabs (tab is an IFS whitespace character), which would shift columns
+    local id author created expires duration matchers comment
+    echo "$active" | jq -r '
+        def epoch: sub("\\.[0-9]+"; "") | fromdateiso8601;
+        .[] | [
+            .id[0:8],
+            .createdBy,
+            .startsAt[0:19],
+            .endsAt[0:19],
+            (
+                ((.endsAt | epoch) - (.startsAt | epoch)) |
+                if . >= 86400 then "\(. / 86400 | floor)d"
+                elif . >= 3600 then "\(. / 3600 | floor)h"
+                elif . >= 60 then "\(. / 60 | floor)m"
+                else "\(.)s"
+                end
+            ),
+            ([.matchers[] | "\(.name)=\(.value)"] | join(", ")),
+            .comment
+        ] | map(if . == null or . == "" then "-" else . end) | @tsv' \
+        | while IFS=$'\t' read -r id author created expires duration matchers comment; do
+            printf "%-10s %-12s %-20s %-20s %-10s %-28s %s\n" \
+                "$id" "$(truncate "$author" 12)" "$created" "$expires" "$duration" \
+                "$(truncate "$matchers" 28)" "$(truncate "$comment" 40)"
+        done
+
+    echo ""
+    log_info "${count} active silence(s)"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+main() {
+ if [ $# -eq 0 ]; then
+ show_usage
+ fi
+
+ check_requirements || exit 1
+
+ local command="$1"
+ shift
+
+ case "$command" in
+ -h|--help) show_usage ;;
+ create|bulk-create|list|extend|expire|export|audit) ;;
+ *) log_error "Unknown command: $command"; echo ""; show_usage ;;
+ esac
+
+ # All commands except --help require connectivity
+ check_connectivity
+
+ case "$command" in
+ create) cmd_create "$@" ;;
+ bulk-create) cmd_bulk_create "$@" ;;
+ list) cmd_list "$@" ;;
+ extend) cmd_extend "$@" ;;
+ expire) cmd_expire "$@" ;;
+ export) cmd_export "$@" ;;
+ audit) cmd_audit "$@" ;;
+ esac
+}
+
+main "$@"
diff --git a/alloy-config-generator.sh b/alloy-config-generator.sh
new file mode 100755
index 0000000..dd1d5bc
--- /dev/null
+++ b/alloy-config-generator.sh
@@ -0,0 +1,576 @@
+#!/usr/bin/env bash
+#
+# Alloy Config Generator
+#
+# Interactive script that generates a Grafana Alloy configuration
+# file based on your environment. Asks what backends you use, what
+# signals to collect, and what services to monitor, then outputs
+# a working config.alloy ready to deploy.
+#
+# Usage:
+# ./alloy-config-generator.sh
+# ./alloy-config-generator.sh -o /etc/alloy/config.alloy
+# ./alloy-config-generator.sh --non-interactive --metrics --logs --prometheus-url http://mimir:9009
+#
+# Parameters:
+# -o, --output FILE Write config to file (default: stdout)
+# --non-interactive Skip prompts, use flags and defaults
+# --metrics Enable host metrics collection
+# --logs Enable log collection
+# --traces Enable OTLP trace collection
+# --journald Enable journald log collection
+# --docker Enable Docker container log collection
+# --nginx Enable nginx log collection
+# --prometheus-url URL Prometheus/Mimir remote_write URL
+# --loki-url URL Loki push URL
+# --tempo-url URL Tempo OTLP endpoint (host:port)
+# --hostname NAME Hostname label for metrics and logs
+# --scrape-targets LIST Comma-separated host:port targets to scrape
+# --help Show usage
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Version: 1.0
+
+set -euo pipefail
+
+# --- Configuration ---
+readonly VERSION="1.0"
+# Assigned first and marked readonly afterwards: `readonly NAME="$(cmd)"`
+# reports readonly's own exit status and would hide a failing substitution.
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+
+# Defaults (overridden by command-line flags or the interactive prompts)
+OUTPUT=""
+NON_INTERACTIVE=false
+ENABLE_METRICS=false
+ENABLE_LOGS=false
+ENABLE_TRACES=false
+ENABLE_JOURNALD=false
+ENABLE_DOCKER=false
+ENABLE_NGINX=false
+PROMETHEUS_URL=""
+LOKI_URL=""
+TEMPO_URL=""
+HOSTNAME=""
+SCRAPE_TARGETS=""
+
+# --- Functions ---
+
+usage() {
+ cat </dev/null || echo "server")
+ ask "Hostname label" "$detected_hostname" HOSTNAME
+ echo ""
+
+ # --- Backends ---
+ echo "== Backends =="
+ echo ""
+
+ ask "Prometheus/Mimir remote_write URL (leave empty to skip metrics)" "http://prometheus:9090/api/v1/write" PROMETHEUS_URL
+ if [[ -n "$PROMETHEUS_URL" ]]; then
+ ENABLE_METRICS=true
+ fi
+
+ ask "Loki push URL (leave empty to skip logs)" "http://loki:3100/loki/api/v1/push" LOKI_URL
+ if [[ -n "$LOKI_URL" ]]; then
+ ENABLE_LOGS=true
+ fi
+
+ ask "Tempo OTLP endpoint host:port (leave empty to skip traces)" "" TEMPO_URL
+ if [[ -n "$TEMPO_URL" ]]; then
+ ENABLE_TRACES=true
+ fi
+
+ echo ""
+
+ # --- Metrics options ---
+ if [[ "$ENABLE_METRICS" == true ]]; then
+ echo "== Metrics =="
+ echo ""
+
+ local extra_targets=""
+ ask "Additional Prometheus scrape targets (comma-separated host:port, or empty)" "" extra_targets
+ if [[ -n "$extra_targets" ]]; then
+ SCRAPE_TARGETS="$extra_targets"
+ fi
+ echo ""
+ fi
+
+ # --- Log options ---
+ if [[ "$ENABLE_LOGS" == true ]]; then
+ echo "== Logs =="
+ echo ""
+ ask_yn "Collect journald logs?" "y" ENABLE_JOURNALD
+ ask_yn "Collect Docker container logs?" "n" ENABLE_DOCKER
+ ask_yn "Collect nginx logs?" "n" ENABLE_NGINX
+ echo ""
+ fi
+}
+
+# --- Config generation functions ---
+
+generate_header() {
+ cat <\\\\S+) (?P\\\\S+) (?P\\\\S+)\" (?P\\\\d+) (?P\\\\d+)"
+ }
+
+ stage.labels {
+ values = {
+ method = "",
+ status = "",
+ }
+ }
+}
+
+EOF
+}
+
+generate_logs_write() {
+ if [[ "$ENABLE_LOGS" != true ]]; then
+ return
+ fi
+
+ cat <&2; usage ;;
+ esac
+ done
+
+ # Set hostname default
+ if [[ -z "$HOSTNAME" ]]; then
+ HOSTNAME=$(hostname -s 2>/dev/null || echo "server")
+ fi
+
+ # Set backend URL defaults for non-interactive mode
+ if [[ "$NON_INTERACTIVE" == true ]]; then
+ [[ "$ENABLE_METRICS" == true && -z "$PROMETHEUS_URL" ]] && PROMETHEUS_URL="http://prometheus:9090/api/v1/write"
+ [[ "$ENABLE_LOGS" == true && -z "$LOKI_URL" ]] && LOKI_URL="http://loki:3100/loki/api/v1/push"
+ [[ "$ENABLE_TRACES" == true && -z "$TEMPO_URL" ]] && TEMPO_URL="tempo:4317"
+ fi
+
+ # Interactive mode
+ if [[ "$NON_INTERACTIVE" != true ]]; then
+ interactive_setup
+ fi
+
+ # Check at least one signal is enabled
+ if [[ "$ENABLE_METRICS" != true && "$ENABLE_LOGS" != true && "$ENABLE_TRACES" != true ]]; then
+ echo "ERROR: No signals enabled. Enable at least one of: metrics, logs, traces" >&2
+ exit 1
+ fi
+
+ # Generate config
+ if [[ -n "$OUTPUT" ]]; then
+ generate_config > "$OUTPUT"
+ echo ""
+ echo "Config written to: $OUTPUT"
+ echo ""
+ echo "Signals enabled:"
+ [[ "$ENABLE_METRICS" == true ]] && echo " ✓ Metrics → $PROMETHEUS_URL"
+ [[ "$ENABLE_LOGS" == true ]] && echo " ✓ Logs → $LOKI_URL"
+ [[ "$ENABLE_TRACES" == true ]] && echo " ✓ Traces → $TEMPO_URL"
+ echo ""
+ echo "Next steps:"
+ echo " 1. Review the config: cat $OUTPUT"
+ echo " 2. Validate syntax: alloy fmt $OUTPUT"
+ echo " 3. Test it: alloy run $OUTPUT"
+ echo " 4. Deploy: sudo cp $OUTPUT /etc/alloy/config.alloy && sudo systemctl restart alloy"
+ else
+ generate_config
+ fi
+}
+
+main "$@"
diff --git a/ami-lifecycle-manager.sh b/ami-lifecycle-manager.sh
new file mode 100644
index 0000000..a6a95db
--- /dev/null
+++ b/ami-lifecycle-manager.sh
@@ -0,0 +1,670 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### ami-lifecycle-manager.sh — AWS AMI lifecycle management ####
+#### Create, tag, retain, and deregister AMIs with orphan snapshot cleanup ####
+#### Requires: bash 4+, aws-cli v2, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./ami-lifecycle-manager.sh --create --instance-id i-1234567890abcdef0 ####
+#### ./ami-lifecycle-manager.sh --enforce --retention-days 30 ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Environment-overridable settings: keep the caller's value, else the default.
+: "${AWS_REGION:=}"
+: "${AMI_RETENTION_DAYS:=30}"
+: "${AMI_NAME_PREFIX:=ami-lifecycle}"
+: "${VERBOSE:=false}"
+: "${COLOR:=auto}"
+: "${OUTPUT_FORMAT:=text}"
+AWS_PROFILE_OPT="${AWS_PROFILE:-}"
+DRY_RUN="false"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+MANAGER_TAG="ami-lifecycle-manager"
+RETENTION_DAYS="$AMI_RETENTION_DAYS"
+RUN_MODE=""
+INSTANCE_ID=""
+START_TIME=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+# Fill the color globals according to $COLOR: "never" disables colors, "auto"
+# disables them when stdout is not a terminal, any other value enables them.
+# The values are printf-style escape strings, not raw escape bytes.
+setup_colors() {
+    # Start from the colorless state; it is the result for both opt-out cases.
+    RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+    if [[ "$COLOR" == "never" ]] || [[ "$COLOR" == "auto" && ! -t 1 ]]; then
+        return 0
+    fi
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# Log helpers. The color variables are expanded inside the printf format on
+# purpose so their "\033[...m" sequences are interpreted; the message itself
+# always goes through %s.
+log_info() { printf "${GREEN}[INFO]${RESET} %s\n" "$*"; }
+log_warn() { printf "${YELLOW}[WARN]${RESET} %s\n" "$*" >&2; }
+log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; }
+# Debug line, printed only when VERBOSE is "true".
+# - Written to stderr so it can never end up inside the stdout captured by a
+#   command substitution such as $(aws_cmd ...).
+# - Always returns 0: a bare call with VERBOSE=false must not trip `set -e`.
+log_verbose() {
+    if [[ "$VERBOSE" == "true" ]]; then
+        printf "${DIM}[DEBUG] %s${RESET}\n" "$*" >&2
+    fi
+    return 0
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Report an error through log_error and terminate the script with status 1.
+die() {
+    log_error "$@"
+    exit 1
+}
+
+# Print today's date in UTC as YYYY-MM-DD.
+today_utc() {
+    date -u '+%Y-%m-%d'
+}
+
+# Print the Unix epoch (seconds) of midnight UTC on the given YYYY-MM-DD date.
+# Parsing is pinned to UTC because the created-date tags this is used on are
+# written from today_utc; parsing in local time would shift the result by the
+# host's UTC offset and could move an age across a day boundary.
+# Arguments: $1 - date as YYYY-MM-DD
+epoch_from_date() {
+    local d="$1"
+    if date --version >/dev/null 2>&1; then
+        # GNU date
+        date -u -d "$d" +%s
+    else
+        # BSD/macOS date: spell out the time of day, otherwise the fields
+        # missing from the format are taken from the current time.
+        date -u -j -f "%Y-%m-%d %H:%M:%S" "$d 00:00:00" +%s
+    fi
+}
+
+# Print the number of whole days between the given date and now.
+# Arguments: $1 - date understood by epoch_from_date
+days_since() {
+    local created="$1"
+    local now_epoch created_epoch
+    created_epoch=$(epoch_from_date "$created")
+    now_epoch=$(date -u +%s)
+    echo $(( (now_epoch - created_epoch) / 86400 ))
+}
+
+# Print the date that lies $2 days after the YYYY-MM-DD date in $1.
+date_offset_days() {
+    local base="$1" offset="$2"
+    if ! date --version >/dev/null 2>&1; then
+        # No --version support: use the BSD/macOS syntax.
+        date -j -v+"${offset}d" -f "%Y-%m-%d" "$base" +%Y-%m-%d
+        return
+    fi
+    # GNU date
+    date -d "${base} +${offset} days" +%Y-%m-%d
+}
+
+# ── AWS CLI wrapper ───────────────────────────────────────────────────
+# Run the aws CLI with the given arguments, appending --region / --profile
+# when AWS_REGION / AWS_PROFILE_OPT are non-empty. The final command line is
+# passed to log_verbose first.
+aws_cmd() {
+    local -a cli_args=("$@")
+    if [[ -n "$AWS_REGION" ]]; then
+        cli_args+=(--region "$AWS_REGION")
+    fi
+    if [[ -n "$AWS_PROFILE_OPT" ]]; then
+        cli_args+=(--profile "$AWS_PROFILE_OPT")
+    fi
+    log_verbose "aws ${cli_args[*]}"
+    aws "${cli_args[@]}"
+}
+
+# ── Dependency check ─────────────────────────────────────────────────
+# Verify the runtime prerequisites and resolve the AWS region.
+# Dies when aws/jq are missing, bash is older than 4, no region can be
+# determined, or the STS identity call fails.
+# Globals: AWS_REGION (read, written when empty), AWS_PROFILE_OPT (read).
+check_deps() {
+    local missing=()
+    command -v aws >/dev/null 2>&1 || missing+=("aws-cli")
+    command -v jq >/dev/null 2>&1 || missing+=("jq")
+    if (( ${#missing[@]} > 0 )); then
+        die "Missing required tools: ${missing[*]}"
+    fi
+
+    local bash_major="${BASH_VERSINFO[0]}"
+    if (( bash_major < 4 )); then
+        die "Requires bash 4+, found ${BASH_VERSION}"
+    fi
+
+    # Determine region before the credential probe, so that the probe (which
+    # goes through aws_cmd) already carries --region.
+    if [[ -z "$AWS_REGION" ]]; then
+        AWS_REGION="${AWS_DEFAULT_REGION:-}"
+    fi
+    if [[ -z "$AWS_REGION" ]]; then
+        # Look the region up for the selected profile, not only the default one.
+        local region_lookup=(configure get region)
+        if [[ -n "$AWS_PROFILE_OPT" ]]; then
+            region_lookup+=(--profile "$AWS_PROFILE_OPT")
+        fi
+        AWS_REGION=$(aws "${region_lookup[@]}" 2>/dev/null || echo "")
+        if [[ -z "$AWS_REGION" ]]; then
+            die "AWS_REGION is required (set via env var, --region, or aws configure)"
+        fi
+    fi
+
+    # Verify AWS credentials
+    if ! aws_cmd sts get-caller-identity --output text >/dev/null 2>&1; then
+        die "AWS credentials not configured or expired"
+    fi
+}
+
+# ── Header ────────────────────────────────────────────────────────────
+# Print the run banner: account id (or "unknown" when the STS call fails),
+# region, mode and the current UTC timestamp, followed by a blank line.
+print_header() {
+    local account_id
+    account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null || echo "unknown")
+
+    local now_utc
+    now_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+
+    printf '%s\n' \
+        "AMI Lifecycle Manager" \
+        "Account: $account_id" \
+        "Region: $AWS_REGION" \
+        "Mode: $RUN_MODE" \
+        "Time: $now_utc" \
+        ""
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat < 0 )); do
+ case "$1" in
+ --create)
+ RUN_MODE="create"; shift ;;
+ --enforce)
+ RUN_MODE="enforce"; shift ;;
+ --clean-snapshots)
+ RUN_MODE="clean-snapshots"; shift ;;
+ --inventory)
+ RUN_MODE="inventory"; shift ;;
+ --instance-id)
+ [[ $# -lt 2 ]] && die "--instance-id requires a value"
+ INSTANCE_ID="$2"; shift 2 ;;
+ --retention-days)
+ [[ $# -lt 2 ]] && die "--retention-days requires a value"
+ RETENTION_DAYS="$2"; shift 2 ;;
+ --dry-run)
+ DRY_RUN="true"; shift ;;
+ --format)
+ [[ $# -lt 2 ]] && die "--format requires a value"
+ OUTPUT_FORMAT="$2"; shift 2 ;;
+ --profile)
+ [[ $# -lt 2 ]] && die "--profile requires a value"
+ AWS_PROFILE_OPT="$2"; shift 2 ;;
+ --region)
+ [[ $# -lt 2 ]] && die "--region requires a value"
+ AWS_REGION="$2"; shift 2 ;;
+ --verbose)
+ VERBOSE="true"; shift ;;
+ --no-color)
+ COLOR="never"; shift ;;
+ --help|-h)
+ usage ;;
+ *)
+ die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ if [[ -z "$RUN_MODE" ]]; then
+ log_error "No mode specified"
+ echo ""
+ usage
+ fi
+
+ if [[ "$RUN_MODE" == "create" && -z "$INSTANCE_ID" ]]; then
+ die "--create requires --instance-id"
+ fi
+
+ case "$OUTPUT_FORMAT" in
+ text|csv|json) ;;
+ *) die "Invalid --format: $OUTPUT_FORMAT (expected text, csv, json)" ;;
+ esac
+
+ if ! [[ "$RETENTION_DAYS" =~ ^[0-9]+$ ]]; then
+ die "--retention-days must be a positive integer"
+ fi
+}
+
+# ── Get instance name ─────────────────────────────────────────────────
+# Print the value of the Name tag of instance $1 as returned by the CLI in
+# text format; prints "N/A" when the describe-instances call fails.
+get_instance_name() {
+    local iid="$1"
+    local -a lookup=(
+        ec2 describe-instances
+        --instance-ids "$iid"
+        --query 'Reservations[0].Instances[0].Tags[?Key==`Name`].Value | [0]'
+        --output text
+    )
+    if ! aws_cmd "${lookup[@]}" 2>/dev/null; then
+        echo "N/A"
+    fi
+}
+
+# ── Create AMI ────────────────────────────────────────────────────────
+# Create an AMI from $INSTANCE_ID (no reboot), tag it with lifecycle metadata,
+# then tag its backing snapshots once they become visible.
+# Globals read: INSTANCE_ID, AMI_NAME_PREFIX, MANAGER_TAG, RETENTION_DAYS.
+# Dies when the create-image call fails; tagging failures only warn.
+create_ami() {
+    local instance_id="$INSTANCE_ID"
+    local today
+    today="$(today_utc)"
+
+    log_info "Creating AMI from instance ${instance_id}..."
+
+    # Get instance name
+    local instance_name
+    instance_name=$(get_instance_name "$instance_id")
+    if [[ "$instance_name" == "None" || -z "$instance_name" ]]; then
+        instance_name="unnamed"
+    fi
+    log_info "Instance name: ${instance_name}"
+
+    # Build AMI name. EC2 accepts only letters, digits, spaces and the
+    # characters ( ) [ ] . / - ' @ _ in an AMI name, with at most 128
+    # characters, so anything else from the Name tag is replaced by "-".
+    local safe_name
+    safe_name=$(printf '%s' "$instance_name" | LC_ALL=C tr -c "A-Za-z0-9()[] ./'@_-" '-')
+    local ami_name="${AMI_NAME_PREFIX}-${safe_name}-${today}"
+    ami_name="${ami_name:0:128}"
+    local ami_description="AMI created by ${MANAGER_TAG} from ${instance_id} (${instance_name}) on ${today}"
+
+    # Calculate expiry date
+    local expires
+    expires=$(date_offset_days "$today" "$RETENTION_DAYS")
+
+    # Create the AMI (no-reboot to avoid downtime). stderr is left visible so
+    # the CLI's own error explains a failure.
+    local ami_id
+    ami_id=$(aws_cmd ec2 create-image \
+        --instance-id "$instance_id" \
+        --name "$ami_name" \
+        --description "$ami_description" \
+        --no-reboot \
+        --query 'ImageId' \
+        --output text) || die "Failed to create AMI from ${instance_id}"
+
+    log_info "AMI created: ${ami_id}"
+    log_info "Name: ${ami_name}"
+
+    # Tag the AMI
+    aws_cmd ec2 create-tags \
+        --resources "$ami_id" \
+        --tags \
+            "Key=Name,Value=${ami_name}" \
+            "Key=managed-by,Value=${MANAGER_TAG}" \
+            "Key=source-instance,Value=${instance_id}" \
+            "Key=source-name,Value=${instance_name}" \
+            "Key=created-date,Value=${today}" \
+            "Key=retention-days,Value=${RETENTION_DAYS}" \
+            "Key=expires,Value=${expires}" \
+        >/dev/null 2>&1 || log_warn "Failed to tag AMI ${ami_id}"
+
+    log_info "Tags applied:"
+    printf " %-16s = %s\n" "managed-by" "$MANAGER_TAG"
+    printf " %-16s = %s\n" "source-instance" "$instance_id"
+    printf " %-16s = %s\n" "source-name" "$instance_name"
+    printf " %-16s = %s\n" "created-date" "$today"
+    printf " %-16s = %s\n" "retention-days" "$RETENTION_DAYS"
+    printf " %-16s = %s\n" "expires" "$expires"
+
+    # Wait briefly for snapshots to appear, then tag them too.
+    # `|| true`: a logging helper must never abort the run under `set -e`.
+    log_verbose "Waiting for AMI snapshots to register..." || true
+    local retries=0
+    local snap_ids=""
+    while (( retries < 12 )); do
+        snap_ids=$(aws_cmd ec2 describe-images \
+            --image-ids "$ami_id" \
+            --query 'Images[0].BlockDeviceMappings[*].Ebs.SnapshotId' \
+            --output text 2>/dev/null || echo "")
+        if [[ -n "$snap_ids" && "$snap_ids" != "None" ]]; then
+            break
+        fi
+        sleep 5
+        retries=$(( retries + 1 ))
+    done
+
+    if [[ -z "$snap_ids" || "$snap_ids" == "None" ]]; then
+        log_warn "No snapshots visible yet for ${ami_id}; snapshots were not tagged"
+        return 0
+    fi
+
+    # The text output is a whitespace-separated list of snapshot ids.
+    local -a snap_list=()
+    read -r -a snap_list <<<"$snap_ids"
+    local snap_id
+    for snap_id in "${snap_list[@]}"; do
+        aws_cmd ec2 create-tags \
+            --resources "$snap_id" \
+            --tags \
+                "Key=managed-by,Value=${MANAGER_TAG}" \
+                "Key=source-ami,Value=${ami_id}" \
+                "Key=source-instance,Value=${instance_id}" \
+                "Key=created-date,Value=${today}" \
+            >/dev/null 2>&1 || log_warn "Failed to tag snapshot ${snap_id}"
+        log_verbose "Tagged snapshot ${snap_id}" || true
+    done
+}
+
+# ── Get managed AMIs ──────────────────────────────────────────────────
+# Print, as a JSON array, the images owned by the caller's account that carry
+# the managed-by=$MANAGER_TAG tag; prints "[]" when describe-images fails.
+get_managed_amis() {
+    local owner
+    owner=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null)
+
+    local -a describe=(
+        ec2 describe-images
+        --owners "$owner"
+        --filters "Name=tag:managed-by,Values=${MANAGER_TAG}"
+        --query 'Images[*]'
+        --output json
+    )
+    if ! aws_cmd "${describe[@]}" 2>/dev/null; then
+        echo "[]"
+    fi
+}
+
+# ── Enforce retention ─────────────────────────────────────────────────
+# List every managed AMI with its age and retention, and deregister those
+# whose age in days is strictly greater than their retention.
+# The retention comes from the AMI's retention-days tag, falling back to
+# RETENTION_DAYS when the tag is missing or not a plain number.
+# Globals read: RETENTION_DAYS, DRY_RUN, OUTPUT_FORMAT (text|csv|json).
+enforce_retention() {
+    log_info "Enforcing retention policy (${RETENTION_DAYS} days)..."
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN — no AMIs will be deregistered"
+    fi
+
+    local amis_json
+    amis_json=$(get_managed_amis)
+
+    local total
+    total=$(jq 'length' <<<"$amis_json")
+    log_info "Found ${total} managed AMI(s)"
+
+    if (( total == 0 )); then
+        log_info "No managed AMIs found — nothing to do"
+        return
+    fi
+
+    local active=0 expired=0 deregistered=0
+
+    # Print table header for text output
+    if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+        echo ""
+        printf " %-24s %-42s %-6s %-11s %s\n" "AMI" "NAME" "AGE" "RETENTION" "STATUS"
+        echo " ──────────────────────────────────────────────────────────────────────────────────────"
+    fi
+
+    local csv_lines=()
+    local json_items=()
+    local dq='"'
+    local ami_id ami_name created_date retention_tag ret age status
+
+    # Fields are separated by the ASCII unit separator (0x1f), not by tabs:
+    # tab is IFS whitespace, so `read` would merge consecutive tabs and an
+    # empty field (e.g. a missing created-date tag) would shift every later
+    # column one position to the left.
+    while IFS=$'\037' read -r ami_id ami_name created_date retention_tag; do
+        [[ -z "$ami_id" || "$ami_id" == "null" ]] && continue
+
+        # Use tag retention or default. The value is used in arithmetic
+        # below, so anything that is not a plain number is rejected.
+        ret="$retention_tag"
+        if [[ -z "$ret" || "$ret" == "null" || "$ret" == "None" ]]; then
+            ret="$RETENTION_DAYS"
+        elif ! [[ "$ret" =~ ^[0-9]+$ ]]; then
+            log_warn "Ignoring non-numeric retention-days tag '${ret}' on ${ami_id}"
+            ret="$RETENTION_DAYS"
+        fi
+        ret=$(( 10#$ret ))   # force base 10 so "08" is not read as octal
+
+        age=0
+        if [[ -n "$created_date" && "$created_date" != "null" && "$created_date" != "None" ]]; then
+            if ! age=$(days_since "$created_date" 2>/dev/null) || ! [[ "$age" =~ ^-?[0-9]+$ ]]; then
+                log_warn "Cannot parse created-date '${created_date}' on ${ami_id}; treating age as 0"
+                age=0
+            fi
+        fi
+
+        status="active"
+        if (( age > ret )); then
+            status="expired"
+            expired=$(( expired + 1 ))
+        else
+            active=$(( active + 1 ))
+        fi
+
+        case "$OUTPUT_FORMAT" in
+            text)
+                local status_icon="✓ active"
+                if [[ "$status" == "expired" ]]; then
+                    status_icon="✗ expired"
+                fi
+                printf " %-24s %-42s %3dd %3dd %s\n" \
+                    "$ami_id" "$ami_name" "$age" "$ret" "$status_icon"
+                ;;
+            csv)
+                # Double embedded quotes so the name stays one CSV field.
+                csv_lines+=("\"${ami_id}\",\"${ami_name//$dq/$dq$dq}\",${age},${ret},\"${status}\"")
+                ;;
+            json)
+                # Let jq do the string escaping.
+                json_items+=("$(jq -cn \
+                    --arg ami_id "$ami_id" --arg name "$ami_name" \
+                    --argjson age "$age" --argjson ret "$ret" --arg status "$status" \
+                    '{ami_id: $ami_id, name: $name, age_days: $age, retention_days: $ret, status: $status}')")
+                ;;
+        esac
+
+        # Deregister expired AMIs
+        if [[ "$status" == "expired" ]]; then
+            if [[ "$DRY_RUN" == "true" ]]; then
+                log_info "[DRY RUN] Would deregister ${ami_id} (${age}d old, retention ${ret}d)"
+            else
+                log_info "Deregistering ${ami_id} (${age}d old, retention ${ret}d)..."
+                if aws_cmd ec2 deregister-image --image-id "$ami_id" >/dev/null 2>&1; then
+                    deregistered=$(( deregistered + 1 ))
+                else
+                    log_warn "Failed to deregister ${ami_id}"
+                fi
+            fi
+        fi
+    done < <(jq -r '
+        def tag($k; $d): ((.Tags // []) | map(select(.Key == $k)) | .[0].Value) // $d;
+        .[]
+        | [.ImageId, tag("Name"; "N/A"), tag("created-date"; ""), tag("retention-days"; "")]
+        | @tsv
+        | gsub("\t"; "\u001f")' <<<"$amis_json")
+
+    echo ""
+
+    case "$OUTPUT_FORMAT" in
+        text)
+            echo "Summary"
+            printf " Total managed AMIs: %d\n" "$total"
+            printf " Active: %d\n" "$active"
+            printf " Expired: %d\n" "$expired"
+            if [[ "$DRY_RUN" == "true" ]]; then
+                printf " Would deregister: %d\n" "$expired"
+            else
+                printf " Deregistered: %d\n" "$deregistered"
+            fi
+            ;;
+        csv)
+            echo "ami_id,name,age_days,retention_days,status"
+            if (( ${#csv_lines[@]} > 0 )); then
+                printf '%s\n' "${csv_lines[@]}"
+            fi
+            ;;
+        json)
+            local joined=""
+            if (( ${#json_items[@]} > 0 )); then
+                joined=$(IFS=,; printf '%s' "${json_items[*]}")
+            fi
+            printf '{"mode":"enforce","retention_days":%d,"dry_run":%s,"total":%d,"active":%d,"expired":%d,"items":[%s]}\n' \
+                "$RETENTION_DAYS" "$DRY_RUN" "$total" "$active" "$expired" "$joined"
+            ;;
+    esac
+}
+
+# ── Clean orphan snapshots ────────────────────────────────────────────
+# Delete managed snapshots whose source AMI is no longer registered.
+# A snapshot counts as orphaned when it has no source-ami tag or when that AMI
+# id is not among the images currently owned by the account.
+# Globals read: DRY_RUN, MANAGER_TAG.
+clean_orphan_snapshots() {
+    log_info "Searching for orphaned AMI snapshots..."
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN — no snapshots will be deleted"
+    fi
+
+    local account_id
+    account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null) \
+        || die "Failed to determine AWS account id"
+
+    # Get all snapshots tagged as managed by us
+    local snaps_json
+    snaps_json=$(aws_cmd ec2 describe-snapshots \
+        --owner-ids "$account_id" \
+        --filters "Name=tag:managed-by,Values=${MANAGER_TAG}" \
+        --query 'Snapshots[*]' \
+        --output json 2>/dev/null) || die "Failed to describe snapshots"
+
+    local total_snaps
+    total_snaps=$(jq 'length' <<<"$snaps_json")
+    log_info "Found ${total_snaps} managed snapshot(s)"
+
+    if (( total_snaps == 0 )); then
+        log_info "No managed snapshots found — nothing to do"
+        return
+    fi
+
+    # Get all currently registered AMI IDs
+    local registered_amis
+    registered_amis=$(aws_cmd ec2 describe-images \
+        --owners "$account_id" \
+        --query 'Images[*].ImageId' \
+        --output text 2>/dev/null) || die "Failed to describe images"
+
+    # Index the ids for exact-match lookups. Word splitting is intended: the
+    # text output is a whitespace-separated list of ids.
+    local -A registered=()
+    local ami
+    for ami in $registered_amis; do
+        registered["$ami"]=1
+    done
+
+    local orphan_count=0
+    local deleted_count=0
+    local total_size=0
+    local snap_id source_ami snap_size size_gb is_orphan
+
+    # Fields are separated by the ASCII unit separator (0x1f), not by tabs:
+    # tab is IFS whitespace, so with an empty source-ami field `read` would
+    # put the volume size into source_ami and leave snap_size empty.
+    while IFS=$'\037' read -r snap_id source_ami snap_size; do
+        [[ -z "$snap_id" || "$snap_id" == "null" ]] && continue
+
+        # Check if the source AMI still exists
+        is_orphan="false"
+        if [[ -z "$source_ami" || "$source_ami" == "null" || "$source_ami" == "None" ]]; then
+            is_orphan="true"
+        elif [[ -z "${registered[$source_ami]+x}" ]]; then
+            is_orphan="true"
+        fi
+
+        [[ "$is_orphan" == "true" ]] || continue
+
+        orphan_count=$(( orphan_count + 1 ))
+        size_gb="${snap_size:-0}"
+        [[ "$size_gb" =~ ^[0-9]+$ ]] || size_gb=0
+
+        if [[ "$DRY_RUN" == "true" ]]; then
+            log_info "[DRY RUN] Would delete orphan snapshot ${snap_id} (${size_gb} GiB, source AMI: ${source_ami:-unknown})"
+            total_size=$(( total_size + size_gb ))
+        else
+            log_info "Deleting orphan snapshot ${snap_id} (${size_gb} GiB)..."
+            if aws_cmd ec2 delete-snapshot --snapshot-id "$snap_id" >/dev/null 2>&1; then
+                deleted_count=$(( deleted_count + 1 ))
+                # Only storage of snapshots that were really deleted counts.
+                total_size=$(( total_size + size_gb ))
+            else
+                log_warn "Failed to delete snapshot ${snap_id}"
+            fi
+        fi
+    done < <(jq -r '
+        .[]
+        | [.SnapshotId,
+           (((.Tags // []) | map(select(.Key == "source-ami")) | .[0].Value) // ""),
+           (.VolumeSize // 0 | tostring)]
+        | @tsv
+        | gsub("\t"; "\u001f")' <<<"$snaps_json")
+
+    echo ""
+    echo "Summary"
+    printf " Total managed snapshots: %d\n" "$total_snaps"
+    printf " Orphaned: %d\n" "$orphan_count"
+    if [[ "$DRY_RUN" == "true" ]]; then
+        printf " Would delete: %d\n" "$orphan_count"
+    else
+        printf " Deleted: %d\n" "$deleted_count"
+    fi
+    printf " Storage reclaimed: %d GiB\n" "$total_size"
+}
+
+# ── Inventory report ──────────────────────────────────────────────────
+# Print an inventory of all managed AMIs in the format selected by
+# OUTPUT_FORMAT (text table, csv rows, or one json document).
+# Globals read: RETENTION_DAYS (fallback for a missing or non-numeric
+# retention-days tag), OUTPUT_FORMAT.
+inventory_report() {
+    log_info "Generating AMI inventory report..."
+
+    local amis_json
+    amis_json=$(get_managed_amis)
+
+    local total
+    total=$(jq 'length' <<<"$amis_json")
+    log_info "Found ${total} managed AMI(s)"
+
+    if (( total == 0 )); then
+        log_info "No managed AMIs found"
+        return
+    fi
+
+    case "$OUTPUT_FORMAT" in
+        text)
+            echo ""
+            printf " %-24s %-30s %-12s %-6s %-11s %s\n" \
+                "AMI" "SOURCE INSTANCE" "CREATED" "AGE" "RETENTION" "SNAPSHOTS"
+            echo " ────────────────────────────────────────────────────────────────────────────────────────────────"
+            ;;
+        csv)
+            echo "ami_id,name,source_instance,source_name,created_date,age_days,retention_days,expires,snapshot_count"
+            ;;
+    esac
+
+    local json_items=()
+    local dq='"'
+    local ami_id ami_name source_instance source_name created_date
+    local retention_tag expires_tag snap_count age instance_display
+
+    # Fields are separated by the ASCII unit separator (0x1f), not by tabs:
+    # tab is IFS whitespace, so `read` would merge consecutive tabs and any
+    # missing tag would shift all later columns to the left.
+    while IFS=$'\037' read -r ami_id ami_name source_instance source_name created_date retention_tag expires_tag snap_count; do
+        [[ -z "$ami_id" || "$ami_id" == "null" ]] && continue
+
+        # Defaults for missing tags
+        [[ "$source_instance" == "null" || -z "$source_instance" ]] && source_instance="N/A"
+        [[ "$source_name" == "null" || -z "$source_name" ]] && source_name=""
+        [[ "$created_date" == "null" || -z "$created_date" ]] && created_date="unknown"
+        [[ "$expires_tag" == "null" || -z "$expires_tag" ]] && expires_tag="N/A"
+        # Both values are printed with %d and emitted as bare numbers, so
+        # anything that is not a plain number falls back to a safe default.
+        [[ "$retention_tag" =~ ^[0-9]+$ ]] || retention_tag="$RETENTION_DAYS"
+        [[ "$snap_count" =~ ^[0-9]+$ ]] || snap_count="0"
+
+        age=0
+        if [[ "$created_date" != "unknown" ]]; then
+            if ! age=$(days_since "$created_date" 2>/dev/null) || ! [[ "$age" =~ ^-?[0-9]+$ ]]; then
+                log_warn "Cannot parse created-date '${created_date}' on ${ami_id}; treating age as 0"
+                age=0
+            fi
+        fi
+
+        instance_display="$source_instance"
+        if [[ -n "$source_name" && "$source_name" != "N/A" ]]; then
+            instance_display="${source_instance} (${source_name})"
+        fi
+
+        case "$OUTPUT_FORMAT" in
+            text)
+                printf " %-24s %-30s %-12s %3dd %3dd %s\n" \
+                    "$ami_id" "$instance_display" "$created_date" "$age" "$retention_tag" "$snap_count"
+                ;;
+            csv)
+                # Double embedded quotes so free-text tags stay one CSV field.
+                echo "\"${ami_id}\",\"${ami_name//$dq/$dq$dq}\",\"${source_instance}\",\"${source_name//$dq/$dq$dq}\",\"${created_date}\",${age},${retention_tag},\"${expires_tag}\",${snap_count}"
+                ;;
+            json)
+                # Let jq do the string escaping.
+                json_items+=("$(jq -cn \
+                    --arg ami_id "$ami_id" --arg name "$ami_name" \
+                    --arg source_instance "$source_instance" --arg source_name "$source_name" \
+                    --arg created_date "$created_date" --argjson age "$age" \
+                    --argjson ret "$retention_tag" --arg expires "$expires_tag" \
+                    --argjson snaps "$snap_count" \
+                    '{ami_id: $ami_id, name: $name, source_instance: $source_instance,
+                      source_name: $source_name, created_date: $created_date,
+                      age_days: $age, retention_days: $ret, expires: $expires,
+                      snapshot_count: $snaps}')")
+                ;;
+        esac
+    done < <(jq -r '
+        def tag($k; $d): ((.Tags // []) | map(select(.Key == $k)) | .[0].Value) // $d;
+        .[]
+        | [.ImageId,
+           tag("Name"; "N/A"),
+           tag("source-instance"; ""),
+           tag("source-name"; ""),
+           tag("created-date"; ""),
+           tag("retention-days"; ""),
+           tag("expires"; ""),
+           (.BlockDeviceMappings // [] | map(select(.Ebs.SnapshotId)) | length | tostring)]
+        | @tsv
+        | gsub("\t"; "\u001f")' <<<"$amis_json")
+
+    if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+        local joined=""
+        if (( ${#json_items[@]} > 0 )); then
+            joined=$(IFS=,; printf '%s' "${json_items[*]}")
+        fi
+        printf '{"mode":"inventory","total":%d,"items":[%s]}\n' "$total" "$joined"
+    fi
+
+    if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+        echo ""
+        printf " Total: %d managed AMI(s)\n" "$total"
+    fi
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+# Entry point: parse arguments, prepare colors and dependencies, print the
+# banner, run the handler for $RUN_MODE and log the elapsed time.
+main() {
+    parse_args "$@"
+    setup_colors
+    check_deps
+
+    START_TIME=$(date +%s)
+
+    print_header
+
+    # Map the run mode to its handler function.
+    local handler=""
+    case "$RUN_MODE" in
+        create)          handler=create_ami ;;
+        enforce)         handler=enforce_retention ;;
+        clean-snapshots) handler=clean_orphan_snapshots ;;
+        inventory)       handler=inventory_report ;;
+        *)               die "Unknown mode: $RUN_MODE" ;;
+    esac
+    "$handler"
+
+    local finished
+    finished=$(date +%s)
+    log_info "Completed in $(( finished - START_TIME ))s"
+}
+
+main "$@"
diff --git a/apache-metrics-exporter.sh b/apache-metrics-exporter.sh
new file mode 100644
index 0000000..db049ea
--- /dev/null
+++ b/apache-metrics-exporter.sh
@@ -0,0 +1,1308 @@
+#!/bin/bash
+#############################################################
+#### Apache Metrics Exporter for Prometheus ####
+#### Comprehensive Apache monitoring via mod_status, ####
+#### logs, SSL, process, and config metrics ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.01 ####
+#### ####
+#### Usage: ./apache-metrics-exporter.sh [OPTIONS] ####
+#############################################################
+#
+# Metrics collected:
+# - mod_status: accesses, bytes, req/sec, busy/idle workers, scoreboard
+# - Process: worker count, memory usage, CPU usage, open files
+# - Access logs: requests by status code, response times, bytes transferred
+# - SSL: certificate expiry days for configured domains
+# - Config: MPM type, MaxRequestWorkers, KeepAliveTimeout
+# - Upstream: proxy/balancer status (if configured)
+#
+# Requirements:
+# - Apache with mod_status enabled (ExtendedStatus On)
+# - socat (for HTTP server)
+# - curl (for server-status fetching)
+#
+set -euo pipefail
+
+#########################
+### Auto-detect Apache ###
+#########################
+
+APACHE_BIN=""
+APACHECTL=""
+APACHE_PROC=""
+
+# Set APACHE_BIN, APACHECTL and APACHE_PROC for the Apache flavor found on
+# PATH: apache2 is preferred, httpd is the fallback, and all three stay
+# empty when neither command exists.
+detect_apache_flavor() {
+    APACHE_BIN=""
+    APACHECTL=""
+
+    if command -v apache2 &>/dev/null; then
+        APACHE_BIN="apache2"
+        APACHECTL="apache2ctl"
+    elif command -v httpd &>/dev/null; then
+        APACHE_BIN="httpd"
+        APACHECTL="httpd"
+    fi
+
+    # The process name is the same as the binary name for both flavors.
+    APACHE_PROC="$APACHE_BIN"
+}
+
+detect_apache_flavor
+
+#########################
+### Configuration ###
+#########################
+
+LISTEN_PORT="${APACHE_EXPORTER_PORT:-9117}"
+STATUS_URL="${APACHE_STATUS_URL:-http://127.0.0.1/server-status?auto}"
+SSL_CHECK_DOMAINS="${SSL_CHECK_DOMAINS:-}" # Comma-separated list of domains to check SSL
+SCRAPE_INTERVAL="${SCRAPE_INTERVAL:-15}"
+
+# Auto-detect paths based on distro. The RHEL-style /etc/httpd layout is used
+# only when /etc/httpd exists and /etc/apache2 does not; in every other case
+# the Debian-style /etc/apache2 layout applies. Each path can be overridden
+# through its environment variable.
+if [[ ! -d /etc/apache2 && -d /etc/httpd ]]; then
+    ACCESS_LOG="${APACHE_ACCESS_LOG:-/var/log/httpd/access_log}"
+    ERROR_LOG="${APACHE_ERROR_LOG:-/var/log/httpd/error_log}"
+    APACHE_CONF="${APACHE_CONF:-/etc/httpd/conf/httpd.conf}"
+    SITES_DIR="${APACHE_SITES_DIR:-/etc/httpd/conf.d}"
+    CONF_D_DIR="${APACHE_CONF_D:-/etc/httpd/conf.d}"
+else
+    ACCESS_LOG="${APACHE_ACCESS_LOG:-/var/log/apache2/access.log}"
+    ERROR_LOG="${APACHE_ERROR_LOG:-/var/log/apache2/error.log}"
+    APACHE_CONF="${APACHE_CONF:-/etc/apache2/apache2.conf}"
+    SITES_DIR="${APACHE_SITES_DIR:-/etc/apache2/sites-enabled}"
+    CONF_D_DIR="${APACHE_CONF_D:-/etc/apache2/conf-enabled}"
+fi
+
+# Log parsing settings
+LOG_TAIL_LINES="${LOG_TAIL_LINES:-10000}" # Number of lines to parse from access log
+LOG_PARSE_INTERVAL="${LOG_PARSE_INTERVAL:-60}" # How often to parse logs (seconds)
+
+# State files for log metrics
+# STATE_DIR is created by setup() with `mkdir -p`.
+# NOTE(review): this is a fixed, predictable path under /tmp — confirm that
+# another local user pre-creating it is not a concern for this deployment.
+STATE_DIR="/tmp/apache-metrics"
+# NOTE(review): presumably the epoch of the last access-log parse; its use is
+# not visible in this part of the file — confirm.
+LAST_LOG_PARSE=0
+
+# Output mode
+# --textfile writes to "$TEXTFILE_DIR/apache.prom"; --output/-o sets
+# OUTPUT_FILE directly; --http switches HTTP_MODE to true.
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+
+#########################
+### Logging ###
+#########################
+
+# Write a timestamped message to stderr.
+log() {
+    local stamp
+    stamp=$(date '+%Y-%m-%d %H:%M:%S')
+    printf '[%s] %s\n' "$stamp" "$*" >&2
+}
+
+#########################
+### Parse Arguments ###
+#########################
+
+parse_args() {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --textfile)
+ OUTPUT_FILE="$TEXTFILE_DIR/apache.prom"
+ shift
+ ;;
+ --http)
+ HTTP_MODE=true
+ shift
+ ;;
+ --output|-o)
+ OUTPUT_FILE="$2"
+ shift 2
+ ;;
+ --port)
+ LISTEN_PORT="$2"
+ shift 2
+ ;;
+ --status-url)
+ STATUS_URL="$2"
+ shift 2
+ ;;
+ --access-log)
+ ACCESS_LOG="$2"
+ shift 2
+ ;;
+ --error-log)
+ ERROR_LOG="$2"
+ shift 2
+ ;;
+ --apache-conf)
+ APACHE_CONF="$2"
+ shift 2
+ ;;
+ --ssl-domains)
+ SSL_CHECK_DOMAINS="$2"
+ shift 2
+ ;;
+ --help)
+ cat </dev/null; then
+ echo "apt"
+ elif command -v dnf &>/dev/null; then
+ echo "dnf"
+ elif command -v yum &>/dev/null; then
+ echo "yum"
+ elif command -v zypper &>/dev/null; then
+ echo "zypper"
+ elif command -v pacman &>/dev/null; then
+ echo "pacman"
+ elif command -v apk &>/dev/null; then
+ echo "apk"
+ else
+ echo ""
+ fi
+}
+
+# Install package $1 with the package manager reported by
+# detect_package_manager. Returns the installer's status, or 1 (after logging
+# an error) when no known package manager was detected.
+install_package() {
+    local pkg="$1"
+    local manager
+    manager=$(detect_package_manager)
+
+    log "Installing $pkg..."
+
+    # apt is the only manager that needs two steps (index refresh + install).
+    if [[ "$manager" == "apt" ]]; then
+        apt-get update -qq && apt-get install -y -qq "$pkg"
+        return
+    fi
+
+    local -a installer
+    case "$manager" in
+        dnf)    installer=(dnf install -y -q) ;;
+        yum)    installer=(yum install -y -q) ;;
+        zypper) installer=(zypper install -y -q) ;;
+        pacman) installer=(pacman -S --noconfirm) ;;
+        apk)    installer=(apk add --quiet) ;;
+        *)
+            log "ERROR: Unknown package manager. Please install $pkg manually."
+            return 1
+            ;;
+    esac
+
+    "${installer[@]}" "$pkg"
+}
+
+# Prepare the exporter: create the state directory, make sure socat and curl
+# are available (installing them when running as root), warn when Apache does
+# not appear to be running, and probe the server-status endpoint.
+# Exits 1 when a required tool is missing and cannot be installed.
+setup() {
+    mkdir -p "$STATE_DIR"
+
+    # Check for required tools and install if missing
+    if ! command -v socat &>/dev/null; then
+        log "socat not found, attempting to install..."
+        if [[ $EUID -eq 0 ]]; then
+            if ! install_package socat; then
+                log "ERROR: Failed to install socat"
+                exit 1
+            fi
+            log "socat installed successfully"
+        else
+            log "ERROR: socat is required. Run as root to auto-install, or install manually:"
+            log " Debian/Ubuntu: apt install socat"
+            log " RHEL/CentOS: yum install socat"
+            log " Fedora: dnf install socat"
+            log " Alpine: apk add socat"
+            exit 1
+        fi
+    fi
+
+    if ! command -v curl &>/dev/null; then
+        log "curl not found, attempting to install..."
+        if [[ $EUID -eq 0 ]]; then
+            if ! install_package curl; then
+                log "ERROR: Failed to install curl"
+                exit 1
+            fi
+            log "curl installed successfully"
+        else
+            log "ERROR: curl is required. Run as root to auto-install, or install manually."
+            exit 1
+        fi
+    fi
+
+    # Check if Apache is running
+    if [[ -n "$APACHE_PROC" ]]; then
+        if ! pgrep -x "$APACHE_PROC" &>/dev/null && ! pidof "$APACHE_PROC" &>/dev/null; then
+            log "WARNING: $APACHE_PROC process not found - process metrics will show apache_process_running=0"
+        fi
+    else
+        log "WARNING: Apache binary not found (neither apache2 nor httpd)"
+    fi
+
+    # Check if server-status is accessible. The check returns 1 for every
+    # problem it only warns about; swallow that status so a bare `setup`
+    # call under `set -e` does not abort the exporter over a warning.
+    check_server_status || true
+}
+
+# Probe $STATUS_URL and log a diagnosis.
+# Returns 0 when the endpoint answers 200 with mod_status output (contains
+# "Total Accesses"); otherwise logs a warning plus the setup help and
+# returns 1.
+check_server_status() {
+    log "Checking server-status at $STATUS_URL..."
+
+    # On a connection error curl exits non-zero while still printing "000".
+    # `|| true` keeps that exit status from aborting the assignment under
+    # `set -e`, so the "000" branch below can actually report the problem.
+    local response
+    response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$STATUS_URL" 2>/dev/null) || true
+    response="${response:-000}"
+
+    case "$response" in
+        200)
+            # Verify it's actually mod_status output
+            local content
+            content=$(curl -s --max-time 5 "$STATUS_URL" 2>/dev/null) || true
+            if grep -q "Total Accesses" <<<"$content"; then
+                log "✓ mod_status is working correctly"
+                return 0
+            fi
+            log "WARNING: $STATUS_URL returned 200 but doesn't look like mod_status output"
+            log " Expected 'Total Accesses' in response (ensure ExtendedStatus On)"
+            ;;
+        000)
+            log "WARNING: Cannot connect to $STATUS_URL (connection refused/timeout)"
+            log " server-status metrics will show apache_up=0"
+            ;;
+        403)
+            log "WARNING: Access denied to $STATUS_URL (HTTP 403)"
+            log " Check 'Require' directives in server-status location block"
+            ;;
+        404)
+            log "WARNING: server-status endpoint not found at $STATUS_URL (HTTP 404)"
+            log " mod_status may not be enabled"
+            ;;
+        *)
+            log "WARNING: Unexpected response from $STATUS_URL (HTTP $response)"
+            ;;
+    esac
+
+    show_server_status_help
+    return 1
+}
+
+# Print copy-and-paste instructions for enabling mod_status.
+# Outputs: every line goes through log.
+# The handler lines are wrapped in a <Location> container: without it the
+# snippet would attach SetHandler server-status to the whole server.
+show_server_status_help() {
+    log ""
+    log "To enable mod_status, configure Apache as follows:"
+    log ""
+    log " Debian/Ubuntu:"
+    log " sudo a2enmod status"
+    log " # Edit /etc/apache2/mods-enabled/status.conf:"
+    log " ExtendedStatus On"
+    log " <Location /server-status>"
+    log " SetHandler server-status"
+    log " Require local"
+    log " </Location>"
+    log ""
+    log " RHEL/CentOS/Rocky:"
+    log " # Add to /etc/httpd/conf.d/status.conf:"
+    log " ExtendedStatus On"
+    log " <Location /server-status>"
+    log " SetHandler server-status"
+    log " Require local"
+    log " </Location>"
+    log ""
+    log "Then reload: apachectl configtest && systemctl reload apache2 (or httpd)"
+    log ""
+    log "Or specify a different URL with: --status-url <url>"
+    log ""
+}
+
+#########################
+### Server Status Metrics ###
+#########################
+
+collect_server_status() {
+ # Emits the apache_up gauge, then parses individual fields out of the
+ # response fetched from $STATUS_URL. apache_up is 0 (and parsing is
+ # skipped) when curl fails or the body lacks "Total Accesses".
+ local status_output
+
+ echo "# HELP apache_up Whether Apache mod_status is reachable"
+ echo "# TYPE apache_up gauge"
+
+ if ! status_output=$(curl -s --max-time 5 "$STATUS_URL" 2>/dev/null); then
+ echo "apache_up 0"
+ return
+ fi
+
+ # Verify we got valid mod_status output
+ if ! echo "$status_output" | grep -q "Total Accesses"; then
+ echo "apache_up 0"
+ return
+ fi
+
+ echo "apache_up 1"
+
+ # Parse mod_status ?auto output
+ # Format:
+ # Total Accesses: 12345
+ # Total kBytes: 67890
+ # CPULoad: .0123456
+ # Uptime: 86400
+ # ReqPerSec: .142857
+ # BytesPerSec: 804.571
+ # BytesPerReq: 5632
+ # BusyWorkers: 3
+ # IdleWorkers: 7
+ # Scoreboard: __W_K....._R..
+
+ local total_accesses total_kbytes cpu_load uptime req_per_sec bytes_per_sec bytes_per_req
+ local busy_workers idle_workers scoreboard
+
+ # Each field is selected by its line prefix and the value column is
+ # printed by awk.
+ # NOTE(review): the `|| name=0` fallback only fires when the pipeline
+ # status is non-zero; for a missing field that requires `pipefail` to be
+ # set for this script - confirm, otherwise the variable is left empty.
+ total_accesses=$(echo "$status_output" | grep '^Total Accesses:' | awk '{print $3}') || total_accesses=0
+ total_kbytes=$(echo "$status_output" | grep '^Total kBytes:' | awk '{print $3}') || total_kbytes=0
+ cpu_load=$(echo "$status_output" | grep '^CPULoad:' | awk '{print $2}') || cpu_load=0
+ uptime=$(echo "$status_output" | grep '^Uptime:' | awk '{print $2}') || uptime=0
+ req_per_sec=$(echo "$status_output" | grep '^ReqPerSec:' | awk '{print $2}') || req_per_sec=0
+ bytes_per_sec=$(echo "$status_output" | grep '^BytesPerSec:' | awk '{print $2}') || bytes_per_sec=0
+ bytes_per_req=$(echo "$status_output" | grep '^BytesPerReq:' | awk '{print $2}') || bytes_per_req=0
+ busy_workers=$(echo "$status_output" | grep '^BusyWorkers:' | awk '{print $2}') || busy_workers=0
+ idle_workers=$(echo "$status_output" | grep '^IdleWorkers:' | awk '{print $2}') || idle_workers=0
+ scoreboard=$(echo "$status_output" | grep '^Scoreboard:' | awk '{print $2}') || scoreboard=""
+
+ # Convert kBytes to bytes
+ # bc is tried first; shell arithmetic is the fallback when bc fails.
+ local total_bytes
+ total_bytes=$(echo "$total_kbytes * 1024" | bc 2>/dev/null || echo "$((total_kbytes * 1024))")
+
+ cat </dev/null || pidof "$APACHE_PROC" 2>/dev/null | awk '{print $1}' || echo "")
+
+ # No master PID found: report the process as down and stop.
+ if [[ -z "$apache_master_pid" ]]; then
+ echo "# HELP apache_process_running Whether Apache process is running"
+ echo "# TYPE apache_process_running gauge"
+ echo "apache_process_running 0"
+ return
+ fi
+
+ echo "# HELP apache_process_running Whether Apache process is running"
+ echo "# TYPE apache_process_running gauge"
+ echo "apache_process_running 1"
+
+ # Get all Apache PIDs
+ apache_pids=$(pgrep -x "$APACHE_PROC" 2>/dev/null || pidof "$APACHE_PROC" 2>/dev/null || echo "")
+
+ # Count workers (total processes minus master)
+ worker_count=$(echo "$apache_pids" | wc -w)
+ if [[ $worker_count -gt 0 ]]; then
+ worker_count=$((worker_count - 1)) # Subtract master
+ fi
+
+ echo "# HELP apache_workers_count Number of Apache worker processes"
+ echo "# TYPE apache_workers_count gauge"
+ echo "apache_workers_count $worker_count"
+
+ # Calculate total memory usage (RSS in bytes)
+ total_memory=0
+ total_cpu=0
+ total_fds=0
+ total_threads=0
+
+ # Sum per-process figures read from /proc; PIDs whose /proc directory is
+ # gone are skipped.
+ for pid in $apache_pids; do
+ if [[ -d "/proc/$pid" ]]; then
+ # Memory (RSS in KB from /proc/pid/status, convert to bytes)
+ local rss
+ rss=$(grep -m1 'VmRSS:' "/proc/$pid/status" 2>/dev/null | awk '{print $2}' || echo "0")
+ total_memory=$((total_memory + rss * 1024))
+
+ # CPU time (from /proc/pid/stat - utime + stime in jiffies)
+ local stat_line utime stime
+ if stat_line=$(cat "/proc/$pid/stat" 2>/dev/null); then
+ utime=$(echo "$stat_line" | awk '{print $14}')
+ stime=$(echo "$stat_line" | awk '{print $15}')
+ total_cpu=$((total_cpu + utime + stime))
+ fi
+
+ # Open file descriptors
+ local fds
+ fds=$(ls -1 "/proc/$pid/fd" 2>/dev/null | wc -l || echo "0")
+ total_fds=$((total_fds + fds))
+
+ # Threads
+ # NOTE(review): `grep -c` returns the number of matching lines, not the
+ # value of the Threads: field, so `threads` is the line count (normally
+ # 1) and the value lookup below only runs when that count is 0. The sum
+ # therefore looks like a process count rather than a thread count.
+ local threads
+ threads=$(grep -c '^Threads:' "/proc/$pid/status" 2>/dev/null || true)
+ if [[ "$threads" -eq 0 ]]; then
+ threads=$(grep 'Threads:' "/proc/$pid/status" 2>/dev/null | awk '{print $2}' || echo "1")
+ fi
+ total_threads=$((total_threads + threads))
+ fi
+ done
+
+ # Convert CPU jiffies to seconds (assuming 100 Hz)
+ # NOTE(review): the tick rate is hard-coded; `getconf CLK_TCK` reports the
+ # actual value if it ever differs.
+ local cpu_seconds
+ cpu_seconds=$(echo "scale=2; $total_cpu / 100" | bc 2>/dev/null || echo "$total_cpu")
+
+ cat </dev/null || echo "0")
+ # starttime is in jiffies since boot
+ # start_seconds is the process age: system uptime minus starttime/100.
+ start_seconds=$(awk "BEGIN {printf \"%.0f\", $(cat /proc/uptime | awk '{print $1}') - ($starttime / 100)}")
+ local now_epoch
+ now_epoch=$(date +%s)
+ local process_start=$((now_epoch - start_seconds))
+ echo "apache_process_start_time_seconds $process_start"
+ else
+ echo "apache_process_start_time_seconds 0"
+ fi
+
+ # Get max open files limit
+ # Field 4 of the "Max open files" row is printed as the limit.
+ if [[ -f "/proc/$apache_master_pid/limits" ]]; then
+ local max_fds
+ max_fds=$(grep 'Max open files' "/proc/$apache_master_pid/limits" 2>/dev/null | awk '{print $4}' || echo "0")
+ echo ""
+ echo "# HELP apache_process_max_fds Maximum number of open file descriptors"
+ echo "# TYPE apache_process_max_fds gauge"
+ echo "apache_process_max_fds $max_fds"
+ fi
+}
+
+#########################
+### Config Metrics ###
+#########################
+
+collect_config_metrics() {
+ # Reports selected directives read from the Apache configuration files.
+ # Returns early, printing only a comment line, when $APACHE_CONF is not a
+ # regular file.
+ if [[ ! -f "$APACHE_CONF" ]]; then
+ echo "# Apache config not found at $APACHE_CONF"
+ return
+ fi
+
+ local max_request_workers keepalive_timeout keepalive_enabled
+ local mpm_type
+
+ # Parse MaxRequestWorkers (or MaxClients for older Apache)
+ # `head -1` keeps the first match across all searched paths.
+ max_request_workers=$(grep -rihE '^\s*MaxRequestWorkers' "$APACHE_CONF" "$CONF_D_DIR" "$SITES_DIR" 2>/dev/null | head -1 | awk '{print $2}' || echo "")
+ if [[ -z "$max_request_workers" ]]; then
+ max_request_workers=$(grep -rihE '^\s*MaxClients' "$APACHE_CONF" "$CONF_D_DIR" "$SITES_DIR" 2>/dev/null | head -1 | awk '{print $2}' || echo "0")
+ fi
+ max_request_workers="${max_request_workers:-0}"
+
+ # Parse KeepAliveTimeout
+ keepalive_timeout=$(grep -rihE '^\s*KeepAliveTimeout' "$APACHE_CONF" "$CONF_D_DIR" 2>/dev/null | head -1 | awk '{print $2}' || echo "0")
+ keepalive_timeout="${keepalive_timeout:-0}"
+
+ # Check KeepAlive on/off
+ # NOTE(review): the "on" fallback applies only when the pipeline status is
+ # non-zero. If no directive is found and the status is still zero, the
+ # value is empty and is reported as 0 below - confirm this is intended.
+ keepalive_enabled=$(grep -rihE '^\s*KeepAlive\s' "$APACHE_CONF" "$CONF_D_DIR" 2>/dev/null | head -1 | awk '{print tolower($2)}' || echo "on")
+ if [[ "$keepalive_enabled" == "on" ]]; then
+ keepalive_enabled=1
+ else
+ keepalive_enabled=0
+ fi
+
+ # Detect MPM type
+ # Stays "unknown" when $APACHECTL is empty or no known MPM module is listed.
+ mpm_type="unknown"
+ if [[ -n "$APACHECTL" ]]; then
+ local modules_list
+ modules_list=$($APACHECTL -M 2>/dev/null || echo "")
+ if echo "$modules_list" | grep -q 'mpm_event_module'; then
+ mpm_type="event"
+ elif echo "$modules_list" | grep -q 'mpm_worker_module'; then
+ mpm_type="worker"
+ elif echo "$modules_list" | grep -q 'mpm_prefork_module'; then
+ mpm_type="prefork"
+ fi
+ fi
+
+ cat </dev/null | wc -l)
+ elif [[ -d "$CONF_D_DIR" ]]; then
+ # In this branch the figure is the number of *.conf files under
+ # $CONF_D_DIR, not the number of VirtualHost blocks.
+ vhost_count=$(find "$CONF_D_DIR" -name "*.conf" -type f 2>/dev/null | wc -l)
+ fi
+
+ echo ""
+ echo "# HELP apache_config_vhosts_total Number of configured virtual hosts"
+ echo "# TYPE apache_config_vhosts_total gauge"
+ echo "apache_config_vhosts_total $vhost_count"
+
+ # Parse ServerLimit if available
+ # The metric is emitted only for a non-empty value other than "0".
+ local server_limit
+ server_limit=$(grep -rihE '^\s*ServerLimit' "$APACHE_CONF" "$CONF_D_DIR" 2>/dev/null | head -1 | awk '{print $2}' || echo "0")
+ if [[ "$server_limit" != "0" ]] && [[ -n "$server_limit" ]]; then
+ echo ""
+ echo "# HELP apache_config_server_limit ServerLimit setting"
+ echo "# TYPE apache_config_server_limit gauge"
+ echo "apache_config_server_limit $server_limit"
+ fi
+
+ # Parse Timeout
+ # The trailing \s keeps directives that merely start with "Timeout" out.
+ local timeout_val
+ timeout_val=$(grep -rihE '^\s*Timeout\s' "$APACHE_CONF" "$CONF_D_DIR" 2>/dev/null | head -1 | awk '{print $2}' || echo "0")
+ if [[ "$timeout_val" != "0" ]] && [[ -n "$timeout_val" ]]; then
+ echo ""
+ echo "# HELP apache_config_timeout Timeout setting in seconds"
+ echo "# TYPE apache_config_timeout gauge"
+ echo "apache_config_timeout $timeout_val"
+ fi
+}
+
+#########################
+### Access Log Metrics ###
+#########################
+
+# Summarise the tail of the access log as Prometheus gauges.
+# Globals:  ACCESS_LOG, LOG_TAIL_LINES, LOG_PARSE_INTERVAL, STATE_DIR (read)
+#           LAST_LOG_PARSE (read, refreshed from $STATE_DIR/last_parse)
+# Outputs:  metrics text on stdout; the same text is cached in
+#           $STATE_DIR/log_metrics and replayed until LOG_PARSE_INTERVAL
+#           seconds have passed since the last parse
+# Returns:  early, printing only a comment line, when the log is not a
+#           readable regular file or yields no data
+collect_access_log_metrics() {
+    if [[ ! -f "$ACCESS_LOG" ]] || [[ ! -r "$ACCESS_LOG" ]]; then
+        echo "# Access log not readable at $ACCESS_LOG"
+        return
+    fi
+
+    local now
+    now=$(date +%s)
+
+    # Only parse logs every LOG_PARSE_INTERVAL seconds
+    if [[ -f "$STATE_DIR/last_parse" ]]; then
+        LAST_LOG_PARSE=$(cat "$STATE_DIR/last_parse")
+    fi
+
+    if [[ $((now - LAST_LOG_PARSE)) -lt $LOG_PARSE_INTERVAL ]] && [[ -f "$STATE_DIR/log_metrics" ]]; then
+        cat "$STATE_DIR/log_metrics"
+        return
+    fi
+
+    echo "$now" > "$STATE_DIR/last_parse"
+
+    # Parse access log for status codes and other metrics
+    # Assuming combined log format: $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
+
+    local log_data
+    log_data=$(tail -n "$LOG_TAIL_LINES" "$ACCESS_LOG" 2>/dev/null || echo "")
+
+    if [[ -z "$log_data" ]]; then
+        echo "# No log data available"
+        return
+    fi
+
+    local metrics_output=""
+    # Loop variables are declared local so they do not leak into the caller.
+    local count status method uri
+
+    # Count by status code (field 9; only three-digit values are kept)
+    local status_counts
+    status_counts=$(echo "$log_data" | awk '{print $9}' | { grep -E '^[0-9]{3}$' || true; } | sort | uniq -c | sort -rn)
+
+    metrics_output+="# HELP apache_http_requests_by_status_total HTTP requests by status code (from last $LOG_TAIL_LINES log lines)
+# TYPE apache_http_requests_by_status_total gauge
+"
+
+    # Initialize counters for status code groups
+    local count_1xx=0 count_2xx=0 count_3xx=0 count_4xx=0 count_5xx=0
+
+    while read -r count status; do
+        if [[ -n "$status" ]] && [[ -n "$count" ]]; then
+            metrics_output+="apache_http_requests_by_status_total{status=\"$status\"} $count
+"
+            # Aggregate by category
+            case "${status:0:1}" in
+                1) count_1xx=$((count_1xx + count)) ;;
+                2) count_2xx=$((count_2xx + count)) ;;
+                3) count_3xx=$((count_3xx + count)) ;;
+                4) count_4xx=$((count_4xx + count)) ;;
+                5) count_5xx=$((count_5xx + count)) ;;
+            esac
+        fi
+    done <<< "$status_counts"
+
+    metrics_output+="
+# HELP apache_http_requests_by_status_class_total HTTP requests by status class
+# TYPE apache_http_requests_by_status_class_total gauge
+apache_http_requests_by_status_class_total{class=\"1xx\"} $count_1xx
+apache_http_requests_by_status_class_total{class=\"2xx\"} $count_2xx
+apache_http_requests_by_status_class_total{class=\"3xx\"} $count_3xx
+apache_http_requests_by_status_class_total{class=\"4xx\"} $count_4xx
+apache_http_requests_by_status_class_total{class=\"5xx\"} $count_5xx
+"
+
+    # Calculate total bytes sent (field 10; non-numeric values add 0)
+    local total_bytes
+    total_bytes=$(echo "$log_data" | awk '{sum += $10} END {print sum+0}')
+
+    metrics_output+="
+# HELP apache_http_response_bytes_total Total bytes sent in responses (from last $LOG_TAIL_LINES log lines)
+# TYPE apache_http_response_bytes_total gauge
+apache_http_response_bytes_total $total_bytes
+"
+
+    # Count requests by method (first word of the quoted request line)
+    local method_counts
+    method_counts=$(echo "$log_data" | awk -F'"' '{print $2}' | awk '{print $1}' | { grep -E '^(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$' || true; } | sort | uniq -c)
+
+    metrics_output+="
+# HELP apache_http_requests_by_method_total HTTP requests by method (from last $LOG_TAIL_LINES log lines)
+# TYPE apache_http_requests_by_method_total gauge
+"
+
+    while read -r count method; do
+        if [[ -n "$method" ]] && [[ -n "$count" ]]; then
+            metrics_output+="apache_http_requests_by_method_total{method=\"$method\"} $count
+"
+        fi
+    done <<< "$method_counts"
+
+    # Count unique IPs
+    local unique_ips
+    unique_ips=$(echo "$log_data" | awk '{print $1}' | sort -u | wc -l)
+
+    metrics_output+="
+# HELP apache_http_unique_clients Unique client IPs (from last $LOG_TAIL_LINES log lines)
+# TYPE apache_http_unique_clients gauge
+apache_http_unique_clients $unique_ips
+"
+
+    # Top URIs (for potential abuse detection)
+    local top_uris
+    top_uris=$(echo "$log_data" | awk -F'"' '{print $2}' | awk '{print $2}' | { grep -v '^-$' || true; } | sort | uniq -c | sort -rn | head -5)
+
+    metrics_output+="
+# HELP apache_http_top_uri_requests_total Top requested URIs (from last $LOG_TAIL_LINES log lines)
+# TYPE apache_http_top_uri_requests_total gauge
+"
+
+    local rank=1
+    while read -r count uri; do
+        if [[ -n "$uri" ]] && [[ -n "$count" ]]; then
+            # Truncate first, then escape for the exposition format.
+            # Backslashes must be doubled BEFORE quotes are escaped: Apache
+            # writes non-printable request bytes as \xhh, and a lone "\x"
+            # inside a label value makes the whole scrape unparseable.
+            uri="${uri:0:100}"
+            uri="${uri//\\/\\\\}"
+            uri="${uri//\"/\\\"}"
+            metrics_output+="apache_http_top_uri_requests_total{uri=\"$uri\",rank=\"$rank\"} $count
+"
+            rank=$((rank + 1))
+        fi
+    done <<< "$top_uris"
+
+    # Count requests in time windows
+    local recent_requests
+    recent_requests=$(echo "$log_data" | wc -l)
+
+    metrics_output+="
+# HELP apache_http_requests_in_sample Total requests in sample window
+# TYPE apache_http_requests_in_sample gauge
+apache_http_requests_in_sample $recent_requests
+"
+
+    # Save metrics for caching
+    echo "$metrics_output" > "$STATE_DIR/log_metrics"
+    echo "$metrics_output"
+}
+
+#########################
+### Error Log Metrics ###
+#########################
+
+collect_error_log_metrics() {
+ # Counts bracketed severity tags in the last 1000 lines of $ERROR_LOG.
+ # Prints only a comment line when the log is not a readable regular file,
+ # and nothing at all when the tail is empty.
+ if [[ ! -f "$ERROR_LOG" ]] || [[ ! -r "$ERROR_LOG" ]]; then
+ echo "# Error log not readable at $ERROR_LOG"
+ return
+ fi
+
+ # Count errors by level from last 1000 lines
+ local log_data
+ log_data=$(tail -n 1000 "$ERROR_LOG" 2>/dev/null || echo "")
+
+ if [[ -z "$log_data" ]]; then
+ return
+ fi
+
+ local emerg_count alert_count crit_count error_count warn_count notice_count info_count
+
+ # grep -c exits non-zero when the count is 0; the `|| name=0` then resets
+ # the variable to a plain 0.
+ emerg_count=$(echo "$log_data" | grep -c '\[emerg\]' 2>/dev/null) || emerg_count=0
+ alert_count=$(echo "$log_data" | grep -c '\[alert\]' 2>/dev/null) || alert_count=0
+ crit_count=$(echo "$log_data" | grep -c '\[crit\]' 2>/dev/null) || crit_count=0
+ error_count=$(echo "$log_data" | grep -c '\[error\]' 2>/dev/null) || error_count=0
+ warn_count=$(echo "$log_data" | grep -c '\[warn\]' 2>/dev/null) || warn_count=0
+ notice_count=$(echo "$log_data" | grep -c '\[notice\]' 2>/dev/null) || notice_count=0
+ info_count=$(echo "$log_data" | grep -c '\[info\]' 2>/dev/null) || info_count=0
+
+ cat </dev/null || echo "0")
+ # Age of the error log: current time minus its modification time.
+ log_mtime=$(stat -c %Y "$ERROR_LOG" 2>/dev/null || echo "0")
+ now=$(date +%s)
+ log_age=$((now - log_mtime))
+
+ cat </dev/null | grep -v '#' | awk '{print $2}' | tr -d '"' | sort -u || echo "")
+
+ if [[ -z "$cert_files" ]]; then
+ echo "# No SSL certificates found in Apache config"
+ return
+ fi
+
+ echo "# HELP apache_ssl_certificate_expiry_days Days until SSL certificate expires"
+ echo "# TYPE apache_ssl_certificate_expiry_days gauge"
+ echo "# HELP apache_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
+ echo "# TYPE apache_ssl_certificate_expiry_timestamp gauge"
+
+ # One pair of samples per certificate file that exists on disk.
+ while read -r cert_file; do
+ if [[ -f "$cert_file" ]]; then
+ local expiry_date expiry_epoch now_epoch days_left cn
+
+ expiry_date=$(openssl x509 -enddate -noout -in "$cert_file" 2>/dev/null | cut -d= -f2 || echo "")
+ if [[ -n "$expiry_date" ]]; then
+ # NOTE(review): `date -d` is GNU date syntax; when parsing fails
+ # expiry_epoch becomes 0 and days_left a large negative number.
+ expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null || echo "0")
+ now_epoch=$(date +%s)
+ days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
+
+ # Get CN from certificate
+ # Falls back to the file's basename when no CN is found; spaces are
+ # replaced with underscores for the label value.
+ cn=$(openssl x509 -subject -noout -in "$cert_file" 2>/dev/null | grep -oP 'CN\s*=\s*\K[^,/]+' || basename "$cert_file")
+ cn="${cn// /_}"
+
+ echo "apache_ssl_certificate_expiry_days{certificate=\"$cn\",file=\"$cert_file\"} $days_left"
+ echo "apache_ssl_certificate_expiry_timestamp{certificate=\"$cn\",file=\"$cert_file\"} $expiry_epoch"
+ fi
+ fi
+ done <<< "$cert_files"
+ return
+ fi
+
+ # Check specified domains via network
+ echo "# HELP apache_ssl_certificate_expiry_days Days until SSL certificate expires"
+ echo "# TYPE apache_ssl_certificate_expiry_days gauge"
+ echo "# HELP apache_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
+ echo "# TYPE apache_ssl_certificate_expiry_timestamp gauge"
+
+ # $domains is a comma-separated list; blanks inside each entry are removed.
+ IFS=',' read -ra domain_array <<< "$domains"
+ for domain in "${domain_array[@]}"; do
+ domain=$(echo "$domain" | tr -d ' ')
+ if [[ -n "$domain" ]]; then
+ local expiry_date expiry_epoch now_epoch days_left
+
+ # Fetch the certificate presented on port 443 (SNI set to the domain).
+ expiry_date=$(echo | openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2 || echo "")
+
+ if [[ -n "$expiry_date" ]]; then
+ expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null || echo "0")
+ now_epoch=$(date +%s)
+ days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
+
+ echo "apache_ssl_certificate_expiry_days{domain=\"$domain\"} $days_left"
+ echo "apache_ssl_certificate_expiry_timestamp{domain=\"$domain\"} $expiry_epoch"
+ else
+ # -1 marks a domain whose expiry date could not be obtained.
+ echo "apache_ssl_certificate_expiry_days{domain=\"$domain\"} -1"
+ fi
+ fi
+ done
+}
+
+#########################
+### Proxy/Upstream Metrics ###
+#########################
+
+collect_upstream_metrics() {
+ # Counts ProxyPass targets and balancer:// names found in the Apache
+ # configuration; emits nothing when neither is present.
+ # Check for proxy/balancer configurations
+ # Lines containing '#' and ProxyPassReverse lines are dropped; field 2 of
+ # the remaining lines is kept, de-duplicated.
+ local proxy_passes
+ proxy_passes=$(grep -rh 'ProxyPass\s' "$SITES_DIR" "$CONF_D_DIR" "$APACHE_CONF" 2>/dev/null | grep -v '#' | grep -v 'ProxyPassReverse' | awk '{print $2}' | sort -u || echo "")
+
+ local balancers
+ balancers=$(grep -rhoE 'balancer://[a-zA-Z0-9_-]+' "$SITES_DIR" "$CONF_D_DIR" "$APACHE_CONF" 2>/dev/null | sort -u || echo "")
+
+ if [[ -z "$proxy_passes" ]] && [[ -z "$balancers" ]]; then
+ return
+ fi
+
+ # The emptiness guards matter: `echo "" | wc -l` would report 1.
+ local proxy_count=0
+ if [[ -n "$proxy_passes" ]]; then
+ proxy_count=$(echo "$proxy_passes" | wc -l)
+ fi
+
+ local balancer_count=0
+ if [[ -n "$balancers" ]]; then
+ balancer_count=$(echo "$balancers" | wc -l)
+ fi
+
+ cat </dev/null | grep -c 'BalancerMember' 2>/dev/null) || member_count=0
+ echo "apache_balancer_members_total{balancer=\"$name\"} $member_count"
+ fi
+ done <<< "$balancers"
+ fi
+}
+
+#########################
+### Version Metrics ###
+#########################
+
+collect_version_metrics() {
+ # Emits apache_version_info and, when $APACHECTL is set, one 0/1 flag per
+ # module of interest.
+ local version="unknown"
+
+ # The version is whatever follows "Apache/" in the `-v` output; it stays
+ # "unknown" when $APACHE_BIN is empty or nothing matches.
+ if [[ -n "$APACHE_BIN" ]]; then
+ version=$($APACHE_BIN -v 2>&1 | grep -oP 'Apache/\K[0-9.]+' || echo "unknown")
+ fi
+
+ echo "# HELP apache_version_info Apache version information"
+ echo "# TYPE apache_version_info gauge"
+ echo "apache_version_info{version=\"$version\"} 1"
+
+ # Check loaded modules
+ if [[ -n "$APACHECTL" ]]; then
+ local modules_output
+ modules_output=$($APACHECTL -M 2>/dev/null || echo "")
+
+ local has_ssl has_proxy has_proxy_http has_proxy_balancer has_rewrite
+ local has_headers has_deflate has_expires has_status has_http2
+
+ # Each flag is "1" when the module name occurs in the listing, else "0".
+ has_ssl=$(echo "$modules_output" | grep -q 'ssl_module' && echo "1" || echo "0")
+ has_proxy=$(echo "$modules_output" | grep -q 'proxy_module' && echo "1" || echo "0")
+ has_proxy_http=$(echo "$modules_output" | grep -q 'proxy_http_module' && echo "1" || echo "0")
+ has_proxy_balancer=$(echo "$modules_output" | grep -q 'proxy_balancer_module' && echo "1" || echo "0")
+ has_rewrite=$(echo "$modules_output" | grep -q 'rewrite_module' && echo "1" || echo "0")
+ has_headers=$(echo "$modules_output" | grep -q 'headers_module' && echo "1" || echo "0")
+ has_deflate=$(echo "$modules_output" | grep -q 'deflate_module' && echo "1" || echo "0")
+ has_expires=$(echo "$modules_output" | grep -q 'expires_module' && echo "1" || echo "0")
+ has_status=$(echo "$modules_output" | grep -q 'status_module' && echo "1" || echo "0")
+ has_http2=$(echo "$modules_output" | grep -q 'http2_module' && echo "1" || echo "0")
+
+ cat </dev/null || echo "0")
+ # Open-file limit of the current shell, 0 if it cannot be read.
+ ulimit_n=$(ulimit -n 2>/dev/null || echo "0")
+
+ cat </dev/null | awk '{print $1}' || echo "0")
+
+ echo ""
+ echo "# HELP apache_system_open_files Current system-wide open files"
+ echo "# TYPE apache_system_open_files gauge"
+ echo "apache_system_open_files $open_files"
+}
+
+#########################
+### Collect All Metrics ###
+#########################
+
+collect_all_metrics() {
+ # Prefer the fully qualified host name; fall back to plain `hostname`
+ # when `hostname -f` fails.
+ local hostname
+ hostname=$(hostname -f 2>/dev/null || hostname)
+
+ cat </dev/null || {
+ # On failure: log, pause five seconds, then let the enclosing loop
+ # go round again.
+ log "Server error, restarting in 5 seconds..."
+ sleep 5
+ }
+ done
+}
+
+#########################
+### Output ###
+#########################
+
+# Render all metrics once and deliver them.
+# Globals:  OUTPUT_FILE (read)
+# Outputs:  stdout when OUTPUT_FILE is empty; otherwise the text is staged
+#           in "${OUTPUT_FILE}.<pid>" and moved over OUTPUT_FILE
+write_output() {
+    local rendered
+    rendered=$(collect_all_metrics)
+
+    # No target file configured: print and finish.
+    if [[ -z "$OUTPUT_FILE" ]]; then
+        echo "$rendered"
+        return
+    fi
+
+    # Stage next to the target, then move the finished file into place.
+    local staging="${OUTPUT_FILE}.$$"
+    echo "$rendered" > "$staging"
+    mv "$staging" "$OUTPUT_FILE"
+}
+
+#########################
+### Main ###
+#########################
+
+# Entry point.
+# Arguments: the script's command line. A leading --handle-request serves a
+#            single request via handle_request and exits 0; anything else is
+#            handed to parse_args.
+# Globals:   HTTP_MODE, OUTPUT_FILE (read after parse_args/setup)
+main() {
+    case "${1:-}" in
+        --handle-request)
+            handle_request
+            exit 0
+            ;;
+    esac
+
+    parse_args "$@"
+    setup
+
+    # HTTP mode takes precedence over file or stdout output.
+    if [[ "$HTTP_MODE" == true ]]; then
+        start_server
+        return
+    fi
+
+    if [[ -z "$OUTPUT_FILE" ]]; then
+        collect_all_metrics
+    else
+        write_output
+    fi
+}
+
+main "$@"
diff --git a/apache-security-auditor.sh b/apache-security-auditor.sh
new file mode 100644
index 0000000..cc07e52
--- /dev/null
+++ b/apache-security-auditor.sh
@@ -0,0 +1,658 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### apache-security-auditor.sh — Audit Apache httpd configuration for security issues####
+#### Checks server info, TLS, headers, directories, modules, and file permissions ####
+#### Requires: bash 4+, root access ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### sudo ./apache-security-auditor.sh --full ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+# Empty by default so every message prints cleanly without colour.
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+# Fill in the ANSI escape strings according to $COLOR (default "auto"):
+#   never  - leave every colour variable empty
+#   always - enable colours unconditionally
+#   other  - enable colours only when stdout is a terminal
+setup_colors() {
+    case "${COLOR:-auto}" in
+        never) return 0 ;;
+        always) ;;
+        *) [[ -t 1 ]] || return 0 ;;
+    esac
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# Informational message on stdout.
+log() {
+    echo -e "${BLUE}[INFO]${RESET} $*"
+}
+
+# Warning on stderr.
+warn() {
+    echo -e "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+# Error on stderr.
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# Debug message on stdout, printed only when VERBOSE is exactly "true".
+verbose() {
+    [[ "$VERBOSE" != "true" ]] || echo -e "${DIM}[DEBUG]${RESET} $*"
+}
+
+# Report an error and terminate the script with status 1.
+die() {
+    err "$*"
+    exit 1
+}
+
+# ── Severity counters ────────────────────────────────────────────────
+# Running totals, one per severity level.
+TOTAL_CRIT=0
+TOTAL_WARN=0
+TOTAL_INFO=0
+TOTAL_OK=0
+
+# Each helper adds one to its counter and always succeeds.
+flag_crit() { TOTAL_CRIT=$((TOTAL_CRIT + 1)); }
+flag_warn() { TOTAL_WARN=$((TOTAL_WARN + 1)); }
+flag_info() { TOTAL_INFO=$((TOTAL_INFO + 1)); }
+flag_ok() { TOTAL_OK=$((TOTAL_OK + 1)); }
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# NOTE(review): RUN_MODE is presumably filled in by argument parsing - confirm.
+RUN_MODE=""
+# VERBOSE and COLOR keep a value inherited from the environment, if any.
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+# A non-empty APACHE_CONF makes detect_platform keep that path; the
+# remaining four variables are filled in by detect_platform.
+APACHE_CONF=""
+APACHECTL=""
+APACHE_CONF_DIR=""
+APACHE_RUN_USER=""
+PLATFORM=""
+
+# ── State ─────────────────────────────────────────────────────────────
+# Script file name without its directory; frozen once computed.
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── Platform detection ───────────────────────────────────────────────
+# Work out which Apache flavour is installed.
+# Globals:  APACHE_CONF (read; written during auto-detection)
+#           APACHECTL, APACHE_CONF_DIR, APACHE_RUN_USER, PLATFORM (written)
+# Exits through die when no supported installation is found.
+detect_platform() {
+    if [[ -z "$APACHE_CONF" ]]; then
+        # Auto-detection: the control binary and its stock config must both exist.
+        if command -v apache2ctl &>/dev/null && [[ -f /etc/apache2/apache2.conf ]]; then
+            APACHECTL="apache2ctl"
+            APACHE_CONF="/etc/apache2/apache2.conf"
+            APACHE_CONF_DIR="/etc/apache2"
+            APACHE_RUN_USER="www-data"
+            PLATFORM="Debian/Ubuntu (apache2)"
+        elif command -v httpd &>/dev/null && [[ -f /etc/httpd/conf/httpd.conf ]]; then
+            APACHECTL="httpd"
+            APACHE_CONF="/etc/httpd/conf/httpd.conf"
+            APACHE_CONF_DIR="/etc/httpd"
+            APACHE_RUN_USER="apache"
+            PLATFORM="RHEL/CentOS (httpd)"
+        else
+            die "Cannot detect Apache installation — use --config to specify config path"
+        fi
+
+        verbose "Platform: ${PLATFORM}"
+        verbose "Config: ${APACHE_CONF}"
+        verbose "Config dir: ${APACHE_CONF_DIR}"
+        return
+    fi
+
+    # Explicit config path: only the control binary has to be discovered.
+    if command -v apache2ctl &>/dev/null; then
+        APACHECTL="apache2ctl"
+        APACHE_RUN_USER="www-data"
+        PLATFORM="Debian/Ubuntu (apache2)"
+    elif command -v httpd &>/dev/null; then
+        APACHECTL="httpd"
+        APACHE_RUN_USER="apache"
+        PLATFORM="RHEL/CentOS (httpd)"
+    else
+        die "Cannot find apache2ctl or httpd"
+    fi
+    APACHE_CONF_DIR="$(dirname "$APACHE_CONF")"
+}
+
+# ── Get all config files ─────────────────────────────────────────────
+# List every Apache configuration file worth scanning.
+# Globals:  APACHE_CONF, APACHECTL, APACHE_CONF_DIR (read)
+# Outputs:  one path per line on stdout, sorted and de-duplicated; nothing
+#           at all when no file is found
+get_config_files() {
+    local files=()
+    local d f
+
+    if [[ -f "$APACHE_CONF" ]]; then
+        files+=("$APACHE_CONF")
+    fi
+
+    # Files Apache itself reports as included; entries that are not regular
+    # files are dropped.
+    local included
+    included=$($APACHECTL -t -D DUMP_INCLUDES 2>/dev/null | grep -oP '\(\*\) \K.*|^ *\K/.*' || true)
+    if [[ -n "$included" ]]; then
+        while IFS= read -r f; do
+            if [[ -f "$f" ]]; then
+                files+=("$f")
+            fi
+        done <<< "$included"
+    fi
+
+    # Conventional drop-in directories, in case DUMP_INCLUDES is unavailable.
+    for d in "${APACHE_CONF_DIR}/sites-enabled" "${APACHE_CONF_DIR}/conf-enabled" \
+        "${APACHE_CONF_DIR}/conf.d" "${APACHE_CONF_DIR}/conf.modules.d"; do
+        if [[ -d "$d" ]]; then
+            while IFS= read -r f; do
+                files+=("$f")
+            done < <(find "$d" -name '*.conf' -type f 2>/dev/null)
+        fi
+    done
+
+    # Expanding an empty array is an "unbound variable" error under `set -u`
+    # on bash older than 4.4, and prints a stray blank line on newer bash.
+    if ((${#files[@]} == 0)); then
+        return 0
+    fi
+
+    printf '%s\n' "${files[@]}" | sort -u
+}
+
+# ── Search across all config files ───────────────────────────────────
+# Print every line matching a pattern across all known config files.
+# Arguments: $1 - Perl-compatible regex, matched case-insensitively
+# Outputs:   matching lines on stdout; files without a match add nothing
+search_config() {
+    local pattern="$1" listing
+    listing=$(get_config_files)
+
+    local -a paths
+    mapfile -t paths <<< "$listing"
+
+    for f in "${paths[@]}"; do
+        if [[ -n "$f" ]]; then
+            # A non-matching or unreadable file must not abort the scan.
+            grep -iP "$pattern" "$f" 2>/dev/null || true
+        fi
+    done
+}
+
+# ── Table header ─────────────────────────────────────────────────────
+# Print the column titles followed by a 65-character rule.
+print_table_header() {
+    local rule
+    # %.0s consumes one argument and prints nothing, so the literal "─" is
+    # emitted once per number in the range.
+    printf -v rule '%.0s─' {1..65}
+
+    printf " %-32s %-14s %s\n" "CHECK" "STATUS" "SEVERITY"
+    printf " %s\n" "$rule"
+}
+
+# ── Table row ────────────────────────────────────────────────────────
+# Print one result row and bump the counter for its severity.
+# Arguments: $1 - check name, $2 - status text,
+#            $3 - severity (CRITICAL, WARN, INFO or OK; any other value is
+#                 printed uncoloured and counted nowhere)
+print_row() {
+    local label="$1" state="$2" level="$3"
+    local tint=""
+
+    if [[ "$level" == "CRITICAL" ]]; then
+        tint="$RED"
+        flag_crit
+    elif [[ "$level" == "WARN" ]]; then
+        tint="$YELLOW"
+        flag_warn
+    elif [[ "$level" == "INFO" ]]; then
+        tint="$CYAN"
+        flag_info
+    elif [[ "$level" == "OK" ]]; then
+        tint="$GREEN"
+        flag_ok
+    fi
+
+    printf " %-32s %-14s %b%s%b\n" "$label" "$state" "$tint" "$level" "$RESET"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SERVER INFO AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit directives that control how much the server reveals about itself.
+# For each directive the LAST occurrence found by search_config decides the
+# result. Outputs one table row per directive via print_row.
+audit_server_info() {
+    log "Auditing server information exposure..."
+    echo ""
+    print_table_header
+
+    # ServerTokens - Apache's built-in default is Full.
+    local tokens
+    tokens=$(search_config '^\s*ServerTokens' | tail -1 | awk '{print $2}')
+    if [[ -z "$tokens" ]]; then
+        print_row "ServerTokens" "Full (default)" "CRITICAL"
+    elif [[ "${tokens,,}" == "prod" || "${tokens,,}" == "productonly" ]]; then
+        print_row "ServerTokens" "Prod" "OK"
+    elif [[ "${tokens,,}" == "major" ]]; then
+        print_row "ServerTokens" "Major" "WARN"
+    else
+        print_row "ServerTokens" "$tokens" "CRITICAL"
+    fi
+
+    # ServerSignature - Apache's built-in default is Off, so a missing
+    # directive is the safe state; only an explicit On/EMail is a finding.
+    local sig
+    sig=$(search_config '^\s*ServerSignature' | tail -1 | awk '{print $2}')
+    if [[ -z "$sig" ]]; then
+        print_row "ServerSignature" "Off (default)" "OK"
+    elif [[ "${sig,,}" == "off" ]]; then
+        print_row "ServerSignature" "Off" "OK"
+    else
+        print_row "ServerSignature" "$sig" "CRITICAL"
+    fi
+
+    # TraceEnable - Apache's built-in default is on.
+    local trace
+    trace=$(search_config '^\s*TraceEnable' | tail -1 | awk '{print $2}')
+    if [[ -z "$trace" ]]; then
+        print_row "TraceEnable" "On (default)" "WARN"
+    elif [[ "${trace,,}" == "off" ]]; then
+        print_row "TraceEnable" "Off" "OK"
+    else
+        print_row "TraceEnable" "$trace" "WARN"
+    fi
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TLS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit the TLS-related directives.
+# Globals:  APACHECTL (read)
+# Outputs:  one table row per check via print_row; when mod_ssl is not
+#           loaded a single INFO row is printed and the audit stops
+audit_tls() {
+    log "Auditing TLS configuration..."
+    echo ""
+    print_table_header
+
+    # Capture the module list before searching it. Piping straight into
+    # `grep -q` under `pipefail` reports "not loaded" whenever apachectl
+    # exits non-zero or is cut off by SIGPIPE after grep's early exit.
+    local loaded_modules
+    loaded_modules=$($APACHECTL -M 2>/dev/null || true)
+    if ! grep -q ssl_module <<< "$loaded_modules"; then
+        print_row "mod_ssl" "not loaded" "INFO"
+        echo ""
+        return
+    fi
+    print_row "mod_ssl" "loaded" "OK"
+
+    # SSLProtocol - the last occurrence decides.
+    local proto
+    proto=$(search_config '^\s*SSLProtocol' | tail -1)
+    if [[ -z "$proto" ]]; then
+        print_row "SSLProtocol" "not set (default)" "WARN"
+    else
+        # Inspect the directive word by word so that a protocol name at the
+        # very end of the line is caught as well. Words starting with "-"
+        # disable a protocol and are skipped; a leading "+" is stripped.
+        # The "all" shortcut is not expanded here.
+        local -a proto_words
+        local word legacy=false tls11=false
+        read -ra proto_words <<< "$proto"
+        for word in "${proto_words[@]}"; do
+            word="${word,,}"
+            if [[ "$word" == -* ]]; then
+                continue
+            fi
+            case "${word#+}" in
+                sslv2 | sslv3 | tlsv1 | tlsv1.0) legacy=true ;;
+                tlsv1.1) tls11=true ;;
+            esac
+        done
+
+        if [[ "$legacy" == true ]]; then
+            print_row "SSLProtocol (legacy)" "enabled" "CRITICAL"
+        elif [[ "$tls11" == true ]]; then
+            print_row "SSLProtocol (TLSv1.1)" "enabled" "CRITICAL"
+        else
+            print_row "SSLProtocol" "modern only" "OK"
+        fi
+    fi
+
+    # SSLCipherSuite - presence only; the cipher list is not evaluated.
+    local ciphers
+    ciphers=$(search_config '^\s*SSLCipherSuite' | tail -1)
+    if [[ -z "$ciphers" ]]; then
+        print_row "SSLCipherSuite" "not set" "WARN"
+    else
+        print_row "SSLCipherSuite" "configured" "OK"
+    fi
+
+    # SSLHonorCipherOrder
+    local honor
+    honor=$(search_config '^\s*SSLHonorCipherOrder' | tail -1 | awk '{print $2}')
+    if [[ -z "$honor" ]]; then
+        print_row "SSLHonorCipherOrder" "not set" "WARN"
+    elif [[ "${honor,,}" == "on" ]]; then
+        print_row "SSLHonorCipherOrder" "on" "OK"
+    else
+        print_row "SSLHonorCipherOrder" "$honor" "WARN"
+    fi
+
+    # HSTS - any mention of the header name counts.
+    local hsts
+    hsts=$(search_config 'Strict-Transport-Security')
+    if [[ -z "$hsts" ]]; then
+        print_row "HSTS Header" "missing" "WARN"
+    else
+        print_row "HSTS Header" "set" "OK"
+    fi
+
+    # OCSP Stapling
+    local ocsp
+    ocsp=$(search_config '^\s*SSLUseStapling' | tail -1 | awk '{print $2}')
+    if [[ -z "$ocsp" ]]; then
+        print_row "OCSP Stapling" "not configured" "WARN"
+    elif [[ "${ocsp,,}" == "on" ]]; then
+        print_row "OCSP Stapling" "on" "OK"
+    else
+        print_row "OCSP Stapling" "$ocsp" "WARN"
+    fi
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SECURITY HEADERS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit whether the common security response headers are configured.
+# Globals:  APACHECTL (read)
+# Outputs:  one table row per header via print_row; when mod_headers is not
+#           loaded a single WARN row is printed and the audit stops
+audit_headers() {
+    log "Auditing security headers..."
+    echo ""
+    print_table_header
+
+    # Capture first, then search: a direct `apachectl -M | grep -q` pipeline
+    # is unreliable under `pipefail`.
+    local loaded_modules
+    loaded_modules=$($APACHECTL -M 2>/dev/null || true)
+    if ! grep -q headers_module <<< "$loaded_modules"; then
+        print_row "mod_headers" "not loaded" "WARN"
+        echo ""
+        return
+    fi
+
+    local headers=(
+        "X-Content-Type-Options"
+        "X-Frame-Options"
+        "Content-Security-Policy"
+        "Referrer-Policy"
+        "Permissions-Policy"
+    )
+
+    local header found
+    for header in "${headers[@]}"; do
+        # Only an active Header directive that actually adds the header
+        # counts. `Header unset <name>`, `RequestHeader set <name>` and
+        # commented-out lines must not be reported as "set".
+        found=$(search_config "^\s*Header\s+(always\s+|onsuccess\s+)?(set|setifempty|append|add|merge)\s+\"?${header}\b")
+        if [[ -n "$found" ]]; then
+            print_row "$header" "set" "OK"
+        else
+            print_row "$header" "missing" "WARN"
+        fi
+    done
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DIRECTORY AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit directory-level restrictions. Every check is a text search over the
+# configuration files: it shows that a directive exists somewhere, not which
+# directory it applies to. Outputs one table row per check via print_row.
+audit_directories() {
+    log "Auditing directory and file restrictions..."
+    echo ""
+    print_table_header
+
+    # Options Indexes (enabled = bad); lines that carry -Indexes are ignored.
+    local indexes_on
+    indexes_on=$(search_config '^\s*Options\b.*\bIndexes\b' | grep -v '\-Indexes' || true)
+    if [[ -n "$indexes_on" ]]; then
+        print_row "Options Indexes" "enabled" "WARN"
+    else
+        print_row "Options Indexes" "disabled" "OK"
+    fi
+
+    # AllowOverride All
+    local override_all
+    override_all=$(search_config '^\s*AllowOverride\s+All' || true)
+    if [[ -n "$override_all" ]]; then
+        print_row "AllowOverride" "All (permissive)" "WARN"
+    else
+        print_row "AllowOverride" "restricted" "OK"
+    fi
+
+    # Sensitive file protection (.git, .env, .htpasswd).
+    # The backslash before the dot is optional so that both the regex form
+    # (<FilesMatch "\.env">) and the plain form (<Files ".env">) are found.
+    # A bare ".ht" not followed by a letter also counts, which covers the
+    # stock <FilesMatch "^\.ht"> rule.
+    local sensitive_protection
+    sensitive_protection=$(search_config '(FilesMatch|Files|Directory).*\\?\.(git|env|htpasswd|ht(?![a-z]))' || true)
+    if [[ -n "$sensitive_protection" ]]; then
+        print_row "Sensitive file blocking" "configured" "OK"
+    else
+        print_row "Sensitive file blocking" "not configured" "CRITICAL"
+    fi
+
+    # Check for root directory restriction
+    local root_deny
+    root_deny=$(search_config '^\s*Require\s+all\s+denied' || true)
+    if [[ -n "$root_deny" ]]; then
+        print_row "Root directory denied" "yes" "OK"
+    else
+        print_row "Root directory denied" "not found" "WARN"
+    fi
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MODULES AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_modules() {
+ # Reports on security-relevant Apache modules based on the output of "$APACHECTL -M".
+ # NOTE(review): this function looks truncated in this copy - see the note further down.
+ log "Auditing modules..."
+ echo ""
+ print_table_header
+
+ local loaded_modules
+ # "|| true": a failing apachectl yields an empty list instead of a non-zero status
+ loaded_modules=$($APACHECTL -M 2>/dev/null || true)
+
+ # mod_security
+ if echo "$loaded_modules" | grep -q "security2_module"; then
+ print_row "mod_security" "loaded" "OK"
+ else
+ print_row "mod_security" "not loaded" "INFO"
+ fi
+
+ # mod_status
+ if echo "$loaded_modules" | grep -q "status_module"; then
+ local status_restricted
+ # NOTE(review): the next line is incomplete - the search_config pattern is never closed and
+ # the code after it uses conf_perms/APACHE_CONF, which presumably belongs to a separate
+ # permissions audit. Restore the missing lines from the upstream script.
+ status_restricted=$(search_config '(/dev/null)
+ # NOTE(review): -le compares the octal mode string as a decimal integer, so e.g. 466
+ # passes this check although it is group- and world-writable - confirm intent.
+ if [[ "$conf_perms" -le 644 ]]; then
+ print_row "Config ($APACHE_CONF)" "$conf_perms" "OK"
+ else
+ print_row "Config ($APACHE_CONF)" "$conf_perms" "WARN"
+ fi
+ fi
+
+ # .htpasswd files
+ local htpasswd_files
+ # Searched under the Apache config directory and /var/www only
+ htpasswd_files=$(find "$APACHE_CONF_DIR" /var/www -name '.htpasswd' -type f 2>/dev/null || true)
+ if [[ -n "$htpasswd_files" ]]; then
+ while IFS= read -r f; do
+ local perms
+ perms=$(stat -c '%a' "$f" 2>/dev/null)
+ # Same decimal comparison of an octal mode as for the config file above
+ if [[ "$perms" -le 640 ]]; then
+ print_row ".htpasswd ($f)" "$perms" "OK"
+ else
+ print_row ".htpasswd ($f)" "$perms" "WARN"
+ fi
+ done <<< "$htpasswd_files"
+ else
+ verbose "No .htpasswd files found"
+ fi
+
+ # Document root world-writable check
+ local docroots
+ # Second field of every DocumentRoot line, with double quotes removed, de-duplicated
+ docroots=$(search_config '^\s*DocumentRoot' | awk '{print $2}' | tr -d '"' | sort -u)
+ if [[ -n "$docroots" ]]; then
+ while IFS= read -r dr; do
+ # Skip empty entries and paths that are not existing directories
+ [[ -z "$dr" || ! -d "$dr" ]] && continue
+ local dr_perms
+ dr_perms=$(stat -c '%a' "$dr" 2>/dev/null)
+ # NOTE(review): only a last digit of 6 or 7 is flagged; modes ending in 2 or 3 also
+ # carry the other-write bit and are reported as OK - confirm intent.
+ if [[ "${dr_perms: -1}" -ge 6 ]]; then
+ print_row "Docroot ($dr)" "${dr_perms} (world-writable)" "CRITICAL"
+ else
+ print_row "Docroot ($dr)" "$dr_perms" "OK"
+ fi
+ done <<< "$docroots"
+ fi
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+print_summary() {
+    # Print the closing summary: finding counts per severity, elapsed time and,
+    # depending on the worst severity seen, a list of recommendations.
+    # Globals:   START_TIME, TOTAL_CRIT, TOTAL_WARN, TOTAL_INFO, TOTAL_OK (read)
+    #            RED, YELLOW, CYAN, GREEN, BOLD, RESET (read)
+    local elapsed i
+    local -a rows
+
+    elapsed=$(( $(date +%s) - START_TIME ))
+
+    # label / colour / count triples, printed in this order
+    rows=(
+        "CRITICAL:" "$RED" "$TOTAL_CRIT"
+        "WARN:" "$YELLOW" "$TOTAL_WARN"
+        "INFO:" "$CYAN" "$TOTAL_INFO"
+        "OK:" "$GREEN" "$TOTAL_OK"
+    )
+
+    echo ""
+    echo " ══════════════════════════════════════════"
+    echo " Apache Security Audit Summary"
+    echo " ══════════════════════════════════════════"
+    for (( i = 0; i < ${#rows[@]}; i += 3 )); do
+        printf " %-20s %b%d%b\n" "${rows[i]}" "${rows[i + 1]}" "${rows[i + 2]}" "$RESET"
+    done
+    echo " ──────────────────────────────────────────"
+    printf " Completed in %ds\n" "$elapsed"
+    echo ""
+
+    # Critical findings take precedence over warnings
+    if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+        echo -e " ${RED}${BOLD}Action required:${RESET} ${TOTAL_CRIT} critical finding(s)"
+        echo ""
+        printf '%s\n' \
+            " Top recommendations:" \
+            " • Set ServerTokens Prod and ServerSignature Off" \
+            " • Disable SSLv3, TLSv1, and TLSv1.1" \
+            " • Restrict mod_status to localhost with Require ip 127.0.0.1" \
+            " • Block access to .git, .env, and .htpasswd files" \
+            " • Fix world-writable document root permissions" \
+            ""
+        return
+    fi
+
+    if [[ "$TOTAL_WARN" -gt 0 ]]; then
+        echo -e " ${YELLOW}Review recommended:${RESET} ${TOTAL_WARN} warning(s)"
+        echo ""
+        printf '%s\n' \
+            " Suggestions:" \
+            " • Add security headers (CSP, X-Frame-Options, HSTS)" \
+            " • Enable OCSP stapling for TLS" \
+            " • Disable mod_info and mod_autoindex in production" \
+            " • Set TraceEnable Off" \
+            ""
+        return
+    fi
+
+    echo -e " ${GREEN}All checks passed${RESET}"
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2
+ exit 1
+ fi
+
+ RUN_MODE="${modes[*]}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+main() {
+    # Entry point: parse arguments, print the report banner, run every selected
+    # audit, print the summary and exit with a severity-based status
+    # (2 = critical findings, 1 = warnings only, 0 = clean).
+    local exit_code=0
+
+    parse_args "$@"
+    setup_colors
+    detect_platform
+
+    START_TIME=$(date +%s)
+
+    echo ""
+    echo -e "${BOLD}Apache Security Auditor${RESET}"
+    echo -e "Host: $(hostname)"
+    echo -e "Config: ${APACHE_CONF}"
+    echo -e "Platform: ${PLATFORM}"
+    echo -e "Mode: ${RUN_MODE}"
+    echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    echo ""
+
+    # RUN_MODE is a whitespace-separated list, so it is split on purpose.
+    # Every known mode maps to audit_<mode> with "-" replaced by "_";
+    # anything else is silently ignored.
+    for mode in $RUN_MODE; do
+        case "$mode" in
+            server-info | tls | headers | directories | modules | permissions)
+                "audit_${mode//-/_}"
+                ;;
+        esac
+    done
+
+    print_summary
+
+    if [[ "$TOTAL_WARN" -gt 0 ]]; then
+        exit_code=1
+    fi
+    if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+        exit_code=2
+    fi
+    exit "$exit_code"
+}
+
+main "$@"
diff --git a/apt-updates-exporter.sh b/apt-updates-exporter.sh
new file mode 100644
index 0000000..9b5c081
--- /dev/null
+++ b/apt-updates-exporter.sh
@@ -0,0 +1,405 @@
+#!/bin/bash
+
+#############################################################
+#### APT Package Updates Exporter for Prometheus ####
+#### Expose pending apt updates as Prometheus metrics ####
+#### for Debian and Ubuntu servers ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.7 ####
+#### ####
+#### Usage: ./apt-updates-exporter.sh ####
+#############################################################
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+parse_args "$@"
+
+# Configuration variables with default values
+AUTO_UPDATE_ENABLED="${AUTO_UPDATE_ENABLED:-false}" # Enable automatic package updates
+AUTO_REMOVE_ENABLED="${AUTO_REMOVE_ENABLED:-false}" # Enable automatic removal of orphaned packages
+APT_GET_CMD="${APT_GET_CMD:-/usr/bin/apt-get}" # Path to apt-get command
+AWK_CMD="${AWK_CMD:-/usr/bin/awk}" # Path to awk command
+CRON_INTERVAL="${CRON_INTERVAL:-0 0 * * *}" # Cron schedule (daily at midnight)
+GREP_CMD="${GREP_CMD:-/usr/bin/grep}" # Path to grep command
+METRICS_DIR="${METRICS_DIR:-/var/lib/node_exporter}" # Directory for Prometheus metrics files
+SORT_CMD="${SORT_CMD:-/usr/bin/sort}" # Path to sort command
+UNIQ_CMD="${UNIQ_CMD:-/usr/bin/uniq}" # Path to uniq command
+
+# File paths for tracking update state
+UPDATES_TIMESTAMP_FILE="$METRICS_DIR/updates_detected" # Tracks when updates were first detected
+WAIT_PERIOD_ENABLED="${WAIT_PERIOD_ENABLED:-true}" # Enable waiting period before auto-updates
+UPDATED_PACKAGES_FILE="$METRICS_DIR/updated_packages" # List of packages updated in last run
+AUTO_REMOVE_FILE="$METRICS_DIR/auto_remove_packages" # List of packages removed in last auto-removal
+WAIT_PERIOD_SECONDS=$((3 * 24 * 60 * 60)) # Wait period: 3 days in seconds
+
+# Safety check: prevent concurrent execution that could cause lock conflicts
+# If an apt/apt-get process is running, or one of the dpkg/apt lock files is in use, print a -1
+# sample for each of three metric families on stdout and exit 1 without invoking apt.
+if pidof apt apt-get >/dev/null || fuser /var/lib/dpkg/lock /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; then
+ echo "node_upgrades_pending{origin=\"error\",arch=\"unknown\"} -1"
+ echo "node_upgradelist{pkgname=\"error\", update_version=\"\", current_version=\"\", origin=\"\"} -1"
+ echo "node_auto_updates{status=\"error\"} -1"
+ exit 1
+fi
+
+#### Setup: Ensure metrics directory exists with proper permissions ####
+# Runs only when the directory is missing; an existing directory is left untouched.
+# Note that the failure messages below are written to stdout, not stderr.
+if [[ ! -d "$METRICS_DIR" ]]; then
+ # Create metrics directory
+ mkdir -p "$METRICS_DIR" || {
+ echo "Failed to create $METRICS_DIR"
+ exit 1
+ }
+ # Set ownership to prometheus user (try different formats for compatibility)
+ chown prometheus:prometheus "$METRICS_DIR" 2>/dev/null || chown prometheus. "$METRICS_DIR" || {
+ echo "Failed to set ownership of $METRICS_DIR"
+ exit 1
+ }
+ # Set appropriate permissions for metrics directory
+ chmod 755 "$METRICS_DIR" || {
+ echo "Failed to set permissions on $METRICS_DIR"
+ exit 1
+ }
+fi
+
+#### Setup: Ensure cron job exists for automated execution ####
+# The job is installed into root's crontab, so the presence checks read root's
+# crontab as well (the plain "crontab -l" reads the invoking user's instead).
+if ! crontab -u root -l 2>/dev/null | grep -q "updates.sh"; then
+    # Keep the existing entries verbatim: printf '%s' does not reinterpret
+    # backslashes the way "echo -e" does, and an empty or missing crontab
+    # contributes no leading blank line.
+    existing_crontab=$(crontab -u root -l 2>/dev/null)
+    {
+        if [[ -n "$existing_crontab" ]]; then
+            printf '%s\n' "$existing_crontab"
+        fi
+        printf '%s\n' "$CRON_INTERVAL /usr/local/bin/updates.sh > $METRICS_DIR/updates.prom 2>&1"
+    } | crontab -u root -
+    # Verify the cron job was added successfully
+    crontab -u root -l 2>/dev/null | grep -q "updates.sh" || {
+        echo "Failed to add cron job"
+        exit 1
+    }
+fi
+
+#### Setup: Ensure logrotate configuration exists for state files ####
+LOGROTATE_CONFIG="/etc/logrotate.d/node-exporter-metrics"
+if [[ ! -f "$LOGROTATE_CONFIG" ]]; then
+ # Create logrotate configuration for monthly rotation of state files
+ cat >"$LOGROTATE_CONFIG" </dev/null || true
+ endscript
+}
+EOF
+ # Verify the logrotate configuration is valid
+ logrotate -d "$LOGROTATE_CONFIG" >/dev/null 2>&1 || {
+ echo "Failed to create valid logrotate configuration"
+ rm -f "$LOGROTATE_CONFIG"
+ exit 1
+ }
+fi
+
+#### Function: Count pending package upgrades grouped by origin and architecture ####
+get_upgrades() {
+    # Count pending package upgrades grouped by origin and architecture.
+    # Globals:   APT_GET_CMD, AWK_CMD, SORT_CMD, UNIQ_CMD (read)
+    # Outputs:   one node_upgrades_pending sample per (origin, arch) pair on
+    #            stdout, or a single -1 error sample when the simulation fails
+    # Returns:   1 when apt-get fails, otherwise the status of the pipeline
+    local simulation
+
+    # Run the simulation once and reuse its output: this halves the apt-get
+    # work, checks and parses the same result, and keeps apt-get's stderr out
+    # of the metrics stream.
+    if ! simulation=$($APT_GET_CMD -qq --just-print upgrade 2>/dev/null); then
+        echo "node_upgrades_pending{origin=\"error\",arch=\"unknown\"} -1"
+        return 1
+    fi
+
+    # Parse the "Inst" lines to extract origin and architecture
+    printf '%s\n' "$simulation" |
+        $AWK_CMD -F '[()]' '/^Inst/ {
+            sub("^[^ ]+ ", "", $2)          # Drop the first word inside the parentheses (cf. the version field in get_upgrade_list)
+            gsub(" ","",$2)                 # Remove spaces from origin
+            sub(/\[|\]/, " ", $2)           # Replace the first bracket with a space (splits origin / arch)
+            print $2
+        }' |
+        $SORT_CMD |                         # Sort so identical entries are adjacent
+        $UNIQ_CMD -c |                      # Count unique entries
+        $AWK_CMD '{
+            # NOTE(review): backslash handling in gsub replacement strings differs
+            # between awk implementations - verify the escaping on the target awk
+            gsub(/\\\\/, "\\\\", $2)        # Escape backslashes for Prometheus labels
+            gsub(/\\/, "\\\\", $2)
+            gsub(/"/, "\\\"", $2)           # Escape quotes for Prometheus labels
+            gsub(/\[|\]/, "", $3)           # Remove brackets from architecture
+            printf "node_upgrades_pending{origin=\"%s\",arch=\"%s\"} %d\n", $2, $3, $1
+        }'
+}
+
+#### Function: Handle automatic package updates with optional wait period ####
+handle_auto_updates() {
+    # Decide whether an automatic upgrade should run now and, if so, run it.
+    # Globals:   AUTO_UPDATE_ENABLED, WAIT_PERIOD_ENABLED, WAIT_PERIOD_SECONDS,
+    #            UPDATES_TIMESTAMP_FILE (read; the file is removed after an update)
+    local now first_seen run_update=false
+
+    # Nothing to do unless auto-updates are switched on
+    if [[ "$AUTO_UPDATE_ENABLED" != "true" ]]; then
+        return
+    fi
+
+    case "$WAIT_PERIOD_ENABLED" in
+        true)
+            # Update only once the wait period since first detection has elapsed;
+            # an unreadable timestamp file counts as epoch 0
+            now=$(date +%s)
+            first_seen=$(cat "$UPDATES_TIMESTAMP_FILE" 2>/dev/null || echo "0")
+            if (( now - first_seen >= WAIT_PERIOD_SECONDS )); then
+                run_update=true
+            fi
+            ;;
+        *)
+            # No wait period: update immediately
+            run_update=true
+            ;;
+    esac
+
+    if [[ "$run_update" == "true" ]]; then
+        perform_auto_update
+        # Reset the wait period by clearing the detection timestamp
+        [[ "$WAIT_PERIOD_ENABLED" == "true" ]] && rm -f "$UPDATES_TIMESTAMP_FILE"
+    fi
+}
+
+#### Function: Execute automatic package updates and record metrics ####
+perform_auto_update() {
+    # Run "apt-get update" followed by a non-interactive "apt-get upgrade"
+    # under a 300 second timeout and report how many packages were unpacked.
+    # Globals:   APT_GET_CMD (read), UPDATED_PACKAGES_FILE (written)
+    # Outputs:   node_auto_updates HELP/TYPE lines plus one sample on stdout
+    # Returns:   1 when the upgrade command fails or times out
+    local apt_log unpacked
+    local upgrade_script="DEBIAN_FRONTEND=noninteractive $APT_GET_CMD update >/dev/null 2>&1 && DEBIAN_FRONTEND=noninteractive $APT_GET_CMD -y -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' upgrade 2>&1"
+
+    echo '# HELP node_auto_updates Number of packages auto-updated.'
+    echo '# TYPE node_auto_updates gauge'
+
+    # Any non-zero status from the timeout wrapper is recorded via a marker string
+    apt_log=$(timeout 300 bash -c "$upgrade_script" || echo "TIMEOUT_ERROR")
+    case "$apt_log" in
+        *"TIMEOUT_ERROR"*)
+            echo "node_auto_updates{status=\"timeout\"} -1"
+            return 1
+            ;;
+    esac
+
+    # Every "Unpacking" line corresponds to one package that was upgraded
+    unpacked=$(grep -c "^Unpacking " <<< "$apt_log")
+    echo "node_auto_updates{status=\"success\"} $unpacked"
+
+    # Keep "<package> <version>" pairs for the next metrics run
+    awk '/^Unpacking / {gsub(/[()]/, "", $3); print $2 " " $3}' <<< "$apt_log" >"$UPDATED_PACKAGES_FILE"
+}
+
+#### Function: Generate detailed list of individual packages available for upgrade ####
+get_upgrade_list() {
+    # List every package that would be upgraded, one sample per package.
+    # Globals:   APT_GET_CMD, AWK_CMD (read)
+    # Outputs:   node_upgradelist samples on stdout, or a single -1 error
+    #            sample when the simulation fails
+    # Returns:   1 when apt-get fails, otherwise the status of the pipeline
+    local simulation
+
+    # Simulate once and parse that same output (no second apt-get run)
+    if ! simulation=$($APT_GET_CMD --just-print upgrade 2>/dev/null); then
+        echo 'node_upgradelist{pkgname="error", update_version="", current_version="", origin=""} -1'
+        return 1
+    fi
+
+    # Only lines that START with "Inst " describe an installation (the same
+    # anchoring get_upgrades uses); matching "Inst" anywhere would also pick
+    # up other lines that merely contain that text.
+    printf '%s\n' "$simulation" |
+        $AWK_CMD '/^Inst / {
+            gsub(/\(|\)/, "", $4)           # Remove parentheses from version
+            gsub(/:/, ".", $4)              # Replace colons with dots in version
+            gsub(/\[|\]/, "", $3)           # Remove brackets from current version
+            gsub(/:/, " ", $5)              # Replace colons with spaces in origin
+
+            # Escape special characters for Prometheus label values
+            gsub(/\\/, "\\\\", $2)          # Escape backslashes in package name
+            gsub(/"/, "\\\"", $2)           # Escape quotes in package name
+            gsub(/\\/, "\\\\", $4)          # Escape backslashes in version
+            gsub(/"/, "\\\"", $4)           # Escape quotes in version
+
+            # Output Prometheus metric with package details
+            printf "node_upgradelist{pkgname=\"%s\",update_version=\"%s\", current_version=\"%s\", origin=\"%s\"} 1\n", $2, $4, $3, $5
+        }'
+}
+
+#### Function: Get list of packages that can be automatically removed (orphaned) ####
+get_auto_remove_list() {
+    # List the packages "apt-get autoremove" would remove, one sample each.
+    # Globals:   APT_GET_CMD, AWK_CMD (read)
+    # Outputs:   node_autoremove_packages samples on stdout, or a single -1
+    #            error sample when the dry run fails
+    # Returns:   1 when apt-get fails, otherwise the status of the pipeline
+    local dry_run
+
+    # One dry run serves both the failure check and the parsing
+    if ! dry_run=$($APT_GET_CMD --dry-run autoremove 2>/dev/null); then
+        echo 'node_autoremove_packages{pkgname="error"} -1'
+        return 1
+    fi
+
+    # Only lines that START with "Remv " are removals; an unanchored match
+    # would also report any other line that happens to contain "Remv".
+    printf '%s\n' "$dry_run" |
+        $AWK_CMD '/^Remv / {
+            # Escape special characters for Prometheus labels
+            gsub(/\\/, "\\\\", $2)          # Escape backslashes in package name
+            gsub(/"/, "\\\"", $2)           # Escape quotes in package name
+            printf "node_autoremove_packages{pkgname=\"%s\"} 1\n", $2
+        }'
+}
+
+#### Function: Handle automatic removal of orphaned packages ####
+handle_auto_remove() {
+    # Run the automatic removal of orphaned packages, but only when
+    # AUTO_REMOVE_ENABLED is exactly "true"; otherwise do nothing.
+    if [[ "$AUTO_REMOVE_ENABLED" == "true" ]]; then
+        perform_auto_remove
+    fi
+}
+
+#### Function: Execute automatic package removal and record metrics ####
+perform_auto_remove() {
+    # Run a non-interactive "apt-get autoremove" and report how many packages
+    # were removed.
+    # Globals:   APT_GET_CMD (read), AUTO_REMOVE_FILE (written)
+    # Outputs:   node_auto_remove HELP/TYPE lines plus one sample on stdout
+    local apt_log removed
+
+    echo '# HELP node_auto_remove Number of packages auto-removed.'
+    echo '# TYPE node_auto_remove gauge'
+
+    apt_log=$(DEBIAN_FRONTEND=noninteractive $APT_GET_CMD -y autoremove 2>&1)
+
+    # Each "Removing" line corresponds to one removed package
+    removed=$(grep -c "^Removing " <<< "$apt_log")
+    echo "node_auto_remove{status=\"success\"} $removed"
+
+    # Remember the removed package names for the next metrics run
+    awk '/^Removing / {print $2}' <<< "$apt_log" >"$AUTO_REMOVE_FILE"
+}
+
+#### Generate all Prometheus metrics ####
+generate_metrics() {
+ # Writes every metric family to stdout in Prometheus text format.
+ # Side effects: creates or removes $UPDATES_TIMESTAMP_FILE, and may run the
+ # auto-update / auto-remove handlers, which print their own metrics inline.
+ # upgradelist, pending_upgrades and autoremovelist are not declared local, so they
+ # stay set as globals after the call.
+ #### Upgrade list metrics ####
+ upgradelist=$(get_upgrade_list)
+ echo '# HELP node_upgradelist List of packages for upgrade'
+ echo '# TYPE node_upgradelist gauge'
+ if [[ -n "${upgradelist}" ]]; then
+ echo "${upgradelist}"
+ else
+ # Nothing to upgrade: emit a single zero-valued sample with empty labels
+ echo 'node_upgradelist{pkgname="", update_version="", current_version="", origin=""} 0'
+ fi
+
+ #### Pending upgrades metrics and auto-updates ####
+ pending_upgrades=$(get_upgrades)
+ echo '# HELP node_upgrades_pending Apt package pending updates by origin.'
+ echo '# TYPE node_upgrades_pending gauge'
+
+ if [[ -n "$pending_upgrades" ]]; then
+ printf "%s\n" "$pending_upgrades"
+
+ # Record the time upgrades were first seen; an existing timestamp is kept so the
+ # wait period in handle_auto_updates counts from the first detection
+ if [[ ! -f "$UPDATES_TIMESTAMP_FILE" ]]; then
+ date +%s >"$UPDATES_TIMESTAMP_FILE"
+ fi
+
+ handle_auto_updates
+ else
+ # No pending upgrades: report zeros and clear the detection timestamp
+ echo 'node_upgrades_pending{origin="", arch=""} 0'
+ echo '# HELP node_auto_updates Number of packages auto-updated.'
+ echo '# TYPE node_auto_updates gauge'
+ echo 'node_auto_updates{status="success"} 0'
+ rm -f "$UPDATES_TIMESTAMP_FILE"
+ fi
+
+ #### Auto-removable packages metrics ####
+ autoremovelist=$(get_auto_remove_list)
+ echo '# HELP node_autoremove_packages List of packages available for auto-removal'
+ echo '# TYPE node_autoremove_packages gauge'
+ if [[ -n "${autoremovelist}" ]]; then
+ echo "${autoremovelist}"
+ # Removal is attempted only when there is something to remove
+ handle_auto_remove
+ else
+ echo 'node_autoremove_packages{pkgname=""} 0'
+ fi
+
+ #### Packages updated in the last run ####
+ # Reads the "<package> <version>" lines written by perform_auto_update
+ if [[ -f "$UPDATED_PACKAGES_FILE" ]]; then
+ echo '# HELP node_updated_packages List of packages updated in last update'
+ echo '# TYPE node_updated_packages gauge'
+ while IFS=' ' read -r package version; do
+ # NOTE(review): label values are inserted without escaping here
+ echo "node_updated_packages{package=\"$package\",version=\"$version\"} 1"
+ done <"$UPDATED_PACKAGES_FILE"
+ fi
+
+ #### Packages removed in the last auto-removal ####
+ # Reads the package names written by perform_auto_remove, one per line
+ if [[ -f "$AUTO_REMOVE_FILE" ]]; then
+ echo '# HELP node_removed_packages List of packages removed in last auto-removal'
+ echo '# TYPE node_removed_packages gauge'
+ while IFS= read -r package; do
+ echo "node_removed_packages{package=\"$package\"} 1"
+ done <"$AUTO_REMOVE_FILE"
+ fi
+
+ #### Reboot required check ####
+ echo '# HELP node_reboot_required Node reboot is required for software updates.'
+ echo '# TYPE node_reboot_required gauge'
+ # The presence of /run/reboot-required is the only signal used
+ if [[ -f '/run/reboot-required' ]]; then
+ echo 'node_reboot_required 1'
+ else
+ echo 'node_reboot_required 0'
+ fi
+}
+
+#### Main execution ####
+if [[ -z "$OUTPUT_FILE" ]]; then
+    # No output file requested: stream the metrics to stdout
+    generate_metrics
+else
+    # Build the file next to its destination so the final mv is a rename
+    # within one directory
+    output_dir="$(dirname "$OUTPUT_FILE")"
+    mkdir -p "$output_dir"
+
+    temp_file=$(mktemp "${output_dir}/.apt_updates_metrics.XXXXXX")
+
+    generate_metrics > "$temp_file" 2>/dev/null || {
+        rm -f "$temp_file"
+        echo "ERROR: Failed to generate metrics" >&2
+        exit 1
+    }
+
+    # Refuse to replace the previous file with a suspiciously short one
+    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+    if [[ "$file_lines" -lt 5 ]]; then
+        rm -f "$temp_file"
+        echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+        exit 1
+    fi
+
+    # mktemp creates the file with a restrictive mode; make it readable before publishing
+    chmod 644 "$temp_file"
+    mv -f "$temp_file" "$OUTPUT_FILE"
+    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+fi
diff --git a/artifactory-exporter.sh b/artifactory-exporter.sh
new file mode 100755
index 0000000..79011fe
--- /dev/null
+++ b/artifactory-exporter.sh
@@ -0,0 +1,538 @@
+#!/usr/bin/env bash
+#
+# Artifactory Prometheus Metrics Exporter
+#
+# Prometheus textfile collector exporter for JFrog Artifactory.
+# Uses the Artifactory REST API to collect storage per repo, artifact
+# counts, HTTP request stats, GC metrics, DB connections, JVM heap,
+# and system health.
+#
+# Usage:
+# ARTIFACTORY_URL="https://artifactory.example.com" ARTIFACTORY_TOKEN="cmVmd..." ./artifactory-exporter.sh
+# ARTIFACTORY_URL="https://artifactory.example.com" ARTIFACTORY_TOKEN="cmVmd..." ./artifactory-exporter.sh --textfile
+# ARTIFACTORY_URL="https://artifactory.example.com" ARTIFACTORY_TOKEN="cmVmd..." ./artifactory-exporter.sh --install
+#
+# Parameters:
+# --textfile Write to textfile collector directory
+# --install Create cron job for automatic collection
+# --help Show usage
+#
+# Environment:
+# ARTIFACTORY_URL Artifactory base URL (required)
+# ARTIFACTORY_TOKEN API token or access token (required)
+# TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
+# CURL_TIMEOUT API request timeout in seconds (default: 10)
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Version: 1.0
+#
+# Metrics Exported:
+# Core:
+# - artifactory_up
+# - artifactory_exporter_info{version}
+# - artifactory_health_status
+#
+# Storage (per-repo):
+# - artifactory_repo_used_bytes{repo,type}
+# - artifactory_repo_artifact_count{repo,type}
+# - artifactory_repo_folder_count{repo,type}
+#
+# Storage (totals):
+# - artifactory_storage_total_bytes
+# - artifactory_storage_used_bytes
+# - artifactory_storage_free_bytes
+# - artifactory_storage_binaries_count
+# - artifactory_storage_binaries_total_bytes
+# - artifactory_storage_optimization_percent
+#
+# JVM:
+# - artifactory_jvm_heap_used_bytes
+# - artifactory_jvm_heap_max_bytes
+# - artifactory_jvm_heap_free_bytes
+# - artifactory_jvm_nonheap_used_bytes
+#
+# Database:
+# - artifactory_db_pool_active
+# - artifactory_db_pool_idle
+# - artifactory_db_pool_max
+#
+# HTTP:
+# - artifactory_http_requests_total{status}
+#
+# Garbage Collection:
+# - artifactory_gc_duration_seconds
+# - artifactory_gc_freed_bytes
+# - artifactory_gc_last_run_timestamp
+#
+# Exporter:
+# - artifactory_exporter_duration_seconds
+# - artifactory_exporter_last_run_timestamp
+
+set -euo pipefail
+
+# --- Configuration ---
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="$(basename "$0")"
+ARTIFACTORY_URL="${ARTIFACTORY_URL:-}"
+ARTIFACTORY_TOKEN="${ARTIFACTORY_TOKEN:-}"
+TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+TEXTFILE_MODE=false
+OUTPUT=""
+START_TIME=""
+
+# --- Functions ---
+
+usage() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+validate_config() {
+    # Ensure the required connection settings are present and normalise the URL.
+    # Globals:   ARTIFACTORY_URL (read, rewritten without one trailing "/"),
+    #            ARTIFACTORY_TOKEN (read)
+    # Exits with status 1 and a message on stderr when either value is empty.
+    local required
+
+    for required in ARTIFACTORY_URL ARTIFACTORY_TOKEN; do
+        if [[ -z "${!required}" ]]; then
+            echo "ERROR: ${required} environment variable is required" >&2
+            exit 1
+        fi
+    done
+
+    # Strip trailing slash so endpoints can be appended directly
+    ARTIFACTORY_URL="${ARTIFACTORY_URL%/}"
+}
+
+api_get() {
+    # GET an Artifactory endpoint with bearer-token authentication.
+    # Arguments: $1 - endpoint path appended to ARTIFACTORY_URL
+    # Outputs:   the response body on stdout; an empty line when curl fails
+    # Returns:   always 0 - callers detect failure by an empty result
+    local endpoint="$1"
+    local -a request=(
+        -sf
+        --max-time "$CURL_TIMEOUT"
+        -H "Authorization: Bearer ${ARTIFACTORY_TOKEN}"
+        "${ARTIFACTORY_URL}${endpoint}"
+    )
+
+    if ! curl "${request[@]}" 2>/dev/null; then
+        echo ""
+    fi
+}
+
+add_metric() {
+    # Append one complete metric (HELP line, TYPE line and a single sample) to OUTPUT.
+    # Arguments: $1 - metric name
+    #            $2 - metric type (e.g. gauge, counter)
+    #            $3 - help text
+    #            $4 - sample value
+    #            $5 - optional label list, already formatted as key="value",...
+    local name="$1" type="$2" help="$3" value="$4"
+    local labels="${5:-}"
+    local sample="$name"
+
+    # Only add the {...} part when labels were supplied
+    if [[ -n "$labels" ]]; then
+        sample+="{${labels}}"
+    fi
+
+    OUTPUT+="# HELP ${name} ${help}"$'\n'
+    OUTPUT+="# TYPE ${name} ${type}"$'\n'
+    OUTPUT+="${sample} ${value}"$'\n'
+}
+
+add_metric_value() {
+    # Append a single sample line (no HELP/TYPE header) to OUTPUT.
+    # Arguments: $1 - metric name
+    #            $2 - sample value
+    #            $3 - optional label list, already formatted as key="value",...
+    local name="$1" value="$2"
+    local labels="${3:-}"
+    local sample="$name"
+
+    # Only add the {...} part when labels were supplied
+    if [[ -n "$labels" ]]; then
+        sample+="{${labels}}"
+    fi
+
+    OUTPUT+="${sample} ${value}"$'\n'
+}
+
+# Convert Artifactory human-readable size strings to bytes.
+# Artifactory returns storage sizes as "1.23 GB", "456.78 MB", etc.
+parse_size_to_bytes() {
+    # Convert a human-readable size such as "1.23 GB" into a whole number of bytes.
+    # Arguments: $1 - size string; empty, "null" or number-free input yields 0
+    # Outputs:   the byte count on stdout
+    # Units:     B/BYTES, KB, MB, GB, TB, PB (case-insensitive, powers of 1024);
+    #            an unknown or missing unit is treated as bytes.
+    # Uses bash's own regex matching so it does not depend on GNU grep -P.
+    local size_str="${1:-}"
+    local number unit multiplier
+    local number_re='([0-9]+(\.[0-9]*)?|\.[0-9]+)'
+    local unit_re='[A-Za-z]+'
+
+    if [[ -z "$size_str" || "$size_str" == "null" ]]; then
+        echo "0"
+        return
+    fi
+
+    # Drop thousands separators ("1,024.00 MB") before extracting the number
+    # NOTE(review): assumes "," is never used as the decimal mark - confirm
+    size_str="${size_str//,/}"
+
+    if [[ "$size_str" =~ $number_re ]]; then
+        number="${BASH_REMATCH[1]}"
+    else
+        echo "0"
+        return
+    fi
+
+    unit=""
+    if [[ "$size_str" =~ $unit_re ]]; then
+        unit="${BASH_REMATCH[0]}"
+    fi
+
+    case "${unit^^}" in
+        KB) multiplier=1024 ;;
+        MB) multiplier=1048576 ;;
+        GB) multiplier=1073741824 ;;
+        TB) multiplier=1099511627776 ;;
+        PB) multiplier=1125899906842624 ;;
+        *)  multiplier=1 ;;                 # BYTES, B, unknown or no unit
+    esac
+
+    # awk does the floating-point multiplication and rounds to a whole number
+    awk -v n="$number" -v m="$multiplier" 'BEGIN { printf "%.0f", n * m }'
+}
+
+# Parse percentage string like "85.43%" to a float.
+parse_percent() {
+    # Extract the numeric part of a percentage string such as "85.43%".
+    # Arguments: $1 - percentage string
+    # Outputs:   the first run of digits/dots on stdout, or 0 when the input
+    #            is empty, "null" or contains no such run
+    local pct_str="$1"
+    local digits_re='[0-9.]+'
+
+    if [[ -n "$pct_str" && "$pct_str" != "null" && "$pct_str" =~ $digits_re ]]; then
+        echo "${BASH_REMATCH[0]}"
+    else
+        echo "0"
+    fi
+}
+
+collect_health() {
+    # Record reachability (artifactory_up) and health (artifactory_health_status).
+    # Outputs:   appends both metrics to OUTPUT via add_metric
+    # Returns:   1 when the ping endpoint does not answer "OK" (only
+    #            artifactory_up=0 is recorded), 0 otherwise
+    local ping_result health_json node_state
+    local healthy=1
+    local up_help="Artifactory reachability (1=up, 0=down)"
+    local health_help="System health (1=healthy, 0=unhealthy)"
+
+    # Simple ping check: anything other than the literal OK counts as down
+    ping_result=$(api_get "/api/system/ping")
+    if [[ "$ping_result" != "OK" ]]; then
+        add_metric "artifactory_up" "gauge" "$up_help" "0"
+        return 1
+    fi
+    add_metric "artifactory_up" "gauge" "$up_help" "1"
+
+    # Detailed health check via router API. When it returns nothing, the
+    # successful ping is taken as "healthy"; when it returns data, only the
+    # state HEALTHY counts.
+    health_json=$(api_get "/router/api/v1/system/health")
+    if [[ -n "$health_json" ]]; then
+        node_state=$(echo "$health_json" | jq -r '.node_state // .services[0].state // empty' 2>/dev/null)
+        if [[ "$node_state" != "HEALTHY" ]]; then
+            healthy=0
+        fi
+    fi
+    add_metric "artifactory_health_status" "gauge" "$health_help" "$healthy"
+
+    return 0
+}
+
+collect_storage() {
+ # Collects file-store totals, binaries summary and per-repository figures from
+ # /api/storageinfo and appends them to OUTPUT. Returns early, adding nothing, when the
+ # endpoint yields no data.
+ local storage_json
+ storage_json=$(api_get "/api/storageinfo")
+
+ if [[ -z "$storage_json" ]]; then
+ return
+ fi
+
+ # --- Total storage summary ---
+ # "// empty" makes jq print nothing for a missing field, so the -n tests below skip it
+ local total_space used_space free_space
+ total_space=$(echo "$storage_json" | jq -r '.fileStoreSummary.totalSpace // empty' 2>/dev/null)
+ used_space=$(echo "$storage_json" | jq -r '.fileStoreSummary.usedSpace // empty' 2>/dev/null)
+ free_space=$(echo "$storage_json" | jq -r '.fileStoreSummary.freeSpace // empty' 2>/dev/null)
+
+ [[ -n "$total_space" ]] && add_metric "artifactory_storage_total_bytes" "gauge" "Total file store capacity in bytes" "$(parse_size_to_bytes "$total_space")"
+ [[ -n "$used_space" ]] && add_metric "artifactory_storage_used_bytes" "gauge" "Used file store space in bytes" "$(parse_size_to_bytes "$used_space")"
+ [[ -n "$free_space" ]] && add_metric "artifactory_storage_free_bytes" "gauge" "Free file store space in bytes" "$(parse_size_to_bytes "$free_space")"
+
+ # --- Binaries summary ---
+ local binaries_count binaries_size optimization
+ binaries_count=$(echo "$storage_json" | jq -r '.binariesSummary.binariesCount // empty' 2>/dev/null)
+ binaries_size=$(echo "$storage_json" | jq -r '.binariesSummary.binariesSize // empty' 2>/dev/null)
+ optimization=$(echo "$storage_json" | jq -r '.binariesSummary.optimization // empty' 2>/dev/null)
+
+ if [[ -n "$binaries_count" ]]; then
+ local clean_count
+ # Remove commas from the count before using it as a sample value
+ clean_count=$(echo "$binaries_count" | tr -d ',')
+ add_metric "artifactory_storage_binaries_count" "gauge" "Total number of binaries stored" "$clean_count"
+ fi
+ [[ -n "$binaries_size" ]] && add_metric "artifactory_storage_binaries_total_bytes" "gauge" "Total size of binaries in bytes" "$(parse_size_to_bytes "$binaries_size")"
+ [[ -n "$optimization" ]] && add_metric "artifactory_storage_optimization_percent" "gauge" "Storage optimization percentage" "$(parse_percent "$optimization")"
+
+ # --- Per-repository metrics ---
+ local repo_count
+ repo_count=$(echo "$storage_json" | jq -r '.repositoriesSummaryList | length // 0' 2>/dev/null)
+
+ if [[ "$repo_count" -gt 0 ]]; then
+ # Extract repo data as tab-separated lines: key, type, usedSpace, filesCount, foldersCount
+ # The entry whose repoKey is "TOTAL" is excluded; missing fields get defaults
+ local repo_lines
+ repo_lines=$(echo "$storage_json" | jq -r '
+ .repositoriesSummaryList[]
+ | select(.repoKey != "TOTAL")
+ | [.repoKey, (.repoType // "UNKNOWN"), (.usedSpace // "0 bytes"), (.filesCount // 0), (.foldersCount // 0)]
+ | @tsv
+ ' 2>/dev/null)
+
+ if [[ -n "$repo_lines" ]]; then
+ # The same lines are read three times so that each metric family is written as one
+ # contiguous group: its HELP/TYPE header followed by all of its samples
+ OUTPUT+="# HELP artifactory_repo_used_bytes Repository used space in bytes
+# TYPE artifactory_repo_used_bytes gauge
+"
+ while IFS=$'\t' read -r repo_key repo_type repo_used files_count folders_count; do
+ local repo_bytes
+ repo_bytes=$(parse_size_to_bytes "$repo_used")
+ # NOTE(review): repo_key/repo_type are used as label values without escaping
+ add_metric_value "artifactory_repo_used_bytes" "$repo_bytes" "repo=\"${repo_key}\",type=\"${repo_type}\""
+ done <<< "$repo_lines"
+
+ OUTPUT+="# HELP artifactory_repo_artifact_count Number of artifacts in repository
+# TYPE artifactory_repo_artifact_count gauge
+"
+ while IFS=$'\t' read -r repo_key repo_type repo_used files_count folders_count; do
+ add_metric_value "artifactory_repo_artifact_count" "$files_count" "repo=\"${repo_key}\",type=\"${repo_type}\""
+ done <<< "$repo_lines"
+
+ OUTPUT+="# HELP artifactory_repo_folder_count Number of folders in repository
+# TYPE artifactory_repo_folder_count gauge
+"
+ while IFS=$'\t' read -r repo_key repo_type repo_used files_count folders_count; do
+ add_metric_value "artifactory_repo_folder_count" "$folders_count" "repo=\"${repo_key}\",type=\"${repo_type}\""
+ done <<< "$repo_lines"
+ fi
+ fi
+}
+
+collect_system_info() {
+    # Collect JVM heap and database connection-pool metrics.
+    # Tries the metrics text endpoint first and falls back to /api/system/info
+    # when that endpoint returns nothing.
+    # Outputs:   appends to OUTPUT via add_metric
+    # Returns:   always 0. Each "[[ -n ... ]] && add_metric" line evaluates to
+    #            non-zero when its value is missing; without the explicit
+    #            "return 0" statements that status would become the function's
+    #            status and, with "set -e" active, abort the exporter before
+    #            any output is written.
+    # NOTE(review): the '[\d.]+$' extraction assumes every sample line ends in a
+    # plain decimal value (no exponent, no trailing timestamp) - confirm against
+    # real endpoint output.
+    local metrics_text
+    metrics_text=$(api_get "/api/v1/system/metrics")
+
+    if [[ -n "$metrics_text" ]]; then
+        # Parse JVM heap from open metrics format
+        local heap_used heap_max heap_free nonheap_used
+        heap_used=$(echo "$metrics_text" | grep -m1 'jvm_memory_used_bytes.*area="heap"' | grep -oP '[\d.]+$' || true)
+        heap_max=$(echo "$metrics_text" | grep -m1 'jvm_memory_max_bytes.*area="heap"' | grep -oP '[\d.]+$' || true)
+        heap_free=$(echo "$metrics_text" | grep -m1 'jvm_memory_committed_bytes.*area="heap"' | grep -oP '[\d.]+$' || true)
+        nonheap_used=$(echo "$metrics_text" | grep -m1 'jvm_memory_used_bytes.*area="nonheap"' | grep -oP '[\d.]+$' || true)
+
+        # "${var%.*}" drops a fractional part so the sample is a whole number
+        [[ -n "$heap_used" ]] && add_metric "artifactory_jvm_heap_used_bytes" "gauge" "JVM heap memory used" "${heap_used%.*}"
+        [[ -n "$heap_max" ]] && add_metric "artifactory_jvm_heap_max_bytes" "gauge" "JVM heap memory maximum" "${heap_max%.*}"
+        if [[ -n "$heap_free" && -n "$heap_used" ]]; then
+            # heap_free holds the committed size here; free = committed - used
+            local free_calc
+            free_calc=$(echo "$heap_free $heap_used" | awk '{printf "%.0f", $1 - $2}')
+            add_metric "artifactory_jvm_heap_free_bytes" "gauge" "JVM heap memory free" "$free_calc"
+        fi
+        [[ -n "$nonheap_used" ]] && add_metric "artifactory_jvm_nonheap_used_bytes" "gauge" "JVM non-heap memory used" "${nonheap_used%.*}"
+
+        # Parse DB pool from open metrics
+        local db_active db_idle db_max
+        db_active=$(echo "$metrics_text" | grep -m1 'db_pool_active_connections' | grep -oP '[\d.]+$' || true)
+        db_idle=$(echo "$metrics_text" | grep -m1 'db_pool_idle_connections' | grep -oP '[\d.]+$' || true)
+        db_max=$(echo "$metrics_text" | grep -m1 'db_pool_max_connections' | grep -oP '[\d.]+$' || true)
+
+        [[ -n "$db_active" ]] && add_metric "artifactory_db_pool_active" "gauge" "Active database connections" "${db_active%.*}"
+        [[ -n "$db_idle" ]] && add_metric "artifactory_db_pool_idle" "gauge" "Idle database connections" "${db_idle%.*}"
+        [[ -n "$db_max" ]] && add_metric "artifactory_db_pool_max" "gauge" "Maximum database connections" "${db_max%.*}"
+
+        # Explicit status: a missing optional metric is not a failure
+        return 0
+    fi
+
+    # Fallback: use system info endpoint (older Artifactory)
+    local info_json
+    info_json=$(api_get "/api/system/info")
+
+    if [[ -z "$info_json" ]]; then
+        return 0
+    fi
+
+    local heap_used_str heap_max_str heap_free_str
+    heap_used_str=$(echo "$info_json" | jq -r '.["jvm.heap.used"] // empty' 2>/dev/null)
+    heap_max_str=$(echo "$info_json" | jq -r '.["jvm.heap.max"] // empty' 2>/dev/null)
+    heap_free_str=$(echo "$info_json" | jq -r '.["jvm.heap.free"] // empty' 2>/dev/null)
+
+    [[ -n "$heap_used_str" ]] && add_metric "artifactory_jvm_heap_used_bytes" "gauge" "JVM heap memory used" "$(parse_size_to_bytes "$heap_used_str")"
+    [[ -n "$heap_max_str" ]] && add_metric "artifactory_jvm_heap_max_bytes" "gauge" "JVM heap memory maximum" "$(parse_size_to_bytes "$heap_max_str")"
+    [[ -n "$heap_free_str" ]] && add_metric "artifactory_jvm_heap_free_bytes" "gauge" "JVM heap memory free" "$(parse_size_to_bytes "$heap_free_str")"
+
+    local db_active db_max
+    db_active=$(echo "$info_json" | jq -r '.["db.pool.active"] // empty' 2>/dev/null)
+    db_max=$(echo "$info_json" | jq -r '.["db.pool.max"] // empty' 2>/dev/null)
+
+    [[ -n "$db_active" ]] && add_metric "artifactory_db_pool_active" "gauge" "Active database connections" "$db_active"
+    [[ -n "$db_max" ]] && add_metric "artifactory_db_pool_max" "gauge" "Maximum database connections" "$db_max"
+
+    # Explicit status: a missing optional metric is not a failure
+    return 0
+}
+
+collect_http_stats() {
+    # Collect HTTP response counters per status class (2xx/3xx/4xx/5xx) from
+    # the metrics text endpoint.
+    # Outputs:   appends a HELP/TYPE header plus the samples found to OUTPUT;
+    #            nothing at all when the endpoint returns no data
+    # Returns:   always 0. The trailing "[[ -n ... ]] && add_metric_value"
+    #            evaluates to non-zero when no 5xx sample exists; without the
+    #            explicit "return 0" that status would become the function's
+    #            status and, with "set -e" active, abort the exporter.
+    local metrics_text
+    metrics_text=$(api_get "/api/v1/system/metrics")
+
+    if [[ -n "$metrics_text" ]]; then
+        local http_2xx http_3xx http_4xx http_5xx
+        http_2xx=$(echo "$metrics_text" | grep -m1 'http_response_total.*status="2xx"' | grep -oP '[\d.]+$' || true)
+        http_3xx=$(echo "$metrics_text" | grep -m1 'http_response_total.*status="3xx"' | grep -oP '[\d.]+$' || true)
+        http_4xx=$(echo "$metrics_text" | grep -m1 'http_response_total.*status="4xx"' | grep -oP '[\d.]+$' || true)
+        http_5xx=$(echo "$metrics_text" | grep -m1 'http_response_total.*status="5xx"' | grep -oP '[\d.]+$' || true)
+
+        OUTPUT+="# HELP artifactory_http_requests_total Total HTTP requests by status class
+# TYPE artifactory_http_requests_total counter
+"
+        # "${var%.*}" drops a fractional part so the sample is a whole number
+        [[ -n "$http_2xx" ]] && add_metric_value "artifactory_http_requests_total" "${http_2xx%.*}" 'status="2xx"'
+        [[ -n "$http_3xx" ]] && add_metric_value "artifactory_http_requests_total" "${http_3xx%.*}" 'status="3xx"'
+        [[ -n "$http_4xx" ]] && add_metric_value "artifactory_http_requests_total" "${http_4xx%.*}" 'status="4xx"'
+        [[ -n "$http_5xx" ]] && add_metric_value "artifactory_http_requests_total" "${http_5xx%.*}" 'status="5xx"'
+    fi
+
+    # Explicit status: a missing status class is not a failure
+    return 0
+}
+
+collect_gc_info() {
+    # Collect figures about the last garbage-collection run from
+    # /api/system/storage/gc: duration, freed space and time of the run.
+    # Outputs:   appends the metrics it can derive to OUTPUT via add_metric;
+    #            fields that are missing from the response are skipped
+    local gc_json gc_duration_ms gc_duration_secs
+    local gc_freed_size gc_freed_bytes gc_time gc_epoch
+
+    gc_json=$(api_get "/api/system/storage/gc")
+    if [[ -z "$gc_json" ]]; then
+        return
+    fi
+
+    # Duration is reported in milliseconds; export it in seconds
+    gc_duration_ms=$(jq -r '.gcDurationMillis // empty' <<< "$gc_json" 2>/dev/null)
+    if [[ -n "$gc_duration_ms" ]]; then
+        gc_duration_secs=$(awk '{printf "%.3f", $1 / 1000}' <<< "$gc_duration_ms")
+        add_metric "artifactory_gc_duration_seconds" "gauge" "Duration of last garbage collection in seconds" "$gc_duration_secs"
+    fi
+
+    # Freed space arrives as a size string and is converted to bytes
+    gc_freed_size=$(jq -r '.freedSpace // empty' <<< "$gc_json" 2>/dev/null)
+    if [[ -n "$gc_freed_size" ]]; then
+        gc_freed_bytes=$(parse_size_to_bytes "$gc_freed_size")
+        add_metric "artifactory_gc_freed_bytes" "gauge" "Bytes freed by last garbage collection" "$gc_freed_bytes"
+    fi
+
+    # Time of the last run: exported only when date(1) can parse it
+    gc_time=$(jq -r '.gcTime // empty' <<< "$gc_json" 2>/dev/null)
+    if [[ -n "$gc_time" ]]; then
+        gc_epoch=$(date -d "$gc_time" +%s 2>/dev/null || echo "")
+        if [[ -n "$gc_epoch" ]]; then
+            add_metric "artifactory_gc_last_run_timestamp" "gauge" "Unix timestamp of last garbage collection" "$gc_epoch"
+        fi
+    fi
+}
+
+write_output() {
+    # Emit the collected metrics.
+    # Globals:   OUTPUT, TEXTFILE_MODE, TEXTFILE_DIR (read)
+    # Outputs:   stdout in normal mode; ${TEXTFILE_DIR}/artifactory.prom in
+    #            textfile mode
+    # Returns:   0 on success, 1 when the metrics file cannot be written
+    local output_file temp_file
+
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return 0
+    fi
+
+    output_file="${TEXTFILE_DIR}/artifactory.prom"
+    mkdir -p "$TEXTFILE_DIR"
+
+    # Unpredictable temp name in the target directory, so the final mv is a
+    # rename within one directory and readers never see a partial file
+    temp_file=$(mktemp "${TEXTFILE_DIR}/.artifactory.prom.XXXXXX")
+
+    # mktemp creates the file readable by its owner only; set 644 explicitly so
+    # the result does not depend on the caller's umask
+    if ! printf '%s\n' "$OUTPUT" > "$temp_file" ||
+        ! chmod 644 "$temp_file" ||
+        ! mv -f "$temp_file" "$output_file"; then
+        # Do not leave a stray temp file behind on failure
+        rm -f "$temp_file"
+        echo "ERROR: Failed to write ${output_file}" >&2
+        return 1
+    fi
+}
+
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ script_path=$(readlink -f "$0")
+
+ cat > /etc/cron.d/artifactory-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/artifactory-exporter
+ echo "Installed cron job: /etc/cron.d/artifactory-exporter"
+ echo "Metrics will be written to: ${TEXTFILE_DIR}/artifactory.prom"
+}
+
+# --- Main ---
+
+main() {
+    # Entry point: handle command-line options, collect all metrics and write
+    # them out. The detailed collectors run only when collect_health succeeds;
+    # the exporter's own metrics are always added.
+    local end_time duration
+
+    # Parse arguments (--install performs the installation and exits)
+    for arg in "$@"; do
+        if [[ "$arg" == "--textfile" ]]; then
+            TEXTFILE_MODE=true
+        elif [[ "$arg" == "--install" ]]; then
+            check_dependencies
+            validate_config
+            install_cron
+            exit 0
+        elif [[ "$arg" == "--help" || "$arg" == "-h" ]]; then
+            usage
+        else
+            echo "Unknown option: $arg" >&2
+            usage
+        fi
+    done
+
+    check_dependencies
+    validate_config
+
+    # Nanosecond timestamps bracket the collection for the duration metric
+    START_TIME=$(date +%s%N)
+
+    # Exporter info
+    add_metric "artifactory_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+    # Collect metrics
+    if collect_health; then
+        collect_storage
+        collect_system_info
+        collect_http_stats
+        collect_gc_info
+    fi
+
+    # Exporter performance; falls back to 0 when bc cannot compute the value
+    end_time=$(date +%s%N)
+    duration=$(bc <<< "scale=2; ($end_time - $START_TIME) / 1000000000" 2>/dev/null || echo "0")
+    add_metric "artifactory_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
+    add_metric "artifactory_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+    write_output
+}
+
+main "$@"
diff --git a/audit-log-analyzer.sh b/audit-log-analyzer.sh
new file mode 100644
index 0000000..6d0b17c
--- /dev/null
+++ b/audit-log-analyzer.sh
@@ -0,0 +1,575 @@
+#!/bin/bash
+
+#############################################################
+#### Audit Log Analyzer Script for SELinux and AppArmor ####
+#### Parses denial logs and suggests fix commands ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### To use this script chmod it to 755                 ####
+#### or simply run: bash audit-log-analyzer.sh           ####
+#############################################################
+
+# ── Colors ────────────────────────────────────────────────
+RED='\033[0;31m'  # error messages and raw DENIAL lines; escapes are stored literally and expanded later by echo -e
+GREEN='\033[0;32m'  # suggested commands and "nothing found" messages
+YELLOW='\033[1;33m'  # warnings and hints
+CYAN='\033[0;36m'  # field labels and status lines
+BOLD='\033[1m'  # headings
+NC='\033[0m' # No Color
+
+# ── Defaults ──────────────────────────────────────────────
+MODE="recent"  # "recent" or "all"; set by --recent / --all
+OUTPUT_FILE=""  # path given to --output; empty means no file copy is written
+QUIET=0  # 1 (--quiet) suppresses the raw denial lines
+TOTAL_DENIALS=0  # summary counter, assigned by the analyze_* functions
+UNIQUE_TYPES=0  # summary counter, assigned by the analyze_* functions
+SUGGESTED_FIXES=0  # incremented by the suggest_*_fix functions
+
+# ── Functions ─────────────────────────────────────────────
+
+# usage
+#   Print the help text and exit with status 0.
+usage() {
+    # Only the title carries colour codes, so it alone needs echo -e.
+    echo -e "${BOLD}Audit Log Analyzer — SELinux & AppArmor${NC}"
+    cat <<EOF
+
+Usage: $0 [OPTIONS]
+
+Options:
+ --help Show this help message
+ --recent Analyze denials from the last hour only (default)
+ --all Analyze all denials in the log
+ --output FILE Save suggested fixes to FILE
+ --quiet Show suggestions only, suppress raw denial lines
+
+Examples:
+ sudo bash $0 --recent
+ sudo bash $0 --all --output fixes.txt
+ sudo bash $0 --quiet --output /tmp/fixes.txt
+EOF
+    exit 0
+}
+
+# check_root
+#   Abort unless the script runs with root privileges.
+#   Outputs: an error and a usage hint on stderr when EUID is not 0
+#   Returns: 0 when root; otherwise exits the script with status 1
+check_root() {
+    if [[ $EUID -ne 0 ]]; then
+        # Diagnostics go to stderr so they are not mixed into captured output.
+        echo -e "${RED}Error: This script must be run as root.${NC}" >&2
+        echo "Please run with: sudo bash $0" >&2
+        exit 1
+    fi
+}
+
+detect_mac_system() {
+ SELINUX_ACTIVE=0
+ APPARMOR_ACTIVE=0
+
+ # Check SELinux
+ if command -v getenforce &>/dev/null; then
+ SELINUX_STATUS=$(getenforce 2>/dev/null)
+ if [[ "$SELINUX_STATUS" == "Enforcing" || "$SELINUX_STATUS" == "Permissive" ]]; then
+ SELINUX_ACTIVE=1
+ fi
+ fi
+
+ # Check AppArmor
+ if command -v aa-status &>/dev/null; then
+ if aa-status &>/dev/null; then
+ APPARMOR_ACTIVE=1
+ fi
+ elif [[ -d /sys/module/apparmor ]]; then
+ APPARMOR_ACTIVE=1
+ fi
+
+ if [[ $SELINUX_ACTIVE -eq 0 && $APPARMOR_ACTIVE -eq 0 ]]; then
+ echo -e "${YELLOW}Warning: Neither SELinux nor AppArmor appears to be active on this system.${NC}"
+ exit 1
+ fi
+}
+
+output_line() {
+ local line="$1"
+ echo -e "$line"
+ if [[ -n "$OUTPUT_FILE" ]]; then
+ echo -e "$line" | sed 's/\x1b\[[0-9;]*m//g' >> "$OUTPUT_FILE"
+ fi
+}
+
+# ── SELinux Analysis ──────────────────────────────────────
+
+parse_selinux_denial() {
+ local line="$1"
+
+ local scontext tcontext tclass perm comm name path
+
+ scontext=$(echo "$line" | grep -oP 'scontext=\K[^ ]+')
+ tcontext=$(echo "$line" | grep -oP 'tcontext=\K[^ ]+')
+ tclass=$(echo "$line" | grep -oP 'tclass=\K[^ ]+')
+ perm=$(echo "$line" | grep -oP '\{ \K[^}]+')
+ comm=$(echo "$line" | grep -oP 'comm="\K[^"]+')
+ name=$(echo "$line" | grep -oP 'name="\K[^"]+')
+ path=$(echo "$line" | grep -oP 'path="\K[^"]+')
+
+ if [[ $QUIET -eq 0 ]]; then
+ output_line "${RED}DENIAL:${NC} $line"
+ fi
+
+ output_line "${CYAN} Source context:${NC} $scontext"
+ output_line "${CYAN} Target context:${NC} $tcontext"
+ output_line "${CYAN} Class:${NC} $tclass"
+ output_line "${CYAN} Permission:${NC} $perm"
+ [[ -n "$comm" ]] && output_line "${CYAN} Command:${NC} $comm"
+ [[ -n "$path" ]] && output_line "${CYAN} Path:${NC} $path"
+
+ suggest_selinux_fix "$scontext" "$tcontext" "$tclass" "$perm" "$path" "$name"
+ output_line ""
+}
+
+# suggest_selinux_fix
+#   Print a remediation hint for one parsed SELinux denial.
+#   Arguments: $1 scontext, $2 tcontext, $3 tclass, $4 perm, $5 path, $6 name
+#   Globals:   SUGGESTED_FIXES (incremented)
+suggest_selinux_fix() {
+    local scontext="$1" tcontext="$2" tclass="$3" perm="$4" path="$5" name="$6"
+
+    # Plain assignment: unlike ((x++)) it never yields a non-zero status.
+    SUGGESTED_FIXES=$((SUGGESTED_FIXES + 1))
+
+    # Socket denials: suggest a port rule for the protocol that was denied
+    # (tcp_socket -> tcp, udp_socket -> udp).
+    if [[ "$tclass" == "tcp_socket" || "$tclass" == "udp_socket" ]]; then
+        local stype proto
+        stype=$(echo "$scontext" | cut -d: -f3)
+        proto="${tclass%_socket}"
+        # NOTE(review): stype is the type of the source context; semanage
+        # normally takes a port type here — confirm the intended value.
+        output_line "${GREEN} Suggested fix (port rule):${NC}"
+        output_line "${GREEN} semanage port -a -t ${stype} -p ${proto} PORT${NC}"
+        output_line "${YELLOW} (Replace PORT with the actual port)${NC}"
+        return
+    fi
+
+    # File access denials: suggest a file-context rule plus a relabel.
+    if [[ -n "$path" && ("$tclass" == "file" || "$tclass" == "dir" || "$tclass" == "lnk_file") ]]; then
+        local ttype
+        ttype=$(echo "$tcontext" | cut -d: -f3)
+        output_line "${GREEN} Suggested fix (file context):${NC}"
+        output_line "${GREEN} semanage fcontext -a -t ${ttype} \"${path}\"${NC}"
+        output_line "${GREEN} restorecon -Rv \"${path}\"${NC}"
+
+        # A boolean may also cover this denial.
+        suggest_selinux_boolean "$scontext" "$tcontext" "$tclass" "$perm"
+        return
+    fi
+
+    # Everything else: only the boolean lookup applies.
+    suggest_selinux_boolean "$scontext" "$tcontext" "$tclass" "$perm"
+}
+
+suggest_selinux_boolean() {
+ local scontext="$1" tcontext="$2" tclass="$3" perm="$4"
+ local stype
+ stype=$(echo "$scontext" | cut -d: -f3)
+
+ # Try to find relevant booleans
+ if command -v getsebool &>/dev/null; then
+ local booleans
+ booleans=$(getsebool -a 2>/dev/null | grep -i "${stype%%_t}" | head -5)
+ if [[ -n "$booleans" ]]; then
+ output_line "${GREEN} Possibly relevant booleans:${NC}"
+ while IFS= read -r bool_line; do
+ local bool_name
+ bool_name=$(echo "$bool_line" | cut -d' ' -f1)
+ output_line "${GREEN} setsebool -P ${bool_name} on${NC}"
+ done <<< "$booleans"
+ fi
+ fi
+
+ output_line "${YELLOW} If no boolean applies, consider generating a custom policy module (see below).${NC}"
+}
+
+# categorize_selinux_denial
+#   Map one AVC record to a reporting category.
+#   Arguments: $1 - raw AVC log line
+#   Outputs:   one of file_access, network, port_binding, process, other
+categorize_selinux_denial() {
+    local line="$1"
+    local tclass
+    tclass=$(echo "$line" | grep -oP 'tclass=\K[^ ]+')
+
+    case "$tclass" in
+        file|dir|lnk_file|fifo_file|sock_file)
+            echo "file_access"
+            ;;
+        tcp_socket|udp_socket|rawip_socket|netlink_socket)
+            # tclass holds an object class, so a "*_port_t" pattern can
+            # never match it. Port-binding denials are recognised by the
+            # name_bind permission inside the "{ ... }" list instead.
+            if [[ "$line" == *"{"*name_bind*"}"* ]]; then
+                echo "port_binding"
+            else
+                echo "network"
+            fi
+            ;;
+        process|process2)
+            echo "process"
+            ;;
+        *)
+            echo "other"
+            ;;
+    esac
+}
+
+analyze_selinux() {  # Collect SELinux AVC denials (recent or all, per MODE), print them grouped by category, then show an audit2allow policy suggestion.
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line "${BOLD} SELinux Audit Log Analysis${NC}"
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line ""
+
+ local selinux_status
+ selinux_status=$(getenforce 2>/dev/null)
+ output_line "${CYAN}SELinux status:${NC} $selinux_status"
+ output_line ""
+
+ # Gather denials
+ local denials=""
+
+ if [[ "$MODE" == "recent" ]]; then
+ if command -v ausearch &>/dev/null; then
+ denials=$(ausearch -m avc -ts recent 2>/dev/null | grep "type=AVC")  # NOTE(review): ausearch(8) describes "recent" as 10 minutes ago, while --help promises the last hour — confirm the intended window
+ fi
+ # Fallback to log file
+ if [[ -z "$denials" && -f /var/log/audit/audit.log ]]; then
+ local one_hour_ago
+ one_hour_ago=$(date -d '1 hour ago' '+%s' 2>/dev/null)
+ if [[ -n "$one_hour_ago" ]]; then  # the awk program keeps AVC lines whose msg=audit(EPOCH. stamp is at or after the cutoff; NOTE(review): 3-argument match() is a gawk extension — confirm awk is gawk on target hosts
+ denials=$(awk -v cutoff="$one_hour_ago" '
+ /type=AVC/ {
+ match($0, /msg=audit\(([0-9]+)\./, arr)
+ if (arr[1] >= cutoff) print
+ }
+ ' /var/log/audit/audit.log)
+ fi
+ fi
+ else
+ if [[ -f /var/log/audit/audit.log ]]; then
+ denials=$(grep "type=AVC" /var/log/audit/audit.log)
+ fi
+ fi
+
+ if [[ -z "$denials" ]]; then
+ output_line "${GREEN}No AVC denials found.${NC}"
+ output_line ""
+ return
+ fi
+
+ # Group denials by category
+ declare -A categories  # category name -> newline-separated denial lines
+ local denial_count=0
+
+ while IFS= read -r line; do
+ [[ -z "$line" ]] && continue
+ ((denial_count++))
+ local category
+ category=$(categorize_selinux_denial "$line")
+ categories["$category"]+="$line"$'\n'
+ done <<< "$denials"
+
+ TOTAL_DENIALS=$denial_count
+
+ # Count unique types
+ local unique
+ unique=$(echo "$denials" | grep -oP 'tclass=\K[^ ]+' | sort -u | wc -l)  # number of distinct tclass values
+ UNIQUE_TYPES=$unique
+
+ # Display grouped results
+ for category in "file_access" "network" "port_binding" "process" "other"; do  # fixed display order
+ if [[ -n "${categories[$category]}" ]]; then
+ local label
+ case "$category" in
+ file_access) label="File Access Denials" ;;
+ network) label="Network Denials" ;;
+ port_binding) label="Port Binding Denials" ;;
+ process) label="Process Denials" ;;
+ other) label="Other Denials" ;;
+ esac
+
+ output_line "${BOLD}── ${label} ──────────────────────────────────${NC}"
+ output_line ""
+
+ while IFS= read -r denial_line; do
+ [[ -z "$denial_line" ]] && continue
+ parse_selinux_denial "$denial_line"
+ done <<< "${categories[$category]}"
+ fi
+ done
+
+ # Generate policy module suggestion with audit2allow
+ if command -v audit2allow &>/dev/null; then
+ output_line "${BOLD}── Policy Module Suggestion ──────────────────────${NC}"
+ output_line ""
+ local policy
+ if [[ "$MODE" == "recent" ]]; then
+ policy=$(ausearch -m avc -ts recent 2>/dev/null | audit2allow 2>/dev/null)  # runs ausearch unconditionally, unlike the guarded call above
+ else
+ policy=$(audit2allow < /var/log/audit/audit.log 2>/dev/null)
+ fi
+
+ if [[ -n "$policy" ]]; then
+ output_line "${GREEN}audit2allow suggests the following policy:${NC}"
+ output_line "$policy"
+ output_line ""
+ output_line "${YELLOW}To create and install a custom module:${NC}"
+ output_line "${GREEN} ausearch -m avc -ts recent | audit2allow -M my_custom_policy${NC}"
+ output_line "${GREEN} semodule -i my_custom_policy.pp${NC}"
+ else
+ output_line "${CYAN}No policy suggestions generated by audit2allow.${NC}"
+ fi
+ output_line ""
+ else
+ output_line "${YELLOW}Note: Install audit2allow (policycoreutils-python-utils) for automatic policy generation.${NC}"
+ output_line ""
+ fi
+}
+
+# ── AppArmor Analysis ────────────────────────────────────
+
+find_apparmor_log_source() {
+ if [[ -f /var/log/syslog ]]; then
+ echo "syslog"
+ elif [[ -f /var/log/kern.log ]]; then
+ echo "kern.log"
+ elif command -v journalctl &>/dev/null; then
+ echo "journalctl"
+ else
+ echo "none"
+ fi
+}
+
+parse_apparmor_denial() {
+ local line="$1"
+
+ local profile operation denied_mask path info
+
+ profile=$(echo "$line" | grep -oP 'profile="\K[^"]+')
+ [[ -z "$profile" ]] && profile=$(echo "$line" | grep -oP 'apparmor="\K[^"]+')
+ operation=$(echo "$line" | grep -oP 'operation="\K[^"]+')
+ denied_mask=$(echo "$line" | grep -oP 'requested_mask="\K[^"]+')
+ [[ -z "$denied_mask" ]] && denied_mask=$(echo "$line" | grep -oP 'denied_mask="\K[^"]+')
+ path=$(echo "$line" | grep -oP 'name="\K[^"]+')
+ info=$(echo "$line" | grep -oP 'info="\K[^"]+')
+
+ if [[ $QUIET -eq 0 ]]; then
+ output_line "${RED}DENIAL:${NC} $line"
+ fi
+
+ [[ -n "$profile" ]] && output_line "${CYAN} Profile:${NC} $profile"
+ [[ -n "$operation" ]] && output_line "${CYAN} Operation:${NC} $operation"
+ [[ -n "$path" ]] && output_line "${CYAN} Path:${NC} $path"
+ [[ -n "$denied_mask" ]] && output_line "${CYAN} Denied mask:${NC} $denied_mask"
+ [[ -n "$info" ]] && output_line "${CYAN} Info:${NC} $info"
+
+ suggest_apparmor_fix "$profile" "$operation" "$path" "$denied_mask"
+ output_line ""
+}
+
+# suggest_apparmor_fix
+#   Print a profile rule for the denied access, the likely profile file and
+#   the interactive alternative.
+#   Arguments: $1 profile, $2 operation (unused), $3 path, $4 denied_mask
+#   Globals:   SUGGESTED_FIXES (incremented)
+suggest_apparmor_fix() {
+    local profile="$1" operation="$2" path="$3" denied_mask="$4"
+
+    # Plain assignment: unlike ((x++)) it never yields a non-zero status.
+    SUGGESTED_FIXES=$((SUGGESTED_FIXES + 1))
+
+    # Build the permission string from the denied mask; execute requests
+    # are suggested as "ix".
+    local perm_str=""
+    case "$denied_mask" in
+        r) perm_str="r" ;;
+        w) perm_str="w" ;;
+        rw) perm_str="rw" ;;
+        x) perm_str="ix" ;;
+        rx) perm_str="rix" ;;
+        rwx) perm_str="rwix" ;;
+        k) perm_str="k" ;;
+        l) perm_str="l" ;;
+        m) perm_str="m" ;;
+        *) perm_str="$denied_mask" ;;
+    esac
+
+    if [[ -n "$path" && -n "$perm_str" ]]; then
+        output_line "${GREEN} Suggested rule to add to profile:${NC}"
+        output_line "${GREEN} ${path} ${perm_str},${NC}"
+    fi
+
+    # Show the profile file path
+    if [[ -n "$profile" ]]; then
+        # Dotted file name for a path-style profile: drop the leading slash
+        # first so "/usr/sbin/x" becomes "usr.sbin.x", not ".usr.sbin.x".
+        local dotted="${profile#/}"
+        local profile_file="/etc/apparmor.d/${dotted//\//.}"
+        if [[ -f "/etc/apparmor.d/$profile" ]]; then
+            profile_file="/etc/apparmor.d/$profile"
+        elif [[ ! -f "$profile_file" ]]; then
+            # Neither candidate exists: look for a file declaring the
+            # profile. -F keeps dots in the name from acting as wildcards.
+            local found
+            found=$(grep -rlF -- "profile $profile" /etc/apparmor.d/ 2>/dev/null | head -1)
+            [[ -n "$found" ]] && profile_file="$found"
+        fi
+        output_line "${CYAN} Profile file:${NC} $profile_file"
+    fi
+
+    output_line "${YELLOW} Or run interactively:${NC}"
+    output_line "${GREEN} aa-logprof${NC}"
+}
+
+analyze_apparmor() {
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line "${BOLD} AppArmor Audit Log Analysis${NC}"
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line ""
+
+ # Show AppArmor status
+ if command -v aa-status &>/dev/null; then
+ local enforced loaded
+ enforced=$(aa-status 2>/dev/null | grep -c "enforce")
+ loaded=$(aa-status 2>/dev/null | grep -c "loaded")
+ output_line "${CYAN}AppArmor profiles loaded:${NC} $loaded"
+ output_line "${CYAN}Profiles in enforce mode:${NC} $enforced"
+ output_line ""
+ fi
+
+ # Find log source
+ local log_source
+ log_source=$(find_apparmor_log_source)
+
+ if [[ "$log_source" == "none" ]]; then
+ output_line "${RED}Error: Cannot find AppArmor log source.${NC}"
+ output_line "${YELLOW}Checked: /var/log/syslog, /var/log/kern.log, journalctl${NC}"
+ return
+ fi
+
+ # Gather denials
+ local denials=""
+
+ if [[ "$log_source" == "journalctl" ]]; then
+ if [[ "$MODE" == "recent" ]]; then
+ denials=$(journalctl --since "1 hour ago" --no-pager 2>/dev/null | grep -i "apparmor.*DENIED")
+ else
+ denials=$(journalctl --no-pager 2>/dev/null | grep -i "apparmor.*DENIED")
+ fi
+ else
+ local log_file
+ [[ "$log_source" == "syslog" ]] && log_file="/var/log/syslog"
+ [[ "$log_source" == "kern.log" ]] && log_file="/var/log/kern.log"
+
+ if [[ "$MODE" == "recent" ]]; then
+ local one_hour_ago
+ one_hour_ago=$(date -d '1 hour ago' '+%b %e %H:%M' 2>/dev/null)
+ if [[ -n "$one_hour_ago" ]]; then
+ denials=$(awk -v cutoff="$(date -d '1 hour ago' '+%s' 2>/dev/null)" '
+ /apparmor.*DENIED/ || /apparmor.*denied/ {
+ print
+ }
+ ' "$log_file" | tail -100)
+ else
+ # Fallback: last 100 denial lines
+ denials=$(grep -i "apparmor.*DENIED" "$log_file" | tail -100)
+ fi
+ else
+ denials=$(grep -i "apparmor.*DENIED" "$log_file")
+ fi
+ fi
+
+ if [[ -z "$denials" ]]; then
+ output_line "${GREEN}No AppArmor denials found.${NC}"
+ output_line ""
+ return
+ fi
+
+ local denial_count=0
+ local -A seen_profiles
+
+ output_line "${BOLD}── AppArmor Denials ─────────────────────────────${NC}"
+ output_line ""
+
+ while IFS= read -r line; do
+ [[ -z "$line" ]] && continue
+ ((denial_count++))
+ parse_apparmor_denial "$line"
+
+ local p
+ p=$(echo "$line" | grep -oP 'profile="\K[^"]+')
+ [[ -n "$p" ]] && seen_profiles["$p"]=1
+ done <<< "$denials"
+
+ TOTAL_DENIALS=$denial_count
+ UNIQUE_TYPES=${#seen_profiles[@]}
+
+ # Suggest aa-logprof for interactive fixing
+ output_line "${BOLD}── Interactive Fix Suggestion ────────────────────${NC}"
+ output_line ""
+ output_line "${YELLOW}For interactive profile updates, run:${NC}"
+ output_line "${GREEN} aa-logprof${NC}"
+ output_line ""
+ output_line "${YELLOW}To set a profile to complain mode for testing:${NC}"
+ for prof in "${!seen_profiles[@]}"; do
+ output_line "${GREEN} aa-complain $prof${NC}"
+ done
+ output_line ""
+}
+
+# ── Summary ───────────────────────────────────────────────
+
+print_summary() {
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line "${BOLD} Summary${NC}"
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line ""
+ output_line " Total denials found: ${BOLD}${TOTAL_DENIALS}${NC}"
+ output_line " Unique denial types: ${BOLD}${UNIQUE_TYPES}${NC}"
+ output_line " Suggested fixes: ${BOLD}${SUGGESTED_FIXES}${NC}"
+ output_line ""
+
+ if [[ -n "$OUTPUT_FILE" ]]; then
+ output_line "${GREEN}Suggestions saved to: ${OUTPUT_FILE}${NC}"
+ output_line ""
+ fi
+}
+
+# ── Parse Arguments ───────────────────────────────────────
+
+# Consume the command line one option at a time.
+until [[ $# -eq 0 ]]; do
+    case "$1" in
+        -h|--help)
+            usage
+            ;;
+        --recent)
+            MODE="recent"
+            shift
+            ;;
+        --all)
+            MODE="all"
+            shift
+            ;;
+        -q|--quiet)
+            QUIET=1
+            shift
+            ;;
+        --output)
+            # The value must be present and must not look like another option.
+            if [[ -n "$2" && "$2" != --* ]]; then
+                OUTPUT_FILE="$2"
+                shift 2
+            else
+                echo -e "${RED}Error: --output requires a filename argument.${NC}"
+                exit 1
+            fi
+            ;;
+        *)
+            echo -e "${RED}Unknown option: $1${NC}"
+            echo "Use --help for usage information."
+            exit 1
+            ;;
+    esac
+done
+
+# ── Main ──────────────────────────────────────────────────
+
+check_root
+
+# Truncate the output file up front so earlier runs do not leak into it
+[[ -n "$OUTPUT_FILE" ]] && true > "$OUTPUT_FILE"
+
+echo -e "${BOLD}Audit Log Analyzer v1.00${NC}"
+echo -e "${CYAN}Mode: ${MODE}${NC}"
+echo ""
+
+detect_mac_system
+
+# Run whichever analysers apply to this host
+[[ $SELINUX_ACTIVE -eq 1 ]] && analyze_selinux
+[[ $APPARMOR_ACTIVE -eq 1 ]] && analyze_apparmor
+
+print_summary
diff --git a/aws-ami-finder.sh b/aws-ami-finder.sh
new file mode 100755
index 0000000..6ac6e3e
--- /dev/null
+++ b/aws-ami-finder.sh
@@ -0,0 +1,395 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### aws-ami-finder.sh — Find the latest AWS AMI for a given OS type ####
+#### Queries ec2 describe-images with pre-defined OS profiles. ####
+#### Requires: bash, aws CLI ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./aws-ami-finder.sh --os amazon2023 ####
+#### ./aws-ami-finder.sh --os ubuntu2204 ####
+#### ./aws-ami-finder.sh --list ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Colours are enabled only when stdout is a terminal.
+RED="" GREEN="" YELLOW="" BOLD="" RESET=""
+if [[ -t 1 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BOLD='\033[1m'
+    RESET='\033[0m'
+fi
+
+# log / warn write to stdout, err writes to stderr.
+log() {
+    echo -e "${GREEN}[OK]${RESET} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[WARN]${RESET} $*"
+}
+
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+OS_TYPE=""  # OS profile to look up (--os); required outside --running mode
+MAX_RESULTS=10  # number of newest AMIs kept by the describe-images query
+REGION=""  # when non-empty, passed to the AWS CLI as --region
+SHOW_INSTANCES=false  # "true" switches to the instance listing mode
+INSTANCE_STATE="running"  # value of the instance-state-name filter
+RUNNING_FILTER=""  # optional platform filter: amazon2, amazon2023, rhel or windows
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {  # NOTE(review): the help here-document and the code between it and the AWS CLI check look truncated in this copy — restore them from the original script
+ cat </dev/null; then
+ err "AWS CLI not found. Install it first."
+ exit 1
+fi
+
+# ── Running instances mode (SSM-based) ────────────────────────────────
+# Instance listing mode: merge EC2 instance data with SSM platform data,
+# optionally filter by platform, and print an aligned table.
+if [[ "$SHOW_INSTANCES" == "true" ]]; then
+    REGION_ARGS=()
+    [[ -n "$REGION" ]] && REGION_ARGS=(--region "$REGION")
+
+    active_region="${REGION:-$(aws configure get region 2>/dev/null || echo 'not set')}"
+
+    filter_label="all"
+    grep_pattern=""
+
+    # Patterns are matched against the tab-separated display line, so a
+    # literal tab separates the platform and version columns.
+    case "$RUNNING_FILTER" in
+        amazon2) grep_pattern=$'(Amazon Linux\t2$|Linux/UNIX)'; filter_label="Amazon Linux 2 + Linux/UNIX" ;;
+        amazon2023) grep_pattern=$'Amazon Linux\t2023'; filter_label="Amazon Linux 2023" ;;
+        rhel) grep_pattern="Red Hat Enterprise Linux"; filter_label="RHEL" ;;
+        windows) grep_pattern="Windows"; filter_label="Windows" ;;
+        "") ;;
+        *) err "Unknown filter: $RUNNING_FILTER (use: amazon2, amazon2023, rhel, windows)"; exit 1 ;;
+    esac
+
+    state_label="${INSTANCE_STATE//,/ + }"
+    log "Querying ${filter_label} instances (${state_label}) in ${active_region}..."
+
+    # Get instances (InstanceId, Name, Owner tag, State, PlatformDetails)
+    ec2_data=$(aws ec2 describe-instances \
+        ${REGION_ARGS[@]+"${REGION_ARGS[@]}"} \
+        --filters "Name=instance-state-name,Values=${INSTANCE_STATE}" \
+        --query 'Reservations[].Instances[].[InstanceId, Tags[?Key==`Name`].Value | [0], Tags[?Key==`Owner`].Value | [0], State.Name, PlatformDetails]' \
+        --output text 2>&1) || {
+        err "EC2 query failed:"
+        echo "$ec2_data" >&2
+        exit 1
+    }
+
+    if [[ -z "$ec2_data" ]]; then
+        warn "No running instances found"
+        exit 0
+    fi
+
+    # Build lookup: InstanceId → Name, Owner, State, PlatformDetails
+    declare -A NAMES
+    declare -A OWNERS
+    declare -A STATES
+    declare -A PLATFORMS
+    while IFS=$'\t' read -r iid name owner state plat; do
+        NAMES["$iid"]="$name"
+        OWNERS["$iid"]="${owner:-}"
+        STATES["$iid"]="$state"
+        PLATFORMS["$iid"]="$plat"
+    done <<< "$ec2_data"
+
+    # Get SSM data for OS identification (best-effort: a failed call just
+    # leaves the SSM columns empty)
+    declare -A SSM_PLATFORM
+    declare -A SSM_VERSION
+    ssm_data=$(aws ssm describe-instance-information \
+        ${REGION_ARGS[@]+"${REGION_ARGS[@]}"} \
+        --query 'InstanceInformationList[].[InstanceId, PlatformName, PlatformVersion]' \
+        --output text 2>/dev/null) || ssm_data=""
+
+    if [[ -n "$ssm_data" ]]; then
+        while IFS=$'\t' read -r iid platform version; do
+            SSM_PLATFORM["$iid"]="$platform"
+            SSM_VERSION["$iid"]="$version"
+        done <<< "$ssm_data"
+    fi
+
+    # Build display lines, merging EC2 + SSM data
+    display_lines=()
+    for iid in "${!NAMES[@]}"; do
+        name="${NAMES[$iid]}"
+        owner="${OWNERS[$iid]:--}"
+        [[ "$owner" == "None" || -z "$owner" ]] && owner="-"
+        # Strip session suffix (e.g. "jallen-session-12345" → "jallen")
+        owner="${owner%%-session-*}"
+        if [[ -n "${SSM_PLATFORM[$iid]:-}" ]]; then
+            platform="${SSM_PLATFORM[$iid]}"
+            version="${SSM_VERSION[$iid]}"
+        else
+            platform="${PLATFORMS[$iid]}"
+            version="(no SSM)"
+        fi
+        state="${STATES[$iid]}"
+        display_lines+=("${name}"$'\t'"${iid}"$'\t'"${owner}"$'\t'"${state}"$'\t'"${platform}"$'\t'"${version}")
+    done
+
+    # Apply filter if set
+    if [[ -n "$grep_pattern" ]]; then
+        filtered=()
+        for line in "${display_lines[@]}"; do
+            if echo "$line" | grep -qP "$grep_pattern"; then
+                filtered+=("$line")
+            fi
+        done
+        display_lines=("${filtered[@]+"${filtered[@]}"}")
+        if [[ ${#display_lines[@]} -eq 0 ]]; then
+            warn "No ${filter_label} instances found"
+            exit 0
+        fi
+    fi
+
+    # Sort by name
+    sorted=$(printf '%s\n' "${display_lines[@]}" | sort -t$'\t' -k1)
+
+    # Calculate dynamic column widths from data (initial values are the
+    # header label widths)
+    col_name=4; col_iid=11; col_owner=5; col_state=5; col_plat=8; col_ver=7
+    while IFS=$'\t' read -r name iid owner state platform version; do
+        (( ${#name} > col_name )) && col_name=${#name}
+        (( ${#iid} > col_iid )) && col_iid=${#iid}
+        (( ${#owner} > col_owner )) && col_owner=${#owner}
+        (( ${#state} > col_state )) && col_state=${#state}
+        (( ${#platform}> col_plat )) && col_plat=${#platform}
+        (( ${#version} > col_ver )) && col_ver=${#version}
+    done <<< "$sorted"
+
+    # Add padding
+    col_name=$((col_name + 2))
+    col_iid=$((col_iid + 2))
+    col_owner=$((col_owner + 2))
+    col_state=$((col_state + 2))
+    col_plat=$((col_plat + 2))
+
+    fmt=" %-${col_name}s %-${col_iid}s %-${col_owner}s %-${col_state}s %-${col_plat}s %s\n"
+
+    # Build separator lines matching column widths
+    sep_name=$(printf '%*s' "$col_name" '' | tr ' ' '-')
+    sep_iid=$(printf '%*s' "$col_iid" '' | tr ' ' '-')
+    sep_owner=$(printf '%*s' "$col_owner" '' | tr ' ' '-')
+    sep_state=$(printf '%*s' "$col_state" '' | tr ' ' '-')
+    sep_plat=$(printf '%*s' "$col_plat" '' | tr ' ' '-')
+    sep_ver=$(printf '%*s' "$col_ver" '' | tr ' ' '-')
+
+    # Display
+    echo ""
+    printf " ${BOLD}%-${col_name}s %-${col_iid}s %-${col_owner}s %-${col_state}s %-${col_plat}s %s${RESET}\n" "Name" "Instance ID" "Owner" "State" "Platform" "Version"
+    printf "$fmt" "$sep_name" "$sep_iid" "$sep_owner" "$sep_state" "$sep_plat" "$sep_ver"
+    while IFS=$'\t' read -r name iid owner state platform version; do
+        # Colorize state and pad manually (escape codes break printf %-Ns)
+        state_pad=$(( col_state - ${#state} ))
+        pad_str=$(printf '%*s' "$state_pad" '')
+        case "$state" in
+            running) printf -v state_str '%b' "${GREEN}${state}${RESET}${pad_str}" ;;
+            stopped) printf -v state_str '%b' "${RED}${state}${RESET}${pad_str}" ;;
+            *) printf -v state_str "%-${col_state}s" "$state" ;;
+        esac
+        # state_str is already padded to col_state, so it is followed by the
+        # same single separator space as every other column; without it the
+        # Platform and Version cells sit one column left of the header.
+        printf " %-${col_name}s %-${col_iid}s %-${col_owner}s %s %-${col_plat}s %s\n" \
+            "$name" "$iid" "$owner" "$state_str" "$platform" "$version"
+    done <<< "$sorted"
+    echo ""
+
+    # Count how many instances had SSM platform data
+    ssm_count=0
+    for iid in "${!NAMES[@]}"; do
+        [[ -n "${SSM_PLATFORM[$iid]:-}" ]] && ((ssm_count++)) || true
+    done
+    log "${#display_lines[@]} instance(s) shown (${ssm_count} identified via SSM)"
+    exit 0
+fi
+
+if [[ -z "$OS_TYPE" ]]; then
+ err "No OS type specified. Use --os TYPE or --running"
+ exit 1
+fi
+
+# ── Query ─────────────────────────────────────────────────────────────
+REGION_ARGS=()
+[[ -n "$REGION" ]] && REGION_ARGS=(--region "$REGION")
+
+active_region="${REGION:-$(aws configure get region 2>/dev/null || echo 'not set')}"
+govcloud=false
+is_govcloud "$active_region" && govcloud=true
+
+set_os_profile "$OS_TYPE" "$govcloud"
+
+$govcloud && log "GovCloud detected — using GovCloud owner IDs"
+log "Querying AMIs for ${OS_TYPE} in ${active_region} (owner: ${OWNER})..."
+echo ""
+
+output=$(aws ec2 describe-images \
+ ${REGION_ARGS[@]+"${REGION_ARGS[@]}"} \
+ --owners "$OWNER" \
+ --filters "Name=name,Values=${NAME_FILTER}" "Name=state,Values=available" \
+ --query "reverse(sort_by(Images, &CreationDate))[:${MAX_RESULTS}].[ImageId, Name, Description, CreationDate]" \
+ --output table 2>&1 | sed "s/DescribeImages/Available AMIs/") || {
+ err "AWS CLI failed:"
+ echo "$output" >&2
+ exit 1
+}
+
+if [[ -z "$output" ]]; then
+ warn "No AMIs found for ${OS_TYPE}"
+ warn "Check your AWS region (current: $(aws configure get region 2>/dev/null || echo 'not set'))"
+ exit 1
+fi
+
+echo "$output"
diff --git a/aws-cost-reporter.sh b/aws-cost-reporter.sh
new file mode 100755
index 0000000..5edc818
--- /dev/null
+++ b/aws-cost-reporter.sh
@@ -0,0 +1,601 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### aws-cost-reporter.sh — Daily AWS cost breakdown by service, account, or tag ####
+#### Supports email (SES), Slack webhooks, CSV/JSON export, period comparison ####
+#### Requires: bash 4+, aws-cli v2, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### export AWS_PROFILE="billing" ####
+#### ./aws-cost-reporter.sh --daily ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+AWS_REGION="${AWS_REGION:-us-east-1}"  # environment override, default us-east-1; shown in the report header
+GROUP_BY="${GROUP_BY:-SERVICE}"  # the output functions recognise SERVICE, LINKED_ACCOUNT and TAG
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"  # default "text"; presumably also csv / json — confirm against the option parser
+SES_FROM_ADDRESS="${SES_FROM_ADDRESS:-}"  # presumably the sender address for SES email — confirm
+SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:-}"  # presumably the default Slack webhook — confirm
+COST_TAG_KEY="${COST_TAG_KEY:-}"  # tag key checked by build_ce_filter
+COST_TAG_VALUE="${COST_TAG_VALUE:-}"  # tag value checked by build_ce_filter
+VERBOSE="${VERBOSE:-false}"  # "true" enables log_debug output
+COLOR="${COLOR:-auto}"  # setup_colors treats "never" and "auto" specially; any other value forces colour
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"  # file name of this script without its directory
+readonly SCRIPT_NAME
+RUN_MODE=""  # compute_ranges accepts daily, weekly, monthly or custom
+CUSTOM_START=""  # start date used when RUN_MODE is custom
+CUSTOM_END=""  # end date used when RUN_MODE is custom
+EMAIL_TO=""  # presumably the report recipient — set outside this view, confirm
+SLACK_URL=""  # presumably a per-run Slack webhook — set outside this view, confirm
+START_TIME=""  # set outside this view
+
+# ── Colors ────────────────────────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "auto" && ! -t 1 ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ RED="\033[0;31m"
+ GREEN="\033[0;32m"
+ YELLOW="\033[0;33m"
+ # shellcheck disable=SC2034 # BLUE reserved for future use / caller scripts
+ BLUE="\033[0;34m"
+ # shellcheck disable=SC2034 # BOLD reserved for future use / caller scripts
+ BOLD="\033[1m"
+ DIM="\033[2m"
+ RESET="\033[0m"
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log_info() { printf "${GREEN}[INFO]${RESET} %s\n" "$*"; }
+log_warn() { printf "${YELLOW}[WARN]${RESET} %s\n" "$*" >&2; }
+log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; }
+log_debug() { [[ "$VERBOSE" == "true" ]] && printf "${DIM}[DEBUG] %s${RESET}\n" "$*"; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+die() { log_error "$@"; exit 1; }
+
+# check_deps
+#   Verify that aws, jq and curl are installed and that bash is 4 or newer.
+#   Calls die (exit 1) when a requirement is not met.
+check_deps() {
+    local -a missing=()
+    local tool
+
+    for tool in aws jq curl; do
+        command -v "$tool" >/dev/null 2>&1 && continue
+        # The AWS CLI is reported as "aws-cli" rather than by command name.
+        if [[ "$tool" == "aws" ]]; then
+            missing+=("aws-cli")
+        else
+            missing+=("$tool")
+        fi
+    done
+
+    if (( ${#missing[@]} > 0 )); then
+        die "Missing required tools: ${missing[*]}"
+    fi
+
+    if (( BASH_VERSINFO[0] < 4 )); then
+        die "Requires bash 4+, found ${BASH_VERSION}"
+    fi
+}
+
+validate_date() {
+ local d="$1"
+ if [[ ! "$d" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
+ die "Invalid date format: $d (expected YYYY-MM-DD)"
+ fi
+}
+
+# ── Date math (portable) ─────────────────────────────────────────────
+date_offset() {
+ # Usage: date_offset YYYY-MM-DD -N → date N days before
+ local base="$1" offset="$2"
+ if date --version >/dev/null 2>&1; then
+ # GNU date
+ date -d "${base} ${offset} days" +%Y-%m-%d
+ else
+ # macOS date
+ date -j -v"${offset}d" -f "%Y-%m-%d" "$base" +%Y-%m-%d
+ fi
+}
+
+today_utc() { date -u +%Y-%m-%d; }
+
+first_of_month() {
+ local d="$1"
+ echo "${d:0:8}01"
+}
+
+first_of_prev_month() {
+ local d="$1"
+ local year="${d:0:4}"
+ local month="${d:5:2}"
+ month=$((10#$month - 1))
+ if (( month == 0 )); then
+ month=12
+ year=$((year - 1))
+ fi
+ printf "%04d-%02d-01" "$year" "$month"
+}
+
+days_between() {
+ local s="$1" e="$2"
+ local ss se
+ if date --version >/dev/null 2>&1; then
+ ss=$(date -d "$s" +%s)
+ se=$(date -d "$e" +%s)
+ else
+ ss=$(date -j -f "%Y-%m-%d" "$s" +%s)
+ se=$(date -j -f "%Y-%m-%d" "$e" +%s)
+ fi
+ echo $(( (se - ss) / 86400 ))
+}
+
+# ── Compute date ranges ──────────────────────────────────────────────
+compute_ranges() {
+ local today
+ today="$(today_utc)"
+
+ case "$RUN_MODE" in
+ daily)
+ PERIOD_START="$(date_offset "$today" -1)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -2)"
+ PREV_END="$(date_offset "$today" -1)"
+ ;;
+ weekly)
+ PERIOD_START="$(date_offset "$today" -7)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -14)"
+ PREV_END="$(date_offset "$today" -7)"
+ ;;
+ monthly)
+ PERIOD_START="$(first_of_month "$today")"
+ PERIOD_END="$today"
+ local prev_first
+ prev_first="$(first_of_prev_month "$today")"
+ PREV_START="$prev_first"
+ PREV_END="$PERIOD_START"
+ ;;
+ custom)
+ PERIOD_START="$CUSTOM_START"
+ PERIOD_END="$CUSTOM_END"
+ local span
+ span="$(days_between "$CUSTOM_START" "$CUSTOM_END")"
+ PREV_START="$(date_offset "$CUSTOM_START" "-$span")"
+ PREV_END="$CUSTOM_START"
+ ;;
+ *)
+ die "Unknown mode: $RUN_MODE"
+ ;;
+ esac
+
+ log_debug "Current period: $PERIOD_START → $PERIOD_END"
+ log_debug "Previous period: $PREV_START → $PREV_END"
+}
+
+# ── Build Cost Explorer request ───────────────────────────────────────
+build_ce_filter() {  # NOTE(review): the here-document and the rest of this function look truncated in this copy — restore them from the original script
+ local filter=""
+ if [[ -n "$COST_TAG_KEY" && -n "$COST_TAG_VALUE" ]]; then  # a filter is only built when both tag key and value are set
+ filter=$(cat </dev/null
+}
+
+# ── Parse cost data ──────────────────────────────────────────────────
+# parse_costs
+#   Reduce a Cost Explorer response to one "key<TAB>total" line per group
+#   key, summed over all time buckets and ordered by descending total.
+#   Arguments: $1 - raw JSON response
+#   Outputs:   the lines on stdout; an empty line when jq fails
+parse_costs() {
+    local payload="$1"
+    jq -r '
+ [.ResultsByTime[].Groups[] |
+ {
+ key: .Keys[0],
+ amount: (.Metrics.BlendedCost.Amount | tonumber)
+ }
+ ] |
+ group_by(.key) |
+ map({
+ key: .[0].key,
+ total: (map(.amount) | add)
+ }) |
+ sort_by(-.total) |
+ .[] |
+ "\(.key)\t\(.total)"
+ ' 2>/dev/null <<< "$payload" || echo ""
+}
+
+# ── Format helpers ────────────────────────────────────────────────────
+fmt_currency() {
+ printf "$%.2f" "$1"
+}
+
+# fmt_delta
+#   Print the percentage change from $2 (previous) to $1 (current) with
+#   one decimal and a leading "+" for increases, or "N/A" when the
+#   previous value is zero.
+fmt_delta() {
+    local curr="$1" prev="$2"
+    if (( $(echo "$prev == 0" | bc -l) )); then
+        echo "N/A"
+        return
+    fi
+    local pct
+    # Multiply before dividing: with scale=1 a division carried out first
+    # is cut to one decimal, so every result became a multiple of 10
+    # (15/100 -> .1 -> 10.0 instead of 15.0).
+    pct=$(echo "scale=1; ($curr - $prev) * 100 / $prev" | bc -l)
+    local sign=""
+    if (( $(echo "$pct > 0" | bc -l) )); then
+        sign="+"
+    fi
+    echo "${sign}${pct}%"
+}
+
+print_header() {
+ local account_id
+ account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "unknown")
+
+ echo "AWS Cost Reporter"
+ echo "Account: $account_id"
+ echo "Region: $AWS_REGION"
+ echo "Mode: $RUN_MODE"
+ echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+ if [[ "$RUN_MODE" == "custom" ]]; then
+ echo "Period: $PERIOD_START → $PERIOD_END"
+ fi
+ echo ""
+}
+
+# ── Text table output ────────────────────────────────────────────────
+output_text_table() {
+ local -n curr_data=$1
+ local -n prev_data=$2
+ local label="SERVICE"
+ case "$GROUP_BY" in
+ LINKED_ACCOUNT) label="ACCOUNT" ;;
+ TAG) label="TAG" ;;
+ esac
+ local divider="──────────────────────────────────────────────────────────────────────"
+ printf " %-38s %-12s %-12s %s\n" "$label" "COST" "PREV" "DELTA"
+ printf " %s\n" "$divider"
+ local total_curr=0 total_prev=0
+ for key in "${!curr_data[@]}"; do
+ local cost="${curr_data[$key]}" prev_cost="${prev_data[$key]:-0}"
+ printf " %-38s %-12s %-12s %s\n" \
+ "$key" "$(fmt_currency "$cost")" "$(fmt_currency "$prev_cost")" "$(fmt_delta "$cost" "$prev_cost")"
+ total_curr=$(echo "$total_curr + $cost" | bc -l)
+ total_prev=$(echo "$total_prev + $prev_cost" | bc -l)
+ done
+ printf " %s\n" "$divider"
+ printf " %-38s %-12s %-12s %s\n" \
+ "TOTAL" "$(fmt_currency "$total_curr")" "$(fmt_currency "$total_prev")" "$(fmt_delta "$total_curr" "$total_prev")"
+}
+
+# ── CSV output ────────────────────────────────────────────────────────
+output_csv() {
+ local -n curr_data=$1
+ local -n prev_data=$2
+ local label="service"
+ case "$GROUP_BY" in
+ LINKED_ACCOUNT) label="account" ;;
+ TAG) label="tag" ;;
+ esac
+ echo "${label},cost,previous_cost,delta_pct"
+ for key in "${!curr_data[@]}"; do
+ local cost="${curr_data[$key]}" prev_cost="${prev_data[$key]:-0}" pct="0"
+ if (( $(echo "$prev_cost != 0" | bc -l) )); then
+ pct=$(echo "scale=2; (($cost - $prev_cost) / $prev_cost) * 100" | bc -l)
+ fi
+ echo "\"$key\",$cost,$prev_cost,$pct"
+ done
+}
+
+# ── JSON output ───────────────────────────────────────────────────────
+output_json() {
+ local -n curr_data=$1
+ local -n prev_data=$2
+ local label="service"
+ case "$GROUP_BY" in
+ LINKED_ACCOUNT) label="account" ;;
+ TAG) label="tag" ;;
+ esac
+ local items=()
+ for key in "${!curr_data[@]}"; do
+ items+=("{\"${label}\": \"${key}\", \"cost\": ${curr_data[$key]}, \"previous_cost\": ${prev_data[$key]:-0}}")
+ done
+ local joined
+ joined=$(printf ",%s" "${items[@]}")
+ joined="${joined:1}"
+ printf '{"mode":"%s","period_start":"%s","period_end":"%s","previous_start":"%s","previous_end":"%s","group_by":"%s","items":[%s]}\n' \
+ "$RUN_MODE" "$PERIOD_START" "$PERIOD_END" "$PREV_START" "$PREV_END" "$GROUP_BY" "$joined"
+}
+
+# ── Render report ─────────────────────────────────────────────────────
+render_report() {
+ local curr_raw="$1" prev_raw="$2"
+
+ # Parse into associative arrays
+ declare -A curr_costs
+ declare -A prev_costs
+
+ while IFS=$'\t' read -r key amount; do
+ [[ -z "$key" ]] && continue
+ curr_costs["$key"]="$amount"
+ done <<< "$(parse_costs "$curr_raw")"
+
+ while IFS=$'\t' read -r key amount; do
+ [[ -z "$key" ]] && continue
+ prev_costs["$key"]="$amount"
+ done <<< "$(parse_costs "$prev_raw")"
+
+ # Ensure previous-only keys appear in current with 0
+ for key in "${!prev_costs[@]}"; do
+ if [[ -z "${curr_costs[$key]+x}" ]]; then
+ curr_costs["$key"]="0"
+ fi
+ done
+
+ case "$OUTPUT_FORMAT" in
+ text)
+ print_header
+ local title="Cost Breakdown — ${PERIOD_START} → ${PERIOD_END}"
+ echo "$title"
+ output_text_table curr_costs prev_costs
+ echo ""
+ ;;
+ csv)
+ output_csv curr_costs prev_costs
+ ;;
+ json)
+ output_json curr_costs prev_costs
+ ;;
+ *)
+ die "Unknown format: $OUTPUT_FORMAT"
+ ;;
+ esac
+}
+
+# ── Email via SES ─────────────────────────────────────────────────────
+send_email() {
+ local report="$1" recipient="$2"
+
+ if [[ -z "$SES_FROM_ADDRESS" ]]; then
+ die "--email requires SES_FROM_ADDRESS to be set"
+ fi
+
+ local subject
+ subject="AWS Cost Report — ${RUN_MODE} — $(today_utc)"
+
+ log_info "Sending report to $recipient via SES..."
+
+ local message
+ message=$(jq -n \
+ --arg from "$SES_FROM_ADDRESS" \
+ --arg to "$recipient" \
+ --arg subject "$subject" \
+ --arg body "$report" \
+ '{
+ Source: $from,
+ Destination: { ToAddresses: [$to] },
+ Message: {
+ Subject: { Data: $subject, Charset: "UTF-8" },
+ Body: { Text: { Data: $body, Charset: "UTF-8" } }
+ }
+ }')
+
+ aws ses send-email \
+ --region "$AWS_REGION" \
+ --cli-input-json "$message" \
+ --output text >/dev/null
+
+ log_info "Email sent to $recipient"
+}
+
+# ── Slack webhook ─────────────────────────────────────────────────────
+# Post the report to a Slack incoming webhook inside a code fence.
+# Arguments: $1 - report text
+#            $2 - webhook URL
+# Returns:   1 when the webhook answers with anything other than HTTP 200.
+send_slack() {
+    local report="$1" webhook="$2"
+
+    log_info "Posting report to Slack..."
+
+    # Reports longer than the limit are cut and marked as truncated.
+    local limit=3000
+    local text="$report"
+    if (( ${#text} > limit )); then
+        text="${text:0:$limit}"$'\n\n'"... (truncated — full report exceeds Slack message limit)"
+    fi
+
+    local payload
+    payload=$(jq -n --arg text '```'"${text}"'```' '{ text: $text }')
+
+    local status
+    status=$(curl -s -o /dev/null -w "%{http_code}" \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d "$payload" \
+        "$webhook")
+
+    if [[ "$status" == "200" ]]; then
+        log_info "Slack message posted"
+    else
+        log_error "Slack webhook returned HTTP $status"
+        return 1
+    fi
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat < 0 )); do
+ case "$1" in
+ --daily|--weekly|--monthly)
+ RUN_MODE="${1#--}"; shift ;;
+ --custom)
+ RUN_MODE="custom"
+ [[ $# -lt 3 ]] && die "--custom requires START and END dates"
+ CUSTOM_START="$2"; CUSTOM_END="$3"
+ validate_date "$CUSTOM_START"; validate_date "$CUSTOM_END"
+ shift 3 ;;
+ --group-by)
+ [[ $# -lt 2 ]] && die "--group-by requires a value"
+ GROUP_BY="$2"; shift 2 ;;
+ --tag)
+ [[ $# -lt 2 ]] && die "--tag requires KEY=VALUE"
+ [[ "$2" != *"="* ]] && die "--tag value must be KEY=VALUE"
+ COST_TAG_KEY="${2%%=*}"; COST_TAG_VALUE="${2#*=}"; shift 2 ;;
+ --format)
+ [[ $# -lt 2 ]] && die "--format requires a value"
+ OUTPUT_FORMAT="$2"; shift 2 ;;
+ --email)
+ [[ $# -lt 2 ]] && die "--email requires an address"
+ EMAIL_TO="$2"; shift 2 ;;
+ --slack)
+ [[ $# -lt 2 ]] && die "--slack requires a webhook URL"
+ SLACK_URL="$2"; shift 2 ;;
+ --verbose) VERBOSE="true"; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ --help|-h) usage ;;
+ *) die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ if [[ -z "$RUN_MODE" ]]; then log_error "No mode specified"; echo ""; usage; exit 1; fi
+ [[ -z "$SLACK_URL" && -n "$SLACK_WEBHOOK_URL" ]] && SLACK_URL="$SLACK_WEBHOOK_URL"
+
+ case "$GROUP_BY" in
+ SERVICE|TAG|LINKED_ACCOUNT) ;;
+ *) die "Invalid --group-by: $GROUP_BY" ;;
+ esac
+ case "$OUTPUT_FORMAT" in
+ text|csv|json) ;;
+ *) die "Invalid --format: $OUTPUT_FORMAT" ;;
+ esac
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+# Entry point: parse options, query Cost Explorer for the current and the
+# previous period, render the report, print it and optionally deliver it by
+# email and/or Slack.
+# Arguments: "$@" - the script's command-line arguments
+main() {
+ parse_args "$@"
+ setup_colors
+ check_deps
+
+ # Remembered so the elapsed time can be logged at the end.
+ START_TIME=$(date +%s)
+
+ # Validate AWS credentials
+ log_debug "Validating AWS credentials..."
+ aws sts get-caller-identity --output text >/dev/null 2>&1 \
+ || die "AWS credentials not configured or expired"
+
+ compute_ranges
+
+ log_info "Querying Cost Explorer ($RUN_MODE, group by $GROUP_BY)..."
+
+ # One query per period; the previous period feeds the PREV/DELTA columns.
+ local curr_raw prev_raw
+ curr_raw="$(query_costs "$PERIOD_START" "$PERIOD_END")"
+ prev_raw="$(query_costs "$PREV_START" "$PREV_END")"
+
+ # Only the current period is mandatory; prev_raw is not checked here.
+ if [[ -z "$curr_raw" ]]; then
+ die "No cost data returned for $PERIOD_START → $PERIOD_END"
+ fi
+
+ local report
+ report="$(render_report "$curr_raw" "$prev_raw")"
+
+ # The report is always printed to stdout; email and Slack delivery below
+ # happen in addition to it.
+ echo "$report"
+
+ # Email delivery
+ if [[ -n "$EMAIL_TO" ]]; then
+ send_email "$report" "$EMAIL_TO"
+ fi
+
+ # Slack delivery
+ if [[ -n "$SLACK_URL" ]]; then
+ send_slack "$report" "$SLACK_URL"
+ fi
+
+ local elapsed=$(( $(date +%s) - START_TIME ))
+ log_info "Completed in ${elapsed}s"
+}
+
+main "$@"
diff --git a/aws-smoke-tests.sh b/aws-smoke-tests.sh
new file mode 100644
index 0000000..ac92361
--- /dev/null
+++ b/aws-smoke-tests.sh
@@ -0,0 +1,537 @@
+#!/usr/bin/env bash
+
+#####################################################################################
+#### aws-smoke-tests.sh — Verify AWS connectivity and core service health ####
+#### Checks credentials, S3, EC2, IAM, VPC, Route 53, CloudWatch, Security Hub ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: ./aws-smoke-tests.sh ####
+#### AWS_PROFILE=prod S3_BUCKET=my-bucket ./aws-smoke-tests.sh ####
+#### ####
+#### See --help for all options. ####
+#####################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Every setting below keeps a value already present in the environment and
+# otherwise falls back to the default shown; an empty default leaves the
+# corresponding optional check unconfigured.
+AWS_REGION="${AWS_REGION:-us-east-1}"
+S3_BUCKET="${S3_BUCKET:-}"
+R53_DOMAIN="${R53_DOMAIN:-}"
+R53_ZONE_ID="${R53_ZONE_ID:-}"
+VPC_ID="${VPC_ID:-}"
+COST_THRESHOLD="${COST_THRESHOLD:-}"
+SG_CHECK_PORTS="${SG_CHECK_PORTS:-22,3389,3306,5432}"
+REQUIRED_PERMISSIONS="${REQUIRED_PERMISSIONS:-}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+# Counters and the list of result lines are updated by record_pass,
+# record_fail and record_skip.
+PASS=0; FAIL=0; SKIP=0; TOTAL=0
+RESULTS=()
+# Epoch seconds, set in main and used to report the run duration.
+START_TIME=""
+# ARN of the calling identity, filled in by test_credentials.
+CALLER_ARN=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Empty until setup_colors decides that colored output is wanted.
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then return; fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ BOLD='\033[1m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { [[ "$VERBOSE" == "true" ]] && echo -e "${BLUE}[DEBUG]${RESET} $*" || true; }
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# Record a passing test as a TAP "ok" line.
+# Arguments: $1 - test name
+#            $2 - optional detail, appended in parentheses
+# Globals:   PASS, TOTAL, RESULTS (written)
+record_pass() {
+    local name="$1" detail="${2:-}"
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    local line="ok ${TOTAL} - ${name}"
+    if [[ -n "$detail" ]]; then
+        line+=" (${detail})"
+    fi
+    RESULTS+=("$line")
+    verbose "PASS: ${name} ${detail}"
+}
+
+# Record a failing test as a TAP "not ok" line.
+# Arguments: $1 - test name
+#            $2 - optional detail, appended in parentheses
+# Globals:   FAIL, TOTAL, RESULTS (written)
+record_fail() {
+    local name="$1" detail="${2:-}"
+    FAIL=$((FAIL + 1))
+    TOTAL=$((TOTAL + 1))
+    local line="not ok ${TOTAL} - ${name}"
+    if [[ -n "$detail" ]]; then
+        line+=" (${detail})"
+    fi
+    RESULTS+=("$line")
+    verbose "FAIL: ${name} ${detail}"
+}
+
+# Record a skipped test as a TAP "ok ... # SKIP" line.
+# Arguments: $1 - test name
+#            $2 - optional reason, appended after an em dash
+# Globals:   SKIP, TOTAL, RESULTS (written)
+record_skip() {
+    local name="$1" reason="${2:-}"
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    local line="ok ${TOTAL} - # SKIP ${name}"
+    if [[ -n "$reason" ]]; then
+        line+=" — ${reason}"
+    fi
+    RESULTS+=("$line")
+    verbose "SKIP: ${name} ${reason}"
+}
+
+# ── Dependency Check ──────────────────────────────────────────────────
+# Exit with status 1 unless both required tools (aws, jq) are on PATH.
+check_dependencies() {
+    local -a missing=()
+    local spec
+    # Each entry is "<command>:<name shown to the user>".
+    for spec in "aws:aws-cli" "jq:jq"; do
+        if ! command -v "${spec%%:*}" >/dev/null 2>&1; then
+            missing+=("${spec#*:}")
+        fi
+    done
+
+    if (( ${#missing[@]} > 0 )); then
+        err "Missing required tools: ${missing[*]}"
+        err "Install aws-cli v2 and jq before running this script."
+        exit 1
+    fi
+    verbose "Dependencies satisfied: aws-cli, jq"
+}
+
+# ── Help ──────────────────────────────────────────────────────────────
+# Print the usage text (environment variables and examples) to stdout and
+# exit with status 0. The heredoc delimiter is quoted, so nothing inside the
+# text is expanded.
+show_help() {
+ cat <<'EOF'
+AWS Smoke Tests — Verify AWS connectivity and core service health
+
+Environment Variables:
+ AWS_REGION Region to test (default: us-east-1)
+ AWS_PROFILE AWS CLI profile to use
+ S3_BUCKET S3 bucket to verify access
+ R53_DOMAIN Route 53 domain to resolve
+ R53_ZONE_ID Hosted zone ID to verify
+ VPC_ID VPC to inspect (auto-detected if not set)
+ COST_THRESHOLD Monthly cost alert threshold in USD
+ SG_CHECK_PORTS Ports to check for open SGs (default: 22,3389,3306,5432)
+ REQUIRED_PERMISSIONS Comma-separated IAM actions to simulate
+ OUTPUT_FORMAT Output format: text or json (default: text)
+ VERBOSE Show detailed output (default: false)
+ COLOR Color output: auto, always, never (default: auto)
+
+Examples:
+ ./aws-smoke-tests.sh
+ AWS_PROFILE=prod S3_BUCKET=my-bucket ./aws-smoke-tests.sh
+ S3_BUCKET=data R53_DOMAIN=example.com COST_THRESHOLD=5000 ./aws-smoke-tests.sh
+EOF
+ exit 0
+}
+
+# ── Tests ─────────────────────────────────────────────────────────────
+
+test_credentials() {
+ verbose "Testing AWS credentials..."
+
+ local identity
+ identity=$(aws sts get-caller-identity --output json 2>/dev/null) || {
+ record_fail "AWS credentials configured" "No valid credentials found"
+ return
+ }
+
+ record_pass "AWS credentials configured"
+
+ local account arn
+ account=$(echo "$identity" | jq -r '.Account // "unknown"')
+ arn=$(echo "$identity" | jq -r '.Arn // "unknown"')
+ CALLER_ARN="$arn"
+
+ record_pass "STS GetCallerIdentity succeeds" "account: ${account}"
+ record_pass "Caller identity" "ARN: ${arn}"
+}
+
+# Check that buckets can be listed and, when S3_BUCKET is set, that this
+# bucket answers a head-bucket request.
+test_s3() {
+    verbose "Testing S3 access..."
+
+    local n_buckets
+    if ! n_buckets=$(aws s3api list-buckets --query 'length(Buckets)' --output text 2>/dev/null); then
+        record_fail "S3 ListBuckets" "API call failed"
+        return
+    fi
+
+    record_pass "S3 ListBuckets succeeds" "${n_buckets} buckets"
+
+    if [[ -z "$S3_BUCKET" ]]; then
+        record_skip "S3 specific bucket check" "S3_BUCKET not set"
+        return
+    fi
+
+    local check="S3 bucket '${S3_BUCKET}' exists and is accessible"
+    if aws s3api head-bucket --bucket "$S3_BUCKET" 2>/dev/null; then
+        record_pass "$check"
+    else
+        record_fail "$check" "head-bucket failed"
+    fi
+}
+
+test_ec2() {
+ verbose "Testing EC2 access..."
+
+ local instances
+ instances=$(aws ec2 describe-instances \
+ --query 'Reservations[].Instances[]' \
+ --output json 2>/dev/null) || {
+ record_fail "EC2 DescribeInstances" "API call failed"
+ return
+ }
+
+ record_pass "EC2 DescribeInstances succeeds"
+
+ local running
+ running=$(echo "$instances" | jq '[.[] | select(.State.Name == "running")] | length')
+ record_pass "Running instances" "${running}"
+}
+
+test_vpc() {
+ verbose "Testing VPC configuration..."
+
+ local vpc_id="$VPC_ID"
+ if [[ -z "$vpc_id" ]]; then
+ vpc_id=$(aws ec2 describe-vpcs \
+ --filters "Name=isDefault,Values=true" \
+ --query 'Vpcs[0].VpcId' \
+ --output text 2>/dev/null) || true
+
+ if [[ -z "$vpc_id" ]] || [[ "$vpc_id" == "None" ]]; then
+ vpc_id=$(aws ec2 describe-vpcs \
+ --query 'Vpcs[0].VpcId' \
+ --output text 2>/dev/null) || true
+ fi
+ fi
+
+ if [[ -z "$vpc_id" ]] || [[ "$vpc_id" == "None" ]]; then
+ record_fail "VPC exists" "No VPC found"
+ return
+ fi
+
+ record_pass "VPC exists" "${vpc_id}"
+
+ # Check subnets
+ local subnet_count
+ subnet_count=$(aws ec2 describe-subnets \
+ --filters "Name=vpc-id,Values=${vpc_id}" \
+ --query 'length(Subnets)' \
+ --output text 2>/dev/null) || subnet_count=0
+
+ if [[ "$subnet_count" -gt 0 ]]; then
+ record_pass "VPC has subnets" "${subnet_count}"
+ else
+ record_fail "VPC has subnets" "0 subnets found"
+ fi
+
+ # Check internet gateway
+ local igw
+ igw=$(aws ec2 describe-internet-gateways \
+ --filters "Name=attachment.vpc-id,Values=${vpc_id}" \
+ --query 'InternetGateways[0].InternetGatewayId' \
+ --output text 2>/dev/null) || igw="None"
+
+ if [[ -n "$igw" ]] && [[ "$igw" != "None" ]]; then
+ record_pass "Internet gateway attached to VPC" "${igw}"
+ else
+ record_fail "Internet gateway attached to VPC" "None found"
+ fi
+}
+
+test_route53() {
+ if [[ -z "$R53_DOMAIN" ]] && [[ -z "$R53_ZONE_ID" ]]; then
+ record_skip "Route 53 checks" "R53_DOMAIN and R53_ZONE_ID not set"
+ return
+ fi
+
+ verbose "Testing Route 53..."
+
+ if [[ -n "$R53_ZONE_ID" ]]; then
+ local zone_name
+ zone_name=$(aws route53 get-hosted-zone \
+ --id "$R53_ZONE_ID" \
+ --query 'HostedZone.Name' \
+ --output text 2>/dev/null) || {
+ record_fail "Route 53 zone ${R53_ZONE_ID} exists"
+ return
+ }
+ record_pass "Route 53 zone exists" "${zone_name}"
+ fi
+
+ if [[ -n "$R53_DOMAIN" ]]; then
+ local zone_count
+ zone_count=$(aws route53 list-hosted-zones \
+ --query 'length(HostedZones)' \
+ --output text 2>/dev/null) || {
+ record_fail "Route 53 ListHostedZones"
+ return
+ }
+ record_pass "Route 53 ListHostedZones succeeds" "${zone_count} zones"
+
+ # Try to resolve the domain using system DNS
+ local resolved
+ resolved=$(dig +short "$R53_DOMAIN" A 2>/dev/null | head -1) || true
+
+ if [[ -n "$resolved" ]]; then
+ record_pass "Route 53 domain ${R53_DOMAIN} resolves" "A: ${resolved}"
+ else
+ record_fail "Route 53 domain ${R53_DOMAIN} resolves" "No A record returned"
+ fi
+ fi
+}
+
+test_security_groups() {
+ verbose "Testing security groups..."
+
+ local sgs
+ sgs=$(aws ec2 describe-security-groups \
+ --query 'SecurityGroups[].{GroupId:GroupId,GroupName:GroupName,IpPermissions:IpPermissions}' \
+ --output json 2>/dev/null) || {
+ record_fail "Security group audit" "API call failed"
+ return
+ }
+
+ IFS=',' read -ra ports <<< "$SG_CHECK_PORTS"
+
+ for port in "${ports[@]}"; do
+ port=$(echo "$port" | tr -d ' ')
+ local open_sgs
+ open_sgs=$(echo "$sgs" | jq -r --argjson port "$port" '
+ [.[] | select(
+ .IpPermissions[]? |
+ select(
+ (.IpRanges[]?.CidrIp == "0.0.0.0/0" or .Ipv6Ranges[]?.CidrIpv6 == "::/0") and
+ (
+ (.FromPort <= $port and .ToPort >= $port) or
+ (.IpProtocol == "-1")
+ )
+ )
+ ) | .GroupId] | unique | join(", ")
+ ' 2>/dev/null) || open_sgs=""
+
+ if [[ -z "$open_sgs" ]]; then
+ record_pass "No security groups with 0.0.0.0/0 on port ${port}"
+ else
+ record_fail "Security group allows 0.0.0.0/0 on port ${port}" "${open_sgs}"
+ fi
+ done
+}
+
+test_cloudwatch_alarms() {
+ verbose "Testing CloudWatch alarms..."
+
+ local alarm_count
+ alarm_count=$(aws cloudwatch describe-alarms \
+ --state-value ALARM \
+ --query 'length(MetricAlarms)' \
+ --output text 2>/dev/null) || {
+ record_fail "CloudWatch alarm check" "API call failed"
+ return
+ }
+
+ if [[ "$alarm_count" -eq 0 ]]; then
+ record_pass "CloudWatch alarms" "0 in ALARM state"
+ else
+ local alarm_names
+ alarm_names=$(aws cloudwatch describe-alarms \
+ --state-value ALARM \
+ --query 'MetricAlarms[].AlarmName' \
+ --output text 2>/dev/null | head -c 200)
+ record_fail "CloudWatch alarms" "${alarm_count} in ALARM state: ${alarm_names}"
+ fi
+}
+
+test_security_hub() {
+ verbose "Testing Security Hub..."
+
+ local findings
+ findings=$(aws securityhub get-findings \
+ --filters '{
+ "WorkflowStatus": [{"Value":"NEW","Comparison":"EQUALS"}],
+ "RecordState": [{"Value":"ACTIVE","Comparison":"EQUALS"}],
+ "SeverityLabel": [{"Value":"CRITICAL","Comparison":"EQUALS"},{"Value":"HIGH","Comparison":"EQUALS"}]
+ }' \
+ --max-items 100 \
+ --query 'length(Findings)' \
+ --output text 2>/dev/null) || {
+ record_skip "Security Hub findings" "Security Hub not enabled or no access"
+ return
+ }
+
+ local critical high
+ critical=$(aws securityhub get-findings \
+ --filters '{
+ "WorkflowStatus": [{"Value":"NEW","Comparison":"EQUALS"}],
+ "RecordState": [{"Value":"ACTIVE","Comparison":"EQUALS"}],
+ "SeverityLabel": [{"Value":"CRITICAL","Comparison":"EQUALS"}]
+ }' \
+ --query 'length(Findings)' \
+ --output text 2>/dev/null) || critical=0
+
+ high=$(aws securityhub get-findings \
+ --filters '{
+ "WorkflowStatus": [{"Value":"NEW","Comparison":"EQUALS"}],
+ "RecordState": [{"Value":"ACTIVE","Comparison":"EQUALS"}],
+ "SeverityLabel": [{"Value":"HIGH","Comparison":"EQUALS"}]
+ }' \
+ --query 'length(Findings)' \
+ --output text 2>/dev/null) || high=0
+
+ if [[ "$critical" -eq 0 ]] && [[ "$high" -eq 0 ]]; then
+ record_pass "Security Hub findings" "0 critical, 0 high"
+ else
+ record_fail "Security Hub findings" "${critical} critical, ${high} high"
+ fi
+}
+
+test_iam_permissions() {
+ if [[ -z "$REQUIRED_PERMISSIONS" ]] || [[ -z "$CALLER_ARN" ]]; then
+ record_skip "IAM permission simulation" "REQUIRED_PERMISSIONS not set or no ARN"
+ return
+ fi
+
+ verbose "Testing IAM permissions..."
+
+ IFS=',' read -ra actions <<< "$REQUIRED_PERMISSIONS"
+ local denied=()
+
+ for action in "${actions[@]}"; do
+ action=$(echo "$action" | tr -d ' ')
+ local result
+ result=$(aws iam simulate-principal-policy \
+ --policy-source-arn "$CALLER_ARN" \
+ --action-names "$action" \
+ --query 'EvaluationResults[0].EvalDecision' \
+ --output text 2>/dev/null) || result="error"
+
+ if [[ "$result" != "allowed" ]]; then
+ denied+=("$action")
+ fi
+ done
+
+ if [[ ${#denied[@]} -eq 0 ]]; then
+ record_pass "IAM simulation" "all ${#actions[@]} required actions allowed"
+ else
+ record_fail "IAM simulation" "denied: ${denied[*]}"
+ fi
+}
+
+test_cost() {
+ if [[ -z "$COST_THRESHOLD" ]]; then
+ record_skip "Cost check" "COST_THRESHOLD not set"
+ return
+ fi
+
+ verbose "Testing monthly cost..."
+
+ local month_start today
+ month_start=$(date -u +%Y-%m-01)
+ today=$(date -u +%Y-%m-%d)
+
+ local cost_json
+ cost_json=$(aws ce get-cost-and-usage \
+ --time-period "Start=${month_start},End=${today}" \
+ --granularity MONTHLY \
+ --metrics BlendedCost \
+ --output json 2>/dev/null) || {
+ record_skip "Cost check" "Cost Explorer API failed (may need to be enabled)"
+ return
+ }
+
+ local amount
+ amount=$(echo "$cost_json" | jq -r '.ResultsByTime[0].Total.BlendedCost.Amount // "0"')
+ local amount_int=${amount%%.*}
+
+ if [[ "$amount_int" -lt "$COST_THRESHOLD" ]]; then
+ record_pass "Current month spend" "\$${amount} below threshold \$${COST_THRESHOLD}"
+ else
+ record_fail "Current month spend" "\$${amount} exceeds threshold \$${COST_THRESHOLD}"
+ fi
+}
+
+# ── Output ────────────────────────────────────────────────────────────
+
+print_tap() {
+ echo "TAP version 14"
+ echo "1..${TOTAL}"
+ for result in "${RESULTS[@]}"; do
+ echo "$result"
+ done
+ echo ""
+ local duration=$(( $(date +%s) - START_TIME ))
+ echo "# Tests: ${TOTAL}, Passed: ${PASS}, Failed: ${FAIL}, Skipped: ${SKIP}"
+ echo "# Duration: ${duration}s"
+}
+
+print_json() {
+ local duration=$(( $(date +%s) - START_TIME ))
+ local json_results="["
+ local first=true
+ for result in "${RESULTS[@]}"; do
+ local status="pass"
+ [[ "$result" == not\ ok* ]] && status="fail"
+ [[ "$result" == *"# SKIP"* ]] && status="skip"
+ local name
+ name=$(echo "$result" | sed -E 's/^(not )?ok [0-9]+ - (# SKIP )?//' | sed 's/ — .*//' | sed 's/ (.*//')
+
+ $first || json_results+=","
+ first=false
+ json_results+="{\"status\":\"${status}\",\"name\":\"${name}\"}"
+ done
+ json_results+="]"
+
+ jq -n \
+ --argjson results "$json_results" \
+ --argjson total "$TOTAL" \
+ --argjson passed "$PASS" \
+ --argjson failed "$FAIL" \
+ --argjson skipped "$SKIP" \
+ --argjson duration "$duration" \
+ '{
+ total: $total,
+ passed: $passed,
+ failed: $failed,
+ skipped: $skipped,
+ duration_seconds: $duration,
+ results: $results
+ }'
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+
+# Entry point: run every smoke test in a fixed order, print the results in
+# the format selected by OUTPUT_FORMAT and exit with status 1 when at least
+# one test failed, 0 otherwise.
+# Arguments: $1 - optional --help / -h, which prints the usage text and exits
+main() {
+ [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]] && show_help
+
+ setup_colors
+ check_dependencies
+
+ # Reference point for the duration shown by print_tap / print_json.
+ START_TIME=$(date +%s)
+
+ log "AWS Smoke Tests — Region: ${AWS_REGION}"
+ log "────────────────────────────────────────"
+
+ # test_credentials runs first: it stores CALLER_ARN, which
+ # test_iam_permissions reads.
+ test_credentials
+ test_s3
+ test_ec2
+ test_vpc
+ test_route53
+ test_security_groups
+ test_cloudwatch_alarms
+ test_security_hub
+ test_iam_permissions
+ test_cost
+
+ echo ""
+ # Any value other than "json" produces TAP output.
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ print_json
+ else
+ print_tap
+ fi
+
+ if [[ "$FAIL" -gt 0 ]]; then
+ exit 1
+ fi
+ exit 0
+}
+
+main "$@"
diff --git a/azure-ad-audit.sh b/azure-ad-audit.sh
new file mode 100755
index 0000000..267e0cd
--- /dev/null
+++ b/azure-ad-audit.sh
@@ -0,0 +1,649 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### azure-ad-audit.sh — Audit Azure Entra ID for stale users, MFA gaps, risky ####
+#### sign-ins, excessive permissions, and service principal hygiene via az CLI ####
+#### Requires: bash 4+, az CLI, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./azure-ad-audit.sh --full ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+# ── Severity counters ────────────────────────────────────────────────
+TOTAL_CRIT=0
+TOTAL_WARN=0
+TOTAL_INFO=0
+TOTAL_OK=0
+
+flag_crit() { ((TOTAL_CRIT++)) || true; }
+flag_warn() { ((TOTAL_WARN++)) || true; }
+flag_info() { ((TOTAL_INFO++)) || true; }
+flag_ok() { ((TOTAL_OK++)) || true; }
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+STALE_DAYS="${STALE_DAYS:-90}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── Dependency checks ────────────────────────────────────────────────
+check_deps() {
+ command -v az &>/dev/null || die "az CLI is required (https://aka.ms/install-azure-cli)"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+check_credentials() {
+ if ! az account show &>/dev/null 2>&1; then
+ die "Not logged in to Azure CLI — run 'az login' first"
+ fi
+ verbose "Azure CLI authenticated"
+}
+
+# ── Date helpers ─────────────────────────────────────────────────────
+days_since() {
+ local date_str="$1"
+ if [[ -z "$date_str" || "$date_str" == "null" || "$date_str" == "None" ]]; then
+ echo "never"
+ return
+ fi
+ local then_epoch now_epoch
+ then_epoch=$(date -d "${date_str}" +%s 2>/dev/null || echo 0)
+ now_epoch=$(date +%s)
+ if [[ "$then_epoch" -eq 0 ]]; then
+ echo "unknown"
+ return
+ fi
+ echo $(( (now_epoch - then_epoch) / 86400 ))
+}
+
+days_until() {
+ local date_str="$1"
+ if [[ -z "$date_str" || "$date_str" == "null" || "$date_str" == "None" ]]; then
+ echo "unknown"
+ return
+ fi
+ local target_epoch now_epoch
+ target_epoch=$(date -d "${date_str}" +%s 2>/dev/null || echo 0)
+ now_epoch=$(date +%s)
+ if [[ "$target_epoch" -eq 0 ]]; then
+ echo "unknown"
+ return
+ fi
+ echo $(( (target_epoch - now_epoch) / 86400 ))
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# STALE USERS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_stale_users() {
+ log "Auditing stale user accounts (threshold: ${STALE_DAYS} days)..."
+ echo ""
+
+ printf " %-36s %-24s %-20s %-10s %s\n" \
+ "UPN" "DISPLAY_NAME" "LAST_SIGN_IN" "DAYS_IDLE" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..110})"
+
+ local users_json
+ # shellcheck disable=SC2016
+ users_json=$(az rest --method GET \
+ --url 'https://graph.microsoft.com/v1.0/users?$select=userPrincipalName,displayName,signInActivity,accountEnabled&$top=999' \
+ 2>/dev/null || echo '{"value":[]}')
+
+ echo "$users_json" | jq -c '.value[]? // empty' 2>/dev/null | while IFS= read -r user; do
+ local upn display_name enabled last_sign_in
+ upn=$(echo "$user" | jq -r '.userPrincipalName // "unknown"' 2>/dev/null)
+ display_name=$(echo "$user" | jq -r '.displayName // "unknown"' 2>/dev/null)
+ enabled=$(echo "$user" | jq -r '.accountEnabled // true' 2>/dev/null)
+ last_sign_in=$(echo "$user" | jq -r '.signInActivity.lastSignInDateTime // "null"' 2>/dev/null)
+
+ [[ "$enabled" == "false" ]] && continue
+
+ local idle_days last_sign_display
+ idle_days=$(days_since "$last_sign_in")
+
+ if [[ "$last_sign_in" == "null" || -z "$last_sign_in" ]]; then
+ last_sign_display="Never"
+ else
+ last_sign_display="${last_sign_in:0:10}"
+ fi
+
+ if [[ "$idle_days" == "never" ]]; then
+ printf " %-36s %-24s %-20s %-10s %b%s%b\n" \
+ "${upn:0:34}" "${display_name:0:22}" "$last_sign_display" \
+ "N/A" "$YELLOW" "WARN" "$RESET"
+ flag_warn
+ elif [[ "$idle_days" == "unknown" ]]; then
+ verbose "Skipping ${upn}: unable to parse sign-in date"
+ elif [[ "$idle_days" -gt "$STALE_DAYS" ]]; then
+ printf " %-36s %-24s %-20s %-10s %b%s%b\n" \
+ "${upn:0:34}" "${display_name:0:22}" "$last_sign_display" \
+ "$idle_days" "$YELLOW" "WARN" "$RESET"
+ flag_warn
+ else
+ verbose "User ${upn}: active (${idle_days}d idle)"
+ flag_ok
+ fi
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MFA AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_mfa() {
+ log "Auditing MFA registration status..."
+ echo ""
+
+ printf " %-36s %-14s %-10s %s\n" \
+ "UPN" "MFA_STATUS" "IS_ADMIN" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..80})"
+
+ local reg_json
+ # shellcheck disable=SC2016
+ reg_json=$(az rest --method GET \
+ --url 'https://graph.microsoft.com/v1.0/reports/credentialUserRegistrationDetails?$top=999' \
+ 2>/dev/null || echo '{"value":[]}')
+
+ local admin_upns
+ admin_upns=$(az rest --method GET \
+ --url 'https://graph.microsoft.com/v1.0/directoryRoles' \
+ 2>/dev/null | jq -r '.value[]? | select(.displayName | test("Admin|Administrator"; "i")) | .id' 2>/dev/null || true)
+
+ local admin_members=""
+ while IFS= read -r role_id; do
+ [[ -z "$role_id" ]] && continue
+ local members
+ members=$(az rest --method GET \
+ --url "https://graph.microsoft.com/v1.0/directoryRoles/${role_id}/members" \
+ 2>/dev/null | jq -r '.value[]?.userPrincipalName // empty' 2>/dev/null || true)
+ admin_members="${admin_members}${members}"$'\n'
+ done <<< "$admin_upns"
+
+ echo "$reg_json" | jq -c '.value[]? // empty' 2>/dev/null | while IFS= read -r entry; do
+ local upn mfa_registered
+ upn=$(echo "$entry" | jq -r '.userPrincipalName // "unknown"' 2>/dev/null)
+ mfa_registered=$(echo "$entry" | jq -r '.isMfaRegistered // false' 2>/dev/null)
+
+ local is_admin="No"
+ if echo "$admin_members" | grep -qi "^${upn}$" 2>/dev/null; then
+ is_admin="Yes"
+ fi
+
+ local mfa_display severity
+ if [[ "$mfa_registered" == "true" ]]; then
+ mfa_display="Registered"
+ severity="OK"
+ printf " %-36s %-14s %-10s %b%s%b\n" \
+ "${upn:0:34}" "$mfa_display" "$is_admin" "$GREEN" "$severity" "$RESET"
+ flag_ok
+ else
+ mfa_display="Not registered"
+ if [[ "$is_admin" == "Yes" ]]; then
+ severity="CRITICAL"
+ printf " %-36s %-14s %-10s %b%s%b\n" \
+ "${upn:0:34}" "$mfa_display" "$is_admin" "$RED" "$severity" "$RESET"
+ flag_crit
+ else
+ severity="WARN"
+ printf " %-36s %-14s %-10s %b%s%b\n" \
+ "${upn:0:34}" "$mfa_display" "$is_admin" "$YELLOW" "$severity" "$RESET"
+ flag_warn
+ fi
+ fi
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ADMIN ROLES AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_admins() {
+ log "Auditing privileged role assignments..."
+ echo ""
+
+ printf " %-36s %-28s %-20s %s\n" \
+ "UPN" "ROLE" "SCOPE" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..95})"
+
+ local owner_json
+ owner_json=$(az role assignment list --role "Owner" --all 2>/dev/null || echo '[]')
+
+ echo "$owner_json" | jq -c '.[]? // empty' 2>/dev/null | while IFS= read -r assignment; do
+ local upn role scope
+ upn=$(echo "$assignment" | jq -r '.principalName // "unknown"' 2>/dev/null)
+ role=$(echo "$assignment" | jq -r '.roleDefinitionName // "unknown"' 2>/dev/null)
+ scope=$(echo "$assignment" | jq -r '.scope // "/"' 2>/dev/null)
+
+ [[ -z "$upn" || "$upn" == "unknown" ]] && continue
+
+ local scope_display
+ scope_display="${scope##*/}"
+ [[ -z "$scope_display" ]] && scope_display="$scope"
+
+ printf " %-36s %-28s %-20s %b%s%b\n" \
+ "${upn:0:34}" "${role:0:26}" "${scope_display:0:18}" \
+ "$CYAN" "INFO" "$RESET"
+ flag_info
+ done
+
+ local ga_json
+ ga_json=$(az rest --method GET \
+ --url 'https://graph.microsoft.com/v1.0/directoryRoles' \
+ 2>/dev/null | jq -c '.value[]? | select(.displayName == "Global Administrator")' 2>/dev/null || echo '')
+
+ if [[ -n "$ga_json" ]]; then
+ local ga_role_id
+ ga_role_id=$(echo "$ga_json" | jq -r '.id' 2>/dev/null)
+
+ local ga_members
+ ga_members=$(az rest --method GET \
+ --url "https://graph.microsoft.com/v1.0/directoryRoles/${ga_role_id}/members" \
+ 2>/dev/null || echo '{"value":[]}')
+
+ local ga_count
+ ga_count=$(echo "$ga_members" | jq '[.value[]?] | length' 2>/dev/null || echo 0)
+
+ echo "$ga_members" | jq -c '.value[]? // empty' 2>/dev/null | while IFS= read -r member; do
+ local m_upn
+ m_upn=$(echo "$member" | jq -r '.userPrincipalName // "unknown"' 2>/dev/null)
+
+ printf " %-36s %-28s %-20s %b%s%b\n" \
+ "${m_upn:0:34}" "Global Administrator" "Tenant" \
+ "$CYAN" "INFO" "$RESET"
+ flag_info
+ done
+
+ if [[ "$ga_count" -gt 5 ]]; then
+ echo ""
+ warn "Excessive Global Administrators: ${ga_count} found (recommended: ≤5)"
+ printf " %-36s %-28s %-20s %b%s%b\n" \
+ "— policy —" "Global Admin count: ${ga_count}" ">5 threshold" \
+ "$YELLOW" "WARN" "$RESET"
+ flag_warn
+ elif [[ "$ga_count" -gt 0 ]]; then
+ verbose "Global Administrator count: ${ga_count} (within threshold)"
+ flag_ok
+ fi
+ fi
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SERVICE PRINCIPALS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_service_principals() {
+ log "Auditing service principal credentials..."
+ echo ""
+
+ printf " %-30s %-14s %-20s %-14s %s\n" \
+ "APP_NAME" "CRED_TYPE" "EXPIRY" "STATUS" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..100})"
+
+ local sp_json
+ sp_json=$(az ad sp list --all --query "[].{appDisplayName:appDisplayName,appId:appId,keyCredentials:keyCredentials,passwordCredentials:passwordCredentials}" 2>/dev/null || echo '[]')
+
+ echo "$sp_json" | jq -c '.[]? // empty' 2>/dev/null | while IFS= read -r sp; do
+ local app_name app_id
+ app_name=$(echo "$sp" | jq -r '.appDisplayName // "unnamed"' 2>/dev/null)
+ app_id=$(echo "$sp" | jq -r '.appId // "unknown"' 2>/dev/null)
+
+ [[ "$app_name" == "unnamed" || -z "$app_name" ]] && app_name="$app_id"
+
+ echo "$sp" | jq -c '.passwordCredentials[]? // empty' 2>/dev/null | while IFS= read -r cred; do
+ local end_date
+ end_date=$(echo "$cred" | jq -r '.endDateTime // "null"' 2>/dev/null)
+
+ local remaining status severity
+ remaining=$(days_until "$end_date")
+
+ if [[ "$remaining" == "unknown" ]]; then
+ status="Unknown"
+ severity="INFO"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Password" "Unknown" "$status" \
+ "$CYAN" "$severity" "$RESET"
+ flag_info
+ elif [[ "$remaining" -lt 0 ]]; then
+ status="Expired"
+ severity="WARN"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Password" "${end_date:0:10}" "$status" \
+ "$YELLOW" "$severity" "$RESET"
+ flag_warn
+ elif [[ "$remaining" -lt 30 ]]; then
+ status="Expiring (${remaining}d)"
+ severity="WARN"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Password" "${end_date:0:10}" "$status" \
+ "$YELLOW" "$severity" "$RESET"
+ flag_warn
+ else
+ verbose "SP ${app_name}: password credential valid (${remaining}d remaining)"
+ flag_ok
+ fi
+ done
+
+ echo "$sp" | jq -c '.keyCredentials[]? // empty' 2>/dev/null | while IFS= read -r cred; do
+ local end_date
+ end_date=$(echo "$cred" | jq -r '.endDateTime // "null"' 2>/dev/null)
+
+ local remaining status severity
+ remaining=$(days_until "$end_date")
+
+ if [[ "$remaining" == "unknown" ]]; then
+ status="Unknown"
+ severity="INFO"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Certificate" "Unknown" "$status" \
+ "$CYAN" "$severity" "$RESET"
+ flag_info
+ elif [[ "$remaining" -lt 0 ]]; then
+ status="Expired"
+ severity="WARN"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Certificate" "${end_date:0:10}" "$status" \
+ "$YELLOW" "$severity" "$RESET"
+ flag_warn
+ elif [[ "$remaining" -lt 30 ]]; then
+ status="Expiring (${remaining}d)"
+ severity="WARN"
+ printf " %-30s %-14s %-20s %-14s %b%s%b\n" \
+ "${app_name:0:28}" "Certificate" "${end_date:0:10}" "$status" \
+ "$YELLOW" "$severity" "$RESET"
+ flag_warn
+ else
+ verbose "SP ${app_name}: key credential valid (${remaining}d remaining)"
+ flag_ok
+ fi
+ done
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# GUEST USERS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_guests() {
+ log "Auditing guest user accounts (threshold: ${STALE_DAYS} days)..."
+ echo ""
+
+ printf " %-36s %-20s %-20s %s\n" \
+ "UPN" "CREATED" "LAST_ACTIVITY" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+ local guests_json
+ # shellcheck disable=SC2016
+ guests_json=$(az rest --method GET \
+ --url 'https://graph.microsoft.com/v1.0/users?$filter=userType%20eq%20%27Guest%27&$select=userPrincipalName,displayName,createdDateTime,signInActivity&$top=999' \
+ 2>/dev/null || echo '{"value":[]}')
+
+ echo "$guests_json" | jq -c '.value[]? // empty' 2>/dev/null | while IFS= read -r guest; do
+ local upn created last_activity
+ upn=$(echo "$guest" | jq -r '.userPrincipalName // "unknown"' 2>/dev/null)
+ created=$(echo "$guest" | jq -r '.createdDateTime // "null"' 2>/dev/null)
+ last_activity=$(echo "$guest" | jq -r '.signInActivity.lastSignInDateTime // "null"' 2>/dev/null)
+
+ local created_display last_display
+ if [[ "$created" == "null" || -z "$created" ]]; then
+ created_display="Unknown"
+ else
+ created_display="${created:0:10}"
+ fi
+
+ if [[ "$last_activity" == "null" || -z "$last_activity" ]]; then
+ last_display="Never"
+ else
+ last_display="${last_activity:0:10}"
+ fi
+
+ local idle_days
+ idle_days=$(days_since "$last_activity")
+
+ if [[ "$idle_days" == "never" ]]; then
+ printf " %-36s %-20s %-20s %b%s%b\n" \
+ "${upn:0:34}" "$created_display" "$last_display" \
+ "$YELLOW" "WARN" "$RESET"
+ flag_warn
+ elif [[ "$idle_days" == "unknown" ]]; then
+ verbose "Guest ${upn}: unable to determine activity"
+ flag_info
+ elif [[ "$idle_days" -gt "$STALE_DAYS" ]]; then
+ printf " %-36s %-20s %-20s %b%s%b\n" \
+ "${upn:0:34}" "$created_display" "$last_display" \
+ "$YELLOW" "WARN" "$RESET"
+ flag_warn
+ else
+ verbose "Guest ${upn}: active (${idle_days}d idle)"
+ flag_ok
+ fi
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+print_summary() {
+ local elapsed
+ elapsed=$(( $(date +%s) - START_TIME ))
+
+ echo ""
+ echo " ══════════════════════════════════════════"
+ echo " Azure Entra ID Audit Summary"
+ echo " ══════════════════════════════════════════"
+ printf " %-20s %b%d%b\n" "CRITICAL:" "$RED" "$TOTAL_CRIT" "$RESET"
+ printf " %-20s %b%d%b\n" "WARN:" "$YELLOW" "$TOTAL_WARN" "$RESET"
+ printf " %-20s %b%d%b\n" "INFO:" "$CYAN" "$TOTAL_INFO" "$RESET"
+ printf " %-20s %b%d%b\n" "OK:" "$GREEN" "$TOTAL_OK" "$RESET"
+ echo " ──────────────────────────────────────────"
+ printf " Completed in %ds\n" "$elapsed"
+ echo ""
+
+ if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+ echo -e " ${RED}${BOLD}Action required:${RESET} ${TOTAL_CRIT} critical finding(s)"
+ echo ""
+ echo " Top recommendations:"
+ echo " • Enable MFA for all admin accounts immediately"
+ echo " • Review and remove stale user accounts"
+ echo " • Rotate expired service principal credentials"
+ echo " • Reduce the number of Global Administrators to ≤5"
+ echo " • Remove inactive guest accounts"
+ echo ""
+ elif [[ "$TOTAL_WARN" -gt 0 ]]; then
+ echo -e " ${YELLOW}Review recommended:${RESET} ${TOTAL_WARN} warning(s)"
+ echo ""
+ echo " Suggestions:"
+ echo " • Disable or remove stale user accounts"
+ echo " • Enforce MFA registration for all users"
+ echo " • Renew expiring service principal credentials"
+ echo " • Clean up inactive guest accounts"
+ echo ""
+ else
+ echo -e " ${GREEN}All checks passed${RESET}"
+ echo ""
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+# Print usage information to stdout.
+# Globals: BOLD, RESET, SCRIPT_NAME (read; expanded inside the here-document).
+# The MODES list mirrors the mode flags accepted by parse_args.
+show_help() {
+  cat <<EOF
+${BOLD}${SCRIPT_NAME}${RESET} — Azure Entra ID Auditor
+
+${BOLD}USAGE${RESET}
+ ${SCRIPT_NAME} MODE [MODE...] [OPTIONS]
+
+${BOLD}MODES${RESET}
+ --full Run every audit listed below
+ --stale-users Find users inactive > N days
+ --mfa Check MFA registration status
+ --admins Audit privileged role assignments
+ --service-principals Audit service principal credential expiry
+ --guests Find stale guest accounts
+
+${BOLD}OPTIONS${RESET}
+ --stale-days N Override stale threshold in days (default: 90)
+ --verbose Debug output
+ --no-color Disable colored output
+ --help Show this help message
+
+${BOLD}ENVIRONMENT VARIABLES${RESET}
+ STALE_DAYS Days before a user is considered stale (default: 90)
+ VERBOSE Enable verbose output (true/false)
+ COLOR Color mode: auto, always, never
+
+${BOLD}PREREQUISITES${RESET}
+ • Azure CLI authenticated: az login
+ • Microsoft Graph API permissions for sign-in activity and reports
+ • Reader role or higher for role assignment queries
+
+${BOLD}EXAMPLES${RESET}
+ # Full audit
+ ${SCRIPT_NAME} --full
+
+ # Check stale users only
+ ${SCRIPT_NAME} --stale-users
+
+ # MFA audit with custom stale threshold
+ ${SCRIPT_NAME} --mfa --stale-days 60
+
+ # Service principal credential check
+ ${SCRIPT_NAME} --service-principals
+
+ # Guest user audit
+ ${SCRIPT_NAME} --guests
+
+${BOLD}EXIT CODES${RESET}
+ 0 All checks passed
+ 1 Warnings found (review recommended)
+ 2 Critical findings (action required)
+EOF
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PARSE ARGS
+# ══════════════════════════════════════════════════════════════════════
+parse_args() {
+ local modes=()
+
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --full)
+ modes=(stale-users mfa admins service-principals guests)
+ shift ;;
+ --stale-users)
+ modes+=(stale-users); shift ;;
+ --mfa)
+ modes+=(mfa); shift ;;
+ --admins)
+ modes+=(admins); shift ;;
+ --service-principals)
+ modes+=(service-principals); shift ;;
+ --guests)
+ modes+=(guests); shift ;;
+ --stale-days)
+ STALE_DAYS="${2:?--stale-days requires a value}"; shift 2 ;;
+ --verbose)
+ VERBOSE="true"; shift ;;
+ --no-color)
+ COLOR="never"; shift ;;
+ --help|-h)
+ setup_colors; show_help; exit 0 ;;
+ *)
+ die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ if [[ ${#modes[@]} -eq 0 ]]; then
+ err "No audit mode specified"
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 1
+ fi
+
+ RUN_MODE="${modes[*]}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+main() {
+ parse_args "$@"
+ setup_colors
+ check_deps
+ check_credentials
+
+ START_TIME=$(date +%s)
+
+ echo ""
+ echo -e "${BOLD}Azure Entra ID Auditor${RESET}"
+ echo -e "Mode: ${RUN_MODE}"
+ echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+ echo ""
+
+ for mode in $RUN_MODE; do
+ case "$mode" in
+ stale-users) audit_stale_users ;;
+ mfa) audit_mfa ;;
+ admins) audit_admins ;;
+ service-principals) audit_service_principals ;;
+ guests) audit_guests ;;
+ esac
+ done
+
+ print_summary
+
+ if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+ exit 2
+ elif [[ "$TOTAL_WARN" -gt 0 ]]; then
+ exit 1
+ fi
+ exit 0
+}
+
+main "$@"
diff --git a/azure-blob-manager.sh b/azure-blob-manager.sh
new file mode 100644
index 0000000..e5cd21d
--- /dev/null
+++ b/azure-blob-manager.sh
@@ -0,0 +1,597 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### azure-blob-manager.sh — Manage Azure Blob Storage containers, lifecycle, and ####
+#### access auditing via az CLI. Upload, sync, tier, and audit blob storage ####
+#### Requires: bash 4+, az CLI, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./azure-blob-manager.sh --list ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+# Strict mode: abort on a failing command (-e), on use of an unset
+# variable (-u), and when any stage of a pipeline fails (pipefail).
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+# Start out empty so the helpers can expand these under `set -u` even when
+# color is disabled; setup_colors() assigns the escape sequences.
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+# Print the number of whole seconds since START_TIME, suffixed with "s".
+elapsed() {
+  local now
+  now=$(date +%s)
+  printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── Severity counters (for audit mode) ───────────────────────────────
+TOTAL_CRIT=0
+TOTAL_WARN=0
+TOTAL_INFO=0
+TOTAL_OK=0
+
+flag_crit() { ((TOTAL_CRIT++)) || true; }
+flag_warn() { ((TOTAL_WARN++)) || true; }
+flag_info() { ((TOTAL_INFO++)) || true; }
+flag_ok() { ((TOTAL_OK++)) || true; }
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+STORAGE_ACCOUNT=""
+CONTAINER_NAME=""
+RESOURCE_GROUP=""
+OUTPUT_FORMAT="${ABM_FORMAT:-text}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+MAX_AGE="${ABM_MAX_AGE:-90}"
+TIER_TARGET=""
+SOURCE_PATH=""
+SUBSCRIPTION=""
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── Dependency and credential checks ────────────────────────────────
+check_deps() {
+ command -v az &>/dev/null || die "az CLI is required (install: https://aka.ms/InstallAzureCLIDeb)"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+check_credentials() {
+ local acct
+ acct=$(az account show --output json 2>&1) || die "Azure credentials not configured — run 'az login'"
+
+ local sub_name
+ sub_name=$(echo "$acct" | jq -r '.name')
+ log "Subscription: ${sub_name}"
+
+ if [[ -n "$SUBSCRIPTION" ]]; then
+ az account set --subscription "$SUBSCRIPTION" 2>/dev/null \
+ || die "Cannot switch to subscription: ${SUBSCRIPTION}"
+ fi
+}
+
+# ── Azure CLI wrapper ────────────────────────────────────────────────
+# Run `az` with the given arguments, appending --subscription when
+# SUBSCRIPTION is set.
+# Arguments: az sub-command and options.
+# Outputs:   az's own stdout/stderr; the debug trace goes to stderr.
+# Returns:   az's exit status.
+az_cmd() {
+  local args=("$@")
+  if [[ -n "$SUBSCRIPTION" ]]; then
+    args+=(--subscription "$SUBSCRIPTION")
+  fi
+  # Callers capture this function's stdout as JSON, so the debug trace must
+  # not be written there.
+  verbose "az ${args[*]}" >&2
+  az "${args[@]}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST
+# ══════════════════════════════════════════════════════════════════════
+# Dispatch --list to the most specific listing that the supplied
+# account/container options allow.
+do_list() {
+  # No account given: list the storage accounts themselves.
+  if [[ -z "$STORAGE_ACCOUNT" ]]; then
+    list_accounts
+    return
+  fi
+
+  # Account only: list its containers.
+  if [[ -z "$CONTAINER_NAME" ]]; then
+    list_containers
+    return
+  fi
+
+  list_blobs
+}
+
+# Print a table of storage accounts (optionally limited to RESOURCE_GROUP)
+# followed by the total count.
+# Exits: 1 (via die) when the account listing cannot be retrieved.
+list_accounts() {
+  section_header "Storage Accounts"
+
+  local accounts
+  local args=(storage account list --output json)
+  if [[ -n "$RESOURCE_GROUP" ]]; then
+    args+=(--resource-group "$RESOURCE_GROUP")
+  fi
+  # stderr is suppressed, so without an explicit message a failure here
+  # would end the script under `set -e` with no explanation.
+  accounts=$(az_cmd "${args[@]}" 2>/dev/null) \
+    || die "Failed to list storage accounts — check permissions"
+
+  local count
+  count=$(jq 'length' <<<"$accounts")
+
+  printf " %-28s %-16s %-12s %-12s %s\n" \
+    "ACCOUNT" "RESOURCE_GROUP" "KIND" "REPLICATION" "LOCATION"
+  printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+  local acct name rg kind repl location
+  while IFS= read -r acct; do
+    name=$(jq -r '.name' <<<"$acct")
+    rg=$(jq -r '.resourceGroup' <<<"$acct")
+    kind=$(jq -r '.kind' <<<"$acct")
+    repl=$(jq -r '.sku.name' <<<"$acct")
+    location=$(jq -r '.location' <<<"$acct")
+
+    # Values are truncated so that long names do not break column alignment.
+    printf " %-28s %-16s %-12s %-12s %s\n" \
+      "${name:0:27}" "${rg:0:15}" "${kind:0:11}" "${repl:0:11}" "$location"
+  done < <(jq -c '.[]' <<<"$accounts")
+
+  echo ""
+  field "Total accounts:" "$count"
+}
+
+list_containers() {
+ section_header "Containers in ${STORAGE_ACCOUNT}"
+
+ local containers
+ containers=$(az_cmd storage container list \
+ --account-name "$STORAGE_ACCOUNT" --auth-mode login \
+ --output json 2>/dev/null) || die "Failed to list containers — check permissions"
+
+ local count
+ count=$(echo "$containers" | jq 'length')
+
+ printf " %-32s %-16s %-12s %s\n" \
+ "CONTAINER" "PUBLIC_ACCESS" "LEASE_STATE" "LAST_MODIFIED"
+ printf " %s\n" "$(printf '%.0s─' {1..80})"
+
+ echo "$containers" | jq -c '.[]' | while IFS= read -r ctr; do
+ local name public_access lease_state last_mod
+ name=$(echo "$ctr" | jq -r '.name')
+ public_access=$(echo "$ctr" | jq -r '.properties.publicAccess // "none"')
+ lease_state=$(echo "$ctr" | jq -r '.properties.leaseState // "available"')
+ last_mod=$(echo "$ctr" | jq -r '.properties.lastModified // ""' | cut -dT -f1)
+
+ printf " %-32s %-16s %-12s %s\n" \
+ "${name:0:31}" "$public_access" "$lease_state" "$last_mod"
+ done
+
+ echo ""
+ field "Total containers:" "$count"
+}
+
+# Print a table of the blobs in STORAGE_ACCOUNT/CONTAINER_NAME with a
+# human-readable size, followed by the total count.
+# Exits: 1 (via die) when the blob listing fails.
+list_blobs() {
+  section_header "Blobs in ${STORAGE_ACCOUNT}/${CONTAINER_NAME}"
+
+  local blobs
+  blobs=$(az_cmd storage blob list \
+    --account-name "$STORAGE_ACCOUNT" --container-name "$CONTAINER_NAME" \
+    --auth-mode login --output json 2>/dev/null) \
+    || die "Failed to list blobs — check permissions"
+
+  local count
+  count=$(jq 'length' <<<"$blobs")
+
+  printf " %-40s %-12s %-8s %s\n" \
+    "NAME" "SIZE" "TIER" "LAST_MODIFIED"
+  printf " %s\n" "$(printf '%.0s─' {1..80})"
+
+  local blob name size tier last_mod size_str
+  while IFS= read -r blob; do
+    name=$(jq -r '.name' <<<"$blob")
+    size=$(jq -r '.properties.contentLength // 0' <<<"$blob")
+    tier=$(jq -r '.properties.blobTier // "N/A"' <<<"$blob")
+    last_mod=$(jq -r '.properties.lastModified // ""' <<<"$blob" | cut -dT -f1)
+
+    # Thresholds are inclusive so that exactly 1024 bytes is shown as 1 KB
+    # (and likewise at the MB/GB boundaries). Division truncates.
+    if (( size >= 1073741824 )); then
+      size_str="$(( size / 1073741824 )) GB"
+    elif (( size >= 1048576 )); then
+      size_str="$(( size / 1048576 )) MB"
+    elif (( size >= 1024 )); then
+      size_str="$(( size / 1024 )) KB"
+    else
+      size_str="${size} B"
+    fi
+
+    printf " %-40s %-12s %-8s %s\n" \
+      "${name:0:39}" "$size_str" "$tier" "$last_mod"
+  done < <(jq -c '.[]' <<<"$blobs")
+
+  echo ""
+  field "Total blobs:" "$count"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Security audit of storage accounts: account-level settings (public blob
+# access, HTTPS-only, default network action) and container-level public
+# access, followed by the severity summary.
+# Globals: RESOURCE_GROUP, color variables (read); severity counters via
+#          the flag_* helpers.
+# Exits:   1 (via die) when the account listing cannot be retrieved.
+do_audit() {
+  section_header "Storage Security Audit"
+
+  local accounts
+  local args=(storage account list --output json)
+  if [[ -n "$RESOURCE_GROUP" ]]; then
+    args+=(--resource-group "$RESOURCE_GROUP")
+  fi
+  accounts=$(az_cmd "${args[@]}" 2>/dev/null) \
+    || die "Failed to list storage accounts — check permissions"
+
+  printf " %-28s %-16s %-14s %-14s %s\n" \
+    "ACCOUNT" "HTTPS_ONLY" "PUBLIC_BLOB" "NETWORK_RULES" "SEVERITY"
+  printf " %s\n" "$(printf '%.0s─' {1..95})"
+
+  # All loops read from process substitution instead of a pipe: a piped
+  # `while` runs in a subshell, so the flag_* increments would never reach
+  # the TOTAL_* counters that print_summary reports.
+  local acct name https_only public_access net_default severity color
+  while IFS= read -r acct; do
+    name=$(jq -r '.name' <<<"$acct")
+    # jq's `//` treats false like null, so `.enableHttpsTrafficOnly // true`
+    # could never report a disabled setting; test for an explicit false.
+    https_only=$(jq -r 'if .enableHttpsTrafficOnly == false then false else true end' <<<"$acct")
+    public_access=$(jq -r '.allowBlobPublicAccess // false' <<<"$acct")
+    net_default=$(jq -r '.networkRuleSet.defaultAction // "Allow"' <<<"$acct")
+
+    severity="OK"
+    color="$GREEN"
+
+    # Only the most severe finding per account is counted.
+    if [[ "$public_access" == "true" ]]; then
+      severity="CRITICAL"; color="$RED"; flag_crit
+    elif [[ "$https_only" != "true" ]]; then
+      severity="WARN"; color="$YELLOW"; flag_warn
+    elif [[ "$net_default" == "Allow" ]]; then
+      severity="WARN"; color="$YELLOW"; flag_warn
+    else
+      flag_ok
+    fi
+
+    printf " %-28s %-16s %-14s %-14s %b%s%b\n" \
+      "${name:0:27}" "$https_only" "$public_access" "$net_default" \
+      "$color" "$severity" "$RESET"
+  done < <(jq -c '.[]' <<<"$accounts")
+
+  echo ""
+
+  # Check individual containers for public access
+  log "Checking container-level public access..."
+  echo ""
+
+  local acct_name containers ctr ctr_name ctr_access
+  while IFS= read -r acct_name; do
+    # Accounts whose containers cannot be listed are skipped.
+    containers=$(az_cmd storage container list \
+      --account-name "$acct_name" --auth-mode login \
+      --output json 2>/dev/null) || continue
+
+    while IFS= read -r ctr; do
+      ctr_name=$(jq -r '.name' <<<"$ctr")
+      ctr_access=$(jq -r '.properties.publicAccess // "none"' <<<"$ctr")
+
+      if [[ "$ctr_access" != "none" && "$ctr_access" != "null" ]]; then
+        printf " %-28s %-28s %-14s %b%s%b\n" \
+          "${acct_name:0:27}" "${ctr_name:0:27}" "$ctr_access" \
+          "$RED" "CRITICAL" "$RESET"
+        flag_crit
+      fi
+    done < <(jq -c '.[]' <<<"$containers")
+  done < <(jq -r '.[].name' <<<"$accounts")
+
+  print_summary
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SYNC
+# ══════════════════════════════════════════════════════════════════════
+do_sync() {
+ [[ -z "$STORAGE_ACCOUNT" ]] && die "--sync requires --account"
+ [[ -z "$CONTAINER_NAME" ]] && die "--sync requires --container"
+ [[ -z "$SOURCE_PATH" ]] && die "--sync requires --source PATH"
+ [[ -d "$SOURCE_PATH" ]] || die "Source path does not exist: ${SOURCE_PATH}"
+
+ section_header "Syncing to ${STORAGE_ACCOUNT}/${CONTAINER_NAME}"
+ field "Source:" "$SOURCE_PATH"
+ echo ""
+
+ if az_cmd storage blob upload-batch \
+ --account-name "$STORAGE_ACCOUNT" \
+ --destination "$CONTAINER_NAME" \
+ --source "$SOURCE_PATH" \
+ --auth-mode login \
+ --overwrite 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} Sync complete"
+ else
+ die "Sync failed"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TIER
+# ══════════════════════════════════════════════════════════════════════
+do_tier() {
+ [[ -z "$STORAGE_ACCOUNT" ]] && die "--tier requires --account"
+ [[ -z "$CONTAINER_NAME" ]] && die "--tier requires --container"
+ [[ -z "$TIER_TARGET" ]] && die "--tier requires --set-tier TIER"
+
+ section_header "Changing Blob Tier"
+ field "Account:" "$STORAGE_ACCOUNT"
+ field "Container:" "$CONTAINER_NAME"
+ field "Target tier:" "$TIER_TARGET"
+ echo ""
+
+ local blobs
+ blobs=$(az_cmd storage blob list \
+ --account-name "$STORAGE_ACCOUNT" --container-name "$CONTAINER_NAME" \
+ --auth-mode login --output json 2>/dev/null) \
+ || die "Failed to list blobs"
+
+ local changed=0 errors=0
+
+ echo "$blobs" | jq -r '.[].name' | while IFS= read -r blob_name; do
+ if az_cmd storage blob set-tier \
+ --account-name "$STORAGE_ACCOUNT" --container-name "$CONTAINER_NAME" \
+ --name "$blob_name" --tier "$TIER_TARGET" \
+ --auth-mode login 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} ${blob_name} → ${TIER_TARGET}"
+ ((changed++)) || true
+ else
+ echo -e " ${RED}✗${RESET} ${blob_name} — failed"
+ ((errors++)) || true
+ fi
+ done
+
+ echo ""
+ field "Changed:" "$changed"
+ [[ "$errors" -gt 0 ]] && field_color "Errors:" "${RED}${errors}${RESET}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIFECYCLE
+# ══════════════════════════════════════════════════════════════════════
+do_lifecycle() {
+ [[ -z "$STORAGE_ACCOUNT" ]] && die "--lifecycle requires --account"
+ [[ -z "$RESOURCE_GROUP" ]] && die "--lifecycle requires --resource-group"
+
+ section_header "Lifecycle Management Policy"
+ field "Account:" "$STORAGE_ACCOUNT"
+ echo ""
+
+ local policy
+ policy=$(az_cmd storage account management-policy show \
+ --account-name "$STORAGE_ACCOUNT" --resource-group "$RESOURCE_GROUP" \
+ --output json 2>/dev/null)
+
+ if [[ -z "$policy" || "$policy" == "null" ]]; then
+ log "No lifecycle policy configured"
+ else
+ echo "$policy" | jq '.policy.rules[] | {name: .name, type: .type, definition: .definition}'
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# STATS
+# ══════════════════════════════════════════════════════════════════════
+# Print location, kind and replication for each storage account (optionally
+# limited to RESOURCE_GROUP), followed by the total count.
+# Exits: 1 (via die) when the account listing cannot be retrieved.
+do_stats() {
+  section_header "Storage Statistics"
+
+  local accounts
+  local args=(storage account list --output json)
+  if [[ -n "$RESOURCE_GROUP" ]]; then
+    args+=(--resource-group "$RESOURCE_GROUP")
+  fi
+  # stderr is suppressed, so report the failure explicitly instead of
+  # letting `set -e` end the script without any message.
+  accounts=$(az_cmd "${args[@]}" 2>/dev/null) \
+    || die "Failed to list storage accounts — check permissions"
+
+  local total_accounts
+  total_accounts=$(jq 'length' <<<"$accounts")
+
+  printf " %-28s %-16s %-12s %s\n" \
+    "ACCOUNT" "LOCATION" "KIND" "REPLICATION"
+  printf " %s\n" "$(printf '%.0s─' {1..75})"
+
+  local acct name location kind repl
+  while IFS= read -r acct; do
+    name=$(jq -r '.name' <<<"$acct")
+    location=$(jq -r '.location' <<<"$acct")
+    kind=$(jq -r '.kind' <<<"$acct")
+    repl=$(jq -r '.sku.name' <<<"$acct")
+
+    printf " %-28s %-16s %-12s %s\n" \
+      "${name:0:27}" "$location" "${kind:0:11}" "$repl"
+  done < <(jq -c '.[]' <<<"$accounts")
+
+  echo ""
+  field "Total accounts:" "$total_accounts"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+print_summary() {
+ echo ""
+ echo " ══════════════════════════════════════════"
+ echo " Storage Audit Summary"
+ echo " ══════════════════════════════════════════"
+ printf " %-20s %b%d%b\n" "CRITICAL:" "$RED" "$TOTAL_CRIT" "$RESET"
+ printf " %-20s %b%d%b\n" "WARN:" "$YELLOW" "$TOTAL_WARN" "$RESET"
+ printf " %-20s %b%d%b\n" "INFO:" "$CYAN" "$TOTAL_INFO" "$RESET"
+ printf " %-20s %b%d%b\n" "OK:" "$GREEN" "$TOTAL_OK" "$RESET"
+ echo " ──────────────────────────────────────────"
+ echo ""
+
+ if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+ echo -e " ${RED}${BOLD}Action required:${RESET} ${TOTAL_CRIT} critical finding(s)"
+ echo ""
+ echo " Top recommendations:"
+ echo " • Disable public blob access on all storage accounts"
+ echo " • Set container access level to private"
+ echo " • Enable HTTPS-only traffic"
+ echo " • Configure network rules to restrict access"
+ echo ""
+ elif [[ "$TOTAL_WARN" -gt 0 ]]; then
+ echo -e " ${YELLOW}Review recommended:${RESET} ${TOTAL_WARN} warning(s)"
+ echo ""
+ else
+ echo -e " ${GREEN}All checks passed${RESET}"
+ echo ""
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2; }
+log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; }
+log_debug() { [[ "$VERBOSE" == "true" ]] && printf "${DIM}[DEBUG] %s${RESET}\n" "$*"; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+die() { log_error "$@"; exit 1; }
+
+# Verify that every external tool the script relies on is installed and
+# that the running bash is version 4 or newer.
+# Exits: 1 (via die), listing all missing tools at once.
+check_deps() {
+  local missing=() tool
+  # bc is included because the delta/total formatting functions pipe their
+  # arithmetic through it.
+  for tool in az jq curl bc; do
+    if command -v "$tool" >/dev/null 2>&1; then
+      continue
+    fi
+    # The Azure CLI is reported under its package-style name.
+    if [[ "$tool" == "az" ]]; then
+      missing+=("az-cli")
+    else
+      missing+=("$tool")
+    fi
+  done
+  if (( ${#missing[@]} > 0 )); then
+    die "Missing required tools: ${missing[*]}"
+  fi
+
+  local bash_major="${BASH_VERSINFO[0]}"
+  if (( bash_major < 4 )); then
+    die "Requires bash 4+, found ${BASH_VERSION}"
+  fi
+}
+
+validate_date() {
+ local d="$1"
+ if [[ ! "$d" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
+ die "Invalid date format: $d (expected YYYY-MM-DD)"
+ fi
+}
+
+# ── Date math (portable) ─────────────────────────────────────────────
+# Print the date that lies OFFSET days from BASE.
+# Usage: date_offset YYYY-MM-DD -N   → the date N days before BASE
+# Works with both GNU date and the BSD/macOS date.
+date_offset() {
+  local anchor="$1" shift_days="$2"
+
+  # Only GNU date understands --version; use that to pick the dialect.
+  if ! date --version >/dev/null 2>&1; then
+    # macOS date
+    date -j -v"${shift_days}d" -f "%Y-%m-%d" "$anchor" +%Y-%m-%d
+    return
+  fi
+
+  # GNU date
+  date -d "${anchor} ${shift_days} days" +%Y-%m-%d
+}
+
+today_utc() { date -u +%Y-%m-%d; }
+
+first_of_month() {
+ local d="$1"
+ echo "${d:0:8}01"
+}
+
+first_of_prev_month() {
+ local d="$1"
+ local year="${d:0:4}"
+ local month="${d:5:2}"
+ month=$((10#$month - 1))
+ if (( month == 0 )); then
+ month=12
+ year=$((year - 1))
+ fi
+ printf "%04d-%02d-01" "$year" "$month"
+}
+
+days_between() {
+ local s="$1" e="$2"
+ local ss se
+ if date --version >/dev/null 2>&1; then
+ ss=$(date -d "$s" +%s)
+ se=$(date -d "$e" +%s)
+ else
+ ss=$(date -j -f "%Y-%m-%d" "$s" +%s)
+ se=$(date -j -f "%Y-%m-%d" "$e" +%s)
+ fi
+ echo $(( (se - ss) / 86400 ))
+}
+
+# ── Compute date ranges ──────────────────────────────────────────────
+compute_ranges() {
+ local today
+ today="$(today_utc)"
+
+ case "$RUN_MODE" in
+ daily)
+ PERIOD_START="$(date_offset "$today" -1)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -2)"
+ PREV_END="$(date_offset "$today" -1)"
+ ;;
+ weekly)
+ PERIOD_START="$(date_offset "$today" -7)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -14)"
+ PREV_END="$(date_offset "$today" -7)"
+ ;;
+ monthly)
+ PERIOD_START="$(first_of_month "$today")"
+ PERIOD_END="$today"
+ local prev_first
+ prev_first="$(first_of_prev_month "$today")"
+ PREV_START="$prev_first"
+ PREV_END="$PERIOD_START"
+ ;;
+ custom)
+ PERIOD_START="$CUSTOM_START"
+ PERIOD_END="$CUSTOM_END"
+ local span
+ span="$(days_between "$CUSTOM_START" "$CUSTOM_END")"
+ PREV_START="$(date_offset "$CUSTOM_START" "-$span")"
+ PREV_END="$CUSTOM_START"
+ ;;
+ *)
+ die "Unknown mode: $RUN_MODE"
+ ;;
+ esac
+
+ log_debug "Current period: $PERIOD_START → $PERIOD_END"
+ log_debug "Previous period: $PREV_START → $PREV_END"
+}
+
+# ── Resolve subscription ─────────────────────────────────────────────
+resolve_subscription() {
+ if [[ -n "$SUBSCRIPTION" ]]; then
+ log_debug "Using subscription: $SUBSCRIPTION"
+ return
+ fi
+ SUBSCRIPTION=$(az account show --query 'id' -o tsv 2>/dev/null) \
+ || die "Cannot determine subscription. Use --subscription or az account set."
+ log_debug "Resolved subscription: $SUBSCRIPTION"
+}
+
+# ── Build Cost Management query payload ──────────────────────────────
+build_query_payload() {
+ local start="$1" end="$2"
+ local grouping_name grouping_type
+
+ case "$GROUP_BY" in
+ SERVICE)
+ grouping_name="ServiceName"
+ grouping_type="Dimension"
+ ;;
+ RESOURCE_GROUP)
+ grouping_name="ResourceGroupName"
+ grouping_type="Dimension"
+ ;;
+ TAG)
+ if [[ -z "$COST_TAG_KEY" ]]; then
+ die "--group-by TAG requires --tag KEY=VALUE"
+ fi
+ grouping_name="$COST_TAG_KEY"
+ grouping_type="TagKey"
+ ;;
+ *)
+ die "Invalid --group-by value: $GROUP_BY (expected SERVICE, RESOURCE_GROUP, or TAG)"
+ ;;
+ esac
+
+ local filter_block="{}"
+ if [[ -n "$COST_TAG_KEY" && -n "$COST_TAG_VALUE" && "$GROUP_BY" != "TAG" ]]; then
+ filter_block=$(jq -n \
+ --arg key "$COST_TAG_KEY" \
+ --arg val "$COST_TAG_VALUE" \
+ '{
+ "Tags": {
+ "Name": $key,
+ "Operator": "In",
+ "Values": [$val]
+ }
+ }')
+ fi
+
+ local payload
+ if [[ "$filter_block" == "{}" ]]; then
+ payload=$(jq -n \
+ --arg start "$start" \
+ --arg end "$end" \
+ --arg gname "$grouping_name" \
+ --arg gtype "$grouping_type" \
+ '{
+ "type": "ActualCost",
+ "dataSet": {
+ "granularity": "None",
+ "aggregation": {
+ "totalCost": {
+ "name": "Cost",
+ "function": "Sum"
+ }
+ },
+ "grouping": [
+ {
+ "type": $gtype,
+ "name": $gname
+ }
+ ]
+ },
+ "timeframe": "Custom",
+ "timePeriod": {
+ "from": $start,
+ "to": $end
+ }
+ }')
+ else
+ payload=$(jq -n \
+ --arg start "$start" \
+ --arg end "$end" \
+ --arg gname "$grouping_name" \
+ --arg gtype "$grouping_type" \
+ --argjson filter "$filter_block" \
+ '{
+ "type": "ActualCost",
+ "dataSet": {
+ "granularity": "None",
+ "aggregation": {
+ "totalCost": {
+ "name": "Cost",
+ "function": "Sum"
+ }
+ },
+ "grouping": [
+ {
+ "type": $gtype,
+ "name": $gname
+ }
+ ],
+ "filter": $filter
+ },
+ "timeframe": "Custom",
+ "timePeriod": {
+ "from": $start,
+ "to": $end
+ }
+ }')
+ fi
+
+ echo "$payload"
+}
+
+# ── Query Cost Management API ────────────────────────────────────────
+query_costs() {
+ local start="$1" end="$2"
+ local payload
+ payload="$(build_query_payload "$start" "$end")"
+
+ local scope="/subscriptions/${SUBSCRIPTION}"
+ local api_url="https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01"
+
+ log_debug "Querying: $api_url"
+ log_debug "Payload: $payload"
+
+ az rest \
+ --method post \
+ --url "$api_url" \
+ --body "$payload" \
+ --output json 2>/dev/null
+}
+
+# ── Parse cost data ──────────────────────────────────────────────────
+parse_costs() {
+ local raw="$1"
+ echo "$raw" | jq -r '
+ .properties.rows // [] |
+ map({
+ key: .[1],
+ amount: (.[0] | tonumber)
+ }) |
+ group_by(.key) |
+ map({
+ key: .[0].key,
+ total: (map(.amount) | add)
+ }) |
+ sort_by(-.total) |
+ .[] |
+ "\(.key)\t\(.total)"
+ ' 2>/dev/null || echo ""
+}
+
+# ── Format helpers ────────────────────────────────────────────────────
+fmt_currency() {
+ printf "$%.2f" "$1"
+}
+
+# Print the percentage change from $2 (previous) to $1 (current) with one
+# decimal and an explicit "+" for increases, or "N/A" when the previous
+# value is zero.
+fmt_delta() {
+  local curr="$1" prev="$2"
+  if (( $(echo "$prev == 0" | bc -l) )); then
+    echo "N/A"
+    return
+  fi
+  local pct
+  # Multiply before dividing: with scale=1 the quotient is truncated to one
+  # decimal, so dividing first would turn e.g. 5/100 into 0 and report 0%.
+  pct=$(echo "scale=1; ($curr - $prev) * 100 / $prev" | bc -l)
+  local sign=""
+  if (( $(echo "$pct > 0" | bc -l) )); then
+    sign="+"
+  fi
+  echo "${sign}${pct}%"
+}
+
+print_header() {
+    # Print the report banner: tool name, subscription name and id, run mode,
+    # current UTC time and, in custom mode, the requested period.
+    # The subscription name falls back to "unknown" when az cannot provide it.
+    local subscription_name
+    subscription_name=$(az account show --query 'name' -o tsv 2>/dev/null || echo "unknown")
+
+    printf '%s\n' \
+        "Azure Cost Reporter" \
+        "Subscription: $subscription_name ($SUBSCRIPTION)" \
+        "Mode: $RUN_MODE" \
+        "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+    if [[ "$RUN_MODE" == "custom" ]]; then
+        printf '%s\n' "Period: $PERIOD_START → $PERIOD_END"
+    fi
+    printf '\n'
+}
+
+# ── Text table output ────────────────────────────────────────────────
+output_text_table() {
+    # Render the cost comparison as an aligned text table.
+    #   $1  NAME of an associative array: key -> current-period amount
+    #   $2  NAME of an associative array: key -> previous-period amount
+    #       (keys missing from it are treated as 0)
+    # Rows are printed highest current cost first, followed by a TOTAL row.
+    local -n curr_data=$1
+    local -n prev_data=$2
+    local label="SERVICE"
+    case "$GROUP_BY" in
+        RESOURCE_GROUP) label="RESOURCE_GROUP" ;;
+        TAG) label="TAG" ;;
+    esac
+    local divider="──────────────────────────────────────────────────────────────────────"
+    printf " %-38s %-12s %-12s %s\n" "$label" "COST" "PREV" "DELTA"
+    printf " %s\n" "$divider"
+
+    # Associative arrays iterate in unspecified order, so sort the keys by
+    # amount (descending, key as tie-breaker) to get a stable, useful table.
+    local -a ordered_keys=()
+    local key
+    mapfile -t ordered_keys < <(
+        for key in "${!curr_data[@]}"; do
+            printf '%s\t%s\n' "${curr_data[$key]}" "$key"
+        done | LC_ALL=C sort -t $'\t' -k1,1gr -k2,2 | cut -f2-
+    )
+
+    local total_curr=0 total_prev=0
+    local cost prev_cost i
+    for (( i = 0; i < ${#ordered_keys[@]}; i++ )); do
+        key="${ordered_keys[i]}"
+        cost="${curr_data[$key]}"
+        prev_cost="${prev_data[$key]:-0}"
+        printf " %-38s %-12s %-12s %s\n" \
+            "$key" "$(fmt_currency "$cost")" "$(fmt_currency "$prev_cost")" "$(fmt_delta "$cost" "$prev_cost")"
+        total_curr=$(echo "$total_curr + $cost" | bc -l)
+        total_prev=$(echo "$total_prev + $prev_cost" | bc -l)
+    done
+    printf " %s\n" "$divider"
+    printf " %-38s %-12s %-12s %s\n" \
+        "TOTAL" "$(fmt_currency "$total_curr")" "$(fmt_currency "$total_prev")" "$(fmt_delta "$total_curr" "$total_prev")"
+}
+
+# ── CSV output ────────────────────────────────────────────────────────
+output_csv() {
+    # Emit the cost comparison as CSV: <label>,cost,previous_cost,delta_pct.
+    #   $1  NAME of an associative array: key -> current-period amount
+    #   $2  NAME of an associative array: key -> previous-period amount
+    # delta_pct is 0 when the previous amount is zero.
+    local -n curr_data=$1
+    local -n prev_data=$2
+    local label="service"
+    case "$GROUP_BY" in
+        RESOURCE_GROUP) label="resource_group" ;;
+        TAG) label="tag" ;;
+    esac
+    echo "${label},cost,previous_cost,delta_pct"
+
+    local dq='"'
+    local key cost prev_cost pct quoted_key
+    for key in "${!curr_data[@]}"; do
+        cost="${curr_data[$key]}"
+        prev_cost="${prev_data[$key]:-0}"
+        pct="0"
+        if (( $(echo "$prev_cost != 0" | bc -l) )); then
+            # Multiply before dividing: bc truncates the quotient to `scale`
+            # digits, which would otherwise round the percentage to an integer.
+            pct=$(echo "scale=2; (($cost - $prev_cost) * 100) / $prev_cost" | bc -l)
+        fi
+        # CSV quoting: embedded double quotes are escaped by doubling them.
+        quoted_key="${key//$dq/$dq$dq}"
+        echo "\"${quoted_key}\",$cost,$prev_cost,$pct"
+    done
+}
+
+# ── JSON output ───────────────────────────────────────────────────────
+output_json() {
+    # Emit the cost comparison as one compact JSON object on stdout:
+    #   {mode, period_start, period_end, previous_start, previous_end,
+    #    group_by, items: [{<label>, cost, previous_cost}, ...]}
+    #   $1  NAME of an associative array: key -> current-period amount
+    #   $2  NAME of an associative array: key -> previous-period amount
+    # The document is assembled by jq so every string is escaped correctly
+    # (hand-built JSON broke on keys containing quotes or backslashes).
+    local -n curr_data=$1
+    local -n prev_data=$2
+    local label="service"
+    case "$GROUP_BY" in
+        RESOURCE_GROUP) label="resource_group" ;;
+        TAG) label="tag" ;;
+    esac
+
+    # One "key<TAB>cost<TAB>previous" line per item; keys arrive via a
+    # tab-separated read in render_report, so they cannot contain tabs.
+    local key
+    for key in "${!curr_data[@]}"; do
+        printf '%s\t%s\t%s\n' "$key" "${curr_data[$key]}" "${prev_data[$key]:-0}"
+    done | jq -R -n -c \
+        --arg label "$label" \
+        --arg mode "$RUN_MODE" \
+        --arg period_start "$PERIOD_START" \
+        --arg period_end "$PERIOD_END" \
+        --arg previous_start "$PREV_START" \
+        --arg previous_end "$PREV_END" \
+        --arg group_by "$GROUP_BY" \
+        '{
+            mode: $mode,
+            period_start: $period_start,
+            period_end: $period_end,
+            previous_start: $previous_start,
+            previous_end: $previous_end,
+            group_by: $group_by,
+            items: [
+                inputs
+                | select(length > 0)
+                | split("\t")
+                | {
+                    ($label): .[0],
+                    cost: (.[1] | tonumber),
+                    previous_cost: (.[2] | tonumber)
+                  }
+            ]
+        }'
+}
+
+# ── Render report ─────────────────────────────────────────────────────
+render_report() {
+    # Build the report for the format in $OUTPUT_FORMAT and print it.
+    #   $1  raw query response for the current period
+    #   $2  raw query response for the previous period
+    local curr_raw="$1" prev_raw="$2"
+
+    # key -> amount maps for both periods (function-local)
+    local -A curr_costs
+    local -A prev_costs
+
+    while IFS=$'\t' read -r key amount; do
+        if [[ -n "$key" ]]; then
+            curr_costs["$key"]="$amount"
+        fi
+    done < <(parse_costs "$curr_raw")
+
+    while IFS=$'\t' read -r key amount; do
+        if [[ -n "$key" ]]; then
+            prev_costs["$key"]="$amount"
+        fi
+    done < <(parse_costs "$prev_raw")
+
+    # A key that only existed in the previous period still gets a row,
+    # with a current amount of 0.
+    for key in "${!prev_costs[@]}"; do
+        [[ -n "${curr_costs[$key]+x}" ]] || curr_costs["$key"]="0"
+    done
+
+    if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+        print_header
+        echo "Cost Breakdown — ${PERIOD_START} → ${PERIOD_END}"
+        output_text_table curr_costs prev_costs
+        echo ""
+    elif [[ "$OUTPUT_FORMAT" == "csv" ]]; then
+        output_csv curr_costs prev_costs
+    elif [[ "$OUTPUT_FORMAT" == "json" ]]; then
+        output_json curr_costs prev_costs
+    else
+        die "Unknown format: $OUTPUT_FORMAT"
+    fi
+}
+
+# ── Slack webhook ─────────────────────────────────────────────────────
+send_slack() {
+    # Post the report ($1) to the Slack incoming webhook ($2) as a code block.
+    # Reports longer than 3000 characters are truncated with a notice.
+    # Returns 0 on HTTP 200, 1 otherwise (including transport failures).
+    local report="$1" webhook="$2"
+
+    log_info "Posting report to Slack..."
+
+    # Truncate for Slack message limits
+    local max_len=3000
+    local body="$report"
+    if (( ${#body} > max_len )); then
+        body="${body:0:$max_len}"$'\n\n'"... (truncated — full report exceeds Slack message limit)"
+    fi
+
+    local payload
+    payload=$(jq -n --arg text "\`\`\`${body}\`\`\`" '{ text: $text }')
+
+    # curl exits non-zero on transport errors (DNS, refused connection, ...)
+    # while still printing "000" for %{http_code}. Swallow the exit status so
+    # errexit cannot abort before the failure is logged below.
+    local http_code
+    http_code=$(curl -s -o /dev/null -w "%{http_code}" \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d "$payload" \
+        "$webhook") || true
+    http_code="${http_code:-000}"
+
+    if [[ "$http_code" != "200" ]]; then
+        log_error "Slack webhook returned HTTP $http_code"
+        return 1
+    fi
+
+    log_info "Slack message posted"
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat < 0 )); do
+ case "$1" in
+ --daily|--weekly|--monthly)
+ RUN_MODE="${1#--}"; shift ;;
+ --custom)
+ RUN_MODE="custom"
+ [[ $# -lt 3 ]] && die "--custom requires START and END dates"
+ CUSTOM_START="$2"; CUSTOM_END="$3"
+ validate_date "$CUSTOM_START"; validate_date "$CUSTOM_END"
+ shift 3 ;;
+ --group-by)
+ [[ $# -lt 2 ]] && die "--group-by requires a value"
+ GROUP_BY="$2"; shift 2 ;;
+ --tag)
+ [[ $# -lt 2 ]] && die "--tag requires KEY=VALUE"
+ [[ "$2" != *"="* ]] && die "--tag value must be KEY=VALUE"
+ COST_TAG_KEY="${2%%=*}"; COST_TAG_VALUE="${2#*=}"; shift 2 ;;
+ --subscription)
+ [[ $# -lt 2 ]] && die "--subscription requires a value"
+ SUBSCRIPTION="$2"; shift 2 ;;
+ --format)
+ [[ $# -lt 2 ]] && die "--format requires a value"
+ OUTPUT_FORMAT="$2"; shift 2 ;;
+ --slack)
+ [[ $# -lt 2 ]] && die "--slack requires a webhook URL"
+ SLACK_URL="$2"; shift 2 ;;
+ --verbose) VERBOSE="true"; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ --help|-h) usage ;;
+ *) die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ if [[ -z "$RUN_MODE" ]]; then log_error "No mode specified"; echo ""; usage; exit 1; fi
+ [[ -z "$SLACK_URL" && -n "$SLACK_WEBHOOK_URL" ]] && SLACK_URL="$SLACK_WEBHOOK_URL"
+
+ case "$GROUP_BY" in
+ SERVICE|RESOURCE_GROUP|TAG) ;;
+ *) die "Invalid --group-by: $GROUP_BY" ;;
+ esac
+ case "$OUTPUT_FORMAT" in
+ text|csv|json) ;;
+ *) die "Invalid --format: $OUTPUT_FORMAT" ;;
+ esac
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+main() {
+    # Entry point: parse options, verify prerequisites, query the current and
+    # previous periods, print the report and optionally post it to Slack.
+    parse_args "$@"
+    setup_colors
+    check_deps
+
+    START_TIME=$(date +%s)
+
+    # Validate Azure credentials
+    log_debug "Validating Azure credentials..."
+    az account show >/dev/null 2>&1 \
+        || die "Azure credentials not configured — run 'az login' first"
+
+    resolve_subscription
+    compute_ranges
+
+    log_info "Querying Cost Management ($RUN_MODE, group by $GROUP_BY)..."
+
+    # query_costs discards az's stderr, so a failed request would otherwise
+    # terminate the script (errexit) without any explanation.
+    local curr_raw prev_raw
+    curr_raw="$(query_costs "$PERIOD_START" "$PERIOD_END")" \
+        || die "Cost query failed for $PERIOD_START → $PERIOD_END"
+    prev_raw="$(query_costs "$PREV_START" "$PREV_END")" \
+        || die "Cost query failed for $PREV_START → $PREV_END"
+
+    if [[ -z "$curr_raw" ]]; then
+        die "No cost data returned for $PERIOD_START → $PERIOD_END"
+    fi
+
+    local report
+    report="$(render_report "$curr_raw" "$prev_raw")"
+
+    # Output to stdout
+    echo "$report"
+
+    # Slack delivery
+    if [[ -n "$SLACK_URL" ]]; then
+        send_slack "$report" "$SLACK_URL"
+    fi
+
+    local elapsed
+    elapsed=$(( $(date +%s) - START_TIME ))
+    log_info "Completed in ${elapsed}s"
+}
+
+main "$@"
diff --git a/azure-snapshot-manager.sh b/azure-snapshot-manager.sh
new file mode 100644
index 0000000..c836fd3
--- /dev/null
+++ b/azure-snapshot-manager.sh
@@ -0,0 +1,726 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### azure-snapshot-manager.sh — Create, rotate, list, audit, and restore Azure ####
+#### managed disk snapshots via az CLI. Automated retention and fleet-wide ops ####
+#### Requires: bash 4+, az CLI, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./azure-snapshot-manager.sh --snapshot --all ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+    # Fill the colour variables with ANSI escape sequences.
+    # COLOR=never leaves them empty, COLOR=always forces them on, and any
+    # other value enables them only when stdout is a terminal.
+    local mode="${COLOR:-auto}"
+    [[ "$mode" != "never" ]] || return 0
+    if [[ "$mode" != "always" && ! -t 1 ]]; then
+        return 0
+    fi
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+section_header() {
+    # Print the section title in $1, framed by blank lines.
+    local title="$1"
+    printf '\n%b\n\n' " ${BOLD}${CYAN}── ${title} ──${RESET}"
+}
+
+field() {
+    # Print a "label value" line; the label ($1) is bold and padded to 22
+    # columns, the value ($2) is printed verbatim.
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %s\n" "$label" "$value"
+}
+
+field_color() {
+    # Like field, but backslash escapes in the value ($2) are interpreted,
+    # so it may carry colour sequences.
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %b\n" "$label" "$value"
+}
+
+elapsed() {
+    # Print the seconds elapsed since START_TIME, suffixed with "s".
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+ALSO_ROTATE="false"               # "true": do_snapshot finishes by calling do_rotate
+VM_NAME=""                        # single VM to act on
+RESOURCE_GROUP=""                 # optional filter for VM / snapshot listings
+TARGET_ALL="false"                # "true": do_snapshot covers every listed VM
+SNAPSHOT_ID=""                    # snapshot name used by do_restore
+KEEP="${ASM_KEEP:-3}"             # snapshots kept per VM by do_rotate
+PREFIX="${ASM_PREFIX:-auto}"      # name prefix of snapshots this script manages
+MAX_AGE="${ASM_MAX_AGE:-7}"       # days after which the latest snapshot counts as stale
+OUTPUT_FORMAT="${ASM_FORMAT:-text}"
+DRY_RUN="true"                    # do_rotate only deletes when FORCE is "true"
+FORCE="false"
+: "${VERBOSE:=false}"
+: "${COLOR:=auto}"
+SUBSCRIPTION=""                   # when set, az_cmd adds --subscription to every call
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+# Counters updated by do_snapshot / do_rotate
+SNAP_CREATED=0
+SNAP_DELETED=0
+SNAP_ERRORS=0
+
+# ── Dependency and credential checks ────────────────────────────────
+check_deps() {
+ command -v az &>/dev/null || die "az CLI is required (install: https://aka.ms/InstallAzureCLIDeb)"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+check_credentials() {
+    # Verify that az is logged in, report the active subscription and, when
+    # SUBSCRIPTION is set, switch the CLI to it. Dies if either step fails.
+    local acct
+    # Capture stdout only: with 2>&1 any warning az prints on stderr would be
+    # mixed into the JSON and break the jq parsing below.
+    acct=$(az account show --output json 2>/dev/null) \
+        || die "Azure credentials not configured — run 'az login'"
+
+    local sub_name sub_id
+    sub_name=$(jq -r '.name' <<< "$acct")
+    sub_id=$(jq -r '.id' <<< "$acct")
+    verbose "Subscription: ${sub_name} (${sub_id})"
+    log "Subscription: ${sub_name}"
+
+    if [[ -n "$SUBSCRIPTION" ]]; then
+        az account set --subscription "$SUBSCRIPTION" 2>/dev/null \
+            || die "Cannot switch to subscription: ${SUBSCRIPTION}"
+        log "Switched to subscription: ${SUBSCRIPTION}"
+    fi
+}
+
+# ── Azure CLI wrapper ────────────────────────────────────────────────
+az_cmd() {
+    # Run `az` with the given arguments, appending --subscription when
+    # SUBSCRIPTION is set. Output and exit status are those of az.
+    local args=("$@")
+    if [[ -n "$SUBSCRIPTION" ]]; then
+        args+=(--subscription "$SUBSCRIPTION")
+    fi
+    # Callers capture this function's stdout (JSON / TSV), so the debug line
+    # must go to stderr; on stdout it corrupted every captured result
+    # whenever VERBOSE was "true".
+    verbose "az ${args[*]}" >&2
+    az "${args[@]}"
+}
+
+# ── VM helpers ───────────────────────────────────────────────────────
+get_all_vms() {
+    # Print the JSON list of VMs, limited to RESOURCE_GROUP when it is set.
+    local -a list_args=(vm list --output json)
+    if [[ -n "$RESOURCE_GROUP" ]]; then
+        list_args+=(--resource-group "$RESOURCE_GROUP")
+    fi
+    az_cmd "${list_args[@]}" 2>/dev/null
+}
+
+get_vm_os_disk_id() {
+ local vm_name="$1" rg="$2"
+ az_cmd vm show --name "$vm_name" --resource-group "$rg" \
+ --query 'storageProfile.osDisk.managedDisk.id' --output tsv 2>/dev/null
+}
+
+get_vm_rg() {
+    # Print the .resourceGroup field of the VM JSON object passed as $1.
+    local vm_json="$1"
+    jq -r '.resourceGroup' <<< "$vm_json"
+}
+
+# ── Snapshot helpers ─────────────────────────────────────────────────
+list_snapshots() {
+    # Print the JSON list of snapshots, limited to RESOURCE_GROUP when set.
+    local -a list_args=(snapshot list --output json)
+    if [[ -n "$RESOURCE_GROUP" ]]; then
+        list_args+=(--resource-group "$RESOURCE_GROUP")
+    fi
+    az_cmd "${list_args[@]}" 2>/dev/null
+}
+
+managed_snapshots() {
+    # Print, as a JSON array, only the snapshots whose name starts with PREFIX.
+    list_snapshots \
+        | jq --arg pfx "$PREFIX" 'map(select(.name | startswith($pfx)))'
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SNAPSHOT
+# ══════════════════════════════════════════════════════════════════════
+do_snapshot() {
+    # Snapshot the OS disk of the selected VM(s): every listed VM when
+    # TARGET_ALL is "true", otherwise the VM named VM_NAME.
+    # Snapshots are named <PREFIX>-<vm>-<timestamp> and tagged with managed-by
+    # and source-vm. Updates SNAP_CREATED / SNAP_ERRORS and, when ALSO_ROTATE
+    # is "true", finishes by calling do_rotate.
+    local vm_json
+    vm_json=$(get_all_vms) || die "Failed to list VMs"
+
+    local vms
+    if [[ "$TARGET_ALL" == "true" ]]; then
+        vms="$vm_json"
+    elif [[ -n "$VM_NAME" ]]; then
+        vms=$(jq --arg n "$VM_NAME" '[.[] | select(.name == $n)]' <<< "$vm_json")
+    else
+        die "Specify --vm NAME or --all"
+    fi
+
+    local count
+    count=$(jq 'length' <<< "$vms")
+    [[ "$count" -eq 0 ]] && die "No VMs found"
+
+    local target_label="$VM_NAME"
+    [[ "$TARGET_ALL" == "true" ]] && target_label="all (${count} VMs)"
+
+    section_header "Creating Snapshots"
+    field "Target:" "$target_label"
+    field "Prefix:" "$PREFIX"
+    echo ""
+
+    # The loop must run in the current shell: when it was the last stage of a
+    # pipeline it ran in a subshell and every counter update was lost, so the
+    # summary below always reported 0. The list is read from fd 3 so that az
+    # cannot consume it through stdin.
+    local name rg disk_id snap_name
+    while IFS=$'\t' read -r -u 3 name rg; do
+        # A failed lookup is recorded as an error instead of aborting the run.
+        disk_id=$(get_vm_os_disk_id "$name" "$rg") || disk_id=""
+        snap_name="${PREFIX}-${name}-$(date +%Y%m%d-%H%M%S)"
+
+        if [[ -z "$disk_id" ]]; then
+            echo -e " ${RED}✗${RESET} ${name} (${rg}) no OS disk found"
+            ((SNAP_ERRORS++)) || true
+            continue
+        fi
+
+        verbose "Snapshotting ${name} disk ${disk_id}"
+
+        if az_cmd snapshot create \
+            --resource-group "$rg" \
+            --name "$snap_name" \
+            --source "$disk_id" \
+            --tags "managed-by=${SCRIPT_NAME}" "source-vm=${name}" \
+            --output none 2>/dev/null; then
+            echo -e " ${GREEN}✓${RESET} ${name} (${rg}) ${snap_name}"
+            ((SNAP_CREATED++)) || true
+        else
+            echo -e " ${RED}✗${RESET} ${name} (${rg}) failed"
+            ((SNAP_ERRORS++)) || true
+        fi
+
+        sleep 1
+    done 3< <(jq -r '.[] | [.name, .resourceGroup] | @tsv' <<< "$vms")
+
+    echo ""
+    field_color "Created:" "${GREEN}${SNAP_CREATED}${RESET}"
+    if [[ "$SNAP_ERRORS" -gt 0 ]]; then
+        field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}"
+    fi
+
+    if [[ "$ALSO_ROTATE" == "true" ]]; then
+        do_rotate
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ROTATE
+# ══════════════════════════════════════════════════════════════════════
+do_rotate() {
+    # Apply retention to managed snapshots (name starts with PREFIX): for each
+    # source-vm tag value keep the KEEP newest snapshots and remove the rest.
+    # Deletion only happens when FORCE is "true"; otherwise the candidates are
+    # just listed. Updates SNAP_DELETED / SNAP_ERRORS.
+    [[ "$KEEP" =~ ^[0-9]+$ ]] || die "Invalid keep count: ${KEEP}"
+
+    local dry_run="false"
+    if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+        dry_run="true"
+    fi
+
+    section_header "Rotating Snapshots"
+    field "Keep:" "$KEEP per VM"
+    field "Prefix:" "$PREFIX"
+    if [[ "$dry_run" == "true" ]]; then
+        field "Mode:" "DRY RUN (use --force to delete)"
+    else
+        field "Mode:" "LIVE — deletions are permanent"
+    fi
+    echo ""
+
+    local snaps
+    snaps=$(managed_snapshots) || die "Failed to list snapshots"
+
+    local vm_names
+    vm_names=$(jq -r '.[].tags["source-vm"] // empty' <<< "$snaps" | sort -u)
+
+    if [[ -z "$vm_names" ]]; then
+        log "No managed snapshots found matching prefix '${PREFIX}'"
+        return
+    fi
+
+    local vm vm_snaps total to_delete del_count sname srg
+    # Both loops read from dedicated descriptors (4 and 3) so that az cannot
+    # swallow their input through stdin.
+    while IFS= read -r -u 4 vm; do
+        [[ -z "$vm" ]] && continue
+        vm_snaps=$(jq --arg vm "$vm" \
+            '[.[] | select(.tags["source-vm"] == $vm)] | sort_by(.timeCreated) | reverse' <<< "$snaps")
+        total=$(jq 'length' <<< "$vm_snaps")
+
+        if (( total <= KEEP )); then
+            verbose "${vm}: ${total} snapshots, keeping all"
+            continue
+        fi
+
+        to_delete=$(jq --argjson k "$KEEP" '.[$k:]' <<< "$vm_snaps")
+        del_count=$(jq 'length' <<< "$to_delete")
+
+        # Run in the current shell: as the last stage of a pipeline this loop
+        # ran in a subshell and the counter updates never reached the caller,
+        # so "Deleted:" always showed 0.
+        while IFS=$'\t' read -r -u 3 sname srg; do
+            if [[ "$dry_run" == "true" ]]; then
+                echo -e " ${DIM}[DRY RUN]${RESET} would delete ${sname} (${srg})"
+            elif az_cmd snapshot delete --name "$sname" --resource-group "$srg" \
+                --output none 2>/dev/null; then
+                echo -e " ${YELLOW}✓${RESET} deleted ${sname}"
+                ((SNAP_DELETED++)) || true
+            else
+                echo -e " ${RED}✗${RESET} failed to delete ${sname}"
+                ((SNAP_ERRORS++)) || true
+            fi
+        done 3< <(jq -r '.[] | [.name, .resourceGroup] | @tsv' <<< "$to_delete")
+
+        log "${vm}: ${total} total, keeping ${KEEP}, removing ${del_count}"
+    done 4<<< "$vm_names"
+
+    echo ""
+    field_color "Deleted:" "${YELLOW}${SNAP_DELETED}${RESET}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST
+# ══════════════════════════════════════════════════════════════════════
+do_list() {
+    # Print a table of all snapshots (name, resource group, size, age in days,
+    # source VM tag) followed by the total count. Snapshots without a
+    # source-vm tag are shown as "manual"; an unparsable creation time yields
+    # an age of "unknown".
+    section_header "All Snapshots"
+
+    local snaps count
+    snaps=$(list_snapshots)
+    count=$(jq 'length' <<< "$snaps")
+
+    if [[ "$count" -eq 0 ]]; then
+        log "No snapshots found"
+        return
+    fi
+
+    printf " %-36s %-16s %-8s %-12s %s\n" \
+        "NAME" "RESOURCE_GROUP" "SIZE_GB" "AGE" "SOURCE_VM"
+    printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+    local now
+    now=$(date +%s)
+
+    jq -c '.[]' <<< "$snaps" | while IFS= read -r snap; do
+        local name rg size_gb created source_vm snap_epoch
+        local age_str="unknown"
+        name=$(jq -r '.name' <<< "$snap")
+        rg=$(jq -r '.resourceGroup' <<< "$snap")
+        size_gb=$(jq -r '.diskSizeGb // 0' <<< "$snap")
+        created=$(jq -r '.timeCreated // ""' <<< "$snap")
+        source_vm=$(jq -r '.tags["source-vm"] // "manual"' <<< "$snap")
+
+        # Age is only computed when the timestamp exists and date can parse it
+        if [[ -n "$created" ]]; then
+            snap_epoch=$(date -d "$created" +%s 2>/dev/null || echo 0)
+            if [[ "$snap_epoch" -gt 0 ]]; then
+                age_str="$(( (now - snap_epoch) / 86400 ))d"
+            fi
+        fi
+
+        printf " %-36s %-16s %-8s %-12s %s\n" \
+            "${name:0:35}" "${rg:0:15}" "$size_gb" "$age_str" "${source_vm:0:20}"
+    done
+
+    echo ""
+    field "Total snapshots:" "$count"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+do_audit() {
+    # Print one row per VM with its newest snapshot (matched through the
+    # source-vm tag), that snapshot's age, the snapshot count and a status:
+    # "Unprotected" (no snapshot), "Stale" (older than MAX_AGE days) or "OK".
+    # A snapshot whose age cannot be determined is reported as OK / "unknown".
+    section_header "Snapshot Audit"
+
+    local vm_json snaps now
+    vm_json=$(get_all_vms)
+    snaps=$(list_snapshots)
+    now=$(date +%s)
+
+    printf " %-24s %-16s %-20s %-8s %-8s %s\n" \
+        "VM_NAME" "RESOURCE_GROUP" "LATEST_SNAPSHOT" "AGE" "COUNT" "STATUS"
+    printf " %s\n" "$(printf '%.0s─' {1..95})"
+
+    jq -c '.[]' <<< "$vm_json" | while IFS= read -r vm; do
+        local name rg vm_snaps snap_count
+        name=$(jq -r '.name' <<< "$vm")
+        rg=$(jq -r '.resourceGroup' <<< "$vm")
+        vm_snaps=$(jq --arg vm "$name" \
+            '[.[] | select(.tags["source-vm"] == $vm)]' <<< "$snaps")
+        snap_count=$(jq 'length' <<< "$vm_snaps")
+
+        if [[ "$snap_count" -eq 0 ]]; then
+            printf " %-24s %-16s %-20s %-8s %-8s %b%s%b\n" \
+                "${name:0:23}" "${rg:0:15}" "(none)" "—" "0" \
+                "$RED" "✗ Unprotected" "$RESET"
+            continue
+        fi
+
+        local latest_name latest_date snap_epoch age_days
+        latest_name=$(jq -r 'sort_by(.timeCreated) | last | .name // ""' <<< "$vm_snaps")
+        latest_date=$(jq -r 'sort_by(.timeCreated) | last | .timeCreated // ""' <<< "$vm_snaps")
+
+        # Start from the "cannot tell" verdict and only downgrade to Stale
+        # when a parsable timestamp proves the snapshot is too old.
+        local age_str="unknown" status="✓ OK" color="$GREEN"
+        if [[ -n "$latest_date" ]]; then
+            snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+            if [[ "$snap_epoch" -gt 0 ]]; then
+                age_days=$(( (now - snap_epoch) / 86400 ))
+                age_str="${age_days}d"
+                if (( age_days > MAX_AGE )); then
+                    status="⚠ Stale"
+                    color="$YELLOW"
+                fi
+            fi
+        fi
+
+        printf " %-24s %-16s %-20s %-8s %-8s %b%s%b\n" \
+            "${name:0:23}" "${rg:0:15}" "${latest_name:0:19}" \
+            "$age_str" "$snap_count" "$color" "$status" "$RESET"
+    done
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RESTORE
+# ══════════════════════════════════════════════════════════════════════
+do_restore() {
+    # Replace the OS disk of VM_NAME with a new disk created from the snapshot
+    # SNAPSHOT_ID (both in RESOURCE_GROUP), then start the VM again.
+    # Without FORCE="true" only the plan is printed. Every step is checked; a
+    # failure stops the procedure with an explicit message.
+    [[ -z "$VM_NAME" ]] && die "--restore requires --vm NAME"
+    [[ -z "$SNAPSHOT_ID" ]] && die "--restore requires --snapshot-id NAME"
+    [[ -z "$RESOURCE_GROUP" ]] && die "--restore requires --resource-group RG"
+
+    section_header "Restore from Snapshot"
+    field "VM:" "$VM_NAME"
+    field "Snapshot:" "$SNAPSHOT_ID"
+    field "Resource Group:" "$RESOURCE_GROUP"
+    echo ""
+
+    if [[ "$FORCE" != "true" ]]; then
+        warn "This will replace the VM's OS disk. Use --force to confirm."
+        return
+    fi
+
+    log "Creating disk from snapshot..."
+    local disk_name
+    disk_name="restored-${VM_NAME}-$(date +%Y%m%d-%H%M%S)"
+    local snap_id
+    snap_id=$(az_cmd snapshot show --name "$SNAPSHOT_ID" --resource-group "$RESOURCE_GROUP" \
+        --query 'id' --output tsv 2>/dev/null) || die "Snapshot not found: ${SNAPSHOT_ID}"
+    [[ -n "$snap_id" ]] || die "Snapshot not found: ${SNAPSHOT_ID}"
+
+    if az_cmd disk create \
+        --resource-group "$RESOURCE_GROUP" \
+        --name "$disk_name" \
+        --source "$snap_id" \
+        --output none 2>/dev/null; then
+        echo -e " ${GREEN}✓${RESET} Disk created: ${disk_name}"
+    else
+        die "Failed to create disk from snapshot"
+    fi
+
+    # Resolve the new disk id BEFORE touching the VM, so a failed lookup can
+    # never leave the VM deallocated with nothing to attach.
+    local new_disk_id
+    new_disk_id=$(az_cmd disk show --name "$disk_name" --resource-group "$RESOURCE_GROUP" \
+        --query 'id' --output tsv 2>/dev/null) || new_disk_id=""
+    [[ -n "$new_disk_id" ]] || die "Failed to look up restored disk: ${disk_name}"
+
+    log "Deallocating VM..."
+    az_cmd vm deallocate --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
+        --output none 2>/dev/null || die "Failed to deallocate VM"
+
+    log "Swapping OS disk..."
+    if az_cmd vm update --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
+        --os-disk "$new_disk_id" --output none 2>/dev/null; then
+        echo -e " ${GREEN}✓${RESET} OS disk swapped"
+    else
+        warn "VM ${VM_NAME} is still deallocated; restored disk: ${disk_name}"
+        die "Failed to swap OS disk"
+    fi
+
+    log "Starting VM..."
+    # Previously unchecked: a failed start either ended the script without a
+    # message (errexit) or was followed by a false "VM started".
+    az_cmd vm start --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
+        --output none 2>/dev/null || die "Failed to start VM"
+    echo -e " ${GREEN}✓${RESET} VM started"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# STATUS
+# ══════════════════════════════════════════════════════════════════════
+do_status() {
+ local vm_json
+ vm_json=$(get_all_vms)
+ local snaps
+ snaps=$(list_snapshots)
+ local now
+ now=$(date +%s)
+
+ local total_vms=0 total_snaps=0 total_gb=0
+ local protected=0 stale=0 unprotected=0
+
+ while IFS= read -r vm; do
+ [[ -z "$vm" ]] && continue
+ ((total_vms++)) || true
+
+ local name
+ name=$(echo "$vm" | jq -r '.name')
+
+ local vm_snaps snap_count
+ vm_snaps=$(echo "$snaps" | jq --arg vm "$name" \
+ '[.[] | select(.tags["source-vm"] == $vm)]')
+ snap_count=$(echo "$vm_snaps" | jq 'length')
+ total_snaps=$(( total_snaps + snap_count ))
+
+ local gb
+ gb=$(echo "$vm_snaps" | jq '[.[].diskSizeGb // 0] | add // 0')
+ total_gb=$(( total_gb + gb ))
+
+ if [[ "$snap_count" -eq 0 ]]; then
+ ((unprotected++)) || true
+ continue
+ fi
+
+ local latest_date
+ latest_date=$(echo "$vm_snaps" | jq -r \
+ 'sort_by(.timeCreated) | last | .timeCreated // ""')
+
+ if [[ -n "$latest_date" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ local age_days=$(( (now - snap_epoch) / 86400 ))
+ if (( age_days > MAX_AGE )); then
+ ((stale++)) || true
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ done < <(echo "$vm_json" | jq -c '.[]')
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ cat <${MAX_AGE}d):" "${YELLOW}${stale}${RESET}"
+ else
+ field_color "Stale (>${MAX_AGE}d):" "${GREEN}0${RESET}"
+ fi
+ if [[ "$unprotected" -gt 0 ]]; then
+ field_color "Unprotected:" "${RED}${unprotected}${RESET}"
+ else
+ field_color "Unprotected:" "${GREEN}0${RESET}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; }
+
+# ── Test Result Recording ─────────────────────────────────────────────
+record_pass() {
+    # Record a passed test: $1 is the test name, $2 an optional detail.
+    # Bumps PASS and TOTAL, appends "PASS|name|detail" to RESULTS and prints
+    # either a TAP "ok" line or a coloured check mark line.
+    local name="$1" detail="${2:-}"
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name}" ;;
+        *)   echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}" ;;
+    esac
+}
+
+record_fail() {
+    # Record a failed test: $1 is the test name, $2 an optional detail.
+    # Bumps FAIL and TOTAL, appends "FAIL|name|detail" to RESULTS and prints
+    # either a TAP "not ok" line (plus a "# detail" diagnostic) or a coloured
+    # cross line. Always returns 0.
+    local name="$1" detail="${2:-}"
+    ((FAIL++)) || true; ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # Use `if`, not `[[ ... ]] && echo`: as the last command of the
+        # function the short-circuit form returned 1 for an empty detail,
+        # which terminates a script running under `set -e`.
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    else
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+}
+
+record_skip() {
+    # Record a skipped test: $1 is the test name, $2 an optional reason.
+    # Bumps SKIP and TOTAL, appends "SKIP|name|reason" to RESULTS and prints
+    # either a TAP "ok ... # SKIP" line or a coloured skip line.
+    local name="$1" reason="${2:-}"
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("SKIP|${name}|${reason}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name} # SKIP ${reason}" ;;
+        *)   echo -e " ${YELLOW}⊘${RESET} ${name}${reason:+ — ${reason}}" ;;
+    esac
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+has_cmd() { command -v "$1" >/dev/null 2>&1; }
+restore_ok() { find "${RESTORE_TMP}" -type f | grep -q .; }
+require_tool() { if ! has_cmd "$1"; then record_skip "$2" "$1 not installed"; return 1; fi; }
+
+# ── Cleanup ───────────────────────────────────────────────────────────
+# shellcheck disable=SC2317
+cleanup() { [[ -n "${RESTORE_TMP}" && -d "${RESTORE_TMP}" ]] && rm -rf "${RESTORE_TMP}"; }
+trap cleanup EXIT
+
+# ══════════════════════════════════════════════════════════════════════
+# TEST SUITES
+# ══════════════════════════════════════════════════════════════════════
+
+# ── 1. Repository Health ─────────────────────────────────────────────
+test_repo_health() {
+    # Suite 1: verify the optional mount point, that the backup repository or
+    # directory exists, and that it can be reached. Returns early when the
+    # mount is missing or the required backup tool is not installed.
+    echo ""
+    echo -e "${BOLD}Repository Health${RESET}"
+
+    # 1a. Mount check (only when MOUNT_CHECK is configured)
+    if [[ -n "${MOUNT_CHECK}" ]]; then
+        if ! mountpoint -q "${MOUNT_CHECK}" 2>/dev/null; then
+            record_fail "Mount check" "${MOUNT_CHECK} is not mounted"
+            warn "Skipping remaining tests — mount not available"
+            return
+        fi
+        record_pass "Mount check" "${MOUNT_CHECK} is mounted"
+    fi
+
+    # 1b. Backup exists
+    case "${BACKUP_TYPE}" in
+        restic)
+            require_tool restic "Repository exists" || return
+            if ! restic cat config >/dev/null 2>&1; then
+                record_fail "Repository exists" "not accessible"
+            else
+                record_pass "Repository exists"
+            fi
+            ;;
+        borg)
+            require_tool borg "Repository exists" || return
+            if ! borg info 2>/dev/null | grep -q "Repository ID"; then
+                record_fail "Repository exists" "not accessible"
+            else
+                record_pass "Repository exists"
+            fi
+            ;;
+        directory|rsnapshot)
+            if [[ ! -d "${BACKUP_DIR}" ]]; then
+                record_fail "Backup directory exists" "${BACKUP_DIR} not found"
+            else
+                record_pass "Backup directory exists"
+            fi
+            ;;
+    esac
+
+    # 1c. Repository reachable (remote repositories are probed, local ones pass)
+    case "${BACKUP_TYPE}" in
+        restic)
+            if [[ ! "${BACKUP_REPO}" =~ ^(s3|sftp|rest): ]]; then
+                record_pass "Repository reachable" "local"
+            else
+                require_tool restic "Repository reachable" || return
+                if ! restic cat config >/dev/null 2>&1; then
+                    record_fail "Repository reachable" "remote repository unreachable"
+                else
+                    record_pass "Repository reachable"
+                fi
+            fi
+            ;;
+        borg)
+            if [[ "${BACKUP_REPO}" =~ ^ssh:// || "${BACKUP_REPO}" =~ .*@.*:.* ]]; then
+                require_tool borg "Repository reachable" || return
+                if ! borg info >/dev/null 2>&1; then
+                    record_fail "Repository reachable" "remote repository unreachable"
+                else
+                    record_pass "Repository reachable"
+                fi
+            else
+                record_pass "Repository reachable" "local"
+            fi
+            ;;
+        directory|rsnapshot)
+            if [[ ! -r "${BACKUP_DIR}" ]]; then
+                record_fail "Backup directory reachable" "${BACKUP_DIR} not readable"
+            else
+                record_pass "Backup directory reachable"
+            fi
+            ;;
+    esac
+}
+
+# ── 2. Backup Status ─────────────────────────────────────────────────
+test_backup_status() {
+    # Suite 2: check that the newest backup is at most MAX_AGE_HOURS old, then
+    # run the size and snapshot-count checks. When the backup tool is missing
+    # the recency check is skipped but the other two checks still run.
+    echo ""
+    echo -e "${BOLD}Backup Status${RESET}"
+
+    # 2a. Recent backup: each branch tries to fill last_ts (epoch seconds)
+    local last_ts="" max_age_s=$((MAX_AGE_HOURS * 3600))
+    local latest time_str borg_time newest
+    case "${BACKUP_TYPE}" in
+        restic)
+            require_tool restic "Recent backup" || { test_size; test_snapshot_count; return; }
+            latest=$(restic snapshots --json --latest 1 2>/dev/null) || true
+            if [[ -z "${latest}" || "${latest}" == "[]" || "${latest}" == "null" ]]; then
+                record_fail "Recent backup" "no snapshots found"
+            else
+                time_str=$(grep -oP '"time"\s*:\s*"\K[^"]+' <<< "${latest}" | head -1)
+                if [[ -n "${time_str}" ]]; then
+                    last_ts=$(date -d "${time_str}" +%s 2>/dev/null) || true
+                else
+                    record_fail "Recent backup" "could not parse snapshot time"
+                fi
+            fi
+            ;;
+        borg)
+            require_tool borg "Recent backup" || { test_size; test_snapshot_count; return; }
+            borg_time=$(borg list --format '{time}{NL}' 2>/dev/null | tail -1) || true
+            if [[ -n "${borg_time}" ]]; then
+                last_ts=$(date -d "${borg_time}" +%s 2>/dev/null) || true
+            else
+                record_fail "Recent backup" "no archives found"
+            fi
+            ;;
+        directory|rsnapshot)
+            # Newest sub-directory by mtime; fall back to any entry
+            newest=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -type d -printf '%T@\n' 2>/dev/null | sort -rn | head -1)
+            if [[ -z "${newest}" ]]; then
+                newest=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -printf '%T@\n' 2>/dev/null | sort -rn | head -1)
+            fi
+            if [[ -n "${newest}" ]]; then
+                last_ts="${newest%%.*}"
+            else
+                record_fail "Recent backup" "no backups found in ${BACKUP_DIR}"
+            fi
+            ;;
+    esac
+
+    if [[ -n "${last_ts}" ]]; then
+        local now_ts age_s age_h age_detail
+        now_ts=$(date +%s)
+        age_s=$((now_ts - last_ts))
+        age_h=$((age_s / 3600))
+        age_detail="${age_h}h ago (max ${MAX_AGE_HOURS}h)"
+        if (( age_s <= max_age_s )); then
+            record_pass "Recent backup" "${age_detail}"
+        else
+            record_fail "Recent backup" "${age_detail}"
+        fi
+    fi
+
+    test_size
+    test_snapshot_count
+}
+
+test_size() {
+    # Check that the backup occupies at least MIN_SIZE_MB megabytes.
+    # The size comes from `restic stats`, the "All archives" line of
+    # `borg info`, or `du -sm` on BACKUP_DIR, depending on BACKUP_TYPE.
+    # A size that cannot be determined counts as 0 MB (the check fails).
+    local size_mb=0
+    case "${BACKUP_TYPE}" in
+        restic)
+            require_tool restic "Backup size" || return
+            local stats total_bytes
+            stats=$(restic stats --json --mode raw-data 2>/dev/null) || true
+            total_bytes=$(echo "${stats}" | grep -oP '"total_size"\s*:\s*\K[0-9]+' | head -1) || true
+            if [[ -n "${total_bytes}" ]]; then
+                size_mb=$((total_bytes / 1048576))
+            fi
+            ;;
+        borg)
+            require_tool borg "Backup size" || return
+            local size_str num unit
+            size_str=$(borg info 2>/dev/null | grep -i "all archives" | grep -oP '[0-9.]+\s*(TB|GB|MB|kB)' | head -1) || true
+            if [[ -n "${size_str}" ]]; then
+                num="${size_str%%[!0-9.]*}"      # leading number, e.g. "1.23"
+                unit="${size_str##*[!A-Za-z]}"   # trailing unit, e.g. "GB"
+                # awk does the fractional arithmetic: it is always available,
+                # whereas the previous bc pipeline fell back to 999999 MB when
+                # bc was missing and made this check pass without measuring.
+                case "${unit}" in
+                    TB) size_mb=$(awk -v n="${num}" 'BEGIN { printf "%d", n * 1048576 }') || size_mb=0 ;;
+                    GB) size_mb=$(awk -v n="${num}" 'BEGIN { printf "%d", n * 1024 }') || size_mb=0 ;;
+                    MB) size_mb="${num%%.*}" ;;
+                    kB) size_mb=0 ;;
+                esac
+            fi
+            ;;
+        directory|rsnapshot)
+            size_mb=$(du -sm "${BACKUP_DIR}" 2>/dev/null | awk '{print $1}') || size_mb=0 ;;
+    esac
+    # An unreadable directory or unparsable output leaves the value empty.
+    size_mb="${size_mb:-0}"
+    if [[ ${size_mb} -ge ${MIN_SIZE_MB} ]]; then
+        record_pass "Backup size" "${size_mb} MB (min ${MIN_SIZE_MB} MB)"
+    else
+        record_fail "Backup size" "${size_mb} MB < ${MIN_SIZE_MB} MB"
+    fi
+}
+
+test_snapshot_count() {
+    # Check that at least MIN_SNAPSHOTS snapshots / archives / backup entries
+    # exist for the configured BACKUP_TYPE.
+    local count=0
+    case "${BACKUP_TYPE}" in
+        restic)
+            require_tool restic "Snapshot count" || return
+            # `restic snapshots --json` prints the whole array on a single
+            # line, so counting matching LINES (grep -c) never exceeds 1.
+            # Count every occurrence of the per-snapshot "time" key instead.
+            count=$(restic snapshots --json 2>/dev/null | grep -o '"time"' | wc -l) || count=0 ;;
+        borg)
+            require_tool borg "Snapshot count" || return
+            count=$(borg list 2>/dev/null | wc -l) || count=0 ;;
+        directory)
+            count=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 | wc -l) || count=0 ;;
+        rsnapshot)
+            count=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l) || count=0 ;;
+    esac
+    # Some wc implementations pad the number with blanks; strip them so the
+    # messages stay tidy, and treat an empty result as 0.
+    count="${count//[[:space:]]/}"
+    count="${count:-0}"
+    if [[ ${count} -ge ${MIN_SNAPSHOTS} ]]; then
+        record_pass "Snapshot count" "${count} (min ${MIN_SNAPSHOTS})"
+    else
+        record_fail "Snapshot count" "${count} < ${MIN_SNAPSHOTS}"
+    fi
+}
+
+# ── 3. Integrity ─────────────────────────────────────────────────────
+test_integrity_suite() {
+    # Suite 3: run the backup tool's own integrity check (unless
+    # SKIP_INTEGRITY is "true") and look for leftover locks. Returns early
+    # when the required tool is not installed.
+    echo ""
+    echo -e "${BOLD}Integrity${RESET}"
+
+    # 3a. Integrity check
+    if [[ "${SKIP_INTEGRITY}" == "true" ]]; then
+        record_skip "Integrity check" "SKIP_INTEGRITY=true"
+    else
+        case "${BACKUP_TYPE}" in
+            restic)
+                require_tool restic "Integrity check" || return
+                if ! restic check 2>/dev/null; then
+                    record_fail "Integrity check" "restic check failed"
+                else
+                    record_pass "Integrity check"
+                fi
+                ;;
+            borg)
+                require_tool borg "Integrity check" || return
+                if ! borg check 2>/dev/null; then
+                    record_fail "Integrity check" "borg check failed"
+                else
+                    record_pass "Integrity check"
+                fi
+                ;;
+            directory|rsnapshot)
+                record_skip "Integrity check" "not applicable for ${BACKUP_TYPE}" ;;
+        esac
+    fi
+
+    # 3b. Lock check
+    local lock_output lock_files
+    case "${BACKUP_TYPE}" in
+        restic)
+            require_tool restic "Lock check" || return
+            lock_output=$(restic list locks 2>/dev/null) || true
+            if [[ -n "${lock_output}" ]]; then
+                record_fail "Lock check" "$(echo "${lock_output}" | wc -l) lock(s) found"
+            else
+                record_pass "Lock check" "no stale locks"
+            fi
+            ;;
+        borg)
+            require_tool borg "Lock check" || return
+            # Any mention of "lock" in borg's output is treated as a held lock
+            if borg info 2>&1 | grep -qi "lock"; then
+                record_fail "Lock check" "repository appears locked"
+            else
+                record_pass "Lock check" "no stale locks"
+            fi
+            ;;
+        directory|rsnapshot)
+            lock_files=$(find "${BACKUP_DIR}" -maxdepth 1 \( -name "*.lock" -o -name ".lock" \) 2>/dev/null | wc -l) || lock_files=0
+            if [[ ${lock_files} -ne 0 ]]; then
+                record_fail "Lock check" "${lock_files} lock file(s) in ${BACKUP_DIR}"
+            else
+                record_pass "Lock check" "no stale locks"
+            fi
+            ;;
+    esac
+}
+
+# ── 4. Recovery ──────────────────────────────────────────────────────
+test_recovery() {
+ echo ""
+ echo -e "${BOLD}Recovery${RESET}"
+ if [[ "${SKIP_RESTORE}" == "true" ]]; then record_skip "Test restore" "SKIP_RESTORE=true"; return; fi
+ RESTORE_TMP=$(mktemp -d /tmp/backup-smoke-test-XXXXXX)
+ case "${BACKUP_TYPE}" in
+ restic)
+ require_tool restic "Test restore" || return
+ restic_restore "${RESTORE_TEST_FILE}" ;;
+ borg)
+ require_tool borg "Test restore" || return
+ borg_restore "${RESTORE_TEST_FILE}" ;;
+ directory)
+ dir_restore "${RESTORE_TEST_FILE:+${BACKUP_DIR}/${RESTORE_TEST_FILE}}" ;;
+ rsnapshot)
+ dir_restore "${RESTORE_TEST_FILE:+${BACKUP_DIR}/${RESTORE_TEST_FILE}}" ;;
+ esac
+}
+
+restic_restore() {
+ local target="${1:-}"
+ if [[ -z "${target}" ]]; then
+ target=$(restic ls latest 2>/dev/null | head -1) || true
+ [[ -z "${target}" ]] && { record_skip "Test restore" "no files in latest snapshot"; return; }
+ fi
+ if restic restore latest --target "${RESTORE_TMP}" --include "${target}" 2>/dev/null && restore_ok; then
+ record_pass "Test restore" "file restored successfully"
+ else record_fail "Test restore" "restic restore failed"; fi
+}
+
+borg_restore() {
+ local archive target
+ archive=$(borg list --format '{archive}{NL}' 2>/dev/null | tail -1) || true
+ [[ -z "${archive}" ]] && { record_skip "Test restore" "no archives found"; return; }
+ target="${1:-}"
+ if [[ -z "${target}" ]]; then
+ target=$(borg list "::${archive}" --format '{path}{NL}' 2>/dev/null | grep -v '/$' | head -1) || true
+ [[ -z "${target}" ]] && { record_skip "Test restore" "no files in latest archive"; return; }
+ fi
+ if (cd "${RESTORE_TMP}" && borg extract "::${archive}" "${target}" 2>/dev/null) && restore_ok; then
+ record_pass "Test restore" "file restored successfully"
+ else record_fail "Test restore" "borg extract failed"; fi
+}
+
+dir_restore() {
+ local src_file="${1:-}"
+ [[ -z "${src_file}" ]] && src_file=$(find "${BACKUP_DIR}" -type f 2>/dev/null | head -1)
+ [[ -z "${src_file}" || ! -f "${src_file}" ]] && { record_skip "Test restore" "no files in backup directory"; return; }
+ local dest_file
+ dest_file="${RESTORE_TMP}/$(basename "${src_file}")"
+ if cp "${src_file}" "${dest_file}" 2>/dev/null && [[ -f "${dest_file}" ]]; then
+ record_pass "Test restore" "file copied successfully"
+ else record_fail "Test restore" "copy failed"; fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT
+# ══════════════════════════════════════════════════════════════════════
+
+print_summary() {
+ local end_time; end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+ echo ""
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+ echo -e "${BOLD}Summary${RESET} ${BACKUP_TYPE} ${BACKUP_REPO:-${BACKUP_DIR}}"
+ echo -e " ${GREEN}${PASS} passed${RESET} ${RED}${FAIL} failed${RESET} ${YELLOW}${SKIP} skipped${RESET} (${duration}s)"
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+ if [[ $FAIL -eq 0 ]]; then echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
+ else echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"; fi
+}
+
+# Emit the TAP version line that opens a TAP stream.
+print_tap_header() {
+    printf '%s\n' "TAP version 13"
+}
+
+print_tap_footer() {
+ echo "1..${TOTAL}"
+ echo "# pass ${PASS}"
+ echo "# fail ${FAIL}"
+ echo "# skip ${SKIP}"
+}
+
+write_junit() {
+ local end_time; end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+ cat > "$JUNIT_FILE" <
+
+
+JUNIT_EOF
+
+ for result in "${RESULTS[@]}"; do
+ local status name detail
+ IFS='|' read -r status name detail <<< "$result"
+ name=$(echo "$name" | sed 's/&/\&/g; s/\</g; s/>/\>/g; s/"/\"/g')
+ detail=$(echo "$detail" | sed 's/&/\&/g; s/\</g; s/>/\>/g; s/"/\"/g')
+ echo " " >> "$JUNIT_FILE"
+ case "$status" in
+ PASS) [[ -n "$detail" ]] && echo " ${detail}" >> "$JUNIT_FILE" ;;
+ FAIL) echo " FAILED: ${name} — ${detail}" >> "$JUNIT_FILE" ;;
+ SKIP) echo " " >> "$JUNIT_FILE" ;;
+ esac
+ echo " " >> "$JUNIT_FILE"
+ done
+ echo " " >> "$JUNIT_FILE"
+ echo "" >> "$JUNIT_FILE"
+ log "JUnit report written to ${JUNIT_FILE}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Print a highlighted section title surrounded by blank lines.
+# Arguments: $1 - section title
+section_header() {
+    local title="$1"
+    printf '\n'
+    echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
+    printf '\n'
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+human_bytes() {
+ local bytes="$1"
+ if [[ "$bytes" -ge 1073741824 ]]; then
+ awk "BEGIN { printf \"%.1f GiB\", $bytes / 1073741824 }"
+ elif [[ "$bytes" -ge 1048576 ]]; then
+ awk "BEGIN { printf \"%.1f MiB\", $bytes / 1048576 }"
+ elif [[ "$bytes" -ge 1024 ]]; then
+ awk "BEGIN { printf \"%.1f KiB\", $bytes / 1024 }"
+ else
+ echo "${bytes} B"
+ fi
+}
+
+human_age() {
+ local seconds="$1"
+ local days=$((seconds / 86400))
+ local hours=$(( (seconds % 86400) / 3600 ))
+ local mins=$(( (seconds % 3600) / 60 ))
+
+ if [[ "$days" -gt 0 ]]; then
+ echo "${days}d ${hours}h"
+ elif [[ "$hours" -gt 0 ]]; then
+ echo "${hours}h ${mins}m"
+ else
+ echo "${mins}m"
+ fi
+}
+
+# Convert age string (24h, 7d, 2w) to seconds
+parse_age_to_seconds() {
+ local age="$1"
+ local num="${age%[hdw]*}"
+ local unit="${age##*[0-9]}"
+
+ case "$unit" in
+ h) echo $((num * 3600)) ;;
+ d) echo $((num * 86400)) ;;
+ w) echo $((num * 604800)) ;;
+ *) echo $((num * 3600)) ;;
+ esac
+}
+
+# Convert size string (1, 1K, 1M, 1G) to bytes
+parse_size_to_bytes() {
+ local size="$1"
+
+ # Pure number
+ if [[ "$size" =~ ^[0-9]+$ ]]; then
+ echo "$size"
+ return
+ fi
+
+ local num="${size%[KkMmGg]*}"
+ local unit="${size##*[0-9]}"
+
+ case "${unit^^}" in
+ K) echo $((num * 1024)) ;;
+ M) echo $((num * 1048576)) ;;
+ G) echo $((num * 1073741824)) ;;
+ *) echo "$num" ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# VERIFICATION
+# ══════════════════════════════════════════════════════════════════════
+
+verify_file() {
+ local file="$1"
+ local max_age_str="$2"
+ local min_size_str="$3"
+
+ local max_age_secs min_size_bytes
+ max_age_secs=$(parse_age_to_seconds "$max_age_str")
+ min_size_bytes=$(parse_size_to_bytes "$min_size_str")
+
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+ verbose "Checking: ${file} (max-age=${max_age_str}, min-size=${min_size_str})"
+
+ # Check existence
+ if [[ ! -e "$file" ]]; then
+ printf " %b%-50s %10s %10s %s%b\n" "$RED" "$file" "--" "--" "MISSING" "$RESET"
+ COUNT_CRITICAL=$((COUNT_CRITICAL + 1))
+ return
+ fi
+
+ # Get file info
+ local file_size file_mtime now_epoch age_secs
+ file_size=$(stat -c%s "$file" 2>/dev/null || echo "0")
+ file_mtime=$(stat -c%Y "$file" 2>/dev/null || echo "0")
+ now_epoch=$(date +%s)
+ age_secs=$((now_epoch - file_mtime))
+
+ local size_str age_str
+ size_str=$(human_bytes "$file_size")
+ age_str=$(human_age "$age_secs")
+
+ # Check zero-size
+ if [[ "$file_size" -eq 0 ]]; then
+ printf " %b%-50s %10s %10s %s%b\n" "$RED" "$file" "$size_str" "$age_str" "EMPTY" "$RESET"
+ COUNT_CRITICAL=$((COUNT_CRITICAL + 1))
+ return
+ fi
+
+ # Check minimum size
+ if [[ "$file_size" -lt "$min_size_bytes" ]]; then
+ printf " %b%-50s %10s %10s %s%b\n" "$YELLOW" "$file" "$size_str" "$age_str" "SMALL" "$RESET"
+ COUNT_WARNING=$((COUNT_WARNING + 1))
+ return
+ fi
+
+ # Check age
+ if [[ "$age_secs" -gt "$max_age_secs" ]]; then
+ printf " %b%-50s %10s %10s %s%b\n" "$YELLOW" "$file" "$size_str" "$age_str" "STALE" "$RESET"
+ COUNT_WARNING=$((COUNT_WARNING + 1))
+ return
+ fi
+
+ # All good
+ printf " %b%-50s %10s %10s %s%b\n" "$GREEN" "$file" "$size_str" "$age_str" "OK" "$RESET"
+ COUNT_OK=$((COUNT_OK + 1))
+}
+
+verify_glob() {
+ local pattern="$1"
+ local max_age_str="$2"
+ local min_size_str="$3"
+
+ local found=false
+ # Use compgen to safely expand globs
+ local files
+ files=$(compgen -G "$pattern" 2>/dev/null || true)
+
+ if [[ -z "$files" ]]; then
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+ COUNT_CRITICAL=$((COUNT_CRITICAL + 1))
+ printf " %b%-50s %10s %10s %s%b\n" "$RED" "$pattern" "--" "--" "MISSING" "$RESET"
+ return
+ fi
+
+ while IFS= read -r file; do
+ found=true
+ verify_file "$file" "$max_age_str" "$min_size_str"
+ done <<< "$files"
+
+ if [[ "$found" == "false" ]]; then
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+ COUNT_CRITICAL=$((COUNT_CRITICAL + 1))
+ printf " %b%-50s %10s %10s %s%b\n" "$RED" "$pattern" "--" "--" "MISSING" "$RESET"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# INPUT PARSING
+# ══════════════════════════════════════════════════════════════════════
+
+# Queue a path for verification as a "path|max_age|min_size" record.
+# Arguments: $1 - path or glob
+#            $2 - max age  (optional; defaults to BACKUP_MAX_AGE)
+#            $3 - min size (optional; defaults to BACKUP_MIN_SIZE)
+# Globals:   ENTRIES (appended)
+add_entry() {
+    local path="$1"
+    local max_age min_size record
+    max_age="${2:-$BACKUP_MAX_AGE}"
+    min_size="${3:-$BACKUP_MIN_SIZE}"
+    printf -v record '%s|%s|%s' "$path" "$max_age" "$min_size"
+    ENTRIES+=("$record")
+}
+
+load_config_file() {
+ local file="$1"
+ if [[ ! -f "$file" ]]; then
+ err "Config file not found: $file"
+ exit 1
+ fi
+ while IFS= read -r line; do
+ line=$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+ [[ -z "$line" || "$line" == \#* ]] && continue
+
+ local path max_age
+ path=$(echo "$line" | awk '{print $1}')
+ max_age=$(echo "$line" | awk '{print $2}')
+ if [[ -z "$max_age" ]]; then
+ max_age="$BACKUP_MAX_AGE"
+ fi
+ add_entry "$path" "$max_age"
+ done < "$file"
+}
+
+load_paths_from_file() {
+ local file="$1"
+ if [[ ! -f "$file" ]]; then
+ err "File not found: $file"
+ exit 1
+ fi
+ while IFS= read -r line; do
+ line=$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+ [[ -z "$line" || "$line" == \#* ]] && continue
+ add_entry "$line"
+ done < "$file"
+}
+
+load_paths_from_stdin() {
+ while IFS= read -r line; do
+ line=$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+ [[ -z "$line" || "$line" == \#* ]] && continue
+ add_entry "$line"
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2
+ exit 1 ;;
+ *)
+ add_entry "$1"; shift ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+main() {
+ parse_args "$@"
+ setup_colors
+
+ # Load from config file
+ if [[ -n "$CONFIG_FILE" ]]; then
+ load_config_file "$CONFIG_FILE"
+ fi
+
+ # Load from path file
+ if [[ -n "$PATH_FILE" ]]; then
+ load_paths_from_file "$PATH_FILE"
+ fi
+
+ # Load from stdin if no entries yet and stdin is not a terminal
+ if [[ ${#ENTRIES[@]} -eq 0 ]] && ! [[ -t 0 ]]; then
+ load_paths_from_stdin
+ fi
+
+ if [[ ${#ENTRIES[@]} -eq 0 ]]; then
+ err "No backup paths specified"
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 1
+ fi
+
+ echo ""
+ echo -e "${BOLD}Backup Verification — $(hostname -f 2>/dev/null || hostname)${RESET}"
+ echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"
+ echo -e "${DIM}Defaults: max-age=${BACKUP_MAX_AGE}, min-size=${BACKUP_MIN_SIZE}${RESET}"
+
+ section_header "Backup Status"
+
+ printf " ${BOLD}%-50s %10s %10s %s${RESET}\n" "FILE" "SIZE" "AGE" "STATUS"
+ printf " %s\n" "$(printf '%.0s─' {1..85})"
+
+ for entry in "${ENTRIES[@]}"; do
+ local path max_age min_size
+ path=$(echo "$entry" | cut -d'|' -f1)
+ max_age=$(echo "$entry" | cut -d'|' -f2)
+ min_size=$(echo "$entry" | cut -d'|' -f3)
+
+ # Check if path contains glob characters
+ if [[ "$path" == *\** || "$path" == *\?* || "$path" == *\[* ]]; then
+ verify_glob "$path" "$max_age" "$min_size"
+ else
+ verify_file "$path" "$max_age" "$min_size"
+ fi
+ done
+
+ section_header "Summary"
+ field "Total checked:" "$COUNT_TOTAL"
+ field_color "OK:" "${GREEN}${COUNT_OK}${RESET}"
+ if [[ "$COUNT_WARNING" -gt 0 ]]; then
+ field_color "Warnings:" "${YELLOW}${COUNT_WARNING}${RESET}"
+ else
+ field "Warnings:" "$COUNT_WARNING"
+ fi
+ if [[ "$COUNT_CRITICAL" -gt 0 ]]; then
+ field_color "Critical:" "${RED}${COUNT_CRITICAL}${RESET}"
+ else
+ field "Critical:" "$COUNT_CRITICAL"
+ fi
+
+ echo ""
+
+ # Exit with error code if any critical issues
+ if [[ "$COUNT_CRITICAL" -gt 0 ]]; then
+ return 2
+ elif [[ "$COUNT_WARNING" -gt 0 ]]; then
+ return 1
+ fi
+}
+
+main "$@"
diff --git a/bastion-hardener.sh b/bastion-hardener.sh
new file mode 100755
index 0000000..d7909e3
--- /dev/null
+++ b/bastion-hardener.sh
@@ -0,0 +1,614 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### bastion-hardener.sh — Harden SSH bastion/jump hosts with audit and rollback ####
+#### Disables password auth, restricts ciphers, sets idle timeout, fail2ban config ####
+#### Requires: bash 4+, root privileges ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### sudo ./bastion-hardener.sh --audit ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+# v1.01 changes:
+# - Fixed: ((0++)) returns 1 under set -e; added || true guards
+# - Fixed: grep in pipeline crashes under set -euo pipefail when no matches found. Added || true guard
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+SSHD_CONFIG="${SSHD_CONFIG:-/etc/ssh/sshd_config}"
+BACKUP_ROOT="${BACKUP_ROOT:-/etc/ssh}"
+ALLOW_USERS="${ALLOW_USERS:-}"
+ALLOW_GROUPS="${ALLOW_GROUPS:-}"
+IDLE_TIMEOUT="${IDLE_TIMEOUT:-300}"
+MAX_AUTH_TRIES="${MAX_AUTH_TRIES:-3}"
+MAX_SESSIONS="${MAX_SESSIONS:-2}"
+SESSION_LOG_DIR="${SESSION_LOG_DIR:-/var/log/bastion-sessions}"
+FAIL2BAN_BANTIME="${FAIL2BAN_BANTIME:-3600}"
+FAIL2BAN_MAXRETRY="${FAIL2BAN_MAXRETRY:-3}"
+DRY_RUN="${DRY_RUN:-false}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+ENABLE_SESSION_LOGGING="false"
+CONFIGURE_FAIL2BAN="false"
+ROLLBACK_DIR=""
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+PASS_COUNT=0
+FAIL_COUNT=0
+WARN_COUNT=0
+CHANGES=0
+
+# ── Hardening settings ───────────────────────────────────────────────
+readonly RECOMMENDED_CIPHERS="chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com"
+readonly RECOMMENDED_MACS="hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com"
+readonly RECOMMENDED_KEX="curve25519-sha256,curve25519-sha256@libssh.org"
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Set the colour variables according to COLOR: escape sequences when COLOR
+# is "always" or stdout is a terminal, empty strings when COLOR is "never"
+# or stdout is not a terminal.
+setup_colors() {
+    local use_color="false"
+    if [[ "$COLOR" != "never" ]]; then
+        if [[ "$COLOR" == "always" || -t 1 ]]; then
+            use_color="true"
+        fi
+    fi
+
+    if [[ "$use_color" != "true" ]]; then
+        # shellcheck disable=SC2034
+        RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+        return
+    fi
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    # shellcheck disable=SC2034
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+die() { err "$*"; exit 1; }
+
+# Print the seconds elapsed since START_TIME, suffixed with "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+require_root() {
+ if [[ $EUID -ne 0 ]]; then
+ die "This operation requires root privileges. Run with sudo."
+ fi
+}
+
+# ── SSHD config helpers ──────────────────────────────────────────────
+get_sshd_setting() {
+ local key="$1"
+ local val
+ val=$({ grep -i "^[[:space:]]*${key}[[:space:]]" "$SSHD_CONFIG" 2>/dev/null || true; } | tail -1 | awk '{print $2}')
+ if [[ -z "$val" ]]; then
+ echo "(not set)"
+ else
+ echo "$val"
+ fi
+}
+
+set_sshd_config() {
+ local key="$1"
+ local value="$2"
+ local file="$3"
+
+ if grep -qi "^[[:space:]]*${key}[[:space:]]" "$file" 2>/dev/null; then
+ sed -i "s|^[[:space:]]*${key}[[:space:]].*|${key} ${value}|i" "$file"
+ elif grep -qi "^[[:space:]]*#[[:space:]]*${key}[[:space:]]" "$file" 2>/dev/null; then
+ sed -i "s|^[[:space:]]*#[[:space:]]*${key}[[:space:]].*|${key} ${value}|i" "$file"
+ else
+ echo "${key} ${value}" >> "$file"
+ fi
+ verbose "Set ${key} = ${value}"
+}
+
+# ── Audit check helper ───────────────────────────────────────────────
+check_setting() {
+ local name="$1"
+ local current="$2"
+ local recommended="$3"
+ local is_warn="${4:-false}"
+
+ local status_icon
+ if [[ "${current,,}" == "${recommended,,}" ]]; then
+ status_icon="${GREEN}✓ PASS${RESET}"
+ ((PASS_COUNT++)) || true
+ elif [[ "$is_warn" == "true" ]]; then
+ status_icon="${YELLOW}! WARN${RESET}"
+ ((WARN_COUNT++)) || true
+ else
+ status_icon="${RED}✗ FAIL${RESET}"
+ ((FAIL_COUNT++)) || true
+ fi
+
+ printf " %-34s %-16s %-16s %b\n" "$name" "$current" "$recommended" "$status_icon"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_audit() {
+ if [[ ! -f "$SSHD_CONFIG" ]]; then
+ die "sshd_config not found at ${SSHD_CONFIG}"
+ fi
+
+ log "Auditing SSH configuration..."
+ echo ""
+ echo -e " ${BOLD}SSH Configuration Audit${RESET}"
+ printf " ${BOLD}%-34s %-16s %-16s %s${RESET}\n" "SETTING" "CURRENT" "RECOMMENDED" "STATUS"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ # Core auth settings
+ check_setting "PermitRootLogin" "$(get_sshd_setting PermitRootLogin)" "no"
+ check_setting "PasswordAuthentication" "$(get_sshd_setting PasswordAuthentication)" "no"
+ check_setting "ChallengeResponseAuthentication" "$(get_sshd_setting ChallengeResponseAuthentication)" "no"
+ check_setting "PubkeyAuthentication" "$(get_sshd_setting PubkeyAuthentication)" "yes"
+
+ # Limits
+ check_setting "MaxAuthTries" "$(get_sshd_setting MaxAuthTries)" "$MAX_AUTH_TRIES"
+ check_setting "MaxSessions" "$(get_sshd_setting MaxSessions)" "$MAX_SESSIONS"
+
+ # Timeouts
+ check_setting "ClientAliveInterval" "$(get_sshd_setting ClientAliveInterval)" "$IDLE_TIMEOUT"
+
+ local cac_current
+ cac_current=$(get_sshd_setting ClientAliveCountMax)
+ if [[ "$cac_current" == "3" ]]; then
+ check_setting "ClientAliveCountMax" "$cac_current" "2" "true"
+ else
+ check_setting "ClientAliveCountMax" "$cac_current" "2"
+ fi
+
+ # Forwarding
+ check_setting "X11Forwarding" "$(get_sshd_setting X11Forwarding)" "no"
+ check_setting "AllowTcpForwarding" "$(get_sshd_setting AllowTcpForwarding)" "no"
+ check_setting "AllowAgentForwarding" "$(get_sshd_setting AllowAgentForwarding)" "no"
+ check_setting "PermitTunnel" "$(get_sshd_setting PermitTunnel)" "no"
+
+ # Crypto
+ local ciphers_current
+ ciphers_current=$(get_sshd_setting Ciphers)
+ if [[ "$ciphers_current" == "(not set)" ]]; then
+ check_setting "Ciphers" "(default)" "(restricted)"
+ elif [[ "$ciphers_current" == "$RECOMMENDED_CIPHERS" ]]; then
+ check_setting "Ciphers" "(restricted)" "(restricted)"
+ else
+ check_setting "Ciphers" "(custom)" "(restricted)"
+ fi
+
+ local macs_current
+ macs_current=$(get_sshd_setting MACs)
+ if [[ "$macs_current" == "(not set)" ]]; then
+ check_setting "MACs" "(default)" "(restricted)"
+ elif [[ "$macs_current" == "$RECOMMENDED_MACS" ]]; then
+ check_setting "MACs" "(restricted)" "(restricted)"
+ else
+ check_setting "MACs" "(custom)" "(restricted)"
+ fi
+
+ local kex_current
+ kex_current=$(get_sshd_setting KexAlgorithms)
+ if [[ "$kex_current" == "(not set)" ]]; then
+ check_setting "KexAlgorithms" "(default)" "(restricted)"
+ elif [[ "$kex_current" == "$RECOMMENDED_KEX" ]]; then
+ check_setting "KexAlgorithms" "(restricted)" "(restricted)"
+ else
+ check_setting "KexAlgorithms" "(custom)" "(restricted)"
+ fi
+
+ # Logging and misc
+ check_setting "LogLevel" "$(get_sshd_setting LogLevel)" "VERBOSE"
+ check_setting "LoginGraceTime" "$(get_sshd_setting LoginGraceTime)" "30"
+
+ # AllowUsers / AllowGroups (warn if not set)
+ local au_current ag_current
+ au_current=$(get_sshd_setting AllowUsers)
+ ag_current=$(get_sshd_setting AllowGroups)
+ if [[ "$au_current" == "(not set)" ]]; then
+ check_setting "AllowUsers" "(not set)" "(recommended)" "true"
+ else
+ check_setting "AllowUsers" "(configured)" "(recommended)"
+ fi
+ if [[ "$ag_current" == "(not set)" ]]; then
+ check_setting "AllowGroups" "(not set)" "(recommended)" "true"
+ else
+ check_setting "AllowGroups" "(configured)" "(recommended)"
+ fi
+
+ # Summary
+ local total_checks=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT))
+ local score=0
+ if [[ "$total_checks" -gt 0 ]]; then
+ score=$(( PASS_COUNT * 100 / total_checks ))
+ fi
+
+ echo ""
+ echo -e " ${BOLD}Summary${RESET}"
+ echo " Total checks: ${total_checks}"
+ echo -e " Passed: ${GREEN}${PASS_COUNT}${RESET}"
+ echo -e " Failed: ${RED}${FAIL_COUNT}${RESET}"
+ echo -e " Warnings: ${YELLOW}${WARN_COUNT}${RESET}"
+ echo " Score: ${score} / 100"
+
+ # Extra warnings
+ echo ""
+ if ! command -v fail2ban-client &>/dev/null; then
+ warn "Fail2ban not installed — brute-force protection unavailable"
+ fi
+ if [[ "$au_current" == "(not set)" && "$ag_current" == "(not set)" ]]; then
+ warn "No AllowUsers/AllowGroups configured — all users can SSH in"
+ fi
+
+ log "Run with --apply to harden this host"
+ log "Completed in $(elapsed)"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# APPLY MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_apply() {
+ require_root
+
+ if [[ ! -f "$SSHD_CONFIG" ]]; then
+ die "sshd_config not found at ${SSHD_CONFIG}"
+ fi
+
+ # Create backup
+ local backup_dir
+ backup_dir="${BACKUP_ROOT}/bastion-hardener-backup-$(date +%Y%m%d-%H%M%S)"
+ log "Backing up ${SSHD_CONFIG} → ${backup_dir}/sshd_config"
+ mkdir -p "$backup_dir"
+ cp -p "$SSHD_CONFIG" "${backup_dir}/sshd_config"
+ [[ -f /etc/ssh/banner.txt ]] && cp -p /etc/ssh/banner.txt "${backup_dir}/banner.txt"
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "${YELLOW}DRY RUN${RESET} — previewing changes (no files will be modified)"
+ local tmp_config
+ tmp_config=$(mktemp)
+ cp "$SSHD_CONFIG" "$tmp_config"
+ apply_settings "$tmp_config"
+ echo ""
+ log "Diff preview:"
+ diff "$SSHD_CONFIG" "$tmp_config" || true
+ rm -f "$tmp_config"
+ log "Run without --dry-run to apply changes"
+ return
+ fi
+
+ log "Applying SSH hardening..."
+ apply_settings "$SSHD_CONFIG"
+
+ # Create banner
+ if [[ ! -f /etc/ssh/banner.txt ]]; then
+ cat > /etc/ssh/banner.txt <<'BANNER'
+***************************************************************************
+* AUTHORIZED ACCESS ONLY *
+* *
+* This system is restricted to authorized users. All activities are *
+* monitored and logged. Unauthorized access is prohibited and subject *
+* to prosecution under applicable law. *
+* *
+* By proceeding, you acknowledge that you have read and agree to the *
+* organization's acceptable use policies. *
+***************************************************************************
+BANNER
+ log "Created warning banner at /etc/ssh/banner.txt"
+ fi
+
+ # Session logging directory
+ if [[ "$ENABLE_SESSION_LOGGING" == "true" ]]; then
+ mkdir -p "$SESSION_LOG_DIR"
+ chmod 700 "$SESSION_LOG_DIR"
+ log "Session log directory: ${SESSION_LOG_DIR}"
+ fi
+
+ # Fail2ban configuration
+ if [[ "$CONFIGURE_FAIL2BAN" == "true" ]]; then
+ configure_fail2ban
+ fi
+
+ # Validate config
+ log "Validating sshd configuration..."
+ if sshd -t -f "$SSHD_CONFIG" 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} sshd -t passed"
+ else
+ err "sshd -t validation failed — restoring backup"
+ cp -p "${backup_dir}/sshd_config" "$SSHD_CONFIG"
+ die "Config validation failed. Original config restored."
+ fi
+
+ # Restart sshd
+ log "Restarting sshd..."
+ if systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} sshd restarted successfully"
+ else
+ warn "Could not restart sshd — restart manually"
+ fi
+
+ # Write audit report
+ local report_file
+ report_file="/var/log/bastion-hardener-$(date +%Y%m%d-%H%M%S).log"
+ {
+ echo "Bastion Hardener — Apply Report"
+ echo "Time: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
+ echo "Host: $(hostname -f 2>/dev/null || hostname)"
+ echo "Changes: ${CHANGES}"
+ echo "Backup: ${backup_dir}"
+ } > "$report_file" 2>/dev/null || true
+ log "Writing audit report → ${report_file}"
+
+ log "Changes applied: ${CHANGES}, skipped: 0"
+ log "Backup directory: ${backup_dir}"
+ log "To rollback: ./${SCRIPT_NAME} --rollback"
+ log "Completed in $(elapsed)"
+}
+
+# Write every hardening directive to the given config file, printing one
+# confirmation line per directive and counting it in CHANGES. AllowUsers /
+# AllowGroups are written only when the corresponding list is non-empty.
+# Arguments: $1 - config file to modify
+# Globals:   MAX_AUTH_TRIES, MAX_SESSIONS, IDLE_TIMEOUT, RECOMMENDED_*,
+#            ALLOW_USERS, ALLOW_GROUPS (read); CHANGES (updated)
+apply_settings() {
+    local config_file="$1"
+
+    # One "Keyword value" pair per element; split at the first space below.
+    local -a directives=(
+        "PermitRootLogin no"
+        "PasswordAuthentication no"
+        "ChallengeResponseAuthentication no"
+        "KbdInteractiveAuthentication no"
+        "PubkeyAuthentication yes"
+        "MaxAuthTries ${MAX_AUTH_TRIES}"
+        "MaxSessions ${MAX_SESSIONS}"
+        "ClientAliveInterval ${IDLE_TIMEOUT}"
+        "ClientAliveCountMax 2"
+        "X11Forwarding no"
+        "AllowTcpForwarding no"
+        "AllowAgentForwarding no"
+        "PermitTunnel no"
+        "Ciphers ${RECOMMENDED_CIPHERS}"
+        "MACs ${RECOMMENDED_MACS}"
+        "KexAlgorithms ${RECOMMENDED_KEX}"
+        "LoginGraceTime 30"
+        "LogLevel VERBOSE"
+        "Banner /etc/ssh/banner.txt"
+    )
+
+    local directive key value
+    for directive in "${directives[@]}"; do
+        key="${directive%% *}"
+        value="${directive#* }"
+        set_sshd_config "$key" "$value" "$config_file"
+        echo -e " ${GREEN}✓${RESET} ${key} → ${value}"
+        CHANGES=$((CHANGES + 1))
+    done
+
+    # Access lists: comma-separated input becomes sshd's space-separated form.
+    local list_key list_raw list_val
+    for list_key in AllowUsers AllowGroups; do
+        case "$list_key" in
+            AllowUsers)  list_raw="$ALLOW_USERS" ;;
+            AllowGroups) list_raw="$ALLOW_GROUPS" ;;
+        esac
+        [[ -n "$list_raw" ]] || continue
+        list_val="${list_raw//,/ }"
+        set_sshd_config "$list_key" "$list_val" "$config_file"
+        echo -e " ${GREEN}✓${RESET} ${list_key} → ${list_val}"
+        CHANGES=$((CHANGES + 1))
+    done
+}
+
+configure_fail2ban() {
+ if ! command -v fail2ban-client &>/dev/null; then
+ warn "fail2ban not installed — skipping jail configuration"
+ return
+ fi
+
+ local jail_file="/etc/fail2ban/jail.d/bastion-ssh.conf"
+ log "Configuring fail2ban SSH jail → ${jail_file}"
+
+ cat > "$jail_file" </dev/null; then
+ echo -e " ${GREEN}✓${RESET} fail2ban SSH jail configured and restarted"
+ else
+ warn "Could not restart fail2ban"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ROLLBACK MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_rollback() {
+ require_root
+
+ local target_dir="$ROLLBACK_DIR"
+
+ if [[ -z "$target_dir" ]]; then
+ # Find most recent backup
+ target_dir=$(find "$BACKUP_ROOT" -maxdepth 1 -type d -name "bastion-hardener-backup-*" 2>/dev/null | sort -r | head -1)
+ if [[ -z "$target_dir" ]]; then
+ die "No backup directories found in ${BACKUP_ROOT}"
+ fi
+ fi
+
+ if [[ ! -d "$target_dir" ]]; then
+ die "Backup directory not found: ${target_dir}"
+ fi
+
+ log "Restoring from ${target_dir}..."
+
+ if [[ -f "${target_dir}/sshd_config" ]]; then
+ cp -p "${target_dir}/sshd_config" "$SSHD_CONFIG"
+ echo -e " ${GREEN}✓${RESET} Restored sshd_config"
+ else
+ die "No sshd_config found in backup directory"
+ fi
+
+ if [[ -f "${target_dir}/banner.txt" ]]; then
+ cp -p "${target_dir}/banner.txt" /etc/ssh/banner.txt
+ echo -e " ${GREEN}✓${RESET} Restored banner.txt"
+ fi
+
+ # Validate
+ log "Validating restored configuration..."
+ if sshd -t -f "$SSHD_CONFIG" 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} sshd -t passed"
+ else
+ die "Restored config failed validation"
+ fi
+
+ # Restart
+ log "Restarting sshd..."
+ if systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} sshd restarted successfully"
+ else
+ warn "Could not restart sshd — restart manually"
+ fi
+
+ log "Rollback complete from ${target_dir}"
+ log "Completed in $(elapsed)"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+
+show_help() {
+ cat </dev/null || hostname)"
+ echo "Mode: ${RUN_MODE}"
+ echo "Time: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
+ echo ""
+
+ case "$RUN_MODE" in
+ audit) do_audit ;;
+ apply) do_apply ;;
+ rollback) do_rollback ;;
+ esac
+}
+
+main "$@"
diff --git a/borg-backup-exporter.sh b/borg-backup-exporter.sh
new file mode 100755
index 0000000..70c1720
--- /dev/null
+++ b/borg-backup-exporter.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+################################################################################
+# Script Name: borg-backup-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Borg backups — last archive time,
+# backup age, repo size, archive counts, and deduplication metrics
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - borg installed
+# - BORG_PASSPHRASE or BORG_PASSCOMMAND env set
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# ./borg-backup-exporter.sh --repo /mnt/backup --textfile
+# ./borg-backup-exporter.sh --repo /mnt/backup --http -p 9201
+# BORG_REPO=/mnt/backup ./borg-backup-exporter.sh
+#
+# Configuration:
+# Default HTTP port: 9201
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+EXPORTER_VERSION="1.0" # emitted as the version label of borg_backup_exporter_info
+TEXTFILE_DIR="/var/lib/node_exporter" # textfile directory listed in the header's Configuration section
+OUTPUT_FILE="" # when non-empty, main writes the metrics to this path
+HTTP_MODE=false # when "true", main starts run_http_server
+HTTP_PORT=9201 # port nc listens on in run_http_server
+REPOS=() # repositories iterated by generate_metrics
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+ if [ ${#REPOS[@]} -eq 0 ] && [ -n "$BORG_REPO" ]; then
+ REPOS+=("$BORG_REPO")
+ fi
+}
+
+check_borg() {
+ if ! command -v borg >/dev/null 2>&1; then
+ echo "# ERROR: borg not found" >&2
+ return 1
+ fi
+ return 0
+}
+
+generate_repo_metrics() {
+ local repo="$1"
+ local repo_label
+ repo_label=$(echo "$repo" | sed 's/[^a-zA-Z0-9_\/-]/_/g')
+
+ # Get repo info
+ local info_json
+ if ! info_json=$(borg info --json "$repo" 2>/dev/null) || [ -z "$info_json" ]; then
+ echo "# WARNING: could not read repo $repo" >&2
+ echo "borg_backup_up 0"
+ return
+ fi
+
+ local location
+ location=$(echo "$info_json" | jq -r '.repository.location // empty')
+ echo "borg_backup_repo_info{repo=\"$repo_label\",location=\"$location\"} 1"
+
+ # Repo size metrics from cache/stats
+ local total_size total_csize unique_csize
+ total_size=$(echo "$info_json" | jq '.cache.stats.total_size // 0')
+ total_csize=$(echo "$info_json" | jq '.cache.stats.total_csize // 0')
+ unique_csize=$(echo "$info_json" | jq '.cache.stats.unique_csize // 0')
+ echo "borg_backup_repo_total_size_bytes{repo=\"$repo_label\"} ${total_size:-0}"
+ echo "borg_backup_repo_total_csize_bytes{repo=\"$repo_label\"} ${total_csize:-0}"
+ echo "borg_backup_repo_unique_csize_bytes{repo=\"$repo_label\"} ${unique_csize:-0}"
+
+ # List archives
+ local list_json
+ list_json=$(borg list --json "$repo" 2>/dev/null)
+ local archive_count
+ archive_count=$(echo "$list_json" | jq '.archives | length')
+ echo "borg_backup_archive_count{repo=\"$repo_label\"} ${archive_count:-0}"
+
+ # Last archive metrics
+ local last_archive
+ last_archive=$(echo "$list_json" | jq -r '.archives | sort_by(.start) | last | .start // empty' 2>/dev/null)
+ if [ -n "$last_archive" ]; then
+ local last_unix
+ last_unix=$(date -d "$last_archive" +%s 2>/dev/null || echo 0)
+ local now
+ now=$(date +%s)
+ local age=$((now - last_unix))
+ echo "borg_backup_last_archive_timestamp{repo=\"$repo_label\"} $last_unix"
+ echo "borg_backup_last_archive_age_seconds{repo=\"$repo_label\"} $age"
+ else
+ echo "borg_backup_last_archive_timestamp{repo=\"$repo_label\"} 0"
+ echo "borg_backup_last_archive_age_seconds{repo=\"$repo_label\"} 0"
+ fi
+
+ # Last archive detailed info
+ local last_archive_name
+ last_archive_name=$(echo "$list_json" | jq -r '.archives | sort_by(.start) | last | .archive // empty' 2>/dev/null)
+ if [ -n "$last_archive_name" ]; then
+ local archive_info_json
+ archive_info_json=$(borg info --json "$repo::$last_archive_name" 2>/dev/null)
+ if [ -n "$archive_info_json" ]; then
+ local original_size dedup_size duration
+ original_size=$(echo "$archive_info_json" | jq '.archives[0].stats.original_size // 0')
+ dedup_size=$(echo "$archive_info_json" | jq '.archives[0].stats.deduplicated_size // 0')
+ duration=$(echo "$archive_info_json" | jq '.archives[0].duration // empty')
+ echo "borg_backup_last_archive_original_size_bytes{repo=\"$repo_label\"} ${original_size:-0}"
+ echo "borg_backup_last_archive_deduplicated_size_bytes{repo=\"$repo_label\"} ${dedup_size:-0}"
+ if [ -n "$duration" ] && [ "$duration" != "null" ]; then
+ echo "borg_backup_last_archive_duration_seconds{repo=\"$repo_label\"} $duration"
+ fi
+ fi
+ fi
+
+ # Borg check age (from log file if available)
+ local check_log="/var/log/borg-check.log"
+ if [ -f "$check_log" ]; then
+ local check_mtime
+ check_mtime=$(stat -c %Y "$check_log" 2>/dev/null)
+ if [ -n "$check_mtime" ]; then
+ local now
+ now=$(date +%s)
+ local check_age=$((now - check_mtime))
+ echo "borg_backup_repo_check_age_seconds{repo=\"$repo_label\"} $check_age"
+ fi
+ fi
+}
+
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ if ! check_borg; then
+ echo "# HELP borg_backup_up Exporter status (1=up, 0=down)"
+ echo "# TYPE borg_backup_up gauge"
+ echo "borg_backup_up 0"
+ return
+ fi
+
+ echo "# HELP borg_backup_up Exporter status (1=up, 0=down)"
+ echo "# TYPE borg_backup_up gauge"
+ echo "borg_backup_up 1"
+ echo "# HELP borg_backup_exporter_info Exporter version info"
+ echo "# TYPE borg_backup_exporter_info gauge"
+ echo "borg_backup_exporter_info{version=\"$EXPORTER_VERSION\"} 1"
+
+ # Collect all per-repo metric lines, then output grouped by metric name
+ local all_output
+ all_output=""
+ for repo in "${REPOS[@]}"; do
+ all_output+="$(generate_repo_metrics "$repo")"$'\n'
+ done
+
+ # Output each metric type with HELP/TYPE immediately before its values
+ local -a metric_names=(
+ "borg_backup_repo_info|Repository info"
+ "borg_backup_repo_total_size_bytes|Total deduplicated size in bytes"
+ "borg_backup_repo_total_csize_bytes|Total compressed size in bytes"
+ "borg_backup_repo_unique_csize_bytes|Unique compressed size (actual disk usage) in bytes"
+ "borg_backup_archive_count|Total number of archives"
+ "borg_backup_last_archive_timestamp|Unix timestamp of most recent archive"
+ "borg_backup_last_archive_age_seconds|Seconds since last archive"
+ "borg_backup_last_archive_original_size_bytes|Original size of last archive"
+ "borg_backup_last_archive_deduplicated_size_bytes|Deduplicated size of last archive"
+ "borg_backup_last_archive_duration_seconds|Duration of last archive"
+ "borg_backup_repo_check_age_seconds|Seconds since last borg check"
+ )
+
+ for entry in "${metric_names[@]}"; do
+ local mname="${entry%%|*}"
+ local mdesc="${entry#*|}"
+ local lines
+ lines=$(echo "$all_output" | grep "^${mname}[{[:space:]]" || true)
+ if [ -n "$lines" ]; then
+ echo "# HELP ${mname} ${mdesc}"
+ echo "# TYPE ${mname} gauge"
+ echo "$lines"
+ fi
+ done
+
+ local script_end script_duration
+ script_end=$(date +%s)
+ script_duration=$((script_end - script_start))
+ echo "# HELP borg_backup_exporter_duration_seconds Script execution time"
+ echo "# TYPE borg_backup_exporter_duration_seconds gauge"
+ echo "borg_backup_exporter_duration_seconds $script_duration"
+ echo "# HELP borg_backup_exporter_last_run_timestamp Last successful run"
+ echo "# TYPE borg_backup_exporter_last_run_timestamp gauge"
+ echo "borg_backup_exporter_last_run_timestamp $script_end"
+}
+
+run_http_server() { # Serve metrics over HTTP by looping over single-connection nc listeners on HTTP_PORT.
+ echo "# Starting borg backup exporter on port $HTTP_PORT..." >&2
+ if ! command -v nc >/dev/null 2>&1; then # nc is mandatory for this mode; checked once before the loop
+ echo "# ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+ while true; do # one nc listener per iteration; nothing in the loop breaks out of it
+ { # everything this group prints becomes the data nc sends
+ read -r request # NOTE(review): reads this script's stdin — the pipe only carries the group's output to nc, not the client's request back — confirm intended
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ echo "Borg Backup ExporterBorg Backup Prometheus Exporter
Metrics
"
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null # nc's own error output is discarded
+ done
+}
+
+main() {
+ parse_args "$@"
+ if [ "$HTTP_MODE" = true ]; then
+ run_http_server
+ elif [ -n "$OUTPUT_FILE" ]; then
+ local output_dir
+ output_dir="$(dirname "$OUTPUT_FILE")"
+ mkdir -p "$output_dir"
+ local temp_file
+ temp_file=$(mktemp "${output_dir}/.borg_metrics.XXXXXX")
+ if ! generate_metrics > "$temp_file" 2>/dev/null; then
+ rm -f "$temp_file"
+ echo "# ERROR: Failed to generate metrics" >&2
+ exit 1
+ fi
+ chmod 644 "$temp_file"
+ mv -f "$temp_file" "$OUTPUT_FILE"
+ echo "# Metrics written to $OUTPUT_FILE" >&2
+ else
+ generate_metrics
+ fi
+}
+
+main "$@"
diff --git a/caprover-backup.sh b/caprover-backup.sh
new file mode 100644
index 0000000..0c9ee6e
--- /dev/null
+++ b/caprover-backup.sh
@@ -0,0 +1,398 @@
+#!/usr/bin/env bash
+# caprover-backup.sh — Comprehensive CapRover backup script
+# Author: Phil Connor
+# License: MIT
+# Version: 1.11
+#
+# Backs up /captain config, Docker volumes (captain-- prefixed),
+# and app definitions via CapRover API.
+# Supports local, NFS, and S3 (via aws cli or rclone) destinations.
+#
+# Migration mode (--migrate) stops all CapRover app containers before
+# backing up volumes, ensuring database-consistent snapshots. Produces
+# a single migration tarball for transfer to a new server.
+#
+# Usage:
+# ./caprover-backup.sh # local backup to /backups/caprover
+# ./caprover-backup.sh --migrate # full server migration (stops containers)
+# BACKUP_DEST=s3 S3_BUCKET=my-bucket ./caprover-backup.sh
+# BACKUP_DEST=rclone RCLONE_REMOTE=myremote:backups ./caprover-backup.sh
+# BACKUP_DEST=nfs NFS_MOUNT=/mnt/nfs/backups ./caprover-backup.sh
+
+set -euo pipefail # abort on errors, unset variables and failing pipeline stages
+
+# ---------------------------------------------------------------------------
+# Configuration — override via environment variables
+# ---------------------------------------------------------------------------
+BACKUP_DIR="${BACKUP_DIR:-/backups/caprover}" # local directory every backup artefact is written to
+BACKUP_DEST="${BACKUP_DEST:-local}" # local | nfs | s3 | rclone
+RETENTION_DAYS="${RETENTION_DAYS:-30}" # age threshold (find -mtime) used by apply_retention
+DATE=$(date +%Y%m%d-%H%M%S) # timestamp embedded in every file name produced by this run
+LOG_FILE="${LOG_FILE:-/var/log/caprover-backup.log}" # file log() appends to
+MIGRATE=false # set to true by the --migrate argument
+
+# CapRover API settings (for app definition export)
+CAPROVER_URL="${CAPROVER_URL:-https://captain.apps.example.com}"
+CAPROVER_PASSWORD="${CAPROVER_PASSWORD:-}" # empty value makes export_app_definitions skip the API export
+
+# S3 settings
+S3_BUCKET="${S3_BUCKET:-}"
+S3_PREFIX="${S3_PREFIX:-caprover-backups}"
+
+# rclone settings
+RCLONE_REMOTE="${RCLONE_REMOTE:-}"
+
+# NFS settings
+NFS_MOUNT="${NFS_MOUNT:-/mnt/nfs/backups}"
+
+# ---------------------------------------------------------------------------
+# Logging
+# ---------------------------------------------------------------------------
+log() {
+ local msg
+ msg="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
+ echo "$msg" | tee -a "$LOG_FILE"
+}
+
+log_error() {
+ log "ERROR: $1"
+}
+
+# ---------------------------------------------------------------------------
+# Argument parsing
+# ---------------------------------------------------------------------------
+# Walk the positional parameters: --migrate switches on migration mode,
+# -h/--help prints the header comment of this file and exits. Anything else
+# is ignored.
+for arg; do
+    case "$arg" in
+        -h | --help)
+            sed -n '2,/^$/{ s/^# \?//; p }' "$0"
+            exit 0
+            ;;
+        --migrate)
+            MIGRATE=true
+            ;;
+    esac
+done
+
+# ---------------------------------------------------------------------------
+# Pre-flight checks
+# ---------------------------------------------------------------------------
+preflight() {
+ if [ "$(id -u)" -ne 0 ]; then
+ log_error "Run as root."
+ exit 1
+ fi
+
+ if ! command -v docker &>/dev/null; then
+ log_error "Docker not found."
+ exit 1
+ fi
+
+ if [ "$BACKUP_DEST" = "s3" ] && ! command -v aws &>/dev/null; then
+ log_error "aws CLI not found. Install it or use BACKUP_DEST=rclone."
+ exit 1
+ fi
+
+ if [ "$BACKUP_DEST" = "rclone" ] && ! command -v rclone &>/dev/null; then
+ log_error "rclone not found."
+ exit 1
+ fi
+
+ mkdir -p "$BACKUP_DIR"
+ mkdir -p "$(dirname "$LOG_FILE")"
+}
+
+# ---------------------------------------------------------------------------
+# Backup /captain directory
+# ---------------------------------------------------------------------------
+backup_captain_config() {
+ log "Backing up /captain directory..."
+ local dest="${BACKUP_DIR}/captain-config-${DATE}.tar.gz"
+
+ if [ ! -d /captain ]; then
+ log_error "/captain directory not found. Is CapRover installed?"
+ return 1
+ fi
+
+ tar czf "$dest" -C / captain
+ log "Captain config saved: $dest ($(du -sh "$dest" | cut -f1))"
+}
+
+# ---------------------------------------------------------------------------
+# Backup Docker volumes (captain-- prefixed)
+# ---------------------------------------------------------------------------
+backup_volumes() {
+ log "Backing up Docker volumes..."
+ local volumes
+ volumes=$(docker volume ls -q | grep "^captain--" || true)
+
+ if [ -z "$volumes" ]; then
+ log "No captain-- volumes found. Skipping."
+ return 0
+ fi
+
+ for vol in $volumes; do
+ local app_name="${vol#captain--}"
+ local dest="${BACKUP_DIR}/vol-${app_name}-${DATE}.tar.gz"
+
+ log " Backing up volume: $vol"
+ docker run --rm \
+ -v "${vol}:/source:ro" \
+ -v "${BACKUP_DIR}:/backup" \
+ alpine tar czf "/backup/vol-${app_name}-${DATE}.tar.gz" -C /source .
+
+ log " Volume $vol saved: $dest ($(du -sh "$dest" | cut -f1))"
+ done
+}
+
+# ---------------------------------------------------------------------------
+# Export app definitions via CapRover API
+# ---------------------------------------------------------------------------
+export_app_definitions() {
+ if [ -z "$CAPROVER_PASSWORD" ]; then
+ log "CAPROVER_PASSWORD not set. Skipping API export."
+ return 0
+ fi
+
+ log "Exporting app definitions via CapRover API..."
+
+ # Get auth token
+ local token
+ token=$(curl -s -X POST "${CAPROVER_URL}/api/v2/login" \
+ -H "Content-Type: application/json" \
+ -H "x-namespace: captain" \
+ -d "{\"password\":\"${CAPROVER_PASSWORD}\"}" \
+ | python3 -c "import sys,json; print(json.load(sys.stdin)['data']['token'])" 2>/dev/null) || true
+
+ if [ -z "$token" ]; then
+ log_error "Failed to authenticate with CapRover API."
+ return 1
+ fi
+
+ # Export app definitions
+ local dest="${BACKUP_DIR}/app-definitions-${DATE}.json"
+ local http_code
+ http_code=$(curl -s -o "$dest" -w "%{http_code}" \
+ "${CAPROVER_URL}/api/v2/user/apps/appDefinitions" \
+ -H "Content-Type: application/json" \
+ -H "x-namespace: captain" \
+ -H "x-captain-auth: ${token}")
+
+ if [ "$http_code" = "200" ]; then
+ log "App definitions saved: $dest"
+ else
+ log_error "API returned HTTP $http_code. App definitions export failed."
+ rm -f "$dest"
+ return 1
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# Upload to remote destination
+# ---------------------------------------------------------------------------
+upload_remote() {
+ case "$BACKUP_DEST" in
+ local)
+ log "Backup stored locally at $BACKUP_DIR"
+ ;;
+ nfs)
+ log "Copying backups to NFS mount: $NFS_MOUNT"
+ if ! mountpoint -q "$NFS_MOUNT" 2>/dev/null; then
+ log_error "$NFS_MOUNT is not mounted."
+ return 1
+ fi
+ mkdir -p "${NFS_MOUNT}/caprover"
+ cp "${BACKUP_DIR}"/*-"${DATE}"* "${NFS_MOUNT}/caprover/"
+ log "Copied to NFS."
+ ;;
+ s3)
+ log "Uploading backups to S3: s3://${S3_BUCKET}/${S3_PREFIX}/"
+ for f in "${BACKUP_DIR}"/*-"${DATE}"*; do
+ aws s3 cp "$f" "s3://${S3_BUCKET}/${S3_PREFIX}/$(basename "$f")" --quiet
+ done
+ log "S3 upload complete."
+ ;;
+ rclone)
+ log "Uploading backups via rclone to: $RCLONE_REMOTE"
+ for f in "${BACKUP_DIR}"/*-"${DATE}"*; do
+ rclone copy "$f" "$RCLONE_REMOTE" --quiet
+ done
+ log "rclone upload complete."
+ ;;
+ *)
+ log_error "Unknown BACKUP_DEST: $BACKUP_DEST"
+ return 1
+ ;;
+ esac
+}
+
+# ---------------------------------------------------------------------------
+# Retention — delete local backups older than RETENTION_DAYS
+# ---------------------------------------------------------------------------
+apply_retention() {
+ log "Applying retention policy: deleting backups older than ${RETENTION_DAYS} days..."
+ local count
+ count=$(find "$BACKUP_DIR" -name "*.tar.gz" -o -name "*.json" | \
+ xargs -I{} find {} -mtime +"$RETENTION_DAYS" 2>/dev/null | wc -l)
+
+ find "$BACKUP_DIR" \( -name "*.tar.gz" -o -name "*.json" \) \
+ -mtime +"$RETENTION_DAYS" -delete
+
+ log "Removed $count old backup file(s)."
+}
+
+# ---------------------------------------------------------------------------
+# Migration — stop all CapRover app containers
+# ---------------------------------------------------------------------------
+# Stop the CapRover app workloads before a migration backup.
+#
+# Running containers whose name matches "srv-captain--" are stopped
+# directly; when none are found, the matching Swarm services are scaled
+# to zero instead.
+#
+# Outputs:   progress lines through log
+# Returns:   status of the last command executed
+stop_captain_containers() {
+    log "Stopping all CapRover app containers..."
+
+    local running
+    running=$(docker ps -q --filter "label=com.docker.swarm.service.name" \
+        --filter "name=srv-captain--" 2>/dev/null || true)
+
+    if [ -n "$running" ]; then
+        local total
+        total=$(echo "$running" | wc -w)
+        # Word splitting is intended: one container id per word.
+        # shellcheck disable=SC2086
+        docker stop $running
+        log "Stopped $total container(s). Waiting 5s..."
+        sleep 5
+        return
+    fi
+
+    # fallback: stop services via Docker Swarm
+    local service_ids
+    service_ids=$(docker service ls -q --filter "name=srv-captain--" 2>/dev/null || true)
+    if [ -z "$service_ids" ]; then
+        log "No CapRover app services found."
+        return
+    fi
+
+    local scaled=0
+    local svc svc_name
+    for svc in $service_ids; do
+        svc_name=$(docker service inspect --format '{{.Spec.Name}}' "$svc")
+        log " Scaling down: $svc_name"
+        docker service scale "$svc_name=0" --detach 2>/dev/null
+        scaled=$((scaled + 1))
+    done
+    log "Scaled down $scaled service(s). Waiting 10s for graceful shutdown..."
+    sleep 10
+}
+
+# ---------------------------------------------------------------------------
+# Migration — record service state for restore on new server
+# ---------------------------------------------------------------------------
+save_service_state() {
+ log "Saving Docker service state..."
+ local dest="${BACKUP_DIR}/service-state-${DATE}.json"
+ docker service ls --format '{{json .}}' > "$dest"
+ log "Service state saved: $dest"
+
+ # Save docker info for reference (Swarm tokens, node info)
+ local info_dest="${BACKUP_DIR}/docker-info-${DATE}.txt"
+ docker info > "$info_dest" 2>&1
+ docker node ls >> "$info_dest" 2>/dev/null || true
+ log "Docker info saved: $info_dest"
+}
+
+# ---------------------------------------------------------------------------
+# Migration — package everything into a single tarball
+# ---------------------------------------------------------------------------
+create_migration_bundle() {
+ local bundle="${BACKUP_DIR}/caprover-migration-${DATE}.tar.gz"
+ log "Creating migration bundle..."
+
+ # Collect all files from this run
+ tar czf "$bundle" -C "$BACKUP_DIR" \
+ $(ls -1 "$BACKUP_DIR" | grep "$DATE" | grep -v "caprover-migration")
+
+ local size
+ size=$(du -sh "$bundle" | cut -f1)
+ log "Migration bundle ready: $bundle ($size)"
+ log ""
+ log "========================================="
+ log " MIGRATION INSTRUCTIONS"
+ log "========================================="
+ log "1. Copy bundle to new server:"
+ log " scp $bundle root@new-server:/backups/"
+ log ""
+ log "2. On the new server, install CapRover:"
+ log " docker run -p 80:80 -p 443:443 -p 3000:3000 \\"
+ log " -e ACCEPTED_TERMS=true -v /captain:/captain \\"
+ log " caprover/caprover-edge"
+ log ""
+ log "3. Extract the bundle:"
+ log " mkdir -p /backups/restore && cd /backups/restore"
+ log " tar xzf caprover-migration-${DATE}.tar.gz"
+ log ""
+ log "4. Stop CapRover on new server:"
+ log " docker service rm captain-captain --force"
+ log ""
+ log "5. Restore /captain config:"
+ log " tar xzf captain-config-${DATE}.tar.gz -C /"
+ log ""
+ log "6. Restore Docker volumes:"
+ log " for f in vol-*-${DATE}.tar.gz; do"
+ log ' vol="captain--${f#vol-}"'
+ log ' vol="${vol%-'"${DATE}"'.tar.gz}"'
+ log " docker volume create \"\$vol\""
+ log " docker run --rm -v \"\${vol}:/dest\" -v \"\$(pwd):/backup:ro\" \\"
+ log " alpine sh -c \"tar xzf /backup/\$f -C /dest\""
+ log " done"
+ log ""
+ log "7. Start CapRover and re-deploy apps:"
+ log " docker run -p 80:80 -p 443:443 -p 3000:3000 \\"
+ log " -e ACCEPTED_TERMS=true -v /captain:/captain \\"
+ log " caprover/caprover-edge"
+ log ""
+ log "8. Update DNS to point to the new server IP."
+ log "========================================="
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+# Entry point. Runs the migration flow when MIGRATE is true, otherwise the
+# regular backup flow. Steps guarded with `||` only increment the error
+# counter; unguarded steps run under errexit. Exits 1 when any guarded
+# step failed.
+main() {
+    local errors=0
+
+    if [ "$MIGRATE" = true ]; then
+        log "========================================="
+        log "CapRover MIGRATION backup — ${DATE}"
+        log "Mode: full migration (containers will be stopped)"
+        log "========================================="
+
+        preflight
+
+        # API export and service state are captured before the containers
+        # are stopped; volumes are archived afterwards.
+        export_app_definitions || errors=$((errors + 1))
+        save_service_state || errors=$((errors + 1))
+        stop_captain_containers
+        backup_captain_config || errors=$((errors + 1))
+        backup_volumes || errors=$((errors + 1))
+        create_migration_bundle
+
+        if [ "$errors" -gt 0 ]; then
+            log "Migration backup completed with $errors error(s)."
+            exit 1
+        fi
+        log "Migration backup completed successfully."
+        log "Containers remain stopped. This server is ready to decommission."
+        return
+    fi
+
+    log "========================================="
+    log "CapRover backup started — ${DATE}"
+    log "Destination: ${BACKUP_DEST}"
+    log "========================================="
+
+    preflight
+
+    backup_captain_config || errors=$((errors + 1))
+    backup_volumes || errors=$((errors + 1))
+    export_app_definitions || errors=$((errors + 1))
+    upload_remote || errors=$((errors + 1))
+    apply_retention
+
+    if [ "$errors" -gt 0 ]; then
+        log "Backup completed with $errors error(s)."
+        exit 1
+    fi
+    log "Backup completed successfully."
+}
+
+main "$@"
diff --git a/caprover-exporter.sh b/caprover-exporter.sh
new file mode 100644
index 0000000..fa19b32
--- /dev/null
+++ b/caprover-exporter.sh
@@ -0,0 +1,427 @@
+#!/bin/bash
+################################################################################
+# Script Name: caprover-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for CapRover PaaS providing operational
+# metrics via the CapRover API — app deployment status, container
+# health, resource usage, and platform metrics
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - CapRover installed and running
+# - CapRover API accessible (default: http://localhost:3000)
+# - curl for API calls
+# - jq for JSON parsing
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# ./caprover-exporter.sh # Output to stdout
+# ./caprover-exporter.sh --http -p 9196 # HTTP server mode
+# ./caprover-exporter.sh --textfile # Textfile collector mode
+# ./caprover-exporter.sh --password secret # Custom password
+#
+# Metrics Exported:
+# - caprover_up - API reachability (1=up, 0=down)
+# - caprover_info{version} - CapRover version info
+# - caprover_apps_total - Total app count
+# - caprover_apps_running - Running app count
+# - caprover_apps_stopped - Stopped app count
+# - caprover_app_running{app} - Per-app running status (1/0)
+# - caprover_app_instance_count{app} - Per-app replica count
+# - caprover_app_has_ssl{app} - Per-app SSL status (1/0)
+# - caprover_app_force_ssl{app} - Per-app force SSL status (1/0)
+# - caprover_nodes_total - Swarm node count
+# - caprover_volumes_total - Docker volume count
+# - caprover_disk_used_bytes - Disk usage in bytes
+# - caprover_disk_total_bytes - Total disk in bytes
+# - caprover_exporter_duration_seconds - Script execution time
+# - caprover_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9196
+# Default CapRover URL: http://localhost:3000
+# Default password: captain42
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail # abort on errors, unset variables and failing pipeline stages
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+TEXTFILE_DIR="/var/lib/node_exporter" # textfile directory listed in the header's Configuration section
+OUTPUT_FILE="" # when non-empty, main writes the metrics to this path
+HTTP_MODE=false # when "true", main starts run_http_server
+HTTP_PORT=9196 # port used in HTTP mode
+CAPROVER_URL="http://localhost:3000" # base URL every API endpoint is appended to
+CAPROVER_PASSWORD="captain42" # default listed in the header; sent to /api/v2/login by authenticate
+AUTH_TOKEN="" # filled by authenticate; sent as the x-captain-auth header
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+check_prerequisites() {
+ if ! command -v curl >/dev/null 2>&1; then
+ echo "ERROR: curl not found" >&2; return 1
+ fi
+ if ! command -v jq >/dev/null 2>&1; then
+ echo "ERROR: jq not found (required for JSON parsing)" >&2; return 1
+ fi
+ return 0
+}
+
+# Escape a string for use as a Prometheus label value.
+#
+# Backslash, double quote and line feed are written as \\, \" and \n, the
+# three escapes the text exposition format defines for label values.
+# printf is used for the output so values such as "-n" or "-e" are printed
+# literally rather than being taken as echo options.
+#
+# Arguments: $1 - raw label value
+# Outputs:   the escaped value followed by a newline on stdout
+prom_escape() {
+    local val="$1"
+    # Backslashes first, so the escapes added below are not doubled.
+    val=${val//\\/\\\\}
+    val=${val//\"/\\\"}
+    val=${val//$'\n'/\\n}
+    printf '%s\n' "$val"
+}
+
+authenticate() {
+ if [ -n "$AUTH_TOKEN" ]; then return 0; fi
+
+ local response
+ response=$(curl -s -X POST \
+ -H "Content-Type: application/json" \
+ -H "x-namespace: captain" \
+ -d "{\"password\":\"${CAPROVER_PASSWORD}\"}" \
+ "${CAPROVER_URL}/api/v2/login" 2>/dev/null) || return 1
+
+ local status
+ status=$(echo "$response" | jq -r '.status // 0' 2>/dev/null)
+ if [ "$status" != "100" ]; then
+ echo "ERROR: Failed to authenticate with CapRover API" >&2; return 1
+ fi
+
+ AUTH_TOKEN=$(echo "$response" | jq -r '.data.token // empty' 2>/dev/null)
+ if [ -z "$AUTH_TOKEN" ]; then
+ echo "ERROR: No auth token received from CapRover API" >&2; return 1
+ fi
+ return 0
+}
+
+api_call() {
+ local endpoint="$1"
+ curl -s -X POST \
+ -H "Content-Type: application/json" \
+ -H "x-namespace: captain" \
+ -H "x-captain-auth: ${AUTH_TOKEN}" \
+ -d "{}" \
+ "${CAPROVER_URL}${endpoint}" 2>/dev/null
+}
+
+api_get() {
+ local endpoint="$1"
+ curl -s -X GET \
+ -H "Content-Type: application/json" \
+ -H "x-namespace: captain" \
+ -H "x-captain-auth: ${AUTH_TOKEN}" \
+ "${CAPROVER_URL}${endpoint}" 2>/dev/null
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ if ! check_prerequisites; then
+ echo "# HELP caprover_up CapRover API reachability (1=up, 0=down)"
+ echo "# TYPE caprover_up gauge"
+ echo "caprover_up 0"
+ return
+ fi
+
+ AUTH_TOKEN=""
+ if ! authenticate; then
+ echo "# HELP caprover_up CapRover API reachability (1=up, 0=down)"
+ echo "# TYPE caprover_up gauge"
+ echo "caprover_up 0"
+ return
+ fi
+
+ cat </dev/null)
+ if [ "$info_status" = "100" ]; then
+ caprover_version=$(echo "$system_info" | jq -r '.data.caproverVersion // "unknown"' 2>/dev/null)
+ node_count=$(echo "$system_info" | jq -r '.data.swarmNodesCount // 0' 2>/dev/null)
+ node_count=${node_count:-0}
+ disk_used=$(echo "$system_info" | jq -r '.data.diskUsedInMb // 0' 2>/dev/null)
+ disk_total=$(echo "$system_info" | jq -r '.data.diskTotalInMb // 0' 2>/dev/null)
+ disk_used=${disk_used:-0}; disk_total=${disk_total:-0}
+ [ "$disk_used" != "0" ] && disk_used=$((disk_used * 1024 * 1024))
+ [ "$disk_total" != "0" ] && disk_total=$((disk_total * 1024 * 1024))
+ volume_count=$(echo "$system_info" | jq -r '.data.dockerVolumesCount // 0' 2>/dev/null)
+ volume_count=${volume_count:-0}
+ fi
+ fi
+
+ cat </dev/null)
+
+ if [ "$apps_status" = "100" ]; then
+ total_apps=$(echo "$apps_response" | jq '.data.appDefinitions | length // 0' 2>/dev/null)
+ total_apps=${total_apps:-0}
+ running_apps=$(echo "$apps_response" | jq '[.data.appDefinitions[] | select(.deployedVersion != null and .deployedVersion != 0)] | length' 2>/dev/null)
+ stopped_apps=$(echo "$apps_response" | jq '[.data.appDefinitions[] | select(.deployedVersion == null or .deployedVersion == 0)] | length' 2>/dev/null)
+ running_apps=${running_apps:-0}; stopped_apps=${stopped_apps:-0}
+
+ cat </dev/null | while read -r name val; do
+ [ -z "$name" ] && continue
+ echo "caprover_app_running{app=\"$(prom_escape "$name")\"} $val"
+ done
+ echo ""
+
+ # Per-app instance count
+ echo "# HELP caprover_app_instance_count Number of replicas per app"
+ echo "# TYPE caprover_app_instance_count gauge"
+ echo "$apps_response" | jq -r '.data.appDefinitions[] | "\(.appName) \(.instanceCount // 1)"' 2>/dev/null | while read -r name val; do
+ [ -z "$name" ] && continue
+ echo "caprover_app_instance_count{app=\"$(prom_escape "$name")\"} $val"
+ done
+ echo ""
+
+ # Per-app SSL status
+ echo "# HELP caprover_app_has_ssl SSL enabled per app (1=yes, 0=no)"
+ echo "# TYPE caprover_app_has_ssl gauge"
+ echo "$apps_response" | jq -r '.data.appDefinitions[] | "\(.appName) \(if .hasDefaultSubDomainSsl == true then 1 else 0 end)"' 2>/dev/null | while read -r name val; do
+ [ -z "$name" ] && continue
+ echo "caprover_app_has_ssl{app=\"$(prom_escape "$name")\"} $val"
+ done
+ echo ""
+
+ # Per-app force SSL status
+ echo "# HELP caprover_app_force_ssl Force SSL per app (1=yes, 0=no)"
+ echo "# TYPE caprover_app_force_ssl gauge"
+ echo "$apps_response" | jq -r '.data.appDefinitions[] | "\(.appName) \(if .forceSsl == true then 1 else 0 end)"' 2>/dev/null | while read -r name val; do
+ [ -z "$name" ] && continue
+ echo "caprover_app_force_ssl{app=\"$(prom_escape "$name")\"} $val"
+ done
+ else
+ echo "# HELP caprover_apps_total Total number of deployed apps"
+ echo "# TYPE caprover_apps_total gauge"
+ echo "caprover_apps_total 0"
+ fi
+ else
+ echo "# HELP caprover_apps_total Total number of deployed apps"
+ echo "# TYPE caprover_apps_total gauge"
+ echo "caprover_apps_total 0"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # EXPORTER RUNTIME
+ # ========================================================================
+
+ local script_end script_duration
+ script_end=$(date +%s)
+ script_duration=$((script_end - script_start))
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ while true; do
+ {
+ read -r request
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+CapRover Exporter v1.0
+
+CapRover Prometheus Exporter v1.0
+Metrics
+Operational metrics from the CapRover API.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+main() {
+ parse_args "$@"
+
+ if [ "$HTTP_MODE" = true ]; then
+ run_http_server
+ elif [ -n "$OUTPUT_FILE" ]; then
+ local output_dir
+ output_dir="$(dirname "$OUTPUT_FILE")"
+ mkdir -p "$output_dir"
+
+ local temp_file
+ temp_file=$(mktemp "${output_dir}/.caprover_metrics.XXXXXX")
+
+ if ! generate_metrics > "$temp_file" 2>/dev/null; then
+ rm -f "$temp_file"
+ echo "ERROR: Failed to generate metrics" >&2
+ exit 1
+ fi
+
+ local file_lines
+ file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+
+ if [ "$file_lines" -lt 10 ]; then
+ rm -f "$temp_file"
+ echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+ exit 1
+ fi
+
+ chmod 644 "$temp_file"
+ mv -f "$temp_file" "$OUTPUT_FILE"
+ echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+ else
+ generate_metrics
+ fi
+}
+
+main "$@"
diff --git a/caprover-smoke-tests.sh b/caprover-smoke-tests.sh
new file mode 100755
index 0000000..36464c6
--- /dev/null
+++ b/caprover-smoke-tests.sh
@@ -0,0 +1,518 @@
+#!/bin/bash
+################################################################################
+# Script Name: caprover-smoke-tests.sh
+# Version: 1.01
+# Description: Smoke test suite for CapRover PaaS — validates API health,
+# app deployment lifecycle, SSL certificates, Docker Swarm status,
+# and resource usage
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - bash 4+
+# - curl
+# - jq
+# - openssl (for SSL checks)
+#
+# Usage:
+# export CAPROVER_URL="https://captain.apps.example.com"
+# export CAPROVER_PASSWORD="your-password"
+# ./caprover-smoke-tests.sh
+# ./caprover-smoke-tests.sh --skip-app --skip-ssl
+# ./caprover-smoke-tests.sh --format tap
+# ./caprover-smoke-tests.sh --format junit --junit-file results.xml
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Defaults ---
+# Settings written as "${NAME:-value}" take their value from the environment
+# and fall back to the value after ":-" when the variable is unset or empty.
+CAPROVER_URL="${CAPROVER_URL:-}"
+CAPROVER_PASSWORD="${CAPROVER_PASSWORD:-}"
+# Passed to curl as --connect-timeout; api_call uses three times this value
+# as --max-time.
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+CURL_INSECURE="${CURL_INSECURE:-false}"
+# Note: SKIP_APP is seeded from SKIP_APP_LIFECYCLE, not from SKIP_APP.
+SKIP_APP="${SKIP_APP_LIFECYCLE:-false}"
+SKIP_SSL="${SKIP_SSL:-false}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+JUNIT_FILE="${JUNIT_FILE:-smoke-results.xml}"
+# Internal state; these are not read from the environment.
+VERBOSE=false
+# NOTE(review): USE_COLOR is not consulted by the code visible in this part
+# of the script — confirm where it is applied.
+USE_COLOR=true
+# Token obtained by the login step and sent as x-captain-auth by api_call.
+AUTH_TOKEN=""
+# Name of the temporary app; cleanup deletes it when this is non-empty.
+TEST_APP_NAME=""
+PASSED=0
+FAILED=0
+SKIPPED=0
+START_TIME=""
+# Extra arguments appended to every curl invocation.
+CURL_OPTS=()
+# Result lines buffered by pass/fail/skip for the junit and tap formats.
+JUNIT_RESULTS=()
+TAP_RESULTS=()
+TEST_NUM=0
+
+# --- Colors ---
+# ANSI escape sequences; they are interpreted by `echo -e` at the call sites.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+usage() {
+ cat <&2
+ exit 1
+fi
+# CAPROVER_PASSWORD is mandatory: report on stderr and exit 1 when it is empty.
+if [[ -z "$CAPROVER_PASSWORD" ]]; then
+ echo "Error: CAPROVER_PASSWORD is required" >&2
+ exit 1
+fi
+
+# Strip trailing slash
+# (a single trailing "/" is removed so endpoint paths can be appended directly)
+CAPROVER_URL="${CAPROVER_URL%/}"
+
+# --- Helpers ---
+debug() {
+ if [[ "$VERBOSE" == "true" ]]; then
+ echo -e " ${CYAN}[debug]${NC} $*" >&2
+ fi
+}
+
+api_call() {
+ local method="$1" endpoint="$2"
+ shift 2
+ local url="${CAPROVER_URL}${endpoint}"
+ debug "curl -s -X $method $url"
+ curl -s -X "$method" \
+ --connect-timeout "$CURL_TIMEOUT" \
+ --max-time "$((CURL_TIMEOUT * 3))" \
+ -H "Content-Type: application/json" \
+ -H "x-captain-auth: ${AUTH_TOKEN}" \
+ "${CURL_OPTS[@]}" \
+ "$url" "$@"
+}
+
+# Record a passing test.
+# Arguments: $1 - suite name, $2 - message
+# Increments TEST_NUM and PASSED, then reports according to OUTPUT_FORMAT:
+# tap and junit results are buffered in arrays, anything else is printed.
+pass() {
+ local suite="$1" msg="$2"
+ # "|| true": ((x++)) has exit status 1 when the value before the increment
+ # is 0, which would otherwise stop the script under `set -e`.
+ ((TEST_NUM++)) || true
+ ((PASSED++)) || true
+ case "$OUTPUT_FORMAT" in
+ tap) TAP_RESULTS+=("ok $TEST_NUM - [$suite] $msg") ;;
+ # NOTE(review): the junit entry stored here is an empty string — confirm
+ # that this is the intended record for a passing test.
+ junit) JUNIT_RESULTS+=("") ;;
+ *) echo -e " ${GREEN}✓${NC} $msg" ;;
+ esac
+}
+
+# Record a failing test.
+# Arguments: $1 - suite name, $2 - message, $3 - optional detail text
+# Increments TEST_NUM and FAILED, then reports according to OUTPUT_FORMAT:
+# tap and junit results are buffered in arrays, anything else is printed.
+fail() {
+ local suite="$1" msg="$2" detail="${3:-}"
+ # "|| true" keeps `set -e` from firing when the counter was 0.
+ ((TEST_NUM++)) || true
+ ((FAILED++)) || true
+ case "$OUTPUT_FORMAT" in
+ tap) TAP_RESULTS+=("not ok $TEST_NUM - [$suite] $msg") ;;
+ # NOTE(review): only the bare detail text is stored for junit — confirm
+ # that this is the intended record for a failing test.
+ junit) JUNIT_RESULTS+=("$detail") ;;
+ # ${detail:+...} adds the " — detail" suffix only when detail is non-empty.
+ *) echo -e " ${RED}✗${NC} $msg${detail:+ — $detail}" ;;
+ esac
+}
+
+# Record a skipped test.
+# Arguments: $1 - suite name, $2 - message
+# Increments TEST_NUM and SKIPPED, then reports according to OUTPUT_FORMAT:
+# tap and junit results are buffered in arrays, anything else is printed.
+skip() {
+ local suite="$1" msg="$2"
+ # "|| true" keeps `set -e` from firing when the counter was 0.
+ ((TEST_NUM++)) || true
+ ((SKIPPED++)) || true
+ case "$OUTPUT_FORMAT" in
+ tap) TAP_RESULTS+=("ok $TEST_NUM - [$suite] $msg # SKIP") ;;
+ # NOTE(review): the junit entry stored here is an empty string — confirm
+ # that this is the intended record for a skipped test.
+ junit) JUNIT_RESULTS+=("") ;;
+ *) echo -e " ${YELLOW}⊘${NC} $msg — skipped" ;;
+ esac
+}
+
+suite_header() {
+ if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo -e "\n${BOLD}$1${NC}"
+ fi
+}
+
+# --- Cleanup ---
+cleanup() {
+ if [[ -n "$TEST_APP_NAME" && -n "$AUTH_TOKEN" ]]; then
+ debug "Cleaning up test app: $TEST_APP_NAME"
+ api_call POST "/api/v2/user/apps/appDefinitions/delete" \
+ -d "{\"appName\":\"$TEST_APP_NAME\"}" >/dev/null 2>&1 || true
+ TEST_APP_NAME=""
+ fi
+}
+trap cleanup EXIT INT TERM
+
+# --- Header ---
+START_TIME=$(date +%s)
+if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo -e "${BOLD}CapRover Smoke Tests${NC}"
+ echo "Target: $CAPROVER_URL"
+ echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+fi
+
+# =====================================================
+# Suite 1: Connectivity
+# =====================================================
+suite_header "Connectivity"
+
+# curl's -w "%{http_code}" already prints 000 when no HTTP response was
+# received. The fallback therefore must not echo a second "000": that yields
+# "000000", which is different from "000" and made the API check below pass
+# for an unreachable server. curl's own output is kept, and "000" is
+# substituted only when nothing was captured at all.
+# ${CURL_OPTS[@]+...} avoids an unbound-variable abort for an empty array
+# under `set -u` on bash older than 4.4.
+http_code=$(curl -s -o /dev/null -w "%{http_code}" \
+    --connect-timeout "$CURL_TIMEOUT" \
+    ${CURL_OPTS[@]+"${CURL_OPTS[@]}"} \
+    "$CAPROVER_URL/" 2>/dev/null) || true
+http_code="${http_code:-000}"
+
+if [[ "$http_code" =~ ^(200|302)$ ]]; then
+    pass "Connectivity" "Dashboard reachable — HTTP $http_code"
+else
+    fail "Connectivity" "Dashboard unreachable" "HTTP $http_code"
+fi
+
+api_code=$(curl -s -o /dev/null -w "%{http_code}" \
+    --connect-timeout "$CURL_TIMEOUT" \
+    ${CURL_OPTS[@]+"${CURL_OPTS[@]}"} \
+    "$CAPROVER_URL/api/v2/user/system/info" 2>/dev/null) || true
+api_code="${api_code:-000}"
+
+# Any HTTP status counts as "responding"; only a missing response fails.
+if [[ "$api_code" != "000" ]]; then
+    pass "Connectivity" "API endpoint responding — HTTP $api_code"
+else
+    fail "Connectivity" "API endpoint not responding"
+fi
+
+# =====================================================
+# Suite 2: API
+# =====================================================
+suite_header "API"
+
+login_response=$(curl -s -X POST \
+ --connect-timeout "$CURL_TIMEOUT" \
+ --max-time "$((CURL_TIMEOUT * 3))" \
+ -H "Content-Type: application/json" \
+ "${CURL_OPTS[@]}" \
+ "$CAPROVER_URL/api/v2/login" \
+ -d "{\"password\":\"$CAPROVER_PASSWORD\"}" 2>/dev/null || echo "{}")
+
+debug "Login response: $login_response"
+
+AUTH_TOKEN=$(echo "$login_response" | jq -r '.data.token // empty' 2>/dev/null || true)
+
+if [[ -n "$AUTH_TOKEN" ]]; then
+ pass "API" "API login — authenticated successfully"
+else
+ fail "API" "API login failed" "Could not obtain auth token"
+ # Cannot continue without auth
+ if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo -e "\n${RED}Cannot continue without authentication. Aborting.${NC}"
+ fi
+ exit 1
+fi
+
+# App definitions
+app_response=$(api_call GET "/api/v2/user/apps/appDefinitions" 2>/dev/null || echo "{}")
+app_count=$(echo "$app_response" | jq -r '.data.appDefinitions | length // 0' 2>/dev/null || echo "0")
+status_code=$(echo "$app_response" | jq -r '.status // 0' 2>/dev/null || echo "0")
+
+if [[ "$status_code" == "100" ]]; then
+ pass "API" "App definitions — $app_count apps found"
+else
+ fail "API" "App definitions" "Unexpected status: $status_code"
+fi
+
+# Version
+version_info=$(api_call GET "/api/v2/user/system/versioninfo" 2>/dev/null || echo "{}")
+cr_version=$(echo "$version_info" | jq -r '.data.currentVersion // "unknown"' 2>/dev/null || echo "unknown")
+
+if [[ "$cr_version" != "unknown" ]]; then
+ pass "API" "CapRover version — $cr_version"
+else
+ fail "API" "CapRover version" "Could not retrieve version"
+fi
+
+# System info
+sys_response=$(api_call GET "/api/v2/user/system/info" 2>/dev/null || echo "{}")
+sys_status=$(echo "$sys_response" | jq -r '.status // 0' 2>/dev/null || echo "0")
+
+if [[ "$sys_status" == "100" ]]; then
+ pass "API" "System info — retrieved successfully"
+else
+ fail "API" "System info" "Unexpected status: $sys_status"
+fi
+
+# =====================================================
+# Suite 3: App Lifecycle
+# =====================================================
+# Registers a throw-away app, deploys a demo image to it, waits for the
+# build to finish, requests the app over HTTP and finally deletes it.
+# Every step reports exactly one result; when the app cannot be created the
+# remaining three steps are reported as skipped.
+if [[ "$SKIP_APP" == "true" ]]; then
+    suite_header "App Lifecycle"
+    skip "App Lifecycle" "Create test app"
+    skip "App Lifecycle" "Deploy image"
+    skip "App Lifecycle" "App responding"
+    skip "App Lifecycle" "Delete test app"
+else
+    suite_header "App Lifecycle"
+
+    # The timestamp suffix keeps the name unique between runs.
+    TEST_APP_NAME="smoke-test-$(date +%s)"
+    debug "Test app name: $TEST_APP_NAME"
+
+    # Create app
+    create_response=$(api_call POST "/api/v2/user/apps/appDefinitions/register" \
+        -d "{\"appName\":\"$TEST_APP_NAME\",\"hasPersistentData\":false}" 2>/dev/null || echo "{}")
+    create_status=$(echo "$create_response" | jq -r '.status // 0' 2>/dev/null || echo "0")
+
+    if [[ "$create_status" == "100" ]]; then
+        pass "App Lifecycle" "Create test app — $TEST_APP_NAME"
+    else
+        fail "App Lifecycle" "Create test app" "$(echo "$create_response" | jq -r '.description // "unknown error"' 2>/dev/null)"
+        skip "App Lifecycle" "Deploy image"
+        skip "App Lifecycle" "App responding"
+        skip "App Lifecycle" "Delete test app"
+        # Nothing was created, so there is nothing for cleanup to delete.
+        TEST_APP_NAME=""
+        SKIP_APP=true
+    fi
+
+    if [[ "$SKIP_APP" != "true" ]]; then
+        # Deploy image
+        deploy_response=$(api_call POST "/api/v2/user/apps/appData/$TEST_APP_NAME" \
+            -d "{\"captainDefinitionContent\":\"{\\\"schemaVersion\\\":2,\\\"imageName\\\":\\\"nginxdemos/hello\\\"}\"}" 2>/dev/null || echo "{}")
+        deploy_status=$(echo "$deploy_response" | jq -r '.status // 0' 2>/dev/null || echo "0")
+
+        if [[ "$deploy_status" == "100" ]]; then
+            pass "App Lifecycle" "Deploy image — nginxdemos/hello deployed"
+        else
+            fail "App Lifecycle" "Deploy image" "$(echo "$deploy_response" | jq -r '.description // "deploy failed"' 2>/dev/null)"
+        fi
+
+        # Wait for app to be running (up to 60 seconds): 12 polls, 5 s apart.
+        # The app counts as ready once its isAppBuilding flag reads "false".
+        app_ready=false
+        for i in {1..12}; do
+            sleep 5
+            debug "Waiting for app to start... attempt $i/12"
+            check=$(api_call GET "/api/v2/user/apps/appDefinitions" 2>/dev/null || echo "{}")
+            is_running=$(echo "$check" | jq -r ".data.appDefinitions[] | select(.appName==\"$TEST_APP_NAME\") | .isAppBuilding" 2>/dev/null || echo "true")
+            if [[ "$is_running" == "false" ]]; then
+                app_ready=true
+                break
+            fi
+        done
+
+        # Extract root domain from CapRover URL to build app URL
+        root_domain=$(echo "$CAPROVER_URL" | sed -E 's|https?://captain\.||')
+        app_url="http://${TEST_APP_NAME}.${root_domain}"
+        debug "App URL: $app_url"
+
+        if [[ "$app_ready" == "true" ]]; then
+            # Give nginx a moment to reconfigure
+            sleep 3
+            # curl prints 000 itself when no response arrives; echoing a
+            # second "000" as fallback produced "HTTP 000000" in the report.
+            # ${CURL_OPTS[@]+...} keeps an empty array from tripping `set -u`
+            # on bash older than 4.4.
+            app_http=$(curl -s -o /dev/null -w "%{http_code}" \
+                --connect-timeout "$CURL_TIMEOUT" \
+                ${CURL_OPTS[@]+"${CURL_OPTS[@]}"} \
+                "$app_url" 2>/dev/null) || true
+            app_http="${app_http:-000}"
+
+            if [[ "$app_http" == "200" ]]; then
+                pass "App Lifecycle" "App responding — HTTP 200 at ${TEST_APP_NAME}.${root_domain}"
+            else
+                fail "App Lifecycle" "App responding" "HTTP $app_http at $app_url"
+            fi
+        else
+            fail "App Lifecycle" "App responding" "Timed out waiting for app to start"
+        fi
+
+        # Delete test app
+        delete_response=$(api_call POST "/api/v2/user/apps/appDefinitions/delete" \
+            -d "{\"appName\":\"$TEST_APP_NAME\"}" 2>/dev/null || echo "{}")
+        delete_status=$(echo "$delete_response" | jq -r '.status // 0' 2>/dev/null || echo "0")
+
+        if [[ "$delete_status" == "100" ]]; then
+            pass "App Lifecycle" "Delete test app — cleaned up"
+            # Cleared so the exit-time cleanup does not delete it again.
+            TEST_APP_NAME=""
+        else
+            fail "App Lifecycle" "Delete test app" "Manual cleanup may be required"
+        fi
+    fi
+fi
+
+# =====================================================
+# Suite 4: SSL
+# =====================================================
+# Checks the certificate served at CAPROVER_URL: expiry date first, then the
+# verification result reported by openssl. Both checks are skipped for
+# non-https URLs or when SKIP_SSL is "true".
+if [[ "$SKIP_SSL" == "true" ]]; then
+    suite_header "SSL"
+    skip "SSL" "TLS certificate valid"
+    skip "SSL" "Certificate chain complete"
+else
+    suite_header "SSL"
+
+    # Split the URL into host and port with parameter expansion. Only the
+    # authority part (between "://" and the first "/") is inspected, so digits
+    # after a colon elsewhere in the URL can no longer end up in the port, and
+    # no GNU-only `grep -P` is needed.
+    cr_hostport="${CAPROVER_URL#*://}"
+    cr_hostport="${cr_hostport%%/*}"
+    cr_host="${cr_hostport%%:*}"
+    if [[ "$cr_hostport" =~ :([0-9]+)$ ]]; then
+        cr_port="${BASH_REMATCH[1]}"
+    else
+        cr_port="443"
+    fi
+
+    if [[ "$CAPROVER_URL" == https://* ]]; then
+        cert_output=$(echo | openssl s_client -servername "$cr_host" -connect "${cr_host}:${cr_port}" 2>/dev/null || true)
+        cert_enddate=$(echo "$cert_output" | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2 || true)
+
+        if [[ -z "$cert_enddate" ]]; then
+            fail "SSL" "TLS certificate valid" "Could not read certificate"
+        elif ! expiry_epoch=$(date -d "$cert_enddate" +%s 2>/dev/null); then
+            # A date that cannot be parsed is reported as such instead of
+            # being treated as epoch 0, i.e. as a long-expired certificate.
+            fail "SSL" "TLS certificate valid" "Could not parse expiry date: $cert_enddate"
+        else
+            now_epoch=$(date +%s)
+            days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
+
+            # Compare the timestamps themselves: the whole-day count is 0 for
+            # a certificate with less than 24 hours left, which is not expired.
+            if (( expiry_epoch > now_epoch )); then
+                pass "SSL" "TLS certificate valid — $days_left days remaining"
+            else
+                fail "SSL" "TLS certificate expired" "$(( -days_left )) days past expiry"
+            fi
+        fi
+
+        # Check chain
+        verify_result=$(echo | openssl s_client -servername "$cr_host" -connect "${cr_host}:${cr_port}" 2>&1 | grep "Verify return code" || true)
+        if echo "$verify_result" | grep -q "0 (ok)"; then
+            pass "SSL" "Certificate chain complete"
+        else
+            fail "SSL" "Certificate chain complete" "$verify_result"
+        fi
+    else
+        skip "SSL" "TLS certificate valid"
+        skip "SSL" "Certificate chain complete"
+    fi
+fi
+
+# =====================================================
+# Suite 5: Docker Swarm
+# =====================================================
+suite_header "Docker Swarm"
+
+node_count=$(echo "$sys_response" | jq -r '.data.swarmNodesCount // "unknown"' 2>/dev/null || echo "unknown")
+
+# Validate with a regex before comparing numerically. A non-numeric string
+# given to an arithmetic test is evaluated as a variable name, which can
+# abort the script under `set -u`. 10# forces base 10 for leading zeros.
+if [[ "$node_count" =~ ^[0-9]+$ ]] && (( 10#$node_count > 0 )); then
+    pass "Docker Swarm" "Swarm active — $node_count node(s)"
+else
+    fail "Docker Swarm" "Swarm status" "Could not determine node count"
+fi
+
+# Count running services from app definitions (apps whose isAppBuilding
+# flag is false), plus the total number of apps.
+running_count=$(echo "$app_response" | jq '[.data.appDefinitions[] | select(.isAppBuilding == false)] | length' 2>/dev/null || echo "0")
+total_count=$(echo "$app_response" | jq '.data.appDefinitions | length' 2>/dev/null || echo "0")
+# Fall back to 0 for anything that is not a plain number (empty output, ...).
+[[ "$running_count" =~ ^[0-9]+$ ]] || running_count=0
+[[ "$total_count" =~ ^[0-9]+$ ]] || total_count=0
+# Add 3 for captain-captain, captain-nginx, captain-certbot
+service_count=$((running_count + 3))
+
+# The message names both counts so that its arithmetic adds up: the service
+# figure is built from the running apps, not from the total.
+pass "Docker Swarm" "Services running — $service_count services ($running_count of $total_count apps + 3 system)"
+
+# =====================================================
+# Suite 6: Resources
+# =====================================================
+suite_header "Resources"
+
+disk_used=$(echo "$sys_response" | jq -r '.data.diskUsedPercentage // "unknown"' 2>/dev/null || echo "unknown")
+
+if [[ "$disk_used" != "unknown" ]]; then
+    pass "Resources" "Disk usage — ${disk_used}%"
+else
+    fail "Resources" "Disk usage" "Could not retrieve disk info"
+fi
+
+# In jq, `length` of null is 0 and 0 is not a "false" value for `//`, so
+# `length // "unknown"` can never produce "unknown" and a missing field was
+# reported as "0 volumes". The null case is therefore tested explicitly,
+# which makes the skip branches below reachable.
+volume_count=$(echo "$sys_response" | jq -r '.data.dockerVolumes | if . == null then "unknown" else length end' 2>/dev/null || echo "unknown")
+if [[ "$volume_count" != "unknown" ]]; then
+    pass "Resources" "Docker volumes — $volume_count volumes"
+else
+    # Volumes may not be in system info, skip gracefully
+    skip "Resources" "Docker volumes"
+fi
+
+image_count=$(echo "$sys_response" | jq -r '.data.dockerImages | if . == null then "unknown" else length end' 2>/dev/null || echo "unknown")
+if [[ "$image_count" != "unknown" ]]; then
+    pass "Resources" "Docker images — $image_count images"
+else
+    skip "Resources" "Docker images"
+fi
+
+# =====================================================
+# Summary
+# =====================================================
+# Wall-clock duration of the whole run in seconds.
+END_TIME=$(date +%s)
+DURATION=$((END_TIME - START_TIME))
+
+# Emit the collected results in the requested output format.
+case "$OUTPUT_FORMAT" in
+ tap)
+ # The result lines were buffered by pass/fail/skip, which lets the plan
+ # line ("1..N") be printed ahead of them.
+ echo "TAP version 13"
+ echo "1..$TEST_NUM"
+ for line in "${TAP_RESULTS[@]}"; do
+ echo "$line"
+ done
+ echo "# passed: $PASSED"
+ echo "# failed: $FAILED"
+ echo "# skipped: $SKIPPED"
+ echo "# duration: ${DURATION}s"
+ ;;
+ junit)
+ # Everything printed inside the braces is redirected into JUNIT_FILE.
+ # NOTE(review): the echo statements below print only empty or blank
+ # strings, so no XML markup is written as the code stands — confirm the
+ # intended JUnit wrapper elements.
+ {
+ echo ''
+ echo ""
+ echo " "
+ echo " "
+ echo " "
+ for result in "${JUNIT_RESULTS[@]}"; do
+ echo " $result"
+ done
+ echo ""
+ } > "$JUNIT_FILE"
+ echo "JUnit results written to $JUNIT_FILE"
+ ;;
+ *)
+ # Human-readable summary with the three counters and the duration.
+ echo ""
+ echo "────────────────────────────────────────"
+ echo -e "Summary ${BOLD}$CAPROVER_URL${NC}"
+ echo -e " ${GREEN}$PASSED passed${NC} ${RED}$FAILED failed${NC} ${YELLOW}$SKIPPED skipped${NC} (${DURATION}s)"
+ echo "────────────────────────────────────────"
+ if [[ "$FAILED" -eq 0 ]]; then
+ echo -e "${GREEN}All tests passed.${NC}"
+ else
+ echo -e "${RED}Some tests failed.${NC}"
+ fi
+ ;;
+esac
+
+# Exit status: 1 when at least one test failed, otherwise 0.
+exit $((FAILED > 0 ? 1 : 0))
diff --git a/certificate-smoke-tests.sh b/certificate-smoke-tests.sh
new file mode 100755
index 0000000..9d02c0e
--- /dev/null
+++ b/certificate-smoke-tests.sh
@@ -0,0 +1,650 @@
+#!/usr/bin/env bash
+
+#####################################################################################
+#### certificate-smoke-tests.sh — Verify TLS certificates are healthy ####
+#### Checks expiry, chain, OCSP, TLS version, ciphers, SAN, on-disk certs. ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: TARGETS="example.com:443" ./certificate-smoke-tests.sh ####
+#### CERT_FILES="/etc/ssl/certs/app.pem" ./certificate-smoke-tests.sh ####
+#### ####
+#### See --help for all options. ####
+#####################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+TARGETS="${TARGETS:-}"
+WARN_DAYS="${WARN_DAYS:-30}"
+CRITICAL_DAYS="${CRITICAL_DAYS:-7}"
+CERT_FILES="${CERT_FILES:-}"
+CERT_FILE="${CERT_FILE:-}"
+KEY_FILE="${KEY_FILE:-}"
+CHECK_OCSP="${CHECK_OCSP:-true}"
+CHECK_TLS_VERSION="${CHECK_TLS_VERSION:-true}"
+CHECK_HSTS="${CHECK_HSTS:-true}"
+REJECT_SELF_SIGNED="${REJECT_SELF_SIGNED:-false}"
+SKIP_CHAIN="${SKIP_CHAIN:-false}"
+SKIP_OCSP="${SKIP_OCSP:-false}"
+SKIP_TLS_VERSION="${SKIP_TLS_VERSION:-false}"
+CONNECT_TIMEOUT="${CONNECT_TIMEOUT:-10}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+COLOR="${COLOR:-auto}"
+VERBOSE="${VERBOSE:-false}"
+
+# ── State ─────────────────────────────────────────────────────────────
+# Counters for passed, failed and skipped tests plus the overall total.
+PASS=0; FAIL=0; SKIP=0; TOTAL=0
+# One "STATUS|name|detail" record per test, appended by the record_* helpers.
+RESULTS=()
+# Start timestamp; print_summary subtracts it from the end time.
+START_TIME=""
+# Directory holding the fetched certificates; cleanup removes it when set.
+CERT_TMP=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ BOLD='\033[1m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; }
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# Record a passing test and print it in the active output format.
+# Arguments: $1 - test name, $2 - optional detail
+# Globals:   PASS, TOTAL, RESULTS (written); OUTPUT_FORMAT (read)
+record_pass() {
+    local name="$1" detail="${2:-}"
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+    # The ${detail:+...} suffixes appear only when a detail was supplied.
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name}${detail:+ (${detail})}" ;;
+        *) echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}" ;;
+    esac
+}
+
+# Record a failing test and print it in the active output format.
+# Arguments: $1 - test name, $2 - optional detail
+# Globals:   FAIL, TOTAL, RESULTS (written); OUTPUT_FORMAT (read)
+# Returns:   0
+record_fail() {
+    local name="$1" detail="${2:-}"
+    ((FAIL++)) || true; ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # A full `if` is used instead of `[[ -n ... ]] && echo`: as the last
+        # command of the function, the short-circuit form made record_fail
+        # return 1 whenever detail was empty, which stops the script at the
+        # call site under `set -e`.
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    else
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+}
+
+# Record a skipped test and print it in the active output format.
+# Arguments: $1 - test name, $2 - optional reason
+# Globals:   SKIP, TOTAL, RESULTS (written); OUTPUT_FORMAT (read)
+record_skip() {
+    local name="$1" reason="${2:-}"
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("SKIP|${name}|${reason}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name} # SKIP ${reason}" ;;
+        *) echo -e " ${YELLOW}⊘${RESET} ${name}${reason:+ — ${reason}}" ;;
+    esac
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+has_cmd() { command -v "$1" >/dev/null 2>&1; }
+
+section() {
+ if [[ "$OUTPUT_FORMAT" != "tap" ]]; then echo ""; echo -e "${BOLD}$1${RESET}"; fi
+}
+
+# ── Cleanup ───────────────────────────────────────────────────────────
+# shellcheck disable=SC2317
+cleanup() {
+ [[ -n "${CERT_TMP}" && -d "${CERT_TMP}" ]] && rm -rf "${CERT_TMP}"
+}
+trap cleanup EXIT
+
+# ══════════════════════════════════════════════════════════════════════
+# HELPER FUNCTIONS
+# ══════════════════════════════════════════════════════════════════════
+
+# Fetch the certificate presented by a remote host:port and store it as PEM
+# in "${CERT_TMP}/<host>_<port>.pem".
+# Globals:   CERT_TMP, CONNECT_TIMEOUT (read)
+# Arguments: $1 - host, $2 - port
+# Outputs:   path of the PEM file on stdout when a non-empty file was written
+# Returns:   0 on success, 1 otherwise
+fetch_cert() {
+ local host="$1" port="$2" pem_file
+ pem_file="${CERT_TMP}/${host}_${port}.pem"
+ verbose "Fetching certificate from ${host}:${port}"
+ # s_client output is piped into `openssl x509`, which writes the PEM file.
+ # NOTE(review): the argument after -showcerts looks incomplete (probably a
+ # lost redirection) — confirm against the original script.
+ if echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" \
+ -servername "${host}" \
+ -showcerts /dev/null \
+ | openssl x509 -outform PEM > "${pem_file}" 2>/dev/null; then
+ # The pipeline can succeed and still leave an empty file; -s rejects that.
+ if [[ -s "${pem_file}" ]]; then
+ echo "${pem_file}"
+ return 0
+ fi
+ fi
+ return 1
+}
+
+# Fetch full chain from remote host:port
+# Globals:   CERT_TMP, CONNECT_TIMEOUT (read)
+# Arguments: $1 - host, $2 - port
+# Outputs:   path of "${CERT_TMP}/<host>_<port>_chain.pem" on stdout when the
+#            file is non-empty; nothing otherwise
+fetch_chain() {
+ local host="$1" port="$2" chain_file
+ chain_file="${CERT_TMP}/${host}_${port}_chain.pem"
+ # "|| true": a failed connection is not an error here; the -s test below
+ # decides whether anything usable was written.
+ # NOTE(review): the argument after -showcerts looks incomplete (probably a
+ # lost redirection) — confirm against the original script.
+ echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" \
+ -servername "${host}" \
+ -showcerts "${CERT_TMP}/s_client_err.txt" \
+ > "${chain_file}" 2>/dev/null || true
+ if [[ -s "${chain_file}" ]]; then
+ echo "${chain_file}"
+ fi
+}
+
+# Get days until certificate expires
+# Args: path to PEM file
+# Returns: integer days (negative = already expired)
+cert_days_remaining() {
+ local pem_file="$1"
+ local end_date epoch_end epoch_now
+ end_date=$(openssl x509 -in "${pem_file}" -noout -enddate 2>/dev/null | sed 's/notAfter=//') || return 1
+ epoch_end=$(date -d "${end_date}" +%s 2>/dev/null) || return 1
+ epoch_now=$(date +%s)
+ echo $(( (epoch_end - epoch_now) / 86400 ))
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TEST FUNCTIONS
+# ══════════════════════════════════════════════════════════════════════
+
+# ── Certificate Expiry ───────────────────────────────────────────────
+# Fetch the certificate of host:port and grade its remaining lifetime:
+# expired or below CRITICAL_DAYS fails, below WARN_DAYS passes with a
+# warning note, anything else passes.
+# Arguments: $1 - host, $2 - port
+test_cert_expiry() {
+    local host="$1" port="$2"
+    local label="Certificate expiry (${host}:${port})"
+    local pem_file days
+
+    if ! pem_file=$(fetch_cert "$host" "$port"); then
+        record_fail "$label" "could not connect"
+        return
+    fi
+
+    if ! days=$(cert_days_remaining "$pem_file"); then
+        record_fail "$label" "could not parse expiry date"
+        return
+    fi
+
+    if (( days < 0 )); then
+        # ${days#-} drops the minus sign for the message.
+        record_fail "$label" "EXPIRED ${days#-} days ago"
+    elif (( days < CRITICAL_DAYS )); then
+        record_fail "$label" "expires in ${days}d (critical < ${CRITICAL_DAYS}d)"
+    elif (( days < WARN_DAYS )); then
+        record_pass "$label" "expires in ${days}d (warning < ${WARN_DAYS}d)"
+    else
+        record_pass "$label" "expires in ${days}d"
+    fi
+}
+
+# ── Subject / SAN Match ─────────────────────────────────────────────
+test_san_match() {
+ local host="$1" port="$2" pem_file
+ pem_file="${CERT_TMP}/${host}_${port}.pem"
+ [[ ! -s "$pem_file" ]] && { record_skip "SAN match (${host}:${port})" "no certificate fetched"; return; }
+
+ local san_output cn_output matched=false
+ san_output=$(openssl x509 -in "${pem_file}" -noout -ext subjectAltName 2>/dev/null) || true
+ cn_output=$(openssl x509 -in "${pem_file}" -noout -subject 2>/dev/null | grep -oP 'CN\s*=\s*\K[^/,]+') || true
+
+ if echo "$san_output" | grep -qi "DNS:${host}"; then
+ matched=true
+ elif echo "$san_output" | grep -qi "DNS:\*.$(echo "$host" | sed 's/^[^.]*\.//')"; then
+ matched=true
+ elif [[ "${cn_output}" == "${host}" ]]; then
+ matched=true
+ fi
+
+ if $matched; then
+ record_pass "SAN match (${host}:${port})" "hostname matches certificate"
+ else
+ record_fail "SAN match (${host}:${port})" "hostname not in CN or SAN"
+ fi
+}
+
+# ── Chain Validation ─────────────────────────────────────────────────
+# Classify the certificate chain of host:port from the "Verify return code"
+# printed by openssl s_client.
+# Arguments: $1 - host, $2 - port
+# Globals:   SKIP_CHAIN, REJECT_SELF_SIGNED, CONNECT_TIMEOUT (read)
+test_chain_valid() {
+ local host="$1" port="$2"
+ if [[ "$SKIP_CHAIN" == "true" ]]; then
+ record_skip "Chain valid (${host}:${port})" "SKIP_CHAIN=true"
+ return
+ fi
+
+ local verify_output
+ # NOTE(review): the end of this s_client invocation ("&1") looks incomplete,
+ # probably a lost redirection — confirm against the original script.
+ verify_output=$(echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" \
+ -servername "${host}" \
+ -verify_return_error &1) || true
+
+ if echo "$verify_output" | grep -q "Verify return code: 0"; then
+ record_pass "Chain valid (${host}:${port})" "full chain verified"
+ # Codes 18 and 19 are treated as "self-signed" by this script; the outcome
+ # then depends on REJECT_SELF_SIGNED.
+ elif echo "$verify_output" | grep -q "Verify return code: 18\|Verify return code: 19"; then
+ if [[ "$REJECT_SELF_SIGNED" == "true" ]]; then
+ record_fail "Chain valid (${host}:${port})" "self-signed certificate"
+ else
+ record_pass "Chain valid (${host}:${port})" "self-signed (allowed)"
+ fi
+ else
+ # Any other outcome fails; the first numeric code found is reported, or
+ # "unknown" when none is present.
+ local code
+ code=$(echo "$verify_output" | grep -oP 'Verify return code: \K[0-9]+' | head -1) || code="unknown"
+ record_fail "Chain valid (${host}:${port})" "verify failed (code ${code})"
+ fi
+}
+
+# ── Self-signed Detection ────────────────────────────────────────────
+test_self_signed() {
+ local host="$1" port="$2" pem_file
+ pem_file="${CERT_TMP}/${host}_${port}.pem"
+ [[ ! -s "$pem_file" ]] && { record_skip "Self-signed check (${host}:${port})" "no certificate fetched"; return; }
+
+ local issuer subject
+ issuer=$(openssl x509 -in "${pem_file}" -noout -issuer 2>/dev/null) || true
+ subject=$(openssl x509 -in "${pem_file}" -noout -subject 2>/dev/null) || true
+
+ if [[ "$issuer" == "$subject" ]]; then
+ if [[ "$REJECT_SELF_SIGNED" == "true" ]]; then
+ record_fail "Self-signed check (${host}:${port})" "certificate is self-signed"
+ else
+ record_pass "Self-signed check (${host}:${port})" "self-signed (allowed)"
+ fi
+ else
+ record_pass "Self-signed check (${host}:${port})" "CA-signed"
+ fi
+}
+
+# ── OCSP Stapling ────────────────────────────────────────────────────
+# Report whether host:port staples an OCSP response, based on the text
+# printed by openssl s_client -status.
+# Arguments: $1 - host, $2 - port
+# Globals:   SKIP_OCSP, CONNECT_TIMEOUT (read)
+# Every branch below records a pass (or a skip): this check is informational
+# and never produces a failure.
+test_ocsp_stapling() {
+ local host="$1" port="$2"
+ if [[ "$SKIP_OCSP" == "true" ]]; then
+ record_skip "OCSP stapling (${host}:${port})" "SKIP_OCSP=true"
+ return
+ fi
+
+ local ocsp_output
+ # NOTE(review): the end of this s_client invocation ("&1") looks incomplete,
+ # probably a lost redirection — confirm against the original script.
+ ocsp_output=$(echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" \
+ -servername "${host}" \
+ -status &1) || true
+
+ if echo "$ocsp_output" | grep -q "OCSP Response Status: successful"; then
+ record_pass "OCSP stapling (${host}:${port})" "stapled response present"
+ elif echo "$ocsp_output" | grep -q "OCSP response: no response sent"; then
+ record_pass "OCSP stapling (${host}:${port})" "not configured (optional)"
+ else
+ record_pass "OCSP stapling (${host}:${port})" "status unknown (non-critical)"
+ fi
+}
+
+# ── OCSP Responder Reachable ─────────────────────────────────────────
+test_ocsp_responder() {
+ local host="$1" port="$2" pem_file
+ if [[ "$SKIP_OCSP" == "true" ]]; then
+ record_skip "OCSP responder (${host}:${port})" "SKIP_OCSP=true"
+ return
+ fi
+
+ pem_file="${CERT_TMP}/${host}_${port}.pem"
+ [[ ! -s "$pem_file" ]] && { record_skip "OCSP responder (${host}:${port})" "no certificate fetched"; return; }
+
+ local ocsp_uri
+ ocsp_uri=$(openssl x509 -in "${pem_file}" -noout -ocsp_uri 2>/dev/null) || true
+
+ if [[ -z "$ocsp_uri" ]]; then
+ record_skip "OCSP responder (${host}:${port})" "no OCSP URI in certificate"
+ return
+ fi
+
+ verbose "OCSP URI: ${ocsp_uri}"
+ local ocsp_host
+ ocsp_host=$(echo "$ocsp_uri" | sed 's|https\?://||' | cut -d/ -f1)
+
+ if has_cmd curl; then
+ if curl -sf --max-time 5 -o /dev/null "${ocsp_uri}" 2>/dev/null; then
+ record_pass "OCSP responder (${host}:${port})" "${ocsp_host} reachable"
+ else
+ record_fail "OCSP responder (${host}:${port})" "${ocsp_host} unreachable"
+ fi
+ elif ping -c1 -W3 "$ocsp_host" >/dev/null 2>&1; then
+ record_pass "OCSP responder (${host}:${port})" "${ocsp_host} reachable (ping)"
+ else
+ record_fail "OCSP responder (${host}:${port})" "${ocsp_host} unreachable"
+ fi
+}
+
+# ── TLS Version Check ────────────────────────────────────────────────
+# Probe which TLS protocol versions host:port accepts and record two
+# results: one for TLS 1.2/1.3 support and one for TLS 1.0 being refused.
+# Arguments: $1 - host, $2 - port
+# Globals:   SKIP_TLS_VERSION, CONNECT_TIMEOUT (read)
+# NOTE(review): each s_client invocation below ends in "&1", which looks like
+# a lost redirection — confirm against the original script.
+test_tls_version() {
+ local host="$1" port="$2"
+ if [[ "$SKIP_TLS_VERSION" == "true" ]]; then
+ record_skip "TLS version (${host}:${port})" "SKIP_TLS_VERSION=true"
+ return
+ fi
+
+ # Check TLS 1.2 supported
+ local tls12_ok=false tls13_ok=false
+ if echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" -servername "${host}" \
+ -tls1_2 &1 | grep -q "Protocol.*TLSv1.2"; then
+ tls12_ok=true
+ fi
+
+ # Check TLS 1.3 supported
+ if echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" -servername "${host}" \
+ -tls1_3 &1 | grep -q "Protocol.*TLSv1.3"; then
+ tls13_ok=true
+ fi
+
+ # TLS 1.3 is reported in preference to 1.2 when both are accepted.
+ if $tls13_ok; then
+ record_pass "TLS version (${host}:${port})" "TLS 1.3 supported"
+ elif $tls12_ok; then
+ record_pass "TLS version (${host}:${port})" "TLS 1.2 supported"
+ else
+ record_fail "TLS version (${host}:${port})" "neither TLS 1.2 nor 1.3 supported"
+ fi
+
+ # Check TLS 1.0 rejected
+ local tls10_output
+ tls10_output=$(echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" -servername "${host}" \
+ -tls1 &1) || true
+
+ # A "Protocol" line naming TLSv1 or TLSv1.0 is taken to mean the handshake
+ # was accepted; any other output counts as refused.
+ if echo "$tls10_output" | grep -q "Protocol.*TLSv1$\|Protocol.*TLSv1.0"; then
+ record_fail "TLS 1.0 rejected (${host}:${port})" "TLS 1.0 still accepted"
+ else
+ record_pass "TLS 1.0 rejected (${host}:${port})" "correctly refused"
+ fi
+}
+
+# ── Cipher Strength ──────────────────────────────────────────────────
+# Check the cipher negotiated with host:port against a list of weak cipher
+# name fragments.
+# Arguments: $1 - host, $2 - port
+# Globals:   CONNECT_TIMEOUT (read)
+test_cipher_strength() {
+ local host="$1" port="$2"
+
+ local cipher_output negotiated
+ # NOTE(review): the end of this s_client invocation ("&1") looks incomplete,
+ # probably a lost redirection — confirm against the original script.
+ cipher_output=$(echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" -servername "${host}" \
+ &1) || true
+
+ # First "Cipher : <name>" line of the s_client output.
+ negotiated=$(echo "$cipher_output" | grep -oP 'Cipher\s+:\s+\K\S+' | head -1) || true
+
+ if [[ -z "$negotiated" ]]; then
+ record_skip "Cipher strength (${host}:${port})" "could not determine cipher"
+ return
+ fi
+
+ # Case-insensitive substring match: any of these fragments anywhere in the
+ # cipher name marks it as weak.
+ local weak_ciphers="RC4|DES|3DES|NULL|EXPORT|MD5|anon"
+ if echo "$negotiated" | grep -qiE "$weak_ciphers"; then
+ record_fail "Cipher strength (${host}:${port})" "weak cipher: ${negotiated}"
+ else
+ record_pass "Cipher strength (${host}:${port})" "${negotiated}"
+ fi
+}
+
+# ── HSTS Header ──────────────────────────────────────────────────────
+# Check that https://<host>/ sends a Strict-Transport-Security header.
+# The test is skipped when CHECK_HSTS is not "true", when curl is missing,
+# or when the port is not 443.
+# Arguments: $1 - host, $2 - port
+test_hsts() {
+    local host="$1" port="$2"
+    local label="HSTS header (${host}:${port})"
+
+    # Preconditions, evaluated in order; the first unmet one skips the test.
+    local skip_reason=""
+    if [[ "$CHECK_HSTS" != "true" ]]; then
+        skip_reason="CHECK_HSTS=false"
+    elif ! has_cmd curl; then
+        skip_reason="curl not installed"
+    elif [[ "$port" != "443" ]]; then
+        skip_reason="not HTTPS port"
+    fi
+    if [[ -n "$skip_reason" ]]; then
+        record_skip "$label" "$skip_reason"
+        return
+    fi
+
+    # -k: the header is inspected even when the certificate does not verify.
+    local headers max_age
+    headers=$(curl -sI --max-time 5 -k "https://${host}/" 2>/dev/null) || true
+
+    if ! echo "$headers" | grep -qi "Strict-Transport-Security"; then
+        record_fail "$label" "header not present"
+        return
+    fi
+
+    max_age=$(echo "$headers" | grep -oi 'max-age=[0-9]*' | head -1 | cut -d= -f2) || true
+    record_pass "$label" "max-age=${max_age:-unknown}"
+}
+
+# ── Certificate SCT ─────────────────────────────────────────────────
+# Look for Signed Certificate Timestamp information in the output of
+# openssl s_client -ct for host:port.
+# Arguments: $1 - host, $2 - port
+# Globals:   CONNECT_TIMEOUT (read)
+# No branch records a failure: the result is a pass, or a skip when openssl
+# rejects the -ct option.
+test_sct() {
+ local host="$1" port="$2"
+
+ local sct_output
+ # NOTE(review): the end of this s_client invocation ("&1") looks incomplete,
+ # probably a lost redirection — confirm against the original script.
+ sct_output=$(echo | timeout "${CONNECT_TIMEOUT}" openssl s_client \
+ -connect "${host}:${port}" -servername "${host}" \
+ -ct &1) || true
+
+ if echo "$sct_output" | grep -qi "SCT validation status\|Signed Certificate Timestamp"; then
+ record_pass "Certificate transparency (${host}:${port})" "SCT present"
+ elif echo "$sct_output" | grep -qi "unknown option\|unrecognized option"; then
+ record_skip "Certificate transparency (${host}:${port})" "openssl does not support -ct"
+ else
+ record_pass "Certificate transparency (${host}:${port})" "SCT status unknown (non-critical)"
+ fi
+}
+
+# ── On-disk Certificate File Expiry ──────────────────────────────────
+# Grade the remaining lifetime of a certificate file on disk, using the
+# same thresholds as the remote expiry test.
+# Arguments: $1 - path to the certificate file
+test_cert_file_expiry() {
+    local cert_path="$1"
+    local label days
+    label="File expiry ($(basename "$cert_path"))"
+
+    if [[ ! -f "$cert_path" ]]; then
+        record_fail "$label" "file not found: ${cert_path}"
+        return
+    fi
+
+    if ! days=$(cert_days_remaining "$cert_path"); then
+        record_fail "$label" "could not parse certificate"
+        return
+    fi
+
+    if (( days < 0 )); then
+        # ${days#-} drops the minus sign for the message.
+        record_fail "$label" "EXPIRED ${days#-} days ago"
+    elif (( days < CRITICAL_DAYS )); then
+        record_fail "$label" "expires in ${days}d (critical < ${CRITICAL_DAYS}d)"
+    elif (( days < WARN_DAYS )); then
+        record_pass "$label" "expires in ${days}d (warning < ${WARN_DAYS}d)"
+    else
+        record_pass "$label" "expires in ${days}d"
+    fi
+}
+
+# ── Key / Cert Match ────────────────────────────────────────────────
+test_key_cert_match() {
+ if [[ -z "$CERT_FILE" || -z "$KEY_FILE" ]]; then
+ record_skip "Key/cert match" "CERT_FILE or KEY_FILE not set"
+ return
+ fi
+ if [[ ! -f "$CERT_FILE" ]]; then
+ record_fail "Key/cert match" "cert file not found: ${CERT_FILE}"
+ return
+ fi
+ if [[ ! -f "$KEY_FILE" ]]; then
+ record_fail "Key/cert match" "key file not found: ${KEY_FILE}"
+ return
+ fi
+
+ local cert_mod key_mod
+ cert_mod=$(openssl x509 -in "${CERT_FILE}" -noout -modulus 2>/dev/null | md5sum | awk '{print $1}') || true
+ key_mod=$(openssl rsa -in "${KEY_FILE}" -noout -modulus 2>/dev/null | md5sum | awk '{print $1}') || {
+ key_mod=$(openssl ec -in "${KEY_FILE}" -noout -text 2>/dev/null | md5sum | awk '{print $1}') || true
+ }
+
+ if [[ -n "$cert_mod" && "$cert_mod" == "$key_mod" ]]; then
+ record_pass "Key/cert match" "modulus matches"
+ elif [[ -z "$cert_mod" || -z "$key_mod" ]]; then
+ record_skip "Key/cert match" "could not extract modulus"
+ else
+ record_fail "Key/cert match" "cert and key do not match"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT
+# ══════════════════════════════════════════════════════════════════════
+
+print_tap_header() { echo "TAP version 13"; }
+
+print_tap_footer() {
+ echo "1..${TOTAL}"
+ echo "# pass ${PASS}"
+ echo "# fail ${FAIL}"
+ echo "# skip ${SKIP}"
+}
+
+print_summary() {
+ local end_time; end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+ echo ""
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+ echo -e "${BOLD}Summary${RESET} Certificate Smoke Tests"
+ echo -e " ${GREEN}${PASS} passed${RESET} ${RED}${FAIL} failed${RESET} ${YELLOW}${SKIP} skipped${RESET} (${duration}s)"
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+ if [[ $FAIL -eq 0 ]]; then echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
+ else echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"; fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+# Report which package database is available on this host.
+# Outputs: "dpkg" when dpkg-query exists, else "rpm" when rpm exists,
+#          otherwise "unknown".
+detect_package_manager() {
+    if command -v dpkg-query &>/dev/null; then
+        echo "dpkg"
+        return
+    fi
+    if command -v rpm &>/dev/null; then
+        echo "rpm"
+        return
+    fi
+    echo "unknown"
+}
+
+get_package_list() {
+ local pm
+ pm=$(detect_package_manager)
+ case "$pm" in
+ dpkg)
+ dpkg-query -W -f='${Package} ${Version}\n' 2>/dev/null | sort
+ ;;
+ rpm)
+ rpm -qa --queryformat '%{NAME} %{VERSION}-%{RELEASE}\n' 2>/dev/null | sort
+ ;;
+ *)
+ err "No supported package manager found (need dpkg or rpm)"
+ exit 1
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SNAPSHOT
+# ══════════════════════════════════════════════════════════════════════
+
+do_snapshot() {
+ if [[ ! -d "$SNAPSHOT_DIR" ]]; then
+ mkdir -p "$SNAPSHOT_DIR" 2>/dev/null || {
+ err "Cannot create snapshot directory: ${SNAPSHOT_DIR}"
+ exit 1
+ }
+ fi
+
+ local timestamp
+ timestamp=$(date +%Y%m%d-%H%M%S)
+ local host
+ host=$(hostname -s 2>/dev/null || hostname)
+ local pm
+ pm=$(detect_package_manager)
+ local snapshot_file="${SNAPSHOT_DIR}/${host}-${pm}-${timestamp}.txt"
+
+ verbose "Package manager: ${pm}"
+ verbose "Snapshot file: ${snapshot_file}"
+
+ get_package_list > "$snapshot_file"
+
+ local pkg_count
+ pkg_count=$(wc -l < "$snapshot_file")
+
+ log "Snapshot saved: ${snapshot_file}"
+ field "Packages:" "$pkg_count"
+ field "Package manager:" "$pm"
+ field "File:" "$snapshot_file"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DIFF
+# ══════════════════════════════════════════════════════════════════════
+
+do_diff() {
+ local file_a="$1"
+ local file_b="$2"
+
+ if [[ ! -f "$file_a" ]]; then
+ err "File not found: ${file_a}"
+ exit 1
+ fi
+ if [[ ! -f "$file_b" ]]; then
+ err "File not found: ${file_b}"
+ exit 1
+ fi
+
+ echo ""
+ echo -e "${BOLD}Package Diff${RESET}"
+ field "Before:" "$file_a"
+ field "After:" "$file_b"
+
+ local added=0 removed=0 upgraded=0 downgraded=0
+
+ # Build associative arrays
+ local tmp_added tmp_removed tmp_changed
+ tmp_added=$(mktemp)
+ tmp_removed=$(mktemp)
+ tmp_changed=$(mktemp)
+ trap 'rm -f "$tmp_added" "$tmp_removed" "$tmp_changed"' EXIT
+
+ # Find added packages (in B but not A)
+ while IFS=' ' read -r pkg ver; do
+ if ! grep -q "^${pkg} " "$file_a"; then
+ echo "${pkg} ${ver}" >> "$tmp_added"
+ fi
+ done < "$file_b"
+
+ # Find removed packages (in A but not B)
+ while IFS=' ' read -r pkg ver; do
+ if ! grep -q "^${pkg} " "$file_b"; then
+ echo "${pkg} ${ver}" >> "$tmp_removed"
+ fi
+ done < "$file_a"
+
+ # Find changed packages
+ while IFS=' ' read -r pkg ver_b; do
+ local ver_a
+ ver_a=$(grep "^${pkg} " "$file_a" 2>/dev/null | head -1 | cut -d' ' -f2-)
+ if [[ -n "$ver_a" && "$ver_a" != "$ver_b" ]]; then
+ echo "${pkg} ${ver_a} ${ver_b}" >> "$tmp_changed"
+ fi
+ done < "$file_b"
+
+ # Display additions
+ if [[ -s "$tmp_added" ]]; then
+ section_header "Added Packages"
+ while IFS=' ' read -r pkg ver; do
+ printf " ${CYAN}+${RESET} %-40s %s\n" "$pkg" "$ver"
+ added=$((added + 1))
+ done < "$tmp_added"
+ fi
+
+ # Display removals
+ if [[ -s "$tmp_removed" ]]; then
+ section_header "Removed Packages"
+ while IFS=' ' read -r pkg ver; do
+ printf " ${RED}-${RESET} %-40s %s\n" "$pkg" "$ver"
+ removed=$((removed + 1))
+ done < "$tmp_removed"
+ fi
+
+ # Display upgrades and downgrades
+ if [[ -s "$tmp_changed" ]]; then
+ local has_upgrades=false
+ local has_downgrades=false
+
+ # First pass: categorize
+ while IFS=' ' read -r pkg ver_a ver_b; do
+ if dpkg --compare-versions "$ver_b" gt "$ver_a" 2>/dev/null; then
+ has_upgrades=true
+ elif dpkg --compare-versions "$ver_b" lt "$ver_a" 2>/dev/null; then
+ has_downgrades=true
+ else
+ # Fallback: string comparison
+ if [[ "$ver_b" > "$ver_a" ]]; then
+ has_upgrades=true
+ else
+ has_downgrades=true
+ fi
+ fi
+ done < "$tmp_changed"
+
+ if [[ "$has_upgrades" == "true" ]]; then
+ section_header "Upgraded Packages"
+ while IFS=' ' read -r pkg ver_a ver_b; do
+ local is_upgrade=false
+ if dpkg --compare-versions "$ver_b" gt "$ver_a" 2>/dev/null; then
+ is_upgrade=true
+ elif ! dpkg --compare-versions "$ver_b" lt "$ver_a" 2>/dev/null && [[ "$ver_b" > "$ver_a" ]]; then
+ is_upgrade=true
+ fi
+ if [[ "$is_upgrade" == "true" ]]; then
+ printf " ${GREEN}↑${RESET} %-35s %s → %s\n" "$pkg" "$ver_a" "$ver_b"
+ upgraded=$((upgraded + 1))
+ fi
+ done < "$tmp_changed"
+ fi
+
+ if [[ "$has_downgrades" == "true" ]]; then
+ section_header "Downgraded Packages"
+ while IFS=' ' read -r pkg ver_a ver_b; do
+ local is_downgrade=false
+ if dpkg --compare-versions "$ver_b" lt "$ver_a" 2>/dev/null; then
+ is_downgrade=true
+ elif ! dpkg --compare-versions "$ver_b" gt "$ver_a" 2>/dev/null && [[ "$ver_b" < "$ver_a" ]]; then
+ is_downgrade=true
+ fi
+ if [[ "$is_downgrade" == "true" ]]; then
+ printf " ${YELLOW}↓${RESET} %-35s %s → %s\n" "$pkg" "$ver_a" "$ver_b"
+ downgraded=$((downgraded + 1))
+ fi
+ done < "$tmp_changed"
+ fi
+ fi
+
+ # Summary
+ local total=$((added + removed + upgraded + downgraded))
+
+ echo ""
+ echo -e " ${BOLD}══════════════════════════════════════════${RESET}"
+ echo -e " ${BOLD}Change Summary${RESET}"
+ echo -e " ${BOLD}══════════════════════════════════════════${RESET}"
+ field "Total changes:" "$total"
+ if [[ "$added" -gt 0 ]]; then
+ printf " ${BOLD}%-22s${RESET} ${CYAN}%s${RESET}\n" "Additions:" "$added"
+ else
+ field "Additions:" "0"
+ fi
+ if [[ "$removed" -gt 0 ]]; then
+ printf " ${BOLD}%-22s${RESET} ${RED}%s${RESET}\n" "Removals:" "$removed"
+ else
+ field "Removals:" "0"
+ fi
+ if [[ "$upgraded" -gt 0 ]]; then
+ printf " ${BOLD}%-22s${RESET} ${GREEN}%s${RESET}\n" "Upgrades:" "$upgraded"
+ else
+ field "Upgrades:" "0"
+ fi
+ if [[ "$downgraded" -gt 0 ]]; then
+ printf " ${BOLD}%-22s${RESET} ${YELLOW}%s${RESET}\n" "Downgrades:" "$downgraded"
+ else
+ field "Downgrades:" "0"
+ fi
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# REMOTE COMPARE
+# ══════════════════════════════════════════════════════════════════════
+
+do_remote() {
+ local remote="$1"
+
+ if ! command -v ssh &>/dev/null; then
+ err "ssh is required for remote comparison"
+ exit 1
+ fi
+
+ log "Fetching local package list..."
+ local local_file
+ local_file=$(mktemp)
+
+ log "Fetching remote package list from ${remote}..."
+ local remote_file
+ remote_file=$(mktemp)
+ trap 'rm -f "$local_file" "$remote_file"' EXIT
+
+ get_package_list > "$local_file"
+
+ local pm
+ pm=$(detect_package_manager)
+ case "$pm" in
+ dpkg)
+ ssh "$remote" "dpkg-query -W -f='\${Package} \${Version}\n' 2>/dev/null | sort" > "$remote_file" || {
+ err "Failed to fetch package list from ${remote}"
+ exit 1
+ }
+ ;;
+ rpm)
+ ssh "$remote" "rpm -qa --queryformat '%{NAME} %{VERSION}-%{RELEASE}\n' 2>/dev/null | sort" > "$remote_file" || {
+ err "Failed to fetch package list from ${remote}"
+ exit 1
+ }
+ ;;
+ esac
+
+ log "Comparing local vs ${remote}..."
+ do_diff "$local_file" "$remote_file"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <
+ ${SCRIPT_NAME} --remote
+
+MODES:
+ --snapshot Save current package list to a timestamped file
+ --diff FILE1 FILE2 Compare two snapshot files
+ --remote HOST Compare local packages with a remote host via SSH
+
+OPTIONS:
+ --snapshot-dir DIR Directory for snapshots (default: ${SNAPSHOT_DIR})
+ --verbose Enable debug output
+ --no-color Disable colored output
+ --help Show this help
+
+ENVIRONMENT VARIABLES:
+ SNAPSHOT_DIR Snapshot directory (default: /var/backups/pkg-snapshots)
+ COLOR Color mode: auto, always, never (default: auto)
+
+EXAMPLES:
+ # Take a snapshot before upgrade
+ ./changelog-diff.sh --snapshot
+
+ # Upgrade packages, take another snapshot, then diff
+ ./changelog-diff.sh --snapshot
+ sudo apt upgrade -y
+ ./changelog-diff.sh --snapshot
+ ./changelog-diff.sh --diff /var/backups/pkg-snapshots/host-*.txt
+
+ # Compare with a remote server
+ ./changelog-diff.sh --remote admin@prod-server
+
+ # Custom snapshot directory
+ ./changelog-diff.sh --snapshot --snapshot-dir /tmp/snapshots
+EOF
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ARGUMENT PARSING
+# ══════════════════════════════════════════════════════════════════════
+
+# Parse command-line arguments into the global settings.
+# Globals written: MODE, DIFF_FILE_A, DIFF_FILE_B, REMOTE_HOST, SNAPSHOT_DIR,
+#                  VERBOSE, COLOR
+# Exits 0 after printing help; exits 1 on an unknown option, a missing option
+# value, or when no mode was selected.
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --snapshot)
+                MODE="snapshot"; shift ;;
+            --diff)
+                MODE="diff"
+                if [[ $# -lt 3 ]]; then
+                    err "--diff requires two file arguments"
+                    exit 1
+                fi
+                DIFF_FILE_A="$2"
+                DIFF_FILE_B="$3"
+                shift 3 ;;
+            --remote)
+                MODE="remote"
+                if [[ $# -lt 2 ]]; then
+                    err "--remote requires a host argument"
+                    exit 1
+                fi
+                REMOTE_HOST="$2"
+                shift 2 ;;
+            --snapshot-dir)
+                # Without this check a trailing --snapshot-dir would read an
+                # unset $2 and abort with a cryptic "unbound variable" error.
+                if [[ $# -lt 2 ]]; then
+                    err "--snapshot-dir requires a directory argument"
+                    exit 1
+                fi
+                SNAPSHOT_DIR="$2"
+                shift 2 ;;
+            --verbose)
+                VERBOSE="true"; shift ;;
+            --no-color)
+                COLOR="never"; shift ;;
+            --help|-h)
+                setup_colors
+                usage
+                exit 0 ;;
+            *)
+                err "Unknown option: $1"
+                echo "Run ${SCRIPT_NAME} --help for usage" >&2
+                exit 1 ;;
+        esac
+    done
+
+    if [[ -z "$MODE" ]]; then
+        err "No mode specified. Use --snapshot, --diff, or --remote"
+        echo "Run ${SCRIPT_NAME} --help for usage" >&2
+        exit 1
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+main() {
+ parse_args "$@"
+ setup_colors
+
+ echo ""
+ echo -e "${BOLD}Package Changelog Diff — $(hostname -f 2>/dev/null || hostname)${RESET}"
+ echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"
+
+ case "$MODE" in
+ snapshot)
+ do_snapshot
+ ;;
+ diff)
+ do_diff "$DIFF_FILE_A" "$DIFF_FILE_B"
+ ;;
+ remote)
+ do_remote "$REMOTE_HOST"
+ ;;
+ esac
+}
+
+main "$@"
diff --git a/chaos-runner.sh b/chaos-runner.sh
new file mode 100755
index 0000000..2755fe3
--- /dev/null
+++ b/chaos-runner.sh
@@ -0,0 +1,739 @@
+#!/usr/bin/env bash
+#########################################################################################
+#### chaos-runner.sh — Inject controlled failures and verify system recovery ####
+#### CPU stress, memory pressure, disk fill, service kill, network faults ####
+#### Requires: bash 4+, root privileges ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### sudo ./chaos-runner.sh --fault cpu-stress --duration 30 ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+# Strict mode: abort on errors, on unset variables, and on failures anywhere
+# in a pipeline.
+set -o errexit -o nounset -o pipefail
+
+# ---------------------------------------------------------------------------
+# Color variables — start out empty, populated by setup_colors()
+# ---------------------------------------------------------------------------
+RED="" GREEN="" YELLOW="" BLUE=""
+CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR}" == "always" ]] || [[ -t 1 ]]; then
+ RED="\033[0;31m"
+ GREEN="\033[0;32m"
+ YELLOW="\033[0;33m"
+ BLUE="\033[0;34m"
+ CYAN="\033[0;36m"
+ BOLD="\033[1m"
+ DIM="\033[2m"
+ RESET="\033[0m"
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# Standard helpers
+# ---------------------------------------------------------------------------
+log() { printf "%b[+]%b %s\n" "$GREEN" "$RESET" "$*"; }
+warn() { printf "%b[!]%b %s\n" "$YELLOW" "$RESET" "$*" >&2; }
+err() { printf "%b[-]%b %s\n" "$RED" "$RESET" "$*" >&2; }
+verbose() { [[ "$VERBOSE" == "true" ]] && printf "%b[~]%b %s\n" "$DIM" "$RESET" "$*"; return 0; }
+die() { err "$*"; exit 1; }
+
+# Print a colored "══ title" heading preceded by a blank line.
+section_header() {
+    local title="$*"
+    printf "\n%b%b══ %b%s%b\n" "$CYAN" "$BOLD" "$BLUE" "$title" "$RESET"
+}
+
+field() {
+ printf " %-24s %s\n" "$1" "$2"
+}
+
+field_color() {
+ local label="$1" color="$2" value="$3"
+ printf " %-24s %b%s%b\n" "$label" "$color" "$value" "$RESET"
+}
+
+# ---------------------------------------------------------------------------
+# Defaults
+# ---------------------------------------------------------------------------
+RUN_MODE=""
+FAULT_TYPE=""
+DURATION="${CHAOS_DURATION:-30}"
+TARGET_SERVICE=""
+FILL_PATH="${CHAOS_FILL_PATH:-/tmp}"
+FILL_SIZE="${CHAOS_FILL_SIZE:-90}"
+LATENCY_MS="${CHAOS_LATENCY:-200}"
+DROP_PERCENT="${CHAOS_DROP:-50}"
+NETWORK_IFACE="${CHAOS_IFACE:-eth0}"
+PLAN_FILE=""
+CONFIRM_YES=false
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+CLEANUP_PIDS=()
+CLEANUP_FILES=()
+CHAOS_ACTIVE=false
+
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+readonly SCRIPT_NAME="${0##*/}"
+START_TIME=""
+
+# ---------------------------------------------------------------------------
+# Trap
+# ---------------------------------------------------------------------------
+# Cleanup hangs off EXIT only. INT and TERM just exit with the conventional
+# 128+signal status, which in turn fires the EXIT trap. Running cleanup_all
+# directly as the signal handler would let the script carry on with the
+# fault half torn down once the handler returned.
+trap cleanup_all EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# ---------------------------------------------------------------------------
+# Safety — cleanup
+# ---------------------------------------------------------------------------
+cleanup_all() {
+ if [[ "${CHAOS_ACTIVE}" != "true" ]]; then
+ return
+ fi
+ CHAOS_ACTIVE=false
+ warn "Running cleanup..."
+
+ # Kill tracked background PIDs
+ local pid
+ for pid in "${CLEANUP_PIDS[@]}"; do
+ kill "$pid" 2>/dev/null || true
+ wait "$pid" 2>/dev/null || true
+ done
+ CLEANUP_PIDS=()
+
+ # Remove tracked temp files
+ local f
+ for f in "${CLEANUP_FILES[@]}"; do
+ if [[ -d "$f" ]] && mountpoint -q "$f" 2>/dev/null; then
+ umount "$f" 2>/dev/null || true
+ rmdir "$f" 2>/dev/null || true
+ elif [[ -f "$f" ]]; then
+ rm -f "$f" 2>/dev/null || true
+ elif [[ -d "$f" ]]; then
+ rmdir "$f" 2>/dev/null || true
+ fi
+ done
+ CLEANUP_FILES=()
+
+ # Remove tc qdiscs
+ tc qdisc del dev "$NETWORK_IFACE" root 2>/dev/null || true
+
+ # Restore resolv.conf from backup
+ if [[ -f /etc/resolv.conf.chaos-backup ]]; then
+ mv /etc/resolv.conf.chaos-backup /etc/resolv.conf 2>/dev/null || true
+ log "Restored /etc/resolv.conf from backup"
+ fi
+
+ log "Cleanup complete"
+}
+
+# ---------------------------------------------------------------------------
+# Utilities
+# ---------------------------------------------------------------------------
+require_root() {
+ if [[ "$(id -u)" -ne 0 ]]; then
+ die "This operation requires root privileges. Run with sudo."
+ fi
+}
+
+confirm_action() {
+ local message="$1"
+ if [[ "$CONFIRM_YES" == "true" ]]; then
+ return 0
+ fi
+ printf "%b[?]%b %s [y/N] " "$YELLOW" "$RESET" "$message"
+ local answer
+ read -r answer
+ case "$answer" in
+ [yY]|[yY][eE][sS]) return 0 ;;
+ *) die "Aborted by user" ;;
+ esac
+}
+
+wait_duration() {
+ local remaining="$DURATION"
+ while [[ "$remaining" -gt 0 ]]; do
+ printf "\r %bTime remaining: %ds%b " "$DIM" "$remaining" "$RESET"
+ sleep 1
+ ((remaining--)) || true
+ done
+ printf "\r%40s\r" ""
+}
+
+# ---------------------------------------------------------------------------
+# Fault: cpu-stress
+# ---------------------------------------------------------------------------
+# Saturate every CPU core with a busy loop for DURATION seconds, then clean up.
+# Globals: DURATION (read); CHAOS_ACTIVE, CLEANUP_PIDS (written)
+fault_cpu_stress() {
+    local cores spawned=0
+    cores=$(nproc)
+    section_header "CPU Stress — saturating $cores cores for ${DURATION}s"
+    CHAOS_ACTIVE=true
+
+    while (( spawned < cores )); do
+        # One endless no-op loop per core, tracked so cleanup can kill it.
+        while :; do :; done &
+        CLEANUP_PIDS+=("$!")
+        verbose "Spawned CPU worker PID $!"
+        spawned=$((spawned + 1))
+    done
+
+    log "Started $cores CPU stress workers"
+    wait_duration
+    cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: memory-pressure
+# ---------------------------------------------------------------------------
+# Consume memory by filling a 256M tmpfs mount for DURATION seconds.
+# Globals: DURATION (read); CHAOS_ACTIVE, CLEANUP_FILES (written)
+fault_memory_pressure() {
+    section_header "Memory Pressure — filling tmpfs for ${DURATION}s"
+    CHAOS_ACTIVE=true
+
+    local mount_dir
+    mount_dir=$(mktemp -d /tmp/chaos-mem-XXXXXX)
+    # Register the directory before mounting, so a failed mount still gets it
+    # removed by cleanup_all instead of leaking it in /tmp.
+    CLEANUP_FILES+=("$mount_dir")
+    mount -t tmpfs -o size=256M tmpfs "$mount_dir"
+
+    log "Mounted tmpfs at $mount_dir (256M)"
+    # Best effort: a short write is acceptable here.
+    head -c 240M /dev/urandom > "${mount_dir}/fill.dat" 2>/dev/null || true
+    log "Filled tmpfs with ~240M of data"
+
+    wait_duration
+    cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: disk-fill
+# ---------------------------------------------------------------------------
+fault_disk_fill() {
+ section_header "Disk Fill — filling ${FILL_PATH} to ${FILL_SIZE}% for ${DURATION}s"
+ CHAOS_ACTIVE=true
+
+ local current_usage target_bytes fill_file total_kb
+ fill_file="${FILL_PATH}/chaos-fill-$(date +%s).dat"
+
+ total_kb=$(df --output=size -k "$FILL_PATH" | tail -1 | tr -d ' ')
+ current_usage=$(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' %')
+
+ if [[ "$current_usage" -ge "$FILL_SIZE" ]]; then
+ warn "Disk already at ${current_usage}% — above target ${FILL_SIZE}%"
+ return
+ fi
+
+ target_bytes=$(( (FILL_SIZE - current_usage) * total_kb * 1024 / 100 ))
+ local target_mb=$(( target_bytes / 1048576 ))
+
+ log "Writing ${target_mb}M to $fill_file"
+ dd if=/dev/zero of="$fill_file" bs=1M count="$target_mb" status=none 2>/dev/null || true
+ CLEANUP_FILES+=("$fill_file")
+
+ log "Disk fill complete — $(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' ') used"
+ wait_duration
+ cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: service-kill
+# ---------------------------------------------------------------------------
+fault_service_kill() {
+ if [[ -z "$TARGET_SERVICE" ]]; then
+ die "service-kill requires --target SERVICE_NAME"
+ fi
+ section_header "Service Kill — stopping ${TARGET_SERVICE} for ${DURATION}s"
+ CHAOS_ACTIVE=true
+
+ if ! systemctl is-active --quiet "$TARGET_SERVICE"; then
+ die "Service '$TARGET_SERVICE' is not currently active"
+ fi
+
+ confirm_action "Stop service '$TARGET_SERVICE' for ${DURATION}s?"
+
+ systemctl stop "$TARGET_SERVICE"
+ log "Stopped $TARGET_SERVICE"
+
+ wait_duration
+
+ log "Restarting $TARGET_SERVICE..."
+ systemctl start "$TARGET_SERVICE"
+ log "Service $TARGET_SERVICE restarted"
+ CHAOS_ACTIVE=false
+}
+
+# ---------------------------------------------------------------------------
+# Fault: network-latency
+# ---------------------------------------------------------------------------
+fault_network_latency() {
+ section_header "Network Latency — ${LATENCY_MS}ms on ${NETWORK_IFACE} for ${DURATION}s"
+ CHAOS_ACTIVE=true
+
+ if ! command -v tc &>/dev/null; then
+ die "tc (iproute2) is required for network faults"
+ fi
+
+ tc qdisc del dev "$NETWORK_IFACE" root 2>/dev/null || true
+ tc qdisc add dev "$NETWORK_IFACE" root netem delay "${LATENCY_MS}ms"
+ log "Added ${LATENCY_MS}ms latency to $NETWORK_IFACE"
+
+ wait_duration
+ cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: network-drop
+# ---------------------------------------------------------------------------
+fault_network_drop() {
+ section_header "Network Drop — ${DROP_PERCENT}% loss on ${NETWORK_IFACE} for ${DURATION}s"
+ CHAOS_ACTIVE=true
+
+ if ! command -v tc &>/dev/null; then
+ die "tc (iproute2) is required for network faults"
+ fi
+
+ tc qdisc del dev "$NETWORK_IFACE" root 2>/dev/null || true
+ tc qdisc add dev "$NETWORK_IFACE" root netem loss "${DROP_PERCENT}%"
+ log "Added ${DROP_PERCENT}% packet loss to $NETWORK_IFACE"
+
+ wait_duration
+ cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: dns-failure
+# ---------------------------------------------------------------------------
+fault_dns_failure() {
+ section_header "DNS Failure — breaking DNS for ${DURATION}s"
+ CHAOS_ACTIVE=true
+
+ if [[ -f /etc/resolv.conf.chaos-backup ]]; then
+ die "A chaos backup of resolv.conf already exists — run --cleanup first"
+ fi
+
+ cp /etc/resolv.conf /etc/resolv.conf.chaos-backup
+ CLEANUP_FILES+=("/etc/resolv.conf.chaos-backup")
+
+ printf "# Chaos: DNS intentionally broken\nnameserver 127.0.0.254\n" > /etc/resolv.conf
+ log "Replaced /etc/resolv.conf with broken nameserver"
+
+ wait_duration
+ cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Fault: io-latency
+# ---------------------------------------------------------------------------
+# Generate continuous synced writes under FILL_PATH for DURATION seconds.
+# Globals: FILL_PATH, DURATION (read); CHAOS_ACTIVE, CLEANUP_PIDS,
+#          CLEANUP_FILES (written)
+fault_io_latency() {
+    section_header "I/O Latency — degrading I/O for ${DURATION}s"
+    CHAOS_ACTIVE=true
+
+    local scratch
+    scratch="${FILL_PATH}/chaos-io-$(date +%s).dat"
+
+    # NOTE(review): with count=0 this dd copies nothing, so it appears only to
+    # create the empty scratch file before exiting — confirm the intent.
+    ionice -c 2 -n 7 dd if=/dev/urandom of="$scratch" bs=4K count=0 status=none 2>/dev/null &
+    CLEANUP_PIDS+=("$!")
+    CLEANUP_FILES+=("$scratch")
+
+    # Background worker: rewrite 256 blocks of 4K with fdatasync, sync, pause
+    # half a second, repeat until killed by cleanup.
+    {
+        while true; do
+            ionice -c 3 dd if=/dev/zero of="$scratch" bs=4K count=256 conv=fdatasync status=none 2>/dev/null || true
+            sync
+            sleep 0.5
+        done
+    } &
+    CLEANUP_PIDS+=("$!")
+
+    log "Started degraded I/O worker (idle-class ionice)"
+    wait_duration
+    cleanup_all
+}
+
+# ---------------------------------------------------------------------------
+# Dispatch
+# ---------------------------------------------------------------------------
+do_fault() {
+ require_root
+ if [[ -z "$FAULT_TYPE" ]]; then
+ die "No fault type specified. Use --fault TYPE"
+ fi
+
+ confirm_action "Inject fault '${FAULT_TYPE}' for ${DURATION}s?"
+
+ START_TIME=$(date +%s)
+ log "Starting fault injection: $FAULT_TYPE (duration: ${DURATION}s)"
+
+ case "$FAULT_TYPE" in
+ cpu-stress) fault_cpu_stress ;;
+ memory-pressure) fault_memory_pressure ;;
+ disk-fill) fault_disk_fill ;;
+ service-kill) fault_service_kill ;;
+ network-latency) fault_network_latency ;;
+ network-drop) fault_network_drop ;;
+ dns-failure) fault_dns_failure ;;
+ io-latency) fault_io_latency ;;
+ *) die "Unknown fault type: $FAULT_TYPE" ;;
+ esac
+
+ local elapsed=$(( $(date +%s) - START_TIME ))
+ log "Fault injection complete (${elapsed}s elapsed)"
+}
+
+# ---------------------------------------------------------------------------
+# List fault types
+# ---------------------------------------------------------------------------
+do_list() {
+ section_header "Available Fault Types"
+ printf "\n"
+ printf " %-20s %s\n" "FAULT TYPE" "DESCRIPTION"
+ printf " ─────────────────────────────────────────────────────────────\n"
+ printf " %-20s %s\n" "cpu-stress" "Saturate all CPU cores"
+ printf " %-20s %s\n" "memory-pressure" "Fill memory via tmpfs allocation"
+ printf " %-20s %s\n" "disk-fill" "Fill disk to threshold percentage"
+ printf " %-20s %s\n" "service-kill" "Stop a systemd service temporarily"
+ printf " %-20s %s\n" "network-latency" "Add network latency via tc netem"
+ printf " %-20s %s\n" "network-drop" "Drop packets via tc netem"
+ printf " %-20s %s\n" "dns-failure" "Break DNS resolution temporarily"
+ printf " %-20s %s\n" "io-latency" "Degrade I/O performance via ionice"
+ printf "\n"
+}
+
+# ---------------------------------------------------------------------------
+# Verify system health
+# ---------------------------------------------------------------------------
+do_verify() {
+ section_header "System Health Check"
+ local issues=0
+
+ # CPU load
+ local load_1m
+ load_1m=$(awk '{print $1}' /proc/loadavg)
+ local cores
+ cores=$(nproc)
+ if awk "BEGIN {exit !($load_1m > $cores * 0.9)}"; then
+ field_color "CPU load (1m):" "$RED" "${load_1m} — HIGH (cores: ${cores})"
+ ((issues++)) || true
+ else
+ field_color "CPU load (1m):" "$GREEN" "${load_1m} (cores: ${cores})"
+ fi
+
+ # Memory
+ local mem_avail_kb mem_total_kb mem_pct
+ mem_total_kb=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
+ mem_avail_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)
+ mem_pct=$(( (mem_total_kb - mem_avail_kb) * 100 / mem_total_kb ))
+ if [[ "$mem_pct" -gt 90 ]]; then
+ field_color "Memory usage:" "$RED" "${mem_pct}% — HIGH"
+ ((issues++)) || true
+ else
+ field_color "Memory usage:" "$GREEN" "${mem_pct}%"
+ fi
+
+ # Disk
+ local disk_pct
+ disk_pct=$(df --output=pcent / | tail -1 | tr -d ' %')
+ if [[ "$disk_pct" -gt 90 ]]; then
+ field_color "Disk usage (/):" "$RED" "${disk_pct}% — HIGH"
+ ((issues++)) || true
+ else
+ field_color "Disk usage (/):" "$GREEN" "${disk_pct}%"
+ fi
+
+ # Network connectivity
+ if ping -c 1 -W 3 8.8.8.8 &>/dev/null; then
+ field_color "Network (ping):" "$GREEN" "OK"
+ else
+ field_color "Network (ping):" "$RED" "UNREACHABLE"
+ ((issues++)) || true
+ fi
+
+ # DNS resolution
+ if host google.com &>/dev/null; then
+ field_color "DNS resolution:" "$GREEN" "OK"
+ else
+ field_color "DNS resolution:" "$RED" "FAILING"
+ ((issues++)) || true
+ fi
+
+ # Chaos artifacts
+ if [[ -f /etc/resolv.conf.chaos-backup ]]; then
+ field_color "Chaos artifacts:" "$YELLOW" "resolv.conf backup found"
+ ((issues++)) || true
+ else
+ field_color "Chaos artifacts:" "$GREEN" "None"
+ fi
+
+ printf "\n"
+ if [[ "$issues" -gt 0 ]]; then
+ warn "Found $issues issue(s)"
+ return 1
+ else
+ log "All checks passed"
+ return 0
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# Plan execution
+# ---------------------------------------------------------------------------
+do_plan() {
+ require_root
+ if [[ -z "$PLAN_FILE" ]]; then
+ die "No plan file specified. Use --plan FILE"
+ fi
+ if [[ ! -f "$PLAN_FILE" ]]; then
+ die "Plan file not found: $PLAN_FILE"
+ fi
+ if ! command -v jq &>/dev/null; then
+ die "jq is required for plan execution"
+ fi
+
+ section_header "Executing Chaos Plan: $PLAN_FILE"
+
+ local plan_length
+ plan_length=$(jq '.faults | length' "$PLAN_FILE")
+ log "Plan contains $plan_length fault(s)"
+
+ local i fault_entry f_type f_duration
+ for ((i = 0; i < plan_length; i++)); do
+ fault_entry=$(jq -r ".faults[$i]" "$PLAN_FILE")
+ f_type=$(printf '%s' "$fault_entry" | jq -r '.type')
+ f_duration=$(printf '%s' "$fault_entry" | jq -r '.duration // 30')
+
+ log "Step $((i + 1))/$plan_length: $f_type (${f_duration}s)"
+
+ FAULT_TYPE="$f_type"
+ DURATION="$f_duration"
+
+ # Extract optional fields
+ local f_target f_iface
+ f_target=$(printf '%s' "$fault_entry" | jq -r '.target // empty')
+ f_iface=$(printf '%s' "$fault_entry" | jq -r '.iface // empty')
+
+ [[ -n "$f_target" ]] && TARGET_SERVICE="$f_target"
+ [[ -n "$f_iface" ]] && NETWORK_IFACE="$f_iface"
+
+ case "$FAULT_TYPE" in
+ cpu-stress) fault_cpu_stress ;;
+ memory-pressure) fault_memory_pressure ;;
+ disk-fill) fault_disk_fill ;;
+ service-kill) fault_service_kill ;;
+ network-latency) fault_network_latency ;;
+ network-drop) fault_network_drop ;;
+ dns-failure) fault_dns_failure ;;
+ io-latency) fault_io_latency ;;
+ *) warn "Unknown fault type in plan: $FAULT_TYPE — skipping" ;;
+ esac
+
+ if [[ "$i" -lt $((plan_length - 1)) ]]; then
+ log "Pausing 5s before next fault..."
+ sleep 5
+ fi
+ done
+
+ log "Plan execution complete"
+}
+
+# ---------------------------------------------------------------------------
+# Force cleanup
+# ---------------------------------------------------------------------------
+# Force a cleanup pass even though no fault was injected by this invocation.
+do_cleanup() {
+    require_root
+    section_header "Force Cleanup"
+
+    # cleanup_all is a no-op unless the active flag is raised.
+    CHAOS_ACTIVE=true
+    cleanup_all
+
+    log "Force cleanup complete"
+}
+
+# ---------------------------------------------------------------------------
+# Help
+# ---------------------------------------------------------------------------
+show_help() {
+ cat <&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; ((ERRORS++)) || true; }
+debug() { [[ "$VERBOSE" == "true" ]] && echo -e "${CYAN}[DEBUG]${RESET} $*"; }
+step() { echo -e "\n${BOLD}${BLUE}── $* ──${RESET}"; }
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat << EOF
+${BOLD}$SCRIPT_NAME${RESET} — Bootstrap a new server with chezmoi dotfiles
+
+${BOLD}USAGE${RESET}
+ $SCRIPT_NAME --repo [OPTIONS]
+
+${BOLD}REQUIRED${RESET}
+ --repo Git repository URL (HTTPS or SSH)
+
+${BOLD}OPTIONS${RESET}
+ --force Apply changes (default: dry-run)
+ --install-dir Chezmoi install directory (default: /usr/local/bin)
+ --install-age Also install age for encrypted files
+ --age-key Path to age key file (default: ~/.config/chezmoi/key.txt)
+ --packages Comma-separated packages to install first
+ --chezmoi-args Extra arguments to pass to chezmoi init
+ --verbose Show debug output
+ --no-color Disable colored output
+ --help Show this help
+
+${BOLD}EXAMPLES${RESET}
+ # Dry run — see what would happen
+ $SCRIPT_NAME --repo https://github.com/user/dotfiles.git
+
+ # Apply dotfiles from a private repo
+ $SCRIPT_NAME --repo git@github.com:user/dotfiles.git --force
+
+ # Install age + pre-install packages + apply
+ $SCRIPT_NAME --repo git@github.com:user/dotfiles.git \\
+ --install-age --packages vim,tmux,htop --force
+
+ # Custom install dir + verbose
+ $SCRIPT_NAME --repo https://github.com/user/dotfiles.git \\
+ --install-dir \$HOME/.local/bin --verbose --force
+
+${BOLD}ENVIRONMENT VARIABLES${RESET}
+ DRY_RUN Set to false to apply (same as --force)
+ VERBOSE Set to true for debug output
+ COLOR auto | always | never
+ AGE_KEY_PATH Path to age identity file
+EOF
+}
+
+# ── Argument Parsing ──────────────────────────────────────────────────
+parse_args() {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --repo) REPO_URL="$2"; shift 2 ;;
+ --force) DRY_RUN="false"; shift ;;
+ --install-dir) INSTALL_DIR="$2"; shift 2 ;;
+ --install-age) INSTALL_AGE="true"; shift ;;
+ --age-key) AGE_KEY_PATH="$2"; shift 2 ;;
+ --packages) PRE_PACKAGES="$2"; shift 2 ;;
+ --chezmoi-args) CHEZMOI_ARGS="$2"; shift 2 ;;
+ --verbose) VERBOSE="true"; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ --help|-h) usage; exit 0 ;;
+ *) err "Unknown option: $1"; usage; exit 1 ;;
+ esac
+ done
+
+ if [[ -z "$REPO_URL" ]]; then
+ err "Missing required --repo argument"
+ usage
+ exit 1
+ fi
+}
+
+# ── System Detection ──────────────────────────────────────────────────
+# Detect OS, architecture, short hostname, distribution and package manager,
+# then log the findings.
+# Globals written: OS, ARCH, HOSTNAME_SHORT, DISTRO, DISTRO_VERSION, PKG_MGR
+# Note: /etc/os-release is sourced into the current shell, so the variables
+# it defines become visible to the rest of the script.
+detect_system() {
+    step "Detecting system"
+
+    OS="$(uname -s)"
+    ARCH="$(uname -m)"
+    HOSTNAME_SHORT="$(hostname -s)"
+
+    DISTRO="unknown"
+    DISTRO_VERSION="unknown"
+    if [[ -f /etc/os-release ]]; then
+        # shellcheck disable=SC1091
+        . /etc/os-release
+        DISTRO="${ID:-unknown}"
+        DISTRO_VERSION="${VERSION_ID:-unknown}"
+    fi
+
+    # First match wins, in order of preference.
+    PKG_MGR="unknown"
+    local candidate
+    for candidate in apt dnf yum pacman; do
+        if command -v "$candidate" > /dev/null 2>&1; then
+            PKG_MGR="$candidate"
+            break
+        fi
+    done
+
+    log "OS: $OS ($ARCH)"
+    log "Distro: $DISTRO $DISTRO_VERSION"
+    log "Package manager: $PKG_MGR"
+    log "Hostname: $HOSTNAME_SHORT"
+}
+
+# ── Package Installation ─────────────────────────────────────────────
+install_packages() {
+ local packages="$1"
+ if [[ -z "$packages" ]]; then
+ return 0
+ fi
+
+ step "Installing prerequisite packages"
+
+ # Convert comma-separated to space-separated
+ local pkg_list
+ pkg_list="${packages//,/ }"
+
+ log "Packages: $pkg_list"
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "[DRY RUN] Would install: $pkg_list"
+ return 0
+ fi
+
+ case "$PKG_MGR" in
+ apt)
+ sudo apt update -qq
+ # shellcheck disable=SC2086
+ sudo apt install -y -qq $pkg_list
+ ;;
+ dnf|yum)
+ # shellcheck disable=SC2086
+ sudo "$PKG_MGR" install -y -q $pkg_list
+ ;;
+ pacman)
+ # shellcheck disable=SC2086
+ sudo pacman -S --noconfirm $pkg_list
+ ;;
+ *)
+ warn "Unknown package manager — install manually: $pkg_list"
+ ;;
+ esac
+
+ log "Packages installed"
+}
+
+# ── Install chezmoi ───────────────────────────────────────────────────
+install_chezmoi() {
+ step "Installing chezmoi"
+
+ if command -v chezmoi > /dev/null 2>&1; then
+ local current_version
+ current_version="$(chezmoi --version | awk '{print $3}')"
+ log "chezmoi already installed: $current_version"
+ return 0
+ fi
+
+ log "Install directory: $INSTALL_DIR"
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "[DRY RUN] Would install chezmoi to $INSTALL_DIR"
+ return 0
+ fi
+
+ mkdir -p "$INSTALL_DIR"
+ sh -c "$(curl -fsLS get.chezmoi.io)" -- -b "$INSTALL_DIR"
+
+ if command -v chezmoi > /dev/null 2>&1; then
+ log "chezmoi installed: $(chezmoi --version | awk '{print $3}')"
+ else
+ # Might not be in PATH yet
+ if [[ -x "$INSTALL_DIR/chezmoi" ]]; then
+ export PATH="$INSTALL_DIR:$PATH"
+ log "chezmoi installed: $(chezmoi --version | awk '{print $3}')"
+ log "Added $INSTALL_DIR to PATH"
+ else
+ err "chezmoi installation failed"
+ return 1
+ fi
+ fi
+}
+
+# ── Install age ───────────────────────────────────────────────────────
+install_age() {
+ if [[ "$INSTALL_AGE" != "true" ]]; then
+ return 0
+ fi
+
+ step "Installing age"
+
+ if command -v age > /dev/null 2>&1; then
+ log "age already installed: $(age --version)"
+ return 0
+ fi
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "[DRY RUN] Would install age"
+ return 0
+ fi
+
+ case "$PKG_MGR" in
+ apt) sudo apt install -y -qq age ;;
+ dnf) sudo dnf install -y -q age ;;
+ *)
+ # Fallback: install from GitHub
+ local age_version
+ age_version="$(curl -s https://api.github.com/repos/FiloSottile/age/releases/latest | grep tag_name | cut -d'"' -f4)"
+ curl -fsSL "https://github.com/FiloSottile/age/releases/download/${age_version}/age-${age_version}-linux-amd64.tar.gz" | \
+ sudo tar -xz -C /usr/local/bin/ --strip-components=1 age/age age/age-keygen
+ ;;
+ esac
+
+ log "age installed: $(age --version)"
+}
+
+# ── Age Key Setup ─────────────────────────────────────────────────────
+# setup_age_key
+#   When age support was requested, report whether the identity file at
+#   AGE_KEY_PATH exists; if not, print instructions for creating or copying
+#   one. Never creates the key itself.
+setup_age_key() {
+    [[ "$INSTALL_AGE" == "true" ]] || return 0
+
+    step "Checking age key"
+
+    if [[ ! -f "$AGE_KEY_PATH" ]]; then
+        warn "No age key found at $AGE_KEY_PATH"
+        warn "If your dotfiles use encrypted files, create a key:"
+        warn " age-keygen -o $AGE_KEY_PATH"
+        warn "Or copy your existing key from another machine"
+    else
+        log "Age key exists: $AGE_KEY_PATH"
+        return 0
+    fi
+}
+
+# ── Initialize chezmoi ────────────────────────────────────────────────
+# init_chezmoi
+#   Bring the dotfiles onto this machine. If the chezmoi source directory
+#   already exists, run `chezmoi update`; otherwise run
+#   `chezmoi init --apply` against REPO_URL with any extra CHEZMOI_ARGS.
+#   In dry-run mode only the intended command is logged.
+init_chezmoi() {
+    step "Initializing chezmoi"
+
+    log "Repository: $REPO_URL"
+
+    local source_dir="$HOME/.local/share/chezmoi"
+
+    if [[ -d "$source_dir" ]]; then
+        warn "chezmoi source directory already exists"
+        warn "Use 'chezmoi update' to pull latest changes"
+
+        case "$DRY_RUN" in
+            true)
+                log "[DRY RUN] Would run: chezmoi update"
+                ;;
+            *)
+                log "Running chezmoi update..."
+                chezmoi update -v
+                ;;
+        esac
+        return 0
+    fi
+
+    if [[ "$DRY_RUN" != "true" ]]; then
+        # CHEZMOI_ARGS is intentionally unquoted so it splits into options.
+        # shellcheck disable=SC2086
+        chezmoi init --apply -v "$REPO_URL" $CHEZMOI_ARGS
+        log "chezmoi initialized and applied"
+    else
+        log "[DRY RUN] Would run: chezmoi init --apply $REPO_URL $CHEZMOI_ARGS"
+        return 0
+    fi
+}
+
+# ── Verify ────────────────────────────────────────────────────────────
+# verify
+#   Post-install sanity check: confirm chezmoi is on PATH, report how many
+#   files it manages and how many differ from the source state, and in
+#   verbose mode list the managed files and run `chezmoi doctor`.
+#   Skipped entirely in dry-run mode. Returns 1 if chezmoi is not found.
+verify() {
+    step "Verification"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY RUN] Skipping verification"
+        return 0
+    fi
+
+    command -v chezmoi > /dev/null 2>&1 || {
+        err "chezmoi not found in PATH"
+        return 1
+    }
+
+    local n_managed
+    n_managed="$(chezmoi managed | wc -l)"
+    log "Managed files: $n_managed"
+
+    # Per-file listing only in verbose mode
+    if [[ "$VERBOSE" == "true" ]]; then
+        debug "Managed files:"
+        chezmoi managed | while read -r path; do
+            debug " $path"
+        done
+    fi
+
+    # Any line of `chezmoi status` output is a file that differs
+    local n_changed
+    n_changed="$(chezmoi status | wc -l)"
+    if [[ "$n_changed" -gt 0 ]]; then
+        warn "$n_changed files differ from source:"
+        chezmoi status
+    else
+        log "All managed files match source"
+    fi
+
+    # Doctor output is diagnostic only; its exit status is ignored
+    debug "Running chezmoi doctor..."
+    if [[ "$VERBOSE" == "true" ]]; then
+        chezmoi doctor || true
+    fi
+}
+
+# ── Summary ───────────────────────────────────────────────────────────
+summary() {
+ step "Summary"
+
+ echo ""
+ echo -e " ${BOLD}Hostname:${RESET} $HOSTNAME_SHORT"
+ echo -e " ${BOLD}Distro:${RESET} $DISTRO $DISTRO_VERSION"
+ echo -e " ${BOLD}Repository:${RESET} $REPO_URL"
+ echo -e " ${BOLD}chezmoi:${RESET} $(command -v chezmoi 2>/dev/null || echo 'not installed')"
+
+ if command -v age > /dev/null 2>&1; then
+ echo -e " ${BOLD}age:${RESET} $(age --version)"
+ fi
+
+ if [[ -d "$HOME/.local/share/chezmoi" ]]; then
+ echo -e " ${BOLD}Source dir:${RESET} $HOME/.local/share/chezmoi"
+ echo -e " ${BOLD}Managed:${RESET} $(chezmoi managed 2>/dev/null | wc -l) files"
+ fi
+
+ echo ""
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo -e " ${YELLOW}${BOLD}DRY RUN${RESET} — no changes were made."
+ echo -e " Run with ${BOLD}--force${RESET} to apply."
+ elif [[ "$ERRORS" -gt 0 ]]; then
+ echo -e " ${RED}${BOLD}Completed with $ERRORS error(s)${RESET}"
+ else
+ echo -e " ${GREEN}${BOLD}Bootstrap complete${RESET}"
+ fi
+
+ echo ""
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+# main ARGS...
+#   Entry point: parse options, print the banner, run every bootstrap stage
+#   in order, and exit 1 if ERRORS is non-zero, otherwise 0.
+main() {
+    parse_args "$@"
+    setup_colors
+
+    echo ""
+    echo -e "${BOLD}$SCRIPT_NAME${RESET} — chezmoi dotfile bootstrap"
+    echo ""
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "Running in ${YELLOW}DRY RUN${RESET} mode (use --force to apply)"
+    fi
+
+    detect_system
+    install_packages "$PRE_PACKAGES"
+
+    # Remaining stages take no arguments and run strictly in this order.
+    local stage
+    for stage in install_chezmoi install_age setup_age_key init_chezmoi verify summary; do
+        "$stage"
+    done
+
+    if [[ "$ERRORS" -gt 0 ]]; then
+        exit 1
+    fi
+    exit 0
+}
+
+main "$@"
diff --git a/cisa-kev-monitor.sh b/cisa-kev-monitor.sh
new file mode 100644
index 0000000..dd70212
--- /dev/null
+++ b/cisa-kev-monitor.sh
@@ -0,0 +1,575 @@
+#!/usr/bin/env bash
+
+##########################################################################################
+#### cisa-kev-monitor.sh — Monitor CISA Known Exploited Vulnerabilities catalog ####
+#### Polls the KEV JSON feed, detects new entries, alerts via email/Slack/Telegram ####
+#### Requires: bash 4+, curl, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./cisa-kev-monitor.sh ####
+#### ./cisa-kev-monitor.sh --filter linux,kernel ####
+#### ./cisa-kev-monitor.sh --telegram --filter linux ####
+#### ####
+#### See --help for all options. ####
+##########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# URL of the KEV JSON feed downloaded by fetch_kev.
+KEV_URL="https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"
+# State directory: KEV_STATE_DIR if set, otherwise ".cisa-kev-monitor" under
+# $HOME (or under /tmp when HOME is unset or empty).
+STATE_DIR="${KEV_STATE_DIR:-${HOME:-/tmp}/.cisa-kev-monitor}"
+# List of CVE IDs already seen; compared against the feed in run_monitor.
+STATE_FILE="$STATE_DIR/known-cves.txt"
+# Comma-separated keywords; empty means no filtering.
+FILTER_KEYWORDS="${KEV_FILTER:-}"
+VERBOSE="${VERBOSE:-false}"
+# Colour mode; setup_colors treats "never" and "auto" specially.
+COLOR="${COLOR:-auto}"
+
+# Notification channels (each overridable through a KEV_* environment variable)
+SMTP_TO="${KEV_SMTP_TO:-}"
+# Sender defaults to cisa-kev-monitor@<FQDN>, or @localhost if `hostname -f` fails.
+SMTP_FROM="${KEV_SMTP_FROM:-cisa-kev-monitor@$(hostname -f 2>/dev/null || echo localhost)}"
+SLACK_WEBHOOK="${KEV_SLACK_WEBHOOK:-}"
+TELEGRAM_BOT_TOKEN="${KEV_TELEGRAM_BOT_TOKEN:-}"
+TELEGRAM_CHAT_ID="${KEV_TELEGRAM_CHAT_ID:-}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# JSON objects (one per element) for new CVEs collected by run_monitor.
+NEW_CVES=()
+NEW_COUNT=0
+TOTAL_COUNT=0
+# Set to the epoch start time in main.
+START_TIME=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then return; fi
+ if [[ "$COLOR" == "auto" && ! -t 1 ]]; then return; fi
+ RED="\033[0;31m"
+ GREEN="\033[0;32m"
+ YELLOW="\033[0;33m"
+ BLUE="\033[0;34m"
+ BOLD="\033[1m"
+ DIM="\033[2m"
+ RESET="\033[0m"
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+die() { printf "%b\n" "${RED}[ERROR]${RESET} $*" >&2; exit 1; }
+log_info() { printf "%b\n" "${GREEN}[INFO]${RESET} $*"; }
+log_warn() { printf "%b\n" "${YELLOW}[WARN]${RESET} $*"; }
+log_verbose() { [[ "$VERBOSE" == "true" ]] && printf "%b\n" "${DIM}[DEBUG]${RESET} $*" || true; }
+
+# usage — emit the help text, then exit 0.
+# NOTE(review): the `cat` line below looks truncated in this copy: a bare
+# "EOF" terminator follows with no visible "<<EOF" opener or help text.
+# Confirm against the original script before relying on this function.
+usage() {
+ cat <&1 | logger -t kev-monitor
+EOF
+ exit 0
+}
+
+# ── Dependency Check ──────────────────────────────────────────────────
+# check_deps
+#   Abort via die with the list of missing tools unless both curl and jq are
+#   found on PATH.
+check_deps() {
+    local missing=()
+    for cmd in curl jq; do
+        command -v "$cmd" &>/dev/null || missing+=("$cmd")
+    done
+
+    (( ${#missing[@]} == 0 )) || die "Missing required commands: ${missing[*]}"
+}
+
+# ── Argument Parsing ──────────────────────────────────────────────────
+DRY_RUN="false"
+LIST_MODE="false"
+LIST_NEW_DAYS=""
+STATS_MODE="false"
+RESET_MODE="false"
+NOTIFY_EMAIL="false"
+NOTIFY_SLACK="false"
+NOTIFY_TELEGRAM="false"
+
+parse_args() {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --filter) FILTER_KEYWORDS="${2:?--filter requires keywords}"; shift 2 ;;
+ --email) SMTP_TO="${2:?--email requires an address}"; NOTIFY_EMAIL="true"; shift 2 ;;
+ --slack) SLACK_WEBHOOK="${2:?--slack requires a webhook URL}"; NOTIFY_SLACK="true"; shift 2 ;;
+ --telegram) NOTIFY_TELEGRAM="true"; shift ;;
+ --list) LIST_MODE="true"; shift ;;
+ --list-new) LIST_NEW_DAYS="${2:?--list-new requires days}"; shift 2 ;;
+ --stats) STATS_MODE="true"; shift ;;
+ --state-dir) STATE_DIR="${2:?--state-dir requires a path}"; STATE_FILE="$STATE_DIR/known-cves.txt"; shift 2 ;;
+ --reset) RESET_MODE="true"; shift ;;
+ --dry-run) DRY_RUN="true"; shift ;;
+ --verbose) VERBOSE="true"; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ --help) usage ;;
+ *) die "Unknown option: $1" ;;
+ esac
+ done
+
+ if [[ "$NOTIFY_TELEGRAM" == "true" ]]; then
+ [[ -z "$TELEGRAM_BOT_TOKEN" ]] && die "KEV_TELEGRAM_BOT_TOKEN not set"
+ [[ -z "$TELEGRAM_CHAT_ID" ]] && die "KEV_TELEGRAM_CHAT_ID not set"
+ fi
+}
+
+# ── Fetch KEV Feed ────────────────────────────────────────────────────
+fetch_kev() {
+ log_verbose "Fetching KEV catalog from CISA..."
+ local tmpfile
+ tmpfile=$(mktemp)
+
+ if ! curl -sS --max-time 30 --retry 2 -o "$tmpfile" "$KEV_URL" 2>/dev/null; then
+ rm -f "$tmpfile"
+ die "Failed to fetch KEV catalog from $KEV_URL"
+ fi
+
+ # Validate JSON
+ if ! jq empty "$tmpfile" 2>/dev/null; then
+ rm -f "$tmpfile"
+ die "Invalid JSON received from KEV feed"
+ fi
+
+ echo "$tmpfile"
+}
+
+# ── Filter Entries ────────────────────────────────────────────────────
+filter_entries() {
+ local json_file="$1"
+
+ if [[ -z "$FILTER_KEYWORDS" ]]; then
+ jq -r '.vulnerabilities[]' "$json_file"
+ return
+ fi
+
+ # Build jq filter from comma-separated keywords
+ local jq_filter=""
+ IFS=',' read -ra keywords <<< "$FILTER_KEYWORDS"
+ for kw in "${keywords[@]}"; do
+ kw=$(echo "$kw" | xargs) # trim whitespace
+ kw_lower=$(echo "$kw" | tr '[:upper:]' '[:lower:]')
+ if [[ -n "$jq_filter" ]]; then
+ jq_filter="$jq_filter or"
+ fi
+ jq_filter="$jq_filter ((.vendorProject // \"\" | ascii_downcase | contains(\"$kw_lower\")) or (.product // \"\" | ascii_downcase | contains(\"$kw_lower\")) or (.shortDescription // \"\" | ascii_downcase | contains(\"$kw_lower\")) or (.vulnerabilityName // \"\" | ascii_downcase | contains(\"$kw_lower\")))"
+ done
+
+ jq -r ".vulnerabilities[] | select($jq_filter)" "$json_file"
+}
+
+# ── Initialize State ─────────────────────────────────────────────────
+init_state() {
+ mkdir -p "$STATE_DIR"
+
+ if [[ "$RESET_MODE" == "true" && -f "$STATE_FILE" ]]; then
+ rm -f "$STATE_FILE"
+ log_info "State file reset"
+ fi
+
+ if [[ ! -f "$STATE_FILE" ]]; then
+ log_info "First run — initializing state file"
+ return 1
+ fi
+ return 0
+}
+
+# ── Format CVE for Display ────────────────────────────────────────────
+format_cve_text() {
+ local cve="$1"
+ local cve_id vendor product name date_added desc due_date ransomware
+
+ cve_id=$(echo "$cve" | jq -r '.cveID')
+ vendor=$(echo "$cve" | jq -r '.vendorProject')
+ product=$(echo "$cve" | jq -r '.product')
+ name=$(echo "$cve" | jq -r '.vulnerabilityName')
+ date_added=$(echo "$cve" | jq -r '.dateAdded')
+ desc=$(echo "$cve" | jq -r '.shortDescription')
+ due_date=$(echo "$cve" | jq -r '.dueDate')
+ ransomware=$(echo "$cve" | jq -r '.knownRansomwareCampaignUse')
+
+ printf "%b%s%b — %s\n" "$BOLD" "$cve_id" "$RESET" "$name"
+ printf " Vendor: %s / %s\n" "$vendor" "$product"
+ printf " Added: %s\n" "$date_added"
+ printf " Due: %s\n" "$due_date"
+ printf " Ransomware: %s\n" "$ransomware"
+ printf " %s\n" "$desc"
+ printf " NVD: https://nvd.nist.gov/vuln/detail/%s\n" "$cve_id"
+ echo ""
+}
+
+# ── Format CVE for Notifications ──────────────────────────────────────
+format_cve_plain() {
+ local cve="$1"
+ local cve_id vendor product name date_added desc
+
+ cve_id=$(echo "$cve" | jq -r '.cveID')
+ vendor=$(echo "$cve" | jq -r '.vendorProject')
+ product=$(echo "$cve" | jq -r '.product')
+ name=$(echo "$cve" | jq -r '.vulnerabilityName')
+ date_added=$(echo "$cve" | jq -r '.dateAdded')
+ desc=$(echo "$cve" | jq -r '.shortDescription')
+
+ echo "$cve_id — $name"
+ echo "Vendor: $vendor / $product"
+ echo "Added: $date_added"
+ echo "$desc"
+ echo "https://nvd.nist.gov/vuln/detail/$cve_id"
+ echo ""
+}
+
+format_cve_telegram() {
+ local cve="$1"
+ local cve_id vendor product name date_added desc ransomware
+
+ cve_id=$(echo "$cve" | jq -r '.cveID')
+ vendor=$(echo "$cve" | jq -r '.vendorProject')
+ product=$(echo "$cve" | jq -r '.product')
+ name=$(echo "$cve" | jq -r '.vulnerabilityName')
+ date_added=$(echo "$cve" | jq -r '.dateAdded')
+ desc=$(echo "$cve" | jq -r '.shortDescription' | head -c 200)
+ ransomware=$(echo "$cve" | jq -r '.knownRansomwareCampaignUse')
+
+ local emoji="🔴"
+ [[ "$ransomware" == "Known" ]] && emoji="🔴🛑"
+
+ echo "${emoji} ${cve_id} — ${name}"
+ echo "📦 ${vendor} / ${product}"
+ echo "📅 Added: ${date_added}"
+ [[ "$ransomware" == "Known" ]] && echo "💀 Known ransomware use"
+ echo ""
+ echo "${desc}..."
+ echo ""
+ echo "🔗 NVD"
+}
+
+# ── Notification: Email ───────────────────────────────────────────────
+send_email() {
+ local subject="$1"
+ local body="$2"
+
+ if ! command -v sendmail &>/dev/null && ! command -v msmtp &>/dev/null; then
+ log_warn "No sendmail or msmtp found — skipping email"
+ return
+ fi
+
+ local mailer="sendmail"
+ command -v msmtp &>/dev/null && mailer="msmtp"
+
+ {
+ echo "From: $SMTP_FROM"
+ echo "To: $SMTP_TO"
+ echo "Subject: $subject"
+ echo "Content-Type: text/plain; charset=utf-8"
+ echo ""
+ echo "$body"
+ } | "$mailer" -t "$SMTP_TO"
+
+ log_verbose "Email sent to $SMTP_TO"
+}
+
+# ── Notification: Slack ───────────────────────────────────────────────
+send_slack() {
+ local text="$1"
+
+ # Truncate for Slack's 3000 char limit
+ text=$(echo "$text" | head -c 2900)
+
+ local payload
+ payload=$(jq -n --arg text "$text" '{text: $text}')
+
+ curl -sS --max-time 10 -X POST \
+ -H "Content-Type: application/json" \
+ -d "$payload" \
+ "$SLACK_WEBHOOK" >/dev/null 2>&1
+
+ log_verbose "Slack notification sent"
+}
+
+# ── Notification: Telegram ────────────────────────────────────────────
+send_telegram() {
+ local text="$1"
+
+ # Telegram message limit is 4096 chars
+ text=$(echo "$text" | head -c 4000)
+
+ curl -sS --max-time 10 -X POST \
+ "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+ -d "chat_id=${TELEGRAM_CHAT_ID}" \
+ -d "parse_mode=HTML" \
+ -d "disable_web_page_preview=true" \
+ --data-urlencode "text=$text" >/dev/null 2>&1
+
+ log_verbose "Telegram notification sent"
+}
+
+# ── Notify All Channels ──────────────────────────────────────────────
+notify() {
+ local count=${#NEW_CVES[@]}
+ local filter_label=""
+ [[ -n "$FILTER_KEYWORDS" ]] && filter_label=" (filter: $FILTER_KEYWORDS)"
+
+ # Build plain text body
+ local plain_body=""
+ plain_body+="CISA KEV Monitor — $count new CVE(s) detected${filter_label}"
+ plain_body+=$'\n\n'
+ for cve_json in "${NEW_CVES[@]}"; do
+ plain_body+=$(format_cve_plain "$cve_json")
+ plain_body+=$'\n'
+ done
+
+ # Build Telegram body
+ local tg_body=""
+ tg_body+="🚨 CISA KEV — ${count} new CVE(s)${filter_label}"
+ tg_body+=$'\n\n'
+ for cve_json in "${NEW_CVES[@]}"; do
+ tg_body+=$(format_cve_telegram "$cve_json")
+ tg_body+=$'\n'
+ done
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log_warn "DRY-RUN — would send notifications to:"
+ [[ "$NOTIFY_EMAIL" == "true" ]] && echo " Email: $SMTP_TO"
+ [[ "$NOTIFY_SLACK" == "true" ]] && echo " Slack: (webhook configured)"
+ [[ "$NOTIFY_TELEGRAM" == "true" ]] && echo " Telegram: chat $TELEGRAM_CHAT_ID"
+ return
+ fi
+
+ [[ "$NOTIFY_EMAIL" == "true" ]] && send_email "CISA KEV: $count new CVE(s)${filter_label}" "$plain_body"
+ [[ "$NOTIFY_SLACK" == "true" ]] && send_slack "$plain_body"
+ [[ "$NOTIFY_TELEGRAM" == "true" ]] && send_telegram "$tg_body"
+}
+
+# ── Mode: Stats ───────────────────────────────────────────────────────
+# run_stats
+#   Download the catalog and print summary numbers: catalog version, total
+#   entries, entries added in the last 7 and 30 days, entries with known
+#   ransomware use and, when a filter is set, how many entries match it.
+run_stats() {
+    local feed
+    feed=$(fetch_kev)
+
+    local total catalog_version
+    total=$(jq '.vulnerabilities | length' "$feed")
+    catalog_version=$(jq -r '.catalogVersion' "$feed")
+
+    # GNU date syntax first, BSD/macOS syntax as the fallback
+    local since_7d since_30d
+    since_7d=$(date -u -d "7 days ago" '+%Y-%m-%d' 2>/dev/null || date -u -v-7d '+%Y-%m-%d' 2>/dev/null)
+    since_30d=$(date -u -d "30 days ago" '+%Y-%m-%d' 2>/dev/null || date -u -v-30d '+%Y-%m-%d' 2>/dev/null)
+
+    local added_7d added_30d
+    added_7d=$(jq --arg d "$since_7d" '[.vulnerabilities[] | select(.dateAdded >= $d)] | length' "$feed")
+    added_30d=$(jq --arg d "$since_30d" '[.vulnerabilities[] | select(.dateAdded >= $d)] | length' "$feed")
+
+    local with_ransomware
+    with_ransomware=$(jq '[.vulnerabilities[] | select(.knownRansomwareCampaignUse == "Known")] | length' "$feed")
+
+    echo ""
+    printf "%bCISA KEV Catalog Statistics%b\n" "$BOLD" "$RESET"
+    echo "Catalog version: $catalog_version"
+    echo "Total CVEs: $total"
+    echo "Last 7 days: $added_7d"
+    echo "Last 30 days: $added_30d"
+    echo "Ransomware use: $with_ransomware"
+
+    if [[ -n "$FILTER_KEYWORDS" ]]; then
+        local n_matching
+        n_matching=$(filter_entries "$feed" | jq -s 'length')
+        echo "Matching filter: $n_matching (keywords: $FILTER_KEYWORDS)"
+    fi
+
+    rm -f "$feed"
+}
+
+# ── Mode: List New ────────────────────────────────────────────────────
+run_list_new() {
+ local days="$1"
+ local json_file
+ json_file=$(fetch_kev)
+
+ local cutoff
+ cutoff=$(date -u -d "$days days ago" '+%Y-%m-%d' 2>/dev/null || date -u -v-"${days}d" '+%Y-%m-%d' 2>/dev/null)
+
+ echo ""
+ printf "%bCISA KEV — entries added in the last %s days%b\n\n" "$BOLD" "$days" "$RESET"
+
+ local count=0
+ while IFS= read -r entry; do
+ [[ -z "$entry" ]] && continue
+ local date_added
+ date_added=$(echo "$entry" | jq -r '.dateAdded')
+ if [[ "$date_added" > "$cutoff" || "$date_added" == "$cutoff" ]]; then
+ format_cve_text "$entry"
+ count=$((count + 1))
+ fi
+ done < <(filter_entries "$json_file" | jq -c '.')
+
+ log_info "$count entries found"
+ rm -f "$json_file"
+}
+
+# ── Mode: List ────────────────────────────────────────────────────────
+run_list() {
+ local json_file
+ json_file=$(fetch_kev)
+
+ echo ""
+ printf "%bCISA KEV — all matching entries%b\n" "$BOLD" "$RESET"
+ [[ -n "$FILTER_KEYWORDS" ]] && echo "Filter: $FILTER_KEYWORDS"
+ echo ""
+
+ local count=0
+ while IFS= read -r entry; do
+ [[ -z "$entry" ]] && continue
+ format_cve_text "$entry"
+ count=$((count + 1))
+ done < <(filter_entries "$json_file" | jq -c '.')
+
+ log_info "$count entries"
+ rm -f "$json_file"
+}
+
+# ── Mode: Monitor ─────────────────────────────────────────────────────
+run_monitor() {
+ local json_file
+ json_file=$(fetch_kev)
+
+ TOTAL_COUNT=$(jq '.vulnerabilities | length' "$json_file")
+
+ # Initialize state on first run
+ if ! init_state; then
+ jq -r '.vulnerabilities[].cveID' "$json_file" | sort > "$STATE_FILE"
+ local init_count
+ init_count=$(wc -l < "$STATE_FILE")
+ log_info "Initialized with $init_count CVEs. Future runs will detect new entries."
+ rm -f "$json_file"
+ return
+ fi
+
+ # Extract current CVE IDs
+ local current_cves
+ current_cves=$(mktemp)
+ jq -r '.vulnerabilities[].cveID' "$json_file" | sort > "$current_cves"
+
+ # Find new CVEs not in state file
+ local new_ids
+ new_ids=$(comm -13 "$STATE_FILE" "$current_cves")
+
+ if [[ -z "$new_ids" ]]; then
+ log_info "No new KEV entries (catalog: $TOTAL_COUNT CVEs)"
+ rm -f "$current_cves" "$json_file"
+ return
+ fi
+
+ # Collect new CVE details, applying filter
+ while IFS= read -r cve_id; do
+ [[ -z "$cve_id" ]] && continue
+
+ local cve_json
+ cve_json=$(jq -c --arg id "$cve_id" '.vulnerabilities[] | select(.cveID == $id)' "$json_file")
+
+ # Apply filter if set
+ if [[ -n "$FILTER_KEYWORDS" ]]; then
+ local matches="false"
+ IFS=',' read -ra keywords <<< "$FILTER_KEYWORDS"
+ for kw in "${keywords[@]}"; do
+ kw=$(echo "$kw" | xargs | tr '[:upper:]' '[:lower:]')
+ if echo "$cve_json" | tr '[:upper:]' '[:lower:]' | grep -q "$kw"; then
+ matches="true"
+ break
+ fi
+ done
+ [[ "$matches" == "false" ]] && continue
+ fi
+
+ NEW_CVES+=("$cve_json")
+ format_cve_text "$cve_json"
+ done <<< "$new_ids"
+
+ NEW_COUNT=${#NEW_CVES[@]}
+
+ # Update state file with all current CVEs
+ mv "$current_cves" "$STATE_FILE"
+
+ if [[ $NEW_COUNT -eq 0 ]]; then
+ local total_new
+ total_new=$(echo "$new_ids" | wc -w)
+ log_info "No new entries matching filter (${total_new} new total, $TOTAL_COUNT in catalog)"
+ rm -f "$json_file"
+ return
+ fi
+
+ log_info "$NEW_COUNT new KEV entry/entries matching filter"
+
+ # Send notifications
+ if [[ "$NOTIFY_EMAIL" == "true" || "$NOTIFY_SLACK" == "true" || "$NOTIFY_TELEGRAM" == "true" ]]; then
+ notify
+ fi
+
+ rm -f "$json_file"
+}
+
+# ── Entry Point ───────────────────────────────────────────────────────
+# main ARGS...
+#   Entry point: set up colours, parse options, check dependencies, then run
+#   exactly one mode. Precedence: --stats, --list-new, --list, monitor.
+main() {
+    START_TIME=$(date +%s)
+    setup_colors
+    parse_args "$@"
+    check_deps
+
+    # Later assignments win, which reproduces the precedence above.
+    local action="monitor"
+    if [[ "$LIST_MODE" == "true" ]]; then action="list"; fi
+    if [[ -n "$LIST_NEW_DAYS" ]]; then action="list-new"; fi
+    if [[ "$STATS_MODE" == "true" ]]; then action="stats"; fi
+
+    case "$action" in
+        stats)    run_stats ;;
+        list-new) run_list_new "$LIST_NEW_DAYS" ;;
+        list)     run_list ;;
+        *)        run_monitor ;;
+    esac
+
+    local elapsed
+    elapsed=$(( $(date +%s) - START_TIME ))
+    log_verbose "Completed in ${elapsed}s"
+}
+
+main "$@"
diff --git a/clickhouse-exporter.sh b/clickhouse-exporter.sh
new file mode 100644
index 0000000..dccfbf3
--- /dev/null
+++ b/clickhouse-exporter.sh
@@ -0,0 +1,350 @@
+#!/usr/bin/env bash
+################################################################################
+# Script Name: clickhouse-exporter.sh
+# Version: 1.0
+# Description: Prometheus textfile exporter for ClickHouse. Pulls metrics from
+# the native Prometheus endpoint (/metrics on port 9363) and writes
+# a filtered subset to a .prom file for node_exporter's textfile
+# collector. Keeps original ClickHouse metric names for community
+# dashboard compatibility.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - curl
+# - ClickHouse Prometheus endpoint enabled (port 9363)
+#
+# Usage:
+# ./clickhouse-exporter.sh
+# ./clickhouse-exporter.sh --textfile
+# ./clickhouse-exporter.sh --http
+# CLICKHOUSE_URL="http://ch-node:9363" ./clickhouse-exporter.sh --textfile
+#
+# Parameters:
+# --textfile Write to textfile collector directory
+# --http Run as HTTP server
+# --install Create cron job for automatic collection
+# --help Show usage
+#
+# Environment:
+# CLICKHOUSE_URL ClickHouse Prometheus endpoint (default: http://localhost:9363)
+# METRICS_PATH Metrics path (default: /metrics)
+# TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
+# CURL_TIMEOUT Request timeout in seconds (default: 10)
+#
+# Metrics Exported (Tier 2 — ~30 key metrics, original ClickHouse names):
+#
+# Gauges (ClickHouseMetrics_*):
+# - ClickHouseMetrics_Query
+# - ClickHouseMetrics_Merge
+# - ClickHouseMetrics_MemoryTracking
+# - ClickHouseMetrics_TCPConnection
+# - ClickHouseMetrics_HTTPConnection
+# - ClickHouseMetrics_OpenFileForRead
+# - ClickHouseMetrics_OpenFileForWrite
+# - ClickHouseMetrics_ReplicasMaxQueueSize
+# - ClickHouseMetrics_BackgroundMergesAndMutationsPoolTask
+# - ClickHouseMetrics_DelayedInserts
+#
+# Counters (ClickHouseProfileEvents_*):
+# - ClickHouseProfileEvents_Query
+# - ClickHouseProfileEvents_SelectQuery
+# - ClickHouseProfileEvents_InsertQuery
+# - ClickHouseProfileEvents_FailedQuery
+# - ClickHouseProfileEvents_InsertedRows
+# - ClickHouseProfileEvents_InsertedBytes
+# - ClickHouseProfileEvents_MergedRows
+# - ClickHouseProfileEvents_ReadCompressedBytes
+# - ClickHouseProfileEvents_CompressedReadBufferBytes
+# - ClickHouseProfileEvents_ReplicatedPartFetches
+# - ClickHouseProfileEvents_ReplicatedPartFailedFetches
+# - ClickHouseProfileEvents_DiskReadElapsedMicroseconds
+# - ClickHouseProfileEvents_DiskWriteElapsedMicroseconds
+# - ClickHouseProfileEvents_NetworkSendBytes
+# - ClickHouseProfileEvents_NetworkReceiveBytes
+# - ClickHouseProfileEvents_ZooKeeperTransactions
+# - ClickHouseProfileEvents_DNSError
+#
+# Async Metrics (ClickHouseAsyncMetrics_*):
+# - ClickHouseAsyncMetrics_Uptime
+# - ClickHouseAsyncMetrics_MaxPartCountForPartition
+# - ClickHouseAsyncMetrics_MemoryResident
+# - ClickHouseAsyncMetrics_ReplicasMaxAbsoluteDelay
+#
+# Exporter:
+# - clickhouse_exporter_up
+# - clickhouse_exporter_duration_seconds
+# - clickhouse_exporter_last_run_timestamp
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Configuration ---
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="$(basename "$0")"
+CLICKHOUSE_URL="${CLICKHOUSE_URL:-http://localhost:9363}"
+METRICS_PATH="${METRICS_PATH:-/metrics}"
+TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+TEXTFILE_MODE=false
+HTTP_MODE=false
+HTTP_PORT=9201
+OUTPUT=""
+START_TIME=""
+
+# Tier 2 metric filter — grep pattern (one metric name per line).
+# Passed to `grep -E` in collect_metrics; grep treats each newline-separated
+# line as an alternative pattern. The trailing [[:space:]] requires whitespace
+# immediately after the name, so longer metric names that merely share the
+# prefix do not match. The patterns are not anchored to the start of the line.
+readonly METRIC_FILTER='ClickHouseMetrics_Query[[:space:]]
+ClickHouseMetrics_Merge[[:space:]]
+ClickHouseMetrics_MemoryTracking[[:space:]]
+ClickHouseMetrics_TCPConnection[[:space:]]
+ClickHouseMetrics_HTTPConnection[[:space:]]
+ClickHouseMetrics_OpenFileForRead[[:space:]]
+ClickHouseMetrics_OpenFileForWrite[[:space:]]
+ClickHouseMetrics_ReplicasMaxQueueSize[[:space:]]
+ClickHouseMetrics_BackgroundMergesAndMutationsPoolTask[[:space:]]
+ClickHouseMetrics_DelayedInserts[[:space:]]
+ClickHouseProfileEvents_Query[[:space:]]
+ClickHouseProfileEvents_SelectQuery[[:space:]]
+ClickHouseProfileEvents_InsertQuery[[:space:]]
+ClickHouseProfileEvents_FailedQuery[[:space:]]
+ClickHouseProfileEvents_InsertedRows[[:space:]]
+ClickHouseProfileEvents_InsertedBytes[[:space:]]
+ClickHouseProfileEvents_MergedRows[[:space:]]
+ClickHouseProfileEvents_ReadCompressedBytes[[:space:]]
+ClickHouseProfileEvents_CompressedReadBufferBytes[[:space:]]
+ClickHouseProfileEvents_ReplicatedPartFetches[[:space:]]
+ClickHouseProfileEvents_ReplicatedPartFailedFetches[[:space:]]
+ClickHouseProfileEvents_DiskReadElapsedMicroseconds[[:space:]]
+ClickHouseProfileEvents_DiskWriteElapsedMicroseconds[[:space:]]
+ClickHouseProfileEvents_NetworkSendBytes[[:space:]]
+ClickHouseProfileEvents_NetworkReceiveBytes[[:space:]]
+ClickHouseProfileEvents_ZooKeeperTransactions[[:space:]]
+ClickHouseProfileEvents_DNSError[[:space:]]
+ClickHouseAsyncMetrics_Uptime[[:space:]]
+ClickHouseAsyncMetrics_MaxPartCountForPartition[[:space:]]
+ClickHouseAsyncMetrics_MemoryResident[[:space:]]
+ClickHouseAsyncMetrics_ReplicasMaxAbsoluteDelay[[:space:]]'
+
+# --- Functions ---
+
+# NOTE(review): this span looks damaged in this copy. usage()'s help text and
+# the opening of the dependency check appear to have been merged into the
+# single `cat </dev/null; then` line; main calls check_dependencies, whose
+# definition is not visible here. What remains reports a missing curl on
+# stderr and exits 1. Restore from the original script before use.
+usage() {
+ cat </dev/null; then
+ echo "# ERROR: curl is required" >&2
+ echo "# Install with: apt install curl OR dnf install curl" >&2
+ exit 1
+ fi
+}
+
+collect_metrics() {
+ local raw
+ raw=$(curl -sf --max-time "$CURL_TIMEOUT" \
+ "${CLICKHOUSE_URL}${METRICS_PATH}" 2>/dev/null) || {
+ OUTPUT+="# HELP clickhouse_exporter_up ClickHouse Prometheus endpoint reachability (1=up, 0=down)
+# TYPE clickhouse_exporter_up gauge
+clickhouse_exporter_up 0
+"
+ return 1
+ }
+
+ OUTPUT+="# HELP clickhouse_exporter_up ClickHouse Prometheus endpoint reachability (1=up, 0=down)
+# TYPE clickhouse_exporter_up gauge
+clickhouse_exporter_up 1
+"
+
+ # Filter raw metrics to Tier 2 subset
+ # Include HELP and TYPE lines for matched metrics, plus the value lines
+ local filtered
+ filtered=$(echo "$raw" | grep -E "$METRIC_FILTER" || true)
+
+ if [[ -z "$filtered" ]]; then
+ return 0
+ fi
+
+ # For each matched metric, also grab its HELP and TYPE lines
+ local seen_metrics=""
+ while IFS= read -r line; do
+ # Extract metric name (before space or brace)
+ local metric_name
+ metric_name=$(echo "$line" | awk '{print $1}' | sed 's/{.*//')
+
+ # Add HELP/TYPE lines if we haven't seen this metric yet
+ if [[ ! "$seen_metrics" == *"|${metric_name}|"* ]]; then
+ local help_line type_line
+ help_line=$(echo "$raw" | grep "^# HELP ${metric_name} " || true)
+ type_line=$(echo "$raw" | grep "^# TYPE ${metric_name} " || true)
+ if [[ -n "$help_line" ]]; then
+ OUTPUT+="${help_line}
+"
+ fi
+ if [[ -n "$type_line" ]]; then
+ OUTPUT+="${type_line}
+"
+ fi
+ seen_metrics+="|${metric_name}|"
+ fi
+
+ OUTPUT+="${line}
+"
+ done <<< "$filtered"
+
+ return 0
+}
+
+# --- Output ---
+
+# write_output
+#   Emit the collected metrics in OUTPUT: to stdout by default, or to
+#   TEXTFILE_DIR/clickhouse.prom in textfile mode. The file is written under
+#   a temporary name and renamed into place.
+write_output() {
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local target="${TEXTFILE_DIR}/clickhouse.prom"
+    local staging="${target}.$$"
+
+    mkdir -p "$TEXTFILE_DIR"
+    echo "$OUTPUT" > "$staging"
+    mv "$staging" "$target"
+    echo "# Wrote metrics to ${target}" >&2
+}
+
+serve_http() {
+ if ! command -v nc &>/dev/null && ! command -v ncat &>/dev/null; then
+ echo "# ERROR: nc (netcat) or ncat required for HTTP mode" >&2
+ exit 1
+ fi
+
+ echo "# ClickHouse exporter listening on port ${HTTP_PORT}" >&2
+ echo "# Metrics endpoint: http://localhost:${HTTP_PORT}/metrics" >&2
+
+ local nc_cmd="nc"
+ if command -v ncat &>/dev/null; then
+ nc_cmd="ncat"
+ fi
+
+ while true; do
+ OUTPUT=""
+ START_TIME=$(date +%s%N)
+
+ collect_metrics
+
+ local end_time duration
+ end_time=$(date +%s%N)
+ duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+ OUTPUT+="# HELP clickhouse_exporter_duration_seconds Time to collect and filter metrics
+# TYPE clickhouse_exporter_duration_seconds gauge
+clickhouse_exporter_duration_seconds ${duration}
+# HELP clickhouse_exporter_last_run_timestamp Unix timestamp of last successful run
+# TYPE clickhouse_exporter_last_run_timestamp gauge
+clickhouse_exporter_last_run_timestamp $(date +%s)
+"
+
+ local content_length=${#OUTPUT}
+ local response="HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: ${content_length}\r\nConnection: close\r\n\r\n${OUTPUT}"
+
+ echo -e "$response" | $nc_cmd -l -p "$HTTP_PORT" -q 1 2>/dev/null || \
+ echo -e "$response" | $nc_cmd -l "$HTTP_PORT" -c 2>/dev/null || \
+ echo -e "$response" | $nc_cmd -l -p "$HTTP_PORT" 2>/dev/null || true
+ done
+}
+
+# install_cron — write /etc/cron.d/clickhouse-exporter, make it mode 644 and
+# report the result on stderr. Exits 1 unless run as root (EUID 0).
+# NOTE(review): the here-document that supplies the cron file's content looks
+# truncated in this copy (the `cat >` line is followed directly by "EOF");
+# restore it from the original script before use.
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "# ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ # Absolute, symlink-resolved path of this script
+ script_path=$(readlink -f "$0")
+
+ cat > /etc/cron.d/clickhouse-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/clickhouse-exporter
+ echo "# Installed cron job: /etc/cron.d/clickhouse-exporter" >&2
+ echo "# Metrics will be written to: ${TEXTFILE_DIR}/clickhouse.prom" >&2
+}
+
+# --- Main ---
+
+main() {
+ for arg in "$@"; do
+ case "$arg" in
+ --textfile) TEXTFILE_MODE=true ;;
+ --http) HTTP_MODE=true ;;
+ -p|--port) shift; HTTP_PORT="${1:-$HTTP_PORT}" ;;
+ --install)
+ check_dependencies
+ install_cron
+ exit 0
+ ;;
+ --help|-h) usage ;;
+ *) ;;
+ esac
+ done
+
+ check_dependencies
+
+ if [[ "$HTTP_MODE" == true ]]; then
+ serve_http
+ exit 0
+ fi
+
+ START_TIME=$(date +%s%N)
+
+ collect_metrics
+
+ local end_time duration
+ end_time=$(date +%s%N)
+ duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+ OUTPUT+="# HELP clickhouse_exporter_duration_seconds Time to collect and filter metrics
+# TYPE clickhouse_exporter_duration_seconds gauge
+clickhouse_exporter_duration_seconds ${duration}
+# HELP clickhouse_exporter_last_run_timestamp Unix timestamp of last successful run
+# TYPE clickhouse_exporter_last_run_timestamp gauge
+clickhouse_exporter_last_run_timestamp $(date +%s)
+"
+
+ write_output
+}
+
+main "$@"
diff --git a/config-backup.sh b/config-backup.sh
new file mode 100644
index 0000000..90c818a
--- /dev/null
+++ b/config-backup.sh
@@ -0,0 +1,507 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### config-backup.sh — Snapshot system configs into a timestamped tarball ####
+#### Backs up /etc, crontabs, package lists, systemd units, and firewall rules ####
+#### Dry-run by default — nothing is written without --force ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./config-backup.sh ####
+#### ./config-backup.sh --force ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+BACKUP_DIR="${BACKUP_DIR:-/var/backups/config-snapshots}"
+DRY_RUN="${DRY_RUN:-true}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+# Assigned and marked read-only in two steps so a basename failure is not
+# masked by the readonly builtin's own exit status.
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# Extra paths to back up; iterated by collect_custom_includes.
+INCLUDE_PATHS=()
+# Paths to leave out; consulted by is_excluded.
+EXCLUDE_PATHS=()
+# Temporary staging directory; stays empty in dry-run mode and is set by main
+# for live runs.
+STAGING_DIR=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${DIM}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+# Print one "label value" row: $1 is the label (bold, left-aligned, padded to
+# 22 columns) and $2 is the value, printed verbatim via %s.
+# BOLD/RESET are embedded in the format string so printf interprets their
+# backslash escapes.
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+# Same layout as field, but the value ($2) is printed with %b so backslash
+# escapes inside it (e.g. colour sequences passed by the caller) are
+# interpreted.
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+human_bytes() {
+ local bytes="$1"
+ if [[ "$bytes" -ge 1073741824 ]]; then
+ awk "BEGIN { printf \"%.1f GiB\", $bytes / 1073741824 }"
+ elif [[ "$bytes" -ge 1048576 ]]; then
+ awk "BEGIN { printf \"%.1f MiB\", $bytes / 1048576 }"
+ elif [[ "$bytes" -ge 1024 ]]; then
+ awk "BEGIN { printf \"%.1f KiB\", $bytes / 1024 }"
+ else
+ echo "${bytes} B"
+ fi
+}
+
+cleanup_staging() {
+ if [[ -n "$STAGING_DIR" && -d "$STAGING_DIR" ]]; then
+ rm -rf "$STAGING_DIR"
+ verbose "Cleaned up staging directory"
+ fi
+}
+
+# Test whether a path is covered by the exclusion list.
+#
+# Arguments: $1 - path to check.
+# Globals:   reads EXCLUDE_PATHS.
+# Returns:   0 when the path equals an excluded entry or lies beneath one,
+#            1 otherwise.
+is_excluded() {
+    local path="$1" exc
+    # The ${arr[@]+...} form expands to nothing for an empty array; a plain
+    # "${EXCLUDE_PATHS[@]}" is an unbound-variable error under `set -u` on
+    # bash releases older than 4.4.
+    for exc in ${EXCLUDE_PATHS[@]+"${EXCLUDE_PATHS[@]}"}; do
+        if [[ "$path" == "$exc" || "$path" == "$exc"/* ]]; then
+            return 0
+        fi
+    done
+    return 1
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# COLLECT ITEMS
+# ══════════════════════════════════════════════════════════════════════
+
+# Report the size and file count of /etc and, in live mode, copy it into the
+# staging directory.
+# Globals: reads DRY_RUN, STAGING_DIR.
+collect_etc() {
+    section_header "/etc Configuration"
+
+    if [[ ! -d /etc ]]; then
+        warn "/etc not found"
+        return
+    fi
+
+    if is_excluded "/etc"; then
+        log "Skipping /etc (excluded)"
+        return
+    fi
+
+    # du and find exit non-zero when they hit unreadable directories, which is
+    # the normal case for a non-root dry run. With pipefail that failure would
+    # either append a second "0" line to the captured size or, for find, abort
+    # the whole script under `set -e`. Their exit status is therefore ignored
+    # and whatever they managed to report is used.
+    local etc_size
+    etc_size=$({ du -sb /etc 2>/dev/null || true; } | awk '{print $1; exit}')
+    field "Size:" "$(human_bytes "${etc_size:-0}")"
+
+    local etc_files
+    etc_files=$({ find /etc -type f 2>/dev/null || true; } | wc -l)
+    field "Files:" "$etc_files"
+
+    if [[ "$DRY_RUN" == "false" ]]; then
+        cp -a /etc "$STAGING_DIR/etc" 2>/dev/null || warn "Some /etc files could not be copied"
+        log "Collected /etc"
+    else
+        log "[DRY-RUN] Would collect /etc"
+    fi
+}
+
+collect_crontabs() {
+ section_header "User Crontabs"
+
+ local crontab_dir="/var/spool/cron/crontabs"
+ local count=0
+
+ if [[ -d "$crontab_dir" ]]; then
+ count=$(find "$crontab_dir" -type f 2>/dev/null | wc -l)
+ field "User crontabs:" "$count"
+
+ if [[ "$VERBOSE" == "true" && "$count" -gt 0 ]]; then
+ find "$crontab_dir" -type f 2>/dev/null | while IFS= read -r f; do
+ printf " %s\n" "$(basename "$f")"
+ done
+ fi
+
+ if [[ "$DRY_RUN" == "false" && "$count" -gt 0 ]]; then
+ mkdir -p "$STAGING_DIR/crontabs"
+ cp -a "$crontab_dir"/* "$STAGING_DIR/crontabs/" 2>/dev/null || warn "Some crontabs could not be copied"
+ log "Collected user crontabs"
+ fi
+ else
+ field "User crontabs:" "0 (${crontab_dir} not found)"
+ fi
+
+ # Root crontab via crontab -l
+ if crontab -l &>/dev/null; then
+ if [[ "$DRY_RUN" == "false" ]]; then
+ mkdir -p "$STAGING_DIR/crontabs"
+ crontab -l > "$STAGING_DIR/crontabs/root-crontab-l.txt" 2>/dev/null || true
+ fi
+ field "Root crontab:" "present"
+ else
+ field "Root crontab:" "none"
+ fi
+
+ if [[ "$DRY_RUN" == "true" && "$count" -gt 0 ]]; then
+ log "[DRY-RUN] Would collect crontabs"
+ fi
+}
+
+# Report installed package counts and, in live mode, save the dpkg and/or rpm
+# package lists into the staging directory.
+# Globals: reads DRY_RUN, STAGING_DIR.
+collect_package_list() {
+    section_header "Package List"
+
+    # Probe each package manager once and reuse the answer below.
+    local have_dpkg=false have_rpm=false
+    if command -v dpkg &>/dev/null; then have_dpkg=true; fi
+    if command -v rpm &>/dev/null; then have_rpm=true; fi
+
+    if $have_dpkg; then
+        local dpkg_count
+        # grep -c prints 0 and exits 1 when nothing matches; `|| true` keeps
+        # that from being treated as a failure.
+        dpkg_count=$(dpkg -l 2>/dev/null | grep -c "^ii" || true)
+        field "dpkg packages:" "$dpkg_count"
+
+        if [[ "$DRY_RUN" == "false" ]]; then
+            mkdir -p "$STAGING_DIR/packages"
+            dpkg --get-selections > "$STAGING_DIR/packages/dpkg-selections.txt" 2>/dev/null || true
+            dpkg -l > "$STAGING_DIR/packages/dpkg-list.txt" 2>/dev/null || true
+            log "Collected dpkg package list"
+        else
+            log "[DRY-RUN] Would collect dpkg package list"
+        fi
+    fi
+
+    if $have_rpm; then
+        local rpm_count
+        # Ignore rpm's own exit status inside the pipeline: with pipefail a
+        # trailing `|| echo 0` would add a second line after wc's count.
+        rpm_count=$({ rpm -qa 2>/dev/null || true; } | wc -l)
+        field "rpm packages:" "$rpm_count"
+
+        if [[ "$DRY_RUN" == "false" ]]; then
+            mkdir -p "$STAGING_DIR/packages"
+            rpm -qa --qf '%{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}\n' > "$STAGING_DIR/packages/rpm-list.txt" 2>/dev/null || true
+            log "Collected rpm package list"
+        else
+            log "[DRY-RUN] Would collect rpm package list"
+        fi
+    fi
+
+    if ! $have_dpkg && ! $have_rpm; then
+        log "No package manager detected (dpkg/rpm)"
+    fi
+}
+
+# Report enabled and custom systemd units and, in live mode, save the unit
+# file listing plus the /etc/systemd/{system,user} directories.
+# Globals: reads DRY_RUN, STAGING_DIR.
+collect_systemd_units() {
+    section_header "Systemd Units"
+
+    if ! command -v systemctl &>/dev/null; then
+        log "systemd not available"
+        return
+    fi
+
+    # systemctl can be installed yet unable to answer (it exits non-zero);
+    # with pipefail + `set -e` that would abort the script, so its status is
+    # ignored and the count simply comes out as 0.
+    local enabled_count
+    enabled_count=$({ systemctl list-unit-files --state=enabled --no-legend 2>/dev/null || true; } | wc -l)
+    field "Enabled units:" "$enabled_count"
+
+    local custom_count=0 unit_dir dir_count
+    for unit_dir in /etc/systemd/system /etc/systemd/user; do
+        if [[ -d "$unit_dir" ]]; then
+            # Top-level *.service and *.timer entries only.
+            dir_count=$({ find "$unit_dir" -maxdepth 1 \( -name "*.service" -o -name "*.timer" \) 2>/dev/null || true; } | wc -l)
+            custom_count=$((custom_count + dir_count))
+        fi
+    done
+    field "Custom unit files:" "$custom_count"
+
+    if [[ "$DRY_RUN" == "false" ]]; then
+        mkdir -p "$STAGING_DIR/systemd"
+        systemctl list-unit-files --no-legend > "$STAGING_DIR/systemd/unit-files.txt" 2>/dev/null || true
+
+        for unit_dir in /etc/systemd/system /etc/systemd/user; do
+            if [[ -d "$unit_dir" ]]; then
+                cp -a "$unit_dir" "$STAGING_DIR/systemd/" 2>/dev/null || true
+            fi
+        done
+        log "Collected systemd units"
+    else
+        log "[DRY-RUN] Would collect systemd units"
+    fi
+}
+
+# Report iptables / nftables rule counts and, in live mode, dump the rule sets
+# into the staging directory.
+# Globals: reads DRY_RUN, STAGING_DIR.
+collect_firewall_rules() {
+    section_header "Firewall Rules"
+
+    local fw_found=false
+
+    if command -v iptables &>/dev/null; then
+        fw_found=true
+        local ipt_rules
+        # Listing rules fails without root. The tool's exit status is ignored
+        # so that, with pipefail, no fallback line is appended after the
+        # count that wc already printed.
+        ipt_rules=$({ iptables -S 2>/dev/null || true; } | wc -l)
+        field "iptables rules:" "$ipt_rules"
+
+        if [[ "$DRY_RUN" == "false" ]]; then
+            mkdir -p "$STAGING_DIR/firewall"
+            # Report success only when the dump actually worked.
+            if iptables-save > "$STAGING_DIR/firewall/iptables.rules" 2>/dev/null; then
+                log "Collected iptables rules"
+            else
+                warn "Could not save iptables rules"
+            fi
+        fi
+    fi
+
+    if command -v nft &>/dev/null; then
+        fw_found=true
+        local nft_tables
+        nft_tables=$({ nft list tables 2>/dev/null || true; } | wc -l)
+        field "nftables tables:" "$nft_tables"
+
+        if [[ "$DRY_RUN" == "false" ]]; then
+            mkdir -p "$STAGING_DIR/firewall"
+            if nft list ruleset > "$STAGING_DIR/firewall/nftables.rules" 2>/dev/null; then
+                log "Collected nftables rules"
+            else
+                warn "Could not save nftables rules"
+            fi
+        fi
+    fi
+
+    if [[ "$fw_found" == "false" ]]; then
+        log "No firewall tools detected (iptables, nftables)"
+    elif [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY-RUN] Would collect firewall rules"
+    fi
+}
+
+# Report and, in live mode, stage every user-supplied include path under
+# "$STAGING_DIR/custom/", mirroring its original location.
+# Globals: reads INCLUDE_PATHS, DRY_RUN, STAGING_DIR.
+collect_custom_includes() {
+    if [[ ${#INCLUDE_PATHS[@]} -eq 0 ]]; then
+        return
+    fi
+
+    section_header "Custom Includes"
+
+    local inc_path inc_size dest_dir
+    for inc_path in "${INCLUDE_PATHS[@]}"; do
+        if [[ ! -e "$inc_path" ]]; then
+            warn "Include path not found: $inc_path"
+            continue
+        fi
+
+        # du exits non-zero on unreadable entries; ignore its status and keep
+        # only the first line so the size is always a single number.
+        inc_size=$({ du -sb "$inc_path" 2>/dev/null || true; } | awk '{print $1; exit}')
+        field "$inc_path:" "$(human_bytes "${inc_size:-0}")"
+
+        if [[ "$DRY_RUN" == "false" ]]; then
+            # Strip one leading slash and re-add it explicitly: absolute paths
+            # land where they always did (custom/etc/foo), and relative paths
+            # go under custom/ instead of producing a "custom<name>" sibling.
+            dest_dir="$STAGING_DIR/custom/${inc_path#/}"
+            mkdir -p "$(dirname "$dest_dir")"
+            cp -a "$inc_path" "$dest_dir" 2>/dev/null || warn "Could not copy $inc_path"
+        fi
+    done
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY-RUN] Would collect custom paths"
+    else
+        log "Collected custom paths"
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# CREATE TARBALL
+# ══════════════════════════════════════════════════════════════════════
+
+# Build the timestamped tarball from the staging directory. In dry-run mode
+# only an uncompressed size estimate is printed.
+# Globals: reads BACKUP_DIR, DRY_RUN, INCLUDE_PATHS, STAGING_DIR.
+# Exits:   1 when the output directory or the archive cannot be created, or
+#          when the finished archive lists no entries.
+create_tarball() {
+    local timestamp hostname_val tarball_name tarball_path
+
+    timestamp=$(date '+%Y%m%d-%H%M%S')
+    hostname_val=$(hostname -s 2>/dev/null || hostname)
+    tarball_name="config-backup-${hostname_val}-${timestamp}.tar.gz"
+    tarball_path="${BACKUP_DIR}/${tarball_name}"
+
+    section_header "Creating Backup"
+
+    field "Output directory:" "$BACKUP_DIR"
+    field "Tarball:" "$tarball_name"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        # Estimate total size
+        local est_size=0 part_size inc_path
+
+        # du exits non-zero on unreadable entries. Its status is ignored and
+        # only the first output line is kept, so the arithmetic below always
+        # receives a single number (a multi-line value is a fatal error).
+        if [[ -d /etc ]] && ! is_excluded "/etc"; then
+            part_size=$({ du -sb /etc 2>/dev/null || true; } | awk '{print $1; exit}')
+            est_size=$((est_size + ${part_size:-0}))
+        fi
+
+        # ${arr[@]+...} keeps an empty INCLUDE_PATHS from tripping `set -u`
+        # on bash releases older than 4.4.
+        for inc_path in ${INCLUDE_PATHS[@]+"${INCLUDE_PATHS[@]}"}; do
+            if [[ -e "$inc_path" ]]; then
+                part_size=$({ du -sb "$inc_path" 2>/dev/null || true; } | awk '{print $1; exit}')
+                est_size=$((est_size + ${part_size:-0}))
+            fi
+        done
+
+        field_color "Estimated size:" "${YELLOW}~$(human_bytes "$est_size") (uncompressed)${RESET}"
+        echo ""
+        echo -e " ${YELLOW}Dry-run mode — no backup created${RESET}"
+        echo -e " Run with --force to create the backup"
+        return
+    fi
+
+    # Create output directory
+    mkdir -p "$BACKUP_DIR" || { err "Cannot create ${BACKUP_DIR}"; exit 1; }
+
+    # Create tarball from staging
+    local staging_size
+    staging_size=$({ du -sb "$STAGING_DIR" 2>/dev/null || true; } | awk '{print $1; exit}')
+    field "Staging size:" "$(human_bytes "${staging_size:-0}")"
+
+    tar -czf "$tarball_path" -C "$STAGING_DIR" . 2>/dev/null || { err "Failed to create tarball"; exit 1; }
+    # The archive holds a copy of /etc, so keep it readable by the owner only.
+    chmod 600 "$tarball_path" || warn "Could not restrict permissions on $tarball_path"
+
+    # Validate tarball
+    log "Validating tarball..."
+    local file_count
+    # A listing failure must reach the check below rather than abort the
+    # script through pipefail, so tar's status is ignored here.
+    file_count=$({ tar -tzf "$tarball_path" 2>/dev/null || true; } | wc -l)
+
+    if [[ "$file_count" -eq 0 ]]; then
+        err "Tarball validation failed — archive appears empty"
+        exit 1
+    fi
+
+    local tarball_size
+    tarball_size=$(stat -c%s "$tarball_path" 2>/dev/null || echo "0")
+
+    field_color "Status:" "${GREEN}Success${RESET}"
+    field "Archive size:" "$(human_bytes "$tarball_size")"
+    field "Files archived:" "$file_count"
+    field "Location:" "$tarball_path"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2
+ exit 1 ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+main() {
+ parse_args "$@"
+ setup_colors
+
+ echo ""
+ echo -e "${BOLD}Config Backup — $(hostname -f 2>/dev/null || hostname)${RESET}"
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo -e "Safety: ${YELLOW}dry-run (use --force to create backup)${RESET}"
+ else
+ echo -e "Safety: ${RED}LIVE — backup will be created${RESET}"
+ fi
+
+ echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+ # Create staging directory for live runs
+ if [[ "$DRY_RUN" == "false" ]]; then
+ STAGING_DIR=$(mktemp -d "/tmp/config-backup-XXXXXX")
+ trap cleanup_staging EXIT
+ verbose "Staging directory: $STAGING_DIR"
+ fi
+
+ collect_etc
+ collect_crontabs
+ collect_package_list
+ collect_systemd_units
+ collect_firewall_rules
+ collect_custom_includes
+ create_tarball
+
+ echo ""
+}
+
+main "$@"
diff --git a/configure-miab-metrics.sh b/configure-miab-metrics.sh
new file mode 100755
index 0000000..b77d2db
--- /dev/null
+++ b/configure-miab-metrics.sh
@@ -0,0 +1,633 @@
+#!/bin/bash
+#
+# configure-miab-metrics.sh - Enable extended metrics logging on Mail-in-a-Box
+#
+# Enables SpamAssassin rules logging and/or TLS cipher logging for
+# postfix-metrics.sh to collect detailed metrics.
+#
+
+set -euo pipefail
+
+SCRIPT_NAME=$(basename "$0")
+VERSION="1.0.0"
+
+# Defaults
+DRY_RUN=false
+VERBOSE=false
+ENABLE_SPAMASSASSIN=false
+ENABLE_TLS=false
+BACKUP=true
+FORCE=false
+STATUS_ONLY=false
+
+# Colors (disabled if not a terminal)
+# Colour escape sequences are defined only when stdout is a terminal;
+# otherwise every colour variable is set to the empty string so redirected
+# output stays free of escape codes.
+if [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[1;33m'
+ BLUE='\033[0;34m'
+ NC='\033[0m'
+else
+ RED='' GREEN='' YELLOW='' BLUE='' NC=''
+fi
+
+usage() {
+ cat <&2
+}
+
+# Print a message tagged [DRY-RUN] (in yellow when colours are enabled) to
+# stdout.
+log_dry() {
+ echo -e "${YELLOW}[DRY-RUN]${NC} $*"
+}
+
+log_verbose() {
+ if $VERBOSE; then
+ echo -e "${BLUE}[VERBOSE]${NC} $*"
+ fi
+}
+
+run_cmd() {
+ local desc="$1"
+ shift
+ if $DRY_RUN; then
+ log_dry "$desc: $*"
+ return 0
+ fi
+ log_verbose "Running: $*"
+ if ! "$@"; then
+ log_error "Failed: $desc"
+ return 1
+ fi
+ return 0
+}
+
+write_file() {
+ local file="$1"
+ local content="$2"
+ local desc="${3:-$file}"
+
+ if $DRY_RUN; then
+ log_dry "Would create $desc:"
+ echo "$content" | sed 's/^/ /'
+ return 0
+ fi
+
+ if [[ -f "$file" ]] && $BACKUP; then
+ local backup="${file}.bak.$(date +%Y%m%d%H%M%S)"
+ log_verbose "Backing up $file to $backup"
+ cp "$file" "$backup"
+ fi
+
+ echo "$content" > "$file"
+ log_ok "Created $desc"
+}
+
+check_root() {
+ if [[ $EUID -ne 0 ]]; then
+ log_error "This script must be run as root (use sudo)"
+ exit 1
+ fi
+}
+
+# Warn when the host does not look like Mail-in-a-Box and, unless FORCE is
+# set, ask whether to continue; any answer not starting with y/Y exits 1.
+check_miab() {
+    # Either marker is enough to accept the host without asking.
+    if [[ -d /home/user-data || -f /etc/mailinabox.conf ]]; then
+        return 0
+    fi
+
+    log_warn "This doesn't appear to be a Mail-in-a-Box installation"
+    if $FORCE; then
+        return 0
+    fi
+
+    read -rp "Continue anyway? [y/N] " response
+    [[ "$response" =~ ^[Yy] ]] || exit 1
+}
+
+confirm_action() {
+ local msg="$1"
+ if $FORCE || $DRY_RUN; then
+ return 0
+ fi
+ read -rp "$msg [y/N] " response
+ [[ "$response" =~ ^[Yy] ]]
+}
+
+# ============================================================================
+# SpamAssassin Configuration
+# ============================================================================
+
+SPAMPD_DEFAULTS="/etc/default/spampd"
+
+check_spampd() {
+ if ! systemctl list-unit-files spampd.service &>/dev/null; then
+ log_error "spampd service not found"
+ return 1
+ fi
+ return 0
+}
+
+# Return 0 when the spampd defaults file exists and has an ADDOPTS line
+# containing --debug; return 1 in every other case.
+is_spampd_debug_enabled() {
+    [[ -f "$SPAMPD_DEFAULTS" ]] || return 1
+
+    if grep -qE '^ADDOPTS\s*=.*--debug' "$SPAMPD_DEFAULTS" 2>/dev/null; then
+        return 0
+    fi
+    return 1
+}
+
+# Return 0 when the rsyslog drop-in /etc/rsyslog.d/50-spamassassin.conf exists
+# as a regular file. Only presence is checked, not content.
+is_rsyslog_spamassassin_configured() {
+ [[ -f /etc/rsyslog.d/50-spamassassin.conf ]]
+}
+
+# Return 0 when /etc/logrotate.d/spamassassin exists as a regular file. Only
+# presence is checked, not content.
+is_logrotate_configured() {
+ [[ -f /etc/logrotate.d/spamassassin ]]
+}
+
+configure_spamassassin() {
+ log_info "Configuring SpamAssassin rules logging..."
+
+ if ! check_spampd; then
+ return 1
+ fi
+
+ local changes_made=false
+
+ # 1. Enable debug mode via /etc/default/spampd
+ if is_spampd_debug_enabled; then
+ log_ok "spampd debug mode already enabled"
+ else
+ log_info "Enabling spampd debug mode..."
+
+ if [[ ! -f "$SPAMPD_DEFAULTS" ]]; then
+ log_error "$SPAMPD_DEFAULTS not found"
+ return 1
+ fi
+
+ # Get current ADDOPTS value
+ local current_addopts=""
+ if grep -qE '^ADDOPTS\s*=' "$SPAMPD_DEFAULTS" 2>/dev/null; then
+ current_addopts=$(grep -E '^ADDOPTS\s*=' "$SPAMPD_DEFAULTS" | tail -1 | sed 's/^ADDOPTS\s*=\s*//' | tr -d '"'"'")
+ fi
+
+ # Build new ADDOPTS with --debug appended
+ local new_addopts
+ if [[ -n "$current_addopts" ]]; then
+ new_addopts="${current_addopts} --debug"
+ else
+ new_addopts="--debug"
+ fi
+
+ if $DRY_RUN; then
+ log_dry "Would update ADDOPTS in $SPAMPD_DEFAULTS:"
+ if [[ -n "$current_addopts" ]]; then
+ echo " Current: ADDOPTS=\"$current_addopts\""
+ fi
+ echo " New: ADDOPTS=\"$new_addopts\""
+ else
+ if $BACKUP; then
+ local backup="${SPAMPD_DEFAULTS}.bak.$(date +%Y%m%d%H%M%S)"
+ cp "$SPAMPD_DEFAULTS" "$backup"
+ log_verbose "Backed up to $backup"
+ fi
+
+ if grep -qE '^ADDOPTS\s*=' "$SPAMPD_DEFAULTS"; then
+ # Update existing ADDOPTS line
+ sed -i "s|^ADDOPTS\s*=.*|ADDOPTS=\"$new_addopts\"|" "$SPAMPD_DEFAULTS"
+ log_ok "Updated ADDOPTS to include --debug"
+ else
+ # Add new ADDOPTS line
+ cat >> "$SPAMPD_DEFAULTS" </dev/null | tail -1 | awk -F= '{print $2}' | tr -d ' ')
+ smtp_level=$(grep -E '^smtp_tls_loglevel\s*=' "$POSTFIX_MAIN_CF" 2>/dev/null | tail -1 | awk -F= '{print $2}' | tr -d ' ')
+
+ [[ "$smtpd_level" -ge 1 ]] 2>/dev/null && [[ "$smtp_level" -ge 1 ]] 2>/dev/null
+}
+
+configure_tls() {
+ log_info "Configuring TLS cipher logging..."
+
+ if [[ ! -f "$POSTFIX_MAIN_CF" ]]; then
+ log_error "Postfix main.cf not found at $POSTFIX_MAIN_CF"
+ return 1
+ fi
+
+ if is_tls_logging_enabled; then
+ log_ok "TLS logging already enabled in Postfix"
+ return 0
+ fi
+
+ log_info "Adding TLS log level settings to $POSTFIX_MAIN_CF..."
+
+ if $DRY_RUN; then
+ log_dry "Would add to $POSTFIX_MAIN_CF:"
+ echo " smtpd_tls_loglevel = 1"
+ echo " smtp_tls_loglevel = 1"
+ else
+ if $BACKUP; then
+ local backup="${POSTFIX_MAIN_CF}.bak.$(date +%Y%m%d%H%M%S)"
+ cp "$POSTFIX_MAIN_CF" "$backup"
+ log_verbose "Backed up to $backup"
+ fi
+
+ # Remove any existing settings first (to avoid duplicates)
+ sed -i '/^smtpd_tls_loglevel\s*=/d' "$POSTFIX_MAIN_CF"
+ sed -i '/^smtp_tls_loglevel\s*=/d' "$POSTFIX_MAIN_CF"
+
+ # Add new settings
+ cat >> "$POSTFIX_MAIN_CF" </dev/null | cut -f1)
+ echo -e "${GREEN}EXISTS${NC} ($size)"
+ else
+ echo -e "${YELLOW}MISSING${NC}"
+ fi
+
+ echo -n "SpamAssassin logrotate: "
+ if is_logrotate_configured; then
+ echo -e "${GREEN}CONFIGURED${NC}"
+ else
+ echo -e "${YELLOW}NOT CONFIGURED${NC}"
+ fi
+
+ echo -n "Postfix TLS logging: "
+ if is_tls_logging_enabled; then
+ echo -e "${GREEN}ENABLED${NC}"
+ else
+ echo -e "${YELLOW}DISABLED${NC}"
+ fi
+
+ # Show service status
+ echo ""
+ echo -n "spampd service: "
+ if systemctl is-active --quiet spampd 2>/dev/null; then
+ echo -e "${GREEN}RUNNING${NC}"
+ else
+ echo -e "${YELLOW}STOPPED${NC}"
+ fi
+
+ echo -n "postfix service: "
+ if systemctl is-active --quiet postfix 2>/dev/null; then
+ echo -e "${GREEN}RUNNING${NC}"
+ else
+ echo -e "${YELLOW}STOPPED${NC}"
+ fi
+
+ echo ""
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+# Script entry point: parse options, show the current status, confirm with
+# the user, then apply the selected configurations.
+# Exits 0 when every selected configuration succeeded, 1 otherwise.
+main() {
+    # Parse arguments
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            -s|--spamassassin) ENABLE_SPAMASSASSIN=true; shift ;;
+            -t|--tls) ENABLE_TLS=true; shift ;;
+            -a|--all)
+                ENABLE_SPAMASSASSIN=true
+                ENABLE_TLS=true
+                shift
+                ;;
+            -n|--dry-run) DRY_RUN=true; shift ;;
+            -f|--force) FORCE=true; shift ;;
+            -v|--verbose) VERBOSE=true; shift ;;
+            --no-backup) BACKUP=false; shift ;;
+            --status) STATUS_ONLY=true; shift ;;
+            -h|--help) usage ;;
+            --version) version ;;
+            *)
+                log_error "Unknown option: $1"
+                echo "Use --help for usage information"
+                exit 1
+                ;;
+        esac
+    done
+
+    # With no feature selected explicitly, configure both.
+    if ! { $ENABLE_SPAMASSASSIN || $ENABLE_TLS; }; then
+        ENABLE_SPAMASSASSIN=true
+        ENABLE_TLS=true
+    fi
+
+    # Root is only needed when changes will actually be written.
+    if ! { $DRY_RUN || $STATUS_ONLY; }; then
+        check_root
+    fi
+
+    check_miab
+
+    # Always show the current state first; status-only mode stops here.
+    show_status
+    if $STATUS_ONLY; then
+        exit 0
+    fi
+
+    # Human-readable summary of what is about to be enabled.
+    local actions=""
+    if $ENABLE_SPAMASSASSIN; then
+        actions="SpamAssassin rules logging"
+    fi
+    if $ENABLE_TLS; then
+        actions="${actions:+${actions}, }TLS cipher logging"
+    fi
+
+    if $DRY_RUN; then
+        log_info "DRY RUN - showing changes that would be made for: $actions"
+    elif ! confirm_action "Enable $actions?"; then
+        log_info "Aborted"
+        exit 0
+    fi
+    echo ""
+
+    # Run each selected step; a failure is recorded but does not stop the
+    # remaining step from running.
+    local exit_code=0
+
+    if $ENABLE_SPAMASSASSIN; then
+        configure_spamassassin || exit_code=1
+        echo ""
+    fi
+
+    if $ENABLE_TLS; then
+        configure_tls || exit_code=1
+        echo ""
+    fi
+
+    if [[ $exit_code -ne 0 ]]; then
+        log_error "Some configurations failed"
+    elif $DRY_RUN; then
+        log_info "Dry run complete - no changes made"
+    else
+        log_ok "Configuration complete!"
+        echo ""
+        echo "Metrics should now be available after some mail traffic."
+        echo "Run your postfix-metrics.sh script to verify."
+    fi
+
+    exit $exit_code
+}
+
+main "$@"
diff --git a/configure-openshift-metrics.sh b/configure-openshift-metrics.sh
new file mode 100644
index 0000000..888a9fb
--- /dev/null
+++ b/configure-openshift-metrics.sh
@@ -0,0 +1,687 @@
+#!/bin/bash
+###############################################################################
+# configure-openshift-metrics.sh
+#
+# Configure an external Prometheus server to receive metrics from OpenShift.
+# Supports federation (pull) and remote write (push) modes.
+#
+# Usage:
+# sudo ./configure-openshift-metrics.sh --method federation \
+# --openshift-url ROUTE --cluster-name NAME
+#
+# sudo ./configure-openshift-metrics.sh --method remote-write \
+# --prometheus-url URL --cluster-name NAME
+#
+# Requirements:
+# - Root or sudo access on the Prometheus server
+# - oc CLI logged in with cluster-admin (unless --skip-openshift)
+# - Prometheus installed via binary (not containerized)
+#
+# https://mylinux.work/guides/openshift-metrics-to-external-prometheus/
+###############################################################################
+
+set -euo pipefail
+
+VERSION="1.0"
+
+#------------------------------------------------------------------------------
+# Defaults
+#------------------------------------------------------------------------------
+METHOD="federation"
+OPENSHIFT_URL=""
+PROMETHEUS_URL=""
+CLUSTER_NAME="openshift"
+PROMETHEUS_CONFIG="/etc/prometheus/prometheus.yml"
+PROMETHEUS_SERVICE="prometheus"
+RULES_DIR="/etc/prometheus/rules"
+TOKEN_FILE="/etc/prometheus/openshift-token"
+PROMETHEUS_USER="prometheus"
+SKIP_OPENSHIFT=false
+SKIP_RULES=false
+DRY_RUN=false
+OC_NAMESPACE="openshift-monitoring"
+SA_NAME="prometheus-external"
+TOKEN_DURATION="8760h"
+
+#------------------------------------------------------------------------------
+# Colors and logging
+#------------------------------------------------------------------------------
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+log() { echo -e "${GREEN}[openshift-metrics]${NC} $1"; }
+warn() { echo -e "${YELLOW}[openshift-metrics]${NC} $1"; }
+error() { echo -e "${RED}[openshift-metrics]${NC} $1" >&2; }
+info() { echo -e "${BLUE}[openshift-metrics]${NC} $1"; }
+
+#------------------------------------------------------------------------------
+# Usage
+#------------------------------------------------------------------------------
+usage() {
+ cat </dev/null; then
+ warn "promtool not found — config validation will be skipped"
+ fi
+
+ if [[ "$SKIP_OPENSHIFT" == false ]] && ! command -v oc &>/dev/null; then
+ error "oc CLI not found. Install it or use --skip-openshift with an existing token"
+ exit 1
+ fi
+}
+
+#------------------------------------------------------------------------------
+# Backup existing config
+#------------------------------------------------------------------------------
+# Copy the Prometheus configuration to a timestamped file in a "backups"
+# directory next to it.
+# Globals: reads PROMETHEUS_CONFIG and DRY_RUN.
+# In dry-run mode nothing is touched on disk, including the backups directory.
+backup_config() {
+    local backup_dir timestamp backup_file
+    backup_dir="$(dirname "$PROMETHEUS_CONFIG")/backups"
+    timestamp=$(date +%F_%H%M%S)
+    backup_file="${backup_dir}/prometheus.yml.${timestamp}"
+
+    if $DRY_RUN; then
+        info "[dry-run] Would backup $PROMETHEUS_CONFIG to $backup_file"
+        return 0
+    fi
+
+    # The directory is created only on a real run so that a dry run leaves
+    # the filesystem unchanged.
+    mkdir -p "$backup_dir"
+    cp "$PROMETHEUS_CONFIG" "$backup_file"
+    log "Backed up config to $backup_file"
+}
+
+#------------------------------------------------------------------------------
+# OpenShift: Create service account and token
+#------------------------------------------------------------------------------
+setup_openshift_sa() {
+ if $SKIP_OPENSHIFT; then
+ if [[ -f "$TOKEN_FILE" ]]; then
+ log "Using existing token from $TOKEN_FILE"
+ else
+ error "No token found at $TOKEN_FILE. Provide a token or remove --skip-openshift."
+ exit 1
+ fi
+ return
+ fi
+
+ log "Setting up OpenShift service account..."
+
+ # Check oc is logged in
+ if ! oc whoami &>/dev/null; then
+ error "Not logged into OpenShift. Run: oc login "
+ exit 1
+ fi
+
+ local cluster_info
+ cluster_info=$(oc whoami --show-server 2>/dev/null || echo "unknown")
+ log "Connected to: $cluster_info"
+
+ if $DRY_RUN; then
+ info "[dry-run] Would create service account $SA_NAME in $OC_NAMESPACE"
+ info "[dry-run] Would grant cluster-monitoring-view role"
+ info "[dry-run] Would generate token with duration $TOKEN_DURATION"
+ return
+ fi
+
+ # Create service account (ignore if exists)
+ if oc get serviceaccount "$SA_NAME" -n "$OC_NAMESPACE" &>/dev/null; then
+ warn "Service account $SA_NAME already exists in $OC_NAMESPACE"
+ else
+ oc create serviceaccount "$SA_NAME" -n "$OC_NAMESPACE"
+ log "Created service account: $SA_NAME"
+ fi
+
+ # Grant cluster-monitoring-view role
+ if oc get clusterrolebinding "${SA_NAME}-monitoring-view" &>/dev/null 2>&1; then
+ warn "Role binding already exists"
+ else
+ oc adm policy add-cluster-role-to-user cluster-monitoring-view \
+ -z "$SA_NAME" -n "$OC_NAMESPACE"
+ log "Granted cluster-monitoring-view role"
+ fi
+
+ # Generate token
+ local token
+ token=$(oc create token "$SA_NAME" -n "$OC_NAMESPACE" --duration="$TOKEN_DURATION")
+
+ echo "$token" > "$TOKEN_FILE"
+ chmod 600 "$TOKEN_FILE"
+ chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$TOKEN_FILE"
+ log "Token saved to $TOKEN_FILE (expires in $TOKEN_DURATION)"
+}
+
+#------------------------------------------------------------------------------
+# Generate federation scrape config
+#------------------------------------------------------------------------------
+generate_federation_config() {
+ cat < 0.9
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ summary: "High CPU on OpenShift node {{ \$labels.instance }}"
+ description: "CPU usage above 90% for 10 minutes (current: {{ \$value | humanizePercentage }})."
+
+ - alert: OpenShiftNodeHighMemory
+ expr: openshift:node_memory_utilization:ratio > 0.9
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ summary: "High memory on OpenShift node {{ \$labels.instance }}"
+ description: "Memory usage above 90% for 10 minutes (current: {{ \$value | humanizePercentage }})."
+
+ - alert: OpenShiftPodCrashLooping
+ expr: rate(kube_pod_container_status_restarts_total{cluster="${CLUSTER_NAME}"}[15m]) * 60 * 5 > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Pod {{ \$labels.namespace }}/{{ \$labels.pod }} is crash looping"
+ description: "Pod has restarted {{ \$value | humanize }} times in the last 15 minutes."
+
+ - alert: OpenShiftDeploymentReplicasMismatch
+ expr: |
+ kube_deployment_spec_replicas{cluster="${CLUSTER_NAME}"}
+ != kube_deployment_status_ready_replicas{cluster="${CLUSTER_NAME}"}
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Deployment {{ \$labels.namespace }}/{{ \$labels.deployment }} replica mismatch"
+ description: "Deployment does not have expected number of ready replicas."
+
+ - alert: OpenShiftEtcdLeaderChanges
+ expr: increase(etcd_server_leader_changes_seen_total{cluster="${CLUSTER_NAME}"}[1h]) > 3
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Frequent etcd leader changes on {{ \$labels.cluster }}"
+ description: "etcd leader changed {{ \$value | humanize }} times in the last hour."
+YAML
+}
+
+#------------------------------------------------------------------------------
+# Apply federation configuration
+#------------------------------------------------------------------------------
+apply_federation() {
+ log "Configuring federation from $OPENSHIFT_URL..."
+
+ # Set up OpenShift service account and token
+ setup_openshift_sa
+
+ # Backup existing config
+ backup_config
+
+ # Generate and append federation scrape config
+ local federation_config
+ federation_config=$(generate_federation_config)
+
+ if $DRY_RUN; then
+ info "[dry-run] Would append to $PROMETHEUS_CONFIG:"
+ echo "$federation_config"
+ else
+ # Check if the job already exists
+ if grep -q 'job_name: "openshift-federate"' "$PROMETHEUS_CONFIG" 2>/dev/null; then
+ warn "Federation job 'openshift-federate' already exists in $PROMETHEUS_CONFIG"
+ warn "Remove the existing job first or edit it manually."
+ return 1
+ fi
+
+ echo "$federation_config" >> "$PROMETHEUS_CONFIG"
+ chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$PROMETHEUS_CONFIG"
+ log "Federation scrape job added to $PROMETHEUS_CONFIG"
+ fi
+
+ # Generate rules
+ if [[ "$SKIP_RULES" == false ]]; then
+ generate_rules
+ fi
+
+ # Validate and reload
+ validate_and_reload
+}
+
+#------------------------------------------------------------------------------
+# Apply remote write configuration
+#------------------------------------------------------------------------------
+apply_remote_write() {
+ log "Configuring remote write to $PROMETHEUS_URL..."
+
+ # Backup existing config
+ backup_config
+
+ # Enable remote write receiver
+ local service_file="/etc/systemd/system/${PROMETHEUS_SERVICE}.service"
+ if [[ -f "$service_file" ]]; then
+ if grep -q "web.enable-remote-write-receiver" "$service_file"; then
+ log "Remote write receiver already enabled"
+ else
+ if $DRY_RUN; then
+ info "[dry-run] Would add --web.enable-remote-write-receiver to $service_file"
+ else
+ warn "You need to add --web.enable-remote-write-receiver to your Prometheus service."
+ warn "Edit $service_file and add the flag to ExecStart, then run:"
+ warn " sudo systemctl daemon-reload && sudo systemctl restart prometheus"
+ echo ""
+ fi
+ fi
+ fi
+
+ # Generate basic auth credentials
+ local rw_password
+ rw_password=$(openssl rand -base64 24 2>/dev/null || head -c 24 /dev/urandom | base64)
+ local rw_user="openshift"
+
+ log "Generated remote write credentials:"
+ log " Username: $rw_user"
+ log " Password: $rw_password"
+ echo ""
+
+ # Generate web.yml with basic auth
+ local web_config_file
+ web_config_file="$(dirname "$PROMETHEUS_CONFIG")/web.yml"
+
+ if command -v htpasswd &>/dev/null; then
+ local hash
+ hash=$(htpasswd -nbBC 12 "" "$rw_password" | tr -d ':\n')
+
+ if $DRY_RUN; then
+ info "[dry-run] Would create $web_config_file with basic_auth_users"
+ else
+ if [[ -f "$web_config_file" ]]; then
+ warn "$web_config_file already exists — add this entry manually:"
+ echo " $rw_user: \"$hash\""
+ else
+ cat > "$web_config_file" < "$RULES_DIR/openshift-rules.yml"
+ chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$RULES_DIR/openshift-rules.yml"
+ log "Created $RULES_DIR/openshift-rules.yml"
+
+ generate_alert_rules > "$RULES_DIR/openshift-alerts.yml"
+ chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$RULES_DIR/openshift-alerts.yml"
+ log "Created $RULES_DIR/openshift-alerts.yml"
+}
+
+#------------------------------------------------------------------------------
+# Validate config and reload Prometheus
+#------------------------------------------------------------------------------
+validate_and_reload() {
+ if $DRY_RUN; then
+ info "[dry-run] Would validate config and reload Prometheus"
+ return
+ fi
+
+ # Validate with promtool
+ if command -v promtool &>/dev/null; then
+ log "Validating Prometheus configuration..."
+
+ if ! promtool check config "$PROMETHEUS_CONFIG"; then
+ error "Config validation failed. Restoring backup..."
+ local backup_dir
+ backup_dir="$(dirname "$PROMETHEUS_CONFIG")/backups"
+ local latest_backup
+ latest_backup=$(ls -t "$backup_dir"/prometheus.yml.* 2>/dev/null | head -1)
+ if [[ -n "$latest_backup" ]]; then
+ cp "$latest_backup" "$PROMETHEUS_CONFIG"
+ log "Restored from $latest_backup"
+ fi
+ exit 1
+ fi
+ log "Config validation passed"
+
+ # Validate rules
+ if [[ "$SKIP_RULES" == false ]]; then
+ for rule_file in "$RULES_DIR"/openshift-*.yml; do
+ if [[ -f "$rule_file" ]]; then
+ if ! promtool check rules "$rule_file"; then
+ error "Rule validation failed: $rule_file"
+ exit 1
+ fi
+ fi
+ done
+ log "Rule validation passed"
+ fi
+ fi
+
+ # Reload Prometheus
+ if systemctl is-active --quiet "$PROMETHEUS_SERVICE"; then
+ systemctl reload "$PROMETHEUS_SERVICE" 2>/dev/null || \
+ systemctl restart "$PROMETHEUS_SERVICE"
+ log "Prometheus reloaded"
+ else
+ warn "Prometheus service is not running. Start it with: sudo systemctl start $PROMETHEUS_SERVICE"
+ fi
+}
+
+#------------------------------------------------------------------------------
+# Print summary
+#------------------------------------------------------------------------------
+# Print the final summary: chosen method, cluster name, the endpoint and files
+# involved, and how to verify the result.
+print_summary() {
+    local federated=false
+    if [[ "$METHOD" == "federation" ]]; then
+        federated=true
+    fi
+
+    printf '%s\n' \
+        "" \
+        "============================================" \
+        " OpenShift Metrics Configuration Complete" \
+        "============================================" \
+        "" \
+        " Method: $METHOD" \
+        " Cluster name: $CLUSTER_NAME"
+
+    # The endpoint lines depend on the delivery method.
+    if $federated; then
+        printf '%s\n' \
+            " OpenShift URL: $OPENSHIFT_URL" \
+            " Token file: $TOKEN_FILE"
+    else
+        printf '%s\n' " Prometheus URL: $PROMETHEUS_URL"
+    fi
+
+    printf '%s\n' " Config file: $PROMETHEUS_CONFIG"
+
+    if [[ "$SKIP_RULES" == false ]]; then
+        printf '%s\n' " Rules dir: $RULES_DIR"
+    fi
+
+    # The suggested test query also depends on the delivery method.
+    local test_query
+    if $federated; then
+        test_query="node_memory_MemAvailable_bytes{cluster=\"${CLUSTER_NAME}\"}"
+    else
+        test_query="up{cluster=\"${CLUSTER_NAME}\"}"
+    fi
+
+    printf '%s\n' \
+        "" \
+        " Verify:" \
+        " - Check targets: http://localhost:9090/targets" \
+        " - Test query: ${test_query}" \
+        ""
+}
+
+#------------------------------------------------------------------------------
+# Main
+#------------------------------------------------------------------------------
+main() {
+ echo ""
+ log "configure-openshift-metrics.sh v${VERSION}"
+ echo ""
+
+ validate
+
+ if $DRY_RUN; then
+ warn "DRY RUN — no changes will be made"
+ echo ""
+ fi
+
+ case "$METHOD" in
+ federation) apply_federation ;;
+ remote-write) apply_remote_write ;;
+ esac
+
+ if ! $DRY_RUN; then
+ print_summary
+ fi
+
+ log "Done."
+}
+
+main
diff --git a/consul-exporter.sh b/consul-exporter.sh
new file mode 100644
index 0000000..4954db8
--- /dev/null
+++ b/consul-exporter.sh
@@ -0,0 +1,358 @@
+#!/usr/bin/env bash
+#
+# Consul Prometheus Metrics Exporter
+#
+# Prometheus textfile collector exporter for Consul.
+# Uses the Consul HTTP API to collect cluster health, Raft consensus,
+# service catalog, health check states, KV store entry counts,
+# and node membership.
+#
+# Usage:
+# ./consul-exporter.sh
+# ./consul-exporter.sh --textfile
+# CONSUL_TOKEN="xxx" ./consul-exporter.sh --textfile
+# ./consul-exporter.sh --install
+#
+# Parameters:
+# --textfile Write to textfile collector directory
+# --install Create cron job for automatic collection
+# --help Show usage
+#
+# Environment:
+# CONSUL_URL Consul HTTP API base URL (default: http://127.0.0.1:8500)
+# CONSUL_TOKEN ACL token (optional, required if ACLs are enabled)
+# TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
+# CURL_TIMEOUT API request timeout in seconds (default: 10)
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Version: 1.0
+#
+# Metrics Exported:
+# Core:
+# - consul_up
+# - consul_exporter_info{version}
+# - consul_peers_total
+# - consul_leader
+#
+# Catalog:
+# - consul_services_total
+# - consul_nodes_total
+#
+# Health:
+# - consul_health_checks_passing
+# - consul_health_checks_warning
+# - consul_health_checks_critical
+#
+# KV:
+# - consul_kv_entries_total
+#
+# Raft:
+# - consul_raft_commit_time_seconds
+# - consul_raft_last_contact_seconds
+#
+# Exporter:
+# - consul_exporter_duration_seconds
+# - consul_exporter_last_run_timestamp
+
+set -euo pipefail
+
+# --- Configuration ---
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="$(basename "$0")"
+CONSUL_URL="${CONSUL_URL:-http://127.0.0.1:8500}"
+CONSUL_TOKEN="${CONSUL_TOKEN:-}"
+TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+TEXTFILE_MODE=false
+OUTPUT=""
+START_TIME=""
+
+# --- Functions ---
+
+usage() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+validate_config() {
+ # Strip trailing slash
+ CONSUL_URL="${CONSUL_URL%/}"
+}
+
+api_get() {
+    # GET ${CONSUL_URL}<path> and print the response body.
+    # $1 - API path beginning with "/".
+    # Prints an empty line (and still returns 0) when curl fails, so
+    # callers test the captured text for emptiness.
+    local path="$1"
+    local -a opts=(-sf --max-time "$CURL_TIMEOUT")
+
+    # The ACL header is sent only when a token is configured.
+    [[ -z "$CONSUL_TOKEN" ]] || opts+=(-H "X-Consul-Token: ${CONSUL_TOKEN}")
+
+    if ! curl "${opts[@]}" "${CONSUL_URL}${path}" 2>/dev/null; then
+        echo ""
+    fi
+}
+
+add_metric() {
+ local name="$1"
+ local type="$2"
+ local help="$3"
+ local value="$4"
+ local labels="${5:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name} ${value}
+"
+ fi
+}
+
+add_metric_value() {
+ local name="$1"
+ local value="$2"
+ local labels="${3:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="${name} ${value}
+"
+ fi
+}
+
+collect_health() {
+ local members_json
+ members_json=$(api_get "/v1/agent/members")
+
+ if [[ -z "$members_json" ]]; then
+ add_metric "consul_up" "gauge" "Consul reachability (1=up, 0=down)" "0"
+ return 1
+ fi
+
+ add_metric "consul_up" "gauge" "Consul reachability (1=up, 0=down)" "1"
+
+ # Node count from members
+ local node_count
+ node_count=$(echo "$members_json" | jq 'length' 2>/dev/null)
+ add_metric "consul_nodes_total" "gauge" "Total number of cluster nodes" "${node_count:-0}"
+
+ return 0
+}
+
+collect_raft() {
+ local raft_json
+ raft_json=$(api_get "/v1/operator/raft/configuration")
+
+ if [[ -z "$raft_json" ]]; then
+ return
+ fi
+
+ # Peer count
+ local peer_count
+ peer_count=$(echo "$raft_json" | jq '.Servers | length' 2>/dev/null)
+ add_metric "consul_peers_total" "gauge" "Number of Raft peers in the cluster" "${peer_count:-0}"
+
+ # Leader detection — check if current node is leader
+ local self_json leader_addr self_addr
+ self_json=$(api_get "/v1/agent/self")
+
+ if [[ -n "$self_json" ]]; then
+ leader_addr=$(echo "$raft_json" | jq -r '.Servers[] | select(.Leader == true) | .Address' 2>/dev/null)
+ self_addr=$(echo "$self_json" | jq -r '.Config.RaftAddress // .Stats.raft.applied_index // empty' 2>/dev/null)
+ local self_name self_leader_name
+ self_name=$(echo "$self_json" | jq -r '.Config.NodeName // empty' 2>/dev/null)
+ self_leader_name=$(echo "$raft_json" | jq -r '.Servers[] | select(.Leader == true) | .Node' 2>/dev/null)
+
+ if [[ -n "$self_name" && "$self_name" == "$self_leader_name" ]]; then
+ add_metric "consul_leader" "gauge" "Whether this node is the cluster leader (1=leader, 0=follower)" "1"
+ else
+ add_metric "consul_leader" "gauge" "Whether this node is the cluster leader (1=leader, 0=follower)" "0"
+ fi
+
+ # Raft stats from /v1/agent/self
+ local raft_commit_time raft_last_contact
+ raft_commit_time=$(echo "$self_json" | jq -r '.Stats.raft.commit_time // empty' 2>/dev/null)
+ raft_last_contact=$(echo "$self_json" | jq -r '.Stats.raft.last_contact // empty' 2>/dev/null)
+
+ if [[ -n "$raft_commit_time" ]]; then
+ # Convert milliseconds to seconds
+ local commit_seconds
+ commit_seconds=$(echo "scale=6; ${raft_commit_time%ms} / 1000" | bc 2>/dev/null || echo "0")
+ add_metric "consul_raft_commit_time_seconds" "gauge" "Raft commit time in seconds" "$commit_seconds"
+ fi
+
+ if [[ -n "$raft_last_contact" ]]; then
+ # Convert milliseconds to seconds
+ local contact_seconds
+ contact_seconds=$(echo "scale=6; ${raft_last_contact%ms} / 1000" | bc 2>/dev/null || echo "0")
+ add_metric "consul_raft_last_contact_seconds" "gauge" "Time since last Raft leader contact in seconds" "$contact_seconds"
+ fi
+ fi
+}
+
+collect_services() {
+    # Record consul_services_total: the number of keys in the object
+    # returned by /v1/catalog/services. Emits nothing when the endpoint
+    # gives no answer.
+    local services_json
+    services_json=$(api_get "/v1/catalog/services")
+
+    if [[ -z "$services_json" ]]; then
+        return
+    fi
+
+    # Explicit fallback: with `set -e` active an unguarded jq failure would
+    # end the exporter instead of reaching the :-0 default below.
+    local service_count
+    service_count=$(jq 'keys | length' 2>/dev/null <<<"$services_json") || service_count=""
+    add_metric "consul_services_total" "gauge" "Total number of registered services" "${service_count:-0}"
+}
+
+collect_health_checks() {
+    # Record how many checks from /v1/health/state/any are in each of the
+    # passing / warning / critical states. Emits nothing when the endpoint
+    # gives no answer.
+    local checks_json
+    checks_json=$(api_get "/v1/health/state/any")
+
+    if [[ -z "$checks_json" ]]; then
+        return
+    fi
+
+    # One jq pass yields the three counts as a tab-separated line. A jq
+    # failure leaves the counts empty (reported as 0) rather than ending
+    # the exporter under `set -e`.
+    local counts passing="" warning="" critical=""
+    counts=$(jq -r '[
+        (map(select(.Status == "passing")) | length),
+        (map(select(.Status == "warning")) | length),
+        (map(select(.Status == "critical")) | length)
+    ] | @tsv' 2>/dev/null <<<"$checks_json") || counts=""
+    IFS=$'\t' read -r passing warning critical <<<"$counts" || true
+
+    add_metric "consul_health_checks_passing" "gauge" "Number of passing health checks" "${passing:-0}"
+    add_metric "consul_health_checks_warning" "gauge" "Number of warning health checks" "${warning:-0}"
+    add_metric "consul_health_checks_critical" "gauge" "Number of critical health checks" "${critical:-0}"
+}
+
+collect_kv() {
+    # Record consul_kv_entries_total: the length of the key list returned
+    # by /v1/kv/?keys. Reports 0 when the endpoint gives no answer or the
+    # answer cannot be parsed.
+    local kv_json kv_count=""
+    kv_json=$(api_get "/v1/kv/?keys")
+
+    if [[ -n "$kv_json" ]]; then
+        # Explicit fallback: with `set -e` active an unguarded jq failure
+        # would end the exporter instead of reaching the :-0 default.
+        kv_count=$(jq 'length' 2>/dev/null <<<"$kv_json") || kv_count=""
+    fi
+
+    add_metric "consul_kv_entries_total" "gauge" "Total number of KV store entries" "${kv_count:-0}"
+}
+
+write_output() {
+    # Deliver the accumulated OUTPUT: to stdout by default, or to
+    # ${TEXTFILE_DIR}/consul.prom when TEXTFILE_MODE is true.
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local target="${TEXTFILE_DIR}/consul.prom"
+    local staging="${target}.$$"
+
+    # Write to a PID-suffixed sibling first, then rename it into place.
+    mkdir -p "$TEXTFILE_DIR"
+    echo "$OUTPUT" > "$staging"
+    mv "$staging" "$target"
+}
+
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ script_path=$(readlink -f "$0")
+
+ cat > /etc/cron.d/consul-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/consul-exporter
+ echo "Installed cron job: /etc/cron.d/consul-exporter"
+ echo "Metrics will be written to: ${TEXTFILE_DIR}/consul.prom"
+}
+
+# --- Main ---
+
+main() {
+    # Entry point: handle options, collect every metric group, append the
+    # exporter's own timing metrics and hand the result to write_output.
+    for arg; do
+        case "$arg" in
+            --help|-h)
+                usage
+                ;;
+            --textfile)
+                TEXTFILE_MODE=true
+                ;;
+            --install)
+                # Installation is a terminal action: nothing is collected.
+                check_dependencies
+                validate_config
+                install_cron
+                exit 0
+                ;;
+            *)
+                echo "Unknown option: $arg" >&2
+                usage
+                ;;
+        esac
+    done
+
+    check_dependencies
+    validate_config
+
+    START_TIME=$(date +%s%N)
+
+    add_metric "consul_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+    # The remaining collectors run only when the agent answered.
+    if collect_health; then
+        collect_raft
+        collect_services
+        collect_health_checks
+        collect_kv
+    fi
+
+    # Run time: nanosecond difference scaled to seconds; "0" if bc fails.
+    local finished elapsed_s
+    finished=$(date +%s%N)
+    elapsed_s=$(echo "scale=2; ($finished - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+    add_metric "consul_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$elapsed_s"
+    add_metric "consul_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+    write_output
+}
+
+main "$@"
diff --git a/contabo-backup-auditor.sh b/contabo-backup-auditor.sh
new file mode 100644
index 0000000..27b53fb
--- /dev/null
+++ b/contabo-backup-auditor.sh
@@ -0,0 +1,521 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### contabo-backup-auditor.sh — Audit snapshot ages and backup coverage for ####
+#### Contabo VPS/VDS instances via the REST API ####
+#### Requires: bash 4+, curl, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./contabo-backup-auditor.sh --audit ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# log: informational line on stdout.
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+# warn / err: diagnostics on stderr.
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+# verbose: debug line, printed only when VERBOSE is "true". It goes to
+# stderr because contabo_api calls it while its stdout is being captured
+# with $(...) by fetch_all_contabo; on stdout the debug text would be mixed
+# into the JSON handed to jq.
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*" >&2; fi; }
+# die: report an error and terminate with status 1.
+die() { err "$*"; exit 1; }
+
+section_header() {
+    # Print a highlighted "── title ──" banner framed by blank lines.
+    local title="$1"
+    echo ""
+    echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
+    echo ""
+}
+
+field() {
+    # Print a label (bold, padded to 22 columns) and its value verbatim.
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %s\n" "$label" "$value"
+}
+
+field_color() {
+    # Like field, but backslash escapes in the value are interpreted (%b).
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %b\n" "$label" "$value"
+}
+
+elapsed() {
+    # Print whole seconds since START_TIME, suffixed with "s".
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+INSTANCE_ID=""
+TAG_ID=""
+OUTPUT_FORMAT="${CBA_FORMAT:-table}"
+MAX_AGE_HOURS="${CBA_MAX_AGE:-48}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+CONTABO_CLIENT_ID="${CONTABO_CLIENT_ID:-}"
+CONTABO_CLIENT_SECRET="${CONTABO_CLIENT_SECRET:-}"
+CONTABO_API_USER="${CONTABO_API_USER:-}"
+CONTABO_API_PASS="${CONTABO_API_PASS:-}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── API helpers ──────────────────────────────────────────────────────
+contabo_token() {
+    # Request an access token with the password grant and print it.
+    # Reads CONTABO_CLIENT_ID, CONTABO_CLIENT_SECRET, CONTABO_API_USER and
+    # CONTABO_API_PASS. Calls die when no token can be extracted.
+    local resp token
+
+    # All four credential fields go through --data-urlencode so values
+    # containing "&", "+", "=" or "%" reach the server intact (previously
+    # only username and password were encoded).
+    # A curl or jq failure falls through to the die below with its message
+    # instead of ending the script silently under `set -e`.
+    resp=$(curl -s \
+        --data-urlencode "client_id=${CONTABO_CLIENT_ID}" \
+        --data-urlencode "client_secret=${CONTABO_CLIENT_SECRET}" \
+        --data-urlencode "username=${CONTABO_API_USER}" \
+        --data-urlencode "password=${CONTABO_API_PASS}" \
+        -d "grant_type=password" \
+        "https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token") || resp=""
+
+    token=$(jq -r '.access_token // empty' 2>/dev/null <<<"$resp") || token=""
+    if [[ -z "$token" ]]; then
+        die "Failed to obtain access token — check credentials"
+    fi
+    echo "$token"
+}
+
+contabo_api() {
+    # Perform one Contabo API request and print the response body.
+    # $1 - HTTP method, $2 - endpoint path appended to /v1,
+    # remaining arguments - passed to curl unchanged.
+    # HTTP 429 is retried up to 3 times with a growing pause; any other
+    # status code prints the body and returns 0.
+    # Returns 1 when no token is available, curl itself fails, or every
+    # attempt was rate limited.
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token resp_file http_code request_id wait
+
+    # Fetch the token up front: a die inside $(...) only ends that
+    # subshell, so its status has to be checked here or the request would
+    # go out with an empty bearer token.
+    token=$(contabo_token) || return 1
+
+    # Private temp file instead of a fixed, predictable /tmp name, so
+    # concurrent runs cannot clobber each other and a failed transfer can
+    # never return a previous request's body.
+    resp_file=$(mktemp "${TMPDIR:-/tmp}/cba_resp.XXXXXX") || {
+        err "Could not create a temporary file for the API response"
+        return 1
+    }
+
+    while (( attempt < max_attempts )); do
+        request_id=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
+        http_code=$(curl -s -o "$resp_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: ${request_id}" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || {
+            rm -f -- "$resp_file"
+            err "API request could not be completed: ${method} ${endpoint}"
+            return 1
+        }
+
+        # Callers capture this function's stdout, so the debug line is
+        # forced onto stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" != "429" ]]; then
+            cat -- "$resp_file"
+            rm -f -- "$resp_file"
+            return 0
+        fi
+
+        attempt=$(( attempt + 1 ))
+        wait=$(( attempt * 5 ))
+        warn "Rate limited — retrying in ${wait}s (attempt ${attempt}/${max_attempts})"
+        sleep "$wait"
+    done
+
+    rm -f -- "$resp_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+check_credentials() {
+ [[ -z "$CONTABO_CLIENT_ID" ]] && die "CONTABO_CLIENT_ID not set"
+ [[ -z "$CONTABO_CLIENT_SECRET" ]] && die "CONTABO_CLIENT_SECRET not set"
+ [[ -z "$CONTABO_API_USER" ]] && die "CONTABO_API_USER not set"
+ [[ -z "$CONTABO_API_PASS" ]] && die "CONTABO_API_PASS not set"
+}
+
+check_deps() {
+ command -v curl &>/dev/null || die "curl is required"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+# ── Pagination helper ────────────────────────────────────────────────
+fetch_all_contabo() {
+ local endpoint="$1" key="$2"
+ local page=1 size=100 all_data="[]"
+ while true; do
+ local sep="?"
+ [[ "$endpoint" == *"?"* ]] && sep="&"
+ local resp
+ resp=$(contabo_api GET "${endpoint}${sep}page=${page}&size=${size}")
+ local page_data
+ page_data=$(echo "$resp" | jq ".${key} // []" 2>/dev/null)
+ local page_count
+ page_count=$(echo "$page_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$page_count" -eq 0 ]] && break
+ all_data=$(echo -e "${all_data}\n${page_data}" | jq -s 'add' 2>/dev/null)
+ (( page_count < size )) && break
+ ((page++)) || true
+ done
+ echo "$all_data"
+}
+
+# ── Age helpers ──────────────────────────────────────────────────────
+iso_to_epoch() {
+ date -d "$1" +%s 2>/dev/null || echo 0
+}
+
+age_hours() {
+    # Print the whole hours elapsed between the epoch value in $1 and now.
+    local created_epoch="$1"
+    local current
+    current=$(date +%s)
+    local diff_seconds=$(( current - created_epoch ))
+    echo $(( diff_seconds / 3600 ))
+}
+
+format_age() {
+    # Render an hour count: "<h>h" below one day, otherwise "<d>d <h>h".
+    local hours="$1"
+
+    if [[ "$hours" -lt 24 ]]; then
+        echo "${hours}h"
+        return
+    fi
+
+    echo "$(( hours / 24 ))d $(( hours % 24 ))h"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+do_audit() {
+ local instances
+ instances=$(fetch_all_contabo "/compute/instances" "data")
+ local instance_count
+ instance_count=$(echo "$instances" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$instance_count" -eq 0 ]] && die "No instances found"
+
+ # Filter by tag if specified
+ if [[ -n "$TAG_ID" ]]; then
+ instances=$(echo "$instances" | jq --arg tid "$TAG_ID" \
+ '[.[] | select(.tags[]? | .tagId == ($tid | tonumber))]' 2>/dev/null)
+ instance_count=$(echo "$instances" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$instance_count" -eq 0 ]] && die "No instances found with tag ${TAG_ID}"
+ fi
+
+ # Filter by instance ID
+ if [[ -n "$INSTANCE_ID" ]]; then
+ instances=$(echo "$instances" | jq --arg iid "$INSTANCE_ID" \
+ '[.[] | select(.instanceId == ($iid | tonumber))]' 2>/dev/null)
+ instance_count=$(echo "$instances" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$instance_count" -eq 0 ]] && die "Instance not found: ${INSTANCE_ID}"
+ fi
+
+ local snapshots
+ snapshots=$(fetch_all_contabo "/compute/snapshots" "data")
+
+ local warnings=0
+ local no_backup=0
+ local stale=0
+ local healthy=0
+ local results=""
+
+ while IFS=$'\t' read -r iid iname istatus; do
+ [[ -z "$iid" ]] && continue
+
+ # Find most recent snapshot for this instance
+ local latest_snap
+ latest_snap=$(echo "$snapshots" | jq -r \
+ --arg iid "$iid" \
+ '[.[] | select(.instanceId == ($iid | tonumber))] | sort_by(.createdDate) | last | .createdDate // empty' \
+ 2>/dev/null)
+
+ local age_h="—"
+ local status_flag="none"
+ if [[ -n "$latest_snap" ]]; then
+ local nepoch
+ nepoch=$(iso_to_epoch "$latest_snap")
+ age_h=$(age_hours "$nepoch")
+ if [[ "$age_h" -le "$MAX_AGE_HOURS" ]]; then
+ status_flag="ok"
+ ((healthy++)) || true
+ else
+ status_flag="stale"
+ ((stale++)) || true
+ ((warnings++)) || true
+ fi
+ else
+ ((no_backup++)) || true
+ ((warnings++)) || true
+ fi
+
+ # Count snapshots for this instance
+ local snap_count
+ snap_count=$(echo "$snapshots" | jq --arg iid "$iid" \
+ '[.[] | select(.instanceId == ($iid | tonumber))] | length' 2>/dev/null || echo 0)
+
+ results="${results}${iid}\t${iname}\t${istatus}\t${snap_count}\t${age_h}\t${status_flag}\n"
+ done < <(echo "$instances" | jq -r \
+ '.[] | "\(.instanceId)\t\(.name // .displayName // "unknown")\t\(.status // "—")"' \
+ 2>/dev/null)
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ jq -n \
+ --argjson instances "$instance_count" \
+ --argjson healthy "$healthy" \
+ --argjson stale "$stale" \
+ --argjson no_backup "$no_backup" \
+ --argjson warnings "$warnings" \
+ --argjson max_age "$MAX_AGE_HOURS" \
+ '{instances: $instances, healthy: $healthy, stale: $stale, no_backup: $no_backup, warnings: $warnings, max_age_hours: $max_age}'
+ ;;
+ prometheus)
+ cat </dev/null || echo 0)
+ [[ "$total" -eq 0 ]] && die "No snapshots found"
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ echo "$snapshots" | jq '[.[] | {
+ id: (.snapshotId // .id), name: .name,
+ instance_id: .instanceId, created: .createdDate
+ }]'
+ ;;
+ prometheus)
+ local stale_count=0
+ while IFS=$'\t' read -r sid screated; do
+ [[ -z "$sid" ]] && continue
+ local cepoch
+ cepoch=$(iso_to_epoch "$screated")
+ local ah
+ ah=$(age_hours "$cepoch")
+ [[ "$ah" -gt "$MAX_AGE_HOURS" ]] && ((stale_count++)) || true
+ done < <(echo "$snapshots" | jq -r '.[] | "\(.snapshotId // .id)\t\(.createdDate // "")"' 2>/dev/null)
+
+ cat </dev/null \
+ | while IFS=$'\t' read -r sid sname siid screated; do
+ local cepoch ah age_display age_color
+ cepoch=$(iso_to_epoch "$screated")
+ ah=$(age_hours "$cepoch")
+ age_display=$(format_age "$ah")
+ age_color="$GREEN"
+ [[ "$ah" -gt "$MAX_AGE_HOURS" ]] && age_color="$YELLOW"
+
+ printf " %-38s %-18s %-10s %-20s " \
+ "${sid:0:36}" "${sname:0:16}" "$siid" "${screated:0:19}"
+ echo -e "${age_color}${age_display}${RESET}"
+ done
+
+ echo ""
+ field "Snapshots:" "$total"
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat < /var/lib/node_exporter/textfile/contabo_backup.prom 2>/dev/null
+
+${BOLD}EXIT CODES${RESET}
+ 0 Success
+ 1 Runtime error
+EOF
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PARSE ARGS
+# ══════════════════════════════════════════════════════════════════════
+parse_args() {
+    # Translate command-line options into the script's global settings.
+    # Options that take a value abort with the quoted message when the
+    # value is missing; unknown options end the script through die.
+    until [[ $# -eq 0 ]]; do
+        case "$1" in
+            --help|-h)
+                setup_colors
+                show_help
+                exit 0
+                ;;
+            --audit)
+                RUN_MODE="audit"
+                shift
+                ;;
+            --snapshots)
+                RUN_MODE="snapshots"
+                shift
+                ;;
+            --verbose)
+                VERBOSE="true"
+                shift
+                ;;
+            --no-color)
+                COLOR="never"
+                shift
+                ;;
+            --instance)
+                INSTANCE_ID="${2:?--instance requires an ID}"
+                shift 2
+                ;;
+            --tag)
+                TAG_ID="${2:?--tag requires a TAG_ID}"
+                shift 2
+                ;;
+            --max-age)
+                MAX_AGE_HOURS="${2:?--max-age requires HOURS}"
+                shift 2
+                ;;
+            --format)
+                OUTPUT_FORMAT="${2:?--format requires a value}"
+                shift 2
+                ;;
+            *)
+                die "Unknown option: $1 (see --help)"
+                ;;
+        esac
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+main() {
+ parse_args "$@"
+ setup_colors
+
+ if [[ -z "$RUN_MODE" ]]; then
+ RUN_MODE="audit"
+ fi
+
+ check_deps
+ check_credentials
+
+ START_TIME=$(date +%s)
+
+ case "$RUN_MODE" in
+ audit) do_audit ;;
+ snapshots) do_snapshots ;;
+ *) die "Unknown mode: ${RUN_MODE}" ;;
+ esac
+
+ if [[ "$OUTPUT_FORMAT" != "prometheus" ]]; then
+ echo ""
+ field "Duration:" "$(elapsed)"
+ fi
+}
+
+main "$@"
diff --git a/contabo-cost-monitor.sh b/contabo-cost-monitor.sh
new file mode 100644
index 0000000..928ae2a
--- /dev/null
+++ b/contabo-cost-monitor.sh
@@ -0,0 +1,551 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### contabo-cost-monitor.sh — Track and report Contabo spending via the REST API. ####
+#### Instance costs, snapshot usage, and alert thresholds with Prometheus output ####
+#### Requires: bash 4+, curl, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./contabo-cost-monitor.sh --summary ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# log: informational line on stdout.
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+# warn / err: diagnostics on stderr.
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+# verbose: debug line, printed only when VERBOSE is "true". It goes to
+# stderr because contabo_api calls it while its stdout is being captured
+# with $(...) by fetch_all_contabo; on stdout the debug text would be mixed
+# into the JSON handed to jq.
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*" >&2; fi; }
+# die: report an error and terminate with status 1.
+die() { err "$*"; exit 1; }
+
+section_header() {
+    # Print a highlighted "── title ──" banner framed by blank lines.
+    local title="$1"
+    echo ""
+    echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
+    echo ""
+}
+
+field() {
+    # Print a label (bold, padded to 22 columns) and its value verbatim.
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %s\n" "$label" "$value"
+}
+
+field_color() {
+    # Like field, but backslash escapes in the value are interpreted (%b).
+    local label="$1" value="$2"
+    printf " ${BOLD}%-22s${RESET} %b\n" "$label" "$value"
+}
+
+elapsed() {
+    # Print whole seconds since START_TIME, suffixed with "s".
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+OUTPUT_FORMAT="${CCM_FORMAT:-table}"
+ALERT_THRESHOLD="${CCM_ALERT:-0}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+CONTABO_CLIENT_ID="${CONTABO_CLIENT_ID:-}"
+CONTABO_CLIENT_SECRET="${CONTABO_CLIENT_SECRET:-}"
+CONTABO_API_USER="${CONTABO_API_USER:-}"
+CONTABO_API_PASS="${CONTABO_API_PASS:-}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── API helpers ──────────────────────────────────────────────────────
+contabo_token() {
+    # Request an access token with the password grant and print it.
+    # Reads CONTABO_CLIENT_ID, CONTABO_CLIENT_SECRET, CONTABO_API_USER and
+    # CONTABO_API_PASS. Calls die when no token can be extracted.
+    local resp token
+
+    # All four credential fields go through --data-urlencode so values
+    # containing "&", "+", "=" or "%" reach the server intact (previously
+    # only username and password were encoded).
+    # A curl or jq failure falls through to the die below with its message
+    # instead of ending the script silently under `set -e`.
+    resp=$(curl -s \
+        --data-urlencode "client_id=${CONTABO_CLIENT_ID}" \
+        --data-urlencode "client_secret=${CONTABO_CLIENT_SECRET}" \
+        --data-urlencode "username=${CONTABO_API_USER}" \
+        --data-urlencode "password=${CONTABO_API_PASS}" \
+        -d "grant_type=password" \
+        "https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token") || resp=""
+
+    token=$(jq -r '.access_token // empty' 2>/dev/null <<<"$resp") || token=""
+    if [[ -z "$token" ]]; then
+        die "Failed to obtain access token — check credentials"
+    fi
+    echo "$token"
+}
+
+contabo_api() {
+    # Perform one Contabo API request and print the response body.
+    # $1 - HTTP method, $2 - endpoint path appended to /v1,
+    # remaining arguments - passed to curl unchanged.
+    # HTTP 429 is retried up to 3 times with a growing pause; any other
+    # status code prints the body and returns 0.
+    # Returns 1 when no token is available, curl itself fails, or every
+    # attempt was rate limited.
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token resp_file http_code request_id wait
+
+    # Fetch the token up front: a die inside $(...) only ends that
+    # subshell, so its status has to be checked here or the request would
+    # go out with an empty bearer token.
+    token=$(contabo_token) || return 1
+
+    # Private temp file instead of a fixed, predictable /tmp name, so
+    # concurrent runs cannot clobber each other and a failed transfer can
+    # never return a previous request's body.
+    resp_file=$(mktemp "${TMPDIR:-/tmp}/ccm_resp.XXXXXX") || {
+        err "Could not create a temporary file for the API response"
+        return 1
+    }
+
+    while (( attempt < max_attempts )); do
+        request_id=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
+        http_code=$(curl -s -o "$resp_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: ${request_id}" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || {
+            rm -f -- "$resp_file"
+            err "API request could not be completed: ${method} ${endpoint}"
+            return 1
+        }
+
+        # Callers capture this function's stdout, so the debug line is
+        # forced onto stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" != "429" ]]; then
+            cat -- "$resp_file"
+            rm -f -- "$resp_file"
+            return 0
+        fi
+
+        attempt=$(( attempt + 1 ))
+        wait=$(( attempt * 5 ))
+        warn "Rate limited — retrying in ${wait}s (attempt ${attempt}/${max_attempts})"
+        sleep "$wait"
+    done
+
+    rm -f -- "$resp_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+check_credentials() {
+ [[ -z "$CONTABO_CLIENT_ID" ]] && die "CONTABO_CLIENT_ID not set"
+ [[ -z "$CONTABO_CLIENT_SECRET" ]] && die "CONTABO_CLIENT_SECRET not set"
+ [[ -z "$CONTABO_API_USER" ]] && die "CONTABO_API_USER not set"
+ [[ -z "$CONTABO_API_PASS" ]] && die "CONTABO_API_PASS not set"
+}
+
+check_deps() {
+ command -v curl &>/dev/null || die "curl is required"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+# ── Pagination helper ────────────────────────────────────────────────
+fetch_all_contabo() {
+ local endpoint="$1" key="$2"
+ local page=1 size=100 all_data="[]"
+ while true; do
+ local sep="?"
+ [[ "$endpoint" == *"?"* ]] && sep="&"
+ local resp
+ resp=$(contabo_api GET "${endpoint}${sep}page=${page}&size=${size}")
+ local page_data
+ page_data=$(echo "$resp" | jq ".${key} // []" 2>/dev/null)
+ local page_count
+ page_count=$(echo "$page_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$page_count" -eq 0 ]] && break
+ all_data=$(echo -e "${all_data}\n${page_data}" | jq -s 'add' 2>/dev/null)
+ (( page_count < size )) && break
+ ((page++)) || true
+ done
+ echo "$all_data"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+do_summary() {
+ local instances
+ instances=$(fetch_all_contabo "/compute/instances" "data")
+ local instance_count
+ instance_count=$(echo "$instances" | jq 'length' 2>/dev/null || echo 0)
+ local running_count
+ running_count=$(echo "$instances" | jq '[.[] | select(.status == "running")] | length' 2>/dev/null || echo 0)
+
+ local snapshots
+ snapshots=$(fetch_all_contabo "/compute/snapshots" "data")
+ local snapshot_count
+ snapshot_count=$(echo "$snapshots" | jq 'length' 2>/dev/null || echo 0)
+
+ local object_storage
+ object_storage=$(fetch_all_contabo "/object-storages" "data")
+ local storage_count
+ storage_count=$(echo "$object_storage" | jq 'length' 2>/dev/null || echo 0)
+ local storage_tb
+ storage_tb=$(echo "$object_storage" | jq '[.[].totalPurchasedSpaceTB // 0] | add // 0' 2>/dev/null || echo 0)
+ local storage_used_bytes
+ storage_used_bytes=$(echo "$object_storage" | jq '[.[].usedSpaceBytes // 0] | add // 0' 2>/dev/null || echo 0)
+ local storage_used_gb
+ storage_used_gb=$(awk "BEGIN {printf \"%.1f\", ${storage_used_bytes} / 1073741824}")
+
+ # Estimate costs from instance product IDs
+ # Contabo uses fixed monthly pricing per product tier
+ local instance_cost="0.00"
+ while IFS=$'\t' read -r pid pname status; do
+ [[ -z "$pid" ]] && continue
+ # Extract monthly cost from product info if available
+ local cost_per_month="0"
+ # Contabo productId maps to fixed monthly rates
+ # These are approximations — actual billing comes from the Contabo panel
+ case "$pid" in
+ V1) cost_per_month="4.99" ;;
+ V2) cost_per_month="5.99" ;;
+ V4) cost_per_month="8.99" ;;
+ V8) cost_per_month="13.99" ;;
+ V16) cost_per_month="19.99" ;;
+ V24) cost_per_month="24.99" ;;
+ V30) cost_per_month="29.99" ;;
+ V45) cost_per_month="39.99" ;;
+ V60) cost_per_month="49.99" ;;
+ *) cost_per_month="0" ;;
+ esac
+ instance_cost=$(awk "BEGIN {printf \"%.2f\", ${instance_cost} + ${cost_per_month}}")
+ done < <(echo "$instances" | jq -r \
+ '.[] | "\(.productId // "—")\t\(.name // .displayName // "unknown")\t\(.status // "—")"' \
+ 2>/dev/null)
+
+ local total_cost="$instance_cost"
+
+ # Alert check
+ local alert_triggered="false"
+ if [[ "$ALERT_THRESHOLD" != "0" ]]; then
+ local over
+ over=$(awk "BEGIN {print (${total_cost} > ${ALERT_THRESHOLD}) ? 1 : 0}")
+ [[ "$over" == "1" ]] && alert_triggered="true"
+ fi
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ jq -n \
+ --argjson instances "$instance_count" \
+ --argjson running "$running_count" \
+ --argjson snapshots "$snapshot_count" \
+ --argjson object_storage "$storage_count" \
+ --arg storage_tb "$storage_tb" \
+ --arg storage_used_gb "$storage_used_gb" \
+ --arg instance_cost "$instance_cost" \
+ --arg total_cost "$total_cost" \
+ --arg alert_threshold "$ALERT_THRESHOLD" \
+ --argjson alert_triggered "$alert_triggered" \
+ '{
+ instances: $instances, running: $running,
+ snapshots: $snapshots,
+ object_storage: $object_storage,
+ storage_purchased_tb: ($storage_tb | tonumber),
+ storage_used_gb: ($storage_used_gb | tonumber),
+ monthly_estimate: {
+ instances: $instance_cost, total: $total_cost
+ },
+ alert: { threshold: $alert_threshold, triggered: $alert_triggered }
+ }'
+ ;;
+ prometheus)
+ cat </dev/null || echo 0)
+ [[ "$instance_count" -eq 0 ]] && die "No instances found"
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ echo "$instances" | jq '[.[] | {
+ id: .instanceId, name: (.name // .displayName),
+ status: .status, product: .productId,
+ region: .region, ip: .ipConfig.v4.ip
+ }]'
+ ;;
+ *)
+ section_header "Instance Cost Breakdown"
+
+ printf " ${BOLD}%-10s %-20s %-8s %-10s %-10s %10s${RESET}\n" \
+ "ID" "NAME" "PRODUCT" "STATUS" "REGION" "MONTHLY €"
+ printf " %s\n" "$(printf '%.0s─' {1..72})"
+
+ while IFS=$'\t' read -r iid iname pid status region; do
+ [[ -z "$iid" ]] && continue
+ local cost_per_month="0.00"
+ case "$pid" in
+ V1) cost_per_month="4.99" ;;
+ V2) cost_per_month="5.99" ;;
+ V4) cost_per_month="8.99" ;;
+ V8) cost_per_month="13.99" ;;
+ V16) cost_per_month="19.99" ;;
+ V24) cost_per_month="24.99" ;;
+ V30) cost_per_month="29.99" ;;
+ V45) cost_per_month="39.99" ;;
+ V60) cost_per_month="49.99" ;;
+ *) cost_per_month="—" ;;
+ esac
+
+ local status_color="$GREEN"
+ case "$status" in
+ running) status_color="$GREEN" ;;
+ stopped) status_color="$YELLOW" ;;
+ *) status_color="$RED" ;;
+ esac
+
+ printf " %-10s %-20s %-8s " "$iid" "${iname:0:18}" "$pid"
+ echo -ne "${status_color}"
+ printf "%-10s" "$status"
+ echo -ne "${RESET}"
+ printf " %-10s %10s\n" "${region:0:8}" "$cost_per_month"
+ done < <(echo "$instances" | jq -r \
+ '.[] | "\(.instanceId)\t\(.name // .displayName // "unknown")\t\(.productId // "—")\t\(.status // "—")\t\(.region // "—")"' \
+ 2>/dev/null)
+
+ echo ""
+ field "Instances:" "$instance_count"
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RESOURCES
+# ══════════════════════════════════════════════════════════════════════
+do_resources() {
+    # List snapshots and object-storage resources.
+    # json format: prints one object {snapshots, object_storage}.
+    # Any other format: prints a table per resource type, skipping a type
+    # that has no entries.
+    local snapshots snap_count storage storage_count
+
+    snapshots=$(fetch_all_contabo "/compute/snapshots" "data")
+    snap_count=$(echo "$snapshots" | jq 'length' 2>/dev/null || echo 0)
+
+    case "$OUTPUT_FORMAT" in
+        json)
+            storage=$(fetch_all_contabo "/object-storages" "data")
+            # Both documents are fed on stdin rather than via --argjson so
+            # a large listing cannot exceed the kernel's per-argument size
+            # limit.
+            printf '%s\n%s\n' "${snapshots:-[]}" "${storage:-[]}" \
+                | jq -s '{snapshots: .[0], object_storage: .[1]}'
+            ;;
+        *)
+            if [[ "$snap_count" -gt 0 ]]; then
+                section_header "Snapshots"
+                printf " ${BOLD}%-38s %-18s %-20s${RESET}\n" \
+                    "SNAPSHOT_ID" "NAME" "CREATED"
+                printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+                # Columns are cut to fit the fixed-width layout.
+                echo "$snapshots" | jq -r \
+                    '.[] | "\(.snapshotId // .id // "—")\t\(.name // "—")\t\(.createdDate // "—")"' \
+                    2>/dev/null \
+                    | while IFS=$'\t' read -r sid sname screated; do
+                        printf " %-38s %-18s %-20s\n" \
+                            "${sid:0:36}" "${sname:0:16}" "${screated:0:19}"
+                    done
+                echo ""
+                field "Snapshots:" "$snap_count"
+            fi
+
+            # Object Storage
+            storage=$(fetch_all_contabo "/object-storages" "data")
+            storage_count=$(echo "$storage" | jq 'length' 2>/dev/null || echo 0)
+
+            if [[ "$storage_count" -gt 0 ]]; then
+                section_header "Object Storage"
+                printf " ${BOLD}%-38s %-10s %-12s %-12s${RESET}\n" \
+                    "STORAGE_ID" "REGION" "SIZE (TB)" "USED (GB)"
+                printf " %s\n" "$(printf '%.0s─' {1..74})"
+
+                echo "$storage" | jq -r \
+                    '.[] | "\(.objectStorageId // .id // "—")\t\(.region // "—")\t\(.totalPurchasedSpaceTB // 0)\t\(.usedSpaceBytes // 0)"' \
+                    2>/dev/null \
+                    | while IFS=$'\t' read -r oid oregion osize oused; do
+                        local used_gb
+                        # The byte count is passed with -v instead of being
+                        # spliced into the awk program text, so an
+                        # unexpected API value cannot alter or break the
+                        # program; a non-numeric value counts as 0.
+                        used_gb=$(awk -v bytes="$oused" 'BEGIN {printf "%.1f", bytes / 1073741824}')
+                        printf " %-38s %-10s %-12s %-12s\n" \
+                            "${oid:0:36}" "${oregion:0:8}" "$osize" "$used_gb"
+                    done
+                echo ""
+                field "Object Storage:" "$storage_count"
+            fi
+            ;;
+    esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat < /var/lib/node_exporter/textfile/contabo_cost.prom 2>/dev/null
+
+${BOLD}NOTES${RESET}
+ Contabo uses fixed monthly pricing per product tier.
+ Cost estimates are based on productId mapping — verify against your invoice.
+ Snapshots and object storage are typically included in Contabo plans.
+
+${BOLD}EXIT CODES${RESET}
+ 0 Success
+ 1 Runtime error
+EOF
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PARSE ARGS
+# ══════════════════════════════════════════════════════════════════════
+parse_args() {
+    # Translate command-line options into the script's global settings.
+    # Options that take a value abort with the quoted message when the
+    # value is missing; unknown options end the script through die.
+    until [[ $# -eq 0 ]]; do
+        case "$1" in
+            --help|-h)
+                setup_colors
+                show_help
+                exit 0
+                ;;
+            --summary)
+                RUN_MODE="summary"
+                shift
+                ;;
+            --breakdown)
+                RUN_MODE="breakdown"
+                shift
+                ;;
+            --resources)
+                RUN_MODE="resources"
+                shift
+                ;;
+            --verbose)
+                VERBOSE="true"
+                shift
+                ;;
+            --no-color)
+                COLOR="never"
+                shift
+                ;;
+            --format)
+                OUTPUT_FORMAT="${2:?--format requires a value}"
+                shift 2
+                ;;
+            --alert)
+                ALERT_THRESHOLD="${2:?--alert requires a threshold}"
+                shift 2
+                ;;
+            *)
+                die "Unknown option: $1 (see --help)"
+                ;;
+        esac
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+main() {
+ parse_args "$@"
+ setup_colors
+
+ if [[ -z "$RUN_MODE" ]]; then
+ RUN_MODE="summary"
+ fi
+
+ check_deps
+ check_credentials
+
+ START_TIME=$(date +%s)
+
+ case "$RUN_MODE" in
+ summary) do_summary ;;
+ breakdown) do_breakdown ;;
+ resources) do_resources ;;
+ *) die "Unknown mode: ${RUN_MODE}" ;;
+ esac
+
+ if [[ "$OUTPUT_FORMAT" != "prometheus" ]]; then
+ echo ""
+ field "Duration:" "$(elapsed)"
+ fi
+}
+
+main "$@"
diff --git a/contabo-dns-manager.sh b/contabo-dns-manager.sh
new file mode 100644
index 0000000..94adf05
--- /dev/null
+++ b/contabo-dns-manager.sh
@@ -0,0 +1,648 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### contabo-dns-manager.sh — Manage DNS zones and records via the Contabo DNS API ####
+#### List zones, add/update/delete records, audit, bulk operations ####
+#### Requires: bash 4+, curl, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./contabo-dns-manager.sh --zones ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+# Every color variable starts empty so output is plain unless setup_colors
+# decides otherwise.
+RED=""
+GREEN=""
+YELLOW=""
+BLUE=""
+CYAN=""
+BOLD=""
+DIM=""
+RESET=""
+
+# Fill in the ANSI escape sequences when COLOR is "always", or when COLOR is
+# anything other than "never" and stdout is a terminal. In every other case
+# the variables are left untouched.
+setup_colors() {
+    case "${COLOR:-auto}" in
+        never) return 0 ;;
+        always) ;;
+        *) [[ -t 1 ]] || return 0 ;;
+    esac
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+#   log     - informational line on stdout
+#   warn    - warning on stderr
+#   err     - error on stderr
+#   verbose - debug line on stdout, only when VERBOSE is "true"
+#   die     - err, then terminate the shell with status 1
+log() {
+    echo -e "${BLUE}[INFO]${RESET} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    echo -e "${DIM}[DEBUG]${RESET} $*"
+}
+
+die() {
+    err "$*"
+    exit 1
+}
+
+# Print a section title ($1) in bold cyan, with a blank line above and below.
+section_header() {
+    echo -e "\n ${BOLD}${CYAN}── $1 ──${RESET}\n"
+}
+
+# Print one "label value" row; the label is bold and left-aligned in a
+# 22-column field.
+#   $1 - label
+#   $2 - value, printed verbatim
+# The color codes are passed as %b arguments instead of being spliced into the
+# format string, so a "%" inside BOLD/RESET can never be read as a printf
+# directive.
+field() {
+    printf ' %b%-22s%b %s\n' "$BOLD" "$1" "$RESET" "$2"
+}
+
+# Like field, but the value ($2) may carry backslash escapes (color codes),
+# which are expanded via %b.
+#   $1 - label, bold and left-aligned in a 22-column field
+#   $2 - value with optional escape sequences
+# The color variables are passed as %b arguments rather than embedded in the
+# format string, so their content is never interpreted as printf directives.
+field_color() {
+    printf ' %b%-22s%b %b\n' "$BOLD" "$1" "$RESET" "$2"
+}
+
+# Print the whole seconds elapsed since START_TIME (an epoch timestamp),
+# followed by "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    printf '%ss\n' "$(( now - START_TIME ))"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Selections filled in later by option parsing.
+RUN_MODE="" ZONE_NAME="" CSV_FILE=""
+RECORD_ID="" RECORD_TYPE="" RECORD_NAME="" RECORD_CONTENT="" RECORD_PRIO=""
+RECORD_TTL="3600"
+FORCE="false"
+# Output format comes from CDM_FORMAT when set, otherwise "table".
+OUTPUT_FORMAT="${CDM_FORMAT:-table}"
+# VERBOSE and COLOR keep a value inherited from the environment.
+: "${VERBOSE:=false}" "${COLOR:=auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+# Taken from the environment; defined as empty when absent so later reads
+# are safe under `set -u`.
+: "${CONTABO_CLIENT_ID:=}" "${CONTABO_CLIENT_SECRET:=}"
+: "${CONTABO_API_USER:=}" "${CONTABO_API_PASS:=}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+# Counters for succeeded / failed record operations.
+ACTION_OK=0
+ACTION_FAIL=0
+
+# ── API helpers ──────────────────────────────────────────────────────
+# Request an access token (password grant) from the Contabo auth endpoint
+# using the four CONTABO_* credential variables and print it on stdout.
+# Calls die when the response carries no access_token.
+contabo_token() {
+    local -r auth_url="https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token"
+    local -a form=(
+        -d "client_id=${CONTABO_CLIENT_ID}"
+        -d "client_secret=${CONTABO_CLIENT_SECRET}"
+        --data-urlencode "username=${CONTABO_API_USER}"
+        --data-urlencode "password=${CONTABO_API_PASS}"
+        -d "grant_type=password"
+    )
+    local reply access_token
+
+    reply=$(curl -s "${form[@]}" "$auth_url")
+    access_token=$(echo "$reply" | jq -r '.access_token // empty' 2>/dev/null)
+
+    [[ -n "$access_token" ]] || die "Failed to obtain access token — check credentials"
+    echo "$access_token"
+}
+
+# Perform one authenticated request against the Contabo REST API and print
+# the response body on stdout.
+#   $1   - HTTP method (GET, POST, PUT, DELETE, ...)
+#   $2   - endpoint path appended to https://api.contabo.com/v1
+#   $3.. - extra arguments handed to curl unchanged (e.g. -d "$payload")
+# Returns 0 as soon as a response other than HTTP 429 was received (the body
+# is printed whatever the status code was). Returns 1 when no token could be
+# obtained, the temporary file could not be created, or every attempt was
+# rate limited.
+contabo_api() {
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token body_file request_id http_code delay
+
+    # Fetch the token once, up front. Inside "$(...)" a die in contabo_token
+    # only ends that subshell, so its failure has to be checked explicitly.
+    token=$(contabo_token) || return 1
+
+    # Private, unpredictable file for the body instead of a fixed /tmp name
+    # that concurrent runs would overwrite.
+    body_file=$(mktemp "${TMPDIR:-/tmp}/cdm_resp.XXXXXX") || {
+        err "Could not create a temporary file for the API response"
+        return 1
+    }
+
+    while (( attempt < max_attempts )); do
+        request_id=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
+        # A transport error is tolerated as before: the (possibly empty)
+        # body is still handed back to the caller.
+        http_code=$(curl -s -o "$body_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: ${request_id}" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || true
+
+        # stdout is the response channel, so debug output goes to stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" == "429" ]]; then
+            attempt=$(( attempt + 1 ))
+            delay=$(( attempt * 5 ))
+            warn "Rate limited — retrying in ${delay}s (attempt ${attempt}/${max_attempts})"
+            sleep "$delay"
+            continue
+        fi
+
+        cat "$body_file"
+        rm -f -- "$body_file"
+        return 0
+    done
+
+    rm -f -- "$body_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+# Abort via die unless all four Contabo credential variables are non-empty.
+# Returns 0 when every credential is present.
+#
+# Written as "test || die" with an explicit return: the former
+# "[[ -z ... ]] && die" form left the function with status 1 whenever the last
+# credential WAS set, which makes a bare call abort the script under `set -e`.
+check_credentials() {
+    local name
+    for name in CONTABO_CLIENT_ID CONTABO_CLIENT_SECRET CONTABO_API_USER CONTABO_API_PASS; do
+        # ${!name} reads the variable whose name is stored in $name.
+        [[ -n "${!name:-}" ]] || die "${name} not set"
+    done
+    return 0
+}
+
+# Abort via die unless the external tools this script calls (curl, jq) can be
+# found.
+check_deps() {
+    local tool
+    for tool in curl jq; do
+        if ! command -v "$tool" &>/dev/null; then
+            die "${tool} is required"
+        fi
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ZONES
+# ══════════════════════════════════════════════════════════════════════
+# List all DNS zones: fetch /dns/zones in pages of 100 until an empty or short
+# page comes back, then render the collected array according to OUTPUT_FORMAT.
+# Calls die when no zones were collected.
+# NOTE(review): the "prometheus" branch below looks truncated — the `cat`
+# here-document and the start of the table branch that should feed the
+# `while read` loop are not visible here. Restore them from the original
+# script before relying on this function.
+do_zones() {
+ local page=1 size=100 all_data="[]"
+
+ while true; do
+ local resp
+ resp=$(contabo_api GET "/dns/zones?page=${page}&size=${size}")
+ local page_data
+ page_data=$(echo "$resp" | jq '.data // []' 2>/dev/null)
+ local page_count
+ page_count=$(echo "$page_data" | jq 'length' 2>/dev/null || echo 0)
+ # Stop on an empty page, or after a page shorter than the page size.
+ [[ "$page_count" -eq 0 ]] && break
+ all_data=$(echo -e "${all_data}\n${page_data}" | jq -s 'add' 2>/dev/null)
+ (( page_count < size )) && break
+ ((page++)) || true
+ done
+
+ local total
+ total=$(echo "$all_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$total" -eq 0 ]] && die "No zones found"
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ echo "$all_data" | jq '.'
+ ;;
+ prometheus)
+ cat </dev/null \
+ | while IFS=$'\t' read -r name status zid rcount; do
+ printf " %-25s %-10s %-36s %-8s\n" \
+ "${name:0:23}" "$status" "${zid:0:34}" "$rcount"
+ done
+
+ echo ""
+ field "Zones:" "$total"
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RECORDS
+# ══════════════════════════════════════════════════════════════════════
+# List the records of ZONE_NAME (required; dies when empty) fetched from
+# /dns/zones/<zone>/records, rendered according to OUTPUT_FORMAT.
+# NOTE(review): the "prometheus" branch below looks truncated — the `cat`
+# here-document and the start of the table branch that should feed the
+# `while read` loop are not visible here. Restore them from the original
+# script before relying on this function.
+do_records() {
+ [[ -z "$ZONE_NAME" ]] && die "Specify --zone DOMAIN"
+
+ local resp
+ resp=$(contabo_api GET "/dns/zones/${ZONE_NAME}/records")
+ local records
+ records=$(echo "$resp" | jq '.data // []' 2>/dev/null)
+ local total
+ total=$(echo "$records" | jq 'length' 2>/dev/null || echo 0)
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ echo "$records" | jq '.'
+ ;;
+ prometheus)
+ cat </dev/null \
+ | while IFS=$'\t' read -r rid rtype rname rcontent rttl rprio; do
+ printf " %-36s %-6s %-10s %-26s %-6s %-5s\n" \
+ "${rid:0:34}" "$rtype" "${rname:0:8}" "${rcontent:0:24}" "$rttl" "$rprio"
+ done
+
+ echo ""
+ field "Records:" "$total"
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ADD
+# ══════════════════════════════════════════════════════════════════════
+# Create one DNS record in ZONE_NAME from the RECORD_* globals and report the
+# outcome, incrementing ACTION_OK or ACTION_FAIL.
+#   Globals read: ZONE_NAME, RECORD_TYPE, RECORD_NAME, RECORD_CONTENT,
+#                 RECORD_TTL, RECORD_PRIO (optional)
+# Dies when a required value is missing or TTL/priority is not a plain
+# non-negative integer.
+do_add() {
+    [[ -n "$ZONE_NAME" ]] || die "Specify --zone DOMAIN"
+    [[ -n "$RECORD_TYPE" ]] || die "Specify --type TYPE"
+    [[ -n "$RECORD_NAME" ]] || die "Specify --name NAME"
+    [[ -n "$RECORD_CONTENT" ]] || die "Specify --content CONTENT"
+
+    # Validate the numeric fields here so a typo yields a clear message
+    # instead of a jq error that aborts the script under `set -e`.
+    [[ "$RECORD_TTL" =~ ^[0-9]+$ ]] \
+        || die "TTL must be a non-negative integer: ${RECORD_TTL}"
+    if [[ -n "$RECORD_PRIO" && ! "$RECORD_PRIO" =~ ^[0-9]+$ ]]; then
+        die "Priority must be a non-negative integer: ${RECORD_PRIO}"
+    fi
+
+    local payload
+    payload=$(jq -n \
+        --arg type "$RECORD_TYPE" \
+        --arg name "$RECORD_NAME" \
+        --arg content "$RECORD_CONTENT" \
+        --arg ttl "$RECORD_TTL" \
+        --arg prio "$RECORD_PRIO" \
+        '{type: $type, name: $name, content: $content, ttl: ($ttl | tonumber)}
+         + (if $prio == "" then {} else {prio: ($prio | tonumber)} end)')
+
+    local resp
+    resp=$(contabo_api POST "/dns/zones/${ZONE_NAME}/records" -d "$payload")
+
+    # A non-JSON body (e.g. a proxy error page) must end up in the failure
+    # branch below, not kill the script through a failed assignment.
+    local rid
+    rid=$(jq -r '.data[0].recordId // .data[0].id // empty' <<<"$resp" 2>/dev/null) || rid=""
+
+    if [[ -n "$rid" ]]; then
+        echo -e " ${GREEN}✓${RESET} Record created: ${RECORD_TYPE} ${RECORD_NAME} → ${RECORD_CONTENT} (ID: ${rid})"
+        ((ACTION_OK++)) || true
+    else
+        local errmsg
+        errmsg=$(jq -r '.message // "unknown error"' <<<"$resp" 2>/dev/null) || errmsg=""
+        [[ -n "$errmsg" ]] || errmsg="unknown error"
+        echo -e " ${RED}✗${RESET} Failed to create record: ${errmsg}"
+        ((ACTION_FAIL++)) || true
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# UPDATE
+# ══════════════════════════════════════════════════════════════════════
+# Replace DNS record RECORD_ID in ZONE_NAME with the values of the RECORD_*
+# globals and report the outcome, incrementing ACTION_OK or ACTION_FAIL.
+#   Globals read: RECORD_ID, ZONE_NAME, RECORD_TYPE, RECORD_NAME,
+#                 RECORD_CONTENT, RECORD_TTL, RECORD_PRIO (optional)
+# Dies when a required value is missing or TTL/priority is not a plain
+# non-negative integer.
+do_update() {
+    [[ -n "$RECORD_ID" ]] || die "Specify --record-id ID"
+    [[ -n "$ZONE_NAME" ]] || die "Specify --zone DOMAIN"
+    [[ -n "$RECORD_TYPE" ]] || die "Specify --type TYPE"
+    [[ -n "$RECORD_NAME" ]] || die "Specify --name NAME"
+    [[ -n "$RECORD_CONTENT" ]] || die "Specify --content CONTENT"
+
+    # Validate the numeric fields here so a typo yields a clear message
+    # instead of a jq error that aborts the script under `set -e`.
+    [[ "$RECORD_TTL" =~ ^[0-9]+$ ]] \
+        || die "TTL must be a non-negative integer: ${RECORD_TTL}"
+    if [[ -n "$RECORD_PRIO" && ! "$RECORD_PRIO" =~ ^[0-9]+$ ]]; then
+        die "Priority must be a non-negative integer: ${RECORD_PRIO}"
+    fi
+
+    local payload
+    payload=$(jq -n \
+        --arg type "$RECORD_TYPE" \
+        --arg name "$RECORD_NAME" \
+        --arg content "$RECORD_CONTENT" \
+        --arg ttl "$RECORD_TTL" \
+        --arg prio "$RECORD_PRIO" \
+        '{type: $type, name: $name, content: $content, ttl: ($ttl | tonumber)}
+         + (if $prio == "" then {} else {prio: ($prio | tonumber)} end)')
+
+    local resp
+    resp=$(contabo_api PUT "/dns/zones/${ZONE_NAME}/records/${RECORD_ID}" -d "$payload")
+
+    # A non-JSON body (e.g. a proxy error page) must end up in the failure
+    # branch below, not kill the script through a failed assignment.
+    local rid
+    rid=$(jq -r '.data[0].recordId // .data[0].id // empty' <<<"$resp" 2>/dev/null) || rid=""
+
+    if [[ -n "$rid" ]]; then
+        echo -e " ${GREEN}✓${RESET} Record updated: ${RECORD_TYPE} ${RECORD_NAME} → ${RECORD_CONTENT} (ID: ${rid})"
+        ((ACTION_OK++)) || true
+    else
+        local errmsg
+        errmsg=$(jq -r '.message // "unknown error"' <<<"$resp" 2>/dev/null) || errmsg=""
+        [[ -n "$errmsg" ]] || errmsg="unknown error"
+        echo -e " ${RED}✗${RESET} Failed to update record: ${errmsg}"
+        ((ACTION_FAIL++)) || true
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DELETE
+# ══════════════════════════════════════════════════════════════════════
+# Delete DNS record RECORD_ID from ZONE_NAME. Requires FORCE to be "true".
+# Increments ACTION_OK on success and ACTION_FAIL when the API answered with
+# an error message, instead of reporting success unconditionally.
+do_delete() {
+    [[ -n "$RECORD_ID" ]] || die "Specify --record-id ID"
+    [[ -n "$ZONE_NAME" ]] || die "Specify --zone DOMAIN"
+    [[ "$FORCE" == "true" ]] || die "Delete is destructive — use --force to confirm"
+
+    local resp errmsg
+    resp=$(contabo_api DELETE "/dns/zones/${ZONE_NAME}/records/${RECORD_ID}")
+
+    # NOTE(review): assumes a failed call returns a JSON body with a "message"
+    # field (the field do_add/do_update print on failure) and that a
+    # successful delete returns no such field — confirm against the API.
+    errmsg=$(jq -r '.message? // empty' <<<"$resp" 2>/dev/null) || errmsg=""
+
+    if [[ -n "$errmsg" ]]; then
+        echo -e " ${RED}✗${RESET} Failed to delete record ${RECORD_ID}: ${errmsg}"
+        ((ACTION_FAIL++)) || true
+        return 0
+    fi
+
+    echo -e " ${GREEN}✓${RESET} Record deleted: ${RECORD_ID}"
+    ((ACTION_OK++)) || true
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# BULK ADD
+# ══════════════════════════════════════════════════════════════════════
+# Strip leading and trailing whitespace (including a trailing CR from CRLF
+# files) from $1 and print the result. Unlike piping through xargs, quotes
+# and backslashes in the value are left untouched.
+_bulk_trim() {
+    local s="$1"
+    s="${s#"${s%%[![:space:]]*}"}"
+    s="${s%"${s##*[![:space:]]}"}"
+    printf '%s' "$s"
+}
+
+# Create one DNS record in ZONE_NAME per line of CSV_FILE.
+#   Line format: type,name,content[,ttl[,prio]]   (ttl defaults to 3600)
+#   Blank lines and lines starting with "#" are skipped.
+# Every processed line increments ACTION_OK or ACTION_FAIL; a summary is
+# printed at the end. A malformed line or a failed request is counted as a
+# failure and the run continues with the next line.
+do_bulk_add() {
+    [[ -n "$ZONE_NAME" ]] || die "Specify --zone DOMAIN"
+    [[ -n "$CSV_FILE" ]] || die "Specify --csv FILE"
+    [[ -f "$CSV_FILE" ]] || die "CSV file not found: ${CSV_FILE}"
+
+    section_header "Bulk Add — ${ZONE_NAME}"
+
+    local line_num=0
+    local rtype rname rcontent rttl rprio payload resp rid
+
+    # "|| [[ -n ... ]]" also processes a final line without trailing newline.
+    while IFS=',' read -r rtype rname rcontent rttl rprio || [[ -n "$rtype" ]]; do
+        line_num=$(( line_num + 1 ))
+
+        rtype=$(_bulk_trim "$rtype")
+        [[ -z "$rtype" || "$rtype" == "#"* ]] && continue
+        rname=$(_bulk_trim "$rname")
+        rcontent=$(_bulk_trim "$rcontent")
+        rttl=$(_bulk_trim "$rttl")
+        rprio=$(_bulk_trim "$rprio")
+        rttl="${rttl:-3600}"
+
+        # Reject non-numeric TTL/priority (e.g. a header row) for this line
+        # only, rather than letting jq abort the whole batch.
+        if [[ ! "$rttl" =~ ^[0-9]+$ ]] || [[ -n "$rprio" && ! "$rprio" =~ ^[0-9]+$ ]]; then
+            echo -e " ${RED}✗${RESET} ${rtype} ${rname} → ${rcontent} (line ${line_num}: TTL and priority must be integers)"
+            ((ACTION_FAIL++)) || true
+            continue
+        fi
+
+        payload=$(jq -n \
+            --arg type "$rtype" \
+            --arg name "$rname" \
+            --arg content "$rcontent" \
+            --arg ttl "$rttl" \
+            --arg prio "$rprio" \
+            '{type: $type, name: $name, content: $content, ttl: ($ttl | tonumber)}
+             + (if $prio == "" then {} else {prio: ($prio | tonumber)} end)') || payload=""
+
+        # stdin is pointed away from the CSV so the request cannot consume
+        # the remaining lines; a failed request counts as a failed line.
+        resp=$(contabo_api POST "/dns/zones/${ZONE_NAME}/records" -d "$payload" </dev/null) || resp=""
+
+        rid=$(jq -r '.data[0].recordId // .data[0].id // empty' <<<"$resp" 2>/dev/null) || rid=""
+
+        if [[ -n "$rid" ]]; then
+            echo -e " ${GREEN}✓${RESET} ${rtype} ${rname} → ${rcontent} (line ${line_num})"
+            ((ACTION_OK++)) || true
+        else
+            echo -e " ${RED}✗${RESET} ${rtype} ${rname} → ${rcontent} (line ${line_num})"
+            ((ACTION_FAIL++)) || true
+        fi
+
+        # Brief pause between requests.
+        sleep 0.5
+    done < "$CSV_FILE"
+
+    echo ""
+    field_color "Succeeded:" "${GREEN}${ACTION_OK}${RESET}"
+    if [[ "$ACTION_FAIL" -gt 0 ]]; then
+        field_color "Failed:" "${RED}${ACTION_FAIL}${RESET}"
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit the records of ZONE_NAME (required; dies when empty): checks for SOA
+# and NS records, the common record types A/AAAA/MX/TXT, TTLs below 300
+# seconds and wildcard names. Findings are echoed unless OUTPUT_FORMAT is
+# "prometheus"; `warnings` counts every failed check.
+# NOTE(review): the end of this function (the prometheus output and whatever
+# follows it) is not visible here — the text after the final `if` is damaged.
+# NOTE(review): the loop variable `rtype` is not declared local.
+do_audit() {
+ [[ -z "$ZONE_NAME" ]] && die "Specify --zone DOMAIN"
+
+ local resp
+ resp=$(contabo_api GET "/dns/zones/${ZONE_NAME}/records")
+ local records
+ records=$(echo "$resp" | jq '.data // []' 2>/dev/null)
+ local total
+ total=$(echo "$records" | jq 'length' 2>/dev/null || echo 0)
+
+ local warnings=0
+
+ if [[ "$OUTPUT_FORMAT" != "prometheus" ]]; then
+ section_header "DNS Audit — ${ZONE_NAME}"
+ field "Records:" "$total"
+ echo ""
+ fi
+
+ # Check SOA
+ local soa_count
+ soa_count=$(echo "$records" | jq '[.[] | select(.type == "SOA")] | length' 2>/dev/null || echo 0)
+ if [[ "$soa_count" -eq 0 ]]; then
+ ((warnings++)) || true
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${YELLOW}⚠${RESET} No SOA record found"
+ else
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${GREEN}✓${RESET} SOA record present"
+ fi
+
+ # Check NS
+ # No NS record at all and a single NS record each count as one warning.
+ local ns_count
+ ns_count=$(echo "$records" | jq '[.[] | select(.type == "NS")] | length' 2>/dev/null || echo 0)
+ if [[ "$ns_count" -eq 0 ]]; then
+ ((warnings++)) || true
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${RED}✗${RESET} No NS records found"
+ elif [[ "$ns_count" -lt 2 ]]; then
+ ((warnings++)) || true
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${YELLOW}⚠${RESET} Only ${ns_count} NS record(s) — recommend at least 2"
+ else
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${GREEN}✓${RESET} ${ns_count} NS records"
+ fi
+
+ # Check common types
+ for rtype in A AAAA MX TXT; do
+ local rcount
+ rcount=$(echo "$records" | jq --arg t "$rtype" '[.[] | select(.type == $t)] | length' 2>/dev/null || echo 0)
+ if [[ "$rcount" -eq 0 ]]; then
+ ((warnings++)) || true
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${YELLOW}⚠${RESET} No ${rtype} records found"
+ else
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${GREEN}✓${RESET} ${rcount} ${rtype} record(s)"
+ fi
+ done
+
+ # Check low TTLs
+ # Counts records whose ttl is greater than 0 and below 300.
+ local low_ttl
+ low_ttl=$(echo "$records" | jq '[.[] | select(.ttl < 300 and .ttl > 0)] | length' 2>/dev/null || echo 0)
+ if [[ "$low_ttl" -gt 0 ]]; then
+ ((warnings++)) || true
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${YELLOW}⚠${RESET} ${low_ttl} record(s) with TTL < 300s"
+ else
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${GREEN}✓${RESET} All TTLs ≥ 300s"
+ fi
+
+ # Check wildcards
+ # Informational only: wildcard records do not increment `warnings`.
+ local wildcard
+ wildcard=$(echo "$records" | jq '[.[] | select(.name | startswith("*"))] | length' 2>/dev/null || echo 0)
+ if [[ "$wildcard" -gt 0 ]]; then
+ [[ "$OUTPUT_FORMAT" != "prometheus" ]] && echo -e " ${CYAN}ℹ${RESET} ${wildcard} wildcard record(s)"
+ fi
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ cat <&2; }
+# err     - error line on stderr
+# verbose - debug line on stdout, only when VERBOSE is "true"
+# die     - err, then terminate the shell with status 1
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    echo -e "${DIM}[DEBUG]${RESET} $*"
+}
+
+die() {
+    err "$*"
+    exit 1
+}
+
+# ── Severity counters ────────────────────────────────────────────────
+# Running totals of findings per severity; each flag_* helper adds one to
+# its counter and always succeeds.
+TOTAL_CRIT=0 TOTAL_WARN=0 TOTAL_INFO=0 TOTAL_OK=0
+
+flag_crit() {
+    TOTAL_CRIT=$(( TOTAL_CRIT + 1 ))
+}
+
+flag_warn() {
+    TOTAL_WARN=$(( TOTAL_WARN + 1 ))
+}
+
+flag_info() {
+    TOTAL_INFO=$(( TOTAL_INFO + 1 ))
+}
+
+flag_ok() {
+    TOTAL_OK=$(( TOTAL_OK + 1 ))
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+# Comma-separated ports treated as dangerous; VERBOSE and COLOR likewise keep
+# a value inherited from the environment.
+: "${DANGEROUS_PORTS:=22,3389,3306,5432,1433,6379,27017,9200,8080,8443}"
+: "${VERBOSE:=false}" "${COLOR:=auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+# Taken from the environment; defined as empty when absent so later reads
+# are safe under `set -u`.
+: "${CONTABO_CLIENT_ID:=}" "${CONTABO_CLIENT_SECRET:=}"
+: "${CONTABO_API_USER:=}" "${CONTABO_API_PASS:=}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── API helpers ──────────────────────────────────────────────────────
+# Request an access token (password grant) from the Contabo auth endpoint
+# using the four CONTABO_* credential variables and print it on stdout.
+# Calls die when the response carries no access_token.
+contabo_token() {
+    local -r auth_url="https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token"
+    local -a form=(
+        -d "client_id=${CONTABO_CLIENT_ID}"
+        -d "client_secret=${CONTABO_CLIENT_SECRET}"
+        --data-urlencode "username=${CONTABO_API_USER}"
+        --data-urlencode "password=${CONTABO_API_PASS}"
+        -d "grant_type=password"
+    )
+    local reply access_token
+
+    reply=$(curl -s "${form[@]}" "$auth_url")
+    access_token=$(echo "$reply" | jq -r '.access_token // empty' 2>/dev/null)
+
+    [[ -n "$access_token" ]] || die "Failed to obtain access token — check credentials"
+    echo "$access_token"
+}
+
+# Perform one authenticated request against the Contabo REST API and print
+# the response body on stdout.
+#   $1   - HTTP method (GET, POST, PUT, DELETE, ...)
+#   $2   - endpoint path appended to https://api.contabo.com/v1
+#   $3.. - extra arguments handed to curl unchanged
+# Returns 0 as soon as a response other than HTTP 429 was received (the body
+# is printed whatever the status code was). Returns 1 when no token could be
+# obtained, the temporary file could not be created, or every attempt was
+# rate limited.
+contabo_api() {
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token body_file request_id http_code delay
+
+    # Fetch the token once, up front. Inside "$(...)" a die in contabo_token
+    # only ends that subshell, so its failure has to be checked explicitly.
+    token=$(contabo_token) || return 1
+
+    # Private, unpredictable file for the body instead of a fixed /tmp name
+    # that concurrent runs would overwrite.
+    body_file=$(mktemp "${TMPDIR:-/tmp}/cfa_resp.XXXXXX") || {
+        err "Could not create a temporary file for the API response"
+        return 1
+    }
+
+    while (( attempt < max_attempts )); do
+        request_id=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
+        # A transport error is tolerated as before: the (possibly empty)
+        # body is still handed back to the caller.
+        http_code=$(curl -s -o "$body_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: ${request_id}" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || true
+
+        # stdout is the response channel, so debug output goes to stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" == "429" ]]; then
+            attempt=$(( attempt + 1 ))
+            delay=$(( attempt * 5 ))
+            warn "Rate limited — retrying in ${delay}s (attempt ${attempt}/${max_attempts})"
+            sleep "$delay"
+            continue
+        fi
+
+        cat "$body_file"
+        rm -f -- "$body_file"
+        return 0
+    done
+
+    rm -f -- "$body_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+# Abort via die unless all four Contabo credential variables are non-empty.
+# Returns 0 when every credential is present.
+#
+# Written as "test || die" with an explicit return: the former
+# "[[ -z ... ]] && die" form left the function with status 1 whenever the last
+# credential WAS set, which makes a bare call abort the script under `set -e`.
+check_credentials() {
+    local name
+    for name in CONTABO_CLIENT_ID CONTABO_CLIENT_SECRET CONTABO_API_USER CONTABO_API_PASS; do
+        # ${!name} reads the variable whose name is stored in $name.
+        [[ -n "${!name:-}" ]] || die "${name} not set"
+    done
+    return 0
+}
+
+# Abort via die unless the external tools this script calls (curl, jq) can be
+# found.
+check_deps() {
+    local tool
+    for tool in curl jq; do
+        if ! command -v "$tool" &>/dev/null; then
+            die "${tool} is required"
+        fi
+    done
+}
+
+# ── Instance helpers ─────────────────────────────────────────────────
+# Print a JSON array of all compute instances, collected by walking
+# /compute/instances in pages of 100 until an empty or short page comes back.
+# Returns 1 as soon as a request fails, so that an API outage is not reported
+# to the audits as "no instances".
+get_all_instances() {
+    local page=1 size=100 result="[]"
+    local resp page_data count
+
+    while true; do
+        resp=$(contabo_api GET "/compute/instances?page=${page}&size=${size}") || return 1
+        page_data=$(jq '.data // []' <<<"$resp" 2>/dev/null) || page_data=""
+        count=$(jq 'length' <<<"$page_data" 2>/dev/null || echo 0)
+
+        # An empty count (unparseable page) is treated like an empty page.
+        if [[ "${count:-0}" -eq 0 ]]; then
+            break
+        fi
+        result=$(echo "$result" "$page_data" | jq -s '.[0] + .[1]')
+        if (( count < size )); then
+            break
+        fi
+        page=$(( page + 1 ))
+    done
+
+    echo "$result"
+}
+
+# ── Firewall helpers ─────────────────────────────────────────────────
+# Print a JSON array of all firewalls, collected by walking /firewalls in
+# pages of 100 until an empty or short page comes back.
+# Returns 1 as soon as a request fails, so that an API outage is not reported
+# to the audits as "no firewalls".
+get_all_firewalls() {
+    local page=1 size=100 result="[]"
+    local resp page_data count
+
+    while true; do
+        resp=$(contabo_api GET "/firewalls?page=${page}&size=${size}") || return 1
+        page_data=$(jq '.data // []' <<<"$resp" 2>/dev/null) || page_data=""
+        count=$(jq 'length' <<<"$page_data" 2>/dev/null || echo 0)
+
+        # An empty count (unparseable page) is treated like an empty page.
+        if [[ "${count:-0}" -eq 0 ]]; then
+            break
+        fi
+        result=$(echo "$result" "$page_data" | jq -s '.[0] + .[1]')
+        if (( count < size )); then
+            break
+        fi
+        page=$(( page + 1 ))
+    done
+
+    echo "$result"
+}
+
+# ── Port-to-service mapping ─────────────────────────────────────────
+# Print the service name for port number $1, or an empty line when the port
+# is not in the table.
+port_to_service() {
+    local service=""
+    case "$1" in
+        22) service="SSH" ;;
+        25) service="SMTP" ;;
+        53) service="DNS" ;;
+        80) service="HTTP" ;;
+        443) service="HTTPS" ;;
+        1433) service="MSSQL" ;;
+        2379) service="etcd" ;;
+        3306) service="MySQL" ;;
+        3389) service="RDP" ;;
+        5432) service="PostgreSQL" ;;
+        5900) service="VNC" ;;
+        6379) service="Redis" ;;
+        8080) service="HTTP-Alt" ;;
+        8443) service="HTTPS-Alt" ;;
+        9090) service="Prometheus" ;;
+        9200) service="Elasticsearch" ;;
+        11211) service="Memcached" ;;
+        27017) service="MongoDB" ;;
+    esac
+    echo "$service"
+}
+
+# ── Check if port is in dangerous list ───────────────────────────────
+# Succeed (status 0) when port $1 is an entry of the comma-separated
+# DANGEROUS_PORTS list, fail (status 1) otherwise.
+# Whitespace around list entries is ignored, an empty port never matches, and
+# no helper variable is left behind in the caller's scope.
+is_dangerous_port() {
+    local port="$1"
+    # Wrap the list in commas so every entry can be matched as ",port,".
+    local list=",${DANGEROUS_PORTS//[[:space:]]/},"
+
+    # A port containing a comma could otherwise match across two entries.
+    [[ -n "$port" && "$port" != *,* ]] || return 1
+    [[ "$list" == *",${port},"* ]]
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OPEN PORTS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Print one finding row of the open-ports table.
+#   $1 firewall id  $2 firewall name  $3 port  $4 protocol  $5 source CIDR
+#   $6 service label  $7 color code  $8 severity text
+_open_ports_row() {
+    printf " %-10s %-22s %-8s %-8s %-18s %-12s %b%s%b\n" \
+        "$1" "${2:0:20}" "$3" "$4" "$5" "$6" "$7" "$8" "$RESET"
+}
+
+# Report accept/allow rules that expose ports to 0.0.0.0/0 or ::/0.
+#   - a rule without ports flags every entry of DANGEROUS_PORTS as CRITICAL
+#   - a port range flags every dangerous port inside the range as CRITICAL
+#   - a single dangerous port is CRITICAL; ports 80 and 443 are listed as INFO
+# The loops read from process substitutions instead of pipelines so that the
+# flag_crit/flag_info increments happen in the current shell and are still
+# there when the summary is printed.
+audit_open_ports() {
+    log "Auditing firewall rules for dangerous open ports..."
+    log "Dangerous ports: ${DANGEROUS_PORTS}"
+    echo ""
+
+    printf " %-10s %-22s %-8s %-8s %-18s %-12s %s\n" \
+        "FW_ID" "FW_NAME" "PORT" "PROTO" "SOURCE" "SERVICE" "SEVERITY"
+    printf " %s\n" "$(printf '%.0s─' {1..95})"
+
+    local fw_json
+    fw_json=$(get_all_firewalls)
+
+    # Split the lists with read -a: no globbing and no lingering IFS change.
+    local -a danger_list=() port_entries=()
+    IFS=',' read -ra danger_list <<<"$DANGEROUS_PORTS"
+
+    local fw fw_id fw_name rule action protocol port_str source_cidr
+    local dp svc port_entry range_start range_end i j
+
+    while IFS= read -r fw; do
+        fw_id=$(jq -r '.firewallId' <<<"$fw" 2>/dev/null)
+        fw_name=$(jq -r '.displayName // .name // "unnamed"' <<<"$fw" 2>/dev/null)
+
+        while IFS= read -r rule; do
+            action=$(jq -r '.action // "accept"' <<<"$rule" 2>/dev/null)
+            protocol=$(jq -r '.protocol // "tcp"' <<<"$rule" 2>/dev/null)
+            port_str=$(jq -r '.port // .destPorts // .destinationPorts // ""' <<<"$rule" 2>/dev/null)
+            source_cidr=$(jq -r '.srcCidr // .source // .ipRange // "0.0.0.0/0"' <<<"$rule" 2>/dev/null)
+
+            # Only rules that admit traffic from anywhere are of interest.
+            [[ "$action" == "accept" || "$action" == "allow" ]] || continue
+            [[ "$source_cidr" == "0.0.0.0/0" || "$source_cidr" == "::/0" ]] || continue
+
+            # No port restriction: every dangerous port is exposed.
+            if [[ -z "$port_str" || "$port_str" == "null" ]]; then
+                for (( i = 0; i < ${#danger_list[@]}; i++ )); do
+                    dp="${danger_list[i]//[[:space:]]/}"
+                    svc=$(port_to_service "$dp")
+                    _open_ports_row "$fw_id" "$fw_name" "$dp" "$protocol" \
+                        "$source_cidr" "${svc:-unknown}" "$RED" "CRITICAL"
+                    flag_crit
+                done
+                continue
+            fi
+
+            port_entries=()
+            IFS=',' read -ra port_entries <<<"$port_str"
+            for (( j = 0; j < ${#port_entries[@]}; j++ )); do
+                port_entry="${port_entries[j]}"
+
+                if [[ "$port_entry" == *-* ]]; then
+                    range_start="${port_entry%-*}"
+                    range_end="${port_entry#*-}"
+                    # Skip ranges that are not "number-number" rather than
+                    # failing inside the numeric comparison.
+                    [[ "$range_start" =~ ^[0-9]+$ && "$range_end" =~ ^[0-9]+$ ]] || continue
+                    for (( i = 0; i < ${#danger_list[@]}; i++ )); do
+                        dp="${danger_list[i]//[[:space:]]/}"
+                        [[ "$dp" =~ ^[0-9]+$ ]] || continue
+                        if [[ "$dp" -ge "$range_start" && "$dp" -le "$range_end" ]]; then
+                            svc=$(port_to_service "$dp")
+                            _open_ports_row "$fw_id" "$fw_name" "$dp" "$protocol" \
+                                "$source_cidr" "${svc:-unknown}" "$RED" "CRITICAL"
+                            flag_crit
+                        fi
+                    done
+                    continue
+                fi
+
+                if is_dangerous_port "$port_entry"; then
+                    svc=$(port_to_service "$port_entry")
+                    _open_ports_row "$fw_id" "$fw_name" "$port_entry" "$protocol" \
+                        "$source_cidr" "${svc:-unknown}" "$RED" "CRITICAL"
+                    flag_crit
+                elif [[ "$port_entry" == "80" || "$port_entry" == "443" ]]; then
+                    svc=$(port_to_service "$port_entry")
+                    _open_ports_row "$fw_id" "$fw_name" "$port_entry" "$protocol" \
+                        "$source_cidr" "${svc:-$port_entry}" "$CYAN" "INFO"
+                    flag_info
+                fi
+            done
+        done < <(jq -c '.rules[]? // empty' <<<"$fw" 2>/dev/null)
+    done < <(jq -c '.[]' <<<"$fw_json" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# UNPROTECTED INSTANCES
+# ══════════════════════════════════════════════════════════════════════
+# List every instance and whether its id appears in any firewall's
+# assignedInstances; an instance without a firewall is flagged CRITICAL, a
+# protected one is counted as OK.
+# The loop reads from a process substitution instead of a pipeline so that the
+# flag_crit/flag_ok increments happen in the current shell and are still there
+# when the summary is printed.
+audit_unprotected() {
+    log "Checking for instances without firewalls..."
+    echo ""
+
+    printf " %-10s %-22s %-16s %-10s %s\n" \
+        "INST_ID" "NAME" "IP" "STATUS" "FIREWALL"
+    printf " %s\n" "$(printf '%.0s─' {1..75})"
+
+    local instances
+    instances=$(get_all_instances)
+
+    local fw_json
+    fw_json=$(get_all_firewalls)
+
+    # One assigned instance id per line.
+    local assigned_instances
+    assigned_instances=$(jq -r \
+        '[.[].assignedInstances // [] | .[]] | unique | .[]' <<<"$fw_json" 2>/dev/null || true)
+
+    local inst iid iname ip status
+    while IFS= read -r inst; do
+        iid=$(jq -r '.instanceId' <<<"$inst" 2>/dev/null)
+        iname=$(jq -r '.name // .displayName // "unknown"' <<<"$inst" 2>/dev/null)
+        ip=$(jq -r '.ipConfig.v4.ip // "N/A"' <<<"$inst" 2>/dev/null)
+        status=$(jq -r '.status // "unknown"' <<<"$inst" 2>/dev/null)
+
+        # Fixed-string, whole-line match: the id is data, not a regex. The
+        # here-string also avoids a broken pipe under `pipefail`.
+        if grep -Fxq -- "$iid" <<<"$assigned_instances"; then
+            printf " %-10s %-22s %-16s %-10s %b%s%b\n" \
+                "$iid" "${iname:0:20}" "$ip" "$status" \
+                "$GREEN" "✓ Protected" "$RESET"
+            flag_ok
+        else
+            printf " %-10s %-22s %-16s %-10s %b%s%b\n" \
+                "$iid" "${iname:0:20}" "$ip" "$status" \
+                "$RED" "NONE — UNPROTECTED" "$RESET"
+            flag_crit
+        fi
+    done < <(jq -c '.[]' <<<"$instances" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PERMISSIVE RULES AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Report overly broad accept/allow rules:
+#   - no port restriction from 0.0.0.0/0 or ::/0            → CRITICAL
+#   - protocol "all" / "-1" from 0.0.0.0/0 or ::/0          → CRITICAL
+#   - any other source ending in /8 or /16                  → WARN
+# The loops read from process substitutions instead of pipelines so that the
+# flag_crit/flag_warn increments happen in the current shell and are still
+# there when the summary is printed.
+audit_permissive() {
+    log "Auditing overly permissive firewall rules..."
+    echo ""
+
+    printf " %-10s %-22s %-10s %-8s %-18s %-14s %s\n" \
+        "FW_ID" "FW_NAME" "PORTS" "PROTO" "SOURCE" "ISSUE" "SEVERITY"
+    printf " %s\n" "$(printf '%.0s─' {1..100})"
+
+    local fw_json
+    fw_json=$(get_all_firewalls)
+
+    local fw fw_id fw_name rule action protocol port_str source_cidr from_anywhere
+
+    while IFS= read -r fw; do
+        fw_id=$(jq -r '.firewallId' <<<"$fw" 2>/dev/null)
+        fw_name=$(jq -r '.displayName // .name // "unnamed"' <<<"$fw" 2>/dev/null)
+
+        while IFS= read -r rule; do
+            action=$(jq -r '.action // "accept"' <<<"$rule" 2>/dev/null)
+            protocol=$(jq -r '.protocol // "tcp"' <<<"$rule" 2>/dev/null)
+            port_str=$(jq -r '.port // .destPorts // .destinationPorts // ""' <<<"$rule" 2>/dev/null)
+            source_cidr=$(jq -r '.srcCidr // .source // .ipRange // ""' <<<"$rule" 2>/dev/null)
+
+            [[ "$action" == "accept" || "$action" == "allow" ]] || continue
+
+            from_anywhere="false"
+            if [[ "$source_cidr" == "0.0.0.0/0" || "$source_cidr" == "::/0" ]]; then
+                from_anywhere="true"
+            fi
+
+            if [[ "$from_anywhere" == "true" ]] && [[ -z "$port_str" || "$port_str" == "null" ]]; then
+                printf " %-10s %-22s %-10s %-8s %-18s %-14s %b%s%b\n" \
+                    "$fw_id" "${fw_name:0:20}" "ALL" "$protocol" \
+                    "$source_cidr" "all-ports" "$RED" "CRITICAL" "$RESET"
+                flag_crit
+                continue
+            fi
+
+            if [[ "$from_anywhere" == "true" ]] && [[ "$protocol" == "all" || "$protocol" == "-1" ]]; then
+                printf " %-10s %-22s %-10s %-8s %-18s %-14s %b%s%b\n" \
+                    "$fw_id" "${fw_name:0:20}" "${port_str:-ALL}" "all" \
+                    "$source_cidr" "all-protocols" "$RED" "CRITICAL" "$RESET"
+                flag_crit
+                continue
+            fi
+
+            if [[ -n "$source_cidr" && "$source_cidr" != "null" ]]; then
+                if [[ "$source_cidr" == *"/8" || "$source_cidr" == *"/16" ]]; then
+                    printf " %-10s %-22s %-10s %-8s %-18s %-14s %b%s%b\n" \
+                        "$fw_id" "${fw_name:0:20}" "${port_str:-ALL}" "$protocol" \
+                        "${source_cidr:0:16}" "wide-cidr" "$YELLOW" "WARN" "$RESET"
+                    flag_warn
+                fi
+            fi
+        done < <(jq -c '.rules[]? // empty' <<<"$fw" 2>/dev/null)
+    done < <(jq -c '.[]' <<<"$fw_json" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# UNUSED FIREWALLS
+# ══════════════════════════════════════════════════════════════════════
+# List firewalls that are not assigned to any instance (WARN); assigned
+# firewalls are counted as OK and only mentioned in verbose mode.
+# The loop reads from a process substitution instead of a pipeline so that the
+# flag_warn/flag_ok increments happen in the current shell and are still there
+# when the summary is printed.
+audit_unused() {
+    log "Checking for unused firewalls..."
+    echo ""
+
+    printf " %-10s %-28s %-8s %s\n" \
+        "FW_ID" "FW_NAME" "RULES" "STATUS"
+    printf " %s\n" "$(printf '%.0s─' {1..60})"
+
+    local fw_json
+    fw_json=$(get_all_firewalls)
+
+    local fw fw_id fw_name rule_count assigned_count
+    while IFS= read -r fw; do
+        fw_id=$(jq -r '.firewallId' <<<"$fw" 2>/dev/null)
+        fw_name=$(jq -r '.displayName // .name // "unnamed"' <<<"$fw" 2>/dev/null)
+        rule_count=$(jq '[.rules[]?] | length' <<<"$fw" 2>/dev/null || echo 0)
+        assigned_count=$(jq '[.assignedInstances // [] | .[]] | length' <<<"$fw" 2>/dev/null || echo 0)
+
+        if [[ "$assigned_count" -eq 0 ]]; then
+            printf " %-10s %-28s %-8s %b%s%b\n" \
+                "$fw_id" "${fw_name:0:26}" "$rule_count" \
+                "$YELLOW" "UNUSED" "$RESET"
+            flag_warn
+        else
+            verbose "Firewall ${fw_id} (${fw_name}): assigned to ${assigned_count} instance(s)"
+            flag_ok
+        fi
+    done < <(jq -c '.[]' <<<"$fw_json" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST ALL RULES
+# ══════════════════════════════════════════════════════════════════════
+# Print a table with one row per rule of every firewall: action (red for
+# drop/deny/reject, green otherwise), protocol, ports, source and — when the
+# port field is a single number — the matching service name.
+list_rules() {
+    log "Listing all firewall rules..."
+    echo ""
+
+    printf " %-10s %-20s %-8s %-8s %-12s %-18s %s\n" \
+        "FW_ID" "FW_NAME" "ACTION" "PROTO" "PORTS" "SOURCE" "SERVICE"
+    printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+    local fw_json
+    fw_json=$(get_all_firewalls)
+
+    jq -c '.[]' <<<"$fw_json" 2>/dev/null | while IFS= read -r fw; do
+        local fw_id fw_name
+        fw_id=$(jq -r '.firewallId' <<<"$fw" 2>/dev/null)
+        fw_name=$(jq -r '.displayName // .name // "unnamed"' <<<"$fw" 2>/dev/null)
+
+        jq -c '.rules[]? // empty' <<<"$fw" 2>/dev/null | while IFS= read -r rule; do
+            local action protocol port_str source_cidr
+            action=$(jq -r '.action // "accept"' <<<"$rule" 2>/dev/null)
+            protocol=$(jq -r '.protocol // "tcp"' <<<"$rule" 2>/dev/null)
+            port_str=$(jq -r '.port // .destPorts // .destinationPorts // "all"' <<<"$rule" 2>/dev/null)
+            source_cidr=$(jq -r '.srcCidr // .source // .ipRange // "any"' <<<"$rule" 2>/dev/null)
+
+            # A literal "null" is shown as the catch-all value.
+            if [[ "$port_str" == "null" ]]; then
+                port_str="all"
+            fi
+            if [[ "$source_cidr" == "null" ]]; then
+                source_cidr="any"
+            fi
+
+            # Only a plain port number can be mapped to a service name.
+            local svc=""
+            if [[ "$port_str" =~ ^[0-9]+$ ]]; then
+                svc=$(port_to_service "$port_str")
+            fi
+
+            local action_color
+            case "$action" in
+                drop | deny | reject) action_color="$RED" ;;
+                *) action_color="$GREEN" ;;
+            esac
+
+            printf " %-10s %-20s %b%-8s%b %-8s %-12s %-18s %s\n" \
+                "$fw_id" "${fw_name:0:18}" "$action_color" "$action" "$RESET" \
+                "$protocol" "${port_str:0:10}" "${source_cidr:0:16}" "${svc}"
+        done
+    done
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+# Print the closing summary: the four severity totals, the run time since
+# START_TIME and, depending on the worst severity found, a block of
+# recommendations, suggestions, or the all-clear line.
+print_summary() {
+    local elapsed=$(( $(date +%s) - START_TIME ))
+
+    printf '%s\n' \
+        "" \
+        " ══════════════════════════════════════════" \
+        " Firewall Audit Summary" \
+        " ══════════════════════════════════════════"
+    printf " %-20s %b%d%b\n" "CRITICAL:" "$RED" "$TOTAL_CRIT" "$RESET"
+    printf " %-20s %b%d%b\n" "WARN:" "$YELLOW" "$TOTAL_WARN" "$RESET"
+    printf " %-20s %b%d%b\n" "INFO:" "$CYAN" "$TOTAL_INFO" "$RESET"
+    printf " %-20s %b%d%b\n" "OK:" "$GREEN" "$TOTAL_OK" "$RESET"
+    printf '%s\n' " ──────────────────────────────────────────"
+    printf " Completed in %ds\n" "$elapsed"
+    printf '\n'
+
+    # Critical findings take precedence over warnings.
+    if (( TOTAL_CRIT > 0 )); then
+        echo -e " ${RED}${BOLD}Action required:${RESET} ${TOTAL_CRIT} critical finding(s)"
+        printf '%s\n' \
+            "" \
+            " Top recommendations:" \
+            " • Assign firewalls to all unprotected instances" \
+            " • Close 0.0.0.0/0 rules on SSH (22), RDP (3389), and database ports" \
+            " • Replace all-port allow rules with specific port lists" \
+            " • Remove unused firewalls to reduce configuration sprawl" \
+            ""
+    elif (( TOTAL_WARN > 0 )); then
+        echo -e " ${YELLOW}Review recommended:${RESET} ${TOTAL_WARN} warning(s)"
+        printf '%s\n' \
+            "" \
+            " Suggestions:" \
+            " • Review wide CIDR rules and narrow where possible" \
+            " • Delete unused firewalls" \
+            " • Restrict outbound where applicable" \
+            ""
+    else
+        echo -e " ${GREEN}All checks passed${RESET}"
+        printf '\n'
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+# NOTE(review): this span is extraction residue — the opening of show_help
+# (its here-document is missing) runs straight into what looks like the tail
+# of the argument parser, which stores the selected modes space-joined in
+# RUN_MODE. Restore both functions from the original script.
+show_help() {
+ cat <&2
+ exit 1
+ fi
+
+ RUN_MODE="${modes[*]}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+# Entry point: parse flags, verify prerequisites, run every selected audit
+# mode in order, print the summary and exit with
+#   2 when critical findings were counted,
+#   1 when only warnings were counted,
+#   0 otherwise.
+main() {
+    parse_args "$@"
+    setup_colors
+    check_deps
+    check_credentials
+
+    START_TIME=$(date +%s)
+
+    printf '\n'
+    echo -e "${BOLD}Contabo Firewall Auditor${RESET}"
+    echo -e "Mode: ${RUN_MODE}"
+    echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    printf '\n'
+
+    # RUN_MODE is a space-separated list of modes; splitting is intended.
+    for mode in $RUN_MODE; do
+        case "$mode" in
+            open-ports)
+                audit_open_ports
+                ;;
+            unprotected)
+                audit_unprotected
+                ;;
+            permissive)
+                audit_permissive
+                ;;
+            unused)
+                audit_unused
+                ;;
+            rules)
+                list_rules
+                ;;
+        esac
+    done
+
+    print_summary
+
+    if (( TOTAL_CRIT > 0 )); then
+        exit 2
+    fi
+    if (( TOTAL_WARN > 0 )); then
+        exit 1
+    fi
+    exit 0
+}
+
+main "$@"
diff --git a/contabo-fleet-manager.sh b/contabo-fleet-manager.sh
new file mode 100755
index 0000000..9993b9b
--- /dev/null
+++ b/contabo-fleet-manager.sh
@@ -0,0 +1,608 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### contabo-fleet-manager.sh — Inventory, health checks, and bulk operations for ####
+#### Contabo VPS/VDS instances via the REST API. Fleet-wide visibility and control ####
+#### Requires: bash 4+, curl, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./contabo-fleet-manager.sh --inventory --all ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+# Print the number of seconds between START_TIME and now, suffixed with "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    local delta=$(( now - START_TIME ))
+    echo "${delta}s"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+INSTANCE_ID=""
+TARGET_ALL="false"
+TAG_ID=""
+TAG_SUB_MODE=""
+OUTPUT_FORMAT="${CFM_FORMAT:-text}"
+PING_CHECK="false"
+FORCE="false"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+CONTABO_CLIENT_ID="${CONTABO_CLIENT_ID:-}"
+CONTABO_CLIENT_SECRET="${CONTABO_CLIENT_SECRET:-}"
+CONTABO_API_USER="${CONTABO_API_USER:-}"
+CONTABO_API_PASS="${CONTABO_API_PASS:-}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+ACTION_OK=0
+ACTION_FAIL=0
+
+# ── API helpers ──────────────────────────────────────────────────────
+contabo_token() {
+ local resp
+ resp=$(curl -s -d "client_id=${CONTABO_CLIENT_ID}" \
+ -d "client_secret=${CONTABO_CLIENT_SECRET}" \
+ --data-urlencode "username=${CONTABO_API_USER}" \
+ --data-urlencode "password=${CONTABO_API_PASS}" \
+ -d "grant_type=password" \
+ "https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token")
+ local token
+ token=$(echo "$resp" | jq -r '.access_token // empty' 2>/dev/null)
+ if [[ -z "$token" ]]; then
+ die "Failed to obtain access token — check credentials"
+ fi
+ echo "$token"
+}
+
+# Perform one authenticated request against the Contabo REST API, retrying
+# up to three times when the server answers HTTP 429.
+# Arguments: $1 - HTTP method
+#            $2 - endpoint path appended to https://api.contabo.com/v1
+#            $@ - any further arguments are passed to curl unchanged
+# Outputs:   response body on stdout; diagnostics on stderr
+# Returns:   0 once a non-429 response was received (whatever its status
+#            code); 1 when no token could be obtained, the request could not
+#            be sent, or every attempt was rate limited
+contabo_api() {
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token resp_file http_code delay
+
+    # Fetch the token up front: a die() inside "$(contabo_token)" used as a
+    # curl argument would not stop the request, it would just send an empty
+    # bearer token.
+    token=$(contabo_token) || return 1
+
+    # Private temp file instead of a fixed, predictable name in /tmp, which
+    # is unsafe on shared hosts and collides between concurrent runs.
+    resp_file=$(mktemp) || { err "Could not create temporary file"; return 1; }
+
+    while (( attempt < max_attempts )); do
+        http_code=$(curl -s -o "$resp_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: $(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || http_code="000"
+
+        # stdout of this function is the response body, so debug output must
+        # go to stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" == "000" ]]; then
+            rm -f -- "$resp_file"
+            err "API request could not be sent: ${method} ${endpoint}"
+            return 1
+        fi
+
+        if [[ "$http_code" == "429" ]]; then
+            attempt=$(( attempt + 1 ))
+            delay=$(( attempt * 5 ))
+            warn "Rate limited — retrying in ${delay}s (attempt ${attempt}/${max_attempts})"
+            sleep "$delay"
+            continue
+        fi
+
+        cat -- "$resp_file"
+        rm -f -- "$resp_file"
+        return 0
+    done
+
+    rm -f -- "$resp_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+check_credentials() {
+ [[ -z "$CONTABO_CLIENT_ID" ]] && die "CONTABO_CLIENT_ID not set"
+ [[ -z "$CONTABO_CLIENT_SECRET" ]] && die "CONTABO_CLIENT_SECRET not set"
+ [[ -z "$CONTABO_API_USER" ]] && die "CONTABO_API_USER not set"
+ [[ -z "$CONTABO_API_PASS" ]] && die "CONTABO_API_PASS not set"
+}
+
+check_deps() {
+ command -v curl &>/dev/null || die "curl is required"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+# ── Instance helpers ─────────────────────────────────────────────────
+get_all_instance_ids() {
+ local page=1 size=100 ids=""
+ while true; do
+ local resp
+ resp=$(contabo_api GET "/compute/instances?page=${page}&size=${size}")
+ local page_ids
+ page_ids=$(echo "$resp" | jq -r '.data[].instanceId' 2>/dev/null)
+ [[ -z "$page_ids" ]] && break
+ ids="${ids}${ids:+$'\n'}${page_ids}"
+ local count
+ count=$(echo "$page_ids" | wc -l)
+ (( count < size )) && break
+ ((page++)) || true
+ done
+ echo "$ids"
+}
+
+# Print the name of one instance, falling back to its displayName and then
+# to the literal "unknown".
+# Arguments: $1 - instance ID
+get_instance_name() {
+    local -r instance_path="/compute/instances/$1"
+    contabo_api GET "$instance_path" |
+        jq -r '.data[0].name // .data[0].displayName // "unknown"' 2>/dev/null
+}
+
+# Print the instance IDs selected on the command line, one per line.
+# Precedence: TARGET_ALL, then INSTANCE_ID, then TAG_ID; dies if none is set.
+get_instance_ids() {
+    if [[ "$TARGET_ALL" == "true" ]]; then
+        get_all_instance_ids
+        return
+    fi
+    if [[ -n "$INSTANCE_ID" ]]; then
+        echo "$INSTANCE_ID"
+        return
+    fi
+    if [[ -n "$TAG_ID" ]]; then
+        get_instances_by_tag "$TAG_ID"
+        return
+    fi
+    die "Specify --instance ID, --all, or --tag TAG_ID"
+}
+
+# Print the instanceId of every instance that carries the given tag, one
+# per line.
+# Arguments: $1 - tag ID
+#
+# Pagination is driven by the size of the *unfiltered* page. Stopping on the
+# number of matching IDs would end the walk at the first page with fewer
+# than $size tagged instances and miss matches on later pages.
+get_instances_by_tag() {
+    local tid="$1"
+    local page=1 size=100 ids=""
+    local resp page_total page_ids
+
+    while true; do
+        resp=$(contabo_api GET "/compute/instances?page=${page}&size=${size}")
+
+        page_total=$(jq '(.data // []) | length' <<< "$resp" 2>/dev/null) || page_total=0
+        if [[ ! "$page_total" =~ ^[0-9]+$ ]] || (( page_total == 0 )); then
+            break
+        fi
+
+        # Compare tag IDs as strings so a non-numeric argument simply matches
+        # nothing instead of making jq fail; any() emits each instance once.
+        page_ids=$(jq -r --arg tid "$tid" \
+            '.data[] | select(any(.tags[]?; (.tagId | tostring) == $tid)) | .instanceId' \
+            <<< "$resp" 2>/dev/null) || page_ids=""
+        if [[ -n "$page_ids" ]]; then
+            ids="${ids}${ids:+$'\n'}${page_ids}"
+        fi
+
+        if (( page_total < size )); then
+            break
+        fi
+        page=$(( page + 1 ))
+    done
+
+    echo "$ids"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# INVENTORY
+# ══════════════════════════════════════════════════════════════════════
+do_inventory() {
+ local page=1 size=100 all_data="[]"
+
+ while true; do
+ local resp
+ resp=$(contabo_api GET "/compute/instances?page=${page}&size=${size}")
+ local page_data
+ page_data=$(echo "$resp" | jq '.data // []' 2>/dev/null)
+ local page_count
+ page_count=$(echo "$page_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$page_count" -eq 0 ]] && break
+ all_data=$(echo -e "${all_data}\n${page_data}" | jq -s 'add' 2>/dev/null)
+ (( page_count < size )) && break
+ ((page++)) || true
+ done
+
+ local total
+ total=$(echo "$all_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$total" -eq 0 ]] && die "No instances found"
+
+ # Filter by tag if specified
+ if [[ -n "$TAG_ID" ]]; then
+ all_data=$(echo "$all_data" | jq --arg tid "$TAG_ID" \
+ '[.[] | select(.tags[]? | .tagId == ($tid | tonumber))]' 2>/dev/null)
+ total=$(echo "$all_data" | jq 'length' 2>/dev/null || echo 0)
+ [[ "$total" -eq 0 ]] && die "No instances found with tag ${TAG_ID}"
+ fi
+
+ case "$OUTPUT_FORMAT" in
+ json)
+ echo "$all_data" | jq '.'
+ ;;
+ ansible)
+ echo "[contabo]"
+ echo "$all_data" | jq -r \
+ '.[] | (.ipConfig.v4.ip // "unknown") + " # " + (.name // .displayName // "unknown") + " id=" + (.instanceId | tostring)' \
+ 2>/dev/null
+ ;;
+ *)
+ section_header "Fleet Inventory"
+
+ printf " ${BOLD}%-13s %-20s %-11s %-16s %-8s %-8s${RESET}\n" \
+ "INSTANCE_ID" "NAME" "STATUS" "IP" "REGION" "PRODUCT"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ echo "$all_data" | jq -r \
+ '.[] | "\(.instanceId)\t\(.name // .displayName // "unknown")\t\(.status // "unknown")\t\(.ipConfig.v4.ip // "—")\t\(.region // "—")\t\(.productId // "—")"' \
+ 2>/dev/null \
+ | while IFS=$'\t' read -r iid name status ip region product; do
+ printf " %-13s %-20s %-11s %-16s %-8s %-8s\n" \
+ "$iid" "${name:0:18}" "$status" "$ip" "${region:0:6}" "$product"
+ done
+
+ echo ""
+ field "Total:" "$total"
+ ;;
+ esac
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HEALTH
+# ══════════════════════════════════════════════════════════════════════
+do_health() {
+ local ids
+ ids=$(get_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ local running=0 stopped=0 errored=0 total_instances=0
+ local results=""
+
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ ((total_instances++)) || true
+
+ local resp
+ resp=$(contabo_api GET "/compute/instances/${iid}")
+ local name status ip
+ name=$(echo "$resp" | jq -r '.data[0].name // .data[0].displayName // "unknown"' 2>/dev/null)
+ status=$(echo "$resp" | jq -r '.data[0].status // "unknown"' 2>/dev/null)
+ ip=$(echo "$resp" | jq -r '.data[0].ipConfig.v4.ip // ""' 2>/dev/null)
+
+ local ping_result="—"
+ if [[ "$PING_CHECK" == "true" && -n "$ip" ]]; then
+ if ping -c 1 -W 3 "$ip" &>/dev/null; then
+ ping_result="reachable"
+ else
+ ping_result="unreachable"
+ fi
+ fi
+
+ case "$status" in
+ running) ((running++)) || true ;;
+ stopped) ((stopped++)) || true ;;
+ *) ((errored++)) || true ;;
+ esac
+
+ results="${results}${iid}\t${name}\t${status}\t${ip}\t${ping_result}\n"
+ done <<< "$ids"
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ cat < /dev/null 2>&1; then
+ echo -e " ${GREEN}✓${RESET} ${iname} (${iid}) ${action} sent"
+ ((ACTION_OK++)) || true
+ else
+ echo -e " ${RED}✗${RESET} ${iname} (${iid}) ${action} failed"
+ ((ACTION_FAIL++)) || true
+ fi
+
+ sleep 1
+ done <<< "$ids"
+
+ echo ""
+ field_color "Succeeded:" "${GREEN}${ACTION_OK}${RESET}"
+ if [[ "$ACTION_FAIL" -gt 0 ]]; then
+ field_color "Failed:" "${RED}${ACTION_FAIL}${RESET}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TAGS
+# ══════════════════════════════════════════════════════════════════════
+do_tags() {
+ if [[ "$TAG_SUB_MODE" == "list" ]]; then
+ local resp
+ resp=$(contabo_api GET "/tags?page=1&size=100")
+
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ echo "$resp" | jq '.data // []'
+ return
+ fi
+
+ section_header "Tags"
+
+ printf " ${BOLD}%-10s %-30s %-10s${RESET}\n" "TAG_ID" "NAME" "COLOR"
+ printf " %s\n" "$(printf '%.0s─' {1..52})"
+
+ echo "$resp" | jq -r '.data[] | "\(.tagId)\t\(.name)\t\(.color // "—")"' 2>/dev/null \
+ | while IFS=$'\t' read -r tid tname tcolor; do
+ printf " %-10s %-30s %-10s\n" "$tid" "${tname:0:28}" "$tcolor"
+ done
+ elif [[ "$TAG_SUB_MODE" == "filter" ]]; then
+ [[ -z "$TAG_ID" ]] && die "Specify --filter TAG_ID"
+ INSTANCE_ID=""
+ TARGET_ALL="false"
+ do_inventory
+ else
+ die "Specify --list or --filter TAG_ID with --tags"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+# Print the number of seconds between START_TIME and now, suffixed with "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    local delta=$(( now - START_TIME ))
+    echo "${delta}s"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+ALSO_ROTATE="false"
+INSTANCE_ID=""
+TARGET_ALL="false"
+SNAPSHOT_ID=""
+KEEP="${CSM_KEEP:-3}"
+PREFIX="${CSM_PREFIX:-auto}"
+MAX_AGE="${CSM_MAX_AGE:-7}"
+OUTPUT_FORMAT="${CSM_FORMAT:-text}"
+DRY_RUN="true"
+FORCE="false"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── Credentials ───────────────────────────────────────────────────────
+CONTABO_CLIENT_ID="${CONTABO_CLIENT_ID:-}"
+CONTABO_CLIENT_SECRET="${CONTABO_CLIENT_SECRET:-}"
+CONTABO_API_USER="${CONTABO_API_USER:-}"
+CONTABO_API_PASS="${CONTABO_API_PASS:-}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+SNAP_CREATED=0
+SNAP_DELETED=0
+SNAP_ERRORS=0
+
+# ── API helpers ──────────────────────────────────────────────────────
+contabo_token() {
+ local resp
+ resp=$(curl -s -d "client_id=${CONTABO_CLIENT_ID}" \
+ -d "client_secret=${CONTABO_CLIENT_SECRET}" \
+ --data-urlencode "username=${CONTABO_API_USER}" \
+ --data-urlencode "password=${CONTABO_API_PASS}" \
+ -d "grant_type=password" \
+ "https://auth.contabo.com/auth/realms/contabo/protocol/openid-connect/token")
+ local token
+ token=$(echo "$resp" | jq -r '.access_token // empty' 2>/dev/null)
+ if [[ -z "$token" ]]; then
+ die "Failed to obtain access token — check credentials"
+ fi
+ echo "$token"
+}
+
+# Perform one authenticated request against the Contabo REST API, retrying
+# up to three times when the server answers HTTP 429.
+# Arguments: $1 - HTTP method
+#            $2 - endpoint path appended to https://api.contabo.com/v1
+#            $@ - any further arguments are passed to curl unchanged
+# Outputs:   response body on stdout; diagnostics on stderr
+# Returns:   0 once a non-429 response was received (whatever its status
+#            code); 1 when no token could be obtained, the request could not
+#            be sent, or every attempt was rate limited
+contabo_api() {
+    local method="$1" endpoint="$2"
+    shift 2
+    local attempt=0 max_attempts=3
+    local token resp_file http_code delay
+
+    # Fetch the token up front: a die() inside "$(contabo_token)" used as a
+    # curl argument would not stop the request, it would just send an empty
+    # bearer token.
+    token=$(contabo_token) || return 1
+
+    # Private temp file instead of a fixed, predictable name in /tmp, which
+    # is unsafe on shared hosts and collides between concurrent runs.
+    resp_file=$(mktemp) || { err "Could not create temporary file"; return 1; }
+
+    while (( attempt < max_attempts )); do
+        http_code=$(curl -s -o "$resp_file" -w "%{http_code}" \
+            -X "$method" \
+            -H "Authorization: Bearer ${token}" \
+            -H "Content-Type: application/json" \
+            -H "x-request-id: $(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)" \
+            "https://api.contabo.com/v1${endpoint}" "$@") || http_code="000"
+
+        # stdout of this function is the response body, so debug output must
+        # go to stderr.
+        verbose "API ${method} ${endpoint} → HTTP ${http_code}" >&2
+
+        if [[ "$http_code" == "000" ]]; then
+            rm -f -- "$resp_file"
+            err "API request could not be sent: ${method} ${endpoint}"
+            return 1
+        fi
+
+        if [[ "$http_code" == "429" ]]; then
+            attempt=$(( attempt + 1 ))
+            delay=$(( attempt * 5 ))
+            warn "Rate limited — retrying in ${delay}s (attempt ${attempt}/${max_attempts})"
+            sleep "$delay"
+            continue
+        fi
+
+        cat -- "$resp_file"
+        rm -f -- "$resp_file"
+        return 0
+    done
+
+    rm -f -- "$resp_file"
+    err "API request failed after ${max_attempts} attempts: ${method} ${endpoint}"
+    return 1
+}
+
+check_credentials() {
+ [[ -z "$CONTABO_CLIENT_ID" ]] && die "CONTABO_CLIENT_ID not set"
+ [[ -z "$CONTABO_CLIENT_SECRET" ]] && die "CONTABO_CLIENT_SECRET not set"
+ [[ -z "$CONTABO_API_USER" ]] && die "CONTABO_API_USER not set"
+ [[ -z "$CONTABO_API_PASS" ]] && die "CONTABO_API_PASS not set"
+}
+
+check_deps() {
+ command -v curl &>/dev/null || die "curl is required"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+# ── Instance helpers ─────────────────────────────────────────────────
+get_all_instance_ids() {
+ local page=1 size=100 ids=""
+ while true; do
+ local resp
+ resp=$(contabo_api GET "/compute/instances?page=${page}&size=${size}")
+ local page_ids
+ page_ids=$(echo "$resp" | jq -r '.data[].instanceId' 2>/dev/null)
+ [[ -z "$page_ids" ]] && break
+ ids="${ids}${ids:+$'\n'}${page_ids}"
+ local count
+ count=$(echo "$page_ids" | wc -l)
+ (( count < size )) && break
+ ((page++)) || true
+ done
+ echo "$ids"
+}
+
+# Print the name of one instance, falling back to its displayName and then
+# to the literal "unknown".
+# Arguments: $1 - instance ID
+get_instance_name() {
+    local -r instance_path="/compute/instances/$1"
+    contabo_api GET "$instance_path" |
+        jq -r '.data[0].name // .data[0].displayName // "unknown"' 2>/dev/null
+}
+
+# Print the instance IDs selected on the command line, one per line.
+# TARGET_ALL takes precedence over INSTANCE_ID; dies if neither is set.
+get_instance_ids() {
+    if [[ "$TARGET_ALL" == "true" ]]; then
+        get_all_instance_ids
+        return
+    fi
+    if [[ -n "$INSTANCE_ID" ]]; then
+        echo "$INSTANCE_ID"
+        return
+    fi
+    die "Specify --instance ID or --all"
+}
+
+# ── Snapshot helpers ─────────────────────────────────────────────────
+get_snapshots() {
+ local iid="$1"
+ contabo_api GET "/compute/instances/${iid}/snapshots" \
+ | jq -r '.data // []' 2>/dev/null
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SNAPSHOT
+# ══════════════════════════════════════════════════════════════════════
+do_snapshot() {
+ local ids
+ ids=$(get_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ local count
+ count=$(echo "$ids" | grep -c . || true)
+ local target_label="instance ${INSTANCE_ID}"
+ [[ "$TARGET_ALL" == "true" ]] && target_label="all (${count} instances)"
+
+ section_header "Creating Snapshots"
+ field "Target:" "$target_label"
+ field "Prefix:" "$PREFIX"
+ echo ""
+
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ local snap_name
+ snap_name="${PREFIX}-$(date +%Y%m%d-%H%M%S)"
+ local iname
+ iname=$(get_instance_name "$iid")
+
+ verbose "Snapshotting ${iname} (${iid}) as ${snap_name}"
+
+ if contabo_api POST "/compute/instances/${iid}/snapshots" \
+ -d "{\"name\": \"${snap_name}\", \"description\": \"Managed by ${SCRIPT_NAME}\"}" > /dev/null 2>&1; then
+ echo -e " ${GREEN}✓${RESET} ${iname} (${iid}) ${snap_name}"
+ ((SNAP_CREATED++)) || true
+ else
+ echo -e " ${RED}✗${RESET} ${iname} (${iid}) failed"
+ ((SNAP_ERRORS++)) || true
+ fi
+
+ # Brief pause to avoid rate limiting on large fleets
+ sleep 1
+ done <<< "$ids"
+
+ echo ""
+ field_color "Created:" "${GREEN}${SNAP_CREATED}${RESET}"
+ if [[ "$SNAP_ERRORS" -gt 0 ]]; then
+ field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}"
+ fi
+
+ if [[ "$ALSO_ROTATE" == "true" ]]; then
+ do_rotate
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ROTATE
+# ══════════════════════════════════════════════════════════════════════
+do_rotate() {
+ local ids
+ ids=$(get_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ section_header "Rotating Snapshots"
+ field "Keep:" "$KEEP per instance"
+ field "Prefix:" "$PREFIX"
+ if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+ field_color "Mode:" "${YELLOW}DRY-RUN${RESET} (use --force to delete)"
+ else
+ field_color "Mode:" "${RED}LIVE${RESET}"
+ fi
+ echo ""
+
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ local iname
+ iname=$(get_instance_name "$iid")
+ local snaps
+ snaps=$(get_snapshots "$iid")
+
+ # Filter to managed snapshots (matching prefix), sort by date descending
+ local managed
+ managed=$(echo "$snaps" | jq -r \
+ --arg prefix "$PREFIX" \
+ '[.[] | select(.name | startswith($prefix))] | sort_by(.createdDate) | reverse' \
+ 2>/dev/null)
+
+ local total
+ total=$(echo "$managed" | jq 'length' 2>/dev/null || echo 0)
+
+ if (( total <= KEEP )); then
+ verbose "${iname}: ${total} managed snapshots, keeping all"
+ continue
+ fi
+
+ local to_delete
+ to_delete=$(echo "$managed" | jq -r ".[$KEEP:][] | .snapshotId" 2>/dev/null)
+
+ while IFS= read -r sid; do
+ [[ -z "$sid" ]] && continue
+ if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+ echo -e " ${YELLOW}⊘${RESET} Would delete: ${iname} (${iid}) → ${sid}"
+ else
+ if contabo_api DELETE "/compute/instances/${iid}/snapshots/${sid}" > /dev/null 2>&1; then
+ echo -e " ${GREEN}✓${RESET} Deleted: ${iname} (${iid}) → ${sid}"
+ ((SNAP_DELETED++)) || true
+ else
+ echo -e " ${RED}✗${RESET} Failed: ${iname} (${iid}) → ${sid}"
+ ((SNAP_ERRORS++)) || true
+ fi
+ sleep 1
+ fi
+ done <<< "$to_delete"
+ done <<< "$ids"
+
+ echo ""
+ if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+ log "Dry-run complete — use --force to execute"
+ else
+ field_color "Deleted:" "${GREEN}${SNAP_DELETED}${RESET}"
+ if [[ "$SNAP_ERRORS" -gt 0 ]]; then
+ field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}"
+ fi
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST
+# ══════════════════════════════════════════════════════════════════════
+do_list() {
+ local ids
+ ids=$(get_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ section_header "Snapshots"
+
+ printf " ${BOLD}%-8s %-18s %-28s %-22s${RESET}\n" "INST" "SNAPSHOT ID" "NAME" "CREATED"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local total=0
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ local snaps
+ snaps=$(get_snapshots "$iid")
+
+ echo "$snaps" | jq -r --arg iid "$iid" \
+ '.[] | "\($iid)\t\(.snapshotId)\t\(.name)\t\(.createdDate)"' 2>/dev/null \
+ | while IFS=$'\t' read -r inst sid name created; do
+ printf " %-8s %-18s %-28s %-22s\n" "$inst" "$sid" "${name:0:26}" "${created:0:20}"
+ ((total++)) 2>/dev/null || true
+ done
+ done <<< "$ids"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Audit snapshot coverage of ALL instances (ignores --instance): print one
+# row per instance with its latest snapshot, that snapshot's age in days and
+# the snapshot count, classify it as Unprotected (no snapshot), Stale (latest
+# snapshot older than MAX_AGE days) or OK, then print the totals.
+do_audit() {
+ local ids
+ ids=$(get_all_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ section_header "Snapshot Audit"
+
+ printf " ${BOLD}%-20s %-20s %6s %6s %-12s${RESET}\n" \
+ "INSTANCE" "LATEST SNAPSHOT" "AGE" "COUNT" "STATUS"
+ printf " %s\n" "$(printf '%.0s─' {1..68})"
+
+ local protected=0 stale=0 unprotected=0 total_instances=0
+ local now
+ now=$(date +%s)
+
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ ((total_instances++)) || true
+
+ local iname
+ iname=$(get_instance_name "$iid")
+ local snaps
+ snaps=$(get_snapshots "$iid")
+ local snap_count
+ snap_count=$(echo "$snaps" | jq 'length' 2>/dev/null || echo 0)
+
+ # No snapshot at all: report and move on to the next instance.
+ if [[ "$snap_count" -eq 0 ]]; then
+ printf " %-20s %-20s %6s %6s " "${iname:0:18}" "(none)" "—" "0"
+ echo -e "${RED}✗ Unprotected${RESET}"
+ ((unprotected++)) || true
+ continue
+ fi
+
+ # Newest snapshot = last element after sorting by createdDate
+ # (entries without a name are ignored).
+ local latest
+ latest=$(echo "$snaps" | jq -r \
+ '[.[] | select(.name)] | sort_by(.createdDate) | last' 2>/dev/null)
+ local latest_name latest_date
+ latest_name=$(echo "$latest" | jq -r '.name // "unknown"' 2>/dev/null)
+ latest_date=$(echo "$latest" | jq -r '.createdDate // ""' 2>/dev/null)
+
+ # Age stays "?" when the date is missing or `date -d` cannot parse it.
+ local age_days="?"
+ if [[ -n "$latest_date" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ age_days=$(( (now - snap_epoch) / 86400 ))
+ fi
+ fi
+
+ # An unknown age is counted as OK, not as stale.
+ local status_str status_color
+ if [[ "$age_days" != "?" ]] && (( age_days > MAX_AGE )); then
+ status_str="⚠ Stale"
+ status_color="$YELLOW"
+ ((stale++)) || true
+ else
+ status_str="✓ OK"
+ status_color="$GREEN"
+ ((protected++)) || true
+ fi
+
+ printf " %-20s %-20s %5sd %6s " \
+ "${iname:0:18}" "${latest_name:0:18}" "$age_days" "$snap_count"
+ echo -e "${status_color}${status_str}${RESET}"
+ done <<< "$ids"
+
+ echo ""
+ field "Instances:" "$total_instances"
+ field_color "Protected:" "${GREEN}${protected}${RESET}"
+ if [[ "$stale" -gt 0 ]]; then
+ field_color "Stale (>${MAX_AGE}d):" "${YELLOW}${stale}${RESET}"
+ fi
+ if [[ "$unprotected" -gt 0 ]]; then
+ field_color "Unprotected:" "${RED}${unprotected}${RESET}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RESTORE
+# ══════════════════════════════════════════════════════════════════════
+do_restore() {
+ [[ -z "$INSTANCE_ID" ]] && die "Specify --instance ID"
+ [[ -z "$SNAPSHOT_ID" ]] && die "Specify --snapshot-id ID"
+
+ local iname
+ iname=$(get_instance_name "$INSTANCE_ID")
+
+ section_header "Restore Snapshot"
+ field "Instance:" "${iname} (${INSTANCE_ID})"
+ field "Snapshot:" "$SNAPSHOT_ID"
+ echo ""
+
+ if [[ "$FORCE" != "true" ]]; then
+ echo -e " ${RED}WARNING: This will revert the instance to the snapshot state.${RESET}"
+ echo -e " ${RED}All changes since the snapshot will be lost.${RESET}"
+ echo ""
+ read -r -p " Type 'yes' to confirm: " confirm
+ if [[ "$confirm" != "yes" ]]; then
+ log "Restore cancelled"
+ return
+ fi
+ fi
+
+ if contabo_api POST "/compute/instances/${INSTANCE_ID}/snapshots/${SNAPSHOT_ID}" \
+ -d '{}' > /dev/null 2>&1; then
+ echo -e " ${GREEN}✓${RESET} Restore initiated — instance will revert to ${SNAPSHOT_ID}"
+ log "Monitor instance status — revert may take several minutes"
+ else
+ echo -e " ${RED}✗${RESET} Restore failed"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# STATUS
+# ══════════════════════════════════════════════════════════════════════
+do_status() {
+ local ids
+ ids=$(get_all_instance_ids)
+ [[ -z "$ids" ]] && die "No instances found"
+
+ local total_instances=0 total_snaps=0
+ local protected=0 stale=0 unprotected=0
+ local now
+ now=$(date +%s)
+
+ while IFS= read -r iid; do
+ [[ -z "$iid" ]] && continue
+ ((total_instances++)) || true
+
+ local snaps
+ snaps=$(get_snapshots "$iid")
+ local snap_count
+ snap_count=$(echo "$snaps" | jq 'length' 2>/dev/null || echo 0)
+ total_snaps=$(( total_snaps + snap_count ))
+
+ if [[ "$snap_count" -eq 0 ]]; then
+ ((unprotected++)) || true
+ continue
+ fi
+
+ local latest_date
+ latest_date=$(echo "$snaps" | jq -r \
+ '[.[] | select(.createdDate)] | sort_by(.createdDate) | last | .createdDate // ""' \
+ 2>/dev/null)
+
+ if [[ -n "$latest_date" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ local age_days=$(( (now - snap_epoch) / 86400 ))
+ if (( age_days > MAX_AGE )); then
+ ((stale++)) || true
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ done <<< "$ids"
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ cat <${MAX_AGE}d):" "${YELLOW}${stale}${RESET}"
+ else
+ field_color "Stale (>${MAX_AGE}d):" "${GREEN}0${RESET}"
+ fi
+ if [[ "$unprotected" -gt 0 ]]; then
+ field_color "Unprotected:" "${RED}${unprotected}${RESET}"
+ else
+ field_color "Unprotected:" "${GREEN}0${RESET}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2
+ exit "$exit_code"
+}
+
+trap 'handle_error $? $LINENO' ERR
+
+debug_echo() {
+ if [[ -n "$DEBUG" ]]; then
+ echo "[DEBUG] $*" >&2
+ fi
+}
+
+# Print the usage text on stdout and terminate the script with status 0.
+# The heredoc delimiter is unquoted, so $SCRIPT_NAME, $DEFAULT_NODE_DIR and
+# $DEFAULT_COLLECTION_INTERVAL are expanded; \$NODE_DIR is printed literally.
+show_help() {
+ cat << EOF
+Usage: $SCRIPT_NAME [OPTIONS]
+
+Container health metrics collector for Prometheus node_exporter textfile directory.
+
+Collects per-container health check status, image age, restart counts, exit codes,
+and running state via docker inspect and writes them as Prometheus metrics.
+
+OPTIONS:
+ --once Run collection once and exit (default)
+ --daemon Run continuously at COLLECTION_INTERVAL
+ --help, -h Show this help message
+
+ENVIRONMENT VARIABLES:
+ NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
+ COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
+ DEBUG Enable debug output
+
+EXAMPLES:
+ $SCRIPT_NAME --once
+ $SCRIPT_NAME --daemon
+ COLLECTION_INTERVAL=30 $SCRIPT_NAME --daemon
+
+OUTPUT:
+ Writes metrics to \$NODE_DIR/textfile_collector/container_health.prom
+
+EOF
+ exit 0
+}
+
+# Parse arguments
+# Parse command-line arguments: --once / --daemon select RUN_MODE, --help
+# prints the usage and exits 0, anything else is a usage error.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --once) RUN_MODE="once"; shift ;;
+        --daemon) RUN_MODE="daemon"; shift ;;
+        --help|-h) show_help ;;
+        *)
+            echo "Unknown option: $1" >&2
+            # show_help always exits 0; run it in a subshell so an invalid
+            # invocation still ends with a failure status, and send the
+            # usage text to stderr along with the error.
+            (show_help) >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Validate configuration
+validate_config() {
+ if ! command -v docker &>/dev/null; then
+ echo "Error: docker is not installed or not in PATH" >&2
+ exit 1
+ fi
+
+ local textfile_dir="${NODE_DIR}/textfile_collector"
+ if [[ ! -d "$textfile_dir" ]]; then
+ echo "Error: Textfile collector directory not found: $textfile_dir" >&2
+ echo "Create it: sudo mkdir -p $textfile_dir" >&2
+ exit 1
+ fi
+}
+
+# Collect metrics for all containers
+collect_all() {
+ local output_dir="${NODE_DIR}/textfile_collector"
+ local output_file="${output_dir}/container_health.prom"
+ local temp_file
+ temp_file=$(mktemp "${output_file}.XXXXXX")
+
+ local start_time
+ start_time=$(date +%s%N)
+ local success=1
+
+ debug_echo "Starting collection..."
+
+ {
+ local containers
+ containers=$(docker ps -a --format '{{.Names}}')
+
+ if [[ -z "$containers" ]]; then
+ debug_echo "No containers found"
+ fi
+
+ # Per-container metrics headers
+ echo "# HELP container_health_status Health check status of the container (1 for current status)."
+ echo "# TYPE container_health_status gauge"
+ echo "# HELP container_image_age_seconds Age of the container image in seconds."
+ echo "# TYPE container_image_age_seconds gauge"
+ echo "# HELP container_restart_count Number of container restarts."
+ echo "# TYPE container_restart_count gauge"
+ echo "# HELP container_exit_code Exit code of the container."
+ echo "# TYPE container_exit_code gauge"
+ echo "# HELP container_running Whether the container is running (1=running, 0=stopped)."
+ echo "# TYPE container_running gauge"
+
+ local now
+ now=$(date +%s)
+
+ while IFS= read -r container_name; do
+ [[ -z "$container_name" ]] && continue
+
+ debug_echo "Inspecting container: $container_name"
+
+ # Extract all fields in a single docker inspect call
+ local inspect_data
+ inspect_data=$(docker inspect --format \
+ '{{.Config.Image}}|{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}|{{.Created}}|{{.RestartCount}}|{{.State.ExitCode}}|{{.State.Running}}' \
+ "$container_name" 2>/dev/null) || {
+ debug_echo "Failed to inspect container: $container_name"
+ success=0
+ continue
+ }
+
+ local image health_status created restart_count exit_code running_raw
+ IFS='|' read -r image health_status created restart_count exit_code running_raw <<< "$inspect_data"
+
+ # Calculate image age in seconds
+ local created_epoch image_age
+ created_epoch=$(date -d "$created" +%s 2>/dev/null) || created_epoch=0
+ image_age=$((now - created_epoch))
+
+ # Convert running boolean to 0/1
+ local running=0
+ if [[ "$running_raw" == "true" ]]; then
+ running=1
+ fi
+
+ # Health status — emit a 1 for the current status, 0 for others
+ for status in healthy unhealthy starting none; do
+ if [[ "$health_status" == "$status" ]]; then
+ echo "container_health_status{name=\"${container_name}\",image=\"${image}\",status=\"${status}\"} 1"
+ else
+ echo "container_health_status{name=\"${container_name}\",image=\"${image}\",status=\"${status}\"} 0"
+ fi
+ done
+
+ echo "container_image_age_seconds{name=\"${container_name}\",image=\"${image}\"} ${image_age}"
+ echo "container_restart_count{name=\"${container_name}\",image=\"${image}\"} ${restart_count}"
+ echo "container_exit_code{name=\"${container_name}\",image=\"${image}\"} ${exit_code}"
+ echo "container_running{name=\"${container_name}\",image=\"${image}\"} ${running}"
+
+ done <<< "$containers"
+
+ # Exporter metadata
+ local end_time duration
+ end_time=$(date +%s%N)
+ duration=$(awk "BEGIN {printf \"%.4f\", ($end_time - $start_time) / 1000000000}")
+
+ echo ""
+ echo "# HELP container_health_exporter_duration_seconds Time taken to collect metrics."
+ echo "# TYPE container_health_exporter_duration_seconds gauge"
+ echo "container_health_exporter_duration_seconds ${duration}"
+ echo ""
+ echo "# HELP container_health_exporter_last_run_timestamp Unix timestamp of last collection."
+ echo "# TYPE container_health_exporter_last_run_timestamp gauge"
+ echo "container_health_exporter_last_run_timestamp $(date +%s)"
+ echo ""
+ echo "# HELP container_health_exporter_success Whether the last collection succeeded (1=success, 0=failure)."
+ echo "# TYPE container_health_exporter_success gauge"
+ echo "container_health_exporter_success ${success}"
+
+ } > "$temp_file" 2>/dev/null
+
+ mv "$temp_file" "$output_file"
+
+ debug_echo "Collection complete. Wrote to $output_file"
+}
+
+# Main
+main() {
+ validate_config
+
+ case "$RUN_MODE" in
+ once)
+ collect_all
+ ;;
+ daemon)
+ echo "$SCRIPT_NAME running in daemon mode (interval: ${COLLECTION_INTERVAL}s)"
+ while true; do
+ collect_all
+ sleep "$COLLECTION_INTERVAL"
+ done
+ ;;
+ esac
+}
+
+main
diff --git a/container-update-checker.sh b/container-update-checker.sh
new file mode 100755
index 0000000..74d1b5a
--- /dev/null
+++ b/container-update-checker.sh
@@ -0,0 +1,410 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### container-update-checker.sh — Check Docker/Podman containers for image updates ####
+#### Compares local image digests against remote registry digests ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./container-update-checker.sh ####
+#### ./container-update-checker.sh --docker --filter nginx ####
+#### ./container-update-checker.sh --json --quiet ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUNTIME="${CONTAINER_RUNTIME:-auto}"
+TIMEOUT="${REGISTRY_TIMEOUT:-10}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+JSON_OUTPUT="false"
+QUIET="false"
+FILTER=""
+LABEL=""
+TEXTFILE_DIR="/var/lib/node_exporter"
+PROM_FILE=""
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+COUNT_CURRENT=0
+COUNT_UPDATE=0
+COUNT_ERROR=0
+COUNT_TOTAL=0
+JSON_ITEMS=""
+PROM_LINES=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Sets RED, GREEN, YELLOW, BOLD, DIM and RESET.
+# The variables hold backslash escape sequences when COLOR is "always" or
+# stdout is a terminal, and are empty when COLOR is "never" or stdout is
+# not a terminal.
+setup_colors() {
+    # Start from "no colour" and only upgrade when colour is wanted.
+    RED="" GREEN="" YELLOW="" BOLD="" DIM="" RESET=""
+    [[ "$COLOR" == "never" ]] && return
+
+    if [[ "$COLOR" == "always" || -t 1 ]]; then
+        RED='\033[0;31m'
+        GREEN='\033[0;32m'
+        YELLOW='\033[0;33m'
+        BOLD='\033[1m'
+        DIM='\033[2m'
+        RESET='\033[0m'
+    fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*" >&2; fi; }
+
+# ── Runtime Detection ─────────────────────────────────────────────────
+detect_runtime() {
+ if [[ "$RUNTIME" == "docker" || "$RUNTIME" == "podman" ]]; then
+ if ! command -v "$RUNTIME" &>/dev/null; then
+ err "${RUNTIME^} not found"; exit 2
+ fi
+ return
+ fi
+ if command -v docker &>/dev/null && docker info &>/dev/null; then
+ RUNTIME="docker"
+ elif command -v podman &>/dev/null; then
+ RUNTIME="podman"
+ else
+ err "Neither Docker nor Podman found"; exit 2
+ fi
+ verbose "Auto-detected runtime: ${RUNTIME}"
+}
+
+# ── Auth Helper ───────────────────────────────────────────────────────
+get_auth_header() {
+ local registry="$1" config_file=""
+ if [[ "$RUNTIME" == "podman" ]]; then
+ config_file="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}/containers/auth.json"
+ [[ -f "$config_file" ]] || config_file="${HOME}/.config/containers/auth.json"
+ fi
+ [[ -f "${config_file:-}" ]] || config_file="${HOME}/.docker/config.json"
+ [[ -f "$config_file" ]] || return 0
+ local auth
+ auth=$(grep -A1 "\"${registry}\"" "$config_file" 2>/dev/null \
+ | grep '"auth"' | head -1 | sed 's/.*"auth"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') || true
+ if [[ -n "$auth" ]]; then
+ echo "Authorization: Basic ${auth}"
+ fi
+}
+
+# ── Parse Image Reference ────────────────────────────────────────────
+parse_image_ref() {
+ local image="$1" registry="" path="" tag=""
+ local without_tag="${image%%@*}"
+ if [[ "$without_tag" == *:* && "${without_tag##*:}" != */* ]]; then
+ tag="${without_tag##*:}"
+ without_tag="${without_tag%:*}"
+ fi
+ [[ -z "$tag" ]] && tag="latest"
+ if [[ "$without_tag" == *"."*"/"* ]] || [[ "$without_tag" == *":"*"/"* ]] || [[ "$without_tag" == "localhost/"* ]]; then
+ registry="${without_tag%%/*}"
+ path="${without_tag#*/}"
+ else
+ registry="docker.io"
+ [[ "$without_tag" == *"/"* ]] && path="$without_tag" || path="library/${without_tag}"
+ fi
+ echo "${registry}" "${path}" "${tag}"
+}
+
+# ── Get Local Digest ─────────────────────────────────────────────────
+get_local_digest() {
+ local image="$1" digest
+ digest=$($RUNTIME image inspect "$image" --format '{{index .RepoDigests 0}}' 2>/dev/null) || true
+ if [[ -n "$digest" && "$digest" == *"@"* ]]; then
+ echo "${digest##*@}"; return
+ fi
+ digest=$($RUNTIME image inspect "$image" --format '{{.Id}}' 2>/dev/null) || true
+ echo "${digest:-}"
+}
+
+# ── Extract JSON Value (sed only, no python/jq) ──────────────────────
+# Reads JSON on stdin and prints the string value of the given key from the
+# first input line that contains `"key": "value"`.
+# Arguments: $1 - key name (inserted into a sed pattern as-is)
+json_value() {
+    local key="$1" extract
+    extract="s/.*\"${key}\"[[:space:]]*:[[:space:]]*\"\([^\"]*\)\".*/\1/p"
+    sed -n "$extract" | head -n 1
+}
+
+# ── Get Remote Digest via Skopeo ──────────────────────────────────────
+# Asks skopeo for the "Digest" field of <registry>/<path>:<tag>, bounded by
+# TIMEOUT seconds.
+# Arguments: $1 - registry, $2 - repository path, $3 - tag
+# Outputs:   the digest, or an empty line when the lookup fails
+get_remote_digest_skopeo() {
+    local registry="$1" path="$2" tag="$3"
+    local ref="docker://${registry}/${path}:${tag}" found
+    found=$(timeout "$TIMEOUT" skopeo inspect --no-tags "$ref" 2>/dev/null \
+        | json_value "Digest") || true
+    printf '%s\n' "${found:-}"
+}
+
+# ── Get Remote Digest via Curl ────────────────────────────────────────
+get_remote_digest_curl() {
+ local registry="$1" path="$2" tag="$3"
+ local token="" digest=""
+ if [[ "$registry" == "docker.io" || "$registry" == "registry-1.docker.io" ]]; then
+ token=$(curl -sf --max-time "$TIMEOUT" \
+ "https://auth.docker.io/token?service=registry.docker.io&scope=repository:${path}:pull" \
+ | json_value "token") || true
+ [[ -z "$token" ]] && return
+ digest=$(curl -sf --max-time "$TIMEOUT" \
+ -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
+ -H "Accept: application/vnd.oci.image.index.v1+json" \
+ -H "Authorization: Bearer ${token}" \
+ "https://registry-1.docker.io/v2/${path}/manifests/${tag}" \
+ -o /dev/null -D - 2>/dev/null \
+ | grep -i "docker-content-digest" | tr -d '\r' | awk '{print $2}') || true
+ else
+ local auth_hdr auth_args=()
+ auth_hdr=$(get_auth_header "$registry")
+ [[ -n "$auth_hdr" ]] && auth_args=(-H "$auth_hdr")
+ digest=$(curl -sf --max-time "$TIMEOUT" \
+ -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
+ -H "Accept: application/vnd.oci.image.index.v1+json" \
+ "${auth_args[@]+"${auth_args[@]}"}" \
+ "https://${registry}/v2/${path}/manifests/${tag}" \
+ -o /dev/null -D - 2>/dev/null \
+ | grep -i "docker-content-digest" | tr -d '\r' | awk '{print $2}') || true
+ fi
+ echo "${digest:-}"
+}
+# ── Get Remote Digest (skopeo then curl fallback) ─────────────────────
+# Tries skopeo first when it is installed, then falls back to curl whenever
+# no digest has been obtained yet.
+# Arguments: $1 - registry, $2 - repository path, $3 - tag
+# Outputs:   the digest, or an empty line
+get_remote_digest() {
+    local registry="$1" path="$2" tag="$3"
+    local ref="${registry}/${path}:${tag}" found=""
+
+    if command -v skopeo &>/dev/null; then
+        verbose "Trying skopeo for ${ref}"
+        found=$(get_remote_digest_skopeo "$registry" "$path" "$tag")
+    fi
+
+    if [[ -z "$found" ]]; then
+        verbose "Trying curl fallback for ${ref}"
+        found=$(get_remote_digest_curl "$registry" "$path" "$tag")
+    fi
+
+    echo "${found:-}"
+}
+
+# ── Check Single Container ────────────────────────────────────────────
+check_container() {
+ local name="$1" image="$2"
+ local status="" local_digest="" remote_digest="" registry path tag
+ read -r registry path tag <<< "$(parse_image_ref "$image")"
+ verbose "Container=${name} image=${image} registry=${registry} path=${path} tag=${tag}"
+ local_digest=$(get_local_digest "$image")
+ verbose "Local digest: ${local_digest:-none}"
+ if [[ -z "$local_digest" ]]; then
+ status="error"
+ else
+ remote_digest=$(get_remote_digest "$registry" "$path" "$tag")
+ verbose "Remote digest: ${remote_digest:-none}"
+ if [[ -z "$remote_digest" ]]; then
+ status="error"
+ elif [[ "$local_digest" == "$remote_digest" ]]; then
+ status="current"
+ else
+ status="update"
+ fi
+ fi
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+ case "$status" in
+ current) COUNT_CURRENT=$((COUNT_CURRENT + 1)) ;;
+ update) COUNT_UPDATE=$((COUNT_UPDATE + 1)) ;;
+ error) COUNT_ERROR=$((COUNT_ERROR + 1)) ;;
+ esac
+ if [[ -n "$PROM_FILE" ]]; then
+ local val=1; [[ "$status" == "update" ]] && val=0
+ PROM_LINES+="container_image_up_to_date{name=\"${name}\",image=\"${image}\"} ${val}"$'\n'
+ fi
+ [[ "$QUIET" == "true" && "$status" != "update" ]] && return
+ if [[ "$JSON_OUTPUT" == "true" ]]; then
+ local item
+ item=$(printf '{"container":"%s","image":"%s","status":"%s"}' "$name" "$image" "$status")
+ [[ -n "$JSON_ITEMS" ]] && JSON_ITEMS="${JSON_ITEMS},${item}" || JSON_ITEMS="${item}"
+ else
+ local color symbol
+ case "$status" in
+ current) color="$GREEN"; symbol="up-to-date" ;;
+ update) color="$YELLOW"; symbol="update available" ;;
+ error) color="$RED"; symbol="check failed" ;;
+ *) color=""; symbol="?" ;;
+ esac
+ printf " %-30s %-40s %b%s%b\n" "$name" "$image" "$color" "$symbol" "$RESET"
+ fi
+}
+
+# ── List Containers ───────────────────────────────────────────────────
+# Prints one "<name><TAB><image>" line per running container, restricted to
+# containers carrying LABEL when LABEL is non-empty.
+# Errors from the runtime are discarded (stderr is sent to /dev/null).
+list_containers() {
+    local filter_args=()
+    if [[ -n "$LABEL" ]]; then
+        filter_args+=(--filter "label=${LABEL}")
+    fi
+    # ${arr[@]+...}: expanding an empty array is an "unbound variable" error
+    # under `set -u` on bash < 4.4; same guard as in get_remote_digest_curl.
+    $RUNTIME ps --format '{{.Names}}\t{{.Image}}' \
+        "${filter_args[@]+"${filter_args[@]}"}" 2>/dev/null
+}
+
+# ── Write Prometheus Metrics ──────────────────────────────────────────
+write_prom_metrics() {
+ local file="$1"
+ local output_dir
+ output_dir="$(dirname "$file")"
+ mkdir -p "$output_dir"
+ local tmp
+ tmp=$(mktemp "${output_dir}/.container_updates.XXXXXX")
+ {
+ echo "# HELP container_image_up_to_date Whether the container image is up to date (1=yes, 0=no)"
+ echo "# TYPE container_image_up_to_date gauge"
+ printf '%s' "$PROM_LINES"
+ echo "# HELP container_update_check_timestamp Unix timestamp of last update check"
+ echo "# TYPE container_update_check_timestamp gauge"
+ echo "container_update_check_timestamp $(date +%s)"
+ echo "# HELP container_update_check_total Total containers checked"
+ echo "# TYPE container_update_check_total gauge"
+ echo "container_update_check_total ${COUNT_TOTAL}"
+ echo "# HELP container_update_available_total Containers with updates available"
+ echo "# TYPE container_update_available_total gauge"
+ echo "container_update_available_total ${COUNT_UPDATE}"
+ } > "$tmp"
+ chmod 644 "$tmp"
+ mv -f "$tmp" "$file"
+ verbose "Metrics written to ${file}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2
+ exit 2 ;;
+ *)
+ err "Unexpected argument: $1"
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 2 ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+# Entry point: parses options, lists running containers, checks each one and
+# prints the report (table or JSON); writes Prometheus metrics when PROM_FILE
+# is set.
+# Exit status: 0 - everything current or nothing to check
+#              1 - at least one update available
+#              2 - at least one check failed
+main() {
+    parse_args "$@"
+    setup_colors
+    detect_runtime
+
+    # Each entry is "<name> <image>"; loop variables are local so they do not
+    # leak into the global scope.
+    local containers=() name image entry
+    while IFS=$'\t' read -r name image; do
+        [[ -z "$name" ]] && continue
+        [[ -n "$FILTER" && "$name" != *"${FILTER}"* ]] && continue
+        containers+=("${name} ${image}")
+    done < <(list_containers)
+
+    if [[ ${#containers[@]} -eq 0 ]]; then
+        if [[ "$JSON_OUTPUT" == "true" ]]; then
+            echo '{"results":[],"summary":{"total":0,"current":0,"update_available":0,"errors":0}}'
+        else
+            warn "No running containers found"
+        fi
+        # Still refresh the metrics file; otherwise the samples of the
+        # previous run would stay published after all containers are gone.
+        if [[ -n "$PROM_FILE" ]]; then
+            write_prom_metrics "$PROM_FILE"
+        fi
+        exit 0
+    fi
+
+    verbose "Found ${#containers[@]} containers to check"
+
+    if [[ "$JSON_OUTPUT" != "true" ]]; then
+        echo ""
+        echo -e "${BOLD}Container Update Checker${RESET}"
+        echo -e "${DIM}Runtime: ${RUNTIME} | Timeout: ${TIMEOUT}s${RESET}"
+        echo ""
+        printf " ${BOLD}%-30s %-40s %s${RESET}\n" "CONTAINER" "IMAGE" "STATUS"
+        printf " %s\n" "$(printf '%.0s─' {1..82})"
+    fi
+
+    for entry in "${containers[@]}"; do
+        # Split "<name> <image>" at the first space.
+        check_container "${entry%% *}" "${entry#* }"
+    done
+
+    if [[ "$JSON_OUTPUT" == "true" ]]; then
+        printf '{"results":[%s],"summary":{"total":%d,"current":%d,"update_available":%d,"errors":%d}}\n' \
+            "$JSON_ITEMS" "$COUNT_TOTAL" "$COUNT_CURRENT" "$COUNT_UPDATE" "$COUNT_ERROR"
+    else
+        echo ""
+        echo -e " ${BOLD}Summary${RESET}"
+        printf " %-20s %d\n" "Total checked:" "$COUNT_TOTAL"
+        printf " %-20s %b%d%b\n" "Up-to-date:" "$GREEN" "$COUNT_CURRENT" "$RESET"
+        printf " %-20s %b%d%b\n" "Update available:" "$YELLOW" "$COUNT_UPDATE" "$RESET"
+        printf " %-20s %b%d%b\n" "Errors:" "$RED" "$COUNT_ERROR" "$RESET"
+        echo ""
+    fi
+
+    if [[ -n "$PROM_FILE" ]]; then
+        write_prom_metrics "$PROM_FILE"
+    fi
+
+    if [[ "$COUNT_ERROR" -gt 0 ]]; then
+        exit 2
+    elif [[ "$COUNT_UPDATE" -gt 0 ]]; then
+        exit 1
+    fi
+    exit 0
+}
+
+main "$@"
diff --git a/coolify-exporter.sh b/coolify-exporter.sh
new file mode 100644
index 0000000..2f1e503
--- /dev/null
+++ b/coolify-exporter.sh
@@ -0,0 +1,505 @@
+#!/bin/bash
+################################################################################
+# Script Name: coolify-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Coolify PaaS providing operational
+# metrics via the Coolify API — application status, deployment
+# counts, database health, server info, SSL certificate expiry,
+# and API health
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - Coolify instance running with API enabled
+# - Coolify API token (generate in Settings → API Tokens)
+# - curl for API calls
+# - jq for JSON parsing
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# ./coolify-exporter.sh
+#
+# # HTTP server mode
+# ./coolify-exporter.sh --http -p 9196
+#
+# # Textfile collector mode
+# ./coolify-exporter.sh --textfile
+#
+# # Custom API token and URL
+# ./coolify-exporter.sh --api-url http://coolify.local:8000 --api-token mytoken
+#
+# Metrics Exported:
+# - coolify_up - API reachability (1=up, 0=down)
+# - coolify_info{version} - Coolify version info
+# - coolify_applications_total - Total application count
+# - coolify_applications_by_status{status} - Applications by status
+# - coolify_deployments_total - Total deployments
+# - coolify_deployments_running - Currently running deployments
+# - coolify_deployments_failed_total - Total failed deployments
+# - coolify_databases_total - Total managed databases
+# - coolify_databases_running - Running databases
+# - coolify_servers_total - Total servers managed
+# - coolify_servers_reachable - Reachable servers
+# - coolify_services_total - Total services
+# - coolify_services_running - Running services
+# - coolify_exporter_duration_seconds - Script execution time
+# - coolify_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9196
+# Default API URL: http://localhost:8000
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9196
+API_URL="http://localhost:8000"
+API_TOKEN=""
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Check prerequisites
+# Returns: 0 if OK, 1 if error
+check_prerequisites() {
+ if ! command -v curl >/dev/null 2>&1; then
+ echo "ERROR: curl not found" >&2
+ return 1
+ fi
+
+ if ! command -v jq >/dev/null 2>&1; then
+ echo "ERROR: jq not found (required for JSON parsing)" >&2
+ return 1
+ fi
+
+ if [ -z "$API_TOKEN" ]; then
+ echo "ERROR: --api-token is required" >&2
+ return 1
+ fi
+
+ return 0
+}
+
+# Escape special characters in Prometheus label values
+# Backslashes and double quotes are backslash-escaped; newlines are removed.
+# Args: $1 - string to escape
+# Returns: escaped string on stdout, followed by a newline
+prom_escape() {
+    local val="$1"
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    val="${val//$'\n'/}"
+    # printf instead of echo: echo would treat a value such as "-n" or "-e"
+    # as an option and print nothing.
+    printf '%s\n' "$val"
+}
+
+# Make an authenticated API call
+# Args: $1 - API endpoint path (e.g., /api/v1/applications)
+# Env:  API_TIMEOUT - optional overall time limit in seconds (default 15)
+# Returns: response body on stdout; curl's exit status
+#          (HTTP error responses are not turned into failures: no -f)
+api_call() {
+    local endpoint="$1"
+    # Bound both the connect phase and the whole transfer; without a limit a
+    # stalled API would block the exporter indefinitely.
+    curl -s -X GET \
+        --connect-timeout 5 \
+        --max-time "${API_TIMEOUT:-15}" \
+        -H "Authorization: Bearer ${API_TOKEN}" \
+        -H "Accept: application/json" \
+        "${API_URL}${endpoint}" 2>/dev/null
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check prerequisites
+ if ! check_prerequisites; then
+ cat </dev/null)
+
+ if [ -z "$version_response" ]; then
+ cat </dev/null)
+
+ if [ -z "$coolify_version" ] || [ "$coolify_version" = "null" ]; then
+ cat </dev/null)
+ running_apps=$(echo "$apps_response" | jq '[.[] | select(.status == "running")] | length' 2>/dev/null)
+ stopped_apps=$(echo "$apps_response" | jq '[.[] | select(.status == "stopped" or .status == "exited")] | length' 2>/dev/null)
+ exited_apps=$(echo "$apps_response" | jq '[.[] | select(.status == "restarting" or .status == "degraded")] | length' 2>/dev/null)
+ total_apps=${total_apps:-0}
+ running_apps=${running_apps:-0}
+ stopped_apps=${stopped_apps:-0}
+ exited_apps=${exited_apps:-0}
+ fi
+
+ cat </dev/null)
+ running_deployments=$(echo "$deployments_response" | jq '[.[] | select(.status == "in_progress")] | length' 2>/dev/null)
+ failed_deployments=$(echo "$deployments_response" | jq '[.[] | select(.status == "failed" or .status == "error")] | length' 2>/dev/null)
+ queued_deployments=$(echo "$deployments_response" | jq '[.[] | select(.status == "queued")] | length' 2>/dev/null)
+ total_deployments=${total_deployments:-0}
+ running_deployments=${running_deployments:-0}
+ failed_deployments=${failed_deployments:-0}
+ queued_deployments=${queued_deployments:-0}
+ fi
+
+ cat </dev/null)
+ running_databases=$(echo "$databases_response" | jq '[.[] | select(.status == "running")] | length' 2>/dev/null)
+ total_databases=${total_databases:-0}
+ running_databases=${running_databases:-0}
+ fi
+
+ cat </dev/null)
+ reachable_servers=$(echo "$servers_response" | jq '[.[] | select(.settings.is_reachable == true)] | length' 2>/dev/null)
+ total_servers=${total_servers:-0}
+ reachable_servers=${reachable_servers:-0}
+ fi
+
+ cat </dev/null)
+ running_services=$(echo "$services_response" | jq '[.[] | select(.status == "running")] | length' 2>/dev/null)
+ total_services=${total_services:-0}
+ running_services=${running_services:-0}
+ fi
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+Coolify Exporter v1.0
+
+Coolify Prometheus Exporter v1.0
+Metrics
+Operational metrics from the Coolify API.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point - dispatches to one of three output modes:
+#   HTTP_MODE=true        -> serve metrics over HTTP
+#   OUTPUT_FILE non-empty -> write a textfile-collector file
+#   otherwise             -> print metrics on stdout
+main() {
+    parse_args "$@"
+
+    # HTTP server mode (blocks until killed)
+    if [ "$HTTP_MODE" = true ]; then
+        run_http_server
+        return
+    fi
+
+    # No output file requested: plain stdout
+    if [ -z "$OUTPUT_FILE" ]; then
+        generate_metrics
+        return
+    fi
+
+    # Textfile collector mode. The temp file lives in the SAME directory as
+    # the target so the final rename stays on one filesystem.
+    local output_dir temp_file file_lines
+    output_dir="$(dirname "$OUTPUT_FILE")"
+    mkdir -p "$output_dir"
+    temp_file=$(mktemp "${output_dir}/.coolify_metrics.XXXXXX")
+
+    generate_metrics > "$temp_file" 2>/dev/null || {
+        rm -f "$temp_file"
+        echo "ERROR: Failed to generate metrics" >&2
+        exit 1
+    }
+
+    # Sanity check: refuse to publish a suspiciously short file and keep the
+    # previous one instead.
+    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+    if [ "$file_lines" -lt 10 ]; then
+        rm -f "$temp_file"
+        echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+        exit 1
+    fi
+
+    # Permissions are set before the rename so the published file is never
+    # visible with the restrictive mktemp mode.
+    chmod 644 "$temp_file"
+    mv -f "$temp_file" "$OUTPUT_FILE"
+
+    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/create_swap.sh b/create_swap.sh
index c704114..ffc561d 100644
--- a/create_swap.sh
+++ b/create_swap.sh
@@ -5,10 +5,13 @@
#### ####
#### Author: Phil Connor ####
#### Contact: pconnor@ara.com ####
-#### Version 3.50.20250729 ####
+#### Version 3.51.20250729 ####
#### ####
#### Created 06/01/2023 ####
##############################################
+# v3.51 changes:
+# - Fixed: grep in pipeline crashes under set -euo pipefail when no matches found. Added || true guard
+##############################################
# Exit on any error, undefined variables, and pipe failures
set -euo pipefail
@@ -68,7 +71,7 @@ detect_os() {
get_memory_gb() {
local mem_kb
# Extract memory from /proc/meminfo (in KB)
- mem_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
+ mem_kb=$({ grep MemTotal /proc/meminfo || true; } | awk '{print $2}')
if [[ -z "$mem_kb" || "$mem_kb" -eq 0 ]]; then
error "Unable to determine system memory"
diff --git a/cron-doctor.sh b/cron-doctor.sh
new file mode 100644
index 0000000..ff1846b
--- /dev/null
+++ b/cron-doctor.sh
@@ -0,0 +1,522 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### cron-doctor.sh — Diagnose common cron and systemd timer problems ####
+#### Checks PATH, missing binaries, unescaped %, output redirection, permissions, ####
+#### overlap risk, and failed timer services ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.0 ####
+#### ####
+#### Usage: ####
+#### ./cron-doctor.sh ####
+#### ./cron-doctor.sh --user admin ####
+#### ./cron-doctor.sh --fix-suggestions ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Script file name, used in messages.
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# "true" enables the [DEBUG] lines printed by verbose().
+VERBOSE="${VERBOSE:-false}"
+# "never" / "always" force colour off / on; any other value enables colour
+# only when stdout is a terminal (see setup_colors).
+COLOR="${COLOR:-auto}"
+# When non-empty, crontab and cron.allow/cron.deny checks are limited to this user.
+TARGET_USER=""
+# "true" makes suggest() print its fix hints.
+FIX_SUGGESTIONS=false
+# NOTE(review): CRON_ONLY / TIMERS_ONLY are not read by the code visible here —
+# presumably they restrict the run to cron or timer checks; confirm in the
+# main section.
+CRON_ONLY=false
+TIMERS_ONLY=false
+
+# ── Counters ──────────────────────────────────────────────────────────
+# Incremented by warn(), fail() and info() respectively.
+WARN_COUNT=0
+FAIL_COUNT=0
+INFO_COUNT=0
+# Message texts collected by fail() and warn().
+FAIL_MESSAGES=()
+WARN_MESSAGES=()
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Sets RED, GREEN, YELLOW, CYAN, BOLD, DIM and RESET.
+# They hold backslash escape sequences when COLOR is "always" or stdout is a
+# terminal, and are empty when COLOR is "never" or stdout is not a terminal.
+setup_colors() {
+    # Default to "no colour"; upgrade below when colour is wanted.
+    RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+    [[ "$COLOR" == "never" ]] && return
+
+    if [[ "$COLOR" == "always" || -t 1 ]]; then
+        RED='\033[0;31m'
+        GREEN='\033[0;32m'
+        YELLOW='\033[0;33m'
+        CYAN='\033[0;36m'
+        BOLD='\033[1m'
+        DIM='\033[2m'
+        RESET='\033[0m'
+    fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e " ${GREEN}[OK]${RESET} $*"; }
+warn() { echo -e " ${YELLOW}[WARN]${RESET} $*"; WARN_MESSAGES+=("$*"); (( WARN_COUNT++ )) || true; }
+fail() { echo -e " ${RED}[FAIL]${RESET} $*"; FAIL_MESSAGES+=("$*"); (( FAIL_COUNT++ )) || true; }
+info() { echo -e " ${CYAN}[INFO]${RESET} $*"; (( INFO_COUNT++ )) || true; }
+suggest() { [[ "$FIX_SUGGESTIONS" == "true" ]] && echo -e " ${DIM}→ $*${RESET}"; return 0; }
+verbose() { [[ "$VERBOSE" == "true" ]] && echo -e " ${DIM}[DEBUG]${RESET} $*"; return 0; }
+
+# section TITLE — print a highlighted section heading surrounded by blank lines.
+section() {
+    printf '\n%b\n\n' " ${BOLD}${CYAN}── $1 ──${RESET}"
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat <&2; usage ;;
+ esac
+done
+
+setup_colors # re-init in case --no-color was passed
+
+# ── Detect crontab directory ──────────────────────────────────────────
+detect_cron_spool() {
+ if [[ -d /var/spool/cron/crontabs ]]; then
+ echo "/var/spool/cron/crontabs" # Debian/Ubuntu
+ elif [[ -d /var/spool/cron ]]; then
+ echo "/var/spool/cron" # RHEL/Rocky
+ else
+ echo ""
+ fi
+}
+
+CRON_SPOOL="$(detect_cron_spool)"
+
+# ── Get list of crontab files to check ────────────────────────────────
+get_crontab_files() {
+ local files=()
+
+ if [[ -n "$CRON_SPOOL" ]]; then
+ if [[ -n "$TARGET_USER" ]]; then
+ [[ -f "$CRON_SPOOL/$TARGET_USER" ]] && files+=("$CRON_SPOOL/$TARGET_USER")
+ else
+ for f in "$CRON_SPOOL"/*; do
+ [[ -f "$f" ]] && files+=("$f")
+ done
+ fi
+ fi
+
+ printf '%s\n' "${files[@]}" 2>/dev/null || true
+}
+
+# ── Parse cron entries from a file ────────────────────────────────────
+# Outputs: schedule|command (skips comments, blanks, variables)
+parse_cron_entries() {
+ local file="$1" has_user_field="${2:-false}"
+
+ while IFS= read -r line; do
+ # skip comments and blank lines
+ [[ "$line" =~ ^[[:space:]]*# ]] && continue
+ [[ "$line" =~ ^[[:space:]]*$ ]] && continue
+ # skip variable assignments (MAILTO=, PATH=, SHELL=, etc.)
+ [[ "$line" =~ ^[[:space:]]*[A-Za-z_]+= ]] && continue
+
+ if [[ "$has_user_field" == "true" ]]; then
+ # system crontab: min hour dom mon dow user command
+ echo "$line" | awk '{
+ if ($1 ~ /^@/) { sched=$1; user=$2; cmd=""; for(i=3;i<=NF;i++) cmd=cmd" "$i }
+ else { sched=$1" "$2" "$3" "$4" "$5; user=$6; cmd=""; for(i=7;i<=NF;i++) cmd=cmd" "$i }
+ gsub(/^[[:space:]]+/, "", cmd)
+ print sched"|"cmd
+ }'
+ else
+ # user crontab: min hour dom mon dow command
+ echo "$line" | awk '{
+ if ($1 ~ /^@/) { sched=$1; cmd=""; for(i=2;i<=NF;i++) cmd=cmd" "$i }
+ else { sched=$1" "$2" "$3" "$4" "$5; cmd=""; for(i=6;i<=NF;i++) cmd=cmd" "$i }
+ gsub(/^[[:space:]]+/, "", cmd)
+ print sched"|"cmd
+ }'
+ fi
+ done < "$file"
+}
+
+# ── Check: crontab environment (PATH) ─────────────────────────────────
+# Warns when a crontab sets no PATH, and when it sets no MAILTO while at
+# least one job has no output redirection.
+# Arguments: $1 - crontab file, $2 - label used in messages
+check_cron_environment() {
+    local file="$1" label="$2"
+    local path_seen=false mailto_seen=false entry
+
+    # One pass over the file to see which variables are assigned.
+    while IFS= read -r entry; do
+        if [[ "$entry" =~ ^[[:space:]]*PATH= ]]; then
+            path_seen=true
+        fi
+        if [[ "$entry" =~ ^[[:space:]]*MAILTO= ]]; then
+            mailto_seen=true
+        fi
+    done < "$file"
+
+    if [[ "$path_seen" == "true" ]]; then
+        verbose "${label}: PATH is set"
+    else
+        warn "${label}: no PATH set — cron uses /usr/bin:/bin only"
+        suggest "Add to top of crontab: PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+    fi
+
+    # With MAILTO present, job output has somewhere to go: nothing more to check.
+    [[ "$mailto_seen" == "true" ]] && return 0
+
+    # Look for the first job whose command shows no redirection at all.
+    local _sched cmd unredirected=false
+    while IFS='|' read -r _sched cmd; do
+        [[ -z "$cmd" ]] && continue
+        if ! grep -qE '>\s*/|>\s*&|2>&1|>/dev/null' <<< "$cmd"; then
+            unredirected=true
+            break
+        fi
+    done < <(parse_cron_entries "$file" false)
+
+    if [[ "$unredirected" == "true" ]]; then
+        warn "${label}: no MAILTO and some jobs lack output redirection — output may be lost"
+        suggest "Add MAILTO=admin@example.com or redirect: command >> /var/log/job.log 2>&1"
+    fi
+}
+
+# ── Check: missing binaries ───────────────────────────────────────────
+# For every job in a crontab, works out which program the command starts
+# and reports absolute paths that do not exist or are not executable.
+# Arguments: $1 - crontab file
+#            $2 - label used in messages
+#            $3 - "true" when entries carry a user field (default "false")
+check_missing_binaries() {
+ local file="$1" label="$2" has_user="${3:-false}"
+
+ while IFS='|' read -r _sched cmd; do
+ [[ -z "$cmd" ]] && continue
+
+ # extract the first word (binary) — handle cd/env/sudo/flock prefixes
+ # The sed script strips, in this order, a leading "cd <dir>;" / "cd <dir> &&",
+ # "sudo [-u user]", "env [-i] [VAR=value ...]", "/usr/bin/flock <lockfile>"
+ # and "/bin/sh -c" / "/bin/bash -c"; awk then keeps the first remaining word.
+ local binary
+ binary=$(echo "$cmd" | sed -E '
+ s#^(cd [^ ;]+[; ]+(&&[[:space:]]*)?)##
+ s#^(sudo (-u [^ ]+ )?)##
+ s#^(env (-i )?([A-Za-z_]+=[^ ]+ )*)##
+ s#^(/usr/bin/flock [^ ]+ )##
+ s#^(/bin/sh -c |/bin/bash -c )##
+ ' | awk '{print $1}')
+
+ # strip trailing shell metacharacters (;, &&, ||, |)
+ # (everything from the first ';', '&' or '|' onwards is removed)
+ binary="${binary%%[;&|]*}"
+
+ [[ -z "$binary" ]] && continue
+ # skip shell builtins
+ [[ "$binary" =~ ^(test|true|false|echo|cd|source|\[|\[\[)$ ]] && continue
+
+ # if it's an absolute path, check directly
+ if [[ "$binary" == /* ]]; then
+ if [[ ! -f "$binary" ]]; then
+ fail "${label}: binary not found: ${binary}"
+ suggest "Check path: which $(basename "$binary")"
+ elif [[ ! -x "$binary" ]]; then
+ fail "${label}: not executable: ${binary}"
+ suggest "chmod +x ${binary}"
+ fi
+ else
+ # relative binary — check if it exists in cron's default PATH
+ # NOTE(review): command -v searches the PATH of the shell running this
+ # script, not cron's PATH; a miss is only reported in verbose mode.
+ if ! command -v "$binary" &>/dev/null; then
+ verbose "${label}: can't verify relative command: ${binary}"
+ fi
+ fi
+ done < <(parse_cron_entries "$file" "$has_user")
+}
+
+# ── Check: unescaped percent signs ────────────────────────────────────
+check_percent_signs() {
+ local file="$1" label="$2"
+ local lineno=0
+
+ while IFS= read -r line; do
+ (( lineno++ )) || true
+ [[ "$line" =~ ^[[:space:]]*# ]] && continue
+ [[ "$line" =~ ^[[:space:]]*$ ]] && continue
+ [[ "$line" =~ ^[[:space:]]*[A-Za-z_]+= ]] && continue
+
+ # check for % not preceded by \ (unescaped)
+ if echo "$line" | grep -qP '(?/dev/null) || return
+ owner=$(stat -c '%U' "$file" 2>/dev/null) || return
+
+ # system files (/etc/crontab, /etc/cron.d/*) are expected to be 644 root-owned
+ # user crontabs are expected to be 600 owned by the user
+ if [[ "$file" == /etc/* ]]; then
+ if [[ "$owner" != "root" ]]; then
+ warn "${label}: owned by ${owner}, expected root"
+ fi
+ else
+ local expected_user
+ expected_user=$(basename "$file")
+ if [[ "$perms" != "600" ]]; then
+ warn "${label}: permissions are ${perms}, expected 600"
+ suggest "chmod 600 ${file}"
+ fi
+ if [[ "$owner" != "$expected_user" && "$owner" != "root" ]]; then
+ fail "${label}: owned by ${owner}, expected ${expected_user} or root"
+ fi
+ fi
+}
+
+# ── Check: missing trailing newline ───────────────────────────────────
+# Reports a failure when the last byte of a crontab file is not a newline.
+# Unreadable or empty files are skipped.
+# Arguments: $1 - crontab file, $2 - label used in messages
+check_trailing_newline() {
+    local file="$1" label="$2" last_byte
+
+    if [[ ! -r "$file" ]] || [[ ! -s "$file" ]]; then
+        return
+    fi
+
+    # Hex value of the final byte. od is used instead of xxd because xxd is
+    # an optional package; when it is missing the comparison below would flag
+    # every file. If the byte cannot be read at all, skip the check.
+    last_byte=$(tail -c 1 -- "$file" | od -An -tx1 | tr -d '[:space:]') || return 0
+
+    if [[ "$last_byte" != "0a" ]]; then
+        fail "${label}: no trailing newline — last cron entry will not run"
+        suggest "echo '' >> ${file}"
+    fi
+}
+
+# ── Check: overlap risk ──────────────────────────────────────────────
+check_overlap_risk() {
+ local file="$1" label="$2" has_user="${3:-false}"
+
+ while IFS='|' read -r sched cmd; do
+ [[ -z "$cmd" ]] && continue
+
+ # check for frequent schedules (every minute or every 5 min)
+ local is_frequent=false
+ if echo "$sched" | grep -qE '^\*[[:space:]]|^\*/[1-5][[:space:]]'; then
+ is_frequent=true
+ fi
+
+ if [[ "$is_frequent" == "true" ]]; then
+ # check if command uses flock or lockfile
+ if ! echo "$cmd" | grep -qiE 'flock|lockfile|lock'; then
+ warn "${label}: frequent job (${sched%% *}) without locking: $(echo "$cmd" | cut -c1-60)"
+ suggest "Wrap with flock: /usr/bin/flock -n /var/lock/myjob.lock $cmd"
+ fi
+ fi
+ done < <(parse_cron_entries "$file" "$has_user")
+}
+
+# ── Check: cron.allow / cron.deny ─────────────────────────────────────
+check_cron_access() {
+ section "Cron Access Control"
+
+ if [[ -f /etc/cron.allow ]]; then
+ info "/etc/cron.allow exists — only listed users can use cron"
+ if [[ -n "$TARGET_USER" ]]; then
+ if grep -qxF "$TARGET_USER" /etc/cron.allow 2>/dev/null; then
+ log "${TARGET_USER} is in cron.allow"
+ else
+ fail "${TARGET_USER} is NOT in cron.allow — cron jobs will not run"
+ suggest "echo '${TARGET_USER}' >> /etc/cron.allow"
+ fi
+ fi
+ elif [[ -f /etc/cron.deny ]]; then
+ info "/etc/cron.deny exists — listed users are blocked"
+ if [[ -n "$TARGET_USER" ]]; then
+ if grep -qxF "$TARGET_USER" /etc/cron.deny 2>/dev/null; then
+ fail "${TARGET_USER} is in cron.deny — cron jobs will not run"
+ suggest "Remove ${TARGET_USER} from /etc/cron.deny"
+ else
+ log "${TARGET_USER} is not in cron.deny"
+ fi
+ fi
+ else
+ verbose "No cron.allow or cron.deny found"
+ fi
+}
+
+# ── Check: systemd timers ─────────────────────────────────────────────
+# Three checks, all skipped when systemctl is not installed:
+#   1. failed services for which a unit file "<name>.timer" exists
+#   2. timer units that are loaded but not in the "active" state
+#   3. active calendar timers with Persistent=no
+check_systemd_timers() {
+    section "Systemd Timers"
+
+    if ! command -v systemctl &>/dev/null; then
+        info "systemctl not found — skipping timer checks"
+        return
+    fi
+
+    # failed timer-triggered services
+    # --no-legend plus the *.service filter keep header and footer rows of
+    # the listing from being treated as unit names.
+    local failed svc timer timer_failures=0
+    failed=$(systemctl list-units --type=service --state=failed --no-pager --plain --no-legend 2>/dev/null \
+        | awk '$1 ~ /\.service$/ { print $1 }' || true)
+
+    while IFS= read -r svc; do
+        [[ -z "$svc" ]] && continue
+        # check if this service has a matching timer
+        timer="${svc%.service}.timer"
+        # Test the OUTPUT, not the exit status: whether list-unit-files
+        # returns non-zero for "no match" depends on the systemd version.
+        if [[ -n "$(systemctl list-unit-files --no-legend --no-pager "$timer" 2>/dev/null)" ]]; then
+            fail "Timer-triggered service failed: ${svc}"
+            suggest "journalctl -u ${svc} -b --no-pager | tail -20"
+            timer_failures=$((timer_failures + 1))
+        fi
+    done <<< "$failed"
+
+    # Report OK whenever no failed service belongs to a timer, including the
+    # case where unrelated services have failed.
+    if (( timer_failures == 0 )); then
+        log "No failed timer-triggered services"
+    fi
+
+    # timers enabled but not active
+    # Columns of the listing: UNIT LOAD ACTIVE SUB DESCRIPTION
+    local timer_name state
+    while read -r timer_name _ state _; do
+        [[ -z "$timer_name" ]] && continue
+        [[ "$timer_name" != *.timer ]] && continue
+
+        if [[ "$state" != "active" ]]; then
+            warn "Timer ${timer_name} is loaded but not active (state: ${state})"
+            suggest "systemctl start ${timer_name}"
+        fi
+    done < <(systemctl list-units --type=timer --all --no-pager --plain --no-legend 2>/dev/null || true)
+
+    # timers without Persistent=true
+    local persistent has_calendar
+    while IFS= read -r timer_name; do
+        [[ "$timer_name" == *.timer ]] || continue
+        # A failing `systemctl show` skips this timer instead of aborting the
+        # whole script under `set -e`.
+        persistent=$(systemctl show "$timer_name" -p Persistent 2>/dev/null | cut -d= -f2) || continue
+        [[ "$persistent" == "no" ]] || continue
+        has_calendar=$(systemctl show "$timer_name" -p TimersCalendar 2>/dev/null) || continue
+        # Only timers with a calendar expression are reported.
+        if [[ -n "$has_calendar" && "$has_calendar" != "TimersCalendar=" ]]; then
+            warn "${timer_name}: Persistent=false — missed runs during downtime won't catch up"
+            suggest "Add Persistent=true to [Timer] section: systemctl edit ${timer_name}"
+        fi
+    done < <(systemctl list-units --type=timer --state=active --no-pager --plain --no-legend 2>/dev/null \
+        | awk '{print $1}')
+
+    return 0
+}
+
+# ── Run cron checks on a single file ─────────────────────────────────
+check_crontab_file() {
+    # Run every per-file cron check against one crontab.
+    #   $1 - path of the crontab file
+    #   $2 - label used in report messages
+    #   $3 - "true" when entries carry a user column (default: false)
+    local file="$1" label="$2" has_user="${3:-false}"
+    local check
+
+    verbose "Checking: ${file}"
+
+    # Checks that only need the file and its label
+    for check in check_crontab_permissions check_trailing_newline \
+                 check_cron_environment check_percent_signs; do
+        "$check" "$file" "$label"
+    done
+
+    # Checks that also need to know whether a user column is present
+    for check in check_missing_binaries check_overlap_risk; do
+        "$check" "$file" "$label" "$has_user"
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# Main
+# ══════════════════════════════════════════════════════════════════════
+
+# Banner
+printf '\n'
+printf '%b\n' " ${BOLD}Cron Doctor${RESET} — diagnosing scheduled task issues"
+printf '%b\n' " ${DIM}$(date '+%Y-%m-%d %H:%M:%S')${RESET}"
+
+# ── Cron checks (skipped when only timers were requested) ─────────────
+if [[ "$TIMERS_ONLY" == "false" ]]; then
+
+    check_cron_access
+
+    # Per-user crontabs, as reported by get_crontab_files (one path per line)
+    section "User Crontabs"
+
+    crontab_files=$(get_crontab_files)
+    if [[ -n "$crontab_files" ]]; then
+        while IFS= read -r file; do
+            if [[ -n "$file" ]]; then
+                # the crontab file name doubles as the user name in the label
+                user=$(basename "$file")
+                check_crontab_file "$file" "crontab(${user})" false
+            fi
+        done <<< "$crontab_files"
+    elif [[ -n "$TARGET_USER" ]]; then
+        info "No crontab found for user: ${TARGET_USER}"
+    else
+        info "No user crontabs found in ${CRON_SPOOL:-/var/spool/cron}"
+    fi
+
+    # System-wide crontab (entries carry a user column)
+    if [[ -f /etc/crontab ]]; then
+        section "System Crontab (/etc/crontab)"
+        check_crontab_file "/etc/crontab" "/etc/crontab" true
+    fi
+
+    # Drop-in files under /etc/cron.d (entries carry a user column)
+    if [[ -d /etc/cron.d ]]; then
+        section "Drop-ins (/etc/cron.d)"
+        found_drop_ins=false
+        for f in /etc/cron.d/*; do
+            [[ -f "$f" ]] || continue
+            # skip dpkg/ucf leftovers
+            case "$f" in
+                *.dpkg-*|*.ucf-*) continue ;;
+            esac
+            found_drop_ins=true
+            check_crontab_file "$f" "cron.d/$(basename "$f")" true
+        done
+        if [[ "$found_drop_ins" == "false" ]]; then
+            info "No drop-in files in /etc/cron.d"
+        fi
+    fi
+fi
+
+# ── Systemd timer checks (skipped when only cron was requested) ──────
+[[ "$CRON_ONLY" != "false" ]] || check_systemd_timers
+
+# ── Summary ───────────────────────────────────────────────────────────
+# Print the totals gathered in FAIL_COUNT / WARN_COUNT and list every
+# recorded failure message.
+printf '\n'
+printf '%b\n' " ${BOLD}── Summary ──${RESET}"
+printf '\n'
+TOTAL=$(( FAIL_COUNT + WARN_COUNT ))
+if (( TOTAL == 0 )); then
+    printf '%b\n' " ${GREEN}✓ No issues found${RESET}"
+else
+    if (( FAIL_COUNT > 0 )); then
+        printf '%b\n' " ${RED}${FAIL_COUNT} failure(s):${RESET}"
+        for msg in "${FAIL_MESSAGES[@]}"; do
+            printf '%b\n' " ${RED}•${RESET} ${msg}"
+        done
+    fi
+    if (( WARN_COUNT > 0 )); then
+        printf '%b\n' " ${YELLOW}${WARN_COUNT} warning(s)${RESET}"
+    fi
+fi
+printf '\n'
+
+# Exit status: 2 when anything failed, 1 when there were only warnings,
+# 0 otherwise.
+if (( FAIL_COUNT > 0 )); then
+    exit 2
+fi
+if (( WARN_COUNT > 0 )); then
+    exit 1
+fi
+exit 0
diff --git a/cron-job-exporter.sh b/cron-job-exporter.sh
new file mode 100644
index 0000000..5f84bc2
--- /dev/null
+++ b/cron-job-exporter.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+#############################################################
+#### Cron Job Monitoring Exporter for Prometheus ####
+#### Tracks whether scheduled cron jobs ran successfully, ####
+#### their exit codes, duration, and staleness ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: ./cron-job-exporter.sh [OPTIONS] ####
+#############################################################
+#
+# Monitors cron job execution by wrapping cron commands.
+# Two modes of operation:
+# 1. Wrapper mode: wrap a cron command to record metrics
+# 2. Collector mode: scan state files and write .prom output
+#
+# Metrics exported:
+# - cron_job_exit_code (last exit code)
+# - cron_job_duration_seconds (last execution time)
+# - cron_job_last_run_timestamp (unix timestamp of last run)
+# - cron_job_success (1 if last run exited 0, else 0)
+# - cron_job_runs_total (total number of runs)
+#
+# Requirements:
+# - Bash 4.0+
+# - node_exporter with textfile collector enabled
+#
+set -euo pipefail
+
+#########################
+### Configuration ###
+#########################
+
+# Each setting keeps a value already present in the environment and falls
+# back to the default shown when it is unset or empty.
+: "${NODE_DIR:=/var/lib/node_exporter}"
+: "${STATE_DIR:=/var/lib/cron-job-exporter}"
+: "${STALE_THRESHOLD:=86400}" # 24 hours
+: "${DEBUG:=}"
+# Output file, always placed inside NODE_DIR
+PROM_FILE="${NODE_DIR}/cron_jobs.prom"
+
+#########################
+### Logging ###
+#########################
+
+# ANSI colour sequences, stored with a literal backslash and expanded later
+# by `echo -e` in the log_* helpers.
+NC="\033[0m"
+RED="\033[0;31m"
+GREEN="\033[0;32m"
+YELLOW="\033[1;33m"
+
+# Write an [INFO] line (green tag) to stderr. $1 - message.
+log_info() {
+    printf '%b\n' "${GREEN}[INFO]${NC} $1" >&2
+}
+
+# Write a [WARN] line (yellow tag) to stderr. $1 - message.
+log_warn() {
+    printf '%b\n' "${YELLOW}[WARN]${NC} $1" >&2
+}
+
+# Write an [ERROR] line (red tag) to stderr. $1 - message.
+log_error() {
+    printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Write a [DEBUG] line to stderr when DEBUG is non-empty. $1 - message.
+# Always returns 0. The previous `[[ -n "$DEBUG" ]] && echo ...` form made
+# the function return 1 whenever DEBUG was empty, and because the script
+# runs under `set -e` the first plain `log_debug ...` call aborted it.
+log_debug() {
+    if [[ -n "$DEBUG" ]]; then
+        echo "[DEBUG] $1" >&2
+    fi
+}
+
+#########################
+### Parse Arguments ###
+#########################
+
+# Print the usage text for wrapper and collector mode, then exit with
+# status 0.
+# NOTE(review): the `cat` line right below looks like a damaged
+# here-document opener — an `EOF` terminator follows but no `<<EOF` is
+# visible, and the first usage lines appear to be missing. Restore it from
+# the upstream script before relying on this function.
+show_help() {
+ cat < --
+ Collector mode: $0 --collect
+
+WRAPPER MODE (use in crontab):
+ Wraps a cron command, records exit code, duration, and timestamp
+ to a state file. Run --collect separately to generate .prom output.
+
+ Example crontab:
+ * * * * * /opt/cron-job-exporter.sh --wrap --name backup_db -- /opt/backup-db.sh
+ 0 * * * * /opt/cron-job-exporter.sh --wrap --name log_cleanup -- /opt/cleanup-logs.sh
+
+COLLECTOR MODE (run on schedule or as oneshot):
+ Reads all state files and writes a single .prom file for node_exporter.
+
+ Example crontab:
+ * * * * * /opt/cron-job-exporter.sh --collect
+
+OPTIONS:
+ --wrap Wrapper mode: run a command and record metrics
+ --collect Collector mode: generate .prom from state files
+ --name NAME Job name for wrapper mode (required with --wrap)
+ --stale-threshold SEC Seconds before a job is considered stale (default: 86400)
+ --state-dir DIR State file directory (default: /var/lib/cron-job-exporter)
+ --help Show this help
+
+EOF
+ exit 0
+}
+
+# Results of argument parsing
+MODE=""       # "wrap" or "collect"
+JOB_NAME=""   # value of --name
+JOB_CMD=()    # command and arguments given after "--"
+
+# Parse the command line into MODE, JOB_NAME, JOB_CMD, STALE_THRESHOLD and
+# STATE_DIR, then validate the combination.
+# Arguments: the script's own arguments ("$@")
+# Exits 1 after logging when an option is unknown, an option value is
+# missing, the stale threshold is not a non-negative integer, no mode was
+# chosen, or wrapper mode lacks a name or a command.
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --wrap)    MODE="wrap"; shift ;;
+            --collect) MODE="collect"; shift ;;
+            --name|--stale-threshold|--state-dir)
+                # Without this guard a trailing option would die under
+                # `set -u` with a bare "unbound variable" message.
+                if [[ $# -lt 2 ]]; then
+                    log_error "Option $1 requires an argument"
+                    exit 1
+                fi
+                case "$1" in
+                    --name)            JOB_NAME="$2" ;;
+                    --stale-threshold) STALE_THRESHOLD="$2" ;;
+                    --state-dir)       STATE_DIR="$2" ;;
+                esac
+                shift 2 ;;
+            --help) show_help ;;
+            # everything after "--" is the wrapped command, taken verbatim
+            --) shift; JOB_CMD=("$@"); break ;;
+            *) log_error "Unknown option: $1"; exit 1 ;;
+        esac
+    done
+
+    # STALE_THRESHOLD is later compared inside (( ... )); reject anything
+    # that is not a plain non-negative integer.
+    if [[ ! "${STALE_THRESHOLD:-}" =~ ^[0-9]+$ ]]; then
+        log_error "--stale-threshold must be a non-negative integer (got: ${STALE_THRESHOLD:-})"
+        exit 1
+    fi
+
+    if [[ -z "$MODE" ]]; then
+        log_error "Must specify --wrap or --collect"
+        echo "Run '$0 --help' for usage."
+        exit 1
+    fi
+
+    if [[ "$MODE" == "wrap" ]]; then
+        if [[ -z "$JOB_NAME" ]]; then
+            log_error "--name is required in wrapper mode"
+            exit 1
+        fi
+        if [[ ${#JOB_CMD[@]} -eq 0 ]]; then
+            log_error "No command specified after --"
+            exit 1
+        fi
+    fi
+}
+
+#########################
+### Sanitize ###
+#########################
+
+# Turn an arbitrary job name into a lowercase identifier made of
+# [a-z0-9_] only.
+# Arguments: $1 - raw job name
+# Outputs:   sanitized name on stdout (may be empty when the input has no
+#            alphanumeric characters)
+sanitize_name() {
+    local name="$1"
+    # Byte-wise C locale so the [a-z] range means ASCII only, whatever the
+    # caller's locale is.
+    local LC_ALL=C
+
+    name="${name,,}"
+    # every character outside [a-z0-9_] (spaces included) becomes "_"
+    name="${name//[^a-z0-9_]/_}"
+    # collapse runs of underscores
+    while [[ "$name" == *__* ]]; do
+        name="${name//__/_}"
+    done
+    # strip the single leading/trailing underscore that may remain
+    name="${name#_}"
+    name="${name%_}"
+
+    # printf rather than echo: echo would swallow values such as "-n" or "-e"
+    printf '%s\n' "$name"
+}
+
+#########################
+### Wrapper Mode ###
+#########################
+
+# Wrapper mode: run JOB_CMD, measure it, and prepare a per-job state file
+# under STATE_DIR.
+# NOTE(review): this copy is damaged. The `cat > "$tmpfile" < ...` line
+# below is the fused remains of a here-document and of a later arithmetic
+# test; the rest of run_wrapper and the beginning of the collector function
+# are missing. Everything after that line reads like the tail of the
+# collector (it builds `metrics` and writes PROM_FILE) — restore both
+# functions from the upstream script.
+run_wrapper() {
+ mkdir -p "$STATE_DIR"
+
+ # State file is named after the sanitized job name
+ local safe_name
+ safe_name=$(sanitize_name "$JOB_NAME")
+ local state_file="${STATE_DIR}/${safe_name}.state"
+
+ log_debug "Wrapping command: ${JOB_CMD[*]}"
+ log_debug "Job name: $safe_name"
+
+ # %s%N: seconds followed by nanoseconds
+ local start_time end_time duration exit_code
+ start_time=$(date +%s%N)
+
+ # Run the command, capturing exit code
+ # errexit is switched off around the call so a failing job does not abort
+ # the wrapper before its status has been recorded
+ set +e
+ "${JOB_CMD[@]}"
+ exit_code=$?
+ set -e
+
+ end_time=$(date +%s%N)
+ # Nanosecond difference converted to seconds with 3 decimals by bc;
+ # "0" is emitted when the bc pipeline fails
+ duration=$(echo "scale=3; ($end_time - $start_time) / 1000000000" | bc 2>/dev/null || echo "0")
+
+ # Read current run count
+ local runs=0
+ if [[ -f "$state_file" ]]; then
+ runs=$(grep '^runs=' "$state_file" 2>/dev/null | cut -d= -f2 || echo "0")
+ fi
+ runs=$((runs + 1))
+
+ # Write state file atomically
+ # The temp file is created next to the state file
+ local tmpfile
+ tmpfile=$(mktemp "${state_file}.XXXXXX")
+
+ # NOTE(review): damaged line, see the header comment
+ cat > "$tmpfile" < STALE_THRESHOLD )); then
+ stale=1
+ fi
+
+ # One sample line per metric, labelled with the job name, appended to
+ # the `metrics` buffer
+ metrics+="cron_job_exit_code{job=\"${name}\"} ${exit_code}
+"
+ metrics+="cron_job_duration_seconds{job=\"${name}\"} ${duration}
+"
+ metrics+="cron_job_last_run_timestamp{job=\"${name}\"} ${timestamp}
+"
+ metrics+="cron_job_success{job=\"${name}\"} ${success}
+"
+ metrics+="cron_job_runs_total{job=\"${name}\"} ${runs}
+"
+ metrics+="cron_job_stale{job=\"${name}\"} ${stale}
+"
+
+ log_debug "Collected: $name (exit=$exit_code, stale=$stale)"
+ done
+
+ if [[ $found -eq 0 ]]; then
+ log_debug "No state files found in $STATE_DIR"
+ fi
+
+ # Collector metadata
+ metrics+="
+# HELP cron_job_collector_last_run_timestamp Unix timestamp of last collector run
+# TYPE cron_job_collector_last_run_timestamp gauge
+cron_job_collector_last_run_timestamp $now
+"
+
+ # Atomic write
+ # The temp file is created beside PROM_FILE and then renamed over it
+ local tmpfile
+ tmpfile=$(mktemp "${PROM_FILE}.XXXXXX")
+ echo "$metrics" > "$tmpfile"
+ mv "$tmpfile" "$PROM_FILE"
+
+ log_info "Metrics written to $PROM_FILE ($found jobs)"
+}
+
+#########################
+### Main ###
+#########################
+
+# Entry point: parse the arguments, then dispatch on MODE.
+main() {
+    parse_args "$@"
+
+    if [[ "$MODE" == "wrap" ]]; then
+        run_wrapper
+    elif [[ "$MODE" == "collect" ]]; then
+        run_collector
+    fi
+}
+
+main "$@"
diff --git a/cron-lister.sh b/cron-lister.sh
new file mode 100644
index 0000000..46bdcc3
--- /dev/null
+++ b/cron-lister.sh
@@ -0,0 +1,433 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### cron-lister.sh — List all cron jobs across users, system cron, and timers ####
+#### Scans user crontabs, /etc/cron.*, systemd timers, and anacron ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./cron-lister.sh ####
+#### ./cron-lister.sh --format raw ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Each default keeps a value already present in the environment.
+: "${FORMAT:=table}"
+: "${VERBOSE:=false}"
+: "${COLOR:=auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+declare -r SCRIPT_NAME
+# Per-source job counters, incremented by the scan_* functions
+COUNT_USER_CRONTAB=0 COUNT_SYSTEM_CRONTAB=0 COUNT_CRON_D=0
+COUNT_CRON_DIRS=0 COUNT_SYSTEMD_TIMER=0 COUNT_ANACRON=0
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Set the colour variables according to COLOR:
+#   never  - all empty
+#   always - ANSI sequences
+#   other  - ANSI sequences only when stdout is a terminal
+setup_colors() {
+    local use_color=false
+    if [[ "$COLOR" != "never" ]]; then
+        if [[ "$COLOR" == "always" || -t 1 ]]; then
+            use_color=true
+        fi
+    fi
+
+    if [[ "$use_color" == "true" ]]; then
+        GREEN='\033[0;32m'
+        YELLOW='\033[0;33m'
+        BLUE='\033[0;34m'
+        MAGENTA='\033[0;35m'
+        CYAN='\033[0;36m'
+        BOLD='\033[1m'
+        DIM='\033[2m'
+        RESET='\033[0m'
+    else
+        GREEN="" YELLOW="" BLUE="" MAGENTA="" CYAN="" BOLD="" DIM="" RESET=""
+    fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# Informational message on stdout.
+log() {
+    printf '%b\n' "${DIM}[INFO]${RESET} $*"
+}
+
+# Warning on stderr.
+warn() {
+    printf '%b\n' "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+# Debug message on stdout, printed only when VERBOSE is "true".
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    printf '%b\n' "${DIM}[DEBUG]${RESET} $*"
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Print a section title framed by one blank line above and below.
+# $1 - title
+section_header() {
+    printf '\n'
+    printf '%b\n' " ${BOLD}${CYAN}── $1 ──${RESET}"
+    printf '\n'
+}
+
+# Print the column titles followed by a rule of 80 "─" characters.
+print_table_header() {
+    local rule
+    # 80 spaces, each turned into the rule character
+    printf -v rule '%*s' 80 ''
+    rule="${rule// /─}"
+
+    printf " ${BOLD}%-18s %-14s %-22s %s${RESET}\n" "SOURCE" "USER/UNIT" "SCHEDULE" "COMMAND"
+    printf " %s\n" "$rule"
+}
+
+# Print one job row.
+#   $1 - source tag (selects the row colour)
+#   $2 - user or unit
+#   $3 - schedule
+#   $4 - command (shortened to 60 characters)
+# With FORMAT=raw the row is tab-separated and uncoloured.
+print_job() {
+    local origin="$1" owner="$2" when="$3" cmdline="$4"
+    local -r max_len=60
+
+    # Long commands keep their first 57 characters plus "..."
+    if (( ${#cmdline} > max_len )); then
+        cmdline="${cmdline:0:max_len-3}..."
+    fi
+
+    if [[ "$FORMAT" == "raw" ]]; then
+        printf "%s\t%s\t%s\t%s\n" "$origin" "$owner" "$when" "$cmdline"
+        return
+    fi
+
+    local tint=""
+    if [[ "$origin" == "user-crontab" ]]; then
+        tint="$GREEN"
+    elif [[ "$origin" == "/etc/crontab" ]]; then
+        tint="$BLUE"
+    elif [[ "$origin" == /etc/cron.d/* ]]; then
+        tint="$CYAN"
+    elif [[ "$origin" =~ ^cron\.(hourly|daily|weekly|monthly)$ ]]; then
+        tint="$MAGENTA"
+    elif [[ "$origin" == "systemd-timer" ]]; then
+        tint="$YELLOW"
+    elif [[ "$origin" == "anacron" ]]; then
+        tint="$DIM"
+    fi
+
+    printf " %b%-18s%b %-14s %-22s %s\n" "$tint" "$origin" "$RESET" "$owner" "$when" "$cmdline"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USER CRONTABS
+# ══════════════════════════════════════════════════════════════════════
+
+# List the jobs found in per-user crontab spool files.
+# Globals: COUNT_USER_CRONTAB (incremented once per printed job)
+# Outputs: one print_job row per entry
+scan_user_crontabs() {
+    section_header "User Crontabs"
+
+    # Field splitting and "${fields[*]}" joins below rely on default IFS
+    local IFS=$' \t\n'
+
+    # Debian/Ubuntu keep user crontabs in /var/spool/cron/crontabs; cron on
+    # RHEL-family systems stores them directly in /var/spool/cron.
+    local crontab_dir="/var/spool/cron/crontabs"
+    [[ -d "$crontab_dir" ]] || crontab_dir="/var/spool/cron"
+
+    local found=false
+    local crontab_file username line schedule cmd
+    local -a fields
+
+    if [[ -d "$crontab_dir" && -r "$crontab_dir" ]]; then
+        while IFS= read -r crontab_file; do
+            [[ -z "$crontab_file" ]] && continue
+            found=true
+            username=$(basename "$crontab_file")
+            verbose "Reading crontab for user: $username"
+
+            # `|| [[ -n "$line" ]]` keeps a final line that lacks a newline
+            while IFS= read -r line || [[ -n "$line" ]]; do
+                read -r -a fields <<< "$line"
+
+                # blank lines and comments (leading whitespace allowed)
+                (( ${#fields[@]} > 0 )) || continue
+                [[ "${fields[0]}" == "#"* ]] && continue
+                # environment settings such as SHELL=, HOME=, CRON_TZ=
+                [[ "$line" =~ ^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*[[:space:]]*= ]] && continue
+
+                if [[ "${fields[0]}" == @* ]]; then
+                    # @reboot, @daily, ...: the schedule is a single field
+                    schedule="${fields[0]}"
+                    cmd="${fields[*]:1}"
+                else
+                    schedule="${fields[*]:0:5}"
+                    cmd="${fields[*]:5}"
+                fi
+
+                if [[ -n "$cmd" ]]; then
+                    print_job "user-crontab" "$username" "$schedule" "$cmd"
+                    COUNT_USER_CRONTAB=$((COUNT_USER_CRONTAB + 1))
+                fi
+            done < "$crontab_file"
+        done < <(find "$crontab_dir" -maxdepth 1 -type f 2>/dev/null)
+    fi
+
+    if [[ "$found" == "false" ]]; then
+        verbose "No user crontabs found in $crontab_dir"
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SYSTEM CRONTAB
+# ══════════════════════════════════════════════════════════════════════
+
+# List the jobs defined in /etc/crontab (entries carry a user column).
+# Globals: COUNT_SYSTEM_CRONTAB (incremented once per printed job)
+# Outputs: one print_job row per entry
+scan_system_crontab() {
+    section_header "/etc/crontab"
+
+    if [[ ! -f /etc/crontab ]]; then
+        verbose "/etc/crontab not found"
+        return
+    fi
+
+    # Field splitting and "${fields[*]}" joins below rely on default IFS
+    local IFS=$' \t\n'
+    local line schedule user cmd
+    local -a fields
+
+    # `|| [[ -n "$line" ]]` keeps a final line that lacks a newline
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        read -r -a fields <<< "$line"
+
+        # blank lines and comments (leading whitespace allowed)
+        (( ${#fields[@]} > 0 )) || continue
+        [[ "${fields[0]}" == "#"* ]] && continue
+        # environment settings such as SHELL=, PATH=, MAILTO=, HOME=
+        [[ "$line" =~ ^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*[[:space:]]*= ]] && continue
+
+        if [[ "${fields[0]}" == @* ]]; then
+            # @reboot, @daily, ...: schedule is one field, then user, command
+            schedule="${fields[0]}"
+            user="${fields[1]:-}"
+            cmd="${fields[*]:2}"
+        else
+            schedule="${fields[*]:0:5}"
+            user="${fields[5]:-}"
+            cmd="${fields[*]:6}"
+        fi
+
+        if [[ -n "$cmd" ]]; then
+            print_job "/etc/crontab" "$user" "$schedule" "$cmd"
+            COUNT_SYSTEM_CRONTAB=$((COUNT_SYSTEM_CRONTAB + 1))
+        fi
+    done < /etc/crontab
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# /etc/cron.d
+# ══════════════════════════════════════════════════════════════════════
+
+# List the jobs defined by drop-in files in /etc/cron.d (entries carry a
+# user column).
+# Globals: COUNT_CRON_D (incremented once per printed job)
+# Outputs: one print_job row per entry
+scan_cron_d() {
+    section_header "/etc/cron.d"
+
+    if [[ ! -d /etc/cron.d ]]; then
+        verbose "/etc/cron.d not found"
+        return
+    fi
+
+    # Field splitting and "${fields[*]}" joins below rely on default IFS
+    local IFS=$' \t\n'
+    local cron_file filename line schedule user cmd
+    local -a fields
+
+    while IFS= read -r cron_file; do
+        [[ -z "$cron_file" ]] && continue
+        filename=$(basename "$cron_file")
+
+        # Skip dpkg and package manager files
+        [[ "$filename" == *.dpkg-* || "$filename" == *.ucf-* ]] && continue
+
+        verbose "Reading /etc/cron.d/$filename"
+
+        # `|| [[ -n "$line" ]]` keeps a final line that lacks a newline
+        while IFS= read -r line || [[ -n "$line" ]]; do
+            read -r -a fields <<< "$line"
+
+            # blank lines and comments (leading whitespace allowed)
+            (( ${#fields[@]} > 0 )) || continue
+            [[ "${fields[0]}" == "#"* ]] && continue
+            # environment settings such as SHELL=, PATH=, MAILTO=, HOME=
+            [[ "$line" =~ ^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*[[:space:]]*= ]] && continue
+
+            if [[ "${fields[0]}" == @* ]]; then
+                # @reboot, @daily, ...: schedule is one field, then user, command
+                schedule="${fields[0]}"
+                user="${fields[1]:-}"
+                cmd="${fields[*]:2}"
+            else
+                schedule="${fields[*]:0:5}"
+                user="${fields[5]:-}"
+                cmd="${fields[*]:6}"
+            fi
+
+            if [[ -n "$cmd" ]]; then
+                print_job "/etc/cron.d/$filename" "$user" "$schedule" "$cmd"
+                COUNT_CRON_D=$((COUNT_CRON_D + 1))
+            fi
+        done < "$cron_file"
+    done < <(find /etc/cron.d -maxdepth 1 -type f 2>/dev/null)
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# CRON DIRECTORIES
+# ══════════════════════════════════════════════════════════════════════
+
+# List the executable scripts in /etc/cron.{hourly,daily,weekly,monthly}.
+# Globals: COUNT_CRON_DIRS (incremented once per printed script)
+# Outputs: one print_job row per executable script, owner shown as root
+scan_cron_dirs() {
+    section_header "Cron Directories"
+
+    local period dir script script_name
+    for period in hourly daily weekly monthly; do
+        dir="/etc/cron.${period}"
+        [[ -d "$dir" ]] || continue
+
+        while IFS= read -r script; do
+            [[ -n "$script" ]] || continue
+            script_name=$(basename "$script")
+
+            # Package manager leftovers are ignored
+            case "$script_name" in
+                *.dpkg-*|*.ucf-*|.|..) continue ;;
+            esac
+
+            if [[ ! -x "$script" ]]; then
+                verbose "Skipping non-executable: $script"
+                continue
+            fi
+
+            print_job "cron.${period}" "root" "$period" "$script_name"
+            COUNT_CRON_DIRS=$((COUNT_CRON_DIRS + 1))
+        done < <(find "$dir" -maxdepth 1 -type f 2>/dev/null)
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SYSTEMD TIMERS
+# ══════════════════════════════════════════════════════════════════════
+
+# List systemd timers as reported by `systemctl list-timers --all`.
+# Globals: COUNT_SYSTEMD_TIMER (incremented once per printed timer)
+# Outputs: one print_job row per timer; the last column of the systemctl
+#          row is shown as the command
+scan_systemd_timers() {
+    section_header "Systemd Timers"
+
+    if ! command -v systemctl &>/dev/null; then
+        verbose "systemd not available"
+        return
+    fi
+
+    local line unit_name activates schedule_info on_calendar
+
+    # The loop reads from process substitution instead of a pipe: a piped
+    # `while` runs in a subshell, so COUNT_SYSTEMD_TIMER increments were
+    # lost and the summary always showed 0 timers.
+    while IFS= read -r line; do
+        [[ -z "$line" ]] && continue
+
+        # Timer unit is the second-to-last field, schedule is NEXT + LEFT
+        unit_name=$(awk '{print $(NF-1)}' <<< "$line")
+        activates=$(awk '{print $NF}' <<< "$line")
+        schedule_info=$(awk '{print $1, $2, $3}' <<< "$line")
+
+        [[ -n "$unit_name" && "$unit_name" != "UNIT" ]] || continue
+
+        # Prefer the unit's own trigger definition; `|| true` keeps a failing
+        # `systemctl show` from aborting the script under set -e/pipefail.
+        on_calendar=$(systemctl show "$unit_name" --property=TimersCalendar 2>/dev/null \
+            | sed 's/TimersCalendar=//' | head -1) || true
+        if [[ -z "$on_calendar" ]]; then
+            on_calendar=$(systemctl show "$unit_name" --property=TimersMonotonic 2>/dev/null \
+                | sed 's/TimersMonotonic=//' | head -1) || true
+        fi
+
+        # Fall back to the first three columns of the list-timers row
+        on_calendar="${on_calendar:-$schedule_info}"
+
+        # Truncate schedule if too long
+        if [[ ${#on_calendar} -gt 20 ]]; then
+            on_calendar="${on_calendar:0:17}..."
+        fi
+
+        print_job "systemd-timer" "$unit_name" "$on_calendar" "$activates"
+        COUNT_SYSTEMD_TIMER=$((COUNT_SYSTEMD_TIMER + 1))
+    done < <(systemctl list-timers --all --no-legend --no-pager 2>/dev/null || true)
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ANACRON
+# ══════════════════════════════════════════════════════════════════════
+
+# List the jobs defined in /etc/anacrontab.
+# Each job line is read as: period, delay, identifier, command...
+# Globals: COUNT_ANACRON (incremented once per printed job)
+scan_anacron() {
+    section_header "Anacron"
+
+    if [[ ! -f /etc/anacrontab ]]; then
+        verbose "/etc/anacrontab not found"
+        return
+    fi
+
+    # Whitespace splitting and the single-space join of the command words
+    # below rely on default IFS
+    local IFS=$' \t\n'
+    local entry period delay ident cmd
+    local -a parts
+
+    while IFS= read -r entry; do
+        # Blank lines, comments and the known settings are not jobs
+        case "$entry" in
+            ""|"#"*|"SHELL="*|"PATH="*|"MAILTO="*|"HOME="*|"START_HOURS_RANGE="*|"RANDOM_DELAY="*)
+                continue ;;
+        esac
+
+        read -r -a parts <<< "$entry"
+        # Fewer than four fields means there is no command to show
+        (( ${#parts[@]} > 3 )) || continue
+
+        period="${parts[0]}"
+        delay="${parts[1]}"
+        ident="${parts[2]}"
+        cmd="${parts[*]:3}"
+
+        print_job "anacron" "$ident" "every ${period}d +${delay}m" "$cmd"
+        COUNT_ANACRON=$((COUNT_ANACRON + 1))
+    done < /etc/anacrontab
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+
+# Print the per-source job counts and their total.
+# Globals: COUNT_USER_CRONTAB, COUNT_SYSTEM_CRONTAB, COUNT_CRON_D,
+#          COUNT_CRON_DIRS, COUNT_SYSTEMD_TIMER, COUNT_ANACRON (read)
+print_summary() {
+    local -a labels=(
+        "User crontabs:" "/etc/crontab:" "/etc/cron.d:"
+        "cron.{h,d,w,m}:" "Systemd timers:" "Anacron:"
+    )
+    local -a counts=(
+        "$COUNT_USER_CRONTAB" "$COUNT_SYSTEM_CRONTAB" "$COUNT_CRON_D"
+        "$COUNT_CRON_DIRS" "$COUNT_SYSTEMD_TIMER" "$COUNT_ANACRON"
+    )
+    local bar=" ${BOLD}══════════════════════════════════════════${RESET}"
+    local total=0 idx
+
+    for idx in "${!counts[@]}"; do
+        total=$((total + counts[idx]))
+    done
+
+    echo ""
+    echo -e "$bar"
+    echo -e " ${BOLD}Summary${RESET}"
+    echo -e "$bar"
+
+    for idx in "${!labels[@]}"; do
+        printf " %-22s %d\n" "${labels[idx]}" "${counts[idx]}"
+    done
+    printf " %s\n" "$(printf '%.0s─' {1..30})"
+    printf " ${BOLD}%-22s %d${RESET}\n" "Total:" "$total"
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+# NOTE(review): this copy is damaged. The `cat <&2` line is the fused
+# remains of usage()'s here-document and of the option loop of the
+# argument parser that main() calls as parse_args; the help text and the
+# option cases are missing. Restore both functions from the upstream script.
+usage() {
+ cat <&2
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 1 ;;
+ esac
+ done
+
+ # Only the two output formats understood by print_job are accepted
+ if [[ "$FORMAT" != "table" && "$FORMAT" != "raw" ]]; then
+ echo "Invalid format: $FORMAT (must be 'table' or 'raw')" >&2
+ exit 1
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+# Entry point: parse options, print the banner (and the table header in
+# table format), run every scanner in order, then print the summary.
+# Raw format emits job rows only.
+main() {
+    parse_args "$@"
+    setup_colors
+
+    case "$FORMAT" in
+        raw) ;;
+        *)
+            printf '\n'
+            printf '%b\n' "${BOLD}Cron Job Lister — $(hostname -f 2>/dev/null || hostname)${RESET}"
+            printf '%b\n' "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"
+            if [[ "$FORMAT" == "table" ]]; then
+                printf '\n'
+                print_table_header
+            fi
+            ;;
+    esac
+
+    local scanner
+    for scanner in scan_user_crontabs scan_system_crontab scan_cron_d \
+                   scan_cron_dirs scan_systemd_timers scan_anacron; do
+        "$scanner"
+    done
+
+    if [[ "$FORMAT" != "raw" ]]; then
+        print_summary
+    fi
+}
+
+main "$@"
diff --git a/crowdsec-decisions-exporter.sh b/crowdsec-decisions-exporter.sh
new file mode 100755
index 0000000..83035ff
--- /dev/null
+++ b/crowdsec-decisions-exporter.sh
@@ -0,0 +1,518 @@
+#!/bin/bash
+################################################################################
+# Script Name: crowdsec-decisions-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for CrowdSec active decisions — detailed
+# metrics on bans, captchas, scopes, origins, countries, and
+# decision lifecycle timestamps
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Note: This exporter focuses exclusively on CrowdSec active decisions
+# (bans/captchas). For general CrowdSec operational metrics (alerts,
+# bouncers, machines, hub items), see crowdsec-exporter.sh.
+#
+# Prerequisites:
+# - CrowdSec installed and running
+# - cscli command available
+# - jq for JSON parsing
+# - Root/sudo access
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# sudo ./crowdsec-decisions-exporter.sh
+#
+# # HTTP server mode
+# sudo ./crowdsec-decisions-exporter.sh --http -p 9202
+#
+# # Textfile collector mode
+# sudo ./crowdsec-decisions-exporter.sh --textfile
+#
+# Metrics Exported:
+# - crowdsec_decisions_up - Exporter status (1=up, 0=down)
+# - crowdsec_decisions_exporter_info{version} - Exporter version info
+# - crowdsec_decisions_active_total - Total active decisions
+# - crowdsec_decisions_active_by_type{type} - Active decisions by type
+# - crowdsec_decisions_active_by_scope{scope} - Active decisions by scope
+# - crowdsec_decisions_active_by_origin{origin} - Active decisions by origin
+# - crowdsec_decisions_active_by_scenario{scenario} - Active decisions per scenario
+# - crowdsec_decisions_active_by_country{country} - Active decisions per country (top 20)
+# - crowdsec_decisions_oldest_timestamp - Oldest active decision timestamp
+# - crowdsec_decisions_newest_timestamp - Newest active decision timestamp
+# - crowdsec_decisions_expiring_1h - Decisions expiring within 1 hour
+# - crowdsec_decisions_local_api_up - LAPI reachability (1/0)
+# - crowdsec_decisions_exporter_duration_seconds - Script execution time
+# - crowdsec_decisions_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9202
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Defaults consulted by main() and run_http_server
+HTTP_MODE=false                         # "true": main() starts the HTTP server
+HTTP_PORT=9202                          # port handed to nc in HTTP mode
+OUTPUT_FILE=""                          # non-empty: main() writes metrics to this file
+TEXTFILE_DIR="/var/lib/node_exporter"   # textfile collector directory
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+# NOTE(review): this copy is damaged. The `cat <&2; exit 1 ;;` line is the
+# fused remains of show_usage()'s here-document and of the option loop of
+# the argument parser that main() calls as parse_args; the usage text and
+# the option cases are missing. Restore both from the upstream script.
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Check if CrowdSec is installed and responding
+# Returns: 0 if OK, 1 if error
+check_crowdsec() {
+    # Both external tools must be resolvable; the first missing one is
+    # reported on stderr and ends the check with status 1.
+    command -v cscli >/dev/null 2>&1 || {
+        echo "ERROR: cscli command not found" >&2
+        return 1
+    }
+
+    command -v jq >/dev/null 2>&1 || {
+        echo "ERROR: jq not found (required for JSON parsing)" >&2
+        return 1
+    }
+
+    return 0
+}
+
+# Escape special characters in Prometheus label values
+# Args: $1 - string to escape
+# Returns: escaped string safe for Prometheus labels
+prom_escape() {
+    # Backslashes are doubled first so the backslashes added for quotes
+    # are not escaped a second time.
+    local val="$1"
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    # line feeds are removed rather than escaped
+    val="${val//$'\n'/}"
+    # printf rather than echo: echo would swallow values such as "-n" or "-e"
+    printf '%s\n' "$val"
+}
+
+# Check LAPI health
+# Returns: 1 if healthy, 0 if not
+get_lapi_status() {
+    # Prints "1" when `cscli lapi status` succeeds, "0" otherwise; the
+    # command's own output is discarded.
+    local healthy=0
+    cscli lapi status >/dev/null 2>&1 && healthy=1
+    echo "$healthy"
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+# NOTE(review): this copy is damaged. Every `cat </dev/null...` and
+# `cat <&2` line below is the fused remains of a here-document (probably
+# the HELP/TYPE header of the following metric — TODO confirm) and of the
+# statement that came after it. The end of generate_metrics and the start
+# of run_http_server are both lost inside the last such line, so the two
+# functions run together here. Restore them from the upstream script; the
+# comments below describe only what is still visible.
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check CrowdSec status first
+ if ! check_crowdsec; then
+ # NOTE(review): damaged line (lost here-document)
+ cat </dev/null)
+
+ # Handle "null" or empty output from cscli (means no active decisions)
+ local total_decisions=0
+ if [ -n "$decisions_json" ] && [ "$decisions_json" != "null" ]; then
+ # jq 'length' counts the elements of the decisions JSON
+ total_decisions=$(echo "$decisions_json" | jq 'length' 2>/dev/null)
+ total_decisions=${total_decisions:-0}
+ fi
+
+ # ========================================================================
+ # ACTIVE DECISIONS TOTAL
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ # jq prints one "<type> <count>" line per group; each becomes a sample
+ echo "$decisions_json" | jq -r '
+ group_by(.type) | .[] |
+ "\(.[0].type) \(length)"
+ ' 2>/dev/null | while read -r dtype count; do
+ [ -z "$dtype" ] && continue
+ echo "crowdsec_decisions_active_by_type{type=\"$(prom_escape "$dtype")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISIONS BY SCOPE (ip, range, country)
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.scope) | .[] |
+ "\(.[0].scope) \(length)"
+ ' 2>/dev/null | while read -r scope count; do
+ [ -z "$scope" ] && continue
+ echo "crowdsec_decisions_active_by_scope{scope=\"$(prom_escape "$scope")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISIONS BY ORIGIN (cscli, crowdsec, CAPI)
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.origin) | .[] |
+ "\(.[0].origin) \(length)"
+ ' 2>/dev/null | while read -r origin count; do
+ [ -z "$origin" ] && continue
+ echo "crowdsec_decisions_active_by_origin{origin=\"$(prom_escape "$origin")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISIONS BY SCENARIO (top scenarios)
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ # Groups are sorted by descending count before being printed
+ echo "$decisions_json" | jq -r '
+ group_by(.scenario) | map({scenario: .[0].scenario, count: length}) |
+ sort_by(-.count) | .[] |
+ "\(.scenario) \(.count)"
+ ' 2>/dev/null | while read -r scenario count; do
+ [ -z "$scenario" ] && continue
+ echo "crowdsec_decisions_active_by_scenario{scenario=\"$(prom_escape "$scenario")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISIONS BY COUNTRY (top 20)
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ # Only decisions whose scope is "Country"/"country" are counted, grouped
+ # by .value; the 20 largest groups are printed
+ echo "$decisions_json" | jq -r '
+ [.[] | select(.scope == "Country" or .scope == "country")] |
+ if length > 0 then
+ group_by(.value) | map({country: .[0].value, count: length}) |
+ sort_by(-.count) | .[0:20] | .[] |
+ "\(.country) \(.count)"
+ else
+ empty
+ end
+ ' 2>/dev/null | while read -r country count; do
+ [ -z "$country" ] && continue
+ echo "crowdsec_decisions_active_by_country{country=\"$(prom_escape "$country")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISION TIMESTAMPS (oldest and newest)
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ # Smallest created_at string after sorting, converted with `date -d`;
+ # 0 is reported when there is no value or the conversion fails
+ local oldest_ts
+ oldest_ts=$(echo "$decisions_json" | jq -r '[.[].created_at] | sort | first // empty' 2>/dev/null)
+ if [ -n "$oldest_ts" ]; then
+ local oldest_unix
+ oldest_unix=$(date -d "$oldest_ts" +%s 2>/dev/null || echo "0")
+ echo "crowdsec_decisions_oldest_timestamp $oldest_unix"
+ else
+ echo "crowdsec_decisions_oldest_timestamp 0"
+ fi
+ else
+ echo "crowdsec_decisions_oldest_timestamp 0"
+ fi
+
+ echo ""
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ # Same as above, using the largest created_at string
+ local newest_ts
+ newest_ts=$(echo "$decisions_json" | jq -r '[.[].created_at] | sort | last // empty' 2>/dev/null)
+ if [ -n "$newest_ts" ]; then
+ local newest_unix
+ newest_unix=$(date -d "$newest_ts" +%s 2>/dev/null || echo "0")
+ echo "crowdsec_decisions_newest_timestamp $newest_unix"
+ else
+ echo "crowdsec_decisions_newest_timestamp 0"
+ fi
+ else
+ echo "crowdsec_decisions_newest_timestamp 0"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # DECISIONS EXPIRING WITHIN 1 HOUR
+ # ========================================================================
+
+ # NOTE(review): damaged line (lost here-document and guard)
+ cat </dev/null; then
+ local now_epoch cutoff_epoch expiring_count
+ now_epoch=$(date +%s)
+ cutoff_epoch=$((now_epoch + 3600))
+ # The sub() call replaces everything from the first ".<digits>" to the
+ # end of .until with "Z" before fromdateiso8601 parses it; a decision is
+ # counted when its expiry lies in (now, now + 3600]
+ expiring_count=$(echo "$decisions_json" | jq --arg now "$now_epoch" --arg cutoff "$cutoff_epoch" '
+ [.[] | select(.until != null) |
+ (.until | sub("\\.[0-9]+.*$"; "Z") | fromdateiso8601) as $exp |
+ select($exp > ($now | tonumber) and $exp <= ($cutoff | tonumber))
+ ] | length
+ ' 2>/dev/null)
+ echo "crowdsec_decisions_expiring_1h ${expiring_count:-0}"
+ else
+ echo "crowdsec_decisions_expiring_1h 0"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # LAPI HEALTH
+ # ========================================================================
+
+ local lapi_status
+ lapi_status=$(get_lapi_status)
+
+ # NOTE(review): damaged line — generate_metrics ends and run_http_server
+ # begins somewhere inside the text lost here
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ # Each iteration pipes one generated response into a fresh nc listener.
+ # NOTE(review): `read -r request` reads this group's stdin, which the
+ # pipeline as written does not connect to nc's output — confirm that the
+ # client's request line actually reaches it.
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+CrowdSec Decisions Exporter v1.0
+
+CrowdSec Decisions Prometheus Exporter v1.0
+Metrics
+Active decision metrics from cscli decisions list.
+For general CrowdSec operational metrics, see crowdsec-exporter.sh.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point - routes to appropriate output mode
+# Route to the requested output mode after parsing the arguments:
+#   HTTP_MODE=true        - serve metrics over HTTP (blocks until killed)
+#   OUTPUT_FILE non-empty - write metrics to that file via temp file + rename
+#   otherwise             - print metrics on stdout
+# In file mode the function exits 1, leaving any previous OUTPUT_FILE
+# untouched, when a step fails or fewer than 10 lines were generated.
+main() {
+    parse_args "$@"
+
+    if [ "$HTTP_MODE" = true ]; then
+        # Run HTTP server (blocks until killed)
+        run_http_server
+    elif [ -n "$OUTPUT_FILE" ]; then
+        # Textfile collector mode: write atomically using temp file
+        local output_dir
+        output_dir="$(dirname "$OUTPUT_FILE")"
+        if ! mkdir -p "$output_dir"; then
+            echo "ERROR: Cannot create output directory $output_dir" >&2
+            exit 1
+        fi
+
+        # Create temp file in SAME directory for atomic rename (same filesystem)
+        local temp_file
+        if ! temp_file=$(mktemp "${output_dir}/.crowdsec_decisions_metrics.XXXXXX"); then
+            echo "ERROR: Cannot create temp file in $output_dir" >&2
+            exit 1
+        fi
+
+        # Generate metrics to temp file
+        if ! generate_metrics > "$temp_file" 2>/dev/null; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        # Validate: file must exist, have content
+        local file_lines
+        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+
+        if [ "$file_lines" -lt 10 ]; then
+            rm -f "$temp_file"
+            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+            exit 1
+        fi
+
+        # Set permissions, then rename into place; a failure of either step
+        # must not be reported as success and must not leave the temp file.
+        if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to install metrics file $OUTPUT_FILE" >&2
+            exit 1
+        fi
+
+        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+    else
+        # Default: output to stdout
+        generate_metrics
+    fi
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/crowdsec-exporter.sh b/crowdsec-exporter.sh
new file mode 100755
index 0000000..4ea2bc2
--- /dev/null
+++ b/crowdsec-exporter.sh
@@ -0,0 +1,647 @@
+#!/bin/bash
+################################################################################
+# Script Name: crowdsec-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for CrowdSec providing supplementary
+# operational metrics from cscli commands — active decisions,
+# alerts, bouncers, machines, hub items, and threat analysis
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Note: CrowdSec has a built-in Prometheus endpoint at port 6060 for internal
+# metrics (bucket counts, parser hits, etc.). This exporter provides
+# SUPPLEMENTARY operational metrics from cscli commands.
+#
+# Prerequisites:
+# - CrowdSec installed and running
+# - cscli command available
+# - jq for JSON parsing
+# - Root/sudo access
+# - netcat (nc) for HTTP mode
+# - curl for --grab-local mode
+#
+# Usage:
+# # Output to stdout
+# sudo ./crowdsec-exporter.sh
+#
+# # HTTP server mode
+# sudo ./crowdsec-exporter.sh --http -p 9192
+#
+# # Textfile collector mode
+# sudo ./crowdsec-exporter.sh --textfile
+#
+# Metrics Exported:
+# - crowdsec_up - Exporter status (1=up, 0=down)
+# - crowdsec_info{version,exporter_version} - CrowdSec version info
+# - crowdsec_decisions_active - Total active decisions
+# - crowdsec_decisions_active_by_type{type} - Active decisions by type
+# - crowdsec_decisions_active_by_origin{origin} - Active decisions by origin
+# - crowdsec_decisions_active_by_scenario{scenario} - Active decisions by scenario
+# - crowdsec_alerts_total - Total alerts
+# - crowdsec_alerts_per_period{period} - Alerts in 1h/24h
+# - crowdsec_top_attacker_decisions{ip} - Top 5 IPs by decision count
+# - crowdsec_top_scenario_alerts{scenario} - Top 5 scenarios by alert count
+# - crowdsec_bouncer_up{name} - Per-bouncer registered status
+# - crowdsec_bouncer_last_pull_timestamp{name} - Per-bouncer last pull time
+# - crowdsec_machine_up{name} - Machine registration status
+# - crowdsec_lapi_up - LAPI health status
+# - crowdsec_hub_items{type} - Installed hub items per type
+# - crowdsec_exporter_duration_seconds - Script execution time
+# - crowdsec_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9192
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Textfile directory (listed under "Configuration" in the file header)
+TEXTFILE_DIR="/var/lib/node_exporter"
+# Output path for file mode; main() writes a file only when this is non-empty
+OUTPUT_FILE=""
+# When true, main() runs the HTTP server instead of printing metrics
+HTTP_MODE=false
+# Port the HTTP server listens on
+HTTP_PORT=9192
+# When true, generate_metrics also fetches localhost:LOCAL_PORT/metrics
+GRAB_LOCAL=false
+# Port of the local metrics endpoint fetched when GRAB_LOCAL is true
+LOCAL_PORT=6060
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Verify that everything the exporter depends on is usable: the cscli and
+# jq commands must exist and "cscli lapi status" must succeed.
+# Outputs: one "ERROR: ..." line on stderr naming the first failed check
+# Returns: 0 when every check passes, 1 otherwise
+check_crowdsec() {
+    command -v cscli >/dev/null 2>&1 || {
+        echo "ERROR: cscli command not found" >&2
+        return 1
+    }
+
+    command -v jq >/dev/null 2>&1 || {
+        echo "ERROR: jq not found (required for JSON parsing)" >&2
+        return 1
+    }
+
+    # Both tools exist; now make sure the local API actually answers
+    cscli lapi status >/dev/null 2>&1 || {
+        echo "ERROR: CrowdSec LAPI not responding" >&2
+        return 1
+    }
+
+    return 0
+}
+
+# Escape a string for use as a Prometheus label value.
+# Backslash becomes \\, double quote becomes \", and a line feed becomes the
+# two-character sequence \n, so the value always stays on a single line.
+# Args:    $1 - raw label value
+# Outputs: the escaped value followed by a newline on stdout
+prom_escape() {
+    local val="$1"
+    # Backslashes first, so the escapes added below are not escaped again
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    val="${val//$'\n'/\\n}"
+    # printf instead of echo: echo would treat values like "-n" as options
+    printf '%s\n' "$val"
+}
+
+# Print the first dotted x.y.z number found in "cscli version" output.
+# Outputs: version string on stdout (e.g. "1.5.4"); nothing if none is found
+get_crowdsec_version() {
+    local semver_re='[0-9]+\.[0-9]+\.[0-9]+'
+    cscli version 2>/dev/null \
+        | grep -oE "$semver_re" \
+        | head -1
+}
+
+# Fetch the decisions list from cscli in JSON form.
+# Outputs: whatever "cscli decisions list -o json" prints on stdout;
+#          cscli's stderr is discarded
+# Returns: exit status of cscli
+get_decisions_json() {
+    local -a query=(decisions list -o json)
+    cscli "${query[@]}" 2>/dev/null
+}
+
+# Fetch the alerts list from cscli in JSON form, optionally time-limited.
+# Args:    $1 - optional value for --since (e.g. "1h"); empty means no limit
+# Outputs: whatever cscli prints on stdout; cscli's stderr is discarded
+# Returns: exit status of cscli
+get_alerts_json() {
+    local since="$1"
+    local -a query=(alerts list)
+
+    # Only add the time window when the caller asked for one
+    if [ -n "$since" ]; then
+        query+=(--since "$since")
+    fi
+    query+=(-o json)
+
+    cscli "${query[@]}" 2>/dev/null
+}
+
+# Fetch the registered bouncers from cscli in JSON form.
+# Outputs: whatever "cscli bouncers list -o json" prints on stdout;
+#          cscli's stderr is discarded
+# Returns: exit status of cscli
+get_bouncers_json() {
+    local -a query=(bouncers list -o json)
+    cscli "${query[@]}" 2>/dev/null
+}
+
+# Fetch the registered machines from cscli in JSON form.
+# Outputs: whatever "cscli machines list -o json" prints on stdout;
+#          cscli's stderr is discarded
+# Returns: exit status of cscli
+get_machines_json() {
+    local -a query=(machines list -o json)
+    cscli "${query[@]}" 2>/dev/null
+}
+
+# Report LAPI health as a gauge value.
+# Outputs: "1" when "cscli lapi status" succeeds, "0" otherwise
+get_lapi_status() {
+    local healthy="0"
+    if cscli lapi status >/dev/null 2>&1; then
+        healthy="1"
+    fi
+    echo "$healthy"
+}
+
+# Fetch the hub item listing from cscli in JSON form.
+# Outputs: whatever "cscli hub list -o json" prints on stdout;
+#          cscli's stderr is discarded
+# Returns: exit status of cscli
+get_hub_json() {
+    local -a query=(hub list -o json)
+    cscli "${query[@]}" 2>/dev/null
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check CrowdSec status first
+ if ! check_crowdsec; then
+ cat </dev/null)
+ total_decisions=${total_decisions:-0}
+ fi
+
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.type) | .[] |
+ "\(.[0].type) \(length)"
+ ' 2>/dev/null | while read -r dtype count; do
+ [ -z "$dtype" ] && continue
+ echo "crowdsec_decisions_active_by_type{type=\"$(prom_escape "$dtype")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # Decisions by origin (crowdsec, cscli, CAPI)
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.origin) | .[] |
+ "\(.[0].origin) \(length)"
+ ' 2>/dev/null | while read -r origin count; do
+ [ -z "$origin" ] && continue
+ echo "crowdsec_decisions_active_by_origin{origin=\"$(prom_escape "$origin")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # Decisions by scenario
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.scenario) | .[] |
+ "\(.[0].scenario) \(length)"
+ ' 2>/dev/null | while read -r scenario count; do
+ [ -z "$scenario" ] && continue
+ echo "crowdsec_decisions_active_by_scenario{scenario=\"$(prom_escape "$scenario")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # Top 5 attackers by decision count
+ cat </dev/null; then
+ echo "$decisions_json" | jq -r '
+ group_by(.value) | map({ip: .[0].value, count: length}) |
+ sort_by(-.count) | .[0:5] | .[] |
+ "\(.ip) \(.count)"
+ ' 2>/dev/null | while read -r ip count; do
+ [ -z "$ip" ] && continue
+ echo "crowdsec_top_attacker_decisions{ip=\"$(prom_escape "$ip")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # ALERTS METRICS
+ # ========================================================================
+
+ local alerts_json
+ alerts_json=$(get_alerts_json)
+
+ local total_alerts=0
+ if [ -n "$alerts_json" ] && [ "$alerts_json" != "null" ]; then
+ total_alerts=$(echo "$alerts_json" | jq 'length' 2>/dev/null)
+ total_alerts=${total_alerts:-0}
+ fi
+
+ cat </dev/null)
+ alerts_1h=${alerts_1h:-0}
+ fi
+
+ local alerts_24h=0
+ if [ -n "$alerts_24h_json" ] && [ "$alerts_24h_json" != "null" ]; then
+ alerts_24h=$(echo "$alerts_24h_json" | jq 'length' 2>/dev/null)
+ alerts_24h=${alerts_24h:-0}
+ fi
+
+ cat </dev/null; then
+ echo "$alerts_json" | jq -r '
+ group_by(.scenario) | map({scenario: .[0].scenario, count: length}) |
+ sort_by(-.count) | .[0:5] | .[] |
+ "\(.scenario) \(.count)"
+ ' 2>/dev/null | while read -r scenario count; do
+ [ -z "$scenario" ] && continue
+ echo "crowdsec_top_scenario_alerts{scenario=\"$(prom_escape "$scenario")\"} $count"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # BOUNCER METRICS
+ # ========================================================================
+
+ local bouncers_json
+ bouncers_json=$(get_bouncers_json)
+
+ cat </dev/null | while read -r name status; do
+ [ -z "$name" ] && continue
+ echo "crowdsec_bouncer_up{name=\"$(prom_escape "$name")\"} $status"
+ done
+ fi
+
+ echo ""
+
+ cat </dev/null | while read -r name last_pull; do
+ [ -z "$name" ] && continue
+ # Convert ISO timestamp to Unix epoch
+ local ts
+ ts=$(date -d "$last_pull" +%s 2>/dev/null || echo "0")
+ echo "crowdsec_bouncer_last_pull_timestamp{name=\"$(prom_escape "$name")\"} $ts"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # MACHINE METRICS
+ # ========================================================================
+
+ local machines_json
+ machines_json=$(get_machines_json)
+
+ cat </dev/null | while read -r name status; do
+ [ -z "$name" ] && continue
+ echo "crowdsec_machine_up{name=\"$(prom_escape "$name")\"} $status"
+ done
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # LAPI HEALTH
+ # ========================================================================
+
+ local lapi_status
+ lapi_status=$(get_lapi_status)
+
+ cat </dev/null)
+ parsers=$(echo "$hub_json" | jq '[.parsers // [] | .[] | select(.installed == true)] | length' 2>/dev/null)
+ scenarios=$(echo "$hub_json" | jq '[.scenarios // [] | .[] | select(.installed == true)] | length' 2>/dev/null)
+ postoverflows=$(echo "$hub_json" | jq '[.postoverflows // [] | .[] | select(.installed == true)] | length' 2>/dev/null)
+
+ echo "crowdsec_hub_items{type=\"collections\"} ${collections:-0}"
+ echo "crowdsec_hub_items{type=\"parsers\"} ${parsers:-0}"
+ echo "crowdsec_hub_items{type=\"scenarios\"} ${scenarios:-0}"
+ echo "crowdsec_hub_items{type=\"postoverflows\"} ${postoverflows:-0}"
+ else
+ echo "crowdsec_hub_items{type=\"collections\"} 0"
+ echo "crowdsec_hub_items{type=\"parsers\"} 0"
+ echo "crowdsec_hub_items{type=\"scenarios\"} 0"
+ echo "crowdsec_hub_items{type=\"postoverflows\"} 0"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # BUILT-IN METRICS (optional, via --grab-local)
+ # ========================================================================
+
+ if [ "$GRAB_LOCAL" = true ]; then
+ local builtin_metrics
+ builtin_metrics=$(curl -sf --max-time 5 "http://localhost:${LOCAL_PORT}/metrics" 2>/dev/null)
+
+ if [ -n "$builtin_metrics" ]; then
+ echo "# ================================================================"
+ echo "# CrowdSec built-in metrics from localhost:${LOCAL_PORT}"
+ echo "# ================================================================"
+ echo "$builtin_metrics"
+ echo ""
+ else
+ echo "# WARNING: Failed to fetch built-in metrics from localhost:${LOCAL_PORT}"
+ echo ""
+ fi
+ fi
+
+ # ========================================================================
+ # EXPORTER RUNTIME
+ # ========================================================================
+
+ local script_end script_duration
+ script_end=$(date +%s)
+ script_duration=$((script_end - script_start))
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+CrowdSec Exporter v1.0
+
+CrowdSec Prometheus Exporter v1.0
+Metrics
+Supplementary operational metrics from cscli commands.
+For internal CrowdSec metrics (buckets, parsers), see port 6060.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point: parse CLI arguments, then dispatch to one output mode.
+#   HTTP_MODE=true    -> serve metrics over HTTP (blocks until killed)
+#   OUTPUT_FILE set   -> write the metrics file atomically via a temp file
+#   otherwise         -> print metrics to stdout
+# Args:    "$@" - script arguments, forwarded to parse_args
+# Exits:   1 when the output file cannot be produced; any previously
+#          written OUTPUT_FILE is left untouched in that case
+main() {
+    parse_args "$@"
+
+    if [ "$HTTP_MODE" = true ]; then
+        # Run HTTP server (blocks until killed)
+        run_http_server
+    elif [ -n "$OUTPUT_FILE" ]; then
+        local output_dir temp_file file_lines
+        output_dir="$(dirname "$OUTPUT_FILE")"
+        if ! mkdir -p "$output_dir"; then
+            echo "ERROR: Cannot create output directory $output_dir" >&2
+            exit 1
+        fi
+
+        # Temp file lives in the SAME directory so the final rename stays on
+        # one filesystem and is therefore atomic.
+        if ! temp_file=$(mktemp "${output_dir}/.crowdsec_metrics.XXXXXX"); then
+            echo "ERROR: Cannot create temp file in $output_dir" >&2
+            exit 1
+        fi
+
+        # Generate metrics to temp file
+        if ! generate_metrics > "$temp_file" 2>/dev/null; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        # Sanity check: refuse to publish a suspiciously short file
+        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+        if [ "$file_lines" -lt 10 ]; then
+            rm -f "$temp_file"
+            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+            exit 1
+        fi
+
+        # Make the file world-readable, then rename it into place. Either
+        # step failing must not be reported as success nor leak the temp file.
+        if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to write $OUTPUT_FILE" >&2
+            exit 1
+        fi
+
+        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+    else
+        # Default: output to stdout
+        generate_metrics
+    fi
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/crowdsec-install.sh b/crowdsec-install.sh
new file mode 100644
index 0000000..502ee7e
--- /dev/null
+++ b/crowdsec-install.sh
@@ -0,0 +1,444 @@
+#!/bin/bash
+################################################################################
+# Script Name: crowdsec-install.sh
+# Version: 1.0
+# Description: Automated CrowdSec installation with firewall bouncer,
+# collection selection, allowlists, and Prometheus integration
+# on Debian/Ubuntu and RHEL/Rocky/AlmaLinux
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# sudo ./crowdsec-install.sh
+# sudo ./crowdsec-install.sh --collections "sshd,nginx"
+# sudo ./crowdsec-install.sh --allowlist "10.0.0.0/8" --prometheus
+# sudo ./crowdsec-install.sh --dry-run
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+# Comma-separated collections to install; empty means use the detected ones
+COLLECTIONS=""
+# Comma-separated addresses passed to configure_allowlist; empty skips it
+ALLOWLIST=""
+# Suffix of the bouncer package name: crowdsec-firewall-bouncer-<type>
+BOUNCER_TYPE="iptables"
+# When true, configure_prometheus reports the metrics endpoint
+PROMETHEUS=false
+# Console enrollment key; empty skips enroll_console
+ENROLL_KEY=""
+# When true, install_bouncer and the bouncer check are skipped
+NO_BOUNCER=false
+# When true, every step only logs what it would do
+DRY_RUN=false
+
+# Colors (ANSI escape sequences, expanded by "echo -e" in the log_* helpers)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+# Shared formatter: prints "<color>[TAG]<reset> message", expanding escapes.
+# Args: $1 - color escape sequence, $2 - tag text, rest - message words
+_log_line() {
+    local color="$1" tag="$2"
+    shift 2
+    echo -e "${color}[${tag}]${NC} $*"
+}
+
+# Level-specific loggers. Only log_error writes to stderr; the others
+# write to stdout.
+log_info() {
+    _log_line "$GREEN" "INFO" "$@"
+}
+log_warn() {
+    _log_line "$YELLOW" "WARN" "$@"
+}
+log_error() {
+    _log_line "$RED" "ERROR" "$@" >&2
+}
+log_step() {
+    _log_line "$CYAN" "STEP" "$@"
+}
+
+show_usage() {
+ cat </dev/null || systemctl is-active --quiet ssh 2>/dev/null; then
+ detected+=("crowdsecurity/sshd")
+ log_info " Detected: SSH"
+ fi
+
+ # Nginx
+ if systemctl is-active --quiet nginx 2>/dev/null; then
+ detected+=("crowdsecurity/nginx")
+ log_info " Detected: Nginx"
+ fi
+
+ # Apache
+ if systemctl is-active --quiet apache2 2>/dev/null || systemctl is-active --quiet httpd 2>/dev/null; then
+ detected+=("crowdsecurity/apache2")
+ log_info " Detected: Apache"
+ fi
+
+ # Postfix
+ if systemctl is-active --quiet postfix 2>/dev/null; then
+ detected+=("crowdsecurity/postfix")
+ log_info " Detected: Postfix"
+ fi
+
+ # Dovecot
+ if systemctl is-active --quiet dovecot 2>/dev/null; then
+ detected+=("crowdsecurity/dovecot")
+ log_info " Detected: Dovecot"
+ fi
+
+ # MySQL/MariaDB
+ if systemctl is-active --quiet mysql 2>/dev/null || systemctl is-active --quiet mariadb 2>/dev/null; then
+ detected+=("crowdsecurity/mysql")
+ log_info " Detected: MySQL/MariaDB"
+ fi
+
+ # PostgreSQL
+ if systemctl is-active --quiet postgresql 2>/dev/null; then
+ detected+=("crowdsecurity/pgsql")
+ log_info " Detected: PostgreSQL"
+ fi
+
+ DETECTED_COLLECTIONS="${detected[*]}"
+}
+
+# ============================================================================
+# INSTALLATION
+# ============================================================================
+
+# Register the CrowdSec packagecloud repository on Debian/Ubuntu.
+# Globals:  DRY_RUN (read)
+# Returns:  0 on success or in dry-run mode; 1 when the setup script cannot
+#           be downloaded or fails to run
+add_repo_debian() {
+    log_step "Adding CrowdSec repository (Debian/Ubuntu)..."
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would add CrowdSec apt repository"
+        return
+    fi
+
+    apt-get update -qq
+    apt-get install -y -qq curl gnupg apt-transport-https >/dev/null 2>&1
+
+    # Download to a file first. With -f curl fails on HTTP errors, so an
+    # error page or a truncated download is never executed by bash.
+    local installer
+    installer=$(mktemp)
+    if ! curl -fsSL -o "$installer" \
+        "https://packagecloud.io/install/repositories/crowdsec/crowdsec/script.deb.sh"; then
+        rm -f "$installer"
+        log_error "Failed to download the CrowdSec repository setup script"
+        return 1
+    fi
+
+    if ! bash "$installer" >/dev/null 2>&1; then
+        rm -f "$installer"
+        log_error "CrowdSec repository setup script failed"
+        return 1
+    fi
+    rm -f "$installer"
+}
+
+# Register the CrowdSec packagecloud repository on RHEL-family systems.
+# Globals:  DRY_RUN (read)
+# Returns:  0 on success or in dry-run mode; 1 when the setup script cannot
+#           be downloaded or fails to run
+add_repo_rhel() {
+    log_step "Adding CrowdSec repository (RHEL/Rocky)..."
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would add CrowdSec yum repository"
+        return
+    fi
+
+    # Download to a file first. With -f curl fails on HTTP errors, so an
+    # error page or a truncated download is never executed by bash.
+    local installer
+    installer=$(mktemp)
+    if ! curl -fsSL -o "$installer" \
+        "https://packagecloud.io/install/repositories/crowdsec/crowdsec/script.rpm.sh"; then
+        rm -f "$installer"
+        log_error "Failed to download the CrowdSec repository setup script"
+        return 1
+    fi
+
+    if ! bash "$installer" >/dev/null 2>&1; then
+        rm -f "$installer"
+        log_error "CrowdSec repository setup script failed"
+        return 1
+    fi
+    rm -f "$installer"
+}
+
+# Install the crowdsec package and start its service.
+# Globals:  DRY_RUN, OS_FAMILY (read)
+# Returns:  0 on success or in dry-run mode; 1 (after logging the reason)
+#           when the OS family is unsupported, the package install fails,
+#           or the service cannot be enabled
+install_agent() {
+    log_step "Installing CrowdSec agent..."
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would install crowdsec package"
+        return
+    fi
+
+    local pkg_mgr
+    case "$OS_FAMILY" in
+        debian)
+            if ! apt-get install -y -qq crowdsec >/dev/null 2>&1; then
+                log_error "Failed to install the crowdsec package"
+                return 1
+            fi
+            ;;
+        rhel)
+            # Fall back to yum on releases that do not ship dnf
+            pkg_mgr=dnf
+            command -v dnf >/dev/null 2>&1 || pkg_mgr=yum
+            if ! "$pkg_mgr" install -y -q crowdsec >/dev/null 2>&1; then
+                log_error "Failed to install the crowdsec package"
+                return 1
+            fi
+            ;;
+        *)
+            log_error "Unsupported OS family: ${OS_FAMILY:-unknown}"
+            return 1
+            ;;
+    esac
+
+    if ! systemctl enable --now crowdsec >/dev/null 2>&1; then
+        log_error "Failed to enable/start the crowdsec service"
+        return 1
+    fi
+    log_info "CrowdSec agent installed and running"
+}
+
+# Install the firewall bouncer package and start its service.
+# The package name carries the backend suffix (BOUNCER_TYPE), but the
+# systemd unit it installs is named plain "crowdsec-firewall-bouncer".
+# Globals:  NO_BOUNCER, BOUNCER_TYPE, DRY_RUN, OS_FAMILY (read)
+# Returns:  0 on success, when skipped, or in dry-run mode; 1 (after logging
+#           the reason) when the install or the service start fails
+install_bouncer() {
+    if [ "$NO_BOUNCER" = true ]; then
+        log_info "Skipping bouncer installation (--no-bouncer)"
+        return
+    fi
+
+    local pkg="crowdsec-firewall-bouncer-${BOUNCER_TYPE}"
+    local svc="crowdsec-firewall-bouncer"
+    log_step "Installing bouncer: $pkg..."
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would install $pkg"
+        return
+    fi
+
+    local pkg_mgr
+    case "$OS_FAMILY" in
+        debian)
+            if ! apt-get install -y -qq "$pkg" >/dev/null 2>&1; then
+                log_error "Failed to install $pkg"
+                return 1
+            fi
+            ;;
+        rhel)
+            # Fall back to yum on releases that do not ship dnf
+            pkg_mgr=dnf
+            command -v dnf >/dev/null 2>&1 || pkg_mgr=yum
+            if ! "$pkg_mgr" install -y -q "$pkg" >/dev/null 2>&1; then
+                log_error "Failed to install $pkg"
+                return 1
+            fi
+            ;;
+    esac
+
+    if ! systemctl enable --now "$svc" >/dev/null 2>&1; then
+        log_error "Failed to enable/start the $svc service"
+        return 1
+    fi
+    log_info "Bouncer installed: $pkg"
+}
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Install CrowdSec collections: the user-supplied list when COLLECTIONS is
+# set, otherwise the collections found by service detection.
+# Names without a "/" get the "crowdsecurity/" prefix; empty list entries
+# (e.g. "sshd,,nginx") are ignored.
+# Globals:  COLLECTIONS, DETECTED_COLLECTIONS, DRY_RUN (read)
+# Returns:  0; an individual install failure is logged as a warning only
+install_collections() {
+    local -a wanted=()
+    local -a requested=()
+    local col
+
+    if [ -n "$COLLECTIONS" ]; then
+        # User-specified collections
+        IFS=',' read -ra requested <<< "$COLLECTIONS"
+        for col in "${requested[@]}"; do
+            # Trim surrounding whitespace
+            col="${col#"${col%%[![:space:]]*}"}"
+            col="${col%"${col##*[![:space:]]}"}"
+            [ -n "$col" ] || continue
+            # Add crowdsecurity/ prefix if not present
+            if [[ "$col" != */* ]]; then
+                col="crowdsecurity/$col"
+            fi
+            wanted+=("$col")
+        done
+    else
+        # Auto-detected collections (space-separated)
+        read -ra wanted <<< "${DETECTED_COLLECTIONS:-}"
+    fi
+
+    if [ "${#wanted[@]}" -eq 0 ]; then
+        log_warn "No collections to install"
+        return 0
+    fi
+
+    log_step "Installing collections: ${wanted[*]}"
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would install: ${wanted[*]}"
+        return
+    fi
+
+    for col in "${wanted[@]}"; do
+        if cscli collections install "$col" >/dev/null 2>&1; then
+            log_info "  Installed: $col"
+        else
+            log_warn "  Failed to install: $col"
+        fi
+    done
+}
+
+# Add a long-lived "whitelist" decision for every entry in ALLOWLIST.
+# Entries containing "/" are CIDR ranges and are passed with --range;
+# everything else is passed with --ip. Empty entries are ignored.
+# NOTE(review): this records a decision of type "whitelist"; whether the
+# installed bouncer honours that type is not visible here - TODO confirm.
+# Globals:  ALLOWLIST, DRY_RUN (read)
+# Returns:  0; an individual failure is logged as a warning only
+configure_allowlist() {
+    if [ -z "$ALLOWLIST" ]; then
+        return
+    fi
+
+    log_step "Configuring allowlist..."
+
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would whitelist: $ALLOWLIST"
+        return
+    fi
+
+    local -a entries=()
+    local entry target_flag
+    IFS=',' read -ra entries <<< "$ALLOWLIST"
+    for entry in "${entries[@]}"; do
+        # Trim surrounding whitespace
+        entry="${entry#"${entry%%[![:space:]]*}"}"
+        entry="${entry%"${entry##*[![:space:]]}"}"
+        [ -n "$entry" ] || continue
+
+        target_flag="--ip"
+        if [[ "$entry" == */* ]]; then
+            target_flag="--range"
+        fi
+
+        if cscli decisions add "$target_flag" "$entry" --type whitelist --duration 87600h >/dev/null 2>&1; then
+            log_info "  Whitelisted: $entry"
+        else
+            log_warn "  Failed to whitelist: $entry"
+        fi
+    done
+}
+
+# Report the Prometheus metrics endpoint when --prometheus was requested.
+# Nothing is modified: the function only checks whether config.yaml has a
+# "prometheus:" entry and logs the endpoint URL.
+# Globals:  PROMETHEUS, DRY_RUN (read)
+configure_prometheus() {
+    [[ "$PROMETHEUS" == true ]] || return 0
+
+    log_step "Enabling Prometheus metrics on :6060..."
+
+    if [[ "$DRY_RUN" == true ]]; then
+        log_info "[DRY RUN] Would enable Prometheus metrics"
+        return
+    fi
+
+    local config="/etc/crowdsec/config.yaml"
+    # Prometheus is enabled by default in CrowdSec, verify
+    if [[ -f "$config" ]] && grep -q "prometheus:" "$config"; then
+        log_info "Prometheus metrics already configured"
+    fi
+
+    log_info "Prometheus metrics available at http://localhost:6060/metrics"
+}
+
+# Enroll this instance with the CrowdSec console when a key was supplied.
+# The key is a credential, so the dry-run message shows at most its last
+# four characters instead of the full value.
+# Globals:  ENROLL_KEY, DRY_RUN (read)
+# Returns:  0; an enrollment failure is logged as a warning only
+enroll_console() {
+    if [ -z "$ENROLL_KEY" ]; then
+        return
+    fi
+
+    log_step "Enrolling with CrowdSec console..."
+
+    if [ "$DRY_RUN" = true ]; then
+        local masked="****"
+        # Reveal a short tail only when the key is long enough to stay secret
+        if [ "${#ENROLL_KEY}" -gt 8 ]; then
+            masked="****${ENROLL_KEY: -4}"
+        fi
+        log_info "[DRY RUN] Would enroll with key: $masked"
+        return
+    fi
+
+    if cscli console enroll "$ENROLL_KEY" >/dev/null 2>&1; then
+        log_info "Enrolled with CrowdSec console"
+    else
+        log_warn "Console enrollment failed — verify enrollment key"
+    fi
+}
+
+# ============================================================================
+# VERIFICATION
+# ============================================================================
+
+# Print a post-install summary: agent and bouncer service state, installed
+# collections, registered bouncers, and a list of useful commands.
+# The bouncer is checked under its systemd unit name
+# "crowdsec-firewall-bouncer", which does not carry the backend suffix used
+# in the package name.
+# Globals:  NO_BOUNCER (read)
+# Returns:  0; problems are reported through log_error only
+verify_installation() {
+    log_step "Verifying installation..."
+
+    echo ""
+
+    # CrowdSec agent
+    if systemctl is-active --quiet crowdsec 2>/dev/null; then
+        log_info "✓ CrowdSec agent: running"
+    else
+        log_error "✗ CrowdSec agent: not running"
+    fi
+
+    # Bouncer
+    if [ "$NO_BOUNCER" != true ]; then
+        local bouncer_svc="crowdsec-firewall-bouncer"
+        if systemctl is-active --quiet "$bouncer_svc" 2>/dev/null; then
+            log_info "✓ Firewall bouncer: running"
+        else
+            log_error "✗ Firewall bouncer: not running"
+        fi
+    fi
+
+    # Collections ("|| true": an empty listing must not abort under set -e)
+    log_info "Installed collections:"
+    cscli collections list 2>/dev/null | grep -E "enabled|installed" || true
+
+    # Bouncers
+    log_info "Registered bouncers:"
+    cscli bouncers list 2>/dev/null || true
+
+    echo ""
+    log_info "Installation complete"
+    echo ""
+    log_info "Useful commands:"
+    echo "  cscli decisions list    — view active decisions"
+    echo "  cscli alerts list       — view recent alerts"
+    echo "  cscli metrics           — view metrics summary"
+    echo "  cscli hub list          — view installed hub items"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Installer entry point: parse arguments, run the pre-flight checks, add
+# the package repository for the detected OS family, then run every
+# install/configure step in order. The final verification is skipped in
+# dry-run mode.
+# Args: "$@" - script arguments, forwarded to parse_args
+main() {
+    parse_args "$@"
+    check_root
+    detect_os
+
+    echo ""
+    log_info "=== CrowdSec Installation Script v1.0 ==="
+    echo ""
+
+    if [[ "$DRY_RUN" == true ]]; then
+        log_warn "DRY RUN MODE — no changes will be made"
+        echo ""
+    fi
+
+    detect_services
+
+    # Only the two supported families get a repository; others fall through
+    if [[ "$OS_FAMILY" == debian ]]; then
+        add_repo_debian
+    elif [[ "$OS_FAMILY" == rhel ]]; then
+        add_repo_rhel
+    fi
+
+    local step
+    for step in \
+        install_agent \
+        install_collections \
+        configure_allowlist \
+        install_bouncer \
+        configure_prometheus \
+        enroll_console; do
+        "$step"
+    done
+
+    [[ "$DRY_RUN" == true ]] || verify_installation
+}
+
+main "$@"
diff --git a/database-backup-exporter.sh b/database-backup-exporter.sh
new file mode 100644
index 0000000..686b6ef
--- /dev/null
+++ b/database-backup-exporter.sh
@@ -0,0 +1,313 @@
+#!/bin/bash
+#############################################################
+#### Database Backup Exporter for Prometheus ####
+#### Monitor MySQL and PostgreSQL backup freshness, ####
+#### size, and status via node_exporter textfile collector ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: ./database-backup-exporter.sh [OPTIONS] ####
+#############################################################
+
+set -euo pipefail
+
+# -----------------------------
+# Defaults
+# -----------------------------
+# Directory scanned for backup files (override: --backup-dir)
+BACKUP_DIR="/opt/backups"
+# Age in seconds beyond which a backup counts as stale (override: --max-age)
+MAX_AGE=86400
+# Metrics file written on each collection (override: --prom-file)
+PROM_FILE="/var/lib/node_exporter/database_backups.prom"
+# Seconds to sleep between collections (override: --interval)
+INTERVAL=300
+# When true, collect once and exit instead of looping (set by --once)
+RUN_ONCE=false
+
+# -----------------------------
+# Color codes
+# -----------------------------
+# ANSI escape sequences, expanded by "echo -e" in the log_* helpers
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+GREEN='\033[0;32m'
+NC='\033[0m'
+
+# -----------------------------
+# Logging
+# -----------------------------
+# Every logger prints "<color>[LEVEL]<reset> <YYYY-mm-dd HH:MM:SS> <message>".
+# log_info writes to stdout; log_warn and log_error write to stderr.
+
+# Shared formatter.
+# Args: $1 - color escape sequence, $2 - level text, rest - message words
+_log_stamped() {
+    local color="$1" level="$2"
+    shift 2
+    echo -e "${color}[${level}]${NC} $(date '+%Y-%m-%d %H:%M:%S') $*"
+}
+
+log_info() {
+    _log_stamped "$GREEN" "INFO" "$@"
+}
+
+log_warn() {
+    _log_stamped "$YELLOW" "WARN" "$@" >&2
+}
+
+log_error() {
+    _log_stamped "$RED" "ERROR" "$@" >&2
+}
+
+# -----------------------------
+# Usage
+# -----------------------------
+usage() {
+ cat <_YYYYMMDD[HHMMSS].
+ Examples: myapp_20260309.sql.gz orders_20260308120000.pgdump
+
+EOF
+ exit 0
+}
+
+# -----------------------------
+# Parse arguments
+# -----------------------------
+# Translate command-line options into the global settings.
+# Globals:  BACKUP_DIR, MAX_AGE, PROM_FILE, INTERVAL, RUN_ONCE (written)
+# Args:     "$@" - script arguments
+# Exits:    0 after printing usage for --help; 1 on an unknown option, a
+#           missing option value, or a non-numeric --max-age
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --backup-dir|--max-age|--prom-file|--interval)
+                # Checked here so "set -u" never sees an unbound $2
+                if [[ $# -lt 2 ]]; then
+                    log_error "Option $1 requires a value"
+                    exit 1
+                fi
+                case "$1" in
+                    --backup-dir)
+                        BACKUP_DIR="$2"
+                        ;;
+                    --max-age)
+                        # Used in arithmetic later, so it must be an integer
+                        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
+                            log_error "--max-age must be a whole number of seconds: $2"
+                            exit 1
+                        fi
+                        MAX_AGE="$2"
+                        ;;
+                    --prom-file)
+                        PROM_FILE="$2"
+                        ;;
+                    --interval)
+                        INTERVAL="$2"
+                        ;;
+                esac
+                shift 2
+                ;;
+            --once)
+                RUN_ONCE=true
+                shift
+                ;;
+            --help)
+                usage
+                ;;
+            *)
+                log_error "Unknown option: $1"
+                # usage ends with "exit 0"; run it in a subshell so the
+                # script itself can still exit non-zero for a bad option
+                ( usage ) >&2
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# -----------------------------
+# Detect backup type from ext
+# -----------------------------
+# Classify a backup file by its extension.
+# Args:    $1 - file name
+# Outputs: "postgres" for *.pgdump / *.dump, "mysql" for *.sql / *.sql.gz,
+#          "unknown" for anything else
+detect_type() {
+    local filename="$1"
+    local kind="unknown"
+
+    if [[ "$filename" == *.pgdump || "$filename" == *.dump ]]; then
+        kind="postgres"
+    elif [[ "$filename" == *.sql || "$filename" == *.sql.gz ]]; then
+        kind="mysql"
+    fi
+
+    echo "$kind"
+}
+
+# -----------------------------
+# Extract database name
+# -----------------------------
+# Derive the database name from a backup file path.
+# The known extensions (.gz, .sql, .dump, .pgdump) are stripped in that
+# order, then a trailing "_<8 to 14 digits>" date segment is removed.
+# Args:    $1 - path or file name of the backup
+# Outputs: the database name followed by a newline on stdout
+extract_dbname() {
+    local filename
+    # "--" keeps a file name that starts with "-" from being read as an option
+    filename="$(basename -- "$1")"
+    # Strip all known extensions
+    filename="${filename%.gz}"
+    filename="${filename%.sql}"
+    filename="${filename%.dump}"
+    filename="${filename%.pgdump}"
+
+    # Expect pattern: dbname_YYYYMMDD... — keep everything before the date
+    local date_suffix_re='^(.*)_[0-9]{8,14}$'
+    if [[ "$filename" =~ $date_suffix_re ]]; then
+        filename="${BASH_REMATCH[1]}"
+    fi
+
+    # printf instead of echo: echo would treat names like "-n" as options
+    printf '%s\n' "$filename"
+}
+
+# -----------------------------
+# Collect and write metrics
+# -----------------------------
+# Scan a directory tree for backup files and print Prometheus metrics.
+# Files are grouped by "dbname|type"; for each group the newest file's
+# mtime and size, the file count, the age and a freshness flag are reported.
+#
+# stdout carries ONLY the metrics payload, because callers capture it.
+# Progress and warnings go to stderr. The payload separates lines with the
+# literal two-character sequence "\n", which the writer expands using
+# "echo -e".
+#
+# Args:    $1 - backup directory
+#          $2 - maximum backup age in seconds before it counts as stale
+# Returns: 1 when the directory does not exist, 0 otherwise
+collect_metrics() {
+    local backup_dir="$1"
+    local max_age="$2"
+    local now
+    now="$(date +%s)"
+
+    if [[ ! -d "$backup_dir" ]]; then
+        log_error "Backup directory does not exist: $backup_dir"
+        return 1
+    fi
+
+    # Associative arrays keyed by "dbname|type"
+    declare -A latest_ts
+    declare -A latest_size
+    declare -A file_count
+
+    # Scan for backup files
+    local found=0
+    local file base btype dbname key mtime fsize
+    while IFS= read -r -d '' file; do
+        base="$(basename "$file")"
+        btype="$(detect_type "$base")"
+        [[ "$btype" == "unknown" ]] && continue
+
+        dbname="$(extract_dbname "$file")"
+        [[ -z "$dbname" ]] && continue
+
+        key="${dbname}|${btype}"
+        mtime="$(stat -c '%Y' "$file" 2>/dev/null)" || continue
+        fsize="$(stat -c '%s' "$file" 2>/dev/null)" || continue
+
+        # Track count
+        file_count[$key]=$(( ${file_count[$key]:-0} + 1 ))
+
+        # Track most recent. The stored value is expanded with ${...} so the
+        # key is never re-parsed inside an arithmetic subscript.
+        if [[ -z "${latest_ts[$key]:-}" || "$mtime" -gt "${latest_ts[$key]}" ]]; then
+            latest_ts[$key]="$mtime"
+            latest_size[$key]="$fsize"
+        fi
+
+        found=$((found + 1))
+    done < <(find "$backup_dir" -type f \( -name '*.sql' -o -name '*.sql.gz' -o -name '*.dump' -o -name '*.pgdump' \) -print0 2>/dev/null)
+
+    # stderr: stdout is the metrics payload and must stay clean
+    log_info "Found $found backup file(s) in $backup_dir" >&2
+
+    # Build output
+    local output=""
+    local labels age fresh
+
+    output+="# HELP db_backup_last_timestamp Unix timestamp of most recent backup.\n"
+    output+="# TYPE db_backup_last_timestamp gauge\n"
+    for key in "${!latest_ts[@]}"; do
+        labels="database=\"${key%%|*}\",type=\"${key##*|}\""
+        output+="db_backup_last_timestamp{${labels}} ${latest_ts[$key]}\n"
+    done
+
+    output+="# HELP db_backup_age_seconds Seconds since most recent backup.\n"
+    output+="# TYPE db_backup_age_seconds gauge\n"
+    for key in "${!latest_ts[@]}"; do
+        labels="database=\"${key%%|*}\",type=\"${key##*|}\""
+        age=$(( now - ${latest_ts[$key]} ))
+        output+="db_backup_age_seconds{${labels}} ${age}\n"
+    done
+
+    output+="# HELP db_backup_size_bytes Size of most recent backup file in bytes.\n"
+    output+="# TYPE db_backup_size_bytes gauge\n"
+    for key in "${!latest_size[@]}"; do
+        labels="database=\"${key%%|*}\",type=\"${key##*|}\""
+        output+="db_backup_size_bytes{${labels}} ${latest_size[$key]}\n"
+    done
+
+    output+="# HELP db_backup_count Number of backup files found.\n"
+    output+="# TYPE db_backup_count gauge\n"
+    for key in "${!file_count[@]}"; do
+        labels="database=\"${key%%|*}\",type=\"${key##*|}\""
+        output+="db_backup_count{${labels}} ${file_count[$key]}\n"
+    done
+
+    output+="# HELP db_backup_fresh 1 if backup is within max_age, 0 if stale.\n"
+    output+="# TYPE db_backup_fresh gauge\n"
+    for key in "${!latest_ts[@]}"; do
+        labels="database=\"${key%%|*}\",type=\"${key##*|}\""
+        age=$(( now - ${latest_ts[$key]} ))
+        fresh=1
+        if (( age > max_age )); then
+            fresh=0
+            log_warn "Stale backup: database=${key%%|*} type=${key##*|} age=${age}s exceeds max_age=${max_age}s"
+        fi
+        output+="db_backup_fresh{${labels}} ${fresh}\n"
+    done
+
+    output+="# HELP db_backup_exporter_last_run Timestamp of last exporter run.\n"
+    output+="# TYPE db_backup_exporter_last_run gauge\n"
+    output+="db_backup_exporter_last_run ${now}\n"
+
+    printf '%s\n' "$output"
+}
+
+# -----------------------------
+# Write metrics atomically
+# -----------------------------
+# Write the metrics payload to the .prom file via a temp file and a rename
+# within the same directory, so a reader never sees a partial file.
+# mktemp creates the file with owner-only permissions, so it is made
+# world-readable before the rename for a collector running as another user.
+# Args:    $1 - metrics payload; "\n" sequences are expanded by "echo -e"
+#          $2 - destination .prom file
+# Returns: 0 on success; 1 (after logging) when the directory is missing or
+#          the file cannot be written; no temp file is left behind
+write_metrics() {
+    local content="$1"
+    local prom_file="$2"
+
+    local prom_dir
+    prom_dir="$(dirname "$prom_file")"
+
+    if [[ ! -d "$prom_dir" ]]; then
+        log_error "Prom directory does not exist: $prom_dir"
+        return 1
+    fi
+
+    local tmp_file
+    if ! tmp_file="$(mktemp "${prom_dir}/.database_backups.prom.XXXXXX")"; then
+        log_error "Cannot create temp file in $prom_dir"
+        return 1
+    fi
+
+    if ! echo -e "$content" > "$tmp_file" \
+        || ! chmod 644 "$tmp_file" \
+        || ! mv "$tmp_file" "$prom_file"; then
+        rm -f "$tmp_file"
+        log_error "Failed to write metrics to $prom_file"
+        return 1
+    fi
+
+    log_info "Metrics written to $prom_file"
+}
+
+# -----------------------------
+# Main
+# -----------------------------
+# Entry point: parse options, log the effective settings, then collect and
+# write metrics - once when RUN_ONCE is true, otherwise every INTERVAL
+# seconds. A collection that yields no output skips the write.
+# Args: "$@" - script arguments, forwarded to parse_args
+main() {
+    parse_args "$@"
+
+    log_info "Database Backup Exporter starting"
+    log_info "Backup directory: $BACKUP_DIR"
+    log_info "Max backup age: ${MAX_AGE}s"
+    log_info "Prom file: $PROM_FILE"
+
+    local metrics
+    while :; do
+        # "|| true": a failed collection must not end the loop under set -e
+        metrics="$(collect_metrics "$BACKUP_DIR" "$MAX_AGE")" || true
+
+        if [[ -n "$metrics" ]]; then
+            write_metrics "$metrics" "$PROM_FILE"
+        fi
+
+        if [[ "$RUN_ONCE" == true ]]; then
+            log_info "Single run complete, exiting"
+            break
+        fi
+
+        log_info "Sleeping ${INTERVAL}s until next collection"
+        sleep "$INTERVAL"
+    done
+}
+
+main "$@"
diff --git a/database-smoke-tests.sh b/database-smoke-tests.sh
new file mode 100644
index 0000000..080c980
--- /dev/null
+++ b/database-smoke-tests.sh
@@ -0,0 +1,573 @@
+#!/usr/bin/env bash
+
+#####################################################################################
+#### database-smoke-tests.sh — Verify database health ####
+#### Checks connectivity, auth, replication, backup age, bloat, connections. ####
+#### Supports: PostgreSQL, MySQL/MariaDB, Redis ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: DB_TYPE=postgresql DB_HOST=localhost ./database-smoke-tests.sh ####
+#### DB_TYPE=redis REDIS_HOST=localhost ./database-smoke-tests.sh ####
+#### ####
+#### See --help for all options. ####
+#####################################################################################
+
+set -euo pipefail
+
+# ---------------------------------------------------------------------------
+# Help: show_help prints the usage text below to stdout and exits with status 0
+# ---------------------------------------------------------------------------
+show_help() {
+ cat <<'EOF'
+database-smoke-tests.sh — Database health smoke testing
+
+ENVIRONMENT VARIABLES:
+ DB_TYPE (required) postgresql | mysql | redis
+ DB_HOST Database host (default: localhost)
+ DB_PORT Database port (default: auto — 5432/3306/6379)
+ DB_USER Database user (default: postgres | root | "")
+ DB_PASS Database password (default: "")
+ DB_NAME Database name (default: postgres | mysql)
+
+ REDIS_HOST Redis host (falls back to DB_HOST)
+ REDIS_PORT Redis port (falls back to DB_PORT)
+ REDIS_AUTH Redis auth (falls back to DB_PASS)
+
+ MAX_REPLICATION_LAG_S Max replication lag in seconds (default: 30)
+ MAX_BACKUP_AGE_H Max backup / last-save age in hours (default: 26)
+ MAX_CONNECTIONS_PCT Connection usage threshold % (default: 80)
+ SKIP_REPLICATION Skip replication checks (default: false)
+ SKIP_BACKUP_AGE Skip backup-age checks (default: false)
+
+ OUTPUT_FORMAT text | tap (default: text)
+ COLOR auto | always | never (default: auto)
+ VERBOSE true | false (default: false)
+
+EXAMPLES:
+ DB_TYPE=postgresql DB_HOST=db1 DB_PASS=secret ./database-smoke-tests.sh
+ DB_TYPE=mysql DB_HOST=db2 DB_USER=app DB_NAME=mydb ./database-smoke-tests.sh
+ DB_TYPE=redis REDIS_HOST=cache1 REDIS_AUTH=pass ./database-smoke-tests.sh
+EOF
+ exit 0
+}
+
+[[ "${1:-}" == "--help" || "${1:-}" == "-h" ]] && show_help  # only the first argument is inspected
+
+# ---------------------------------------------------------------------------
+# Environment defaults
+# ---------------------------------------------------------------------------
+DB_TYPE="${DB_TYPE:-}"
+DB_HOST="${DB_HOST:-localhost}"
+DB_PORT="${DB_PORT:-}"
+DB_USER="${DB_USER:-}"
+DB_PASS="${DB_PASS:-}"
+DB_NAME="${DB_NAME:-}"
+
+REDIS_HOST="${REDIS_HOST:-$DB_HOST}"
+REDIS_PORT="${REDIS_PORT:-${DB_PORT:-6379}}"
+REDIS_AUTH="${REDIS_AUTH:-$DB_PASS}"
+
+MAX_REPLICATION_LAG_S="${MAX_REPLICATION_LAG_S:-30}"
+MAX_BACKUP_AGE_H="${MAX_BACKUP_AGE_H:-26}"
+MAX_CONNECTIONS_PCT="${MAX_CONNECTIONS_PCT:-80}"
+SKIP_REPLICATION="${SKIP_REPLICATION:-false}"
+SKIP_BACKUP_AGE="${SKIP_BACKUP_AGE:-false}"
+
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+COLOR="${COLOR:-auto}"
+VERBOSE="${VERBOSE:-false}"
+
+# ---------------------------------------------------------------------------
+# Apply per-engine defaults after DB_TYPE is known
+# ---------------------------------------------------------------------------
+apply_defaults() {
+ case "$DB_TYPE" in
+ postgresql)
+ DB_PORT="${DB_PORT:-5432}"
+ DB_USER="${DB_USER:-postgres}"
+ DB_NAME="${DB_NAME:-postgres}"
+ ;;
+ mysql)
+ DB_PORT="${DB_PORT:-3306}"
+ DB_USER="${DB_USER:-root}"
+ DB_NAME="${DB_NAME:-mysql}"
+ ;;
+ redis)
+ REDIS_PORT="${REDIS_PORT:-6379}"
+ ;;
+ *)
+ echo "ERROR: DB_TYPE must be one of: postgresql, mysql, redis" >&2
+ exit 1
+ ;;
+ esac
+}
+
+# ---------------------------------------------------------------------------
+# Colour setup
+# ---------------------------------------------------------------------------
+setup_colors() {
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ local use_color=false
+ case "$COLOR" in
+ always) use_color=true ;;
+ never) use_color=false ;;
+ auto) [[ -t 1 ]] && use_color=true ;;
+ esac
+ if $use_color; then
+ RED=$'\033[0;31m'
+ GREEN=$'\033[0;32m'
+ YELLOW=$'\033[1;33m'
+ BLUE=$'\033[0;34m'
+ BOLD=$'\033[1m'
+ RESET=$'\033[0m'
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# Counters
+# ---------------------------------------------------------------------------
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TEST_NUM=0
+
+# ---------------------------------------------------------------------------
+# run_test "description" command...
+# ---------------------------------------------------------------------------
+run_test() {
+ local desc="$1"; shift
+ TEST_NUM=$((TEST_NUM + 1))
+ local output rc
+ output=$("$@" 2>&1) && rc=0 || rc=$?
+
+ if [[ $rc -eq 0 ]]; then
+ PASS_COUNT=$((PASS_COUNT + 1))
+ if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "ok $TEST_NUM - $desc"
+ else
+ echo " ${GREEN}PASS${RESET} $desc"
+ fi
+ elif [[ $rc -eq 2 ]]; then
+ SKIP_COUNT=$((SKIP_COUNT + 1))
+ if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "ok $TEST_NUM - $desc # SKIP ${output:-skipped}"
+ else
+ echo " ${YELLOW}SKIP${RESET} $desc — ${output:-skipped}"
+ fi
+ else
+ FAIL_COUNT=$((FAIL_COUNT + 1))
+ if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "not ok $TEST_NUM - $desc"
+ [[ -n "$output" ]] && echo "# $output"
+ else
+ echo " ${RED}FAIL${RESET} $desc"
+ [[ -n "$output" ]] && echo " $output"
+ fi
+ fi
+
+ if [[ "$VERBOSE" == "true" && -n "$output" && $rc -eq 0 ]]; then
+ echo " ${BLUE}→${RESET} $output"
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# skip_test "description" "reason"
+# ---------------------------------------------------------------------------
+skip_test() {
+ local desc="$1" reason="${2:-skipped}"
+ TEST_NUM=$((TEST_NUM + 1))
+ SKIP_COUNT=$((SKIP_COUNT + 1))
+ if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "ok $TEST_NUM - $desc # SKIP $reason"
+ else
+ echo " ${YELLOW}SKIP${RESET} $desc — $reason"
+ fi
+}
+
+# ---------------------------------------------------------------------------
+# check_port host port [timeout] — status 0 when a TCP connection can be opened
+# ---------------------------------------------------------------------------
+check_port() {
+  local host="$1" port="$2" timeout="${3:-5}"
+  if command -v nc &>/dev/null; then
+    nc -z -w "$timeout" "$host" "$port" 2>/dev/null
+  elif command -v timeout &>/dev/null; then  # /dev/tcp is a bash redirection feature, not a file to test with -e
+    timeout "$timeout" bash -c 'echo >"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null  # host/port passed as arguments, not code
+  else
+    (echo >"/dev/tcp/$host/$port") 2>/dev/null  # last resort: no timeout tool available
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Helper: build psql / mysql invocations
+# ---------------------------------------------------------------------------
+run_psql() {
+  local database="${1:-$DB_NAME}" sql="$2"  # an empty $1 selects DB_NAME
+  PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$database" -t -A -c "$sql" 2>&1
+}
+
+run_mysql() {
+  local database="${1:-$DB_NAME}" sql="$2"  # an empty $1 selects DB_NAME
+  MYSQL_PWD="$DB_PASS" mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -D "$database" -N -s -e "$sql" 2>&1
+}
+
+run_redis() {
+ local auth_args=()
+ [[ -n "$REDIS_AUTH" ]] && auth_args=(-a "$REDIS_AUTH" --no-auth-warning)
+ redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" "${auth_args[@]}" "$@" 2>&1
+}
+
+# ===========================================================================
+# PostgreSQL tests
+# ===========================================================================
+# Runs the PostgreSQL checks through run_test. Connection settings reach the
+# child shells through the environment instead of being spliced into the
+# `bash -c` source, so passwords containing quotes, `$` or backticks survive.
+# Every query failure exits 1 explicitly: psql itself exits 2 when it cannot
+# connect, and run_test would report a bare status 2 as SKIP.
+run_postgresql_tests() {
+  export DB_HOST DB_PORT DB_USER DB_NAME
+  local -x PGPASSWORD="$DB_PASS"
+  # Prelude for each check: q SQL runs one statement; fail MSG prints it, exits 1.
+  local pre='q() { psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -A -c "$1" 2>&1; }
+    fail() { echo "$1"; exit 1; }'
+
+  echo "${BOLD}PostgreSQL smoke tests — ${DB_HOST}:${DB_PORT}${RESET}"
+  echo ""
+
+  # 1. TCP connectivity
+  run_test "TCP connectivity to ${DB_HOST}:${DB_PORT}" \
+    check_port "$DB_HOST" "$DB_PORT"
+
+  # 2. Authentication
+  run_test "Authentication as ${DB_USER}" \
+    bash -c "$pre"'; out=$(q "SELECT 1") || fail "$out"'
+
+  # 3. Version
+  run_test "Server version" \
+    bash -c "$pre"'
+      ver=$(q "SHOW server_version") || fail "$ver"
+      echo "PostgreSQL $ver"
+    '
+
+  # 4. Database accessible
+  run_test "Database '${DB_NAME}' accessible" \
+    bash -c "$pre"'; out=$(q "SELECT current_database()") || fail "$out"'
+
+  # 5. Replication lag (threshold arrives as $1)
+  if [[ "$SKIP_REPLICATION" == "true" ]]; then
+    skip_test "Replication lag" "SKIP_REPLICATION=true"
+  else
+    run_test "Replication lag < ${MAX_REPLICATION_LAG_S}s" \
+      bash -c "$pre"'
+        is_replica=$(q "SELECT pg_is_in_recovery()") || fail "$is_replica"
+        if [[ "$is_replica" != "t" ]]; then echo "not a replica"; exit 2; fi
+        lag=$(q "SELECT COALESCE(EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int, 0)") || fail "$lag"
+        if [[ "$lag" -gt "$1" ]]; then fail "lag=${lag}s exceeds ${1}s"; fi
+        echo "replica lag=${lag}s"
+      ' _ "$MAX_REPLICATION_LAG_S"
+  fi
+
+  # 6. Connection count (threshold arrives as $1)
+  run_test "Connection usage < ${MAX_CONNECTIONS_PCT}%" \
+    bash -c "$pre"'
+      row=$(q "SELECT sum(numbackends), (SELECT setting::int FROM pg_settings WHERE name='\''max_connections'\'') FROM pg_stat_database") || fail "$row"
+      IFS="|" read -r used max_c <<< "$row"
+      pct=$((used * 100 / max_c))
+      if [[ $pct -ge "$1" ]]; then fail "${used}/${max_c} (${pct}%)"; fi
+      echo "${used}/${max_c} (${pct}%)"
+    ' _ "$MAX_CONNECTIONS_PCT"
+
+  # 7. Long-running queries
+  run_test "No queries running > 300s" \
+    bash -c "$pre"'
+      count=$(q "SELECT count(*) FROM pg_stat_activity WHERE state='\''active'\'' AND now()-query_start > interval '\''300 seconds'\'' AND pid <> pg_backend_pid()") || fail "$count"
+      if [[ "$count" -gt 0 ]]; then
+        fail "${count} long-running queries found"
+      fi
+      echo "none"
+    '
+
+  # 8. Table bloat
+  run_test "Table bloat (dead tuple ratio < 20%)" \
+    bash -c "$pre"'
+      worst=$(q "SELECT schemaname||'\''.'\''||relname||'\'' '\''||round(100.0*n_dead_tup/(n_live_tup+n_dead_tup+1),1)||'\''%'\'' FROM pg_stat_user_tables WHERE n_live_tup+n_dead_tup>1000 AND 100.0*n_dead_tup/(n_live_tup+n_dead_tup+1)>20 ORDER BY n_dead_tup DESC LIMIT 3") || fail "$worst"
+      if [[ -n "$worst" ]]; then
+        fail "bloated: $worst"
+      fi
+      echo "ok"
+    '
+
+  # 9. Disk usage
+  run_test "Disk usage for '${DB_NAME}'" \
+    bash -c "$pre"'
+      size=$(q "SELECT pg_size_pretty(pg_database_size(current_database()))") || fail "$size"
+      echo "$size"
+    '
+}
+
+# ===========================================================================
+# MySQL / MariaDB tests
+# ===========================================================================
+run_mysql_tests() {
+ echo "${BOLD}MySQL smoke tests — ${DB_HOST}:${DB_PORT}${RESET}"
+ echo ""
+
+ # 1. TCP connectivity
+ run_test "TCP connectivity to ${DB_HOST}:${DB_PORT}" \
+ check_port "$DB_HOST" "$DB_PORT"
+
+ # 2. Authentication
+ run_test "Authentication as ${DB_USER}" \
+ bash -c 'MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -e "SELECT 1" >/dev/null'
+
+ # 3. Version
+ run_test "Server version" \
+ bash -c '
+ ver=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SELECT version()" 2>&1)
+ echo "MySQL $ver"
+ '
+
+ # 4. Database accessible
+ run_test "Database '${DB_NAME}' accessible" \
+ bash -c 'MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -D "'"$DB_NAME"'" -e "SELECT 1" >/dev/null'
+
+ # 5. Replication lag
+ if [[ "$SKIP_REPLICATION" == "true" ]]; then
+ skip_test "Replication lag" "SKIP_REPLICATION=true"
+ else
+ run_test "Replication lag < ${MAX_REPLICATION_LAG_S}s" \
+ bash -c '
+ status=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SHOW REPLICA STATUS\G" 2>&1)
+ if [[ -z "$status" ]]; then
+ status=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SHOW SLAVE STATUS\G" 2>&1)
+ fi
+ if [[ -z "$status" ]]; then
+ echo "not a replica"; exit 2
+ fi
+ lag=$(echo "$status" | grep -i "Seconds_Behind" | awk "{print \$NF}")
+ if [[ "$lag" == "NULL" || -z "$lag" ]]; then
+ echo "replication not running (lag=NULL)"; exit 1
+ fi
+ if [[ "$lag" -gt '"$MAX_REPLICATION_LAG_S"' ]]; then
+ echo "lag=${lag}s exceeds '"$MAX_REPLICATION_LAG_S"'s"; exit 1
+ fi
+ echo "replica lag=${lag}s"
+ '
+ fi
+
+ # 6. Connection count
+ run_test "Connection usage < ${MAX_CONNECTIONS_PCT}%" \
+ bash -c '
+ used=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SELECT count(*) FROM information_schema.processlist" 2>&1)
+ max_c=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SHOW VARIABLES LIKE '"'"'max_connections'"'"'" 2>&1 | awk "{print \$2}")
+ pct=$((used * 100 / max_c))
+ if [[ $pct -ge '"$MAX_CONNECTIONS_PCT"' ]]; then
+ echo "${used}/${max_c} (${pct}%)"; exit 1
+ fi
+ echo "${used}/${max_c} (${pct}%)"
+ '
+
+ # 7. Slow query log
+ run_test "Slow query log enabled" \
+ bash -c '
+ val=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SHOW VARIABLES LIKE '"'"'slow_query_log'"'"'" 2>&1 | awk "{print \$2}")
+ if [[ "$val" != "ON" ]]; then
+ echo "slow_query_log=$val"; exit 1
+ fi
+ echo "enabled"
+ '
+
+ # 8. Binary log space
+ run_test "Binary log disk usage" \
+ bash -c '
+ logs=$(MYSQL_PWD="'"$DB_PASS"'" mysql -h "'"$DB_HOST"'" -P "'"$DB_PORT"'" -u "'"$DB_USER"'" -N -s -e "SHOW BINARY LOGS" 2>&1)
+ if [[ "$logs" == *"not enabled"* || -z "$logs" ]]; then
+ echo "binary logging disabled"; exit 2
+ fi
+ total=$(echo "$logs" | awk "{s+=\$2} END {printf \"%.1f MB\", s/1048576}")
+ echo "$total"
+ '
+}
+
+# ===========================================================================
+# Redis tests
+# ===========================================================================
+run_redis_tests() {
+ echo "${BOLD}Redis smoke tests — ${REDIS_HOST}:${REDIS_PORT}${RESET}"
+ echo ""
+
+ local auth_args=()
+ [[ -n "$REDIS_AUTH" ]] && auth_args=(-a "$REDIS_AUTH" --no-auth-warning)
+
+ # 1. TCP connectivity
+ run_test "TCP connectivity to ${REDIS_HOST}:${REDIS_PORT}" \
+ check_port "$REDIS_HOST" "$REDIS_PORT"
+
+ # 2. PING/PONG
+ run_test "PING/PONG" \
+ bash -c '
+ reply=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' PING 2>&1)
+ if [[ "$reply" != "PONG" ]]; then
+ echo "got: $reply"; exit 1
+ fi
+ echo "PONG"
+ '
+
+ # 3. Server info
+ run_test "Server info" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO server 2>&1)
+ ver=$(echo "$info" | grep "^redis_version:" | cut -d: -f2 | tr -d "\r")
+ up=$(echo "$info" | grep "^uptime_in_days:" | cut -d: -f2 | tr -d "\r")
+ echo "v${ver}, uptime ${up}d"
+ '
+
+ # 4. Memory usage
+ run_test "Memory usage vs maxmemory" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO memory 2>&1)
+ used=$(echo "$info" | grep "^used_memory_human:" | cut -d: -f2 | tr -d "\r")
+ max_raw=$(echo "$info" | grep "^maxmemory:" | cut -d: -f2 | tr -d "\r")
+ max_h=$(echo "$info" | grep "^maxmemory_human:" | cut -d: -f2 | tr -d "\r")
+ if [[ "$max_raw" == "0" ]]; then
+ echo "used=${used}, maxmemory=unlimited"; exit 0
+ fi
+ echo "used=${used}, max=${max_h}"
+ '
+
+ # 5. Connected clients
+ run_test "Connected clients" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO clients 2>&1)
+ count=$(echo "$info" | grep "^connected_clients:" | cut -d: -f2 | tr -d "\r")
+ echo "${count} clients"
+ '
+
+ # 6. Replication status
+ if [[ "$SKIP_REPLICATION" == "true" ]]; then
+ skip_test "Replication status" "SKIP_REPLICATION=true"
+ else
+ run_test "Replication status" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO replication 2>&1)
+ role=$(echo "$info" | grep "^role:" | cut -d: -f2 | tr -d "\r")
+ if [[ "$role" == "master" ]]; then
+ slaves=$(echo "$info" | grep "^connected_slaves:" | cut -d: -f2 | tr -d "\r")
+ echo "role=master, replicas=${slaves}"
+ elif [[ "$role" == "slave" ]]; then
+ link=$(echo "$info" | grep "^master_link_status:" | cut -d: -f2 | tr -d "\r")
+ if [[ "$link" != "up" ]]; then
+ echo "replica link $link"; exit 1
+ fi
+ echo "role=replica, link=up"
+ else
+ echo "role=$role"
+ fi
+ '
+ fi
+
+ # 7. Last save time
+ if [[ "$SKIP_BACKUP_AGE" == "true" ]]; then
+ skip_test "Last RDB/AOF save" "SKIP_BACKUP_AGE=true"
+ else
+ run_test "Last RDB save < ${MAX_BACKUP_AGE_H}h" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO persistence 2>&1)
+ last_save=$(echo "$info" | grep "^rdb_last_save_time:" | cut -d: -f2 | tr -d "\r")
+ if [[ -z "$last_save" || "$last_save" == "0" ]]; then
+ echo "no RDB save recorded"; exit 2
+ fi
+ now=$(date +%s)
+ age_h=$(( (now - last_save) / 3600 ))
+ if [[ $age_h -gt '"$MAX_BACKUP_AGE_H"' ]]; then
+ echo "last save ${age_h}h ago (max '"$MAX_BACKUP_AGE_H"'h)"; exit 1
+ fi
+ echo "last save ${age_h}h ago"
+ '
+ fi
+
+ # 8. Keyspace
+ run_test "Keyspace info" \
+ bash -c '
+ info=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' INFO keyspace 2>&1)
+ dbs=$(echo "$info" | grep "^db[0-9]" || true)
+ if [[ -z "$dbs" ]]; then
+ echo "no databases with keys"; exit 2
+ fi
+ total=0
+ while IFS= read -r line; do
+ keys=$(echo "$line" | grep -oP "keys=\K[0-9]+")
+ total=$((total + keys))
+ done <<< "$dbs"
+ echo "${total} keys across $(echo "$dbs" | wc -l) database(s)"
+ '
+
+ # 9. Eviction policy
+ run_test "Eviction policy" \
+ bash -c '
+ policy=$(redis-cli -h "'"$REDIS_HOST"'" -p "'"$REDIS_PORT"'" '"$(printf '%q ' "${auth_args[@]}")"' CONFIG GET maxmemory-policy 2>&1 | tail -1)
+ echo "policy=$policy"
+ '
+}
+
+# ===========================================================================
+# Summary
+# ===========================================================================
+print_summary() {
+  # The total is derived from the three outcome counters.
+  local executed=$((PASS_COUNT + FAIL_COUNT + SKIP_COUNT))
+  local rule="${BOLD}───────────────────────────────────────${RESET}"
+  local verdict="${GREEN}${BOLD}RESULT: PASS${RESET}"
+  echo ""
+  if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+    # TAP plan line followed by comment-style tallies.
+    printf '%s\n' "1..$executed" "# pass $PASS_COUNT" "# fail $FAIL_COUNT" "# skip $SKIP_COUNT"
+    return 0
+  fi
+
+  (( FAIL_COUNT > 0 )) && verdict="${RED}${BOLD}RESULT: FAIL${RESET}"
+  printf '%s\n' \
+    "$rule" \
+    " ${GREEN}PASS${RESET} $PASS_COUNT" \
+    " ${RED}FAIL${RESET} $FAIL_COUNT" \
+    " ${YELLOW}SKIP${RESET} $SKIP_COUNT" \
+    " Total $executed" \
+    "$rule" \
+    " $verdict"
+}
+
+# ===========================================================================
+# Main
+# ===========================================================================
+main() {
+  # DB_TYPE is mandatory; every other setting has a default.
+  if [[ -z "$DB_TYPE" ]]; then
+    printf '%s\n' "ERROR: DB_TYPE is required (postgresql, mysql, redis)" \
+      "Run with --help for usage information." >&2
+    exit 1
+  fi
+
+  apply_defaults
+  setup_colors
+
+  if [[ "$DB_TYPE" == "postgresql" ]]; then
+    run_postgresql_tests
+  elif [[ "$DB_TYPE" == "mysql" ]]; then
+    run_mysql_tests
+  elif [[ "$DB_TYPE" == "redis" ]]; then
+    run_redis_tests
+  else
+    echo "ERROR: Unsupported DB_TYPE '${DB_TYPE}'" >&2
+    exit 1
+  fi
+  print_summary
+  if (( FAIL_COUNT > 0 )); then exit 1; fi
+  exit 0
+}
+
+main "$@"
diff --git a/deploy-exporter.sh b/deploy-exporter.sh
new file mode 100755
index 0000000..cbe721b
--- /dev/null
+++ b/deploy-exporter.sh
@@ -0,0 +1,588 @@
+#!/bin/bash
+################################################################################
+# Script Name: deploy-exporter.sh
+# Version: 1.0
+# Description: Deployment tool for Prometheus exporters from mylinux.work.
+# Downloads, installs, configures cron jobs, validates output,
+# and manages lifecycle (install, update, remove, status) for
+# any exporter script hosted at mylinux.work/downloads/.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - wget or curl
+# - root access (for /usr/local/bin/ and /etc/cron.d/)
+#
+# Usage:
+# deploy-exporter.sh list # list available
+# deploy-exporter.sh install process-metrics-exporter # install one
+# deploy-exporter.sh install process-metrics-exporter --cron "*/3 * * * *"
+# deploy-exporter.sh install process-metrics-exporter journal-error-exporter
+# deploy-exporter.sh status # check installed
+# deploy-exporter.sh remove process-metrics-exporter # remove one
+# deploy-exporter.sh update # update all
+#
+################################################################################
+
+set -uo pipefail
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+BASE_URL="https://mylinux.work/downloads"  # exporters are fetched from ${BASE_URL}/<name>.sh
+INSTALL_DIR="/usr/local/bin"  # exporters are installed as ${INSTALL_DIR}/<name>.sh
+CRON_DIR="/etc/cron.d"  # root-only location, see check_root
+TEXTFILE_DIR="/var/lib/node_exporter"
+
+# ============================================================================
+# AVAILABLE EXPORTERS (name -> description, then name -> default cron schedule)
+# ============================================================================
+
+declare -A EXPORTER_DESC=(
+ [alertmanager-exporter]="Alertmanager notification and silence metrics"
+ [apache-metrics-exporter]="Apache HTTP server performance metrics"
+ [apt-updates-exporter]="Pending apt package updates"
+ [artifactory-exporter]="JFrog Artifactory repository metrics"
+ [backup-status-exporter]="Backup job status and age metrics"
+ [borg-backup-exporter]="Borg backup repository and archive metrics"
+ [caprover-exporter]="CapRover app deployment metrics"
+ [clickhouse-exporter]="ClickHouse query, memory, merge, and replication metrics"
+ [consul-exporter]="HashiCorp Consul service health metrics"
+ [container-health-exporter]="Docker container health and resource metrics"
+ [coolify-exporter]="Coolify deployment platform metrics"
+ [dokku-exporter]="Dokku deployment platform metrics"
+ [dokploy-exporter]="Dokploy deployment platform metrics"
+ [cron-job-exporter]="Cron job execution status and timing"
+ [crowdsec-decisions-exporter]="CrowdSec active decisions and ban metrics"
+ [crowdsec-exporter]="CrowdSec intrusion detection metrics"
+ [database-backup-exporter]="Database backup status and size metrics"
+ [dhcp-lease-exporter]="DHCP lease allocation metrics"
+ [directory-size-exporter]="Directory size and file count metrics"
+ [disk-io-exporter]="Disk I/O throughput and latency metrics"
+ [docker-swarm-exporter]="Docker Swarm node and service metrics"
+ [dovecot-metrics-exporter]="Dovecot mail server metrics"
+ [duplicati-exporter]="Duplicati backup job metrics"
+ [elasticsearch-exporter]="Elasticsearch cluster health and index metrics"
+ [fail2ban-exporter]="Fail2ban jail and ban metrics"
+ [freeradius-exporter]="FreeRADIUS authentication metrics"
+ [game-server-exporter]="Game server player count and status metrics"
+ [gitea-exporter]="Gitea repository and user metrics"
+ [glpi-exporter]="GLPI ITSM ticket and asset metrics"
+ [gitlab-metrics-exporter]="GitLab instance performance metrics"
+ [gitlab-migration-exporter]="GitLab migration progress metrics"
+ [gpu-exporter]="GPU utilization and temperature metrics"
+ [graylog-exporter]="Graylog log management metrics"
+ [headscale-metrics-exporter]="Headscale coordination server metrics"
+ [ip-intel-exporter]="IP intelligence from nginx access logs"
+ [jenkins-exporter]="Jenkins build and queue metrics"
+ [incus-metrics-exporter]="Incus storage pool, snapshot, and instance inventory metrics"
+ [journal-error-exporter]="Journalctl error and warning metrics"
+ [keepalived-exporter]="Keepalived VRRP failover metrics"
+ [login-attempt-exporter]="SSH and system login attempt metrics"
+ [ufw-blocklist-metrics]="UFW blocklist feed, ipset, and block count metrics"
+ [users-logged-in]="User login sessions, terminals, sudo, and failed login metrics"
+ [logrotate-check-exporter]="Logrotate configuration health metrics"
+ [lynis-metrics-exporter]="Lynis security audit score metrics"
+ [mailcow-exporter]="Mailcow mail server metrics"
+ [memory-pressure-exporter]="Memory pressure and swap usage metrics"
+ [mysql-exporter]="MySQL/MariaDB performance metrics"
+ [n8n-exporter]="n8n workflow automation metrics"
+ [network-info-exporter]="Network interface and routing metrics"
+ [nexus-exporter]="Sonatype Nexus Repository metrics"
+ [nextcloud-exporter]="Nextcloud instance health metrics"
+ [nfs-exporter]="NFS client mount and performance metrics"
+ [nfs-server-exporter]="NFS server export and connection metrics"
+ [nginx-metrics-exporter]="Nginx connection and request metrics"
+ [ntp-drift-exporter]="NTP clock drift and sync metrics"
+ [ollama-exporter]="Ollama LLM model and inference metrics"
+ [openvpn-exporter]="OpenVPN tunnel and client metrics"
+ [password-expiry-exporter]="System user password expiry metrics"
+ [pihole-exporter]="Pi-hole DNS filtering metrics"
+ [plex-exporter]="Plex media server activity metrics"
+ [podman-container-exporter]="Podman container health and resource metrics"
+ [postgresql-exporter]="PostgreSQL database performance metrics"
+ [postgresql-ha-exporter]="PostgreSQL HA replication metrics"
+ [process-metrics-exporter]="Process CPU/memory/state metrics"
+ [textfile-health-exporter]="Textfile collector health monitoring"
+ [rabbitmq-exporter]="RabbitMQ queue and connection metrics"
+ [redis-metrics-exporter]="Redis server performance metrics"
+ [redis-sentinel-exporter]="Redis Sentinel failover metrics"
+ [restic-backup-exporter]="Restic backup snapshot and size metrics"
+ [rsyslog-metrics-exporter]="Rsyslog message processing metrics"
+ [samba-exporter]="Samba file share and session metrics"
+ [seo-exporter]="SEO health and crawl metrics"
+ [smart-drive-exporter]="SMART disk health and temperature metrics"
+ [snipeit-exporter]="Snipe-IT asset management metrics"
+ [sonarqube-exporter]="SonarQube code quality metrics"
+ [squid-exporter]="Squid proxy cache and request metrics"
+ [storage-health-exporter]="Storage pool and volume health metrics"
+ [suricata-exporter]="Suricata IDS/IPS alert metrics"
+ [syncthing-exporter]="Syncthing folder sync and device metrics"
+ [systemd-boot-time-exporter]="Systemd boot and service startup timing"
+ [systemd-service-exporter]="Systemd service state and restart metrics"
+ [systemd-timer-exporter]="Systemd timer schedule and execution metrics"
+ [tailscale-exporter]="Tailscale node and network metrics"
+ [trivy-cve-auditor]="Trivy container image vulnerability metrics"
+ [vault-exporter]="HashiCorp Vault seal and token metrics"
+ [vaultwarden-exporter]="Vaultwarden password manager metrics"
+ [wazuh-exporter]="Wazuh SIEM alert and agent metrics"
+ [web-traffic-exporter]="Web traffic request and response metrics"
+ [webtop-selkies-exporter]="Webtop and Selkies container desktop metrics"
+ [wickr-io-exporter]="Wickr.io bot and message metrics"
+ [wickr-metrics-exporter]="Wickr messaging platform metrics"
+ [wireguard-exporter]="WireGuard tunnel and peer metrics"
+ [yum-updates-exporter]="Pending yum/dnf package updates"
+)
+
+declare -A EXPORTER_CRON=(
+ [alertmanager-exporter]="*/5 * * * *"
+ [apache-metrics-exporter]="*/3 * * * *"
+ [apt-updates-exporter]="0 0 * * *"
+ [artifactory-exporter]="*/5 * * * *"
+ [backup-status-exporter]="*/15 * * * *"
+ [borg-backup-exporter]="*/15 * * * *"
+ [caprover-exporter]="*/5 * * * *"
+ [clickhouse-exporter]="*/3 * * * *"
+ [consul-exporter]="*/3 * * * *"
+ [container-health-exporter]="*/3 * * * *"
+ [coolify-exporter]="*/5 * * * *"
+ [dokku-exporter]="*/5 * * * *"
+ [dokploy-exporter]="*/5 * * * *"
+ [cron-job-exporter]="*/5 * * * *"
+ [crowdsec-decisions-exporter]="*/5 * * * *"
+ [crowdsec-exporter]="*/5 * * * *"
+ [database-backup-exporter]="*/15 * * * *"
+ [dhcp-lease-exporter]="*/5 * * * *"
+ [directory-size-exporter]="*/15 * * * *"
+ [disk-io-exporter]="*/3 * * * *"
+ [docker-swarm-exporter]="*/3 * * * *"
+ [dovecot-metrics-exporter]="*/5 * * * *"
+ [duplicati-exporter]="*/15 * * * *"
+ [elasticsearch-exporter]="*/3 * * * *"
+ [fail2ban-exporter]="*/5 * * * *"
+ [freeradius-exporter]="*/5 * * * *"
+ [game-server-exporter]="*/3 * * * *"
+ [gitea-exporter]="*/5 * * * *"
+ [glpi-exporter]="*/5 * * * *"
+ [gitlab-metrics-exporter]="*/5 * * * *"
+ [gitlab-migration-exporter]="*/5 * * * *"
+ [gpu-exporter]="*/3 * * * *"
+ [graylog-exporter]="*/5 * * * *"
+ [headscale-metrics-exporter]="*/5 * * * *"
+ [ip-intel-exporter]="*/5 * * * *"
+ [jenkins-exporter]="*/5 * * * *"
+ [incus-metrics-exporter]="*/5 * * * *"
+ [journal-error-exporter]="*/5 * * * *"
+ [keepalived-exporter]="*/5 * * * *"
+ [login-attempt-exporter]="*/5 * * * *"
+ [ufw-blocklist-metrics]="*/5 * * * *"
+ [users-logged-in]="*/3 * * * *"
+ [logrotate-check-exporter]="0 */6 * * *"
+ [lynis-metrics-exporter]="0 0 * * *"
+ [mailcow-exporter]="*/5 * * * *"
+ [memory-pressure-exporter]="*/3 * * * *"
+ [mysql-exporter]="*/3 * * * *"
+ [n8n-exporter]="*/5 * * * *"
+ [network-info-exporter]="*/5 * * * *"
+ [nexus-exporter]="*/5 * * * *"
+ [nextcloud-exporter]="*/5 * * * *"
+ [nfs-exporter]="*/5 * * * *"
+ [nfs-server-exporter]="*/5 * * * *"
+ [nginx-metrics-exporter]="*/3 * * * *"
+ [ntp-drift-exporter]="*/5 * * * *"
+ [ollama-exporter]="*/5 * * * *"
+ [openvpn-exporter]="*/5 * * * *"
+ [password-expiry-exporter]="0 0 * * *"
+ [pihole-exporter]="*/5 * * * *"
+ [plex-exporter]="*/5 * * * *"
+ [podman-container-exporter]="*/3 * * * *"
+ [postgresql-exporter]="*/3 * * * *"
+ [postgresql-ha-exporter]="*/3 * * * *"
+ [process-metrics-exporter]="*/3 * * * *"
+ [textfile-health-exporter]="*/5 * * * *"
+ [rabbitmq-exporter]="*/5 * * * *"
+ [redis-metrics-exporter]="*/3 * * * *"
+ [redis-sentinel-exporter]="*/5 * * * *"
+ [restic-backup-exporter]="*/15 * * * *"
+ [rsyslog-metrics-exporter]="*/5 * * * *"
+ [samba-exporter]="*/5 * * * *"
+ [seo-exporter]="0 */6 * * *"
+ [smart-drive-exporter]="*/15 * * * *"
+ [snipeit-exporter]="*/5 * * * *"
+ [sonarqube-exporter]="*/5 * * * *"
+ [squid-exporter]="*/5 * * * *"
+ [storage-health-exporter]="*/15 * * * *"
+ [suricata-exporter]="*/5 * * * *"
+ [syncthing-exporter]="*/5 * * * *"
+ [systemd-boot-time-exporter]="*/15 * * * *"
+ [systemd-service-exporter]="*/5 * * * *"
+ [systemd-timer-exporter]="*/5 * * * *"
+ [tailscale-exporter]="*/5 * * * *"
+ [trivy-cve-auditor]="*/30 * * * *"
+ [vault-exporter]="*/5 * * * *"
+ [vaultwarden-exporter]="*/5 * * * *"
+ [wazuh-exporter]="*/5 * * * *"
+ [web-traffic-exporter]="*/5 * * * *"
+ [webtop-selkies-exporter]="*/3 * * * *"
+ [wickr-io-exporter]="*/5 * * * *"
+ [wickr-metrics-exporter]="*/5 * * * *"
+ [wireguard-exporter]="*/5 * * * *"
+ [yum-updates-exporter]="0 0 * * *"
+)
+
+# ============================================================================
+# HELPERS
+# ============================================================================
+
+log() { printf '# %s\n' "$*"; }  # informational line on stdout
+warn() { printf '# WARN: %s\n' "$*" >&2; }  # warning on stderr
+err() { printf '# ERROR: %s\n' "$*" >&2; }  # error on stderr
+die() { printf '# ERROR: %s\n' "$*" >&2; exit 1; }  # error on stderr, then exit 1
+
+check_root() {
+ [[ $EUID -eq 0 ]] || die "Must run as root (need write access to ${INSTALL_DIR}/ and ${CRON_DIR}/)"
+}
+
+download() {
+    # download URL DEST: fetch with wget when present, otherwise with curl.
+    local source_url="$1" target="$2"
+    if command -v wget &>/dev/null; then
+        wget -q -O "$target" "$source_url"
+        return
+    fi
+    command -v curl &>/dev/null || die "Neither wget nor curl found"
+    curl -fsSL -o "$target" "$source_url"
+}
+
+# Print the version declared in a script's "# Version:" header line.
+# Arguments: $1 - path of the script to inspect
+# Outputs:   the third whitespace-separated field of the first line that starts
+#            with "# Version:", or "unknown" when the file cannot be read, has
+#            no such line, or the line carries no value
+get_script_version() {
+    local file="$1"
+    local version
+
+    # One awk call replaces the grep|awk pipeline. The pipeline's status was
+    # awk's unless pipefail is active, so the "unknown" fallback did not run
+    # for a missing header and an empty version was printed instead.
+    version=$(awk '/^# Version:/ { print $3; exit }' "$file" 2>/dev/null) || version=""
+    printf '%s\n' "${version:-unknown}"
+}
+
+# ============================================================================
+# LIST
+# ============================================================================
+
+cmd_list() {
+ log "Available exporters from mylinux.work (${#EXPORTER_DESC[@]} total)"
+ log ""
+ printf "# %-40s %-15s %s\n" "EXPORTER" "DEFAULT CRON" "DESCRIPTION"
+ printf "# %-40s %-15s %s\n" "--------" "------------" "-----------"
+
+ for name in $(echo "${!EXPORTER_DESC[@]}" | tr ' ' '\n' | sort); do
+ local cron="${EXPORTER_CRON[$name]:-*/5 * * * *}"
+ local desc="${EXPORTER_DESC[$name]}"
+ local installed=""
+ [[ -f "${INSTALL_DIR}/${name}.sh" ]] && installed=" [installed]"
+ printf "# %-40s %-15s %s%s\n" "$name" "$cron" "$desc" "$installed"
+ done
+}
+
+# ============================================================================
+# INSTALL
+# ============================================================================
+
+# Parse the "install" arguments and install every named exporter.
+# Arguments: one or more exporter names, plus an optional "--cron SCHEDULE"
+#            that is passed to install_one for each of them
+# Exits through die on an unknown option, a --cron without a value, or when no
+# exporter name was given. Requires root (check_root).
+cmd_install() {
+    check_root
+
+    local names=()
+    local cron_schedule=""
+    local name
+
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --cron)
+                # Without this guard a trailing --cron reads an unset $2 (fatal
+                # under set -u) and "shift 2" fails without consuming the
+                # option, which can leave this loop spinning forever.
+                [[ $# -ge 2 ]] || die "--cron requires a schedule argument"
+                cron_schedule="$2"
+                shift 2
+                ;;
+            -*)
+                die "Unknown option: $1"
+                ;;
+            *)
+                names+=("$1")
+                shift
+                ;;
+        esac
+    done
+
+    [[ ${#names[@]} -gt 0 ]] || die "No exporter name(s) specified"
+
+    for name in "${names[@]}"; do
+        install_one "$name" "$cron_schedule"
+    done
+}
+
+# Download, validate and install a single exporter, then optionally schedule it.
+# Arguments: $1 - exporter name (script is fetched from BASE_URL/<name>.sh)
+#            $2 - cron schedule override; empty means "use the built-in default"
+# Globals:   BASE_URL, INSTALL_DIR, CRON_DIR, TEXTFILE_DIR, EXPORTER_DESC,
+#            EXPORTER_CRON (read)
+# Returns:   1 when the download fails, the file is empty, or validation fails
+install_one() {
+ local name="$1"
+ local cron_schedule="$2"
+ local url="${BASE_URL}/${name}.sh"
+ local dest="${INSTALL_DIR}/${name}.sh"
+ local temp_file
+
+ # Names missing from the catalogue only trigger a warning; the download is still attempted.
+ if [[ -z "${EXPORTER_DESC[$name]+x}" ]]; then
+ warn "Unknown exporter '${name}' — not in the built-in list, attempting download anyway"
+ fi
+
+ log "Installing ${name}..."
+
+ # Download into a temporary file so a failed transfer never touches $dest.
+ temp_file=$(mktemp "/tmp/${name}.XXXXXX")
+ if ! download "$url" "$temp_file"; then
+ rm -f "$temp_file"
+ err "Failed to download ${url}"
+ return 1
+ fi
+
+ if [[ ! -s "$temp_file" ]]; then
+ rm -f "$temp_file"
+ err "Downloaded file is empty: ${url}"
+ return 1
+ fi
+
+ chmod +x "$temp_file"
+
+ # Validation executes the freshly downloaded script (cmd_install calls check_root
+ # first, so it runs with root privileges) and only counts the lines printed on stdout.
+ # The script's own exit status is ignored because of "|| true".
+ log "Validating ${name}..."
+ local test_output
+ test_output=$("$temp_file" 2>/dev/null || true)
+ local line_count
+ line_count=$(echo "$test_output" | wc -l)
+
+ # Fewer than three output lines is treated as a broken exporter.
+ if [[ "$line_count" -lt 3 ]]; then
+ rm -f "$temp_file"
+ err "Validation failed — ${name} produced only ${line_count} lines of output"
+ return 1
+ fi
+
+ mv -f "$temp_file" "$dest"
+ chmod +x "$dest"
+ log "Installed ${dest}"
+
+ # Schedule precedence: explicit --cron value, then the EXPORTER_CRON default, else no cron job.
+ if [[ -n "$cron_schedule" ]]; then
+ local default_cron="$cron_schedule"
+ elif [[ -n "${EXPORTER_CRON[$name]+x}" ]]; then
+ local default_cron="${EXPORTER_CRON[$name]}"
+ log "No --cron specified, using default: ${default_cron}"
+ else
+ local default_cron=""
+ fi
+
+ if [[ -n "$default_cron" ]]; then
+ mkdir -p "$TEXTFILE_DIR"
+ local cron_file="${CRON_DIR}/${name}"
+ # NOTE(review): the redirection on the next line and the lone EOF after it look like the
+ # remains of a heredoc whose body was lost; restore the cron entry from the original script.
+ cat > "$cron_file" <&1
+EOF
+ log "Created cron job: ${cron_file}"
+ log " Schedule: ${default_cron}"
+ log " Command: ${INSTALL_DIR}/${name}.sh --textfile"
+ fi
+
+ log "${name} installed successfully"
+ log ""
+}
+
+# ============================================================================
+# STATUS
+# ============================================================================
+
+cmd_status() {
+ local found=0
+
+ log "Installed exporters in ${INSTALL_DIR}/:"
+ log ""
+ printf "# %-40s %-12s %-10s %s\n" "EXPORTER" "VERSION" "CRON" "LAST .prom UPDATE"
+ printf "# %-40s %-12s %-10s %s\n" "--------" "-------" "----" "-----------------"
+
+ for script in "${INSTALL_DIR}"/*-exporter.sh; do
+ [[ -f "$script" ]] || continue
+ found=1
+
+ local name
+ name=$(basename "$script" .sh)
+ local version
+ version=$(get_script_version "$script")
+
+ local cron_status="none"
+ [[ -f "${CRON_DIR}/${name}" ]] && cron_status="active"
+
+ local prom_name
+ prom_name=$(echo "$name" | tr '-' '_')
+ local prom_file="${TEXTFILE_DIR}/${prom_name}.prom"
+ local prom_age="no .prom file"
+
+ if [[ -f "$prom_file" ]]; then
+ local mod_time now age_sec
+ mod_time=$(stat -c %Y "$prom_file" 2>/dev/null || echo 0)
+ now=$(date +%s)
+ age_sec=$(( now - mod_time ))
+
+ if [[ $age_sec -lt 60 ]]; then
+ prom_age="${age_sec}s ago"
+ elif [[ $age_sec -lt 3600 ]]; then
+ prom_age="$(( age_sec / 60 ))m ago"
+ elif [[ $age_sec -lt 86400 ]]; then
+ prom_age="$(( age_sec / 3600 ))h ago"
+ else
+ prom_age="$(( age_sec / 86400 ))d ago (STALE)"
+ fi
+ fi
+
+ printf "# %-40s %-12s %-10s %s\n" "$name" "$version" "$cron_status" "$prom_age"
+ done
+
+ if [[ $found -eq 0 ]]; then
+ log "No exporters installed in ${INSTALL_DIR}/"
+ fi
+}
+
+# ============================================================================
+# REMOVE
+# ============================================================================
+
+cmd_remove() {
+ check_root
+ [[ $# -gt 0 ]] || die "No exporter name specified"
+
+ for name in "$@"; do
+ remove_one "$name"
+ done
+}
+
+remove_one() {
+ local name="$1"
+ local script="${INSTALL_DIR}/${name}.sh"
+ local cron_file="${CRON_DIR}/${name}"
+ local prom_name
+ prom_name=$(echo "$name" | tr '-' '_')
+ local prom_file="${TEXTFILE_DIR}/${prom_name}.prom"
+
+ if [[ ! -f "$script" ]]; then
+ warn "${name} is not installed in ${INSTALL_DIR}/"
+ return 1
+ fi
+
+ rm -f "$script"
+ log "Removed ${script}"
+
+ if [[ -f "$cron_file" ]]; then
+ rm -f "$cron_file"
+ log "Removed cron job: ${cron_file}"
+ fi
+
+ if [[ -f "$prom_file" ]]; then
+ rm -f "$prom_file"
+ log "Removed .prom file: ${prom_file}"
+ fi
+
+ log "${name} removed"
+ log ""
+}
+
+# ============================================================================
+# UPDATE
+# ============================================================================
+
+cmd_update() {
+ check_root
+
+ local found=0
+
+ for script in "${INSTALL_DIR}"/*-exporter.sh; do
+ [[ -f "$script" ]] || continue
+ found=1
+
+ local name
+ name=$(basename "$script" .sh)
+ local old_version
+ old_version=$(get_script_version "$script")
+
+ log "Updating ${name} (current: v${old_version})..."
+
+ local url="${BASE_URL}/${name}.sh"
+ local temp_file
+ temp_file=$(mktemp "/tmp/${name}.XXXXXX")
+
+ if ! download "$url" "$temp_file"; then
+ rm -f "$temp_file"
+ err "Failed to download ${name}, skipping"
+ continue
+ fi
+
+ if [[ ! -s "$temp_file" ]]; then
+ rm -f "$temp_file"
+ err "Downloaded file is empty for ${name}, skipping"
+ continue
+ fi
+
+ local new_version
+ new_version=$(get_script_version "$temp_file")
+
+ chmod +x "$temp_file"
+ mv -f "$temp_file" "$script"
+ chmod +x "$script"
+
+ if [[ "$old_version" == "$new_version" ]]; then
+ log "${name}: v${new_version} (unchanged)"
+ else
+ log "${name}: v${old_version} → v${new_version}"
+ fi
+ done
+
+ if [[ $found -eq 0 ]]; then
+ log "No exporters installed in ${INSTALL_DIR}/"
+ fi
+}
+
+# ============================================================================
+# USAGE
+# ============================================================================
+
+show_usage() {
+ cat <&2; }
+info() { echo -e "${BOLD}[INFO]${RESET} $*"; }
+
+usage() {
+ cat </dev/null || true
+ log "Stopped FreshRSS containers"
+ fi
+ fi
+
+ # Remove nginx config
+ for f in /etc/nginx/conf.d/freshrss.conf /etc/nginx/sites-enabled/freshrss.conf /etc/nginx/sites-available/freshrss.conf; do
+ if [[ -f "$f" ]]; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would remove: $f"
+ else
+ rm -f "$f"
+ log "Removed $f"
+ fi
+ fi
+ done
+
+ if [[ "$DRY_RUN" != "true" ]] && command -v nginx &>/dev/null; then
+ nginx -t 2>/dev/null && systemctl reload nginx 2>/dev/null && log "Reloaded Nginx"
+ fi
+
+ echo ""
+ if [[ "$DRY_RUN" != "true" ]]; then
+ log "Containers stopped and Nginx config removed."
+ info "Data preserved at ${INSTALL_DIR}/ - remove manually if desired:"
+ echo " rm -rf ${INSTALL_DIR}"
+ fi
+ exit 0
+fi
+
+# -- Validation --
+
+if [[ -z "$DOMAIN" ]]; then
+ err "Domain is required: --domain rss.example.com"
+ exit 1
+fi
+
+if ! command -v docker &>/dev/null; then
+ err "Docker is not installed. Install Docker first."
+ exit 1
+fi
+
+if ! docker compose version &>/dev/null 2>&1; then
+ err "Docker Compose v2 is not available. Install docker-compose-plugin."
+ exit 1
+fi
+
+# Generate DB password if not provided
+if [[ -z "$DB_PASSWORD" ]]; then
+ DB_PASSWORD=$(openssl rand -base64 24 | tr -d '/+=' | head -c 24)
+fi
+
+# -- Install mode --
+
+info "Deploying FreshRSS..."
+echo ""
+info "Domain: ${DOMAIN}"
+info "Port: ${PORT}"
+info "Install dir: ${INSTALL_DIR}"
+info "Timezone: ${TZ}"
+info "Feed cron: ${CRON_MIN}"
+echo ""
+
+# 1. Create directory
+if [[ -d "$INSTALL_DIR" ]]; then
+ info "Directory ${INSTALL_DIR} already exists"
+else
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: ${INSTALL_DIR}"
+ else
+ mkdir -p "$INSTALL_DIR"
+ log "Created ${INSTALL_DIR}"
+ fi
+fi
+
+# 2. Docker Compose file
+COMPOSE_FILE="${INSTALL_DIR}/docker-compose.yml"
+
+if [[ -f "$COMPOSE_FILE" ]]; then
+ info "docker-compose.yml already exists - skipping (delete to recreate)"
+else
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: ${COMPOSE_FILE}"
+ else
+ cat > "$COMPOSE_FILE" </dev/null | grep -q '^freshrss$'; then
+ info "FreshRSS container is already running"
+else
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would run: docker compose up -d"
+ else
+ cd "$INSTALL_DIR"
+ docker compose up -d
+ log "Started FreshRSS containers"
+ fi
+fi
+
+# 4. Nginx reverse proxy
+if [[ "$SKIP_NGINX" == "true" ]]; then
+ info "Skipping Nginx config (--skip-nginx)"
+elif ! command -v nginx &>/dev/null; then
+ warn "Nginx not installed - skipping reverse proxy config"
+ SKIP_NGINX=true
+else
+ NGINX_CONF=""
+ # Detect config directory style
+ if [[ -d /etc/nginx/conf.d ]]; then
+ NGINX_CONF="/etc/nginx/conf.d/freshrss.conf"
+ elif [[ -d /etc/nginx/sites-available ]]; then
+ NGINX_CONF="/etc/nginx/sites-available/freshrss.conf"
+ else
+ warn "Could not detect Nginx config directory - skipping"
+ SKIP_NGINX=true
+ fi
+
+ if [[ "$SKIP_NGINX" != "true" && -n "$NGINX_CONF" ]]; then
+ if [[ -f "$NGINX_CONF" ]]; then
+ info "Nginx config already exists at ${NGINX_CONF} - skipping"
+ else
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: ${NGINX_CONF}"
+ else
+ if [[ "$SKIP_SSL" == "true" ]]; then
+ # HTTP only
+ cat > "$NGINX_CONF" < "$NGINX_CONF" </dev/null; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would run: certbot certonly --nginx -d ${DOMAIN}"
+ else
+ certbot certonly --nginx -d "$DOMAIN" --non-interactive --agree-tos --register-unsafely-without-email || {
+ warn "Certbot failed - configure SSL manually"
+ warn "Run: certbot certonly --nginx -d ${DOMAIN}"
+ }
+ fi
+ else
+ warn "certbot not installed - configure SSL manually"
+ fi
+fi
+
+# 6. Reload Nginx
+# The reload result is checked explicitly: a failure becomes a warning so the
+# run still reaches the summary, and "Reloaded Nginx" is only logged when
+# systemctl actually succeeded.
+if [[ "$SKIP_NGINX" != "true" && "$DRY_RUN" != "true" ]]; then
+    if ! nginx -t 2>/dev/null; then
+        warn "Nginx config test failed - check config manually"
+    elif systemctl reload nginx; then
+        log "Reloaded Nginx"
+    else
+        warn "Nginx reload failed - check the nginx service manually"
+    fi
+fi
+
+# -- Summary --
+
+echo ""
+echo -e "${BOLD}Deployment summary:${RESET}"
+echo " Docker Compose: ${INSTALL_DIR}/docker-compose.yml"
+echo " FreshRSS: http://127.0.0.1:${PORT}"
+if [[ "$SKIP_NGINX" != "true" ]]; then
+ if [[ "$SKIP_SSL" == "true" ]]; then
+ echo " Public URL: http://${DOMAIN}"
+ else
+ echo " Public URL: https://${DOMAIN}"
+ fi
+ echo " Nginx config: ${NGINX_CONF:-/etc/nginx/conf.d/freshrss.conf}"
+fi
+echo " Database: PostgreSQL (freshrss-db container)"
+echo " Feed updates: Every ${CRON_MIN} minutes"
+echo " Data directory: ${INSTALL_DIR}/data/"
+echo ""
+echo -e "${BOLD}Next steps:${RESET}"
+if [[ "$SKIP_SSL" == "true" ]]; then
+ echo " 1. Open http://${DOMAIN} and complete the setup wizard"
+else
+ echo " 1. Open https://${DOMAIN} and complete the setup wizard"
+fi
+echo " 2. Database config in wizard:"
+echo " Type: PostgreSQL"
+echo " Host: freshrss-db"
+echo " Database: freshrss"
+echo " User: freshrss"
+echo " Password: (saved in ${INSTALL_DIR}/docker-compose.yml)"
+echo " 3. Create your admin account"
+echo " 4. Add your first feed: https://mylinux.work/index.xml"
+echo ""
+info "Remove with: $(basename "$0") --remove"
diff --git a/deploy-password-expiry-checker.ps1 b/deploy-password-expiry-checker.ps1
new file mode 100644
index 0000000..5d8e65e
--- /dev/null
+++ b/deploy-password-expiry-checker.ps1
@@ -0,0 +1,389 @@
+<#
+.SYNOPSIS
+ Deploy the password expiry checker to Windows machines.
+.DESCRIPTION
+ Downloads password-expiry-check.ps1, installs it to a configurable
+ directory, creates a scheduled task for recurring checks, and
+ optionally copies the script to NETLOGON for GPO deployment.
+.NOTES
+ Author: Phil Connor
+ License: MIT (https://opensource.org/licenses/MIT)
+ Version: 1.01
+#>
+
+param(
+ [string]$InstallDir = "C:\Scripts",
+ [int]$WarningDays = 14,
+ [int]$IntervalHours = 4,
+ [switch]$NetlogonCopy,
+ [switch]$CmdPrompt,
+ [switch]$NoProfile,
+ [switch]$Remove,
+ [switch]$DryRun,
+ [Alias("h")]
+ [switch]$Help
+)
+
+$ScriptUrl = "https://mylinux.work/downloads/password-expiry-check.ps1.zip"
+$ScriptName = "password-expiry-check.ps1"
+$TaskName = "PasswordExpiryCheck"
+
+# ── Colors ────────────────────────────────────────────────────────────
+
+function Write-OK { param([string]$Msg) Write-Host "[OK] $Msg" -ForegroundColor Green }
+function Write-Warn { param([string]$Msg) Write-Host "[WARN] $Msg" -ForegroundColor Yellow }
+function Write-Err { param([string]$Msg) Write-Host "[ERROR] $Msg" -ForegroundColor Red }
+function Write-Info { param([string]$Msg) Write-Host "[INFO] $Msg" -ForegroundColor Cyan }
+
+# ── Help ──────────────────────────────────────────────────────────────
+
+if ($Help) {
+ Write-Host @"
+Usage: .\deploy-password-expiry-checker.ps1 [OPTIONS]
+
+Deploy password expiry notifications on Windows machines.
+
+Installs:
+ 1. password-expiry-check.ps1 to C:\Scripts\ (configurable)
+ 2. Scheduled task - runs every 4 hours (configurable) under logged-on user
+ 3. Logon-triggered task - fires on every user logon
+ 4. PowerShell profile hook - warning banner in every new PowerShell window
+ 5. Optional cmd.exe AutoRun hook - warning banner in every new cmd window
+ 6. Optional NETLOGON copy for GPO deployment
+
+Options:
+ -InstallDir PATH Installation directory (default: C:\Scripts)
+ -WarningDays N Warning threshold in days (default: 14)
+ -IntervalHours N Scheduled task interval in hours (default: 4)
+ -CmdPrompt Also add warning to cmd.exe via AutoRun registry key
+ -NoProfile Skip PowerShell profile hook (scheduled tasks only)
+ -NetlogonCopy Copy script to NETLOGON share for GPO deployment
+ -Remove Remove deployed components
+ -DryRun Show what would be done without making changes
+ -Help Show this help
+
+Examples:
+ .\deploy-password-expiry-checker.ps1 # install with defaults
+ .\deploy-password-expiry-checker.ps1 -CmdPrompt # also hook into cmd.exe
+ .\deploy-password-expiry-checker.ps1 -NoProfile # skip profile hook
+ .\deploy-password-expiry-checker.ps1 -WarningDays 30 # 30-day warning threshold
+ .\deploy-password-expiry-checker.ps1 -IntervalHours 8 # check every 8 hours
+ .\deploy-password-expiry-checker.ps1 -NetlogonCopy # also copy to NETLOGON
+ .\deploy-password-expiry-checker.ps1 -DryRun # preview changes
+ .\deploy-password-expiry-checker.ps1 -Remove # uninstall
+"@
+ exit 0
+}
+
+# ── Admin check ───────────────────────────────────────────────────────
+
+$currentUser = [Security.Principal.WindowsIdentity]::GetCurrent()
+$principal = New-Object Security.Principal.WindowsPrincipal($currentUser)
+if (-not $principal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)) {
+ Write-Err "Must run as Administrator"
+ exit 1
+}
+
+$ScriptPath = Join-Path $InstallDir $ScriptName
+
+# ── Remove mode ───────────────────────────────────────────────────────
+
+if ($Remove) {
+ Write-Info "Removing password expiry checker deployment..."
+ Write-Host ""
+
+ # Remove scheduled tasks
+ foreach ($name in @($TaskName, "${TaskName}Logon")) {
+ $task = Get-ScheduledTask -TaskName $name -ErrorAction SilentlyContinue
+ if ($task) {
+ if ($DryRun) {
+ Write-Info "Would remove scheduled task: $name"
+ } else {
+ Unregister-ScheduledTask -TaskName $name -Confirm:$false
+ Write-OK "Removed scheduled task: $name"
+ }
+ } else {
+ Write-Info "Scheduled task '$name' not found, skipping"
+ }
+ }
+
+ # Remove script
+ if (Test-Path $ScriptPath) {
+ if ($DryRun) {
+ Write-Info "Would remove: $ScriptPath"
+ } else {
+ Remove-Item -Path $ScriptPath -Force
+ Write-OK "Removed $ScriptPath"
+ }
+ }
+
+ # Remove PowerShell profile hook
+ $profileMarker = "# PasswordExpiryCheck"
+ $allUsersProfile = $PROFILE.AllUsersAllHosts
+ if ((Test-Path $allUsersProfile) -and (Select-String -Path $allUsersProfile -Pattern $profileMarker -Quiet)) {
+ if ($DryRun) {
+ Write-Info "Would remove profile hook from $allUsersProfile"
+ } else {
+ $content = Get-Content $allUsersProfile | Where-Object { $_ -notmatch $profileMarker }
+ if ($content) {
+ Set-Content -Path $allUsersProfile -Value $content
+ } else {
+ Remove-Item -Path $allUsersProfile -Force
+ }
+ Write-OK "Removed PowerShell profile hook"
+ }
+ }
+
+ # Remove cmd.exe AutoRun
+ $cmdAutoRun = Get-ItemProperty -Path "HKLM:\Software\Microsoft\Command Processor" -Name "AutoRun" -ErrorAction SilentlyContinue
+ if ($cmdAutoRun -and $cmdAutoRun.AutoRun -match "password-expiry-check") {
+ if ($DryRun) {
+ Write-Info "Would remove cmd.exe AutoRun registry key"
+ } else {
+ $existing = $cmdAutoRun.AutoRun
+ # Remove our command, handle single command or chained with ampersand
+ $cleaned = ($existing -split '\s*&\s*' | Where-Object { $_ -notmatch 'password-expiry-check' }) -join ' & '
+ if ($cleaned.Trim()) {
+ Set-ItemProperty -Path "HKLM:\Software\Microsoft\Command Processor" -Name "AutoRun" -Value $cleaned.Trim()
+ } else {
+ Remove-ItemProperty -Path "HKLM:\Software\Microsoft\Command Processor" -Name "AutoRun" -ErrorAction SilentlyContinue
+ }
+ Write-OK "Removed cmd.exe AutoRun hook"
+ }
+ }
+
+ # Remove install dir if empty
+ if ((Test-Path $InstallDir) -and @(Get-ChildItem $InstallDir -Force).Count -eq 0) {
+ if ($DryRun) {
+ Write-Info "Would remove empty directory: $InstallDir"
+ } else {
+ Remove-Item -Path $InstallDir -Force
+ Write-OK "Removed empty directory: $InstallDir"
+ }
+ }
+
+ Write-Host ""
+ if (-not $DryRun) {
+ Write-OK "Removal complete."
+ }
+ exit 0
+}
+
+# ── Install mode ──────────────────────────────────────────────────────
+
+Write-Info "Deploying password expiry checker..."
+Write-Host ""
+
+# 1. Create install directory
+if (-not (Test-Path $InstallDir)) {
+ if ($DryRun) {
+ Write-Info "Would create directory: $InstallDir"
+ } else {
+ New-Item -Path $InstallDir -ItemType Directory -Force | Out-Null
+ Write-OK "Created directory: $InstallDir"
+ }
+}
+
+# 2. Download script
+if (Test-Path $ScriptPath) {
+ Write-Info "Script already exists at $ScriptPath - downloading latest version"
+}
+
+if ($DryRun) {
+ Write-Info "Would download $ScriptUrl and extract to $ScriptPath"
+} else {
+ $zipPath = Join-Path $env:TEMP "password-expiry-check.ps1.zip"
+ try {
+ Invoke-WebRequest -Uri $ScriptUrl -OutFile $zipPath -UseBasicParsing -ErrorAction Stop
+ Expand-Archive -Path $zipPath -DestinationPath $InstallDir -Force
+ Remove-Item $zipPath -Force -ErrorAction SilentlyContinue
+ if (Test-Path $ScriptPath) {
+ Write-OK "Downloaded and extracted $ScriptPath"
+ } else {
+ Write-Err "Zip extracted but $ScriptName not found in $InstallDir"
+ exit 1
+ }
+ } catch {
+ Write-Err "Failed to download: $($_.Exception.Message)"
+ exit 1
+ }
+}
+
+# 3. Scheduled task - recurring interval
+$taskArgs = "-NoProfile -ExecutionPolicy Bypass -WindowStyle Hidden -File `"$ScriptPath`" -Quiet -WarningDays $WarningDays"
+
+$existingTask = Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue
+if ($existingTask) {
+ Write-Info "Scheduled task '$TaskName' already exists - recreating"
+ if (-not $DryRun) {
+ Unregister-ScheduledTask -TaskName $TaskName -Confirm:$false
+ }
+}
+
+if ($DryRun) {
+ Write-Info "Would create scheduled task: $TaskName (every ${IntervalHours}h)"
+} else {
+ $action = New-ScheduledTaskAction -Execute "powershell.exe" -Argument $taskArgs
+ $trigger = New-ScheduledTaskTrigger -Once -At (Get-Date).Date.AddHours(9) `
+ -RepetitionInterval (New-TimeSpan -Hours $IntervalHours) `
+ -RepetitionDuration (New-TimeSpan -Days 365)
+ $settings = New-ScheduledTaskSettingsSet `
+ -AllowStartIfOnBatteries `
+ -DontStopIfGoingOnBatteries `
+ -StartWhenAvailable `
+ -RunOnlyIfNetworkAvailable:$false
+ $principal = New-ScheduledTaskPrincipal -GroupId "S-1-5-32-545" -RunLevel Limited
+
+ Register-ScheduledTask -TaskName $TaskName -Action $action -Trigger $trigger `
+ -Settings $settings -Principal $principal `
+ -Description "Check password expiry every $IntervalHours hours (mylinux.work)" | Out-Null
+ Write-OK "Created scheduled task: $TaskName (every ${IntervalHours}h)"
+}
+
+# 4. Logon trigger task
+$logonTaskName = "${TaskName}Logon"
+$existingLogon = Get-ScheduledTask -TaskName $logonTaskName -ErrorAction SilentlyContinue
+if ($existingLogon) {
+ Write-Info "Logon task '$logonTaskName' already exists - recreating"
+ if (-not $DryRun) {
+ Unregister-ScheduledTask -TaskName $logonTaskName -Confirm:$false
+ }
+}
+
+if ($DryRun) {
+ Write-Info "Would create logon trigger task: $logonTaskName"
+} else {
+ $logonAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument $taskArgs
+ $logonTrigger = New-ScheduledTaskTrigger -AtLogOn
+ $logonSettings = New-ScheduledTaskSettingsSet `
+ -AllowStartIfOnBatteries `
+ -DontStopIfGoingOnBatteries `
+ -StartWhenAvailable `
+ -ExecutionTimeLimit (New-TimeSpan -Minutes 5)
+ # Delay 30 seconds after logon to let the desktop load
+ $logonTrigger.Delay = "PT30S"
+ $logonPrincipal = New-ScheduledTaskPrincipal -GroupId "S-1-5-32-545" -RunLevel Limited
+
+ Register-ScheduledTask -TaskName $logonTaskName -Action $logonAction -Trigger $logonTrigger `
+ -Settings $logonSettings -Principal $logonPrincipal `
+ -Description "Check password expiry at logon (mylinux.work)" | Out-Null
+ Write-OK "Created logon trigger task: $logonTaskName"
+}
+
+# 5. NETLOGON copy (optional)
+if ($NetlogonCopy) {
+ $logonServer = $env:LOGONSERVER
+ if ($logonServer) {
+ $netlogonPath = Join-Path "$logonServer\NETLOGON" $ScriptName
+ if ($DryRun) {
+ Write-Info "Would copy $ScriptPath to $netlogonPath"
+ } else {
+ try {
+ Copy-Item -Path $ScriptPath -Destination $netlogonPath -Force -ErrorAction Stop
+ Write-OK "Copied to $netlogonPath"
+ } catch {
+ Write-Warn "Could not copy to NETLOGON: $($_.Exception.Message)"
+ Write-Warn "Copy manually: Copy-Item '$ScriptPath' '$netlogonPath'"
+ }
+ }
+ } else {
+ Write-Warn "LOGONSERVER not set - machine may not be domain-joined"
+ Write-Warn "Copy manually to \\DC\NETLOGON\$ScriptName"
+ }
+}
+
+# 6. PowerShell profile hook (default, skip with -NoProfile)
+# The hook is a single line in the all-users profile. It ends with a marker
+# comment so a later run can find the old line and replace it.
+$profileMarker = "# PasswordExpiryCheck"
+$profileLine = "& `"$ScriptPath`" -Quiet -WarningDays $WarningDays $profileMarker"
+
+if (-not $NoProfile) {
+    $allUsersProfile = $PROFILE.AllUsersAllHosts
+    $profileDir = Split-Path $allUsersProfile -Parent
+
+    # Check if hook already exists
+    $hookExists = (Test-Path $allUsersProfile) -and (Select-String -Path $allUsersProfile -Pattern $profileMarker -Quiet)
+
+    if ($hookExists) {
+        Write-Info "PowerShell profile hook already present - updating"
+        if (-not $DryRun) {
+            # @() forces an array. When exactly one line survives the filter the
+            # pipeline returns a bare string, and += would then append the hook
+            # to the end of that line instead of adding a new line.
+            $content = @(Get-Content $allUsersProfile | Where-Object { $_ -notmatch $profileMarker })
+            $content += $profileLine
+            Set-Content -Path $allUsersProfile -Value $content
+        }
+    } else {
+        if ($DryRun) {
+            Write-Info "Would add profile hook to $allUsersProfile"
+        } else {
+            if (-not (Test-Path $profileDir)) {
+                New-Item -Path $profileDir -ItemType Directory -Force | Out-Null
+            }
+            Add-Content -Path $allUsersProfile -Value $profileLine
+        }
+    }
+    # Report success only when the profile was really written, matching the
+    # other steps, which stay silent about completion during -DryRun.
+    if (-not $DryRun) {
+        Write-OK "PowerShell profile hook: $allUsersProfile"
+    }
+} else {
+    Write-Info "Skipping PowerShell profile hook (-NoProfile)"
+}
+
+# 7. cmd.exe AutoRun hook (optional, enable with -CmdPrompt)
+if ($CmdPrompt) {
+ $cmdCommand = '@powershell.exe -NoProfile -ExecutionPolicy Bypass -File "' + $ScriptPath + '" -Quiet -WarningDays ' + $WarningDays
+ $regPath = "HKLM:\Software\Microsoft\Command Processor"
+
+ $existing = Get-ItemProperty -Path $regPath -Name "AutoRun" -ErrorAction SilentlyContinue
+ if ($existing -and $existing.AutoRun -match "password-expiry-check") {
+ Write-Info "cmd.exe AutoRun hook already present - updating"
+ if (-not $DryRun) {
+ $cleaned = ($existing.AutoRun -split '\s*&\s*' | Where-Object { $_ -notmatch 'password-expiry-check' }) -join ' & '
+ if ($cleaned.Trim()) {
+ $newValue = $cleaned.Trim() + " & " + $cmdCommand
+ } else {
+ $newValue = $cmdCommand
+ }
+ Set-ItemProperty -Path $regPath -Name "AutoRun" -Value $newValue
+ }
+ } elseif ($existing -and $existing.AutoRun.Trim()) {
+ if ($DryRun) {
+ Write-Info "Would append to existing cmd.exe AutoRun"
+ } else {
+ $newValue = $existing.AutoRun.Trim() + " & " + $cmdCommand
+ Set-ItemProperty -Path $regPath -Name "AutoRun" -Value $newValue
+ }
+ } else {
+ if ($DryRun) {
+ Write-Info "Would create cmd.exe AutoRun registry key"
+ } else {
+ Set-ItemProperty -Path $regPath -Name "AutoRun" -Value $cmdCommand
+ }
+ }
+ Write-OK "cmd.exe AutoRun hook: $regPath"
+}
+
+# ── Summary ───────────────────────────────────────────────────────────
+
+# Print what was deployed and how users will be notified. Lines that describe
+# optional components are only shown when that component was requested.
+Write-Host ""
+Write-Host "Deployment summary:" -ForegroundColor White
+Write-Host " Script: $ScriptPath"
+Write-Host " Warning: $WarningDays days"
+Write-Host " Interval task: $TaskName (every ${IntervalHours}h)"
+Write-Host " Logon task: $logonTaskName (at user logon, 30s delay)"
+if (-not $NoProfile) {
+    Write-Host " PS profile: $($PROFILE.AllUsersAllHosts) (all users)"
+}
+if ($CmdPrompt) {
+    Write-Host " cmd.exe: AutoRun registry hook (HKLM)"
+}
+if ($NetlogonCopy) {
+    Write-Host " NETLOGON: $env:LOGONSERVER\NETLOGON\$ScriptName"
+}
+Write-Host ""
+Write-Host "Users will see warnings via:" -ForegroundColor White
+Write-Host " MessageBox popup every $IntervalHours hours (scheduled task)"
+Write-Host " MessageBox popup at logon (logon trigger task)"
+# The profile hook is skipped with -NoProfile, so do not advertise its banner then.
+if (-not $NoProfile) {
+    Write-Host " Terminal banner in new PowerShell windows (profile hook)"
+}
+if ($CmdPrompt) {
+    Write-Host " Terminal banner in new cmd.exe windows (AutoRun hook)"
+}
+Write-Host ""
+Write-Info "Test with: & '$ScriptPath' -Test"
+Write-Info "Remove with: .\deploy-password-expiry-checker.ps1 -Remove"
diff --git a/deploy-password-expiry-timer.sh b/deploy-password-expiry-timer.sh
new file mode 100644
index 0000000..7757cae
--- /dev/null
+++ b/deploy-password-expiry-timer.sh
@@ -0,0 +1,249 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### deploy-password-expiry-timer.sh — Deploy password expiry desktop notifications ####
+#### Sets up systemd user timer + /etc/bashrc integration for all users. ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### sudo ./deploy-password-expiry-timer.sh ####
+#### sudo ./deploy-password-expiry-timer.sh --dry-run ####
+#### sudo ./deploy-password-expiry-timer.sh --remove ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+DRY_RUN=false
+REMOVE=false
+SCRIPT_PATH="/usr/local/bin/password-expiry-check.sh"
+SCRIPT_URL="https://mylinux.work/downloads/password-expiry-check.sh"
+
+# ── Colors ────────────────────────────────────────────────────────────
+if [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BOLD='\033[1m'
+ RESET='\033[0m'
+else
+ RED="" GREEN="" YELLOW="" BOLD="" RESET=""
+fi
+
+log() { echo -e "${GREEN}[OK]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*"; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+info() { echo -e "${BOLD}[INFO]${RESET} $*"; }
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat </dev/null"
+BASHRC_MARKER="# password-expiry-check"
+
+# ── Remove mode ───────────────────────────────────────────────────────
+if [[ "$REMOVE" == "true" ]]; then
+ info "Removing password expiry timer deployment..."
+ echo ""
+
+ # Disable global timer
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would run: systemctl --global disable password-expiry-check.timer"
+ else
+ systemctl --global disable password-expiry-check.timer 2>/dev/null && \
+ log "Disabled global user timer" || info "Timer was not enabled"
+ fi
+
+ # Remove systemd files
+ for f in /etc/systemd/user/password-expiry-check.service /etc/systemd/user/password-expiry-check.timer; do
+ if [[ -f "$f" ]]; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would remove: $f"
+ else
+ rm -f "$f"
+ log "Removed $f"
+ fi
+ fi
+ done
+
+ # Remove XDG autostart
+ if [[ -f /etc/xdg/autostart/password-expiry-check.desktop ]]; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would remove: /etc/xdg/autostart/password-expiry-check.desktop"
+ else
+ rm -f /etc/xdg/autostart/password-expiry-check.desktop
+ log "Removed XDG autostart"
+ fi
+ fi
+
+ # Remove bashrc entry
+ if grep -q "$BASHRC_MARKER" /etc/bashrc 2>/dev/null; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would remove password-expiry lines from /etc/bashrc"
+ else
+ sed -i "/${BASHRC_MARKER}/d" /etc/bashrc
+ sed -i "/password-expiry-check/d" /etc/bashrc
+ log "Removed /etc/bashrc entry"
+ fi
+ fi
+
+ echo ""
+ if [[ "$DRY_RUN" != "true" ]]; then
+ log "Removal complete. Script left at ${SCRIPT_PATH} (remove manually if desired)"
+ fi
+ exit 0
+fi
+
+# ── Install mode ──────────────────────────────────────────────────────
+info "Deploying password expiry notifications..."
+echo ""
+
+# 1. Install script
+# The download goes to a temporary file next to SCRIPT_PATH and is moved into
+# place only after it succeeded. A failed or partial transfer therefore never
+# leaves a file at SCRIPT_PATH that the next run would report as installed.
+if [[ -f "$SCRIPT_PATH" ]]; then
+    info "Script already exists at ${SCRIPT_PATH}"
+elif [[ "$DRY_RUN" == "true" ]]; then
+    info "Would download ${SCRIPT_URL} to ${SCRIPT_PATH}"
+else
+    if command -v curl &>/dev/null; then
+        # -f makes curl fail on an HTTP error instead of saving the error page
+        fetch_cmd=(curl -fsSL -o)
+    elif command -v wget &>/dev/null; then
+        fetch_cmd=(wget -q -O)
+    else
+        err "Neither curl nor wget found — copy password-expiry-check.sh to ${SCRIPT_PATH} manually"
+        exit 1
+    fi
+
+    script_tmp=$(mktemp "${SCRIPT_PATH}.XXXXXX")
+    if ! "${fetch_cmd[@]}" "$script_tmp" "$SCRIPT_URL" || [[ ! -s "$script_tmp" ]]; then
+        rm -f "$script_tmp"
+        err "Failed to download ${SCRIPT_URL}"
+        exit 1
+    fi
+
+    # mktemp creates the file owner-only; every user must be able to read and run it
+    chmod 0755 "$script_tmp"
+    mv -f "$script_tmp" "$SCRIPT_PATH"
+    log "Installed ${SCRIPT_PATH}"
+fi
+
+# 2. Systemd user service
+SERVICE_CONTENT="[Unit]
+Description=Password Expiry Checker
+After=graphical-session.target
+
+[Service]
+Type=oneshot
+ExecStart=${SCRIPT_PATH} -q
+Environment=DISPLAY=:0"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: /etc/systemd/user/password-expiry-check.service"
+else
+ mkdir -p /etc/systemd/user
+ echo "$SERVICE_CONTENT" > /etc/systemd/user/password-expiry-check.service
+ log "Created /etc/systemd/user/password-expiry-check.service"
+fi
+
+# 3. Systemd user timer — every 4 hours
+TIMER_CONTENT="[Unit]
+Description=Check password expiry every 4 hours
+
+[Timer]
+OnStartupSec=60
+OnUnitActiveSec=4h
+Persistent=true
+
+[Install]
+WantedBy=timers.target"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+    info "Would create: /etc/systemd/user/password-expiry-check.timer"
+    info "Would run: systemctl --global enable password-expiry-check.timer"
+else
+    echo "$TIMER_CONTENT" > /etc/systemd/user/password-expiry-check.timer
+    log "Created /etc/systemd/user/password-expiry-check.timer"
+
+    # systemctl's stderr is discarded, so a failure under "set -e" used to end
+    # the script without any message; report it explicitly before exiting.
+    if systemctl --global enable password-expiry-check.timer 2>/dev/null; then
+        log "Enabled timer globally for all users"
+    else
+        err "Failed to enable password-expiry-check.timer (systemctl --global enable)"
+        exit 1
+    fi
+fi
+
+# 4. XDG autostart (graphical login trigger with delay)
+DESKTOP_CONTENT="[Desktop Entry]
+Type=Application
+Name=Password Expiry Checker
+Comment=Check password expiry on login
+Exec=bash -c 'sleep 10 && ${SCRIPT_PATH} -q'
+Terminal=false
+NoDisplay=true
+X-GNOME-Autostart-enabled=true"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would create: /etc/xdg/autostart/password-expiry-check.desktop"
+else
+ mkdir -p /etc/xdg/autostart
+ echo "$DESKTOP_CONTENT" > /etc/xdg/autostart/password-expiry-check.desktop
+ log "Created /etc/xdg/autostart/password-expiry-check.desktop"
+fi
+
+# 5. /etc/bashrc entry (terminal warning)
+if grep -q "$BASHRC_MARKER" /etc/bashrc 2>/dev/null; then
+ info "/etc/bashrc entry already exists"
+else
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "Would add to /etc/bashrc:"
+ echo " ${BASHRC_LINE}"
+ echo " ${BASHRC_EXEC}"
+ else
+ {
+ echo ""
+ echo "$BASHRC_LINE"
+ echo "$BASHRC_EXEC ${BASHRC_MARKER}"
+ } >> /etc/bashrc
+ log "Added /etc/bashrc entry"
+ fi
+fi
+
+echo ""
+echo -e "${BOLD}Deployment summary:${RESET}"
+echo " • Script: ${SCRIPT_PATH}"
+echo " • Timer: /etc/systemd/user/password-expiry-check.timer (every 4h)"
+echo " • XDG autostart: /etc/xdg/autostart/password-expiry-check.desktop (login + 10s delay)"
+echo " • Terminal: /etc/bashrc (quiet mode — warns only when near expiry)"
+echo ""
+echo -e "${BOLD}Users will see warnings via:${RESET}"
+echo " • Desktop popup every 4 hours (systemd timer)"
+echo " • Desktop popup on graphical login (XDG autostart)"
+echo " • Terminal banner on every new shell (bashrc)"
+echo ""
+info "Test with: ${SCRIPT_PATH} --test"
+info "Remove with: $(basename "$0") --remove"
diff --git a/dhcp-lease-exporter.sh b/dhcp-lease-exporter.sh
new file mode 100644
index 0000000..4072830
--- /dev/null
+++ b/dhcp-lease-exporter.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+################################################################################
+# Script Name: dhcp-lease-exporter.sh
+# Version: 1.01
+# Description: Prometheus exporter for DHCP lease metrics — pool utilization,
+# active leases per subnet, lease expirations, reservation status,
+# DORA packet counts, and lease duration tracking for ISC DHCP
+# (dhcpd) and ISC Kea.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Usage:
+# # Output to stdout
+# sudo ./dhcp-lease-exporter.sh
+#
+# # Textfile collector mode
+# sudo ./dhcp-lease-exporter.sh --textfile
+#
+# # HTTP server mode
+# sudo ./dhcp-lease-exporter.sh --http
+#
+# # Custom port
+# sudo ./dhcp-lease-exporter.sh --http --port 9533
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Exporter version, reported in the dhcp_exporter_info metric.
+# NOTE(review): the file header says "Version: 1.01" — confirm which is intended.
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="${0##*/}"
+
+# DHCP backend — auto, dhcpd, or kea
+# "auto" makes detect_backend probe systemd units and then the lease files;
+# any other value is used as-is.
+DHCP_BACKEND="auto"
+
+# dhcpd paths
+DHCPD_LEASES="/var/lib/dhcp/dhcpd.leases"
+DHCPD_CONF="/etc/dhcp/dhcpd.conf"
+
+# Kea paths and API
+# KEA_USE_API="true" makes collect_kea_metrics query KEA_API; any other value
+# makes it read KEA_LEASES directly.
+KEA_LEASES="/var/lib/kea/kea-leases4.csv"
+KEA_API="http://127.0.0.1:8000"
+KEA_USE_API="true"
+
+# Output settings
+# TEXTFILE_DIR is where textfile mode writes dhcp-metrics.prom; HTTP_PORT is
+# the port http mode listens on.
+TEXTFILE_DIR="/var/lib/node_exporter/textfile_collector"
+HTTP_PORT=9533
+LOCK_FILE="/tmp/dhcp-lease-exporter.lock"
+
+# Runtime
+# MODE selects the output_metrics branch (stdout, textfile or http); ONCE
+# makes the HTTP loop stop after one response; DETECTED_BACKEND is filled in
+# by detect_backend.
+MODE="stdout"
+ONCE=false
+DETECTED_BACKEND=""
+
+# ============================================================================
+# COLORS
+# ============================================================================
+
+# ANSI colour escape sequences used by the log_* helpers; they are stored
+# unexpanded and turned into real escapes by `echo -e`.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+NC='\033[0m'
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+# Informational message: green [INFO] tag, written to stderr so it never
+# mixes with metric output on stdout.
+log_info() {
+    >&2 echo -e "${GREEN}[INFO]${NC} $*"
+}
+
+# Warning message: yellow [WARN] tag, written to stderr.
+log_warn() {
+    >&2 echo -e "${YELLOW}[WARN]${NC} $*"
+}
+
+# Error message: red [ERROR] tag, written to stderr.
+log_error() {
+    >&2 echo -e "${RED}[ERROR]${NC} $*"
+}
+
+show_usage() {
+ cat </dev/null || true)
+ if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+ log_error "Another instance is running (PID $pid)"
+ exit 1
+ fi
+ rm -f "$LOCK_FILE"
+ fi
+ echo $$ > "$LOCK_FILE"
+ trap 'rm -f "$LOCK_FILE"' EXIT
+}
+
+# ============================================================================
+# BACKEND DETECTION
+# ============================================================================
+
+# Decide which DHCP server this host runs and store it in DETECTED_BACKEND.
+# Globals:   DHCP_BACKEND, KEA_LEASES, DHCPD_LEASES (read)
+#            DETECTED_BACKEND (written: kea, dhcpd, unknown, or the explicit
+#            DHCP_BACKEND value when it is not "auto")
+# Probe order: active Kea units, active ISC dhcpd units, Kea lease file,
+# dhcpd lease file.
+detect_backend() {
+    local unit
+
+    if [ "$DHCP_BACKEND" != "auto" ]; then
+        DETECTED_BACKEND="$DHCP_BACKEND"
+        return
+    fi
+
+    for unit in isc-kea-dhcp4-server kea-dhcp4; do
+        if systemctl is-active --quiet "$unit" 2>/dev/null; then
+            DETECTED_BACKEND="kea"
+            return
+        fi
+    done
+
+    for unit in isc-dhcp-server dhcpd; do
+        if systemctl is-active --quiet "$unit" 2>/dev/null; then
+            DETECTED_BACKEND="dhcpd"
+            return
+        fi
+    done
+
+    # No running service found: fall back to whichever lease file exists.
+    if [ -f "$KEA_LEASES" ]; then
+        DETECTED_BACKEND="kea"
+    elif [ -f "$DHCPD_LEASES" ]; then
+        DETECTED_BACKEND="dhcpd"
+    else
+        DETECTED_BACKEND="unknown"
+    fi
+}
+
+# ============================================================================
+# DHCPD FUNCTIONS
+# ============================================================================
+
+# Parse dhcpd.conf for subnet definitions and pool ranges
+# Parse dhcpd.conf for subnet definitions and pool ranges.
+# Globals:   DHCPD_CONF (read)
+# Outputs:   one line per subnet block that contains a range:
+#              <network>/<prefix>|<name>|<pool size>|<range start>|<range end>
+#            <name> is the first comment line found inside the block, or the
+#            network address when the block has no comment.
+# Returns:   non-zero when DHCPD_CONF does not exist.
+# Notes:     brace nesting is not tracked — the first line that starts with
+#            '}' after a subnet declaration ends that block, and only the
+#            last range seen before it is reported.
+parse_dhcpd_subnets() {
+    local conf="$DHCPD_CONF"
+    [ -f "$conf" ] || return
+
+    local current_subnet="" current_name="" range_start="" range_end=""
+    local in_subnet=false
+    local line netmask cidr total
+
+    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
+    # without it a closing '}' at end-of-file would drop the last subnet.
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        if [[ "$line" =~ ^[[:space:]]*subnet[[:space:]]+([0-9.]+)[[:space:]]+netmask[[:space:]]+([0-9.]+) ]]; then
+            # Subnet declaration: start a new block and reset the range.
+            current_name="${BASH_REMATCH[1]}"
+            netmask="${BASH_REMATCH[2]}"
+            cidr=$(netmask_to_cidr "$netmask")
+            current_subnet="${current_name}/${cidr}"
+            in_subnet=true
+            range_start=""
+            range_end=""
+        elif ! $in_subnet; then
+            continue
+        elif [[ "$line" =~ ^[[:space:]]*#[[:space:]]*(.+) ]]; then
+            # Only the first comment becomes the name: once the name differs
+            # from the bare network address it is left alone.
+            if [ "$current_name" = "${current_subnet%%/*}" ]; then
+                current_name="${BASH_REMATCH[1]}"
+            fi
+        elif [[ "$line" =~ ^[[:space:]]*range[[:space:]]+([0-9.]+)[[:space:]]+([0-9.]+) ]]; then
+            range_start="${BASH_REMATCH[1]}"
+            range_end="${BASH_REMATCH[2]}"
+        elif [[ "$line" =~ ^[[:space:]]*\} ]]; then
+            # End of block: emit it only when a range was found.
+            if [ -n "$range_start" ] && [ -n "$range_end" ]; then
+                total=$(ip_range_count "$range_start" "$range_end")
+                echo "${current_subnet}|${current_name}|${total}|${range_start}|${range_end}"
+            fi
+            in_subnet=false
+        fi
+    done < "$conf"
+}
+
+# Convert a dotted-quad netmask to its prefix length.
+# Arguments: $1 - netmask, e.g. 255.255.252.0
+# Outputs:   the prefix length (0-32) on stdout
+# Notes:     octets that are not a valid mask byte contribute nothing;
+#            contiguity of the mask is not checked.
+netmask_to_cidr() {
+    local netmask="$1"
+    local cidr=0
+    local octet o1 o2 o3 o4
+
+    # Split with read instead of an unquoted $(echo | tr): no subshell, no
+    # glob expansion, and the loop variable stays local.
+    IFS='.' read -r o1 o2 o3 o4 <<< "$netmask"
+    for octet in "$o1" "$o2" "$o3" "$o4"; do
+        case "$octet" in
+            255) cidr=$((cidr + 8)) ;;
+            254) cidr=$((cidr + 7)) ;;
+            252) cidr=$((cidr + 6)) ;;
+            248) cidr=$((cidr + 5)) ;;
+            240) cidr=$((cidr + 4)) ;;
+            224) cidr=$((cidr + 3)) ;;
+            192) cidr=$((cidr + 2)) ;;
+            128) cidr=$((cidr + 1)) ;;
+            *) ;;
+        esac
+    done
+    echo "$cidr"
+}
+
+# Convert a dotted-quad IPv4 address to its 32-bit integer value.
+# Arguments: $1 - IPv4 address, e.g. 192.168.1.10
+# Outputs:   the integer on stdout
+# Notes:     each octet is forced to base 10 so zero-padded input such as
+#            192.168.001.008 is not read as octal (where "08" is an error);
+#            missing octets count as 0.
+ip_to_int() {
+    local a b c d
+    IFS='.' read -r a b c d <<< "$1"
+    echo $(( (10#${a:-0} << 24) + (10#${b:-0} << 16) + (10#${c:-0} << 8) + 10#${d:-0} ))
+}
+
+# Number of addresses in the inclusive range $1..$2.
+# Arguments: $1 - first address, $2 - last address (dotted quad)
+# Outputs:   last - first + 1 on stdout
+ip_range_count() {
+    local first last
+    first=$(ip_to_int "$1")
+    last=$(ip_to_int "$2")
+    echo $(( 1 + last - first ))
+}
+
+# Count active leases per subnet from dhcpd.leases
+# List the currently active leases recorded in dhcpd.leases.
+# Globals:   DHCPD_LEASES (read)
+# Outputs:   one "<ip> <seconds remaining>" line per active lease, in no
+#            particular order
+# Returns:   non-zero when the lease file does not exist
+# Notes:
+#   * The lease file is an append-only journal, so an address can appear
+#     many times; the state is reset at every "lease <ip> {" line so only
+#     the last entry for an address decides whether it is active.
+#   * Timestamps in the default format are UTC, so awk runs with TZ=UTC to
+#     make mktime() interpret them that way. The "ends epoch <seconds>"
+#     form is accepted as well.
+#   * Leases with "ends never" are not reported (no finite remaining time).
+#   * mktime() is a gawk extension.
+count_dhcpd_leases() {
+    local lease_file="$DHCPD_LEASES"
+    [ -f "$lease_file" ] || return
+
+    local now
+    now=$(date +%s)
+
+    TZ=UTC awk -v now="$now" '
+        /^lease / {
+            ip = $2
+            delete remaining[ip]
+            delete bound[ip]
+            next
+        }
+        ip == "" { next }
+        /^}/ { ip = ""; next }
+        /^[[:space:]]*ends / {
+            gsub(/[;\/:]/, " ", $0)
+            if ($2 == "epoch") {
+                t = $3 + 0
+            } else if ($2 != "never") {
+                t = mktime($3 " " $4 " " $5 " " $6 " " $7 " " $8)
+            } else {
+                next
+            }
+            if (t > now) remaining[ip] = t - now
+            next
+        }
+        # Anchored so "next binding state active" is not mistaken for the
+        # current state.
+        /^[[:space:]]*binding state active/ { bound[ip] = 1 }
+        END {
+            for (ip in remaining) {
+                if (ip in bound) {
+                    print ip, remaining[ip]
+                }
+            }
+        }' "$lease_file"
+}
+
+# Count reservations from dhcpd.conf
+# Count the lines in dhcpd.conf that mention "fixed-address".
+# Globals:   DHCPD_CONF (read)
+# Outputs:   the number of matching lines (0 when none match)
+# Returns:   1 without output when DHCPD_CONF does not exist
+count_dhcpd_reservations() {
+    if [ ! -f "$DHCPD_CONF" ]; then
+        return 1
+    fi
+    # grep -c exits non-zero on a zero count; that is not an error here.
+    grep -c "fixed-address" "$DHCPD_CONF" 2>/dev/null || :
+}
+
+# Parse DORA stats from syslog
+# Count DHCP message types logged to syslog.
+# Arguments: $1 - (optional) log file to scan; when omitted /var/log/syslog
+#                 is used, falling back to /var/log/messages
+# Outputs:   discovers|offers|requests|acks|naks|declines|releases
+# Returns:   non-zero without output when no log file is found
+# Notes:     the file is read once (one awk pass) instead of once per message
+#            type; each counter is the number of lines containing the
+#            keyword, and is always a number even when the file is empty.
+parse_dhcpd_dora() {
+    local logfile="${1:-}"
+    if [ -z "$logfile" ]; then
+        logfile="/var/log/syslog"
+        [ -f "$logfile" ] || logfile="/var/log/messages"
+    fi
+    [ -f "$logfile" ] || return
+
+    awk '
+        index($0, "DHCPDISCOVER") { discovers++ }
+        index($0, "DHCPOFFER")    { offers++ }
+        index($0, "DHCPREQUEST")  { requests++ }
+        index($0, "DHCPACK")      { acks++ }
+        index($0, "DHCPNAK")      { naks++ }
+        index($0, "DHCPDECLINE")  { declines++ }
+        index($0, "DHCPRELEASE")  { releases++ }
+        END {
+            printf "%d|%d|%d|%d|%d|%d|%d\n", discovers, offers, requests, acks, naks, declines, releases
+        }' "$logfile" 2>/dev/null || true
+}
+
+# ============================================================================
+# KEA FUNCTIONS
+# ============================================================================
+
+# Send one command to the Kea control API for the dhcp4 service.
+# Globals:   KEA_API (read)
+# Arguments: $1 - command name, inserted verbatim into the JSON request
+# Outputs:   the raw response body; curl's stderr is discarded and the
+#            request is given at most 5 seconds
+kea_api_call() {
+    local command="$1"
+    local payload="{\"command\": \"${command}\", \"service\": [\"dhcp4\"]}"
+
+    curl --silent --max-time 5 --request POST "${KEA_API}" \
+        --header "Content-Type: application/json" \
+        --data "$payload" 2>/dev/null
+}
+
+# List the currently active leases in the Kea memfile lease CSV.
+# Globals:   KEA_LEASES (read)
+# Outputs:   one "<ip> <seconds remaining>" line per active lease, in no
+#            particular order
+# Returns:   non-zero when the lease file does not exist
+# Notes:
+#   * Columns are located by name from the header row ("expire", "state"),
+#     defaulting to the lease4 layout
+#       address,hwaddr,client_id,valid_lifetime,expire,subnet_id,
+#       fqdn_fwd,fqdn_rev,hostname,state,...
+#     i.e. expire = column 5 and state = column 10.
+#   * The file is a journal: an address can occur several times, and the
+#     last row decides. A lease is active when state is 0 and its expire
+#     time lies in the future.
+parse_kea_leases_file() {
+    local lease_file="$KEA_LEASES"
+    [ -f "$lease_file" ] || return
+
+    local now
+    now=$(date +%s)
+
+    awk -F',' -v now="$now" '
+        NR == 1 {
+            expire_col = 5
+            state_col = 10
+            for (i = 1; i <= NF; i++) {
+                if ($i == "expire") expire_col = i
+                else if ($i == "state") state_col = i
+            }
+            next
+        }
+        NF >= expire_col && NF >= state_col {
+            ip = $1
+            if ($(state_col) == 0 && $(expire_col) + 0 > now) {
+                remaining[ip] = $(expire_col) - now
+            } else {
+                delete remaining[ip]
+            }
+        }
+        END {
+            for (ip in remaining) {
+                print ip, remaining[ip]
+            }
+        }' "$lease_file"
+}
+
+# Ask the Kea API for its subnet list and print "<id>|<subnet>" per subnet.
+# Outputs:   one line per entry of arguments.subnets when the first reply
+#            element has result 0; nothing otherwise
+# Returns:   1 when the API gave no response; otherwise python3's status
+#            (its stderr is discarded)
+parse_kea_api_subnets() {
+    local reply
+    reply=$(kea_api_call "subnet4-list")
+    [ -n "$reply" ] || return 1
+
+    python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+if data[0]['result'] == 0:
+    for s in data[0].get('arguments', {}).get('subnets', []):
+        sid = s.get('id', 0)
+        subnet = s.get('subnet', '')
+        print(f'{sid}|{subnet}')
+" <<< "$reply" 2>/dev/null
+}
+
+# Fetch the raw "statistic-get-all" response from the Kea API.
+# Outputs:   the response body, unmodified
+# Returns:   1 when the API gave no response
+parse_kea_api_stats() {
+    local reply
+    reply=$(kea_api_call "statistic-get-all")
+    [ -n "$reply" ] || return 1
+    echo "$reply"
+}
+
+# ============================================================================
+# METRIC COLLECTION
+# ============================================================================
+
+# Build the complete metric document for the detected backend and print it.
+# Globals:   DETECTED_BACKEND, VERSION, DHCPD_LEASES, KEA_LEASES (read)
+# Outputs:   the metrics text on stdout
+# Notes:     `metrics`, `subnet_count` and `total_active` are locals of this
+#            function that the backend collectors update directly (bash
+#            dynamic scoping), so those names must not change.
+collect_metrics() {
+    local t_begin
+    t_begin=$(date +%s%N)
+    local metrics=""
+
+    # Exporter status
+    metrics+="$(write_metric_header "dhcp_up" "gauge" "Exporter status (1=up, 0=down)")"$'\n'
+    metrics+="$(write_metric_header "dhcp_exporter_info" "gauge" "Exporter version and backend")"$'\n'
+
+    # Without a backend there is nothing to measure: report down and stop.
+    if [ "$DETECTED_BACKEND" = "unknown" ]; then
+        metrics+="dhcp_up 0"$'\n'
+        echo "$metrics"
+        return
+    fi
+
+    metrics+="dhcp_up 1"$'\n'
+    metrics+="dhcp_exporter_info{version=\"${VERSION}\",backend=\"${DETECTED_BACKEND}\"} 1"$'\n'
+
+    local subnet_count=0
+    local total_active=0
+
+    case "$DETECTED_BACKEND" in
+        dhcpd) collect_dhcpd_metrics ;;
+        kea) collect_kea_metrics ;;
+    esac
+
+    # Subnet count
+    metrics+="$(write_metric_header "dhcp_subnets_total" "gauge" "Total number of configured subnets")"$'\n'
+    metrics+="dhcp_subnets_total ${subnet_count}"$'\n'
+
+    # Total active leases
+    metrics+="$(write_metric_header "dhcp_leases_active_total" "gauge" "Total active leases across all subnets")"$'\n'
+    metrics+="dhcp_leases_active_total ${total_active}"$'\n'
+
+    # Lease file info: both backends report the same two gauges, only the
+    # file differs.
+    local backend_lease_file=""
+    case "$DETECTED_BACKEND" in
+        dhcpd) backend_lease_file="$DHCPD_LEASES" ;;
+        kea) backend_lease_file="$KEA_LEASES" ;;
+    esac
+    if [ -n "$backend_lease_file" ] && [ -f "$backend_lease_file" ]; then
+        local age_seconds size_bytes
+        age_seconds=$(( $(date +%s) - $(stat -c %Y "$backend_lease_file") ))
+        size_bytes=$(stat -c %s "$backend_lease_file")
+        metrics+="$(write_metric_header "dhcp_lease_file_age_seconds" "gauge" "Seconds since the lease file was last modified")"$'\n'
+        metrics+="dhcp_lease_file_age_seconds ${age_seconds}"$'\n'
+        metrics+="$(write_metric_header "dhcp_lease_file_size_bytes" "gauge" "Size of the lease file")"$'\n'
+        metrics+="dhcp_lease_file_size_bytes ${size_bytes}"$'\n'
+    fi
+
+    # Execution time (nanosecond timestamps, converted to seconds by bc;
+    # reported as 0 when bc is unavailable)
+    local t_end elapsed
+    t_end=$(date +%s%N)
+    elapsed=$(echo "scale=2; ($t_end - $t_begin) / 1000000000" | bc 2>/dev/null || echo "0")
+    metrics+="$(write_metric_header "dhcp_exporter_duration_seconds" "gauge" "Script execution time")"$'\n'
+    metrics+="dhcp_exporter_duration_seconds ${elapsed}"$'\n'
+    metrics+="$(write_metric_header "dhcp_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run")"$'\n'
+    metrics+="dhcp_exporter_last_run_timestamp $(date +%s)"$'\n'
+
+    echo "$metrics"
+}
+
+# Append per-subnet pool metrics and DORA counters for ISC dhcpd.
+# Caller state (locals of collect_metrics, updated here through dynamic
+# scoping): metrics (appended), subnet_count, total_active (incremented).
+# Notes:
+#   * Lease addresses are converted to integers once, not once per subnet.
+#   * The subnet name comes from a free-text comment in dhcpd.conf, so
+#     backslashes and double quotes are escaped before it is used as a
+#     label value; otherwise the exposition output would be malformed.
+#   * dhcp_subnet_pool_reserved carries the file-wide reservation count on
+#     every subnet, because count_dhcpd_reservations is not subnet-aware.
+#   * Utilization is computed with awk, so a missing `bc` no longer turns
+#     every utilization value into 0.
+collect_dhcpd_metrics() {
+    local subnet_data lease_data
+    subnet_data=$(parse_dhcpd_subnets)
+    lease_data=$(count_dhcpd_leases)
+
+    metrics+="$(write_metric_header "dhcp_subnet_pool_total" "gauge" "Total addresses in the pool")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_pool_active" "gauge" "Currently leased addresses")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_pool_free" "gauge" "Available addresses in the pool")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_pool_utilization" "gauge" "Pool utilization percentage (0-100)")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_pool_reserved" "gauge" "Number of static reservations")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_leases_expiring" "gauge" "Leases expiring within threshold")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_lease_longest_seconds" "gauge" "Remaining time on the longest lease")"$'\n'
+    metrics+="$(write_metric_header "dhcp_subnet_lease_shortest_seconds" "gauge" "Remaining time on the shortest lease")"$'\n'
+
+    # Pre-compute the integer form of every active lease address.
+    local -a lease_ints=() lease_secs=()
+    local lease_ip remaining
+    while read -r lease_ip remaining; do
+        [ -z "$lease_ip" ] && continue
+        lease_ints+=("$(ip_to_int "$lease_ip")")
+        lease_secs+=("${remaining:-0}")
+    done <<< "$lease_data"
+
+    # The reservation count does not depend on the subnet: fetch it once.
+    local reserved
+    reserved=$(count_dhcpd_reservations)
+    reserved=${reserved:-0}
+
+    local bs='\' dq='"'
+    local subnet name pool_total range_start range_end
+    local active longest shortest expiring_1h expiring_4h expiring_24h
+    local start_int end_int free util labels i
+
+    while IFS='|' read -r subnet name pool_total range_start range_end; do
+        [ -z "$subnet" ] && continue
+        subnet_count=$((subnet_count + 1))
+
+        active=0 longest=0 shortest=999999999
+        expiring_1h=0 expiring_4h=0 expiring_24h=0
+        start_int=$(ip_to_int "$range_start")
+        end_int=$(ip_to_int "$range_end")
+
+        # Count the leases that fall inside this subnet's range.
+        for (( i = 0; i < ${#lease_ints[@]}; i++ )); do
+            if (( lease_ints[i] < start_int || lease_ints[i] > end_int )); then
+                continue
+            fi
+            remaining=${lease_secs[i]}
+            active=$((active + 1))
+            total_active=$((total_active + 1))
+            if (( remaining > longest )); then longest=$remaining; fi
+            if (( remaining < shortest )); then shortest=$remaining; fi
+            if (( remaining <= 3600 )); then expiring_1h=$((expiring_1h + 1)); fi
+            if (( remaining <= 14400 )); then expiring_4h=$((expiring_4h + 1)); fi
+            if (( remaining <= 86400 )); then expiring_24h=$((expiring_24h + 1)); fi
+        done
+
+        free=$((pool_total - active))
+        if (( free < 0 )); then free=0; fi
+        util=0
+        if (( pool_total > 0 )); then
+            util=$(awk -v a="$active" -v t="$pool_total" 'BEGIN { printf "%.2f", a * 100 / t }')
+        fi
+        # The 999999999 sentinel must not be exported for an empty pool.
+        if (( active == 0 )); then shortest=0; fi
+
+        # Escape backslash first, then double quote, for use inside "...".
+        name=${name//"$bs"/"$bs$bs"}
+        name=${name//"$dq"/"$bs$dq"}
+        labels="subnet=\"${subnet}\",name=\"${name}\""
+
+        metrics+="dhcp_subnet_pool_total{${labels}} ${pool_total}"$'\n'
+        metrics+="dhcp_subnet_pool_active{${labels}} ${active}"$'\n'
+        metrics+="dhcp_subnet_pool_free{${labels}} ${free}"$'\n'
+        metrics+="dhcp_subnet_pool_utilization{${labels}} ${util}"$'\n'
+        metrics+="dhcp_subnet_pool_reserved{${labels}} ${reserved}"$'\n'
+        metrics+="dhcp_subnet_leases_expiring{${labels},within=\"1h\"} ${expiring_1h}"$'\n'
+        metrics+="dhcp_subnet_leases_expiring{${labels},within=\"4h\"} ${expiring_4h}"$'\n'
+        metrics+="dhcp_subnet_leases_expiring{${labels},within=\"24h\"} ${expiring_24h}"$'\n'
+        metrics+="dhcp_subnet_lease_longest_seconds{${labels}} ${longest}"$'\n'
+        metrics+="dhcp_subnet_lease_shortest_seconds{${labels}} ${shortest}"$'\n'
+    done <<< "$subnet_data"
+
+    # DORA stats
+    local dora
+    local discovers offers requests acks naks declines releases
+    dora=$(parse_dhcpd_dora)
+    if [ -n "$dora" ]; then
+        IFS='|' read -r discovers offers requests acks naks declines releases <<< "$dora"
+        metrics+="$(write_metric_header "dhcp_discovers_total" "counter" "Total DHCPDISCOVER packets received")"$'\n'
+        metrics+="dhcp_discovers_total ${discovers}"$'\n'
+        metrics+="$(write_metric_header "dhcp_offers_total" "counter" "Total DHCPOFFER packets sent")"$'\n'
+        metrics+="dhcp_offers_total ${offers}"$'\n'
+        metrics+="$(write_metric_header "dhcp_requests_total" "counter" "Total DHCPREQUEST packets received")"$'\n'
+        metrics+="dhcp_requests_total ${requests}"$'\n'
+        metrics+="$(write_metric_header "dhcp_acks_total" "counter" "Total DHCPACK packets sent")"$'\n'
+        metrics+="dhcp_acks_total ${acks}"$'\n'
+        metrics+="$(write_metric_header "dhcp_naks_total" "counter" "Total DHCPNAK packets sent")"$'\n'
+        metrics+="dhcp_naks_total ${naks}"$'\n'
+        metrics+="$(write_metric_header "dhcp_declines_total" "counter" "Total DHCPDECLINE packets received")"$'\n'
+        metrics+="dhcp_declines_total ${declines}"$'\n'
+        metrics+="$(write_metric_header "dhcp_releases_total" "counter" "Total DHCPRELEASE packets received")"$'\n'
+        metrics+="dhcp_releases_total ${releases}"$'\n'
+    fi
+}
+
+# Append the per-subnet metric headers for Kea, then collect the values
+# from the control API or from the lease file, depending on KEA_USE_API.
+# Caller state: appends to `metrics` (a local of collect_metrics).
+collect_kea_metrics() {
+    local spec
+    # Each entry is "<metric name>|<help text>"; all of them are gauges.
+    for spec in \
+        "dhcp_subnet_pool_total|Total addresses in the pool" \
+        "dhcp_subnet_pool_active|Currently leased addresses" \
+        "dhcp_subnet_pool_free|Available addresses in the pool" \
+        "dhcp_subnet_pool_utilization|Pool utilization percentage (0-100)" \
+        "dhcp_subnet_pool_reserved|Number of static reservations" \
+        "dhcp_subnet_leases_expiring|Leases expiring within threshold" \
+        "dhcp_subnet_lease_longest_seconds|Remaining time on the longest lease" \
+        "dhcp_subnet_lease_shortest_seconds|Remaining time on the shortest lease"; do
+        metrics+="$(write_metric_header "${spec%%|*}" "gauge" "${spec#*|}")"$'\n'
+    done
+
+    case "$KEA_USE_API" in
+        true) collect_kea_api_metrics ;;
+        *) collect_kea_file_metrics ;;
+    esac
+}
+
+# Collect subnet and packet statistics from the Kea control API.
+# Caller state (locals of collect_metrics, updated through dynamic
+# scoping): metrics (appended), subnet_count, total_active (incremented).
+# Falls back to collect_kea_file_metrics when the API gives no response.
+# Notes:
+#   * The loop reads from a process substitution rather than a pipe. With
+#     `python3 ... | while ...` the loop body ran in a subshell, so every
+#     `metrics+=` was discarded and no Kea API metric reached the output.
+#   * Subnet statistics are matched exactly as subnet[<id>].total-addresses
+#     and subnet[<id>].assigned-addresses, so names such as
+#     subnet[<id>].cumulative-assigned-addresses or per-pool variants are
+#     not emitted as duplicate samples.
+collect_kea_api_metrics() {
+    local stats_json
+    stats_json=$(kea_api_call "statistic-get-all")
+
+    if [ -z "$stats_json" ]; then
+        log_warn "Kea API not responding, falling back to file mode"
+        collect_kea_file_metrics
+        return
+    fi
+
+    local key value sid
+    # python3 flattens the reply to "name=value" lines, taking the most
+    # recent sample of each statistic.
+    while IFS='=' read -r key value; do
+        if [[ "$key" =~ ^subnet\[([0-9]+)\]\.total-addresses$ ]]; then
+            sid="${BASH_REMATCH[1]}"
+            subnet_count=$((subnet_count + 1))
+            metrics+="dhcp_subnet_pool_total{subnet=\"${sid}\"} ${value}"$'\n'
+            continue
+        fi
+        if [[ "$key" =~ ^subnet\[([0-9]+)\]\.assigned-addresses$ ]]; then
+            sid="${BASH_REMATCH[1]}"
+            if [[ "$value" =~ ^[0-9]+$ ]]; then
+                total_active=$((total_active + value))
+            fi
+            metrics+="dhcp_subnet_pool_active{subnet=\"${sid}\"} ${value}"$'\n'
+            continue
+        fi
+
+        case "$key" in
+            pkt4-discover-received)
+                metrics+="$(write_metric_header "dhcp_discovers_total" "counter" "Total DHCPDISCOVER packets received")"$'\n'
+                metrics+="dhcp_discovers_total ${value}"$'\n'
+                ;;
+            pkt4-offer-sent)
+                metrics+="$(write_metric_header "dhcp_offers_total" "counter" "Total DHCPOFFER packets sent")"$'\n'
+                metrics+="dhcp_offers_total ${value}"$'\n'
+                ;;
+            pkt4-request-received)
+                metrics+="$(write_metric_header "dhcp_requests_total" "counter" "Total DHCPREQUEST packets received")"$'\n'
+                metrics+="dhcp_requests_total ${value}"$'\n'
+                ;;
+            pkt4-ack-sent)
+                metrics+="$(write_metric_header "dhcp_acks_total" "counter" "Total DHCPACK packets sent")"$'\n'
+                metrics+="dhcp_acks_total ${value}"$'\n'
+                ;;
+            pkt4-nak-sent)
+                metrics+="$(write_metric_header "dhcp_naks_total" "counter" "Total DHCPNAK packets sent")"$'\n'
+                metrics+="dhcp_naks_total ${value}"$'\n'
+                ;;
+            pkt4-decline-received)
+                metrics+="$(write_metric_header "dhcp_declines_total" "counter" "Total DHCPDECLINE packets received")"$'\n'
+                metrics+="dhcp_declines_total ${value}"$'\n'
+                ;;
+            pkt4-release-received)
+                metrics+="$(write_metric_header "dhcp_releases_total" "counter" "Total DHCPRELEASE packets received")"$'\n'
+                metrics+="dhcp_releases_total ${value}"$'\n'
+                ;;
+        esac
+    done < <(python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+if data[0]['result'] == 0:
+    args = data[0].get('arguments', {})
+    for key, val in args.items():
+        if val and isinstance(val, list):
+            v = val[0][0] if isinstance(val[0], list) else val[0]
+            print(f'{key}={v}')
+" <<< "$stats_json" 2>/dev/null)
+}
+
+# Count active leases straight from the Kea lease file.
+# Caller state: increments total_active (a local of collect_metrics) once
+# per line printed by parse_kea_leases_file. No per-subnet samples are
+# produced in this mode.
+collect_kea_file_metrics() {
+    local lease_data lease_ip
+    lease_data=$(parse_kea_leases_file)
+
+    # The remaining-seconds column is not needed for a plain count.
+    while read -r lease_ip _; do
+        [ -z "$lease_ip" ] && continue
+        total_active=$((total_active + 1))
+    done <<< "$lease_data"
+}
+
+# ============================================================================
+# OUTPUT
+# ============================================================================
+
+# Deliver the metrics according to MODE.
+# Globals:   MODE, TEXTFILE_DIR (read)
+#   stdout   - print the metrics
+#   textfile - write ${TEXTFILE_DIR}/dhcp-metrics.prom atomically
+#   http     - hand over to run_http_server, which collects on its own
+# Returns:   1 when the textfile cannot be written
+# Notes:     mktemp creates the temporary file with mode 0600; it is made
+#            world-readable before the rename, otherwise a collector running
+#            as another user could not read the published file.
+output_metrics() {
+    local all_metrics
+
+    case "$MODE" in
+        stdout)
+            all_metrics=$(collect_metrics)
+            echo "$all_metrics"
+            ;;
+        textfile)
+            all_metrics=$(collect_metrics)
+            mkdir -p "$TEXTFILE_DIR"
+            local tmp_file
+            tmp_file=$(mktemp "${TEXTFILE_DIR}/.dhcp-metrics.XXXXXX")
+            # Write, fix permissions, then rename: readers only ever see a
+            # complete file. On any failure the temp file is removed.
+            if ! { echo "$all_metrics" > "$tmp_file" \
+                    && chmod 0644 "$tmp_file" \
+                    && mv "$tmp_file" "${TEXTFILE_DIR}/dhcp-metrics.prom"; }; then
+                rm -f "$tmp_file"
+                log_error "Failed to write ${TEXTFILE_DIR}/dhcp-metrics.prom"
+                return 1
+            fi
+            log_info "Wrote metrics to ${TEXTFILE_DIR}/dhcp-metrics.prom"
+            ;;
+        http)
+            # No collection here: the server gathers fresh metrics itself.
+            run_http_server
+            ;;
+    esac
+}
+
+# Serve the metrics over HTTP with netcat, one connection per loop pass.
+# Globals:   HTTP_PORT, ONCE (read)
+# Arguments: ignored (metrics are collected here before each listen)
+# Notes:
+#   * Metrics are gathered before nc starts listening, so a response holds
+#     the values from the moment the previous request finished.
+#   * Content-Length is the byte count of the body including its final
+#     newline. The previous value counted characters and left the newline
+#     out, so clients cut off the last byte of the document.
+#   * The body is written with printf '%s' so backslashes in metric text
+#     are sent unchanged.
+#   * Two nc invocations are tried because option syntax differs between
+#     netcat variants (`-l -p PORT -q 1` vs `-l PORT`).
+run_http_server() {
+    log_info "Starting HTTP server on port ${HTTP_PORT}"
+    local body body_len
+    while true; do
+        body="$(collect_metrics)"$'\n'
+        body_len=$(printf '%s' "$body" | wc -c)
+        body_len=${body_len//[[:space:]]/}
+
+        {
+            printf 'HTTP/1.1 200 OK\r\n'
+            printf 'Content-Type: text/plain; version=0.0.4; charset=utf-8\r\n'
+            printf 'Content-Length: %s\r\n' "$body_len"
+            printf '\r\n'
+            printf '%s' "$body"
+        } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null || \
+        {
+            printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\n%s' "$body"
+        } | nc -l "$HTTP_PORT" 2>/dev/null
+
+        if $ONCE; then
+            break
+        fi
+    done
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse the command line, take the single-instance lock,
+# detect the DHCP backend, then emit metrics in the selected mode.
+# Arguments: "$@" - the script's command-line arguments
+main() {
+ parse_args "$@"
+ acquire_lock
+ detect_backend
+ log_info "Detected DHCP backend: ${DETECTED_BACKEND}"
+ output_metrics
+}
+
+main "$@"
diff --git a/directory-size-exporter.sh b/directory-size-exporter.sh
index ff25c40..0b67038 100644
--- a/directory-size-exporter.sh
+++ b/directory-size-exporter.sh
@@ -9,7 +9,7 @@
# Author: Phil Connor
# Contact: contact@mylinux.work
# License: MIT
-# Version: 1.0.0
+# Version: 1.0.1
set -euo pipefail
@@ -27,28 +27,23 @@ TARGET_DIRECTORIES=()
# ── Metrics Collection ──────────────────────────────────────────────
log_verbose() {
- [[ "$VERBOSE" == true ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
+ [[ "$VERBOSE" == true ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2 || true
}
log_info() {
- [[ "$QUIET" == false ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
+ [[ "$QUIET" == false ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2 || true
}
collect_metrics() {
local start_time
start_time=$(date +%s%N)
- echo "# HELP node_directory_size_bytes Disk space used by directory"
- echo "# TYPE node_directory_size_bytes gauge"
- echo "# HELP node_directory_filesystem_usage_percent Filesystem usage percentage for the directory mount point"
- echo "# TYPE node_directory_filesystem_usage_percent gauge"
-
local success=1
+ local size_lines="" pct_lines=""
for directory in "${TARGET_DIRECTORIES[@]}"; do
log_verbose "Running du for: $directory"
- # Get directory size in bytes
local du_output
du_output=$(timeout "$TIMEOUT" du --block-size=1 --summarize "$directory" 2>/dev/null) || {
log_info "WARNING: du failed for $directory"
@@ -58,16 +53,24 @@ collect_metrics() {
local size_bytes
size_bytes=$(echo "$du_output" | awk '{print $1}')
- echo "node_directory_size_bytes{directory=\"${directory}\"} ${size_bytes}"
+ size_lines+="node_directory_size_bytes{directory=\"${directory}\"} ${size_bytes}"$'\n'
- # Get filesystem usage percentage for the mount point
local pct
pct=$(df --output=pcent "$directory" 2>/dev/null | tail -n 1 | tr -d ' %')
if [[ "$pct" =~ ^[0-9]+$ ]]; then
- echo "node_directory_filesystem_usage_percent{directory=\"${directory}\"} ${pct}"
+ pct_lines+="node_directory_filesystem_usage_percent{directory=\"${directory}\"} ${pct}"$'\n'
fi
done
+ echo "# HELP node_directory_size_bytes Disk space used by directory"
+ echo "# TYPE node_directory_size_bytes gauge"
+ printf "%s" "$size_lines"
+
+ echo ""
+ echo "# HELP node_directory_filesystem_usage_percent Filesystem usage percentage for the directory mount point"
+ echo "# TYPE node_directory_filesystem_usage_percent gauge"
+ printf "%s" "$pct_lines"
+
# ── Script runtime ──
local end_time runtime
end_time=$(date +%s%N)
@@ -78,10 +81,12 @@ collect_metrics() {
echo "# TYPE ${EXPORTER_NAME}_duration_seconds gauge"
echo "${EXPORTER_NAME}_duration_seconds ${runtime}"
+ echo ""
echo "# HELP ${EXPORTER_NAME}_last_run_timestamp Last successful run"
echo "# TYPE ${EXPORTER_NAME}_last_run_timestamp gauge"
echo "${EXPORTER_NAME}_last_run_timestamp $(date +%s)"
+ echo ""
echo "# HELP ${EXPORTER_NAME}_success Whether the exporter ran successfully"
echo "# TYPE ${EXPORTER_NAME}_success gauge"
echo "${EXPORTER_NAME}_success ${success}"
@@ -191,8 +196,8 @@ while [[ $# -gt 0 ]]; do
shift
;;
--handle-request)
- handle_request
- exit 0
+ OUTPUT_MODE="handle-request"
+ shift
;;
-h|--help)
show_help
@@ -236,6 +241,10 @@ if [[ "$DRY_RUN" == true ]]; then
fi
case "$OUTPUT_MODE" in
+ handle-request)
+ handle_request
+ exit 0
+ ;;
stdout)
collect_metrics
;;
@@ -262,6 +271,6 @@ case "$OUTPUT_MODE" in
fi
echo "${EXPORTER_NAME} listening on port ${PORT}..."
echo "Monitoring directories: ${TARGET_DIRECTORIES[*]}"
- socat TCP-LISTEN:"$PORT",reuseaddr,fork EXEC:"$0 --handle-request"
+ socat TCP-LISTEN:"$PORT",reuseaddr,fork EXEC:"$0 --handle-request ${TARGET_DIRECTORIES[*]}"
;;
esac
diff --git a/disk-cleanup.sh b/disk-cleanup.sh
new file mode 100644
index 0000000..b80387b
--- /dev/null
+++ b/disk-cleanup.sh
@@ -0,0 +1,584 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### disk-cleanup.sh — Find and clean disk space hogs on Linux servers ####
+#### Scans logs, temp files, package caches, old kernels, journal, and Docker cruft ####
+#### Dry-run by default — nothing is deleted without --force ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./disk-cleanup.sh --scan ####
+#### ./disk-cleanup.sh --clean --force ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Every default can be overridden from the environment.
+# Rotated logs / temp files older than this many days are candidates.
+LOG_AGE_DAYS="${LOG_AGE_DAYS:-30}"
+TMP_AGE_DAYS="${TMP_AGE_DAYS:-7}"
+# Size the journal is vacuumed to (passed to journalctl --vacuum-size).
+JOURNAL_MAX="${JOURNAL_MAX:-500M}"
+LARGE_FILE_MIN="${LARGE_FILE_MIN:-100M}"
+LARGE_FILE_DIRS="${LARGE_FILE_DIRS:-/var /home /opt /tmp /srv}"
+# While DRY_RUN is "true" the clean_* functions only log what they would do.
+DRY_RUN="${DRY_RUN:-true}"
+VERBOSE="${VERBOSE:-false}"
+# Colour mode checked by setup_colors: "never", "always", or anything else
+# to colour only when stdout is a terminal.
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+RUN_MODE=""
+# Running byte totals, updated through add_reclaimable / add_cleaned.
+TOTAL_RECLAIMABLE=0
+TOTAL_CLEANED=0
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Define the colour variables used by the logging helpers.
+# Globals:   COLOR (read); RED YELLOW BLUE CYAN BOLD DIM RESET (written)
+# Colours are enabled when COLOR is "always", or when COLOR is not "never"
+# and stdout is a terminal; otherwise every variable is the empty string.
+setup_colors() {
+    # Start from "no colour" and only fill in escapes when they apply.
+    RED="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+    case "$COLOR" in
+        never) return 0 ;;
+        always) ;;
+        *) [[ -t 1 ]] || return 0 ;;
+    esac
+
+    RED='\033[0;31m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# [INFO] message on stdout.
+log() {
+    echo -e "${BLUE}[INFO]${RESET} $*"
+}
+
+# [WARN] message on stderr.
+warn() {
+    >&2 echo -e "${YELLOW}[WARN]${RESET} $*"
+}
+
+# [ERROR] message on stderr.
+err() {
+    >&2 echo -e "${RED}[ERROR]${RESET} $*"
+}
+
+# [DEBUG] message on stdout, printed only when VERBOSE is "true".
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    echo -e "${DIM}[DEBUG]${RESET} $*"
+}
+
+# Print a section title framed by blank lines.
+# Arguments: $1 - title text
+section_header() {
+    local title="$1"
+    printf '\n'
+    echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
+    printf '\n'
+}
+
+# Format a byte count with a binary unit (B, KiB, MiB, GiB).
+# Arguments: $1 - number of bytes
+# Outputs:   e.g. "512 B", "1.5 KiB", "2.0 GiB"
+# Notes:     the value reaches awk through -v instead of being pasted into
+#            the program text, so it is always treated as data.
+human_bytes() {
+    local bytes="$1"
+    local divisor unit
+
+    if [[ "$bytes" -ge 1073741824 ]]; then
+        divisor=1073741824 unit="GiB"
+    elif [[ "$bytes" -ge 1048576 ]]; then
+        divisor=1048576 unit="MiB"
+    elif [[ "$bytes" -ge 1024 ]]; then
+        divisor=1024 unit="KiB"
+    else
+        echo "${bytes} B"
+        return
+    fi
+
+    awk -v b="$bytes" -v d="$divisor" -v u="$unit" 'BEGIN { printf "%.1f %s", b / d, u }'
+}
+
+# Add $1 bytes to the running TOTAL_RECLAIMABLE counter.
+add_reclaimable() {
+    : "$(( TOTAL_RECLAIMABLE += $1 ))"
+}
+
+# Add $1 bytes to the running TOTAL_CLEANED counter.
+add_cleaned() {
+    : "$(( TOTAL_CLEANED += $1 ))"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OLD LOGS
+# ══════════════════════════════════════════════════════════════════════
+
+# Report how much space rotated/compressed logs older than LOG_AGE_DAYS use.
+# Globals:   LOG_AGE_DAYS, VERBOSE (read)
+# Arguments: $1 - (optional) directory to scan, default /var/log
+# Outputs:   a summary line; with VERBOSE=true also one line per file
+# Side effects: adds the total to the reclaimable counter
+# Notes:     all name patterns go into a single find. Previously two
+#            separate scans were added together, so a file matching both
+#            sets (for example app.log.1 or app.log.2.gz) was counted twice.
+scan_old_logs() {
+    local log_dir="${1:-/var/log}"
+    section_header "Old Log Files (> ${LOG_AGE_DAYS} days)"
+
+    local total_size=0
+    local count=0
+    local file size
+
+    while IFS= read -r -d '' file; do
+        size=$(stat -c%s "$file" 2>/dev/null || echo 0)
+        if [[ "$size" -gt 0 ]]; then
+            total_size=$((total_size + size))
+            count=$((count + 1))
+            if [[ "$VERBOSE" == "true" ]]; then
+                printf " %10s %s\n" "$(human_bytes "$size")" "$file"
+            fi
+        fi
+    done < <(find "$log_dir" -type f \
+        \( -name "*.gz" -o -name "*.xz" -o -name "*.bz2" -o -name "*.[0-9]" -o -name "*.old" -o -name "*.log.*" \) \
+        -mtime +"$LOG_AGE_DAYS" -print0 2>/dev/null)
+
+    printf " %-30s %s (%d files)\n" "Rotated/old logs:" "$(human_bytes "$total_size")" "$count"
+    add_reclaimable "$total_size"
+}
+
+# Delete rotated/compressed logs older than LOG_AGE_DAYS.
+# Globals:   DRY_RUN, LOG_AGE_DAYS (read)
+# Arguments: $1 - (optional) directory to clean, default /var/log
+# Side effects: removes files; adds the freed bytes to the cleaned counter
+# Notes:
+#   * With DRY_RUN=true nothing is deleted.
+#   * The loop reads from a process substitution. In the earlier
+#     `find | while` form the byte total was accumulated in a subshell and
+#     lost, and a find error could abort the script under pipefail.
+clean_old_logs() {
+    local log_dir="${1:-/var/log}"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY-RUN] Would delete old log files in ${log_dir}"
+        return
+    fi
+
+    local cleaned=0
+    local file size
+    while IFS= read -r -d '' file; do
+        size=$(stat -c%s "$file" 2>/dev/null || echo 0)
+        if rm -f -- "$file"; then
+            cleaned=$((cleaned + size))
+        fi
+    done < <(find "$log_dir" -type f \
+        \( -name "*.gz" -o -name "*.xz" -o -name "*.bz2" -o -name "*.[0-9]" -o -name "*.old" -o -name "*.log.*" \) \
+        -mtime +"$LOG_AGE_DAYS" -print0 2>/dev/null)
+
+    add_cleaned "$cleaned"
+    log "Cleaned old log files"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# JOURNAL
+# ══════════════════════════════════════════════════════════════════════
+
+# Report the systemd journal size and what vacuuming to JOURNAL_MAX would
+# free.
+# Globals:   JOURNAL_MAX (read)
+# Outputs:   journal size, vacuum target and, when applicable, the savings
+# Side effects: adds the estimated savings to the reclaimable counter
+# Notes:
+#   * The byte count comes from du on /var/log/journal/, falling back to
+#     /run/log/journal/; only the first number du prints is used and
+#     anything non-numeric counts as 0, so a partly failing du cannot break
+#     the arithmetic below.
+#   * JOURNAL_MAX accepts an integer with an optional K, M, G or T suffix.
+#     Any other value skips the estimate with a warning.
+scan_journal() {
+    section_header "Systemd Journal"
+
+    if ! command -v journalctl &>/dev/null; then
+        printf " %-30s %s\n" "Journal:" "N/A (no systemd)"
+        return
+    fi
+
+    local journal_size
+    journal_size=$(journalctl --disk-usage 2>/dev/null | grep -oP '[\d.]+[GMKT]' | head -1 || echo "0")
+
+    # Get bytes for tracking
+    local journal_bytes=0
+    local dir
+    for dir in /var/log/journal/ /run/log/journal/; do
+        journal_bytes=$(du -sb "$dir" 2>/dev/null | awk 'NR == 1 { print $1 }' || true)
+        [[ "$journal_bytes" =~ ^[0-9]+$ ]] || journal_bytes=0
+        if [[ "$journal_bytes" -gt 0 ]]; then
+            break
+        fi
+    done
+
+    printf " %-30s %s\n" "Journal size:" "${journal_size:-Unknown}"
+    printf " %-30s %s\n" "Would vacuum to:" "$JOURNAL_MAX"
+
+    # Estimate savings
+    local max_bytes=0
+    local max_num="${JOURNAL_MAX%[GMKT]*}"
+    local max_unit="${JOURNAL_MAX: -1}"
+    if [[ ! "$max_num" =~ ^[0-9]+$ ]]; then
+        warn "Cannot estimate journal savings: unsupported JOURNAL_MAX '${JOURNAL_MAX}'"
+        return 0
+    fi
+    case "$max_unit" in
+        T) max_bytes=$((max_num * 1099511627776)) ;;
+        G) max_bytes=$((max_num * 1073741824)) ;;
+        M) max_bytes=$((max_num * 1048576)) ;;
+        K) max_bytes=$((max_num * 1024)) ;;
+        *) max_bytes=$((max_num)) ;;
+    esac
+
+    if [[ "$journal_bytes" -gt "$max_bytes" ]]; then
+        local savings=$((journal_bytes - max_bytes))
+        add_reclaimable "$savings"
+        printf " %-30s %s\n" "Reclaimable:" "$(human_bytes "$savings")"
+    fi
+}
+
+# Vacuum the systemd journal down to JOURNAL_MAX.
+# Globals:   DRY_RUN, JOURNAL_MAX (read)
+# Notes:     does nothing without journalctl; with DRY_RUN=true only logs
+#            the intent. Success is logged only when the vacuum actually
+#            succeeded — a failure produces just the warning.
+clean_journal() {
+    if ! command -v journalctl &>/dev/null; then
+        return
+    fi
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY-RUN] Would vacuum journal to ${JOURNAL_MAX}"
+        return
+    fi
+
+    if journalctl --vacuum-size="$JOURNAL_MAX" 2>/dev/null; then
+        log "Vacuumed journal to ${JOURNAL_MAX}"
+    else
+        warn "Journal vacuum failed"
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TEMP FILES
+# ══════════════════════════════════════════════════════════════════════
+
+# Report the space used by files older than TMP_AGE_DAYS in /tmp and
+# /var/tmp (at most two directory levels deep).
+# Globals:   TMP_AGE_DAYS (read)
+# Outputs:   a summary line with total size and file count
+# Side effects: adds the total to the reclaimable counter
+# Notes:     sizes come from find's -printf and are summed in one awk pass
+#            instead of running stat once per file; loop variables are
+#            local so nothing leaks into the caller's scope.
+scan_tmp() {
+    section_header "Temp Files (> ${TMP_AGE_DAYS} days)"
+
+    local total_size=0
+    local count=0
+    local dir
+    local -a roots=()
+
+    for dir in /tmp /var/tmp; do
+        if [[ -d "$dir" ]]; then
+            roots+=("$dir")
+        fi
+    done
+
+    if (( ${#roots[@]} > 0 )); then
+        # %.0f keeps large byte totals out of exponent notation.
+        read -r total_size count < <(
+            find "${roots[@]}" -maxdepth 2 -type f -mtime +"$TMP_AGE_DAYS" -printf '%s\n' 2>/dev/null \
+                | awk '{ bytes += $1; files++ } END { printf "%.0f %d\n", bytes, files }'
+        )
+    fi
+
+    printf " %-30s %s (%d files)\n" "Old temp files:" "$(human_bytes "$total_size")" "$count"
+    add_reclaimable "$total_size"
+}
+
+# Delete files older than TMP_AGE_DAYS from /tmp and /var/tmp (at most two
+# directory levels deep).
+# Globals:   DRY_RUN, TMP_AGE_DAYS (read)
+# Notes:     with DRY_RUN=true nothing is deleted; find errors are ignored
+#            on purpose (best-effort cleanup). The loop variable is local
+#            so it does not leak into the caller's scope.
+clean_tmp() {
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "[DRY-RUN] Would delete temp files older than ${TMP_AGE_DAYS} days"
+        return
+    fi
+
+    local dir
+    for dir in /tmp /var/tmp; do
+        [[ -d "$dir" ]] || continue
+        find "$dir" -maxdepth 2 -type f -mtime +"$TMP_AGE_DAYS" -delete 2>/dev/null || true
+    done
+    log "Cleaned temp files"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PACKAGE CACHE
+# ══════════════════════════════════════════════════════════════════════
+
+# Report the size of the APT and YUM/DNF package caches and add them to the
+# reclaimable tally.
+scan_package_cache() {
+    section_header "Package Cache"
+
+    # du exits non-zero when a cache directory is missing (e.g. /var/cache/yum
+    # on a dnf-only host). The "|| true" group keeps that from failing the
+    # pipeline, which would abort the script when errexit/pipefail are active.
+    # printf "%.0f" keeps large totals as plain integers on every awk.
+    local sum_bytes='{total += $1} END {printf "%.0f\n", total}'
+
+    if command -v apt-get &>/dev/null; then
+        local apt_size
+        apt_size=$({ du -sb /var/cache/apt/archives/ 2>/dev/null || true; } | awk "$sum_bytes")
+        printf " %-30s %s\n" "APT cache:" "$(human_bytes "$apt_size")"
+        add_reclaimable "$apt_size"
+    fi
+
+    if command -v yum &>/dev/null || command -v dnf &>/dev/null; then
+        local yum_size
+        yum_size=$({ du -sb /var/cache/yum/ /var/cache/dnf/ 2>/dev/null || true; } | awk "$sum_bytes")
+        printf " %-30s %s\n" "YUM/DNF cache:" "$(human_bytes "$yum_size")"
+        add_reclaimable "$yum_size"
+    fi
+}
+
+clean_package_cache() {
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "[DRY-RUN] Would clean package cache"
+ return
+ fi
+
+ if command -v apt-get &>/dev/null; then
+ apt-get clean -y 2>/dev/null || warn "apt-get clean failed"
+ log "Cleaned APT cache"
+ fi
+
+ if command -v dnf &>/dev/null; then
+ dnf clean all 2>/dev/null || warn "dnf clean failed"
+ log "Cleaned DNF cache"
+ elif command -v yum &>/dev/null; then
+ yum clean all 2>/dev/null || warn "yum clean failed"
+ log "Cleaned YUM cache"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OLD KERNELS
+# ══════════════════════════════════════════════════════════════════════
+
+scan_old_kernels() {
+ section_header "Old Kernels"
+
+ local current_kernel
+ current_kernel=$(uname -r)
+
+ local old_count=0
+ local total_size=0
+
+ if command -v dpkg &>/dev/null; then
+ while IFS= read -r pkg; do
+ [[ -z "$pkg" ]] && continue
+ local pkg_version
+ pkg_version=$(echo "$pkg" | sed 's/linux-image-//' | sed 's/-generic//' | sed 's/-unsigned//')
+ if [[ "$current_kernel" != *"$pkg_version"* ]]; then
+ local size
+ size=$(dpkg-query -W --showformat='${Installed-Size}' "$pkg" 2>/dev/null || echo "0")
+ total_size=$((total_size + size * 1024))
+ ((old_count++)) || true
+ verbose "Old kernel: ${pkg} ($(human_bytes $((size * 1024))))"
+ fi
+ done < <(dpkg --list 'linux-image-*' 2>/dev/null | grep '^ii' | awk '{print $2}' | grep -v "$current_kernel")
+ elif command -v rpm &>/dev/null; then
+ while IFS= read -r pkg; do
+ [[ -z "$pkg" ]] && continue
+ if [[ "$pkg" != *"$current_kernel"* ]]; then
+ local size
+ size=$(rpm -q --queryformat '%{SIZE}' "$pkg" 2>/dev/null || echo "0")
+ total_size=$((total_size + size))
+ ((old_count++)) || true
+ verbose "Old kernel: ${pkg}"
+ fi
+ done < <(rpm -qa kernel 2>/dev/null)
+ fi
+
+ printf " %-30s %s\n" "Current kernel:" "$current_kernel"
+ printf " %-30s %d ($(human_bytes "$total_size"))\n" "Old kernels:" "$old_count"
+ add_reclaimable "$total_size"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DOCKER CLEANUP
+# ══════════════════════════════════════════════════════════════════════
+
+# Report Docker clutter: dangling images, exited containers, dangling
+# volumes, build cache and the "docker system df" overview.
+# Silently returns when docker is not installed or the daemon is unreachable.
+scan_docker() {
+    if ! command -v docker &>/dev/null; then
+        return
+    fi
+
+    if ! docker info &>/dev/null; then
+        return
+    fi
+
+    section_header "Docker"
+
+    # The "|| true" groups keep a failing docker call from failing the whole
+    # pipeline (and aborting the script if errexit/pipefail are enabled).
+    local dangling_count stopped_count vol_count
+
+    # Dangling images
+    dangling_count=$({ docker images -f "dangling=true" -q 2>/dev/null || true; } | wc -l)
+    printf " %-30s %d\n" "Dangling images:" "$dangling_count"
+
+    # Stopped containers
+    stopped_count=$({ docker ps -f "status=exited" -q 2>/dev/null || true; } | wc -l)
+    printf " %-30s %d\n" "Stopped containers:" "$stopped_count"
+
+    # Unused volumes
+    vol_count=$({ docker volume ls -f "dangling=true" -q 2>/dev/null || true; } | wc -l)
+    printf " %-30s %d\n" "Unused volumes:" "$vol_count"
+
+    # Build cache: run the preview once and reuse its output.
+    # NOTE(review): "--dry-run" is not listed among the documented options of
+    # "docker builder prune"; when the flag is rejected this section is simply
+    # skipped. Confirm against the Docker versions in use.
+    local prune_preview build_cache
+    prune_preview=$(docker builder prune --dry-run 2>/dev/null || true)
+    if grep -q "Total:" <<< "$prune_preview"; then
+        build_cache=$(grep "Total:" <<< "$prune_preview" | awk '{print $2}')
+        printf " %-30s %s\n" "Build cache:" "${build_cache:-0}"
+    fi
+
+    # Docker system df, indented to match the rest of the report
+    echo ""
+    local line
+    while IFS= read -r line; do
+        printf " %s\n" "$line"
+    done < <(docker system df 2>/dev/null)
+}
+
+clean_docker() {
+ if ! command -v docker &>/dev/null || ! docker info &>/dev/null 2>&1; then
+ return
+ fi
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "[DRY-RUN] Would prune Docker system (stopped containers, dangling images, unused networks, build cache)"
+ return
+ fi
+
+ docker system prune -f 2>/dev/null || warn "Docker prune failed"
+ log "Pruned Docker system"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LARGE FILES
+# ══════════════════════════════════════════════════════════════════════
+
+scan_large_files() {
+ section_header "Large Files (> ${LARGE_FILE_MIN})"
+
+ printf " ${BOLD}%-12s %s${RESET}\n" "SIZE" "FILE"
+ printf " %s\n" "$(printf '%.0s─' {1..70})"
+
+ local count=0
+ for dir in $LARGE_FILE_DIRS; do
+ [[ -d "$dir" ]] || continue
+ find "$dir" -xdev -type f -size +"$LARGE_FILE_MIN" -printf '%s %p\n' 2>/dev/null
+ done | sort -rn | head -20 | while IFS=' ' read -r size path; do
+ printf " %10s %s\n" "$(human_bytes "$size")" "$path"
+ ((count++)) || true
+ done
+
+ if [[ "$count" -eq 0 ]]; then
+ echo " No files larger than ${LARGE_FILE_MIN} found"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+
+print_summary() {
+ echo ""
+ echo -e " ${BOLD}══════════════════════════════════════════${RESET}"
+ echo -e " ${BOLD}Disk Cleanup Summary${RESET}"
+ echo -e " ${BOLD}══════════════════════════════════════════${RESET}"
+
+ # Current disk usage
+ local root_pct
+ root_pct=$(df / 2>/dev/null | tail -1 | awk '{print $5}' | tr -d '%')
+ printf " %-20s %s%%\n" "Root disk usage:" "${root_pct:-?}"
+ printf " %-20s %s\n" "Reclaimable:" "$(human_bytes "$TOTAL_RECLAIMABLE")"
+
+ if [[ "$TOTAL_CLEANED" -gt 0 ]]; then
+ printf " %-20s %s\n" "Cleaned:" "$(human_bytes "$TOTAL_CLEANED")"
+ fi
+
+ if [[ "$DRY_RUN" == "true" && "$RUN_MODE" == *"clean"* ]]; then
+ echo ""
+ echo -e " ${YELLOW}Dry-run mode — nothing was deleted${RESET}"
+ echo -e " Run with --force to actually clean"
+ fi
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+# NOTE(review): this copy of the block is damaged. The here-document body of
+# usage() and the beginning of the argument-parsing loop that follows it are
+# missing, so what remains is the tail of a "case" inside a loop. Restore the
+# original text before relying on this section.
+usage() {
+ cat <&2
+ exit 1 ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+main() {
+ parse_args "$@"
+ setup_colors
+
+ echo ""
+ echo -e "${BOLD}Disk Cleanup — $(hostname -f 2>/dev/null || hostname)${RESET}"
+ echo -e "Mode: ${RUN_MODE}"
+ if [[ "$RUN_MODE" == "clean" ]]; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo -e "Safety: ${YELLOW}dry-run (use --force to delete)${RESET}"
+ else
+ echo -e "Safety: ${RED}LIVE — files will be deleted${RESET}"
+ fi
+ fi
+ echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+ case "$RUN_MODE" in
+ scan)
+ scan_old_logs
+ scan_journal
+ scan_tmp
+ scan_package_cache
+ scan_old_kernels
+ scan_docker
+ scan_large_files
+ print_summary
+ ;;
+ clean)
+ scan_old_logs
+ clean_old_logs
+ scan_journal
+ clean_journal
+ scan_tmp
+ clean_tmp
+ scan_package_cache
+ clean_package_cache
+ scan_docker
+ clean_docker
+ scan_large_files
+ print_summary
+ ;;
+ large-files)
+ scan_large_files
+ ;;
+ esac
+}
+
+main "$@"
diff --git a/disk-usage-reporter.sh b/disk-usage-reporter.sh
new file mode 100755
index 0000000..669bc88
--- /dev/null
+++ b/disk-usage-reporter.sh
@@ -0,0 +1,451 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### disk-usage-reporter.sh — Find what's consuming disk space ####
+#### Scans filesystems, ranks largest directories and files, flags old data ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./disk-usage-reporter.sh ####
+#### ./disk-usage-reporter.sh --path /var ####
+#### ./disk-usage-reporter.sh --top 50 --min-size 100M ####
+#### ./disk-usage-reporter.sh --json ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# DEFAULTS
+# ============================================================================
+
+SCAN_PATH="/"
+TOP_N=20
+MIN_SIZE="1M"
+MAX_DEPTH=3
+AGE_WARN=90
+JSON_MODE=false
+NO_COLOR=false
+VERSION="1.00"
+
+# Colors
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+GREEN='\033[0;32m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+SCRIPT_NAME="$(basename "$0")"
+
+# ============================================================================
+# USAGE & ARGUMENT PARSING
+# ============================================================================
+
+# Print the help text and exit with status 0.
+# NOTE(review): the opening of the here-document ("cat <<EOF" and most of the
+# help text) is missing from this copy; only the last example lines and the
+# EOF terminator remain. Restore the original text before relying on it.
+show_usage() {
+ cat <= 100M
+ ${SCRIPT_NAME} --path /home --age-warn 365 # Flag files older than 1 year
+ ${SCRIPT_NAME} --json # JSON output for scripting
+
+EOF
+ exit 0
+}
+
+# Parse command-line options into the global settings.
+# Globals written: SCAN_PATH, TOP_N, MIN_SIZE, MAX_DEPTH, AGE_WARN, JSON_MODE,
+#                  NO_COLOR, and the color variables when --no-color is given.
+# Exits with status 1 on an unknown option, a missing option value, or a
+# non-numeric value for --top / --max-depth / --age-warn.
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            -h|--help) show_usage ;;
+            --path|--top|--min-size|--max-depth|--age-warn)
+                # Fail with a clear message rather than an "unbound variable"
+                # error when the value is missing.
+                if [[ $# -lt 2 ]]; then
+                    echo "Option $1 requires a value" >&2
+                    exit 1
+                fi
+                case $1 in
+                    --path) SCAN_PATH="$2" ;;
+                    --min-size) MIN_SIZE="$2" ;;
+                    --top|--max-depth|--age-warn)
+                        # These values are passed to head/du/find and written
+                        # unquoted into the JSON output, so insist on digits.
+                        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
+                            echo "Option $1 expects a non-negative integer, got: $2" >&2
+                            exit 1
+                        fi
+                        case $1 in
+                            --top) TOP_N="$2" ;;
+                            --max-depth) MAX_DEPTH="$2" ;;
+                            --age-warn) AGE_WARN="$2" ;;
+                        esac
+                        ;;
+                esac
+                shift 2
+                ;;
+            --json) JSON_MODE=true; shift ;;
+            --no-color) NO_COLOR=true; shift ;;
+            *) echo "Unknown option: $1" >&2; exit 1 ;;
+        esac
+    done
+    if [[ "$NO_COLOR" == true ]]; then
+        RED="" YELLOW="" GREEN="" CYAN="" BOLD="" NC=""
+    fi
+}
+
+# ============================================================================
+# HELPERS
+# ============================================================================
+
+# Print a section title framed by two colored rules, with a blank line
+# before and after.
+# Arguments: $1 - title text
+header() {
+    local rule="${CYAN}====================================================${NC}"
+
+    echo ""
+    echo -e "$rule"
+    echo -e "${CYAN} ${BOLD}${1}${NC}"
+    echo -e "$rule"
+    echo ""
+}
+
+format_bytes() {
+ local b="$1"
+ if [[ "$b" -ge 1073741824 ]]; then
+ awk "BEGIN {printf \"%.2f GB\", $b/1073741824}"
+ elif [[ "$b" -ge 1048576 ]]; then
+ awk "BEGIN {printf \"%.1f MB\", $b/1048576}"
+ elif [[ "$b" -ge 1024 ]]; then
+ awk "BEGIN {printf \"%.1f KB\", $b/1024}"
+ else
+ echo "${b} B"
+ fi
+}
+
+fmt_num() {
+ printf "%'d" "$1" 2>/dev/null || echo "$1"
+}
+
+# Convert the human-readable MIN_SIZE (1M, 500K, 2G, 1T) to a "find -size"
+# argument. GNU find accepts only the suffixes k, M and G (plus b, c, w), so
+# the letter case is normalized and terabytes are expressed in G. Anything
+# that is not "<integer><K|M|G|T>" is passed through unchanged.
+# Globals: MIN_SIZE (read)
+# Outputs: the find-compatible size on stdout
+parse_min_size() {
+    local num
+    if [[ "$MIN_SIZE" =~ ^([0-9]+)([KkMmGgTt])$ ]]; then
+        num="${BASH_REMATCH[1]}"
+        case "${BASH_REMATCH[2]}" in
+            K|k) echo "${num}k" ;;
+            M|m) echo "${num}M" ;;
+            G|g) echo "${num}G" ;;
+            # 10# forces base 10 so a leading zero is not read as octal.
+            T|t) echo "$((10#$num * 1024))G" ;;
+        esac
+    else
+        echo "${MIN_SIZE}"
+    fi
+}
+
+# Convert human-readable size to bytes for comparison
+size_to_bytes() {
+ local size="$1"
+ local num unit
+ num="$(echo "$size" | sed 's/[^0-9.]//g')"
+ unit="$(echo "$size" | sed 's/[0-9.]//g' | tr '[:lower:]' '[:upper:]')"
+ case "$unit" in
+ K) awk "BEGIN {printf \"%d\", $num * 1024}" ;;
+ M) awk "BEGIN {printf \"%d\", $num * 1048576}" ;;
+ G) awk "BEGIN {printf \"%d\", $num * 1073741824}" ;;
+ T) awk "BEGIN {printf \"%d\", $num * 1099511627776}" ;;
+ *) echo "$num" ;;
+ esac
+}
+
+# ============================================================================
+# FILESYSTEM OVERVIEW
+# ============================================================================
+
+filesystem_overview() {
+ header "Filesystem Overview"
+
+ printf " ${BOLD}%-30s %6s %6s %6s %5s %-20s${NC}\n" \
+ "Filesystem" "Size" "Used" "Avail" "Use%" "Mounted on"
+ echo " ────────────────────────────────────────────────────────────────────────────────────"
+
+ df -hP -x tmpfs -x devtmpfs -x squashfs 2>/dev/null | tail -n +2 | while IFS= read -r line; do
+ local fs size used avail pct mount
+ fs="$(echo "$line" | awk '{print $1}')"
+ size="$(echo "$line" | awk '{print $2}')"
+ used="$(echo "$line" | awk '{print $3}')"
+ avail="$(echo "$line" | awk '{print $4}')"
+ pct="$(echo "$line" | awk '{print $5}')"
+ mount="$(echo "$line" | awk '{print $6}')"
+
+ local pct_num="${pct%\%}"
+ local color=""
+ if [[ "$pct_num" -ge 90 ]]; then
+ color="${RED}"
+ elif [[ "$pct_num" -ge 80 ]]; then
+ color="${YELLOW}"
+ else
+ color="${GREEN}"
+ fi
+
+ printf " ${color}%-30s %6s %6s %6s %5s %-20s${NC}\n" \
+ "$fs" "$size" "$used" "$avail" "$pct" "$mount"
+ done
+}
+
+# ============================================================================
+# TOP DIRECTORIES BY SIZE
+# ============================================================================
+
+top_directories() {
+ header "Top ${TOP_N} Directories by Size"
+
+ printf " ${BOLD}%4s %-60s %10s${NC}\n" "#" "Directory" "Size"
+ echo " ────────────────────────────────────────────────────────────────────────────────────"
+
+ du -x --max-depth="${MAX_DEPTH}" "${SCAN_PATH}" 2>/dev/null \
+ | sort -rn \
+ | head -n "${TOP_N}" \
+ | while IFS=$'\t' read -r size_kb dir; do
+ local num
+ num=$((COUNTER + 1))
+ COUNTER=$num
+ local size_bytes=$((size_kb * 1024))
+ local hsize
+ hsize="$(format_bytes "$size_bytes")"
+
+ local color="${NC}"
+ if [[ "$size_bytes" -ge 10737418240 ]]; then
+ color="${RED}"
+ elif [[ "$size_bytes" -ge 1073741824 ]]; then
+ color="${YELLOW}"
+ fi
+
+ printf " ${color}%4d %-60s %10s${NC}\n" "$num" "$dir" "$hsize"
+ done
+}
+
+# ============================================================================
+# TOP FILES BY SIZE
+# ============================================================================
+
+top_files() {
+ header "Top ${TOP_N} Files by Size"
+
+ printf " ${BOLD}%4s %-60s %10s${NC}\n" "#" "File" "Size"
+ echo " ────────────────────────────────────────────────────────────────────────────────────"
+
+ find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -printf '%s\t%p\n' 2>/dev/null \
+ | sort -rn \
+ | head -n "${TOP_N}" \
+ | awk -v idx=0 '{idx++; print idx"\t"$1"\t"$2}' \
+ | while IFS=$'\t' read -r num size_bytes filepath; do
+ local hsize
+ hsize="$(format_bytes "$size_bytes")"
+
+ local color="${NC}"
+ if [[ "$size_bytes" -ge 1073741824 ]]; then
+ color="${RED}"
+ elif [[ "$size_bytes" -ge 104857600 ]]; then
+ color="${YELLOW}"
+ fi
+
+ printf " ${color}%4d %-60s %10s${NC}\n" "$num" "$filepath" "$hsize"
+ done
+}
+
+# ============================================================================
+# OLD LARGE FILES
+# ============================================================================
+
+old_large_files() {
+ header "Old Large Files (> ${MIN_SIZE}, older than ${AGE_WARN} days)"
+
+ printf " ${BOLD}%4s %-50s %10s %12s${NC}\n" "#" "File" "Size" "Last Modified"
+ echo " ────────────────────────────────────────────────────────────────────────────────────"
+
+ OLD_FILES_DATA="$(find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -mtime +"${AGE_WARN}" \
+ -printf '%s\t%T+\t%p\n' 2>/dev/null \
+ | sort -rn \
+ | head -n "${TOP_N}")"
+
+ OLD_FILE_COUNT=0
+ OLD_FILE_BYTES=0
+
+ if [[ -z "$OLD_FILES_DATA" ]]; then
+ echo " No files found matching criteria."
+ return
+ fi
+
+ echo "$OLD_FILES_DATA" | awk -v idx=0 '{idx++; print idx"\t"$0}' \
+ | while IFS=$'\t' read -r num size_bytes mtime filepath; do
+ OLD_FILE_COUNT=$((OLD_FILE_COUNT + 1))
+ OLD_FILE_BYTES=$((OLD_FILE_BYTES + size_bytes))
+
+ local hsize mdate
+ hsize="$(format_bytes "$size_bytes")"
+ mdate="$(echo "$mtime" | cut -d'+' -f1)"
+
+ printf " ${YELLOW}%4d %-50s %10s %12s${NC}\n" "$num" "$filepath" "$hsize" "$mdate"
+ done
+}
+
+# ============================================================================
+# SUMMARY
+# ============================================================================
+
+compute_summary() {
+ local total_scanned old_count old_bytes
+
+ total_scanned="$(du -sx "${SCAN_PATH}" 2>/dev/null | awk '{print $1}')"
+ total_scanned=$((total_scanned * 1024))
+
+ old_bytes="$(find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -mtime +"${AGE_WARN}" \
+ -printf '%s\n' 2>/dev/null | awk '{s+=$1} END {print s+0}')"
+ old_count="$(find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -mtime +"${AGE_WARN}" \
+ 2>/dev/null | wc -l)"
+
+ echo "$total_scanned" "$old_count" "$old_bytes"
+}
+
+print_summary() {
+ header "Summary"
+
+ local data total_scanned old_count old_bytes
+ data="$(compute_summary)"
+ total_scanned="$(echo "$data" | awk '{print $1}')"
+ old_count="$(echo "$data" | awk '{print $2}')"
+ old_bytes="$(echo "$data" | awk '{print $3}')"
+
+ echo -e " ${BOLD}Scan path:${NC} ${SCAN_PATH}"
+ echo -e " ${BOLD}Total scanned:${NC} $(format_bytes "$total_scanned")"
+ echo -e " ${BOLD}Min file size:${NC} ${MIN_SIZE}"
+ echo -e " ${BOLD}Age threshold:${NC} ${AGE_WARN} days"
+ echo ""
+ echo -e " ${BOLD}Old large files:${NC} $(fmt_num "$old_count") files"
+ echo -e " ${BOLD}Reclaimable space:${NC} ${YELLOW}$(format_bytes "$old_bytes")${NC}"
+ echo ""
+
+ if [[ "$old_bytes" -gt 0 ]]; then
+ echo -e " ${YELLOW}→ Review old files above — candidates for cleanup or archival${NC}"
+ else
+ echo -e " ${GREEN}✓ No old large files found${NC}"
+ fi
+ echo ""
+}
+
+# ============================================================================
+# JSON OUTPUT
+# ============================================================================
+
+json_output() {
+ local total_scanned old_count old_bytes
+ local data
+ data="$(compute_summary)"
+ total_scanned="$(echo "$data" | awk '{print $1}')"
+ old_count="$(echo "$data" | awk '{print $2}')"
+ old_bytes="$(echo "$data" | awk '{print $3}')"
+
+ echo "{"
+ echo " \"scan_path\": \"${SCAN_PATH}\","
+ echo " \"timestamp\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\","
+ echo " \"min_size\": \"${MIN_SIZE}\","
+ echo " \"age_warn_days\": ${AGE_WARN},"
+ echo " \"max_depth\": ${MAX_DEPTH},"
+
+ # Filesystems
+ echo " \"filesystems\": ["
+ local fs_first=true
+ df -hP -x tmpfs -x devtmpfs -x squashfs 2>/dev/null | tail -n +2 | while IFS= read -r line; do
+ local fs size used avail pct mount
+ fs="$(echo "$line" | awk '{print $1}')"
+ size="$(echo "$line" | awk '{print $2}')"
+ used="$(echo "$line" | awk '{print $3}')"
+ avail="$(echo "$line" | awk '{print $4}')"
+ pct="$(echo "$line" | awk '{print $5}')"
+ mount="$(echo "$line" | awk '{print $6}')"
+ if [[ "$fs_first" == true ]]; then
+ fs_first=false
+ else
+ echo ","
+ fi
+ printf ' {"filesystem":"%s","size":"%s","used":"%s","avail":"%s","use_pct":"%s","mount":"%s"}' \
+ "$fs" "$size" "$used" "$avail" "$pct" "$mount"
+ done
+ echo ""
+ echo " ],"
+
+ # Top directories
+ echo " \"top_directories\": ["
+ local dir_first=true
+ du -x --max-depth="${MAX_DEPTH}" "${SCAN_PATH}" 2>/dev/null \
+ | sort -rn | head -n "${TOP_N}" \
+ | while IFS=$'\t' read -r size_kb dir; do
+ if [[ "$dir_first" == true ]]; then
+ dir_first=false
+ else
+ echo ","
+ fi
+ printf ' {"path":"%s","size_bytes":%d}' "$dir" "$((size_kb * 1024))"
+ done
+ echo ""
+ echo " ],"
+
+ # Top files
+ echo " \"top_files\": ["
+ local file_first=true
+ find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -printf '%s\t%p\n' 2>/dev/null \
+ | sort -rn | head -n "${TOP_N}" \
+ | while IFS=$'\t' read -r size_bytes filepath; do
+ if [[ "$file_first" == true ]]; then
+ file_first=false
+ else
+ echo ","
+ fi
+ printf ' {"path":"%s","size_bytes":%d}' "$filepath" "$size_bytes"
+ done
+ echo ""
+ echo " ],"
+
+ # Old files
+ echo " \"old_large_files\": ["
+ local old_first=true
+ find "${SCAN_PATH}" -xdev -type f -size +"$(parse_min_size)" -mtime +"${AGE_WARN}" \
+ -printf '%s\t%T+\t%p\n' 2>/dev/null \
+ | sort -rn | head -n "${TOP_N}" \
+ | while IFS=$'\t' read -r size_bytes mtime filepath; do
+ local mdate
+ mdate="$(echo "$mtime" | cut -d'+' -f1)"
+ if [[ "$old_first" == true ]]; then
+ old_first=false
+ else
+ echo ","
+ fi
+ printf ' {"path":"%s","size_bytes":%d,"last_modified":"%s"}' "$filepath" "$size_bytes" "$mdate"
+ done
+ echo ""
+ echo " ],"
+
+ # Summary
+ echo " \"summary\": {"
+ echo " \"total_scanned_bytes\": ${total_scanned},"
+ echo " \"old_file_count\": ${old_count},"
+ echo " \"reclaimable_bytes\": ${old_bytes}"
+ echo " }"
+ echo "}"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Entry point: parse options, check the scan path, then print either the
+# JSON document or the full text report.
+main() {
+    parse_args "$@"
+
+    # Refuse to run on a path that is not a directory.
+    [[ -d "$SCAN_PATH" ]] || {
+        echo -e "${RED}[ERROR]${NC} Path does not exist: ${SCAN_PATH}" >&2
+        exit 1
+    }
+
+    # JSON mode prints its own document and skips the text report.
+    if [[ "$JSON_MODE" == true ]]; then
+        json_output
+        exit 0
+    fi
+
+    echo ""
+    echo -e "${BOLD}Disk Usage Report${NC}"
+    echo -e "$(date '+%Y-%m-%d %H:%M:%S %Z') — Scanning: ${SCAN_PATH}"
+
+    # Row counter read by top_directories.
+    COUNTER=0
+
+    # Report sections, in display order.
+    local section
+    for section in filesystem_overview top_directories top_files old_large_files print_summary; do
+        "$section"
+    done
+}
+
+main "$@"
diff --git a/dns-lookup.sh b/dns-lookup.sh
new file mode 100644
index 0000000..fa7188a
--- /dev/null
+++ b/dns-lookup.sh
@@ -0,0 +1,429 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### dns-lookup.sh — Batch DNS lookups with record comparison across servers ####
+#### Query multiple record types and compare results across DNS servers ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./dns-lookup.sh example.com google.com ####
+#### ./dns-lookup.sh --type MX --servers 8.8.8.8,1.1.1.1 example.com ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Settings marked "env" keep a value already present in the environment.
+# Per-query timeout in seconds, passed to dig (+time) and nslookup (-timeout). (env)
+DNS_TIMEOUT="${DNS_TIMEOUT:-5}"
+# Record type to query; main accepts A, AAAA, MX, NS, TXT, CNAME, SOA, PTR. (env)
+RECORD_TYPE="${RECORD_TYPE:-A}"
+# Comma-separated list of DNS servers; empty queries without naming a server.
+DNS_SERVERS=""
+# "true" selects compare mode (lookup_compare) in main. (env)
+COMPARE="${COMPARE:-false}"
+# Path of a file with one domain per line; empty means no file.
+DOMAIN_FILE=""
+# "true" enables the [DEBUG] lines printed by verbose(). (env)
+VERBOSE="${VERBOSE:-false}"
+# Color mode read by setup_colors: "never", "always", or anything else for
+# colors only when stdout is a terminal. (env)
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+# Name the script was invoked as, shown in the help hint.
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# Domains to look up, filled by parse_domain.
+DOMAINS=()
+# Counters updated by lookup_single / lookup_compare and printed in the summary.
+COUNT_TOTAL=0
+COUNT_SUCCESS=0
+COUNT_FAILED=0
+COUNT_MISMATCH=0
+# Lookup tool chosen by detect_dns_tool: "dig" or "nslookup".
+DIG_CMD=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${CYAN}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Print a section title with a blank line before and after it.
+# Arguments: $1 - title text
+section_header() {
+    printf '\n'
+    echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+    printf '\n'
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DNS QUERY
+# ══════════════════════════════════════════════════════════════════════
+
+detect_dns_tool() {
+ if command -v dig &>/dev/null; then
+ DIG_CMD="dig"
+ elif command -v nslookup &>/dev/null; then
+ DIG_CMD="nslookup"
+ else
+ err "Neither dig nor nslookup found. Install dnsutils or bind-utils."
+ exit 1
+ fi
+ verbose "Using DNS tool: ${DIG_CMD}"
+}
+
+# Query one record set with dig.
+# Arguments: $1 - domain, $2 - record type, $3 - DNS server (optional)
+# Globals:   DNS_TIMEOUT (read)
+# Outputs:   one record value per line on stdout; nothing when the lookup
+#            fails or returns no records
+query_dig() {
+    local domain="$1"
+    local rtype="$2"
+    local server="${3:-}"
+
+    local cmd_args=()
+    if [[ -n "$server" ]]; then
+        cmd_args+=("@${server}")
+    fi
+    cmd_args+=("$domain" "$rtype" "+short" "+time=${DNS_TIMEOUT}" "+tries=1")
+
+    verbose "dig ${cmd_args[*]}"
+
+    local output
+    output=$(dig "${cmd_args[@]}" 2>/dev/null) || output=""
+
+    # dig prints diagnostics such as ";; connection timed out" on stdout even
+    # with +short; drop those comment lines so callers do not mistake them
+    # for records. "|| true": grep exits 1 when nothing is left.
+    grep -v '^;' <<< "$output" || true
+}
+
+query_nslookup() {
+ local domain="$1"
+ local rtype="$2"
+ local server="${3:-}"
+
+ local result
+ if [[ -n "$server" ]]; then
+ result=$(nslookup -type="$rtype" -timeout="$DNS_TIMEOUT" "$domain" "$server" 2>/dev/null) || result=""
+ else
+ result=$(nslookup -type="$rtype" -timeout="$DNS_TIMEOUT" "$domain" 2>/dev/null) || result=""
+ fi
+
+ # Parse nslookup output — extract answer lines
+ echo "$result" | awk '/^Name:|^Address:|answer:/{found=1} found && /^[^ \t]/' | grep -v "^Server:" | grep -v "^Name:" | awk '{print $NF}'
+}
+
+do_query() {
+ local domain="$1"
+ local rtype="$2"
+ local server="${3:-}"
+
+ if [[ "$DIG_CMD" == "dig" ]]; then
+ query_dig "$domain" "$rtype" "$server"
+ else
+ query_nslookup "$domain" "$rtype" "$server"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LOOKUP LOGIC
+# ══════════════════════════════════════════════════════════════════════
+
+lookup_single() {
+ local domain="$1"
+ local rtype="$2"
+ local server="${3:-system resolver}"
+ local server_arg="${3:-}"
+
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+
+ local result
+ result=$(do_query "$domain" "$rtype" "$server_arg")
+
+ if [[ -z "$result" ]]; then
+ COUNT_FAILED=$((COUNT_FAILED + 1))
+ printf " %b%-30s %-6s %-18s %s%b\n" "$RED" "$domain" "$rtype" "$server" "NO RECORDS" "$RESET"
+ return
+ fi
+
+ COUNT_SUCCESS=$((COUNT_SUCCESS + 1))
+
+ # Get TTL if using dig
+ local ttl="--"
+ if [[ "$DIG_CMD" == "dig" && -n "$server_arg" ]]; then
+ ttl=$(dig "@${server_arg}" "$domain" "$rtype" +noall +answer +time="${DNS_TIMEOUT}" +tries=1 2>/dev/null \
+ | awk '{print $2}' | head -1 || echo "--")
+ elif [[ "$DIG_CMD" == "dig" ]]; then
+ ttl=$(dig "$domain" "$rtype" +noall +answer +time="${DNS_TIMEOUT}" +tries=1 2>/dev/null \
+ | awk '{print $2}' | head -1 || echo "--")
+ fi
+
+ while IFS= read -r value; do
+ [[ -z "$value" ]] && continue
+ printf " %-30s %-6s %-8s %-18s %s\n" "$domain" "$rtype" "$ttl" "$server" "$value"
+ # Only print domain on first line
+ domain=""
+ ttl=""
+ done <<< "$result"
+}
+
+lookup_compare() {
+ local domain="$1"
+ local rtype="$2"
+ local -a servers_arr
+
+ IFS=',' read -ra servers_arr <<< "$DNS_SERVERS"
+
+ if [[ ${#servers_arr[@]} -lt 2 ]]; then
+ warn "Compare mode requires at least 2 DNS servers (use --servers)"
+ return
+ fi
+
+ local -a all_results=()
+ local first_result=""
+
+ for server in "${servers_arr[@]}"; do
+ COUNT_TOTAL=$((COUNT_TOTAL + 1))
+
+ local result
+ result=$(do_query "$domain" "$rtype" "$server" | sort)
+
+ if [[ -z "$result" ]]; then
+ COUNT_FAILED=$((COUNT_FAILED + 1))
+ printf " %b%-30s %-6s %-18s %s%b\n" "$RED" "$domain" "$rtype" "$server" "NO RECORDS" "$RESET"
+ all_results+=("FAILED")
+ continue
+ fi
+
+ COUNT_SUCCESS=$((COUNT_SUCCESS + 1))
+ all_results+=("$result")
+
+ if [[ -z "$first_result" ]]; then
+ first_result="$result"
+ fi
+
+ while IFS= read -r value; do
+ [[ -z "$value" ]] && continue
+ printf " %-30s %-6s %-18s %s\n" "$domain" "$rtype" "$server" "$value"
+ domain=""
+ done <<< "$result"
+ done
+
+ # Check for mismatches
+ local mismatch=false
+ for r in "${all_results[@]}"; do
+ if [[ "$r" != "$first_result" && "$r" != "FAILED" && "$first_result" != "FAILED" ]]; then
+ mismatch=true
+ break
+ fi
+ done
+
+ if [[ "$mismatch" == "true" ]]; then
+ COUNT_MISMATCH=$((COUNT_MISMATCH + 1))
+ printf " %b ⚠ MISMATCH across servers for %s%b\n" "$RED" "$1" "$RESET"
+ else
+ printf " %b ✓ Consistent across servers for %s%b\n" "$GREEN" "$1" "$RESET"
+ fi
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# INPUT PARSING
+# ══════════════════════════════════════════════════════════════════════
+
+# Add one entry to DOMAINS after trimming surrounding whitespace.
+# Blank entries and entries starting with "#" are ignored.
+# Arguments: $1 - raw entry (command-line argument or input line)
+# Globals:   DOMAINS (appended)
+parse_domain() {
+    local entry="$1"
+
+    # Trim with parameter expansion: no echo/sed fork per entry, and an entry
+    # that looks like an echo option (e.g. "-n") is no longer swallowed.
+    entry="${entry#"${entry%%[![:space:]]*}"}"
+    entry="${entry%"${entry##*[![:space:]]}"}"
+
+    if [[ -z "$entry" || "$entry" == \#* ]]; then
+        return 0
+    fi
+    DOMAINS+=("$entry")
+}
+
+# Read domains from a file, one per line, handing each line to parse_domain.
+# Arguments: $1 - path of the domain list
+# Exits with status 1 when the file does not exist.
+load_domains_from_file() {
+    local file="$1"
+    if [[ ! -f "$file" ]]; then
+        err "File not found: $file"
+        exit 1
+    fi
+
+    local line
+    # "|| [[ -n $line ]]" also processes a final line that has no trailing
+    # newline, which a plain "read" loop would drop.
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        parse_domain "$line"
+    done < "$file"
+}
+
+# Read domains from standard input, one per line, handing each line to
+# parse_domain.
+load_domains_from_stdin() {
+    local line
+    # "|| [[ -n $line ]]" also processes a final line that has no trailing
+    # newline (e.g. from "printf 'example.com' | script").
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        parse_domain "$line"
+    done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+# NOTE(review): this copy of the block is damaged. The here-document body of
+# usage() and the beginning of the argument-parsing loop that follows it are
+# missing; what remains is the tail of a "case" whose default branch hands
+# the argument to parse_domain. Restore the original text before relying on
+# this section.
+usage() {
+ cat <&2
+ exit 1 ;;
+ *)
+ parse_domain "$1"; shift ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+main() {
+ parse_args "$@"
+ setup_colors
+ detect_dns_tool
+
+ # Load domains from file if specified
+ if [[ -n "$DOMAIN_FILE" ]]; then
+ load_domains_from_file "$DOMAIN_FILE"
+ fi
+
+ # Load from stdin if no domains yet and stdin is not a terminal
+ if [[ ${#DOMAINS[@]} -eq 0 ]] && ! [[ -t 0 ]]; then
+ load_domains_from_stdin
+ fi
+
+ if [[ ${#DOMAINS[@]} -eq 0 ]]; then
+ err "No domains specified"
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 1
+ fi
+
+ # Validate record type
+ case "$RECORD_TYPE" in
+ A|AAAA|MX|NS|TXT|CNAME|SOA|PTR) ;;
+ *)
+ err "Unsupported record type: ${RECORD_TYPE}"
+ exit 1 ;;
+ esac
+
+ echo ""
+ echo -e "${BOLD}DNS Lookup — ${RECORD_TYPE} Records${RESET}"
+ echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"
+ echo -e "${DIM}Tool: ${DIG_CMD} | Timeout: ${DNS_TIMEOUT}s${RESET}"
+
+ section_header "Results"
+
+ if [[ "$COMPARE" == "true" ]]; then
+ printf " ${BOLD}%-30s %-6s %-18s %s${RESET}\n" "DOMAIN" "TYPE" "SERVER" "VALUE"
+ printf " %s\n" "$(printf '%.0s─' {1..85})"
+
+ for domain in "${DOMAINS[@]}"; do
+ lookup_compare "$domain" "$RECORD_TYPE"
+ done
+ else
+ # Determine servers to query
+ local -a servers_list
+ if [[ -n "$DNS_SERVERS" ]]; then
+ IFS=',' read -ra servers_list <<< "$DNS_SERVERS"
+ else
+ servers_list=("")
+ fi
+
+ printf " ${BOLD}%-30s %-6s %-8s %-18s %s${RESET}\n" "DOMAIN" "TYPE" "TTL" "SERVER" "VALUE"
+ printf " %s\n" "$(printf '%.0s─' {1..90})"
+
+ for domain in "${DOMAINS[@]}"; do
+ for server in "${servers_list[@]}"; do
+ lookup_single "$domain" "$RECORD_TYPE" "$server"
+ done
+ done
+ fi
+
+ section_header "Summary"
+ field "Total lookups:" "$COUNT_TOTAL"
+ field_color "Successful:" "${GREEN}${COUNT_SUCCESS}${RESET}"
+ if [[ "$COUNT_FAILED" -gt 0 ]]; then
+ field_color "Failed:" "${RED}${COUNT_FAILED}${RESET}"
+ else
+ field "Failed:" "$COUNT_FAILED"
+ fi
+ if [[ "$COMPARE" == "true" ]]; then
+ if [[ "$COUNT_MISMATCH" -gt 0 ]]; then
+ field_color "Mismatches:" "${RED}${COUNT_MISMATCH}${RESET}"
+ else
+ field_color "Mismatches:" "${GREEN}0${RESET}"
+ fi
+ fi
+
+ echo ""
+}
+
+main "$@"
diff --git a/dns-propagation-checker.sh b/dns-propagation-checker.sh
new file mode 100755
index 0000000..0cc4ba1
--- /dev/null
+++ b/dns-propagation-checker.sh
@@ -0,0 +1,350 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### dns-propagation-checker.sh — Check DNS propagation across public resolvers ####
+#### Queries Cloudflare, Google, Quad9, OpenDNS, compares results ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./dns-propagation-checker.sh example.com ####
+#### ./dns-propagation-checker.sh example.com --type MX ####
+#### ./dns-propagation-checker.sh example.com --watch 30 ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+DOMAIN=""            # name to look up; must be non-empty once arguments are parsed
+EXPECTED=""          # when set, answers are compared to this instead of the majority
+RECORD_TYPE="A"      # record type handed to dig
+TIMEOUT=5            # per-query dig timeout in seconds (+time=)
+WATCH_INTERVAL=0     # seconds between re-checks; 0 means a single check
+JSON_OUTPUT="false"  # "true" selects JSON output instead of the table
+COLOR="auto"         # "always", "never", or anything else = colour only on a TTY
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+declare -r SCRIPT_NAME
+
+# ── Built-in Resolvers ───────────────────────────────────────────────
+# Parallel arrays: RESOLVER_NAMES[i] is the display label for RESOLVER_IPS[i].
+declare -a RESOLVER_NAMES=() RESOLVER_IPS=()
+RESOLVER_NAMES+=("Cloudflare");   RESOLVER_IPS+=("1.1.1.1")
+RESOLVER_NAMES+=("Google");       RESOLVER_IPS+=("8.8.8.8")
+RESOLVER_NAMES+=("Quad9");        RESOLVER_IPS+=("9.9.9.9")
+RESOLVER_NAMES+=("OpenDNS");      RESOLVER_IPS+=("208.67.222.222")
+RESOLVER_NAMES+=("Cloudflare-2"); RESOLVER_IPS+=("1.0.0.1")
+RESOLVER_NAMES+=("Google-2");     RESOLVER_IPS+=("8.8.4.4")
+# Extra resolver IPs; run_check appends them after the built-in ones.
+CUSTOM_RESOLVERS=()
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Set the colour globals (RED, GREEN, YELLOW, BOLD, DIM, RESET).
+# COLOR="never" disables colour, COLOR="always" forces it, and any other
+# value enables colour only when stdout is a terminal.
+setup_colors() {
+    # Start from "no colour"; the escape codes are filled in only if enabled.
+    RED="" GREEN="" YELLOW="" BOLD="" DIM="" RESET=""
+
+    case "$COLOR" in
+        never)  return ;;
+        always) ;;
+        *)      [[ -t 1 ]] || return 0 ;;
+    esac
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# Print an [ERROR]-tagged message on stderr.
+err() {
+    printf '%b\n' "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# Print a [WARN]-tagged message on stderr.
+warn() {
+    printf '%b\n' "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2
+ exit 1 ;;
+ *)
+ if [[ -z "$DOMAIN" ]]; then
+ DOMAIN="$1"
+ else
+ err "Unexpected argument: $1"
+ exit 1
+ fi
+ shift ;;
+ esac
+ done
+
+ if [[ -z "$DOMAIN" ]]; then
+ err "Domain name is required"
+ echo "Run ${SCRIPT_NAME} --help for usage" >&2
+ exit 1
+ fi
+
+ local valid_types="A AAAA MX CNAME TXT NS SOA PTR"
+ if [[ ! " $valid_types " =~ " $RECORD_TYPE " ]]; then
+ err "Invalid record type: $RECORD_TYPE"
+ exit 1
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# DNS QUERY
+# ══════════════════════════════════════════════════════════════════════
+
+# Query one resolver and print a single "answer|ttl|" line.
+# Arguments: $1 resolver IP, $2 domain, $3 record type, $4 dig timeout (s)
+# Outputs:   "<records joined by spaces>|<ttl or ?>|" when records came back,
+#            "FAIL||" when the resolver returned no usable record
+# Returns:   always 0; failure is signalled through the FAIL sentinel
+query_resolver() {
+    local resolver_ip="$1" domain="$2" rtype="$3" timeout="$4"
+    local output ttl_output answer ttl
+
+    output=$(dig +time="$timeout" +tries=1 +short "@${resolver_ip}" "$domain" "$rtype" 2>/dev/null) || true
+    ttl_output=$(dig +time="$timeout" +tries=1 +noall +answer "@${resolver_ip}" "$domain" "$rtype" 2>/dev/null) || true
+
+    # dig reports timeouts and transport errors as ";;" lines on stdout, so
+    # they must be dropped or a dead resolver would look like it answered.
+    # Records are sorted so that resolvers returning the same set in a
+    # different (round-robin) order still compare equal.
+    answer=$(printf '%s\n' "$output" \
+        | { grep -v -e '^;' -e '^[[:space:]]*$' || true; } \
+        | LC_ALL=C sort \
+        | tr '\n' ' ' \
+        | sed 's/ *$//')
+    # TTL is column 2 of the first real answer line. No early "exit" in awk,
+    # so the writer can never be hit by SIGPIPE under pipefail.
+    ttl=$(printf '%s\n' "$ttl_output" \
+        | awk '!/^;/ && NF && !seen { print $2; seen = 1 }')
+
+    if [[ -z "$answer" ]]; then
+        echo "FAIL||"
+        return
+    fi
+    echo "${answer}|${ttl:-?}|"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAJORITY ANSWER
+# ══════════════════════════════════════════════════════════════════════
+
+# Print the most frequent non-FAIL entry of an array of answers.
+# Arguments: $1 - NAME of the array holding one answer per resolver
+# Outputs:   the majority answer on stdout (empty line if every lookup failed)
+# Note:      on a tie, the answer that first reaches the top count wins.
+find_majority() {
+    local -n answers_ref=$1
+    local -A counts=()
+    local answer max_count=0 majority=""
+    # The "+" guard keeps an empty array from tripping "set -u" on old bash.
+    for answer in ${answers_ref[@]+"${answers_ref[@]}"}; do
+        # Failed lookups never vote; an empty key would be an invalid subscript.
+        if [[ -z "$answer" || "$answer" == "FAIL" ]]; then
+            continue
+        fi
+        counts["$answer"]=$(( ${counts["$answer"]:-0} + 1 ))
+        if [[ ${counts["$answer"]} -gt $max_count ]]; then
+            max_count=${counts["$answer"]}
+            majority="$answer"
+        fi
+    done
+    echo "$majority"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RUN CHECK
+# ══════════════════════════════════════════════════════════════════════
+
+# Query every resolver once, classify each answer and print the report.
+# Globals:   RESOLVER_NAMES, RESOLVER_IPS, CUSTOM_RESOLVERS, DOMAIN,
+#            RECORD_TYPE, TIMEOUT, EXPECTED, JSON_OUTPUT (all read)
+# Outputs:   the table or JSON report on stdout
+# Returns:   0 when every resolver matched the reference answer, 1 otherwise
+run_check() {
+    local -a all_names=("${RESOLVER_NAMES[@]}")
+    local -a all_ips=("${RESOLVER_IPS[@]}")
+    local custom
+
+    # The "+" guard keeps an empty CUSTOM_RESOLVERS from aborting the script
+    # under "set -u" on bash releases older than 4.4.
+    for custom in ${CUSTOM_RESOLVERS[@]+"${CUSTOM_RESOLVERS[@]}"}; do
+        all_names+=("Custom-${custom}")
+        all_ips+=("$custom")
+    done
+
+    local total=${#all_names[@]}
+    local -a answers=() ttls=() statuses=()
+    local i result rest
+
+    for (( i = 0; i < total; i++ )); do
+        result=$(query_resolver "${all_ips[i]}" "$DOMAIN" "$RECORD_TYPE" "$TIMEOUT")
+        # result is "answer|ttl|": field 1 is the answer, field 2 the TTL.
+        answers+=("${result%%|*}")
+        rest=${result#*|}
+        ttls+=("${rest%%|*}")
+    done
+
+    local majority compare_to agree_count=0
+    majority=$(find_majority answers)
+    # An explicit expected value takes precedence over the majority answer.
+    compare_to="${EXPECTED:-$majority}"
+
+    for (( i = 0; i < total; i++ )); do
+        if [[ "${answers[i]}" == "FAIL" ]]; then
+            statuses+=("FAIL")
+        elif [[ "${answers[i]}" == "$compare_to" ]]; then
+            statuses+=("MATCH")
+            agree_count=$((agree_count + 1))
+        else
+            statuses+=("MISMATCH")
+        fi
+    done
+
+    if [[ "$JSON_OUTPUT" == "true" ]]; then
+        print_json all_names all_ips answers ttls statuses "$agree_count" "$total" "$majority"
+    else
+        print_table all_names all_ips answers ttls statuses "$agree_count" "$total" "$majority" "$compare_to"
+    fi
+
+    if [[ "$agree_count" -eq "$total" ]]; then
+        return 0
+    fi
+    return 1
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT: TABLE
+# ══════════════════════════════════════════════════════════════════════
+
+# Render the per-resolver results as a table followed by a short summary.
+# Arguments: $1-$5 NAMES of arrays: resolver names, IPs, answers, TTLs, statuses
+#            $6 number of agreeing resolvers, $7 total resolvers,
+#            $8 majority answer, $9 comparison target (accepted for call
+#            compatibility; statuses are already computed by the caller)
+# Globals:   DOMAIN, RECORD_TYPE, EXPECTED and the colour variables (read)
+print_table() {
+    local -n names_ref=$1 ips_ref=$2 ans_ref=$3 ttl_ref=$4 stat_ref=$5
+    local agree="$6" total="$7" majority="$8"
+    local count=${#names_ref[@]}
+    local i color status_str display_answer
+
+    echo ""
+    echo -e "${BOLD}DNS Propagation Check — ${DOMAIN} (${RECORD_TYPE})${RESET}"
+    echo -e "${DIM}$(date -u '+%Y-%m-%d %H:%M:%S UTC')${RESET}"
+    echo ""
+    printf " ${BOLD}%-20s %-17s %-22s %-6s %s${RESET}\n" "RESOLVER" "IP" "RESULT" "TTL" "STATUS"
+    printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+    for (( i = 0; i < count; i++ )); do
+        status_str="${stat_ref[i]}"
+        case "$status_str" in
+            MATCH)    color="$GREEN" ;;
+            MISMATCH) color="$YELLOW" ;;
+            FAIL)     color="$RED" ;;
+            # Unknown status: print it uncoloured instead of reusing the
+            # previous row's colour and label.
+            *)        color="" ;;
+        esac
+
+        # Keep long answers (e.g. TXT records) inside the RESULT column.
+        display_answer="${ans_ref[i]}"
+        if [[ ${#display_answer} -gt 20 ]]; then
+            display_answer="${display_answer:0:17}..."
+        fi
+
+        printf " %-20s %-17s %b%-22s%b %-6s %b%s%b\n" \
+            "${names_ref[i]}" \
+            "${ips_ref[i]}" \
+            "$color" "$display_answer" "$RESET" \
+            "${ttl_ref[i]}" \
+            "$color" "$status_str" "$RESET"
+    done
+
+    echo ""
+    echo -e " ${BOLD}Summary${RESET}"
+    if [[ -n "$EXPECTED" ]]; then
+        printf " %-20s %s\n" "Expected answer:" "$EXPECTED"
+    fi
+    printf " %-20s %s\n" "Majority answer:" "${majority:-N/A}"
+    printf " %-20s %s\n" "Agree:" "${agree}/${total} resolvers"
+
+    if [[ "$agree" -eq "$total" ]]; then
+        printf " %-20s " "Status:"; echo -e "${GREEN}PROPAGATION COMPLETE${RESET}"
+    else
+        printf " %-20s " "Status:"; echo -e "${YELLOW}PROPAGATION PENDING${RESET}"
+    fi
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT: JSON
+# ══════════════════════════════════════════════════════════════════════
+
+# Escape a string for use inside a JSON string literal.
+# Backslashes are doubled first so the backslashes added in front of quotes
+# are not themselves escaped again.
+json_escape() {
+    local s=$1
+    s=${s//\\/\\\\}
+    s=${s//\"/\\\"}
+    printf '%s' "$s"
+}
+
+# Print the check result as a single-line JSON document.
+# Arguments: $1-$5 NAMES of arrays: resolver names, IPs, answers, TTLs, statuses
+#            $6 number of agreeing resolvers, $7 total resolvers,
+#            $8 majority answer
+# Globals:   DOMAIN, RECORD_TYPE (read)
+print_json() {
+    local -n jnames=$1 jips=$2 jans=$3 jttls=$4 jstats=$5
+    local agree="$6" total="$7" majority="$8"
+    local count=${#jnames[@]} propagated="false"
+    local i
+
+    if [[ "$agree" -eq "$total" ]]; then
+        propagated="true"
+    fi
+
+    printf '{"domain":"%s","type":"%s","timestamp":"%s","results":[' \
+        "$(json_escape "$DOMAIN")" "$RECORD_TYPE" "$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
+    for (( i = 0; i < count; i++ )); do
+        if (( i > 0 )); then
+            printf ','
+        fi
+        # Answers (TXT records in particular) may contain quotes and
+        # backslashes, so every free-form field goes through json_escape.
+        printf '{"resolver":"%s","ip":"%s","answer":"%s","ttl":"%s","status":"%s"}' \
+            "$(json_escape "${jnames[i]}")" "$(json_escape "${jips[i]}")" \
+            "$(json_escape "${jans[i]}")" "${jttls[i]}" "${jstats[i]}"
+    done
+    printf '],"summary":{"majority":"%s","agree":%d,"total":%d,"propagated":%s}}\n' \
+        "$(json_escape "$majority")" "$agree" "$total" "$propagated"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+# Entry point: parse arguments, make sure dig exists, then run one check
+# or, when WATCH_INTERVAL is positive, repeat until every resolver agrees.
+# Exits 0 once all resolvers agree; a single check exits 1 if they do not.
+main() {
+    parse_args "$@"
+    setup_colors
+
+    command -v dig &>/dev/null || {
+        err "dig is required but not found. Install dnsutils (Debian/Ubuntu) or bind-utils (RHEL/CentOS)."
+        exit 1
+    }
+
+    if [[ "$WATCH_INTERVAL" -gt 0 ]]; then
+        # Watch mode only ever leaves this loop through "exit 0" (or a signal).
+        local cycle
+        for (( cycle = 1; ; cycle++ )); do
+            if [[ "$JSON_OUTPUT" != "true" ]]; then
+                if [[ $cycle -gt 1 ]]; then
+                    echo -e "${DIM}────────────────────────────────────────────────${RESET}"
+                fi
+                echo -e "${DIM}Watch cycle ${cycle} — checking every ${WATCH_INTERVAL}s (Ctrl+C to stop)${RESET}"
+            fi
+            if run_check; then
+                if [[ "$JSON_OUTPUT" != "true" ]]; then
+                    echo -e " ${GREEN}All resolvers agree. Propagation complete.${RESET}\n"
+                fi
+                exit 0
+            fi
+            sleep "$WATCH_INTERVAL"
+        done
+    fi
+
+    # Single check: the exit status mirrors the result.
+    if run_check; then
+        exit 0
+    fi
+    exit 1
+}
+
+main "$@"
diff --git a/dns-smoke-tests.sh b/dns-smoke-tests.sh
new file mode 100644
index 0000000..7959749
--- /dev/null
+++ b/dns-smoke-tests.sh
@@ -0,0 +1,500 @@
+#!/usr/bin/env bash
+
+#####################################################################################
+#### dns-smoke-tests.sh — Verify DNS infrastructure is healthy ####
+#### Checks resolution, zone transfers, SOA, DNSSEC, response time, DoT. ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: ./dns-smoke-tests.sh ####
+#### DNS_SERVER=192.168.1.1 DOMAIN=example.com ./dns-smoke-tests.sh ####
+#### ####
+#### See --help for all options. ####
+#####################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Each setting keeps a non-empty value from the environment and otherwise
+# falls back to the default given here.
+: "${DNS_SERVER:=}"            # server to query; empty = system resolver
+: "${DOMAIN:=example.com}"     # name used by the resolution tests
+: "${REVERSE_IP:=}"            # address for the reverse-lookup test
+: "${ZONE:=}"                  # zone for the AXFR test
+: "${ZONE_MASTER:=}"           # second server for the SOA serial comparison
+: "${DNSSEC_DOMAIN:=}"         # name for the DNSSEC test
+: "${DOT_SERVER:=}"            # host probed on port 853
+: "${MAX_RESPONSE_MS:=500}"    # response-time ceiling in milliseconds
+: "${TEST_RECORDS:=}"          # comma-separated name:type[:expected] entries
+: "${OUTPUT_FORMAT:=text}"     # "text" or "tap"
+: "${VERBOSE:=false}"          # "true" enables [DEBUG] lines
+: "${COLOR:=auto}"             # "always", "never", or colour only on a TTY
+
+# ── State ─────────────────────────────────────────────────────────────
+PASS=0 FAIL=0 SKIP=0 TOTAL=0
+RESULTS=()
+START_TIME=""
+
+# ── Dig tool detection ───────────────────────────────────────────────
+DIG_CMD=""
+# Store the first available query tool (dig, then drill) in DIG_CMD.
+# Exits the script with status 1 when neither tool is installed.
+detect_dig() {
+    local candidate
+    for candidate in dig drill; do
+        if command -v "$candidate" >/dev/null 2>&1; then
+            DIG_CMD="$candidate"
+            verbose "Using ${DIG_CMD} for DNS queries"
+            return
+        fi
+    done
+    err "Neither dig nor drill found. Install dnsutils or ldns."
+    exit 1
+}
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Set the colour globals (RED, GREEN, YELLOW, BLUE, BOLD, RESET).
+# COLOR="never" disables colour, COLOR="always" forces it, and any other
+# value enables colour only when stdout is a terminal.
+setup_colors() {
+    # Start from "no colour"; the escape codes are filled in only if enabled.
+    RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+
+    case "$COLOR" in
+        never)  return ;;
+        always) ;;
+        *)      [[ -t 1 ]] || return 0 ;;
+    esac
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    BOLD='\033[1m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+# [INFO] message on stdout.
+log() {
+    printf '%b\n' "${BLUE}[INFO]${RESET} $*"
+}
+
+# [WARN] message on stderr.
+warn() {
+    printf '%b\n' "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+# [ERROR] message on stderr.
+err() {
+    printf '%b\n' "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# [DEBUG] message on stdout, printed only when VERBOSE is "true".
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    printf '%b\n' "${BLUE}[DEBUG]${RESET} $*"
+}
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# Record a passing test and print its result line.
+# Arguments: $1 test name, $2 optional detail text
+# Globals:   PASS, TOTAL, RESULTS (written); OUTPUT_FORMAT, GREEN, RESET (read)
+record_pass() {
+    local name="$1" detail="${2:-}"
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name}${detail:+ (${detail})}" ;;
+        *)   echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}" ;;
+    esac
+}
+
+# Record a failing test and print its result line.
+# Arguments: $1 test name, $2 optional detail text
+# Globals:   FAIL, TOTAL, RESULTS (written); OUTPUT_FORMAT, RED, RESET (read)
+# Returns:   0 in every output mode
+record_fail() {
+    local name="$1" detail="${2:-}"
+    ((FAIL++)) || true; ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # A plain "if" instead of "[[ -n ... ]] && echo": with an empty
+        # detail the short-circuit form made the function return 1, which
+        # aborts the caller under "set -e".
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    else
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+}
+
+# Record a skipped test and print its result line.
+# Arguments: $1 test name, $2 optional reason
+# Globals:   SKIP, TOTAL, RESULTS (written); OUTPUT_FORMAT, YELLOW, RESET (read)
+record_skip() {
+    local name="$1" reason="${2:-}"
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("SKIP|${name}|${reason}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name} # SKIP ${reason}" ;;
+        *)   echo -e " ${YELLOW}⊘${RESET} ${name}${reason:+ — ${reason}}" ;;
+    esac
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Succeed when $1 resolves to a command, function, builtin or alias.
+has_cmd() {
+    command -v "$1" &>/dev/null
+}
+
+# Run the detected query tool with the given arguments, putting
+# "@DNS_SERVER" in front of them when a server is configured.
+dig_cmd() {
+    [[ -z "$DNS_SERVER" ]] || set -- "@${DNS_SERVER}" "$@"
+    "$DIG_CMD" "$@"
+}
+
+# ── Output Functions ──────────────────────────────────────────────────
+# Print a blank line and a bold section title; does nothing unless
+# OUTPUT_FORMAT is "text".
+section_header() {
+    local title="$1"
+    [[ "$OUTPUT_FORMAT" == "text" ]] || return 0
+    echo ""
+    echo -e "${BOLD}${title}${RESET}"
+}
+
+# Print the banner (title, domain, server, UTC timestamp); does nothing
+# unless OUTPUT_FORMAT is "text".
+print_header() {
+    [[ "$OUTPUT_FORMAT" == "text" ]] || return 0
+    echo ""
+    echo -e "${BOLD}DNS Smoke Tests${RESET}"
+    echo "Domain: ${DOMAIN}"
+    echo "Server: ${DNS_SERVER:-(system resolver)}"
+    echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+}
+
+# Emit the TAP version line.
+print_tap_header() {
+    printf '%s\n' "TAP version 13"
+}
+
+# Print the closing summary: counters, elapsed seconds since START_TIME,
+# and an overall verdict line.
+print_summary() {
+    local now elapsed
+    now=$(date +%s)
+    elapsed=$(( now - START_TIME ))
+    local rule="${BOLD}────────────────────────────────────────${RESET}"
+
+    echo ""
+    echo -e "$rule"
+    echo -e "${BOLD}Summary${RESET} ${DOMAIN} ${DNS_SERVER:-(system resolver)}"
+    echo -e " ${GREEN}${PASS} passed${RESET} ${RED}${FAIL} failed${RESET} ${YELLOW}${SKIP} skipped${RESET} (${elapsed}s)"
+    echo -e "$rule"
+
+    if [[ $FAIL -eq 0 ]]; then
+        echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
+    else
+        echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"
+    fi
+}
+
+# Emit the TAP plan line followed by pass/fail/skip counts as comments.
+print_tap_footer() {
+    printf '%s\n' \
+        "1..${TOTAL}" \
+        "# pass ${PASS}" \
+        "# fail ${FAIL}" \
+        "# skip ${SKIP}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# TESTS
+# ══════════════════════════════════════════════════════════════════════
+
+# ── 1. Resolver Reachable ─────────────────────────────────────────────
+# Pass when a short A query for DOMAIN returns text that contains none of
+# the known failure phrases; fail on empty output or on such a phrase.
+test_resolver_reachable() {
+    section_header "Connectivity"
+    local response target="${DNS_SERVER:-(system resolver)}"
+    response=$(dig_cmd +short +time=5 +tries=1 "${DOMAIN}" A 2>&1) || true
+
+    if [[ -z "$response" ]] || echo "$response" | grep -qi "timed out\|connection refused\|no servers\|SERVFAIL"; then
+        record_fail "Resolver reachable" "${target} — ${response:-no response}"
+    else
+        record_pass "Resolver reachable" "${target}"
+    fi
+}
+
+# ── 2. Forward Resolution (A) ────────────────────────────────────────
+# Pass when DOMAIN has at least one A record; the first one is reported.
+test_forward_resolution() {
+    section_header "Resolution"
+    local label="Forward resolution (${DOMAIN} A)" records
+    records=$(dig_cmd +short "${DOMAIN}" A 2>/dev/null) || true
+
+    if [[ -z "$records" ]]; then
+        record_fail "$label" "no A record returned"
+        return
+    fi
+    # Everything before the first newline is the first record.
+    record_pass "$label" "${records%%$'\n'*}"
+}
+
+# ── 3. AAAA Resolution ───────────────────────────────────────────────
+# Report the first AAAA record of DOMAIN; a missing record is a skip,
+# not a failure.
+test_aaaa_resolution() {
+    local label="AAAA resolution (${DOMAIN})" records
+    records=$(dig_cmd +short "${DOMAIN}" AAAA 2>/dev/null) || true
+
+    if [[ -z "$records" ]]; then
+        record_skip "$label" "no AAAA record"
+        return
+    fi
+    record_pass "$label" "${records%%$'\n'*}"
+}
+
+# ── 4. MX Resolution ─────────────────────────────────────────────────
+# Report the first MX record of DOMAIN; a missing record is a skip,
+# not a failure.
+test_mx_resolution() {
+    local label="MX resolution (${DOMAIN})" records
+    records=$(dig_cmd +short "${DOMAIN}" MX 2>/dev/null) || true
+
+    if [[ -z "$records" ]]; then
+        record_skip "$label" "no MX record"
+        return
+    fi
+    record_pass "$label" "${records%%$'\n'*}"
+}
+
+# ── 5. Reverse Lookup ────────────────────────────────────────────────
+# Check that REVERSE_IP has a PTR record; skipped when REVERSE_IP is empty.
+test_reverse_lookup() {
+    if [[ -z "$REVERSE_IP" ]]; then
+        record_skip "Reverse lookup" "REVERSE_IP not set"
+        return
+    fi
+
+    local label="Reverse lookup (${REVERSE_IP})" ptr
+    ptr=$(dig_cmd +short -x "${REVERSE_IP}" 2>/dev/null) || true
+    if [[ -z "$ptr" ]]; then
+        record_fail "$label" "no PTR record returned"
+    else
+        record_pass "$label" "${ptr}"
+    fi
+}
+
+# ── 6. Response Time ─────────────────────────────────────────────────
+# Compare the "Query time" reported by the query tool against
+# MAX_RESPONSE_MS; fail when it is higher or cannot be parsed.
+test_response_time() {
+    section_header "Performance"
+    local raw elapsed_ms
+    raw=$(dig_cmd "${DOMAIN}" A 2>/dev/null) || true
+
+    # First attempt: the first run of digits on the "Query time" line.
+    elapsed_ms=$(echo "$raw" | grep -i "query time" | grep -oP '[0-9]+' | head -1) || true
+    # Second attempt: the fourth field of a ";; Query time: N msec" line.
+    if [[ -z "$elapsed_ms" ]]; then
+        elapsed_ms=$(echo "$raw" | grep -i "query time" | awk '{print $4}') || true
+    fi
+
+    if [[ -z "$elapsed_ms" ]]; then
+        record_fail "Response time" "could not parse query time"
+        return
+    fi
+
+    if [[ "$elapsed_ms" -le "$MAX_RESPONSE_MS" ]]; then
+        record_pass "Response time" "${elapsed_ms}ms (<= ${MAX_RESPONSE_MS}ms)"
+    else
+        record_fail "Response time" "${elapsed_ms}ms (> ${MAX_RESPONSE_MS}ms)"
+    fi
+}
+
+# ── 7. Authoritative Answer ──────────────────────────────────────────
+# Pass when the response to an A query for DOMAIN has "aa" on its
+# flags line.
+test_authoritative_answer() {
+    section_header "Authority"
+    local label="Authoritative answer (${DOMAIN})" response
+    response=$(dig_cmd "${DOMAIN}" A 2>/dev/null) || true
+
+    if ! echo "$response" | grep -q "flags:.*aa"; then
+        record_fail "$label" "AA flag not set — server is not authoritative"
+        return
+    fi
+    record_pass "$label" "AA flag set"
+}
+
+# ── 8. SOA Serial ────────────────────────────────────────────────────
+# Check that DOMAIN returns an SOA record whose serial (third field of
+# the short answer) is present and not 0.
+test_soa_serial() {
+    local label="SOA serial (${DOMAIN})" soa serial
+    soa=$(dig_cmd +short "${DOMAIN}" SOA 2>/dev/null) || true
+    if [[ -z "$soa" ]]; then
+        record_fail "$label" "no SOA record returned"
+        return
+    fi
+
+    # Short SOA answer: mname rname serial refresh retry expire minimum
+    serial=$(echo "$soa" | awk '{print $3}') || true
+    case "$serial" in
+        "") record_fail "$label" "could not parse serial" ;;
+        0)  record_fail "$label" "serial is 0" ;;
+        *)  record_pass "$label" "${serial}" ;;
+    esac
+}
+
+# ── 9. SOA Consistency ───────────────────────────────────────────────
+# Compare DOMAIN's SOA serial as seen by the configured server with the
+# serial served by ZONE_MASTER; skipped when ZONE_MASTER is empty.
+test_soa_consistency() {
+    if [[ -z "$ZONE_MASTER" ]]; then
+        record_skip "SOA consistency" "ZONE_MASTER not set"
+        return
+    fi
+
+    local here there
+    # Serial from the server under test (dig_cmd adds @DNS_SERVER if set)...
+    here=$(dig_cmd +short "${DOMAIN}" SOA 2>/dev/null | awk '{print $3}') || true
+    # ...and from the master, queried directly.
+    there=$("$DIG_CMD" "@${ZONE_MASTER}" +short "${DOMAIN}" SOA 2>/dev/null | awk '{print $3}') || true
+
+    if [[ -z "$here" || -z "$there" ]]; then
+        record_fail "SOA consistency" "could not retrieve serials (local=${here:-?}, master=${there:-?})"
+    elif [[ "$here" == "$there" ]]; then
+        record_pass "SOA consistency" "serial ${here} matches across servers"
+    else
+        record_fail "SOA consistency" "serial mismatch — local=${here}, master=${there}"
+    fi
+}
+
+# ── 10. Zone Transfer ────────────────────────────────────────────────
+# Attempt an AXFR of ZONE. Transferred records and an explicit refusal
+# both count as a pass; anything else is a failure. Skipped when ZONE is
+# empty.
+test_zone_transfer() {
+    section_header "Zone Transfer"
+    if [[ -z "$ZONE" ]]; then
+        record_skip "Zone transfer (AXFR)" "ZONE not set"
+        return
+    fi
+
+    local label="Zone transfer (${ZONE})" axfr transferred
+    axfr=$(dig_cmd AXFR "${ZONE}" 2>&1) || true
+    # Count the output lines that begin with the zone name.
+    transferred=$(echo "$axfr" | grep -c "^${ZONE}" 2>/dev/null) || transferred=0
+
+    if [[ $transferred -gt 0 ]]; then
+        record_pass "$label" "${transferred} records transferred"
+        return
+    fi
+    if echo "$axfr" | grep -qi "transfer failed\|refused\|REFUSED"; then
+        record_pass "$label" "AXFR refused (expected on production)"
+    else
+        record_fail "$label" "transfer failed — ${axfr:0:100}"
+    fi
+}
+
+# ── 11. DNSSEC Validation ────────────────────────────────────────────
+# Pass when a +dnssec query for DNSSEC_DOMAIN has "ad" on its flags line.
+# Skipped when DNSSEC_DOMAIN is empty.
+test_dnssec_validation() {
+    section_header "DNSSEC"
+    if [[ -z "$DNSSEC_DOMAIN" ]]; then
+        record_skip "DNSSEC validation" "DNSSEC_DOMAIN not set"
+        return
+    fi
+
+    local label="DNSSEC validation (${DNSSEC_DOMAIN})" short_answer verbose_answer
+    # The short answer only tells "no response" apart from "no AD flag".
+    short_answer=$(dig_cmd +dnssec +short "${DNSSEC_DOMAIN}" A 2>/dev/null) || true
+    # The flags line is only present in the full output.
+    verbose_answer=$(dig_cmd +dnssec "${DNSSEC_DOMAIN}" A 2>/dev/null) || true
+
+    if echo "$verbose_answer" | grep -q "flags:.*ad"; then
+        record_pass "$label" "AD flag set"
+        return
+    fi
+    if [[ -z "$short_answer" ]]; then
+        record_fail "$label" "no response"
+    else
+        record_fail "$label" "response received but AD flag not set"
+    fi
+}
+
+# ── 12. DNS-over-TLS ─────────────────────────────────────────────────
+# Check that DOT_SERVER completes a TLS handshake on port 853.
+# Skipped when DOT_SERVER is empty or openssl is not installed.
+# Globals: DOT_SERVER (read)
+test_dot() {
+    section_header "DNS-over-TLS"
+    if [[ -z "$DOT_SERVER" ]]; then
+        record_skip "DNS-over-TLS" "DOT_SERVER not set"
+        return
+    fi
+    if ! has_cmd openssl; then
+        record_skip "DNS-over-TLS" "openssl not installed"
+        return
+    fi
+
+    local output exit_code=0
+    # stdin from /dev/null makes s_client quit right after the handshake.
+    output=$(openssl s_client -connect "${DOT_SERVER}:853" -servername "${DOT_SERVER}" </dev/null 2>&1) || exit_code=$?
+
+    # s_client prints "CONNECTED" as soon as the TCP connection is up, even
+    # when the TLS handshake then fails, so that word alone proves nothing.
+    # Require a clean exit and a negotiated cipher as well.
+    if [[ $exit_code -eq 0 ]] \
+        && echo "$output" | grep -q "CONNECTED" \
+        && ! echo "$output" | grep -q "Cipher is (NONE)"; then
+        # Extract certificate info if available
+        local cn
+        cn=$(echo "$output" | grep -oP 'CN\s*=\s*\K[^,/]+' | head -1) || true
+        record_pass "DNS-over-TLS (${DOT_SERVER}:853)" "TLS handshake OK${cn:+ — CN=${cn}}"
+    else
+        record_fail "DNS-over-TLS (${DOT_SERVER}:853)" "TLS handshake failed"
+    fi
+}
+
+# ── 13. Custom Record Checks ─────────────────────────────────────────
+# Check user-supplied records listed in TEST_RECORDS.
+# TEST_RECORDS is a comma-separated list of "name:type[:expected]" entries.
+# When "expected" is given, it must appear literally (as a substring) in
+# the answer. Does nothing when TEST_RECORDS is empty.
+test_custom_records() {
+    if [[ -z "$TEST_RECORDS" ]]; then return; fi
+    section_header "Custom Records"
+
+    # Split with "read" rather than an unquoted expansion: names such as
+    # "*.example.com" must not be glob-expanded, and IFS is changed only
+    # for the duration of each read instead of for every callee.
+    local -a entries=()
+    IFS=',' read -ra entries <<< "$TEST_RECORDS" || true
+
+    local entry name type expected output
+    for entry in ${entries[@]+"${entries[@]}"}; do
+        name="" type="" expected=""
+        # The last variable receives the remainder, so an expected value
+        # may itself contain ":" (e.g. an IPv6 address).
+        IFS=':' read -r name type expected <<< "$entry" || true
+        if [[ -z "$name" || -z "$type" ]]; then
+            record_fail "Custom record" "invalid entry: ${entry}"
+            continue
+        fi
+
+        output=$(dig_cmd +short "${name}" "${type}" 2>/dev/null) || true
+        if [[ -z "$output" ]]; then
+            record_fail "Custom record (${name} ${type})" "no record returned"
+        elif [[ -z "$expected" ]]; then
+            record_pass "Custom record (${name} ${type})" "${output}"
+        # -F: compare literally ("." must not match any character);
+        # --: an expected value starting with "-" is not an option.
+        elif grep -qF -- "$expected" <<< "$output"; then
+            record_pass "Custom record (${name} ${type})" "${output}"
+        else
+            record_fail "Custom record (${name} ${type})" "expected '${expected}', got '${output}'"
+        fi
+    done
+}
+
+# ── 14. Recursive Resolution ─────────────────────────────────────────
+# Pass when an A query for google.com returns at least one record;
+# the first one is reported.
+test_recursive_resolution() {
+    section_header "Recursion"
+    local label="Recursive resolution (google.com)" records
+    records=$(dig_cmd +short "google.com" A 2>/dev/null) || true
+
+    if [[ -z "$records" ]]; then
+        record_fail "$label" "could not resolve external domain"
+        return
+    fi
+    record_pass "$label" "${records%%$'\n'*}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2; }
+# [ERROR] message on stderr.
+err() {
+    printf '%b\n' "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# [DEBUG] message on stdout, printed only when VERBOSE is "true".
+verbose() {
+    [[ "$VERBOSE" == "true" ]] || return 0
+    printf '%b\n' "${BLUE}[DEBUG]${RESET} $*"
+}
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# Record a passing test and print its result line.
+# Arguments: $1 test name, $2 optional detail text
+# Globals:   PASS, TOTAL, RESULTS (written); OUTPUT_FORMAT, GREEN, RESET (read)
+record_pass() {
+    local name="$1" detail="${2:-}"
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name}${detail:+ (${detail})}" ;;
+        *)   echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}" ;;
+    esac
+}
+
+# Record a failing test and print its result line.
+# Arguments: $1 test name, $2 optional detail text
+# Globals:   FAIL, TOTAL, RESULTS (written); OUTPUT_FORMAT, RED, RESET (read)
+# Returns:   0 in every output mode
+record_fail() {
+    local name="$1" detail="${2:-}"
+    ((FAIL++)) || true; ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # A plain "if" instead of "[[ -n ... ]] && echo": with an empty
+        # detail the short-circuit form made the function return 1, which
+        # aborts the caller under "set -e".
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    else
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+}
+
+# Record a skipped test and print its result line.
+# Arguments: $1 test name, $2 optional reason
+# Globals:   SKIP, TOTAL, RESULTS (written); OUTPUT_FORMAT, YELLOW, RESET (read)
+record_skip() {
+    local name="$1" reason="${2:-}"
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("SKIP|${name}|${reason}")
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name} # SKIP ${reason}" ;;
+        *)   echo -e " ${YELLOW}⊘${RESET} ${name}${reason:+ — ${reason}}" ;;
+    esac
+}
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Succeed when $1 resolves to a command, function, builtin or alias.
+has_cmd() {
+    command -v "$1" &>/dev/null
+}
+
+# Force-remove the container named $1; output and failures are ignored.
+remove_container() {
+    local target="$1"
+    docker rm -f "$target" &>/dev/null || true
+}
+
+# Print a blank line and a bold section title; silent in TAP mode.
+section() {
+    [[ "$OUTPUT_FORMAT" == "tap" ]] && return 0
+    echo ""
+    echo -e "${BOLD}$1${RESET}"
+}
+
+# ── Cleanup ───────────────────────────────────────────────────────────
+# shellcheck disable=SC2317
+# Remove every container, volume, network and image the tests may have
+# left behind. Registered as the EXIT trap, so it runs on every exit path;
+# each removal is best-effort.
+cleanup() {
+    verbose "Cleaning up test artifacts..."
+    local leftover
+    for leftover in \
+        "$SMOKE_CONTAINER" \
+        "$SMOKE_PORT_CONTAINER" \
+        "$SMOKE_DNS_CONTAINER" \
+        "$SMOKE_VOL_CONTAINER" \
+        "$SMOKE_NET_CONTAINER" \
+        "$SMOKE_MEM_CONTAINER"; do
+        remove_container "$leftover"
+    done
+    docker volume rm -f "$SMOKE_VOLUME" &>/dev/null || true
+    docker network rm "$SMOKE_NETWORK" &>/dev/null || true
+    docker rmi -f "$SMOKE_BUILD_TAG" &>/dev/null || true
+}
+trap cleanup EXIT
+
+# ══════════════════════════════════════════════════════════════════════
+# TEST FUNCTIONS
+# ══════════════════════════════════════════════════════════════════════
+
+# ── 1. Docker daemon running ─────────────────────────────────────────
+# Report whether the Docker daemon is running, asking the first available
+# of: systemctl, service, "docker info".
+test_daemon_running() {
+    local label="Docker daemon running"
+
+    if has_cmd systemctl; then
+        if systemctl is-active --quiet docker 2>/dev/null; then
+            record_pass "$label" "systemctl active"
+        else
+            record_fail "$label" "systemctl inactive"
+        fi
+        return
+    fi
+
+    if has_cmd service; then
+        if service docker status >/dev/null 2>&1; then
+            record_pass "$label" "service running"
+        else
+            record_fail "$label" "service stopped"
+        fi
+        return
+    fi
+
+    if docker info >/dev/null 2>&1; then
+        record_pass "$label" "docker info ok"
+    else
+        record_fail "$label" "cannot determine status"
+    fi
+}
+
+# ── 2. Docker API responsive ─────────────────────────────────────────
+# Pass when "docker info" succeeds within 10 seconds; the reported
+# server version (or "unknown") is attached as detail.
+test_api_responsive() {
+    local info version
+    if ! info=$(timeout 10 docker info 2>&1); then
+        record_fail "Docker API responsive" "docker info timed out or failed"
+        return
+    fi
+    version=$(echo "$info" | grep -i "Server Version" | head -1 | awk '{print $NF}') || true
+    record_pass "Docker API responsive" "server ${version:-unknown}"
+}
+
+# ── 3. Docker socket accessible ──────────────────────────────────────
+# Check that /var/run/docker.sock is a readable and writable socket.
+# When that path is not a socket, a working "docker info" still counts
+# as a pass (non-default socket).
+test_socket_accessible() {
+    local label="Docker socket accessible"
+    local socket="/var/run/docker.sock"
+
+    if [[ -S "$socket" ]]; then
+        if [[ -r "$socket" && -w "$socket" ]]; then
+            record_pass "$label" "$socket"
+        else
+            record_fail "$label" "$socket not readable/writable"
+        fi
+        return
+    fi
+
+    if docker info >/dev/null 2>&1; then
+        record_pass "$label" "non-default socket"
+    else
+        record_fail "$label" "$socket not found"
+    fi
+}
+
+# ── 4. Container lifecycle ───────────────────────────────────────────
+# Walk one container through create, start, exec, stop and rm, and report
+# the first step that fails. Skipped when SKIP_LIFECYCLE is "true".
+test_container_lifecycle() {
+    if [[ "$SKIP_LIFECYCLE" == "true" ]]; then
+        record_skip "Container lifecycle" "SKIP_LIFECYCLE=true"
+        return
+    fi
+    # Start from a clean slate in case an earlier run left the container.
+    remove_container "$SMOKE_CONTAINER"
+
+    # The steps run strictly in order; "failure" names the first one that
+    # did not succeed and stays empty when all of them did.
+    local failure=""
+    if ! docker create --name "$SMOKE_CONTAINER" "$TEST_IMAGE" sleep 30 >/dev/null 2>&1; then
+        failure="docker create failed"
+    elif ! docker start "$SMOKE_CONTAINER" >/dev/null 2>&1; then
+        failure="docker start failed"
+    elif [[ "$(docker exec "$SMOKE_CONTAINER" echo "smoke-ok" 2>&1 || true)" != "smoke-ok" ]]; then
+        failure="docker exec failed"
+    elif ! docker stop -t 5 "$SMOKE_CONTAINER" >/dev/null 2>&1; then
+        failure="docker stop failed"
+    elif ! docker rm "$SMOKE_CONTAINER" >/dev/null 2>&1; then
+        failure="docker rm failed"
+    fi
+
+    if [[ -n "$failure" ]]; then
+        record_fail "Container lifecycle" "$failure"
+        return
+    fi
+    record_pass "Container lifecycle" "create/start/exec/stop/rm"
+}
+
+# ── 5. Port binding ──────────────────────────────────────────────────
+# Start a container serving "smoke-ok" on port 80, publish it on
+# SMOKE_PORT and fetch it with curl from the host.
+# Skipped when SKIP_LIFECYCLE is "true" or curl is missing.
+test_port_binding() {
+    if [[ "$SKIP_LIFECYCLE" == "true" ]]; then
+        record_skip "Port binding" "SKIP_LIFECYCLE=true"
+        return
+    fi
+    if ! has_cmd curl; then
+        record_skip "Port binding" "curl not installed"
+        return
+    fi
+    remove_container "$SMOKE_PORT_CONTAINER"
+
+    # In-container server: httpd if it works, otherwise an nc loop that
+    # answers every connection with a fixed HTTP response.
+    local server_script='mkdir -p /var/www && echo "smoke-ok" > /var/www/index.html && httpd -f -p 80 -h /var/www 2>/dev/null || { while true; do echo -e "HTTP/1.1 200 OK\r\nContent-Length: 9\r\n\r\nsmoke-ok\n" | nc -l -p 80 2>/dev/null || break; done; }'
+
+    if ! docker run -d --name "$SMOKE_PORT_CONTAINER" \
+        -p "${SMOKE_PORT}:80" \
+        "$TEST_IMAGE" sh -c "$server_script" >/dev/null 2>&1; then
+        record_fail "Port binding" "failed to start container with port mapping"
+        return
+    fi
+
+    # Give the server a moment to start listening.
+    sleep 2
+    local body
+    body=$(curl -sf --max-time 5 "http://localhost:${SMOKE_PORT}/" 2>/dev/null) || true
+    remove_container "$SMOKE_PORT_CONTAINER"
+
+    if [[ "$body" != *"smoke-ok"* ]]; then
+        record_fail "Port binding" "no response on localhost:${SMOKE_PORT}"
+        return
+    fi
+    record_pass "Port binding" "curl localhost:${SMOKE_PORT}"
+}
+
+# ── 6. Container DNS ─────────────────────────────────────────────────
+# Resolve DNS_TEST_DOMAIN from inside a throw-away container, trying
+# nslookup, getent and ping in turn. Skipped when SKIP_LIFECYCLE is "true".
+test_container_dns() {
+    if [[ "$SKIP_LIFECYCLE" == "true" ]]; then
+        record_skip "Container DNS" "SKIP_LIFECYCLE=true"
+        return
+    fi
+    remove_container "$SMOKE_DNS_CONTAINER"
+
+    local lookup
+    lookup=$(docker run --rm --name "$SMOKE_DNS_CONTAINER" "$TEST_IMAGE" \
+        sh -c "nslookup ${DNS_TEST_DOMAIN} 2>/dev/null || getent hosts ${DNS_TEST_DOMAIN} 2>/dev/null || ping -c1 -W3 ${DNS_TEST_DOMAIN} 2>/dev/null" 2>&1) || true
+
+    # Empty output, or output containing a resolver error phrase, is a failure.
+    if [[ -z "$lookup" ]] || echo "$lookup" | grep -qi "can't resolve\|not found\|failure\|NXDOMAIN"; then
+        record_fail "Container DNS" "failed to resolve ${DNS_TEST_DOMAIN}"
+    else
+        record_pass "Container DNS" "${DNS_TEST_DOMAIN}"
+    fi
+}
+
+# ── 7. Volume mount ──────────────────────────────────────────────────
+# Create a named volume, write a file into it from a container and read
+# it back. Skipped when SKIP_VOLUME is "true".
+test_volume_mount() {
+    if [[ "$SKIP_VOLUME" == "true" ]]; then
+        record_skip "Volume mount" "SKIP_VOLUME=true"
+        return
+    fi
+    # Clear leftovers from an earlier run.
+    docker volume rm -f "$SMOKE_VOLUME" >/dev/null 2>&1 || true
+    remove_container "$SMOKE_VOL_CONTAINER"
+
+    if ! docker volume create "$SMOKE_VOLUME" >/dev/null 2>&1; then
+        record_fail "Volume mount" "docker volume create failed"
+        return
+    fi
+
+    local readback
+    readback=$(docker run --rm --name "$SMOKE_VOL_CONTAINER" \
+        -v "${SMOKE_VOLUME}:/data" "$TEST_IMAGE" \
+        sh -c 'echo "smoke-vol-ok" > /data/test.txt && cat /data/test.txt' 2>&1) || true
+
+    # The volume is removed before the verdict so both outcomes clean up.
+    docker volume rm -f "$SMOKE_VOLUME" >/dev/null 2>&1 || true
+
+    case "$readback" in
+        smoke-vol-ok) record_pass "Volume mount" "write/read verified" ;;
+        *)            record_fail "Volume mount" "write/read mismatch" ;;
+    esac
+}
+
+# ── 8. Network create/connect ────────────────────────────────────────
+# Create a bridge network and run a container attached to it; the test
+# passes when the container prints its interface list.
+# Skipped when SKIP_NETWORK is "true".
+test_network_create() {
+    if [[ "$SKIP_NETWORK" == "true" ]]; then
+        record_skip "Network create/connect" "SKIP_NETWORK=true"
+        return
+    fi
+    # Clear leftovers from an earlier run.
+    docker network rm "$SMOKE_NETWORK" >/dev/null 2>&1 || true
+    remove_container "$SMOKE_NET_CONTAINER"
+
+    if ! docker network create --driver bridge "$SMOKE_NETWORK" >/dev/null 2>&1; then
+        record_fail "Network create/connect" "docker network create failed"
+        return
+    fi
+
+    local interfaces
+    interfaces=$(docker run --rm --name "$SMOKE_NET_CONTAINER" \
+        --network "$SMOKE_NETWORK" "$TEST_IMAGE" \
+        sh -c 'ip addr show 2>/dev/null || ifconfig 2>/dev/null' 2>&1) || true
+
+    # The network is removed before the verdict so both outcomes clean up.
+    docker network rm "$SMOKE_NETWORK" >/dev/null 2>&1 || true
+
+    if [[ -z "$interfaces" ]]; then
+        record_fail "Network create/connect" "container failed to attach to network"
+        return
+    fi
+    record_pass "Network create/connect" "bridge network"
+}
+
+# ── 9. Image pull ────────────────────────────────────────────────────
+# Check that $TEST_IMAGE can be pulled; records a pass or a fail.
+test_image_pull() {
+    local label="Image pull"
+
+    docker pull "$TEST_IMAGE" >/dev/null 2>&1 || {
+        record_fail "$label" "failed to pull $TEST_IMAGE"
+        return
+    }
+    record_pass "$label" "$TEST_IMAGE"
+}
+
+# ── 10. Image build ──────────────────────────────────────────────────
+# Build a one-line Dockerfile fed on stdin, tag it $SMOKE_BUILD_TAG and
+# remove the image again. Skipped when SKIP_BUILD=true.
+test_image_build() {
+    local label="Image build"
+
+    if [[ "$SKIP_BUILD" == "true" ]]; then
+        record_skip "$label" "SKIP_BUILD=true"
+        return
+    fi
+
+    # Drop any image left behind by an earlier run (best effort).
+    docker rmi -f "$SMOKE_BUILD_TAG" >/dev/null 2>&1 || true
+
+    if ! docker build -t "$SMOKE_BUILD_TAG" - <<< "FROM alpine:latest" >/dev/null 2>&1; then
+        record_fail "$label" "docker build failed"
+        return
+    fi
+
+    docker rmi -f "$SMOKE_BUILD_TAG" >/dev/null 2>&1 || true
+    record_pass "$label" "inline Dockerfile"
+}
+
+# ── 11. Docker Compose ───────────────────────────────────────────────
+# Check that every container of the Compose project in $COMPOSE_FILE is
+# running. Skipped when COMPOSE_FILE is empty or no compose CLI exists.
+# Prefers `ps --format json`; falls back to the plain table output.
+# Globals read: COMPOSE_FILE
+# Result: exactly one record_skip / record_pass / record_fail call.
+test_compose_stack() {
+    if [[ -z "$COMPOSE_FILE" ]]; then
+        record_skip "Compose stack" "COMPOSE_FILE not set"
+        return
+    fi
+    if [[ ! -f "$COMPOSE_FILE" ]]; then
+        record_fail "Compose stack" "${COMPOSE_FILE} not found"
+        return
+    fi
+
+    # Keep the command as an array so it is never re-split as a string.
+    local -a compose_cmd
+    if docker compose version >/dev/null 2>&1; then
+        compose_cmd=(docker compose)
+    elif has_cmd docker-compose; then
+        compose_cmd=(docker-compose)
+    else
+        record_skip "Compose stack" "neither 'docker compose' nor 'docker-compose' available"
+        return
+    fi
+
+    local ps_output expected_count running_count
+    ps_output=$("${compose_cmd[@]}" -f "$COMPOSE_FILE" ps --format json 2>/dev/null) || true
+
+    if [[ -z "$ps_output" ]]; then
+        # Table fallback: drop the header, blank lines and dash-only rulers.
+        ps_output=$("${compose_cmd[@]}" -f "$COMPOSE_FILE" ps 2>/dev/null) || true
+        if [[ -z "$ps_output" ]]; then
+            record_fail "Compose stack" "could not read compose project status"
+            return
+        fi
+        local rows
+        rows=$(printf '%s\n' "$ps_output" | tail -n +2 | grep -vE '^[-[:space:]]*$' || true)
+        expected_count=$(grep -c . <<< "$rows" || true)
+        # -w: match "Up"/"running" as whole words, not inside a service name.
+        running_count=$(grep -ciEw 'up|running' <<< "$rows" || true)
+    else
+        # Count key occurrences, not lines: depending on the Compose release
+        # the JSON is either one array on a single line or one object per line.
+        expected_count=$(grep -oE '"Service"[[:space:]]*:' <<< "$ps_output" | wc -l || true)
+        running_count=$(grep -oiE '"State"[[:space:]]*:[[:space:]]*"running"' <<< "$ps_output" | wc -l || true)
+    fi
+
+    # Normalise (some wc implementations pad the number with blanks).
+    expected_count=$((expected_count))
+    running_count=$((running_count))
+
+    if [[ "$expected_count" -eq 0 ]]; then
+        record_fail "Compose stack" "no services found"
+    elif [[ "$running_count" -ge "$expected_count" ]]; then
+        record_pass "Compose stack" "${running_count}/${expected_count} services running"
+    else
+        record_fail "Compose stack" "${running_count}/${expected_count} services running"
+    fi
+}
+
+# ── 12. Resource limits ──────────────────────────────────────────────
+# Start a container with --memory=64m and confirm the limit is visible in its
+# memory cgroup (cgroup v2 memory.max, else cgroup v1 memory.limit_in_bytes).
+# Globals read: SKIP_LIFECYCLE, SMOKE_MEM_CONTAINER, TEST_IMAGE
+# Result: exactly one record_skip / record_pass / record_fail call.
+test_resource_limits() {
+    if [[ "$SKIP_LIFECYCLE" == "true" ]]; then
+        record_skip "Resource limits" "SKIP_LIFECYCLE=true"
+        return
+    fi
+    remove_container "$SMOKE_MEM_CONTAINER"
+
+    # stdout only: docker prints warnings on stderr, and merging them into
+    # the value would make it non-numeric and downgrade the test to a skip.
+    local mem_limit
+    mem_limit=$(docker run --rm --name "$SMOKE_MEM_CONTAINER" \
+        --memory=64m "$TEST_IMAGE" \
+        sh -c 'cat /sys/fs/cgroup/memory.max 2>/dev/null || cat /sys/fs/cgroup/memory/memory.limit_in_bytes 2>/dev/null' 2>/dev/null) || true
+
+    if [[ -z "$mem_limit" ]]; then
+        record_skip "Resource limits" "cgroup memory info not available"
+        return
+    fi
+
+    local limit_bytes=67108864  # 64 MiB
+    if [[ "$mem_limit" == "max" ]]; then
+        # cgroup v2 reports "max" for "no limit": the requested cap was not applied.
+        record_fail "Resource limits" "memory limit not enforced (got ${mem_limit})"
+    elif [[ "$mem_limit" =~ ^[0-9]+$ ]]; then
+        # Allow 1 MiB of slack above the requested limit.
+        if [[ "$mem_limit" -le $((limit_bytes + 1048576)) ]]; then
+            local limit_mb=$((mem_limit / 1048576))
+            record_pass "Resource limits" "memory cgroup enforced (${limit_mb}M)"
+        else
+            record_fail "Resource limits" "memory limit not enforced (got ${mem_limit})"
+        fi
+    else
+        record_skip "Resource limits" "unexpected cgroup value: ${mem_limit}"
+    fi
+}
+
+# ── 13. Disk space ───────────────────────────────────────────────────
+# Check that the filesystem holding Docker's data root is at most 80% full.
+# Fails when `docker system df` produces no output; skips when the usage
+# percentage cannot be determined.
+# Result: exactly one record_skip / record_pass / record_fail call.
+test_disk_space() {
+    local df_output
+    df_output=$(docker system df 2>/dev/null) || true
+
+    if [[ -z "$df_output" ]]; then
+        record_fail "Disk space" "docker system df failed"
+        return
+    fi
+
+    local docker_root used_pct
+    docker_root=$(docker info --format '{{.DockerRootDir}}' 2>/dev/null) || docker_root=""
+    docker_root="${docker_root:-/var/lib/docker}"
+
+    # -P forces the POSIX one-line-per-filesystem layout, so a long device
+    # name cannot wrap and shift the capacity column.
+    used_pct=$(df -P "$docker_root" 2>/dev/null \
+        | awk 'NR == 2 { sub(/%/, "", $5); print $5 }') || used_pct=""
+
+    # Never report a bogus pass when df failed or printed something unexpected.
+    if [[ ! "$used_pct" =~ ^[0-9]+$ ]]; then
+        record_skip "Disk space" "could not determine disk usage for ${docker_root}"
+        return
+    fi
+
+    if [[ "$used_pct" -gt 80 ]]; then
+        record_fail "Disk space" "${used_pct}% used (threshold 80%)"
+    else
+        record_pass "Disk space" "${used_pct}% used"
+    fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT
+# ══════════════════════════════════════════════════════════════════════
+
+# Emit the TAP protocol version line that opens a TAP report.
+print_tap_header() {
+    printf '%s\n' "TAP version 13"
+}
+
+# Emit the TAP plan line followed by pass/fail/skip totals as comments.
+# Globals read: TOTAL, PASS, FAIL, SKIP
+print_tap_footer() {
+    printf '%s\n' \
+        "1..${TOTAL}" \
+        "# pass ${PASS}" \
+        "# fail ${FAIL}" \
+        "# skip ${SKIP}"
+}
+
+# Print the human-readable summary: counters, elapsed seconds since
+# START_TIME and a final verdict line.
+# Globals read: START_TIME, PASS, FAIL, SKIP and the colour variables.
+print_summary() {
+    local finished elapsed rule
+    finished=$(date +%s)
+    elapsed=$(( finished - START_TIME ))
+    rule="${BOLD}────────────────────────────────────────${RESET}"
+
+    echo ""
+    echo -e "$rule"
+    echo -e "${BOLD}Summary${RESET} Docker Smoke Tests"
+    echo -e " ${GREEN}${PASS} passed${RESET} ${RED}${FAIL} failed${RESET} ${YELLOW}${SKIP} skipped${RESET} (${elapsed}s)"
+    echo -e "$rule"
+
+    if [[ $FAIL -ne 0 ]]; then
+        echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"
+        return
+    fi
+    echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+# Export DOCKER_HOST to child processes when it has a value, so the docker
+# CLI talks to the configured daemon. Does nothing when it is empty.
+validate_config() {
+    [[ -z "$DOCKER_HOST" ]] || export DOCKER_HOST
+}
+
+# Thin wrapper around the docker CLI for use inside $(...).
+# Arguments: passed through to `docker` unchanged.
+# Outputs:   docker's stdout (stderr is discarded); when docker fails, a
+#            single newline is added so the function still returns 0.
+docker_cmd() {
+    if ! docker "$@" 2>/dev/null; then
+        printf '\n'
+    fi
+}
+
+# Append one complete metric (HELP line, TYPE line, sample) to $OUTPUT.
+# Arguments: $1 name, $2 type, $3 help text, $4 value,
+#            $5 optional label list (already formatted, without braces)
+add_metric() {
+    local name="$1" type="$2" help="$3" value="$4"
+    local labels="${5:-}"
+    local series="$name"
+
+    if [[ -n "$labels" ]]; then
+        series="${name}{${labels}}"
+    fi
+
+    OUTPUT+="# HELP ${name} ${help}"$'\n'
+    OUTPUT+="# TYPE ${name} ${type}"$'\n'
+    OUTPUT+="${series} ${value}"$'\n'
+}
+
+# Append a bare sample line (no HELP/TYPE header) to $OUTPUT.
+# Arguments: $1 name, $2 value, $3 optional label list (without braces)
+add_metric_value() {
+    local name="$1" value="$2"
+    local labels="${3:-}"
+    local series="$name"
+
+    if [[ -n "$labels" ]]; then
+        series="${name}{${labels}}"
+    fi
+
+    OUTPUT+="${series} ${value}"$'\n'
+}
+
+# Collect node-level swarm metrics from `docker node ls`.
+# Emits swarm_up, swarm_node_count, ready/down counts, manager/worker counts
+# and swarm_manager_leader.
+# Returns: 1 when the node listing is empty (swarm_up=0), otherwise 0.
+collect_nodes() {
+    local node_lines
+    node_lines=$(docker_cmd node ls --format '{{json .}}')
+
+    # An empty listing is reported as "swarm down".
+    if [[ -z "$node_lines" ]]; then
+        add_metric "swarm_up" "gauge" "Docker Swarm reachability (1=up, 0=down)" "0"
+        return 1
+    fi
+    add_metric "swarm_up" "gauge" "Docker Swarm reachability (1=up, 0=down)" "1"
+
+    # One JSON object per line, so the line count is the node count.
+    local total
+    total=$(wc -l <<< "$node_lines")
+    add_metric "swarm_node_count" "gauge" "Total number of nodes in the swarm" "${total}"
+
+    # Availability breakdown.
+    local ready down
+    ready=$(jq -r 'select(.Status == "Ready")' <<< "$node_lines" | jq -s 'length')
+    down=$(jq -r 'select(.Status == "Down")' <<< "$node_lines" | jq -s 'length')
+    add_metric "swarm_nodes_ready" "gauge" "Number of nodes in ready state" "${ready}"
+    add_metric "swarm_nodes_down" "gauge" "Number of nodes in down state" "${down}"
+
+    # Role breakdown: an empty ManagerStatus is counted as a worker.
+    local managers workers
+    managers=$(jq -r 'select(.ManagerStatus != "")' <<< "$node_lines" | jq -s 'length')
+    workers=$(jq -r 'select(.ManagerStatus == "")' <<< "$node_lines" | jq -s 'length')
+    add_metric "swarm_managers_total" "gauge" "Total number of manager nodes" "${managers}"
+    add_metric "swarm_workers_total" "gauge" "Total number of worker nodes" "${workers}"
+
+    # Leader flag: 1 only when the row marked Self is also the Leader.
+    local leader_hits leader_flag="0"
+    leader_hits=$(jq -r 'select(.Self == "true" or .Self == true) | select(.ManagerStatus == "Leader")' <<< "$node_lines" | jq -s 'length')
+    if [[ "$leader_hits" -gt 0 ]]; then
+        leader_flag="1"
+    fi
+    add_metric "swarm_manager_leader" "gauge" "Whether this node is the leader (1=leader, 0=not leader)" "$leader_flag"
+
+    return 0
+}
+
+# Collect service-level metrics from `docker service ls`.
+# Emits swarm_services_total plus, per service, the desired replica count
+# (swarm_service_replicas) and the running replica count
+# (swarm_service_replicas_running), each family under its own HELP/TYPE header.
+collect_services() {
+    local services_json
+    services_json=$(docker_cmd service ls --format '{{json .}}')
+
+    if [[ -z "$services_json" ]]; then
+        add_metric "swarm_services_total" "gauge" "Total number of services" "0"
+        return
+    fi
+
+    # One JSON object per line, so the line count is the service count.
+    local service_count
+    service_count=$(echo "$services_json" | wc -l)
+    add_metric "swarm_services_total" "gauge" "Total number of services" "${service_count}"
+
+    # Single pass: one jq call extracts "Name<TAB>Replicas" for every service
+    # and both sample blocks are built together, instead of re-parsing each
+    # line with several jq processes in two separate loops.
+    local desired_block="" running_block=""
+    local service_name replicas_str running desired
+    while IFS=$'\t' read -r service_name replicas_str; do
+        # Replicas is "RUNNING/DESIRED", optionally followed by
+        # " (max N per node)"; strip that parenthetical suffix.
+        replicas_str="${replicas_str%% (*}"
+        running="${replicas_str%%/*}"
+        desired="${replicas_str#*/}"
+        desired="${desired%%/*}"
+
+        # Anything that is not a plain number is reported as 0.
+        [[ "$desired" =~ ^[0-9]+$ ]] || desired=0
+        [[ "$running" =~ ^[0-9]+$ ]] || running=0
+
+        desired_block+="swarm_service_replicas{service=\"${service_name}\"} ${desired}"$'\n'
+        running_block+="swarm_service_replicas_running{service=\"${service_name}\"} ${running}"$'\n'
+    done < <(jq -r '"\(.Name)\t\(.Replicas)"' <<< "$services_json")
+
+    OUTPUT+="# HELP swarm_service_replicas Desired replica count per service"$'\n'
+    OUTPUT+="# TYPE swarm_service_replicas gauge"$'\n'
+    OUTPUT+="$desired_block"
+    OUTPUT+="# HELP swarm_service_replicas_running Running replica count per service"$'\n'
+    OUTPUT+="# TYPE swarm_service_replicas_running gauge"$'\n'
+    OUTPUT+="$running_block"
+}
+
+# Collect task metrics: the number of tasks listed by `docker node ps` with
+# desired-state=running, and the number of shut-down tasks across all
+# services whose CurrentState starts with "Failed" or "Rejected".
+collect_tasks() {
+    local running_total failed_total
+
+    # Tasks currently expected to run.
+    running_total=$(docker_cmd node ps --format '{{json .}}' --filter 'desired-state=running' 2>/dev/null \
+        | jq -s 'length' 2>/dev/null)
+    case "$running_total" in
+        "" | null) running_total=0 ;;
+    esac
+    add_metric "swarm_tasks_running" "gauge" "Total number of running tasks" "${running_total}"
+
+    # Walk every service and count its failed or rejected shut-down tasks.
+    failed_total=$(docker_cmd service ls -q 2>/dev/null | while read -r service_id; do
+        docker service ps "$service_id" --format '{{json .}}' --filter 'desired-state=shutdown' 2>/dev/null
+    done | jq -r 'select(.CurrentState | test("^Failed|^Rejected"; "i"))' 2>/dev/null | jq -s 'length' 2>/dev/null)
+    case "$failed_total" in
+        "" | null) failed_total=0 ;;
+    esac
+    add_metric "swarm_tasks_failed" "gauge" "Total number of failed tasks" "${failed_total}"
+}
+
+# Count overlay networks and emit swarm_networks_total (0 when none listed).
+collect_networks() {
+    local overlay_lines overlay_total=0
+    overlay_lines=$(docker_cmd network ls --filter driver=overlay --format '{{json .}}')
+
+    # One JSON object per line; an empty listing stays at 0.
+    if [[ -n "$overlay_lines" ]]; then
+        overlay_total=$(wc -l <<< "$overlay_lines")
+    fi
+
+    add_metric "swarm_networks_total" "gauge" "Total number of overlay networks" "${overlay_total}"
+}
+
+# Emit swarm_raft_index from `docker info`. Tries .Swarm.Cluster.RaftIndex,
+# then .Swarm.RaftIndex, then .Swarm.Cluster.Version.Index; reports 0 when
+# none yields a value. Emits nothing when `docker info` returns no output.
+collect_raft() {
+    local info_doc
+    info_doc=$(docker_cmd info --format '{{json .}}')
+    [[ -n "$info_doc" ]] || return
+
+    local index_value
+    index_value=$(jq -r '.Swarm.Cluster.RaftIndex // .Swarm.RaftIndex // empty' <<< "$info_doc" 2>/dev/null)
+
+    # Second attempt: the cluster object's version index.
+    if [[ -z "$index_value" ]]; then
+        index_value=$(jq -r '.Swarm.Cluster.Version.Index // empty' <<< "$info_doc" 2>/dev/null)
+    fi
+
+    # Fall back to 0 when nothing usable was found.
+    if [[ -z "$index_value" || "$index_value" == "null" ]]; then
+        index_value="0"
+    fi
+    add_metric "swarm_raft_index" "gauge" "Raft applied index" "${index_value}"
+}
+
+# Deliver the accumulated $OUTPUT: to stdout by default, or, in textfile
+# mode, to ${TEXTFILE_DIR}/docker_swarm.prom via a PID-suffixed staging file
+# that is renamed into place.
+write_output() {
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local destination="${TEXTFILE_DIR}/docker_swarm.prom"
+    local staging="${destination}.$$"
+
+    mkdir -p "$TEXTFILE_DIR"
+    echo "$OUTPUT" > "$staging"
+    mv "$staging" "$destination"
+}
+
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ script_path=$(readlink -f "$0")
+
+ local env_lines=""
+ if [[ -n "$DOCKER_HOST" ]]; then
+ env_lines="DOCKER_HOST=${DOCKER_HOST}
+"
+ fi
+
+ cat > /etc/cron.d/docker-swarm-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/docker-swarm-exporter
+ echo "Installed cron job: /etc/cron.d/docker-swarm-exporter"
+ echo "Metrics will be written to: ${TEXTFILE_DIR}/docker_swarm.prom"
+}
+
+# --- Main ---
+
+# Entry point of the swarm exporter: parse flags, collect all metrics into
+# $OUTPUT and hand them to write_output. --install sets up the cron job and
+# exits immediately.
+main() {
+    # Flag handling (flags are processed strictly left to right).
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --textfile)
+                TEXTFILE_MODE=true
+                ;;
+            --install)
+                check_dependencies
+                validate_config
+                install_cron
+                exit 0
+                ;;
+            --help | -h)
+                usage
+                ;;
+            *)
+                echo "Unknown option: $1" >&2
+                usage
+                ;;
+        esac
+        shift
+    done
+
+    check_dependencies
+    validate_config
+
+    # Nanosecond timestamp, used below for the duration metric.
+    START_TIME=$(date +%s%N)
+
+    add_metric "swarm_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+    # The remaining collectors only run when the node listing worked.
+    if collect_nodes; then
+        collect_services
+        collect_tasks
+        collect_networks
+        collect_raft
+    fi
+
+    # Self-instrumentation; the duration falls back to 0 when bc fails.
+    local finished elapsed
+    finished=$(date +%s%N)
+    elapsed=$(bc <<< "scale=2; ($finished - $START_TIME) / 1000000000" 2>/dev/null || echo "0")
+    add_metric "swarm_exporter_duration_seconds" "gauge" "Script execution time" "$elapsed"
+    add_metric "swarm_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+    write_output
+}
+
+main "$@"
diff --git a/docker-volume-backup.sh b/docker-volume-backup.sh
new file mode 100644
index 0000000..9f26908
--- /dev/null
+++ b/docker-volume-backup.sh
@@ -0,0 +1,269 @@
+#!/bin/bash
+#############################################################
+#### Docker Volume Backup Script ####
+#### Backup and restore Docker named volumes using ####
+#### tar archives with optional compression ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: ./docker-volume-backup.sh [OPTIONS] ####
+#############################################################
+
+# Strict mode: abort on command failure, on unset variables, and on a
+# failure in any stage of a pipeline.
+set -euo pipefail
+
+# Immutable defaults.
+SCRIPT_NAME=$(basename "$0")
+readonly SCRIPT_NAME
+readonly DEFAULT_BACKUP_DIR="/opt/docker-backups"
+readonly DEFAULT_RETAIN=7
+# Image used for the helper containers that run tar against the volumes.
+readonly ALPINE_IMAGE="alpine:latest"
+
+# Runtime settings; parse_args overrides these from the command line.
+BACKUP_DIR="$DEFAULT_BACKUP_DIR"
+RETAIN="$DEFAULT_RETAIN"
+# MODE becomes "backup", "restore" or "list".
+MODE=""
+TARGET_VOLUME=""
+RESTORE_ARCHIVE=""
+
+# Colors
+readonly RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' NC='\033[0m'
+
+# Shared formatter for the log helpers: prints "<color>[TAG]<reset> <timestamp> <message>".
+# Arguments: $1 colour escape, $2 tag text, remaining arguments form the message.
+_log_emit() {
+    local color="$1" tag="$2"
+    shift 2
+    echo -e "${color}[${tag}]${NC} $(date '+%Y-%m-%d %H:%M:%S') $*"
+}
+
+# Level-specific wrappers; only log_error writes to stderr.
+log_info() { _log_emit "$GREEN" "INFO" "$@"; }
+log_warn() { _log_emit "$YELLOW" "WARN" "$@"; }
+log_error() { _log_emit "$RED" "ERROR" "$@" >&2; }
+log_step() { _log_emit "$BLUE" "STEP" "$@"; }
+
+# Print the usage text to stdout and terminate the script.
+# The here-doc delimiter is unquoted, so $SCRIPT_NAME, $DEFAULT_BACKUP_DIR
+# and $DEFAULT_RETAIN are expanded when the text is printed.
+# Note: this function never returns; it always exits with status 0.
+show_help() {
+ cat << EOF
+Usage: $SCRIPT_NAME [OPTIONS]
+
+Backup and restore Docker named volumes using tar archives with compression.
+
+OPTIONS:
+ --backup [VOLUME] Backup all named volumes, or a specific volume if given
+ --restore ARCHIVE Restore a volume from the specified tar.gz archive
+ --list List available backups
+ --backup-dir PATH Backup directory (default: $DEFAULT_BACKUP_DIR)
+ --retain N Number of backups to keep per volume (default: $DEFAULT_RETAIN)
+ --help, -h Show this help message
+
+EXAMPLES:
+ $SCRIPT_NAME --backup
+ $SCRIPT_NAME --backup my_volume
+ $SCRIPT_NAME --restore $DEFAULT_BACKUP_DIR/my_volume_20260309_143022.tar.gz
+ $SCRIPT_NAME --list
+ $SCRIPT_NAME --backup --backup-dir /mnt/backups --retain 14
+EOF
+ exit 0
+}
+
+# Parse the command line into MODE, TARGET_VOLUME, RESTORE_ARCHIVE,
+# BACKUP_DIR and RETAIN.
+# Arguments: the script's arguments ("$@").
+# Exits: 0 after --help; 1 on an unknown option, a missing or invalid option
+#        value, or when no action (--backup/--restore/--list) was given.
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --backup)
+                MODE="backup"
+                shift
+                # The volume name is optional: take the next word unless it
+                # looks like an option (so "--backup -h" still shows help).
+                if [[ $# -gt 0 && "$1" != -* ]]; then
+                    TARGET_VOLUME="$1"
+                    shift
+                fi
+                ;;
+            --restore)
+                MODE="restore"
+                [[ $# -lt 2 ]] && { log_error "--restore requires an archive path"; exit 1; }
+                RESTORE_ARCHIVE="$2"
+                shift 2
+                ;;
+            --list)
+                MODE="list"
+                shift
+                ;;
+            --backup-dir)
+                [[ $# -lt 2 ]] && { log_error "--backup-dir requires a path"; exit 1; }
+                BACKUP_DIR="$2"
+                shift 2
+                ;;
+            --retain)
+                [[ $# -lt 2 ]] && { log_error "--retain requires a number"; exit 1; }
+                # RETAIN is used in arithmetic later, so reject anything that
+                # is not a plain non-negative integer.
+                if [[ ! "$2" =~ ^[0-9]+$ ]]; then
+                    log_error "--retain must be a non-negative integer: $2"
+                    exit 1
+                fi
+                # 10# forces base 10 so "08" is not read as invalid octal.
+                RETAIN=$((10#$2))
+                shift 2
+                ;;
+            --help | -h)
+                show_help
+                ;;
+            *)
+                log_error "Unknown option: $1"
+                # show_help exits 0 by itself; run it in a subshell so this
+                # usage error still ends with a non-zero status.
+                (show_help) >&2 || true
+                exit 1
+                ;;
+        esac
+    done
+
+    if [[ -z "$MODE" ]]; then
+        log_error "No action specified. Use --backup, --restore, or --list."
+        (show_help) >&2 || true
+        exit 1
+    fi
+}
+
+# Abort with exit status 1 unless the docker CLI is installed and
+# `docker info` succeeds.
+check_dependencies() {
+    command -v docker &>/dev/null || {
+        log_error "docker is required but not installed"
+        exit 1
+    }
+    docker info &>/dev/null || {
+        log_error "Cannot connect to Docker daemon. Is it running?"
+        exit 1
+    }
+}
+
+# Archive one Docker volume into $BACKUP_DIR as <volume>_<timestamp>.tar.gz.
+# A helper container mounts the volume read-only and tars it into a hidden
+# temporary file, which is renamed to its final name only on success.
+# Arguments: $1 - volume name
+# Globals:   BACKUP_DIR, ALPINE_IMAGE (read)
+# Returns:   0 on success, 1 when the volume is missing or the backup fails
+backup_volume() {
+    local volume_name="$1"
+    local timestamp archive_name backup_dir_abs final_path tmp_file size
+    timestamp=$(date +%Y%m%d_%H%M%S)
+    archive_name="${volume_name}_${timestamp}.tar.gz"
+
+    log_step "Backing up volume: ${volume_name}"
+
+    if ! docker volume inspect "$volume_name" &>/dev/null; then
+        log_error "Volume '$volume_name' does not exist"; return 1
+    fi
+
+    mkdir -p "$BACKUP_DIR"
+    # `docker run -v` treats a source that is not an absolute path as a
+    # volume NAME, so a relative --backup-dir would send the archive into a
+    # stray volume while the empty temp file got "successfully" renamed.
+    # Resolve the directory to an absolute path first.
+    if ! backup_dir_abs=$(cd "$BACKUP_DIR" && pwd -P); then
+        log_error "Cannot access backup directory: ${BACKUP_DIR}"; return 1
+    fi
+    final_path="${backup_dir_abs}/${archive_name}"
+    tmp_file=$(mktemp "${backup_dir_abs}/.backup_XXXXXX.tar.gz")
+
+    # -s: additionally require a non-empty archive before declaring success.
+    if docker run --rm \
+        -v "${volume_name}:/source:ro" \
+        -v "${backup_dir_abs}:/backup" \
+        "$ALPINE_IMAGE" \
+        tar czf "/backup/$(basename "$tmp_file")" -C /source . 2>/dev/null \
+        && [[ -s "$tmp_file" ]]; then
+        mv "$tmp_file" "$final_path"
+        size=$(du -h "$final_path" | cut -f1)
+        log_info "Created backup: ${final_path} (${size})"
+    else
+        rm -f "$tmp_file"
+        log_error "Failed to backup volume: ${volume_name}"; return 1
+    fi
+}
+
+# Back up either $TARGET_VOLUME or every volume listed by `docker volume ls`,
+# then apply the retention policy.
+# Returns: 0 when all backups succeeded (or nothing was found), 1 otherwise.
+do_backup() {
+    log_step "Starting Docker volume backup"
+    log_info "Backup directory: ${BACKUP_DIR}"
+
+    # Build the work list.
+    local -a targets=()
+    local entry
+    if [[ -z "$TARGET_VOLUME" ]]; then
+        while IFS= read -r entry; do
+            if [[ -n "$entry" ]]; then
+                targets+=("$entry")
+            fi
+        done < <(docker volume ls --format '{{.Name}}' | sort)
+    else
+        targets=("$TARGET_VOLUME")
+    fi
+
+    if [[ ${#targets[@]} -eq 0 ]]; then
+        log_warn "No Docker named volumes found"
+        return 0
+    fi
+    log_info "Found ${#targets[@]} volume(s) to backup"
+
+    # One failure must not stop the remaining volumes.
+    local ok_count=0 bad_count=0
+    for entry in "${targets[@]}"; do
+        if backup_volume "$entry"; then
+            ok_count=$((ok_count + 1))
+        else
+            bad_count=$((bad_count + 1))
+        fi
+    done
+
+    apply_retention
+    log_step "Backup complete: ${ok_count} succeeded, ${bad_count} failed"
+
+    if [[ $bad_count -gt 0 ]]; then
+        return 1
+    fi
+    return 0
+}
+
+# Restore a volume from $RESTORE_ARCHIVE. The volume name is derived from the
+# file name (volumename_YYYYMMDD_HHMMSS.tar.gz); the volume is created when
+# missing, otherwise its current contents are wiped before extraction.
+# Globals: RESTORE_ARCHIVE, ALPINE_IMAGE (read)
+# Exits:   1 when the archive is missing, its name does not match the
+#          expected pattern, or the volume cannot be created or restored.
+do_restore() {
+    if [[ ! -f "$RESTORE_ARCHIVE" ]]; then
+        log_error "Archive not found: ${RESTORE_ARCHIVE}"; exit 1
+    fi
+
+    local basename_archive volume_name
+    basename_archive=$(basename "$RESTORE_ARCHIVE")
+    volume_name=$(echo "$basename_archive" | sed 's/_[0-9]\{8\}_[0-9]\{6\}\.tar\.gz$//')
+
+    # An unchanged name means the timestamp suffix was not present.
+    if [[ -z "$volume_name" || "$volume_name" == "$basename_archive" ]]; then
+        log_error "Cannot determine volume name from archive: ${basename_archive}"
+        log_error "Expected format: volumename_YYYYMMDD_HHMMSS.tar.gz"; exit 1
+    fi
+
+    log_step "Restoring volume: ${volume_name} from ${RESTORE_ARCHIVE}"
+
+    if ! docker volume inspect "$volume_name" &>/dev/null; then
+        log_info "Creating volume: ${volume_name}"
+        if ! docker volume create "$volume_name" >/dev/null; then
+            log_error "Failed to create volume: ${volume_name}"; exit 1
+        fi
+    else
+        log_warn "Volume '${volume_name}' already exists — contents will be overwritten"
+    fi
+
+    # The bind mount needs an absolute host path.
+    local archive_abs archive_dir archive_file
+    archive_abs=$(realpath "$RESTORE_ARCHIVE")
+    archive_dir=$(dirname "$archive_abs")
+    archive_file=$(basename "$archive_abs")
+
+    # The archive name is handed to the in-container shell as positional
+    # parameter $1 instead of being pasted into the script text, so spaces
+    # or shell metacharacters in the file name cannot break or inject
+    # commands. The three rm globs cover regular entries and dotfiles.
+    if docker run --rm \
+        -v "${volume_name}:/target" \
+        -v "${archive_dir}:/backup:ro" \
+        "$ALPINE_IMAGE" \
+        sh -c 'rm -rf /target/* /target/..?* /target/.[!.]* 2>/dev/null; tar xzf "/backup/$1" -C /target' \
+        sh "$archive_file" 2>/dev/null; then
+        log_info "Volume '${volume_name}' restored successfully"
+    else
+        log_error "Failed to restore volume: ${volume_name}"; exit 1
+    fi
+}
+
+# Print a table of the *.tar.gz archives in $BACKUP_DIR (file name, size and
+# the volume name derived from the file name), followed by a total.
+do_list() {
+    if [[ ! -d "$BACKUP_DIR" ]]; then
+        log_info "No backups found (directory does not exist: ${BACKUP_DIR})"
+        return 0
+    fi
+
+    local found=0
+    local row_format=" %-40s %-10s %s\n"
+    log_step "Available backups in ${BACKUP_DIR}:"
+    printf "\n %-40s %-10s %s\n" "ARCHIVE" "SIZE" "VOLUME"
+    printf "$row_format" "-------" "----" "------"
+
+    local archive file_name file_size volume
+    for archive in "$BACKUP_DIR"/*.tar.gz; do
+        # Skips the literal pattern when the glob matched nothing.
+        if [[ ! -f "$archive" ]]; then
+            continue
+        fi
+        found=$((found + 1))
+        file_name=$(basename "$archive")
+        file_size=$(du -h "$archive" | cut -f1)
+        # Strip the _YYYYMMDD_HHMMSS.tar.gz suffix to obtain the volume name.
+        volume=$(sed 's/_[0-9]\{8\}_[0-9]\{6\}\.tar\.gz$//' <<< "$file_name")
+        printf "$row_format" "$file_name" "$file_size" "$volume"
+    done
+
+    echo ""
+    if [[ $found -gt 0 ]]; then
+        log_info "${found} backup(s) found"
+    else
+        log_info "No backup archives found in ${BACKUP_DIR}"
+    fi
+}
+
+# Delete old archives so that at most $RETAIN backups remain per volume.
+# Archives are grouped by the exact volume name parsed from
+# <volume>_<YYYYMMDD>_<HHMMSS>.tar.gz and ranked by that embedded timestamp
+# (newest first); files not matching the pattern are never touched.
+# Globals: BACKUP_DIR, RETAIN (read)
+# Returns: 0 on success, 1 when RETAIN is not a non-negative integer
+apply_retention() {
+    log_step "Applying retention policy (keep last ${RETAIN} per volume)"
+
+    if [[ ! "$RETAIN" =~ ^[0-9]+$ ]]; then
+        log_error "Invalid retention count: ${RETAIN}"
+        return 1
+    fi
+    local keep=$((10#$RETAIN))
+
+    # Build "timestamp<TAB>volume<TAB>path" records straight from the glob,
+    # with no `ls` parsing and no unquoted patterns. Matching the full name
+    # with a regex keeps volume "app" from also claiming the archives of a
+    # volume called "app_2".
+    local -a entries=()
+    local archive name
+    for archive in "$BACKUP_DIR"/*.tar.gz; do
+        [[ -f "$archive" ]] || continue
+        name="${archive##*/}"
+        if [[ "$name" =~ ^(.+)_([0-9]{8}_[0-9]{6})\.tar\.gz$ ]]; then
+            entries+=("${BASH_REMATCH[2]}"$'\t'"${BASH_REMATCH[1]}"$'\t'"${archive}")
+        fi
+    done
+    if [[ ${#entries[@]} -eq 0 ]]; then
+        return 0
+    fi
+
+    # Walk the records newest-first (the timestamp format sorts
+    # chronologically as plain text) and remove everything after the first
+    # $keep archives of each volume.
+    local -A kept=()
+    local vol path seen
+    while IFS=$'\t' read -r _ vol path; do
+        seen=$(( ${kept[$vol]:-0} + 1 ))
+        kept[$vol]=$seen
+        if (( seen > keep )); then
+            log_info "Removing old backup: $(basename "$path")"
+            rm -f -- "$path"
+        fi
+    done < <(printf '%s\n' "${entries[@]}" | LC_ALL=C sort -r)
+}
+
+# Entry point of the backup script: parse the command line, verify Docker
+# is usable, then run the handler for the selected mode.
+main() {
+    parse_args "$@"
+    check_dependencies
+
+    if [[ "$MODE" == "backup" ]]; then
+        do_backup
+    elif [[ "$MODE" == "restore" ]]; then
+        do_restore
+    elif [[ "$MODE" == "list" ]]; then
+        do_list
+    fi
+}
+
+main "$@"
diff --git a/dokku-exporter.sh b/dokku-exporter.sh
new file mode 100755
index 0000000..617d6fc
--- /dev/null
+++ b/dokku-exporter.sh
@@ -0,0 +1,410 @@
+#!/bin/bash
+################################################################################
+# Script Name: dokku-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Dokku PaaS providing operational
+# metrics via the Dokku CLI — application status, plugin counts,
+# domain configuration, SSL status, and host health
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - Dokku installed on the local host
+# - Root or dokku user access to run dokku commands
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# ./dokku-exporter.sh
+#
+# # HTTP server mode
+# ./dokku-exporter.sh --http -p 9198
+#
+# # Textfile collector mode
+# ./dokku-exporter.sh --textfile
+#
+# Metrics Exported:
+# - dokku_up - Dokku reachability (1=up, 0=down)
+# - dokku_info{version} - Dokku version info
+# - dokku_apps_total - Total app count
+# - dokku_apps_running - Running apps
+# - dokku_apps_stopped - Stopped apps
+# - dokku_plugins_total - Installed plugin count
+# - dokku_domains_app_total - Total app domains configured
+# - dokku_ssl_enabled_total - Apps with SSL enabled
+# - dokku_exporter_duration_seconds - Script execution time
+# - dokku_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9198
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9198
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Check prerequisites
+# Returns: 0 if OK, 1 if error
+check_prerequisites() {
+    # The dokku command is the only hard requirement.
+    command -v dokku >/dev/null 2>&1 && return 0
+
+    echo "ERROR: dokku not found" >&2
+    return 1
+}
+
+# Escape special characters in Prometheus label values
+# Args: $1 - string to escape
+# Returns: escaped string safe for Prometheus labels
+prom_escape() {
+    local val="$1"
+    # Backslashes first, so the ones added for quotes are not doubled again.
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    # Newlines are removed outright rather than escaped.
+    val="${val//$'\n'/}"
+    # printf, not echo: echo would swallow values such as "-n" or "-e"
+    # as options instead of printing them.
+    printf '%s\n' "$val"
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check prerequisites
+ if ! check_prerequisites; then
+ cat </dev/null)
+
+ if [ -z "$version_output" ]; then
+ cat < "0.38.0")
+ local dokku_version
+ dokku_version=$(echo "$version_output" | awk '{print $NF}')
+
+ if [ -z "$dokku_version" ]; then
+ cat </dev/null || true)
+
+ local total_apps=0
+ local running_apps=0
+ local stopped_apps=0
+
+ if [ -n "$apps_list" ]; then
+ total_apps=$(echo "$apps_list" | wc -l)
+ total_apps=${total_apps:-0}
+
+ # Count running apps by checking each app's process status
+ while IFS= read -r app; do
+ local ps_running
+ ps_running=$(dokku ps:report "$app" --ps-running 2>/dev/null || echo "false")
+ if [ "$ps_running" = "true" ]; then
+ running_apps=$((running_apps + 1))
+ fi
+ done <<< "$apps_list"
+
+ stopped_apps=$((total_apps - running_apps))
+ fi
+
+ cat </dev/null | wc -l)
+ plugins_count=${plugins_count:-0}
+
+ cat </dev/null || true)
+ if [ -n "$app_domains" ]; then
+ # Domains are space-separated; count words
+ local domain_count
+ domain_count=$(echo "$app_domains" | wc -w)
+ total_domains=$((total_domains + domain_count))
+ fi
+ done <<< "$apps_list"
+ fi
+
+ cat </dev/null || echo "false")
+ if [ "$ssl_enabled" = "true" ]; then
+ ssl_enabled_count=$((ssl_enabled_count + 1))
+ fi
+ done <<< "$apps_list"
+ fi
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+Dokku Exporter v1.0
+
+Dokku Prometheus Exporter v1.0
+Metrics
+Operational metrics from the Dokku CLI.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point - routes to appropriate output mode
+main() {
+    parse_args "$@"
+
+    # HTTP server mode (serves requests until the process is killed).
+    if [ "$HTTP_MODE" = true ]; then
+        run_http_server
+        return
+    fi
+
+    # No output file requested: print the metrics to stdout.
+    if [ -z "$OUTPUT_FILE" ]; then
+        generate_metrics
+        return
+    fi
+
+    # Textfile collector mode. The staging file lives in the destination
+    # directory so the final rename stays on one filesystem.
+    local target_dir staging line_total
+    target_dir="$(dirname "$OUTPUT_FILE")"
+    mkdir -p "$target_dir"
+    staging=$(mktemp "${target_dir}/.dokku_metrics.XXXXXX")
+
+    if ! generate_metrics > "$staging" 2>/dev/null; then
+        rm -f "$staging"
+        echo "ERROR: Failed to generate metrics" >&2
+        exit 1
+    fi
+
+    # Sanity check: refuse to replace the previous file with a short one.
+    line_total=$(wc -l < "$staging" 2>/dev/null || echo 0)
+    if [ "$line_total" -lt 10 ]; then
+        rm -f "$staging"
+        echo "ERROR: Metrics file too small ($line_total lines), keeping previous" >&2
+        exit 1
+    fi
+
+    # Make the file world-readable, then move it into place.
+    chmod 644 "$staging"
+    mv -f "$staging" "$OUTPUT_FILE"
+
+    echo "Metrics written to $OUTPUT_FILE ($line_total lines)" >&2
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/dokku-smoke-tests.sh b/dokku-smoke-tests.sh
new file mode 100755
index 0000000..1114f91
--- /dev/null
+++ b/dokku-smoke-tests.sh
@@ -0,0 +1,455 @@
+#!/bin/bash
+################################################################################
+# Script Name: dokku-smoke-tests.sh
+# Version: 1.0
+# Description: Smoke test suite for Dokku PaaS — validates connectivity,
+# app deployment lifecycle, plugin health, SSL certificates,
+# and resource usage via the dokku CLI
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - bash 4+
+# - dokku binary (run on the Dokku host)
+# - Root or dokku user access
+#
+# Usage:
+# sudo ./dokku-smoke-tests.sh
+# sudo ./dokku-smoke-tests.sh --skip-app --skip-ssl
+# sudo ./dokku-smoke-tests.sh --format tap
+# sudo ./dokku-smoke-tests.sh --format junit --junit-file results.xml
+#
+################################################################################
+
+set -euo pipefail
+
+# --- Defaults ---
+# Settings written as ${VAR:-default} take their value from the environment when set.
+# Note: SKIP_APP is seeded from SKIP_APP_LIFECYCLE (not from a SKIP_APP variable).
+SKIP_APP="${SKIP_APP_LIFECYCLE:-false}"
+SKIP_SSL="${SKIP_SSL:-false}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+JUNIT_FILE="${JUNIT_FILE:-smoke-results.xml}"
+# Empty DOKKU_DOMAIN means "auto-detect" (see the Connectivity suite)
+DOKKU_DOMAIN="${DOKKU_DOMAIN:-}"
+# NOTE(review): VERBOSE and USE_COLOR are presumably consumed by argument parsing / debug — confirm
+VERBOSE=false
+USE_COLOR=true
+# Name of the temporary app; non-empty only while the app may exist (cleanup keys off it)
+TEST_APP_NAME=""
+# Result counters, incremented by pass/fail/skip
+PASSED=0
+FAILED=0
+SKIPPED=0
+START_TIME=""
+# Per-format result buffers, flushed in the Summary section
+JUNIT_RESULTS=()
+TAP_RESULTS=()
+TEST_NUM=0
+
+# --- Colors ---
+# ANSI escape sequences; they are interpreted by "echo -e" at print time
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+usage() {
+ cat <&2
+ fi
+}
+
+# Record a passing test result.
+# Args: $1 - suite name, $2 - test description
+# Globals: TEST_NUM, PASSED (incremented); TAP_RESULTS / JUNIT_RESULTS (appended); OUTPUT_FORMAT (read)
+pass() {
+ local suite="$1" msg="$2"
+ # "|| true": (( n++ )) returns status 1 when the old value is 0, which would abort under set -e
+ ((TEST_NUM++)) || true
+ ((PASSED++)) || true
+ case "$OUTPUT_FORMAT" in
+ tap) TAP_RESULTS+=("ok $TEST_NUM - [$suite] $msg") ;;
+ # NOTE(review): the junit entry is an empty string as written — confirm the intended XML payload
+ junit) JUNIT_RESULTS+=("") ;;
+ # Any other format: print a green check mark line to stdout
+ *) echo -e " ${GREEN}✓${NC} $msg" ;;
+ esac
+}
+
+# Record a failing test result.
+# Args: $1 - suite name, $2 - test description, $3 - optional failure detail
+# Globals: TEST_NUM, FAILED (incremented); TAP_RESULTS / JUNIT_RESULTS (appended); OUTPUT_FORMAT (read)
+fail() {
+ local suite="$1" msg="$2" detail="${3:-}"
+ # "|| true": (( n++ )) returns status 1 when the old value is 0, which would abort under set -e
+ ((TEST_NUM++)) || true
+ ((FAILED++)) || true
+ case "$OUTPUT_FORMAT" in
+ tap) TAP_RESULTS+=("not ok $TEST_NUM - [$suite] $msg") ;;
+ # NOTE(review): only the raw detail text is stored for junit as written — confirm the intended XML payload
+ junit) JUNIT_RESULTS+=("$detail") ;;
+ # Any other format: print a red cross line; the detail is appended only when non-empty
+ *) echo -e " ${RED}✗${NC} $msg${detail:+ — $detail}" ;;
+ esac
+}
+
+# Record a skipped test result.
+# Args: $1 - suite name, $2 - test description
+# Globals: TEST_NUM, SKIPPED (incremented); TAP_RESULTS / JUNIT_RESULTS (appended); OUTPUT_FORMAT (read)
+skip() {
+ local suite="$1" msg="$2"
+ # "|| true": (( n++ )) returns status 1 when the old value is 0, which would abort under set -e
+ ((TEST_NUM++)) || true
+ ((SKIPPED++)) || true
+ case "$OUTPUT_FORMAT" in
+ # TAP marks a skipped test as "ok" with a "# SKIP" directive
+ tap) TAP_RESULTS+=("ok $TEST_NUM - [$suite] $msg # SKIP") ;;
+ # NOTE(review): the junit entry is an empty string as written — confirm the intended XML payload
+ junit) JUNIT_RESULTS+=("") ;;
+ *) echo -e " ${YELLOW}⊘${NC} $msg — skipped" ;;
+ esac
+}
+
+# Print a bold suite title preceded by a blank line.
+# Args: $1 - suite title
+# Only the "text" output format prints anything; other formats stay silent.
+suite_header() {
+    [[ "$OUTPUT_FORMAT" == "text" ]] || return 0
+    echo -e "\n${BOLD}$1${NC}"
+}
+
+# --- Cleanup ---
+# Destroy the temporary test app if one is still registered.
+# Globals: TEST_APP_NAME (read, then cleared so a second call is a no-op)
+cleanup() {
+    if [[ -n "$TEST_APP_NAME" ]]; then
+        debug "Cleaning up test app: $TEST_APP_NAME"
+        # Best effort: a failed destroy must not mask the script's own exit status
+        dokku apps:destroy "$TEST_APP_NAME" --force >/dev/null 2>&1 || true
+        TEST_APP_NAME=""
+    fi
+}
+# cleanup runs from the EXIT trap only. A handler that returns from INT/TERM
+# lets the script carry on after the signal, so those traps turn the signal
+# into an exit (128 + signal number), which in turn fires the EXIT trap.
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# --- Header ---
+START_TIME=$(date +%s)
+HOSTNAME_STR=$(hostname -f 2>/dev/null || hostname)
+
+if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+    echo -e "${BOLD}Dokku Smoke Tests${NC}"
+    echo "Host: $HOSTNAME_STR"
+    echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+fi
+
+# =====================================================
+# Suite 1: Connectivity
+# =====================================================
+# Verifies the dokku binary, the Docker daemon and the dokku version, then
+# auto-detects the global domain used by the later suites.
+suite_header "Connectivity"
+
+# Check dokku binary
+DOKKU_BIN=$(command -v dokku 2>/dev/null || true)
+if [[ -n "$DOKKU_BIN" ]]; then
+    pass "Connectivity" "Dokku binary found — $DOKKU_BIN"
+else
+    fail "Connectivity" "Dokku binary not found" "dokku is not in PATH"
+    echo -e "\n${RED}Cannot continue without dokku binary. Aborting.${NC}" >&2
+    exit 1
+fi
+
+# Check Docker daemon
+if docker info >/dev/null 2>&1; then
+    pass "Connectivity" "Docker daemon running"
+else
+    fail "Connectivity" "Docker daemon not running" "docker info failed"
+fi
+
+# Check dokku version
+DOKKU_VERSION=$(dokku version 2>/dev/null | grep -oP 'dokku version \K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true)
+if [[ -z "$DOKKU_VERSION" ]]; then
+    # Try alternate format; head -1 keeps a single value if several x.y.z strings appear
+    DOKKU_VERSION=$(dokku version 2>/dev/null | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true)
+fi
+
+if [[ -n "$DOKKU_VERSION" ]]; then
+    pass "Connectivity" "Dokku version — $DOKKU_VERSION"
+else
+    fail "Connectivity" "Dokku version" "Could not parse version string"
+fi
+
+# Auto-detect global domain if not set
+if [[ -z "$DOKKU_DOMAIN" ]]; then
+    # Read the value AFTER the colon on the "global vhosts" line and keep the
+    # first entry. Taking the last word of the line would return the label
+    # itself ("vhosts:") when no global domain is configured.
+    DOKKU_DOMAIN=$(dokku domains:report --global 2>/dev/null \
+        | awk -F: 'tolower($1) ~ /global vhosts/ { n = split($2, v, " "); if (n > 0) print v[1]; exit }' || true)
+    if [[ -z "$DOKKU_DOMAIN" ]]; then
+        # Fallback for other report layouts: last word of the last line, accepted
+        # only when it looks like a hostname rather than a label or a boolean.
+        fallback_domain=$(dokku domains:report --global 2>/dev/null | tail -1 | awk '{print $NF}' || true)
+        hostname_re='^[A-Za-z0-9]([A-Za-z0-9.-]*[A-Za-z0-9])?$'
+        if [[ "$fallback_domain" =~ $hostname_re \
+            && "$fallback_domain" != "true" && "$fallback_domain" != "false" ]]; then
+            DOKKU_DOMAIN="$fallback_domain"
+        fi
+    fi
+    debug "Auto-detected domain: $DOKKU_DOMAIN"
+fi
+
+# =====================================================
+# Suite 2: App Lifecycle
+# =====================================================
+# Creates a throwaway app, deploys nginxdemos/hello into it, waits until it is
+# reported as running, probes it over HTTP and destroys it again.
+if [[ "$SKIP_APP" == "true" ]]; then
+    suite_header "App Lifecycle"
+    skip "App Lifecycle" "Create test app"
+    skip "App Lifecycle" "Deploy image"
+    skip "App Lifecycle" "App responding"
+    skip "App Lifecycle" "Delete test app"
+else
+    suite_header "App Lifecycle"
+
+    TEST_APP_NAME="dokku-smoke-$(date +%s)"
+    debug "Test app name: $TEST_APP_NAME"
+
+    # Create app
+    create_output=$(dokku apps:create "$TEST_APP_NAME" 2>&1) || true
+    debug "Create output: $create_output"
+
+    if dokku apps:exists "$TEST_APP_NAME" >/dev/null 2>&1; then
+        pass "App Lifecycle" "Create test app — $TEST_APP_NAME"
+    else
+        fail "App Lifecycle" "Create test app" "$create_output"
+        skip "App Lifecycle" "Deploy image"
+        skip "App Lifecycle" "App responding"
+        skip "App Lifecycle" "Delete test app"
+        # Nothing was created, so there is nothing for cleanup to destroy
+        TEST_APP_NAME=""
+        SKIP_APP=true
+    fi
+
+    if [[ "$SKIP_APP" != "true" ]]; then
+        # Deploy image via git:from-image
+        deploy_output=$(dokku git:from-image "$TEST_APP_NAME" nginxdemos/hello 2>&1) || true
+        debug "Deploy output: $deploy_output"
+
+        # Check if app is running: poll up to 12 times, 5 seconds apart (60s)
+        app_running=false
+        for ((i = 1; i <= 12; i++)); do
+            sleep 5
+            debug "Waiting for app to start... attempt $i/12"
+            ps_output=$(dokku ps:report "$TEST_APP_NAME" 2>/dev/null || true)
+            # Look at the VALUES of the report, not at the labels: the report has
+            # a "Running:" key, so a plain case-insensitive grep for "running"
+            # matches even while the value is "false". The app counts as up when
+            # "Running" is true or any "Status ..." entry starts with "running".
+            if awk -F: '
+                    {
+                        key = tolower($1); val = tolower($2)
+                        gsub(/^[ \t]+|[ \t]+$/, "", key)
+                        gsub(/^[ \t]+/, "", val)
+                    }
+                    key == "running" && val ~ /^true/ { up = 1 }
+                    key ~ /^status / && val ~ /^running/ { up = 1 }
+                    END { exit (up ? 0 : 1) }
+                ' <<<"$ps_output"; then
+                app_running=true
+                break
+            fi
+        done
+
+        if [[ "$app_running" == "true" ]]; then
+            pass "App Lifecycle" "Deploy image — nginxdemos/hello deployed"
+        else
+            fail "App Lifecycle" "Deploy image" "App not running after 60s"
+        fi
+
+        # Verify HTTP response
+        if [[ -n "$DOKKU_DOMAIN" ]]; then
+            app_url="http://${TEST_APP_NAME}.${DOKKU_DOMAIN}"
+            debug "App URL: $app_url"
+            sleep 3
+
+            # curl prints only the status code; "000" stands in when curl itself fails
+            app_http=$(curl -s -o /dev/null -w "%{http_code}" \
+                --connect-timeout 10 --max-time 30 \
+                "$app_url" 2>/dev/null || echo "000")
+
+            if [[ "$app_http" == "200" ]]; then
+                pass "App Lifecycle" "App responding — HTTP 200 at ${TEST_APP_NAME}.${DOKKU_DOMAIN}"
+            else
+                fail "App Lifecycle" "App responding" "HTTP $app_http at $app_url"
+            fi
+        else
+            # No domain configured — check container port directly
+            debug "No global domain — checking container directly"
+            port=$(dokku proxy:ports "$TEST_APP_NAME" 2>/dev/null | grep -oP ':\K[0-9]+$' | head -1 || true)
+            if [[ -n "$port" ]]; then
+                app_http=$(curl -s -o /dev/null -w "%{http_code}" \
+                    --connect-timeout 10 --max-time 30 \
+                    "http://localhost:$port" 2>/dev/null || echo "000")
+                if [[ "$app_http" == "200" ]]; then
+                    pass "App Lifecycle" "App responding — HTTP 200 on port $port"
+                else
+                    fail "App Lifecycle" "App responding" "HTTP $app_http on port $port"
+                fi
+            else
+                skip "App Lifecycle" "App responding"
+            fi
+        fi
+
+        # Delete test app
+        delete_output=$(dokku apps:destroy "$TEST_APP_NAME" --force 2>&1) || true
+        debug "Delete output: $delete_output"
+
+        if ! dokku apps:exists "$TEST_APP_NAME" >/dev/null 2>&1; then
+            pass "App Lifecycle" "Delete test app — cleaned up"
+            # Cleared only on success so the EXIT cleanup retries a failed destroy
+            TEST_APP_NAME=""
+        else
+            fail "App Lifecycle" "Delete test app" "Manual cleanup may be required"
+        fi
+    fi
+fi
+
+# =====================================================
+# Suite 3: Plugin Health
+# =====================================================
+# Counts enabled plugins and checks that the core plugins are listed.
+suite_header "Plugins"
+
+plugin_list=$(dokku plugin:list 2>/dev/null || true)
+debug "Plugin list: $plugin_list"
+
+if [[ -n "$plugin_list" ]]; then
+    # grep -c already prints "0" when nothing matches (and exits 1), so the
+    # fallback must not print a second "0" — only neutralize the exit status.
+    plugin_count=$(grep -c "enabled" <<<"$plugin_list" || true)
+    pass "Plugins" "Plugin list — $plugin_count plugins installed"
+else
+    fail "Plugins" "Plugin list" "dokku plugin:list failed"
+fi
+
+# Check core plugins
+CORE_PLUGINS=("nginx-vhosts" "apps" "config" "ps")
+for plugin in "${CORE_PLUGINS[@]}"; do
+    # Compare against the plugin-name column exactly; a substring search would
+    # let "ps" be satisfied by the "apps" line.
+    if awk -v name="$plugin" '$1 == name { found = 1 } END { exit (found ? 0 : 1) }' <<<"$plugin_list"; then
+        pass "Plugins" "Core plugin present — $plugin"
+    else
+        fail "Plugins" "Core plugin present — $plugin" "Not found in plugin list"
+    fi
+done
+
+# =====================================================
+# Suite 4: SSL
+# =====================================================
+# Checks for the letsencrypt plugin and, when it is present and a global
+# domain is known, the expiry of the certificate served on port 443.
+if [[ "$SKIP_SSL" == "true" ]]; then
+    suite_header "SSL"
+    skip "SSL" "Letsencrypt plugin installed"
+    skip "SSL" "TLS certificate valid"
+else
+    suite_header "SSL"
+
+    # Check if letsencrypt plugin is installed
+    le_installed=false
+    if echo "$plugin_list" | grep -qi "letsencrypt"; then
+        le_installed=true
+        pass "SSL" "Letsencrypt plugin installed"
+    else
+        skip "SSL" "Letsencrypt plugin installed"
+    fi
+
+    # Check global domain certificate
+    if [[ "$le_installed" == "true" && -n "$DOKKU_DOMAIN" ]]; then
+        # Check certificate via openssl if the domain resolves
+        cert_host="$DOKKU_DOMAIN"
+        cert_output=$(echo | openssl s_client -servername "$cert_host" -connect "${cert_host}:443" 2>/dev/null || true)
+        cert_enddate=$(echo "$cert_output" | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2 || true)
+
+        if [[ -n "$cert_enddate" ]]; then
+            # An unparsable date must be reported as such; treating it as epoch 0
+            # would announce an "expired" certificate with a meaningless day count.
+            if expiry_epoch=$(date -d "$cert_enddate" +%s 2>/dev/null); then
+                now_epoch=$(date +%s)
+                days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
+
+                if [[ "$days_left" -gt 0 ]]; then
+                    pass "SSL" "TLS certificate valid — $days_left days remaining"
+                else
+                    fail "SSL" "TLS certificate expired" "$days_left days past expiry"
+                fi
+            else
+                fail "SSL" "TLS certificate valid" "Could not parse expiry date: $cert_enddate"
+            fi
+        else
+            skip "SSL" "TLS certificate valid"
+        fi
+    else
+        skip "SSL" "TLS certificate valid"
+    fi
+fi
+
+# =====================================================
+# Suite 5: Resources
+# =====================================================
+# Reports root filesystem usage and Docker object counts.
+suite_header "Resources"
+
+# Disk usage
+disk_line=$(df -h / 2>/dev/null | tail -1 || true)
+if [[ -n "$disk_line" ]]; then
+    disk_pct=$(echo "$disk_line" | awk '{print $5}' | tr -d '%')
+    disk_used=$(echo "$disk_line" | awk '{print $3}')
+    disk_total=$(echo "$disk_line" | awk '{print $2}')
+    pass "Resources" "Disk usage — ${disk_pct}% (${disk_used} / ${disk_total})"
+else
+    fail "Resources" "Disk usage" "Could not read disk info"
+fi
+
+# Docker object counts. The listing is captured first so a docker failure is
+# reported as a failure; piping straight into "wc -l || echo 0" under pipefail
+# printed both wc's "0" and the fallback "0".
+
+# Docker images
+if docker_ids=$(docker images -q 2>/dev/null); then
+    # grep -c . counts non-empty lines; it exits 1 on a zero count, hence || true
+    image_count=$(grep -c . <<<"$docker_ids" || true)
+    pass "Resources" "Docker images — $image_count images"
+else
+    fail "Resources" "Docker images" "docker images failed"
+fi
+
+# Docker volumes
+if docker_ids=$(docker volume ls -q 2>/dev/null); then
+    volume_count=$(grep -c . <<<"$docker_ids" || true)
+    pass "Resources" "Docker volumes — $volume_count volumes"
+else
+    fail "Resources" "Docker volumes" "docker volume ls failed"
+fi
+
+# Docker containers
+if docker_ids=$(docker ps -q 2>/dev/null); then
+    container_count=$(grep -c . <<<"$docker_ids" || true)
+    pass "Resources" "Docker containers — $container_count running"
+else
+    fail "Resources" "Docker containers" "docker ps failed"
+fi
+
+# =====================================================
+# Summary
+# =====================================================
+# Emits the collected results in the selected format and exits non-zero when
+# at least one test failed.
+END_TIME=$(date +%s)
+DURATION=$((END_TIME - START_TIME))
+
+case "$OUTPUT_FORMAT" in
+ tap)
+ # TAP: version line, the plan (1..N), one line per recorded result, then
+ # the totals as comment lines
+ echo "TAP version 13"
+ echo "1..$TEST_NUM"
+ for line in "${TAP_RESULTS[@]}"; do
+ echo "$line"
+ done
+ echo "# passed: $PASSED"
+ echo "# failed: $FAILED"
+ echo "# skipped: $SKIPPED"
+ echo "# duration: ${DURATION}s"
+ ;;
+ junit)
+ # The whole group is redirected into $JUNIT_FILE in one go.
+ # NOTE(review): the echoed strings are empty/blank as written — the XML
+ # wrapper text appears to be missing; confirm against the intended output.
+ {
+ echo ''
+ echo ""
+ echo " "
+ echo " "
+ echo " "
+ for result in "${JUNIT_RESULTS[@]}"; do
+ echo " $result"
+ done
+ echo ""
+ } > "$JUNIT_FILE"
+ echo "JUnit results written to $JUNIT_FILE"
+ ;;
+ *)
+ # Text (default): colored one-line totals followed by an overall verdict
+ echo ""
+ echo "────────────────────────────────────────"
+ echo -e "Summary ${BOLD}$HOSTNAME_STR${NC}"
+ echo -e " ${GREEN}$PASSED passed${NC} ${RED}$FAILED failed${NC} ${YELLOW}$SKIPPED skipped${NC} (${DURATION}s)"
+ echo "────────────────────────────────────────"
+ if [[ "$FAILED" -eq 0 ]]; then
+ echo -e "${GREEN}All tests passed.${NC}"
+ else
+ echo -e "${RED}Some tests failed.${NC}"
+ fi
+ ;;
+esac
+
+# Exit status: 1 if any test failed, 0 otherwise
+exit $((FAILED > 0 ? 1 : 0))
diff --git a/dokploy-exporter.sh b/dokploy-exporter.sh
new file mode 100755
index 0000000..9f5f9c9
--- /dev/null
+++ b/dokploy-exporter.sh
@@ -0,0 +1,470 @@
+#!/bin/bash
+################################################################################
+# Script Name: dokploy-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Dokploy PaaS providing operational
+# metrics via the Dokploy API — project counts, application status,
+# database breakdown by type, compose services, server info,
+# and API health
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - Dokploy instance running with API enabled
+# - Dokploy API key (generate in Settings → API)
+# - curl for API calls
+# - jq for JSON parsing
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# ./dokploy-exporter.sh
+#
+# # HTTP server mode
+# ./dokploy-exporter.sh --http -p 9197
+#
+# # Textfile collector mode
+# ./dokploy-exporter.sh --textfile
+#
+# # Custom API token and URL
+# ./dokploy-exporter.sh --api-url http://dokploy.local:3000 --api-token mytoken
+#
+# Metrics Exported:
+# - dokploy_up - API reachability (1=up, 0=down)
+# - dokploy_info{version} - Dokploy version info
+# - dokploy_projects_total - Total project count
+# - dokploy_applications_total - Total applications across all projects
+# - dokploy_applications_by_status{status} - Applications by status
+# - dokploy_compose_services_total - Total Docker Compose services
+# - dokploy_databases_total - Total managed databases
+# - dokploy_databases_by_type{type} - Databases by type
+# - dokploy_servers_total - Total servers managed
+# - dokploy_exporter_duration_seconds - Script execution time
+# - dokploy_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9197
+# Default API URL: http://localhost:3000
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Directory used for textfile collector output
+TEXTFILE_DIR="/var/lib/node_exporter"
+# Target metrics file; main switches to textfile mode when this is non-empty
+OUTPUT_FILE=""
+# When true, main starts the HTTP server on HTTP_PORT instead of printing once
+HTTP_MODE=false
+HTTP_PORT=9197
+# Base URL that api_call prefixes to every endpoint path
+API_URL="http://localhost:3000"
+# API key sent in the x-api-key header; check_prerequisites rejects an empty value
+API_TOKEN=""
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Check prerequisites
+# Returns: 0 if OK, 1 if error
+check_prerequisites() {
+    # Checks run in order and stop at the first problem: curl, then jq, then
+    # the API token. Only that first problem is reported.
+    local problem=""
+
+    if ! command -v curl >/dev/null 2>&1; then
+        problem="curl not found"
+    elif ! command -v jq >/dev/null 2>&1; then
+        problem="jq not found (required for JSON parsing)"
+    elif [ -z "$API_TOKEN" ]; then
+        problem="--api-token is required"
+    fi
+
+    if [ -n "$problem" ]; then
+        echo "ERROR: $problem" >&2
+        return 1
+    fi
+
+    return 0
+}
+
+# Escape special characters in Prometheus label values
+# Args: $1 - string to escape
+# Returns: escaped string safe for Prometheus labels
+prom_escape() {
+    local val="$1"
+    # Backslashes first, so the backslashes added for quotes are not doubled
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    # Newlines are dropped rather than escaped
+    val="${val//$'\n'/}"
+    # printf, not echo: echo would treat a value such as "-n" or "-e" as an
+    # option and print nothing
+    printf '%s\n' "$val"
+}
+
+# Make an authenticated API call
+# Args: $1 - API endpoint path (e.g., /api/project.all)
+# Returns: JSON response on stdout
+api_call() {
+    local endpoint="$1"
+    # Timeouts keep a stalled API from blocking the exporter forever (in HTTP
+    # mode every scrape would hang behind it). Both can be overridden through
+    # the environment: API_CONNECT_TIMEOUT and API_MAX_TIME, in seconds.
+    curl -s -X GET \
+        --connect-timeout "${API_CONNECT_TIMEOUT:-5}" \
+        --max-time "${API_MAX_TIME:-15}" \
+        -H "x-api-key: ${API_TOKEN}" \
+        -H "Accept: application/json" \
+        "${API_URL}${endpoint}" 2>/dev/null
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check prerequisites
+ if ! check_prerequisites; then
+ cat </dev/null)
+
+ if [ -z "$health_response" ]; then
+ cat </dev/null)
+
+ if [ "$is_error" = "yes" ]; then
+ cat </dev/null)
+ dokploy_version="${dokploy_version:-unknown}"
+
+ cat </dev/null)
+ total_projects=${total_projects:-0}
+
+ # Count applications across all projects
+ total_apps=$(echo "$projects_response" | jq '[.[] | (.applications // []) | length] | add // 0' 2>/dev/null)
+ total_apps=${total_apps:-0}
+
+ # Count applications by status
+ done_apps=$(echo "$projects_response" | jq '[.[] | (.applications // [])[] | select(.applicationStatus == "done")] | length' 2>/dev/null)
+ idle_apps=$(echo "$projects_response" | jq '[.[] | (.applications // [])[] | select(.applicationStatus == "idle")] | length' 2>/dev/null)
+ running_apps=$(echo "$projects_response" | jq '[.[] | (.applications // [])[] | select(.applicationStatus == "running")] | length' 2>/dev/null)
+ error_apps=$(echo "$projects_response" | jq '[.[] | (.applications // [])[] | select(.applicationStatus == "error")] | length' 2>/dev/null)
+ done_apps=${done_apps:-0}
+ idle_apps=${idle_apps:-0}
+ running_apps=${running_apps:-0}
+ error_apps=${error_apps:-0}
+
+ # Count compose services across all projects
+ total_compose=$(echo "$projects_response" | jq '[.[] | (.compose // []) | length] | add // 0' 2>/dev/null)
+ total_compose=${total_compose:-0}
+
+ # Count databases by type across all projects
+ pg_count=$(echo "$projects_response" | jq '[.[] | (.postgres // []) | length] | add // 0' 2>/dev/null)
+ mysql_count=$(echo "$projects_response" | jq '[.[] | (.mysql // []) | length] | add // 0' 2>/dev/null)
+ mariadb_count=$(echo "$projects_response" | jq '[.[] | (.mariadb // []) | length] | add // 0' 2>/dev/null)
+ mongo_count=$(echo "$projects_response" | jq '[.[] | (.mongo // []) | length] | add // 0' 2>/dev/null)
+ redis_count=$(echo "$projects_response" | jq '[.[] | (.redis // []) | length] | add // 0' 2>/dev/null)
+ pg_count=${pg_count:-0}
+ mysql_count=${mysql_count:-0}
+ mariadb_count=${mariadb_count:-0}
+ mongo_count=${mongo_count:-0}
+ redis_count=${redis_count:-0}
+
+ total_databases=$((pg_count + mysql_count + mariadb_count + mongo_count + redis_count))
+ fi
+
+ cat </dev/null)
+ total_servers=${total_servers:-0}
+ fi
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+Dokploy Exporter v1.0
+
+Dokploy Prometheus Exporter v1.0
+Metrics
+Operational metrics from the Dokploy API.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point - routes to appropriate output mode
+# Args: "$@" - command-line arguments, passed through to parse_args
+# Exits 1 when the metrics file cannot be generated, validated, or installed.
+main() {
+    parse_args "$@"
+
+    if [ "$HTTP_MODE" = true ]; then
+        # Run HTTP server (blocks until killed)
+        run_http_server
+    elif [ -n "$OUTPUT_FILE" ]; then
+        # Textfile collector mode: write atomically using temp file
+        local output_dir temp_file file_lines
+        output_dir="$(dirname "$OUTPUT_FILE")"
+        if ! mkdir -p "$output_dir"; then
+            echo "ERROR: Cannot create output directory $output_dir" >&2
+            exit 1
+        fi
+
+        # Create temp file in SAME directory for atomic rename (same filesystem)
+        if ! temp_file=$(mktemp "${output_dir}/.dokploy_metrics.XXXXXX"); then
+            echo "ERROR: Cannot create temp file in $output_dir" >&2
+            exit 1
+        fi
+
+        # Generate metrics to temp file
+        if ! generate_metrics > "$temp_file" 2>/dev/null; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        # Validate: file must exist, have content
+        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+
+        if [ "$file_lines" -lt 10 ]; then
+            rm -f "$temp_file"
+            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+            exit 1
+        fi
+
+        # Set permissions, then rename atomically. If either step fails, remove
+        # the temp file so failed runs do not pile up hidden files next to the
+        # real metrics file.
+        if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to install metrics file $OUTPUT_FILE" >&2
+            exit 1
+        fi
+
+        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+    else
+        # Default: output to stdout
+        generate_metrics
+    fi
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/dovecot-metrics-exporter.sh b/dovecot-metrics-exporter.sh
new file mode 100644
index 0000000..0ff0f32
--- /dev/null
+++ b/dovecot-metrics-exporter.sh
@@ -0,0 +1,372 @@
+#!/bin/bash
+################################################################################
+# Script Name: dovecot-metrics-exporter.sh
+# Description: Prometheus exporter for Dovecot IMAP/POP3 server metrics
+#
+# Collects connection counts, authentication stats, mailbox operations,
+# process info, and protocol-level metrics from doveadm and Dovecot stats.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Version: 1.0
+#
+# Usage:
+# # Output to stdout
+# ./dovecot-metrics-exporter.sh
+#
+# # Textfile collector mode (atomic write)
+# ./dovecot-metrics-exporter.sh --textfile
+#
+# # Custom output file
+# ./dovecot-metrics-exporter.sh -o /path/to/metrics.prom
+#
+################################################################################
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Directory used for textfile collector output
+TEXTFILE_DIR="/var/lib/node_exporter"
+# Target metrics file; main writes to it (atomically) when this is non-empty
+OUTPUT_FILE=""
+# NOTE(review): this overrides bash's own HOSTNAME variable and is not read by
+# generate_metrics or main — confirm whether it is still needed
+HOSTNAME=$(hostname)
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Safe integer extraction — returns 0 on failure
+safe_int() {
+    # Echo $1 unchanged when it consists solely of ASCII digits, otherwise 0.
+    case "$1" in
+        '' | *[!0-9]*) echo 0 ;;
+        *) echo "$1" ;;
+    esac
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Count the lines of a log file that match a basic regular expression.
+# Args: $1 - grep (BRE) pattern, $2 - log file path
+# Outputs: the match count; 0 when the file is missing, unreadable or has no match
+_dovecot_log_count() {
+    local pattern="$1" file="$2" n=""
+    if [[ -f "$file" ]]; then
+        # grep -c prints "0" itself when nothing matches (exit status 1)
+        n=$(grep -c -- "$pattern" "$file" 2>/dev/null)
+    fi
+    echo "${n:-0}"
+}
+
+# Generate all Dovecot metrics.
+# Outputs: Prometheus text exposition format on stdout
+# Sources: systemctl, dovecot --version, pgrep, /proc, doveadm (who, quota) and
+#          the mail log (/var/log/mail.log, else /var/log/maillog)
+generate_metrics() {
+    local START_TIME
+    START_TIME=$(date +%s.%N)
+
+    # Log file used by every log-derived counter below
+    local LOG_FILE="/var/log/mail.log"
+    if [[ ! -f "$LOG_FILE" ]]; then
+        LOG_FILE="/var/log/maillog"
+    fi
+
+    # Full-command-line pattern (pgrep -f) for "any Dovecot process": the master
+    # binary, optionally with a path, or a "dovecot/<service>" child. Anchored
+    # at the start so this exporter and unrelated commands are not counted.
+    local dovecot_cmd_re='^((/[^ ]*/)?dovecot( |$)|dovecot/)'
+
+    # --- Exporter info ---
+    echo "# HELP dovecot_up Exporter status (1=up, 0=down)"
+    echo "# TYPE dovecot_up gauge"
+
+    # Check if Dovecot is running
+    if systemctl is-active --quiet dovecot 2>/dev/null; then
+        echo "dovecot_up 1"
+    else
+        echo "dovecot_up 0"
+    fi
+
+    echo ""
+    echo "# HELP dovecot_exporter_info Exporter version information"
+    echo "# TYPE dovecot_exporter_info gauge"
+    echo 'dovecot_exporter_info{version="1.0"} 1'
+    echo ""
+
+    # --- Dovecot version ---
+    echo "# HELP dovecot_version_info Dovecot version information"
+    echo "# TYPE dovecot_version_info gauge"
+    local dovecot_version
+    # The pipeline's status is awk's, so a missing dovecot binary cannot be
+    # detected with "||"; fall back on empty output instead.
+    dovecot_version=$(dovecot --version 2>/dev/null | awk 'NR == 1 {print $1}')
+    dovecot_version=${dovecot_version:-unknown}
+    echo "dovecot_version_info{version=\"${dovecot_version}\"} 1"
+    echo ""
+
+    # --- Process counts ---
+    echo "# HELP dovecot_processes Number of running Dovecot processes by type"
+    echo "# TYPE dovecot_processes gauge"
+    local proc_type count
+    for proc_type in imap pop3 lmtp managesieve submission auth anvil; do
+        # -f: match the full command line. Without it pgrep only sees the short
+        # process name, which never contains "dovecot/<type>".
+        count=$(pgrep -c -f "^dovecot/${proc_type}( |\$)" 2>/dev/null) || count=0
+        echo "dovecot_processes{type=\"${proc_type}\"} ${count:-0}"
+    done
+    local total_procs
+    total_procs=$(pgrep -c -f "$dovecot_cmd_re" 2>/dev/null) || total_procs=0
+    echo "dovecot_processes{type=\"total\"} ${total_procs:-0}"
+    echo ""
+
+    # --- Connected users and connections (from doveadm who) ---
+    # "doveadm who -1" prints one line per connection with the user in column 1
+    # and the service in column 2. Users are counted as distinct user names per
+    # service, connections as lines per service. One doveadm call feeds all six
+    # values.
+    local imap_users=0 pop3_users=0 lmtp_users=0 managesieve_users=0
+    local imap_conns=0 pop3_conns=0
+    if command -v doveadm >/dev/null 2>&1; then
+        read -r imap_users pop3_users lmtp_users managesieve_users imap_conns pop3_conns < <(
+            doveadm who -1 2>/dev/null | awk '
+                NR == 1 && $1 == "username" { next }    # header row
+                NF >= 2 {
+                    conns[$2]++
+                    if (!seen[$2, $1]++) users[$2]++
+                }
+                END {
+                    printf "%d %d %d %d %d %d\n",
+                        users["imap"], users["pop3"], users["lmtp"], users["managesieve"],
+                        conns["imap"], conns["pop3"]
+                }'
+        )
+    fi
+    echo "# HELP dovecot_connected_users Number of currently connected users by protocol"
+    echo "# TYPE dovecot_connected_users gauge"
+    echo "dovecot_connected_users{protocol=\"imap\"} ${imap_users:-0}"
+    echo "dovecot_connected_users{protocol=\"pop3\"} ${pop3_users:-0}"
+    echo "dovecot_connected_users{protocol=\"lmtp\"} ${lmtp_users:-0}"
+    echo "dovecot_connected_users{protocol=\"managesieve\"} ${managesieve_users:-0}"
+    echo ""
+
+    echo "# HELP dovecot_connections_total Total active connections by protocol"
+    echo "# TYPE dovecot_connections_total gauge"
+    echo "dovecot_connections_total{protocol=\"imap\"} ${imap_conns:-0}"
+    echo "dovecot_connections_total{protocol=\"pop3\"} ${pop3_conns:-0}"
+    echo ""
+
+    # --- Authentication stats from mail.log ---
+    echo "# HELP dovecot_auth_success_total Successful authentication attempts by protocol"
+    echo "# TYPE dovecot_auth_success_total counter"
+    echo "dovecot_auth_success_total{protocol=\"imap\"} $(_dovecot_log_count 'imap-login: Info: Login:' "$LOG_FILE")"
+    echo "dovecot_auth_success_total{protocol=\"pop3\"} $(_dovecot_log_count 'pop3-login: Info: Login:' "$LOG_FILE")"
+    echo ""
+
+    echo "# HELP dovecot_auth_failed_total Failed authentication attempts by protocol"
+    echo "# TYPE dovecot_auth_failed_total counter"
+    echo "dovecot_auth_failed_total{protocol=\"imap\"} $(_dovecot_log_count 'imap-login:.*auth failed\|imap-login: Info: Aborted login' "$LOG_FILE")"
+    echo "dovecot_auth_failed_total{protocol=\"pop3\"} $(_dovecot_log_count 'pop3-login:.*auth failed\|pop3-login: Info: Aborted login' "$LOG_FILE")"
+    echo ""
+
+    # --- TLS connections ---
+    echo "# HELP dovecot_tls_connections_total TLS connections by status"
+    echo "# TYPE dovecot_tls_connections_total counter"
+    local tls_no=0
+    if [[ -f "$LOG_FILE" ]]; then
+        # Login lines that do NOT mention TLS
+        tls_no=$(grep 'Login:' "$LOG_FILE" 2>/dev/null | grep -cv 'TLS' 2>/dev/null) || tls_no=${tls_no:-0}
+    fi
+    echo "dovecot_tls_connections_total{tls=\"yes\"} $(_dovecot_log_count 'Login:.*TLS' "$LOG_FILE")"
+    echo "dovecot_tls_connections_total{tls=\"no\"} ${tls_no:-0}"
+    echo ""
+
+    # --- Authentication methods ---
+    echo "# HELP dovecot_auth_method_total Logins by authentication method"
+    echo "# TYPE dovecot_auth_method_total counter"
+    local method
+    for method in PLAIN LOGIN CRAM-MD5 DIGEST-MD5; do
+        echo "dovecot_auth_method_total{method=\"${method}\"} $(_dovecot_log_count "Login:.*method=${method}" "$LOG_FILE")"
+    done
+    echo ""
+
+    # --- Disconnections ---
+    echo "# HELP dovecot_disconnections_total Client disconnections by reason"
+    echo "# TYPE dovecot_disconnections_total counter"
+    echo "dovecot_disconnections_total{reason=\"logout\"} $(_dovecot_log_count 'Logged out' "$LOG_FILE")"
+    echo "dovecot_disconnections_total{reason=\"timeout\"} $(_dovecot_log_count 'Disconnected.*Timed out\|Connection timed out' "$LOG_FILE")"
+    echo "dovecot_disconnections_total{reason=\"connection_closed\"} $(_dovecot_log_count 'Disconnected.*Connection closed' "$LOG_FILE")"
+    echo "dovecot_disconnections_total{reason=\"internal_error\"} $(_dovecot_log_count 'Disconnected.*Internal error' "$LOG_FILE")"
+    echo ""
+
+    # --- LMTP delivery stats ---
+    echo "# HELP dovecot_lmtp_deliveries_total LMTP deliveries by status"
+    echo "# TYPE dovecot_lmtp_deliveries_total counter"
+    echo "dovecot_lmtp_deliveries_total{status=\"delivered\"} $(_dovecot_log_count 'lmtp.*saved mail' "$LOG_FILE")"
+    echo "dovecot_lmtp_deliveries_total{status=\"rejected\"} $(_dovecot_log_count 'lmtp.*rejected' "$LOG_FILE")"
+    echo "dovecot_lmtp_deliveries_total{status=\"tempfail\"} $(_dovecot_log_count 'lmtp.*temporary failure\|lmtp.*temp-fail' "$LOG_FILE")"
+    echo ""
+
+    # --- Sieve stats ---
+    echo "# HELP dovecot_sieve_actions_total Sieve filter actions"
+    echo "# TYPE dovecot_sieve_actions_total counter"
+    echo "dovecot_sieve_actions_total{action=\"filed\"} $(_dovecot_log_count 'sieve:.*stored mail\|sieve:.*fileinto' "$LOG_FILE")"
+    echo "dovecot_sieve_actions_total{action=\"discard\"} $(_dovecot_log_count 'sieve:.*discard' "$LOG_FILE")"
+    echo "dovecot_sieve_actions_total{action=\"redirect\"} $(_dovecot_log_count 'sieve:.*redirect' "$LOG_FILE")"
+    echo "dovecot_sieve_actions_total{action=\"reject\"} $(_dovecot_log_count 'sieve:.*reject' "$LOG_FILE")"
+    echo ""
+
+    # --- Mail commands (counted from the log) ---
+    # Only copy and expunge are exported; the "doveadm stats dump" values that
+    # used to be collected here were never emitted and have been dropped.
+    echo "# HELP dovecot_mail_commands_total Mail commands executed"
+    echo "# TYPE dovecot_mail_commands_total counter"
+    echo "dovecot_mail_commands_total{command=\"copy\"} $(_dovecot_log_count 'Copy\|copy' "$LOG_FILE")"
+    echo "dovecot_mail_commands_total{command=\"expunge\"} $(_dovecot_log_count 'Expunged' "$LOG_FILE")"
+    echo ""
+
+    # --- Mail storage quota (first 20 STORAGE rows from doveadm quota) ---
+    # "-f tab" requests tab-separated output, whose columns are: user, quota
+    # name, type, value, limit, percent. STORAGE values are multiplied by 1024
+    # to give bytes. Samples are buffered so each metric family is emitted as
+    # one contiguous group.
+    local q_user q_type q_value q_limit q_label quota_rows=0
+    local -a usage_lines=() limit_lines=()
+    if command -v doveadm >/dev/null 2>&1; then
+        while IFS=$'\t' read -r q_user _ q_type q_value q_limit _; do
+            [[ "$q_type" == "STORAGE" && "$q_value" =~ ^[0-9]+$ ]] || continue
+            (( quota_rows < 20 )) || break
+            quota_rows=$((quota_rows + 1))
+            # Escape backslash and double quote for use as a label value
+            q_label="${q_user//\\/\\\\}"
+            q_label="${q_label//\"/\\\"}"
+            # 10# forces base 10 so a leading zero is not read as octal
+            usage_lines+=("dovecot_quota_usage_bytes{user=\"${q_label}\"} $((10#$q_value * 1024))")
+            # A non-numeric limit (e.g. "-") produces no limit sample
+            if [[ "$q_limit" =~ ^[0-9]+$ ]]; then
+                limit_lines+=("dovecot_quota_limit_bytes{user=\"${q_label}\"} $((10#$q_limit * 1024))")
+            fi
+        done < <(doveadm -f tab quota get -A 2>/dev/null)
+    fi
+    echo "# HELP dovecot_quota_usage_bytes User quota usage in bytes (top users)"
+    echo "# TYPE dovecot_quota_usage_bytes gauge"
+    if (( ${#usage_lines[@]} > 0 )); then
+        printf '%s\n' "${usage_lines[@]}"
+    fi
+    echo "# HELP dovecot_quota_limit_bytes User quota limit in bytes"
+    echo "# TYPE dovecot_quota_limit_bytes gauge"
+    if (( ${#limit_lines[@]} > 0 )); then
+        printf '%s\n' "${limit_lines[@]}"
+    fi
+    echo ""
+
+    # --- Dovecot uptime ---
+    echo "# HELP dovecot_uptime_seconds Dovecot process uptime in seconds"
+    echo "# TYPE dovecot_uptime_seconds gauge"
+    local dovecot_pid uptime_seconds=0
+    # -x: exact process-name match, so the oldest process named "dovecot" is
+    # picked rather than this exporter, whose name also begins with "dovecot"
+    dovecot_pid=$(pgrep -o -x dovecot 2>/dev/null) || dovecot_pid=""
+    if [[ -n "$dovecot_pid" ]] && [[ -d "/proc/${dovecot_pid}" ]]; then
+        local start_time
+        # Modification time of the /proc/<pid> directory serves as start time
+        start_time=$(stat -c %Y "/proc/${dovecot_pid}" 2>/dev/null) || start_time=0
+        if [[ "$start_time" -gt 0 ]]; then
+            uptime_seconds=$(( $(date +%s) - start_time ))
+        fi
+    fi
+    echo "dovecot_uptime_seconds ${uptime_seconds}"
+    echo ""
+
+    # --- Memory usage ---
+    echo "# HELP dovecot_memory_bytes Total memory usage of all Dovecot processes"
+    echo "# TYPE dovecot_memory_bytes gauge"
+    local total_mem=0
+    # Sum VmRSS (kB) over all Dovecot processes. printf "%.0f" keeps large
+    # totals as plain integers; awk's default print can switch to exponent form.
+    total_mem=$(pgrep -f "$dovecot_cmd_re" 2>/dev/null \
+        | xargs -I {} cat /proc/{}/status 2>/dev/null \
+        | awk '/^VmRSS:/ {sum += $2} END {printf "%.0f\n", sum * 1024}') || total_mem=0
+    echo "dovecot_memory_bytes ${total_mem:-0}"
+    echo ""
+
+    # --- Script execution time ---
+    local END_TIME
+    END_TIME=$(date +%s.%N)
+    local DURATION
+    # awk instead of bc: bc is often not installed, and an empty value here
+    # would emit a sample line without a value
+    DURATION=$(awk -v s="$START_TIME" -v e="$END_TIME" 'BEGIN { printf "%.3f\n", e - s }')
+
+    echo "# HELP dovecot_exporter_duration_seconds Time to generate all metrics"
+    echo "# TYPE dovecot_exporter_duration_seconds gauge"
+    echo "dovecot_exporter_duration_seconds ${DURATION:-0}"
+    echo ""
+
+    echo "# HELP dovecot_exporter_last_run_timestamp Unix timestamp of last successful run"
+    echo "# TYPE dovecot_exporter_last_run_timestamp gauge"
+    echo "dovecot_exporter_last_run_timestamp $(date +%s)"
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point: write metrics to OUTPUT_FILE atomically, or to stdout.
+# Args: "$@" - command-line arguments, passed through to parse_args
+# Exits 1 when the metrics file cannot be generated, validated, or installed.
+main() {
+    parse_args "$@"
+
+    if [ -n "$OUTPUT_FILE" ]; then
+        local output_dir temp_file file_lines
+        output_dir="$(dirname "$OUTPUT_FILE")"
+        # This script does not run under "set -e", so every step that can fail
+        # is checked explicitly; otherwise a failure would still end with the
+        # "Metrics written" message.
+        if ! mkdir -p "$output_dir"; then
+            echo "ERROR: Cannot create output directory $output_dir" >&2
+            exit 1
+        fi
+
+        # Temp file in the same directory so the final mv is a same-filesystem rename
+        if ! temp_file=$(mktemp "${output_dir}/.dovecot_metrics.XXXXXX"); then
+            echo "ERROR: Cannot create temp file in $output_dir" >&2
+            exit 1
+        fi
+
+        if ! generate_metrics > "$temp_file" 2>/dev/null; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
+
+        # Fewer than 10 lines is treated as a failed run; the previous file is kept
+        if [ "$file_lines" -lt 10 ]; then
+            rm -f "$temp_file"
+            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
+            exit 1
+        fi
+
+        if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
+            rm -f "$temp_file"
+            echo "ERROR: Failed to install metrics file $OUTPUT_FILE" >&2
+            exit 1
+        fi
+
+        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
+    else
+        generate_metrics
+    fi
+}
+
+main "$@"
diff --git a/duplicati-exporter.sh b/duplicati-exporter.sh
new file mode 100755
index 0000000..7027b3f
--- /dev/null
+++ b/duplicati-exporter.sh
@@ -0,0 +1,445 @@
+#!/bin/bash
+################################################################################
+# Script Name: duplicati-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for Duplicati backups — backup job status,
+# last run time, backup age, file counts, and size metrics
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - curl installed
+# - jq installed
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# ./duplicati-exporter.sh --textfile
+# ./duplicati-exporter.sh --http -p 9203
+# ./duplicati-exporter.sh --url http://myhost:8200 --password secret
+# DUPLICATI_PASSWORD=secret ./duplicati-exporter.sh --textfile
+#
+# Configuration:
+# Default HTTP port: 9203
+# Default Duplicati URL: http://localhost:8200
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+EXPORTER_VERSION="1.0"
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9203
+DUPLICATI_URL="http://localhost:8200"
+DUPLICATI_PASS="${DUPLICATI_PASSWORD:-}"
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+ # Strip trailing slash from URL
+ DUPLICATI_URL="${DUPLICATI_URL%/}"
+}
+
+# Verify that the external tools the exporter needs are on PATH.
+# Outputs: one "ERROR: <tool> not found" line on stderr per missing tool.
+# Returns: 0 when curl and jq are both available, 1 otherwise.
+check_dependencies() {
+    local missing=0
+    # Keep the loop variable local so it does not leak into the caller's scope.
+    local cmd
+    for cmd in curl jq; do
+        if ! command -v "$cmd" >/dev/null 2>&1; then
+            echo "ERROR: $cmd not found" >&2
+            missing=1
+        fi
+    done
+    return "$missing"
+}
+
+# Authenticate with Duplicati and store cookie jar
+duplicati_auth() {
+ COOKIE_JAR=$(mktemp /tmp/.duplicati_cookies.XXXXXX)
+ trap 'rm -f "$COOKIE_JAR"' EXIT
+
+ # If no password, try unauthenticated access
+ if [ -z "$DUPLICATI_PASS" ]; then
+ return 0
+ fi
+
+ # Get XSRF token first
+ local xsrf_token
+ xsrf_token=$(curl -s -c "$COOKIE_JAR" -b "$COOKIE_JAR" \
+ "${DUPLICATI_URL}/api/v1/auth/refresh" 2>/dev/null | jq -r '.Token // empty')
+
+ if [ -z "$xsrf_token" ]; then
+ # Try fetching the login page to get cookies
+ curl -s -c "$COOKIE_JAR" -b "$COOKIE_JAR" \
+ "${DUPLICATI_URL}/" >/dev/null 2>&1
+ xsrf_token=$(grep -i "xsrf" "$COOKIE_JAR" 2>/dev/null | awk '{print $NF}')
+ fi
+
+ # Authenticate with password
+ local auth_response
+ auth_response=$(curl -s -c "$COOKIE_JAR" -b "$COOKIE_JAR" \
+ -X POST "${DUPLICATI_URL}/api/v1/auth/login" \
+ -H "Content-Type: application/json" \
+ ${xsrf_token:+-H "X-XSRF-Token: $xsrf_token"} \
+ -d "{\"Password\":\"${DUPLICATI_PASS}\"}" 2>/dev/null)
+
+ if echo "$auth_response" | jq -e '.Token' >/dev/null 2>&1; then
+ AUTH_TOKEN=$(echo "$auth_response" | jq -r '.Token')
+ return 0
+ fi
+
+ return 1
+}
+
+# Make an authenticated API call
+api_call() {
+ local endpoint="$1"
+ curl -s -b "$COOKIE_JAR" \
+ ${AUTH_TOKEN:+-H "Authorization: Bearer $AUTH_TOKEN"} \
+ "${DUPLICATI_URL}/api/v1/${endpoint}" 2>/dev/null
+}
+
+# Translate a Duplicati result string into the numeric code used by the
+# duplicati_backup_last_status metric; anything unrecognised maps to 0.
+status_to_number() {
+    local code
+    case "$1" in
+        Fatal) code=4 ;;
+        Error | Failed) code=3 ;;
+        Warning) code=2 ;;
+        Success | Completed) code=1 ;;
+        *) code=0 ;;
+    esac
+    echo "$code"
+}
+
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s%N)
+
+ if ! check_dependencies; then
+ echo "# HELP duplicati_up Exporter status (1=up, 0=down)"
+ echo "# TYPE duplicati_up gauge"
+ echo "duplicati_up 0"
+ return
+ fi
+
+ # Test server reachability
+ local server_up=0
+ if curl -s --connect-timeout 5 "${DUPLICATI_URL}/api/v1/systeminfo" >/dev/null 2>&1; then
+ server_up=1
+ fi
+
+ # Authenticate
+ AUTH_TOKEN=""
+ if [ "$server_up" -eq 1 ]; then
+ duplicati_auth
+ fi
+
+ echo "# HELP duplicati_up Duplicati server reachable (1=up, 0=down)"
+ echo "# TYPE duplicati_up gauge"
+ echo "duplicati_up $server_up"
+
+ echo "# HELP duplicati_exporter_info Exporter version information"
+ echo "# TYPE duplicati_exporter_info gauge"
+ echo "duplicati_exporter_info{version=\"${EXPORTER_VERSION}\"} 1"
+
+ if [ "$server_up" -eq 0 ]; then
+ echo "# HELP duplicati_backup_count Total number of configured backup jobs"
+ echo "# TYPE duplicati_backup_count gauge"
+ echo "duplicati_backup_count 0"
+ local script_end
+ script_end=$(date +%s)
+ echo "# HELP duplicati_exporter_duration_seconds Script execution time"
+ echo "# TYPE duplicati_exporter_duration_seconds gauge"
+ echo "duplicati_exporter_duration_seconds 0"
+ echo "# HELP duplicati_exporter_last_run_timestamp Last successful run"
+ echo "# TYPE duplicati_exporter_last_run_timestamp gauge"
+ echo "duplicati_exporter_last_run_timestamp $script_end"
+ return
+ fi
+
+ # Fetch all backups
+ local backups_json
+ backups_json=$(api_call "backups")
+
+ if [ -z "$backups_json" ] || ! echo "$backups_json" | jq -e '.' >/dev/null 2>&1; then
+ echo "# HELP duplicati_backup_count Total number of configured backup jobs"
+ echo "# TYPE duplicati_backup_count gauge"
+ echo "duplicati_backup_count 0"
+ local script_end
+ script_end=$(date +%s)
+ echo "# HELP duplicati_exporter_duration_seconds Script execution time"
+ echo "# TYPE duplicati_exporter_duration_seconds gauge"
+ echo "duplicati_exporter_duration_seconds 0"
+ echo "# HELP duplicati_exporter_last_run_timestamp Last successful run"
+ echo "# TYPE duplicati_exporter_last_run_timestamp gauge"
+ echo "duplicati_exporter_last_run_timestamp $script_end"
+ return
+ fi
+
+ local backup_count
+ backup_count=$(echo "$backups_json" | jq 'length')
+ echo "# HELP duplicati_backup_count Total number of configured backup jobs"
+ echo "# TYPE duplicati_backup_count gauge"
+ echo "duplicati_backup_count ${backup_count:-0}"
+
+ local now
+ now=$(date +%s)
+
+ # Collect per-backup metrics into arrays so HELP/TYPE appears once per metric
+ local info_lines=()
+ local last_run_lines=()
+ local age_lines=()
+ local duration_lines=()
+ local status_lines=()
+ local files_lines=()
+ local size_lines=()
+ local uploaded_lines=()
+ local next_ts_lines=()
+ local next_sec_lines=()
+ local error_lines=()
+ local warning_lines=()
+
+ while IFS= read -r backup; do
+ local id name target_url
+ id=$(echo "$backup" | jq -r '.Backup.ID // empty')
+ name=$(echo "$backup" | jq -r '.Backup.Name // empty')
+ target_url=$(echo "$backup" | jq -r '.Backup.TargetURL // empty')
+
+ [ -z "$name" ] && continue
+
+ local safe_name="${name//\"/\\\"}"
+ local safe_target="${target_url//\"/\\\"}"
+
+ info_lines+=("duplicati_backup_info{id=\"${id}\",name=\"${safe_name}\",target_url=\"${safe_target}\"} 1")
+
+ # Last run timestamp
+ local last_run_ts=0
+ local last_run_raw
+ last_run_raw=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupDate" // empty')
+ if [ -n "$last_run_raw" ]; then
+ last_run_ts=$(date -d "$last_run_raw" +%s 2>/dev/null || echo 0)
+ fi
+ last_run_lines+=("duplicati_backup_last_run_timestamp{name=\"${safe_name}\"} $last_run_ts")
+
+ # Age since last run
+ local age=0
+ if [ "$last_run_ts" -gt 0 ]; then
+ age=$((now - last_run_ts))
+ fi
+ age_lines+=("duplicati_backup_last_run_age_seconds{name=\"${safe_name}\"} $age")
+
+ # Last duration
+ local duration
+ duration=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupDuration" // "0"')
+ local duration_seconds=0
+ if [[ "$duration" =~ ^([0-9]+):([0-9]+):([0-9]+) ]]; then
+ duration_seconds=$(( BASH_REMATCH[1] * 3600 + BASH_REMATCH[2] * 60 + BASH_REMATCH[3] ))
+ elif [[ "$duration" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+ duration_seconds="${duration%%.*}"
+ fi
+ duration_lines+=("duplicati_backup_last_duration_seconds{name=\"${safe_name}\"} $duration_seconds")
+
+ # Last status
+ local status_raw status_num
+ status_raw=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupResult" // "Unknown"')
+ status_num=$(status_to_number "$status_raw")
+ status_lines+=("duplicati_backup_last_status{name=\"${safe_name}\",status=\"${status_raw}\"} $status_num")
+
+ # Files examined
+ local files_total
+ files_total=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupExaminedFiles" // "0"')
+ files_lines+=("duplicati_backup_files_total{name=\"${safe_name}\"} ${files_total:-0}")
+
+ # Files size
+ local files_size
+ files_size=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupSizeOfExaminedFiles" // "0"')
+ size_lines+=("duplicati_backup_files_size_bytes{name=\"${safe_name}\"} ${files_size:-0}")
+
+ # Uploaded bytes
+ local uploaded
+ uploaded=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupUploadedSize" // "0"')
+ uploaded_lines+=("duplicati_backup_uploaded_bytes{name=\"${safe_name}\"} ${uploaded:-0}")
+
+ # Next run timestamp
+ local next_run_ts=0
+ local next_run_raw
+ next_run_raw=$(echo "$backup" | jq -r '.Schedule.Time // empty')
+ if [ -n "$next_run_raw" ]; then
+ next_run_ts=$(date -d "$next_run_raw" +%s 2>/dev/null || echo 0)
+ fi
+ next_ts_lines+=("duplicati_backup_next_run_timestamp{name=\"${safe_name}\"} $next_run_ts")
+
+ # Seconds until next run
+ local next_run_seconds=0
+ if [ "$next_run_ts" -gt "$now" ]; then
+ next_run_seconds=$((next_run_ts - now))
+ fi
+ next_sec_lines+=("duplicati_backup_next_run_seconds{name=\"${safe_name}\"} $next_run_seconds")
+
+ # Error count
+ local error_count
+ error_count=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupErrors" // "0"')
+ error_lines+=("duplicati_backup_error_count{name=\"${safe_name}\"} ${error_count:-0}")
+
+ # Warning count
+ local warning_count
+ warning_count=$(echo "$backup" | jq -r '.Backup.Metadata."LastBackupWarnings" // "0"')
+ warning_lines+=("duplicati_backup_warning_count{name=\"${safe_name}\"} ${warning_count:-0}")
+ done < <(echo "$backups_json" | jq -c '.[]' 2>/dev/null)
+
+ # Output each metric group with HELP/TYPE immediately before values
+ if [ ${#info_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_info Backup job information"
+ echo "# TYPE duplicati_backup_info gauge"
+ printf '%s\n' "${info_lines[@]}"
+ fi
+ if [ ${#last_run_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_last_run_timestamp Unix timestamp of last backup run"
+ echo "# TYPE duplicati_backup_last_run_timestamp gauge"
+ printf '%s\n' "${last_run_lines[@]}"
+ fi
+ if [ ${#age_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_last_run_age_seconds Seconds since last backup run"
+ echo "# TYPE duplicati_backup_last_run_age_seconds gauge"
+ printf '%s\n' "${age_lines[@]}"
+ fi
+ if [ ${#duration_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_last_duration_seconds Duration of last backup run in seconds"
+ echo "# TYPE duplicati_backup_last_duration_seconds gauge"
+ printf '%s\n' "${duration_lines[@]}"
+ fi
+ if [ ${#status_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_last_status Last backup status (Success=1, Warning=2, Error=3, Fatal=4, Unknown=0)"
+ echo "# TYPE duplicati_backup_last_status gauge"
+ printf '%s\n' "${status_lines[@]}"
+ fi
+ if [ ${#files_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_files_total Total files examined in last backup"
+ echo "# TYPE duplicati_backup_files_total gauge"
+ printf '%s\n' "${files_lines[@]}"
+ fi
+ if [ ${#size_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_files_size_bytes Total size of examined files in bytes"
+ echo "# TYPE duplicati_backup_files_size_bytes gauge"
+ printf '%s\n' "${size_lines[@]}"
+ fi
+ if [ ${#uploaded_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_uploaded_bytes Bytes uploaded in last backup"
+ echo "# TYPE duplicati_backup_uploaded_bytes gauge"
+ printf '%s\n' "${uploaded_lines[@]}"
+ fi
+ if [ ${#next_ts_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_next_run_timestamp Next scheduled run unix timestamp"
+ echo "# TYPE duplicati_backup_next_run_timestamp gauge"
+ printf '%s\n' "${next_ts_lines[@]}"
+ fi
+ if [ ${#next_sec_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_next_run_seconds Seconds until next scheduled run"
+ echo "# TYPE duplicati_backup_next_run_seconds gauge"
+ printf '%s\n' "${next_sec_lines[@]}"
+ fi
+ if [ ${#error_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_error_count Number of errors in last backup run"
+ echo "# TYPE duplicati_backup_error_count gauge"
+ printf '%s\n' "${error_lines[@]}"
+ fi
+ if [ ${#warning_lines[@]} -gt 0 ]; then
+ echo "# HELP duplicati_backup_warning_count Number of warnings in last backup run"
+ echo "# TYPE duplicati_backup_warning_count gauge"
+ printf '%s\n' "${warning_lines[@]}"
+ fi
+
+ local script_end script_duration_ns script_duration
+ script_end=$(date +%s)
+ script_duration_ns=$(( $(date +%s%N) - script_start ))
+ script_duration=$(( script_duration_ns / 1000000000 ))
+
+ echo "# HELP duplicati_exporter_duration_seconds Script execution time"
+ echo "# TYPE duplicati_exporter_duration_seconds gauge"
+ echo "duplicati_exporter_duration_seconds $script_duration"
+ echo "# HELP duplicati_exporter_last_run_timestamp Last successful run"
+ echo "# TYPE duplicati_exporter_last_run_timestamp gauge"
+ echo "duplicati_exporter_last_run_timestamp $script_end"
+}
+
+# Serve metrics over HTTP with netcat, handling one connection per loop pass.
+# Exits with status 1 when nc is not installed; otherwise loops forever.
+# NOTE(review): `read -r request` is on the producing side of the pipe into nc,
+# so it reads this script's own stdin rather than the client's request line.
+# Confirm that /metrics is actually matched when this runs as a service.
+run_http_server() {
+ echo "Starting Duplicati exporter on port $HTTP_PORT..." >&2
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+ while true; do
+ # Everything the group prints becomes the HTTP response sent by nc.
+ {
+ read -r request
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else
+ # Any other request also gets a 200, with a small landing page.
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ echo "Duplicati ExporterDuplicati Prometheus Exporter
Metrics
"
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+main() {
+ parse_args "$@"
+ if [ "$HTTP_MODE" = true ]; then
+ run_http_server
+ elif [ -n "$OUTPUT_FILE" ]; then
+ local output_dir
+ output_dir="$(dirname "$OUTPUT_FILE")"
+ mkdir -p "$output_dir"
+ local temp_file
+ temp_file=$(mktemp "${output_dir}/.duplicati_metrics.XXXXXX")
+ if ! generate_metrics > "$temp_file" 2>/dev/null; then
+ rm -f "$temp_file"
+ echo "ERROR: Failed to generate metrics" >&2
+ exit 1
+ fi
+ chmod 644 "$temp_file"
+ mv -f "$temp_file" "$OUTPUT_FILE"
+ echo "Metrics written to $OUTPUT_FILE" >&2
+ else
+ generate_metrics
+ fi
+}
+
+main "$@"
diff --git a/ebs-snapshot-manager.sh b/ebs-snapshot-manager.sh
new file mode 100644
index 0000000..ba62a55
--- /dev/null
+++ b/ebs-snapshot-manager.sh
@@ -0,0 +1,813 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### ebs-snapshot-manager.sh — Create, manage, audit, and prune AWS EBS snapshots ####
+#### Supports automated creation, cross-region copy, retention, and orphan detection ####
+#### Requires: bash 4+, aws-cli v2, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### export AWS_PROFILE="production" ####
+#### ./ebs-snapshot-manager.sh --snapshot ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+AWS_REGION="${AWS_REGION:-}"
+VOLUME_IDS="${VOLUME_IDS:-}"
+VOLUME_TAG_KEY="${VOLUME_TAG_KEY:-}"
+VOLUME_TAG_VALUE="${VOLUME_TAG_VALUE:-}"
+RETENTION_DAYS="${RETENTION_DAYS:-30}"
+COPY_TO_REGION="${COPY_TO_REGION:-}"
+SNAPSHOT_DESCRIPTION="${SNAPSHOT_DESCRIPTION:-Automated snapshot by ebs-snapshot-manager}"
+NO_WAIT="${NO_WAIT:-false}"
+DRY_RUN="${DRY_RUN:-true}"
+RESTORE_AZ="${RESTORE_AZ:-}"
+RESTORE_VOLUME_TYPE="${RESTORE_VOLUME_TYPE:-gp3}"
+RESTORE_IOPS="${RESTORE_IOPS:-}"
+RESTORE_THROUGHPUT="${RESTORE_THROUGHPUT:-}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+RUN_MODE=""
+TARGET_VOLUME=""
+TARGET_SNAPSHOT=""
+START_TIME=""
+WARNINGS=0
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Set the colour variables used by the logging helpers. Colours are enabled
+# when COLOR is "always", or when stdout is a terminal and COLOR is not
+# "never"; otherwise every variable is set to the empty string.
+setup_colors() {
+    local use_color=false
+    if [[ "$COLOR" != "never" ]]; then
+        if [[ "$COLOR" == "always" || -t 1 ]]; then
+            use_color=true
+        fi
+    fi
+
+    if [[ "$use_color" == true ]]; then
+        RED='\033[0;31m'
+        GREEN='\033[0;32m'
+        YELLOW='\033[0;33m'
+        BLUE='\033[0;34m'
+        BOLD='\033[1m'
+        RESET='\033[0m'
+    else
+        RED=""
+        GREEN=""
+        YELLOW=""
+        BLUE=""
+        BOLD=""
+        RESET=""
+    fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; ((WARNINGS++)) || true; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; }
+
+# ── AWS CLI wrapper ───────────────────────────────────────────────────
+# Run the AWS CLI with the given arguments, appending --region when
+# AWS_REGION is set.
+# Arguments: the aws sub-command and its options
+# Outputs:   whatever aws prints; the debug trace goes to stderr
+# Returns:   the exit status of aws
+aws_cmd() {
+    local args=("$@")
+    if [[ -n "$AWS_REGION" ]]; then
+        args+=(--region "$AWS_REGION")
+    fi
+    # Callers capture this function's stdout with $(...), so the trace must
+    # not share that stream or it ends up inside the captured result.
+    verbose "aws ${args[*]}" >&2
+    aws "${args[@]}"
+}
+
+# ── Dependency check ──────────────────────────────────────────────────
+check_deps() {
+ for cmd in aws jq; do
+ if ! command -v "$cmd" &>/dev/null; then
+ err "${cmd} is required but not installed"
+ exit 1
+ fi
+ done
+
+ # Verify AWS credentials
+ if ! aws sts get-caller-identity &>/dev/null; then
+ err "AWS credentials not configured or expired"
+ exit 1
+ fi
+
+ # Determine region
+ if [[ -z "$AWS_REGION" ]]; then
+ AWS_REGION=$(aws configure get region 2>/dev/null || echo "")
+ if [[ -z "$AWS_REGION" ]]; then
+ err "AWS_REGION is required (set via env var or aws configure)"
+ exit 1
+ fi
+ fi
+
+ verbose "Using region: ${AWS_REGION}"
+ verbose "Account: $(aws sts get-caller-identity --query 'Account' --output text 2>/dev/null)"
+}
+
+# ── Get volume list ───────────────────────────────────────────────────
+# Print the IDs of the volumes to operate on, one per line.
+# Selection order: explicit VOLUME_IDS (comma-separated), then the
+# VOLUME_TAG_KEY/VOLUME_TAG_VALUE filter, otherwise every volume in the region.
+get_volumes() {
+    local -a filter_args=()
+
+    if [[ -n "$VOLUME_IDS" ]]; then
+        # Specific volumes requested; spaces around the commas are dropped.
+        local -a vol_array
+        IFS=',' read -ra vol_array <<< "${VOLUME_IDS// /}"
+        aws_cmd ec2 describe-volumes \
+            --volume-ids "${vol_array[@]}" \
+            --query 'Volumes[*].VolumeId' \
+            --output text | tr '\t' '\n'
+        return
+    fi
+
+    if [[ -n "$VOLUME_TAG_KEY" ]]; then
+        filter_args=(--filters "Name=tag:${VOLUME_TAG_KEY},Values=${VOLUME_TAG_VALUE:-*}")
+    fi
+
+    # The ${arr[@]+...} form expands to nothing for an empty array without
+    # tripping `set -u` on bash releases older than 4.4.
+    aws_cmd ec2 describe-volumes \
+        ${filter_args[@]+"${filter_args[@]}"} \
+        --query 'Volumes[*].VolumeId' \
+        --output text | tr '\t' '\n'
+}
+
+# ── Get account ID ───────────────────────────────────────────────────
+get_account_id() {
+ aws sts get-caller-identity --query 'Account' --output text
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SNAPSHOT MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_snapshot() {
+ log "Creating EBS snapshots..."
+ local volumes
+ volumes=$(get_volumes)
+
+ if [[ -z "$volumes" ]]; then
+ warn "No volumes found matching criteria"
+ return
+ fi
+
+ local vol_count
+ vol_count=$(echo "$volumes" | wc -l)
+ log "Found ${vol_count} volume(s) to snapshot"
+
+ local created=0
+ local failed=0
+ local snapshot_ids=()
+ local now
+ now=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+
+ while IFS= read -r vol_id; do
+ [[ -z "$vol_id" ]] && continue
+ verbose "Snapshotting ${vol_id}..."
+
+ local vol_name
+ # shellcheck disable=SC2016
+ vol_name=$(aws_cmd ec2 describe-volumes \
+ --volume-ids "$vol_id" \
+ --query 'Volumes[0].Tags[?Key==`Name`].Value | [0]' \
+ --output text 2>/dev/null) || vol_name="N/A"
+ [[ "$vol_name" == "None" ]] && vol_name="N/A"
+
+ local snap_id
+ snap_id=$(aws_cmd ec2 create-snapshot \
+ --volume-id "$vol_id" \
+ --description "$SNAPSHOT_DESCRIPTION" \
+ --tag-specifications "ResourceType=snapshot,Tags=[
+ {Key=Name,Value=snap-${vol_id}-$(date +%Y%m%d)},
+ {Key=CreatedBy,Value=ebs-snapshot-manager},
+ {Key=CreatedAt,Value=${now}},
+ {Key=VolumeId,Value=${vol_id}},
+ {Key=VolumeName,Value=${vol_name}}
+ ]" \
+ --query 'SnapshotId' \
+ --output text 2>/dev/null) || snap_id=""
+
+ if [[ -n "$snap_id" ]]; then
+ echo -e " ${GREEN}✓${RESET} ${vol_id} → ${snap_id} (${vol_name})"
+ snapshot_ids+=("$snap_id")
+ ((created++)) || true
+ else
+ echo -e " ${RED}✗${RESET} ${vol_id} — snapshot creation failed"
+ ((failed++)) || true
+ fi
+ done <<< "$volumes"
+
+ # Wait for completion
+ if [[ "$NO_WAIT" != "true" && ${#snapshot_ids[@]} -gt 0 ]]; then
+ log "Waiting for ${#snapshot_ids[@]} snapshot(s) to complete..."
+ for snap_id in "${snapshot_ids[@]}"; do
+ if aws_cmd ec2 wait snapshot-completed --snapshot-ids "$snap_id" 2>/dev/null; then
+ local size
+ size=$(aws_cmd ec2 describe-snapshots \
+ --snapshot-ids "$snap_id" \
+ --query 'Snapshots[0].VolumeSize' \
+ --output text 2>/dev/null) || size="?"
+ verbose "${snap_id} completed (${size} GiB)"
+ else
+ warn "${snap_id} did not complete within timeout"
+ fi
+ done
+ fi
+
+ echo ""
+ log "Snapshots created: ${created}, failed: ${failed}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PRUNE MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_prune() {
+ local cutoff_epoch
+ cutoff_epoch=$(date -d "-${RETENTION_DAYS} days" +%s 2>/dev/null) || \
+ cutoff_epoch=$(date -v-"${RETENTION_DAYS}"d +%s 2>/dev/null) || {
+ err "Could not calculate retention cutoff date"
+ exit 1
+ }
+
+ local cutoff_date
+ cutoff_date=$(date -d "@${cutoff_epoch}" +%Y-%m-%dT%H:%M:%S 2>/dev/null) || \
+ cutoff_date=$(date -r "${cutoff_epoch}" +%Y-%m-%dT%H:%M:%S 2>/dev/null)
+
+ log "Pruning snapshots older than ${RETENTION_DAYS} days (before ${cutoff_date})"
+ if [[ "$DRY_RUN" == "true" ]]; then
+ log "${YELLOW}DRY RUN${RESET} — no snapshots will be deleted. Use --force to delete."
+ fi
+
+ local owner_id
+ owner_id=$(get_account_id)
+
+ local snapshots_json
+ snapshots_json=$(aws_cmd ec2 describe-snapshots \
+ --owner-ids "$owner_id" \
+ --filters "Name=tag:CreatedBy,Values=ebs-snapshot-manager" \
+ --query 'Snapshots[*].{Id:SnapshotId,Start:StartTime,Size:VolumeSize,Vol:VolumeId}' \
+ --output json)
+
+ local total
+ total=$(echo "$snapshots_json" | jq 'length')
+ echo "$snapshots_json" | jq -c '.[]' | while IFS= read -r snap; do
+ local snap_id start_time size vol_id
+ snap_id=$(echo "$snap" | jq -r '.Id')
+ start_time=$(echo "$snap" | jq -r '.Start')
+ size=$(echo "$snap" | jq -r '.Size')
+ vol_id=$(echo "$snap" | jq -r '.Vol')
+
+ local snap_epoch
+ snap_epoch=$(date -d "$start_time" +%s 2>/dev/null) || \
+ snap_epoch=$(date -jf "%Y-%m-%dT%H:%M:%S" "${start_time%%.*}" +%s 2>/dev/null) || snap_epoch=0
+
+ if [[ $snap_epoch -lt $cutoff_epoch ]]; then
+ local age_days=$(( ($(date +%s) - snap_epoch) / 86400 ))
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+ echo -e " ${YELLOW}⊘${RESET} ${snap_id} — ${age_days}d old, ${size} GiB, vol: ${vol_id} (would delete)"
+ else
+ if aws_cmd ec2 delete-snapshot --snapshot-id "$snap_id" 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} ${snap_id} — deleted (${age_days}d old, ${size} GiB)"
+ else
+ echo -e " ${RED}✗${RESET} ${snap_id} — delete failed"
+ fi
+ fi
+ fi
+ done
+
+ log "Total managed snapshots: ${total}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# COPY-REGION MODE
+# ══════════════════════════════════════════════════════════════════════
+
+# Copy the most recent completed snapshot of each selected volume from
+# AWS_REGION into COPY_TO_REGION, tagging every copy with its origin.
+# Exits 1 when no target region is configured.
+do_copy_region() {
+    if [[ -z "$COPY_TO_REGION" ]]; then
+        err "Target region required. Use --copy-region REGION or set COPY_TO_REGION"
+        exit 1
+    fi
+
+    log "Copying latest snapshots to ${COPY_TO_REGION}..."
+
+    local account
+    account=$(get_account_id)
+
+    # A single target volume takes precedence over the general selection.
+    local volume_list
+    if [[ -n "$TARGET_VOLUME" ]]; then
+        volume_list="$TARGET_VOLUME"
+    else
+        volume_list=$(get_volumes)
+    fi
+
+    if [[ -z "$volume_list" ]]; then
+        warn "No volumes found"
+        return
+    fi
+
+    local copied=0
+    local failed=0
+    local -a volume_ids
+    mapfile -t volume_ids <<< "$volume_list"
+
+    local vol_id newest new_copy
+    for vol_id in "${volume_ids[@]}"; do
+        if [[ -z "$vol_id" ]]; then
+            continue
+        fi
+
+        # Newest completed snapshot of this volume, or "None" when there is none.
+        newest=$(aws_cmd ec2 describe-snapshots \
+            --owner-ids "$account" \
+            --filters "Name=volume-id,Values=${vol_id}" "Name=status,Values=completed" \
+            --query 'sort_by(Snapshots, &StartTime)[-1].SnapshotId' \
+            --output text 2>/dev/null) || newest=""
+
+        case "$newest" in
+            "" | None)
+                echo -e " ${YELLOW}⊘${RESET} ${vol_id} — no completed snapshots found"
+                continue
+                ;;
+        esac
+
+        # The copy request is sent to the destination region, so it bypasses
+        # aws_cmd (which would append the source region).
+        new_copy=$(aws ec2 copy-snapshot \
+            --region "$COPY_TO_REGION" \
+            --source-region "$AWS_REGION" \
+            --source-snapshot-id "$newest" \
+            --description "DR copy of ${newest} from ${AWS_REGION}" \
+            --tag-specifications "ResourceType=snapshot,Tags=[
+ {Key=Name,Value=dr-copy-${newest}},
+ {Key=CreatedBy,Value=ebs-snapshot-manager},
+ {Key=SourceRegion,Value=${AWS_REGION}},
+ {Key=SourceSnapshotId,Value=${newest}},
+ {Key=VolumeId,Value=${vol_id}}
+ ]" \
+            --query 'SnapshotId' \
+            --output text 2>/dev/null) || new_copy=""
+
+        if [[ -z "$new_copy" ]]; then
+            echo -e " ${RED}✗${RESET} ${newest} — copy failed"
+            failed=$((failed + 1))
+        else
+            echo -e " ${GREEN}✓${RESET} ${newest} → ${new_copy} (${AWS_REGION} → ${COPY_TO_REGION})"
+            copied=$((copied + 1))
+        fi
+    done
+
+    echo ""
+    log "Copied: ${copied}, failed: ${failed}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_audit() {
+ log "Auditing EBS snapshots in ${AWS_REGION}..."
+
+ local owner_id
+ owner_id=$(get_account_id)
+
+ local snapshots_json
+ snapshots_json=$(aws_cmd ec2 describe-snapshots \
+ --owner-ids "$owner_id" \
+ --query 'Snapshots[*].{Id:SnapshotId,Vol:VolumeId,Size:VolumeSize,Status:State,Start:StartTime,Desc:Description,Tags:Tags}' \
+ --output json)
+
+ local total
+ total=$(echo "$snapshots_json" | jq 'length')
+
+ if [[ "$total" -eq 0 ]]; then
+ log "No snapshots found"
+ return
+ fi
+
+ # Get existing volumes for orphan detection
+ local existing_volumes
+ existing_volumes=$(aws_cmd ec2 describe-volumes \
+ --query 'Volumes[*].VolumeId' \
+ --output text | tr '\t' '\n' | sort)
+
+ local orphan_count=0
+ local untagged_count=0
+ local managed_count=0
+
+ echo ""
+ echo -e "${BOLD}Snapshot Inventory${RESET}"
+ printf " %-24s %-14s %8s %6s %s\n" "SNAPSHOT" "VOLUME" "SIZE" "AGE" "STATUS"
+ echo " $(printf '%.0s─' {1..70})"
+
+ echo "$snapshots_json" | jq -c '.[]' | while IFS= read -r snap; do
+ local snap_id vol_id size status start_time
+ snap_id=$(echo "$snap" | jq -r '.Id')
+ vol_id=$(echo "$snap" | jq -r '.Vol')
+ size=$(echo "$snap" | jq -r '.Size')
+ status=$(echo "$snap" | jq -r '.Status')
+ start_time=$(echo "$snap" | jq -r '.Start')
+
+ local snap_epoch
+ snap_epoch=$(date -d "$start_time" +%s 2>/dev/null) || snap_epoch=0
+ local age_days=$(( ($(date +%s) - snap_epoch) / 86400 ))
+
+ # Check if managed
+ local is_managed
+ is_managed=$(echo "$snap" | jq -r '.Tags // [] | map(select(.Key == "CreatedBy" and .Value == "ebs-snapshot-manager")) | length')
+
+ # Check if orphaned
+ local is_orphan="no"
+ if ! echo "$existing_volumes" | grep -q "^${vol_id}$" 2>/dev/null; then
+ is_orphan="yes"
+ fi
+
+ # Check if tagged
+ local tag_count
+ tag_count=$(echo "$snap" | jq '.Tags // [] | length')
+
+ local status_marker=""
+ if [[ "$is_orphan" == "yes" ]]; then
+ status_marker="${RED}orphan${RESET}"
+ elif [[ "$tag_count" -eq 0 ]]; then
+ status_marker="${YELLOW}untagged${RESET}"
+ elif [[ "$is_managed" -gt 0 ]]; then
+ status_marker="${GREEN}managed${RESET}"
+ else
+ status_marker="${status}"
+ fi
+
+ printf " %-24s %-14s %6s G %4sd %b\n" \
+ "$snap_id" "$vol_id" "$size" "$age_days" "$status_marker"
+ done
+
+ # Summary stats
+ local total_size
+ total_size=$(echo "$snapshots_json" | jq '[.[].Size] | add // 0')
+ orphan_count=$(echo "$snapshots_json" | jq --arg vols "$existing_volumes" '
+ [.[] | select(.Vol as $v | ($vols | split("\n") | map(select(. != "")) | index($v) == null))] | length
+ ')
+ untagged_count=$(echo "$snapshots_json" | jq '[.[] | select((.Tags // []) | length == 0)] | length')
+ managed_count=$(echo "$snapshots_json" | jq '[.[] | select((.Tags // []) | map(select(.Key == "CreatedBy" and .Value == "ebs-snapshot-manager")) | length > 0)] | length')
+
+ local monthly_cost
+ monthly_cost=$(echo "$total_size * 0.05" | bc 2>/dev/null || echo "?")
+
+ echo ""
+ echo -e "${BOLD}Summary${RESET}"
+ echo -e " Total snapshots: ${total}"
+ echo -e " Managed snapshots: ${managed_count}"
+ echo -e " Total storage: ${total_size} GiB"
+ echo -e " Est. monthly cost: \$${monthly_cost}"
+ echo -e " Orphaned: ${orphan_count}"
+ echo -e " Untagged: ${untagged_count}"
+
+ if [[ "$orphan_count" -gt 0 ]]; then
+ echo ""
+ warn "${orphan_count} orphaned snapshot(s) found — source volume no longer exists"
+ fi
+
+ # Check volumes without recent snapshots
+ echo ""
+ echo -e "${BOLD}Volumes Without Recent Snapshots (>${RETENTION_DAYS}d)${RESET}"
+
+ local volumes_json
+ volumes_json=$(aws_cmd ec2 describe-volumes \
+ --query 'Volumes[*].{Id:VolumeId,Size:Size,State:State}' \
+ --output json)
+
+ echo "$volumes_json" | jq -c '.[]' | while IFS= read -r vol; do
+ local v_id v_size
+ v_id=$(echo "$vol" | jq -r '.Id')
+ v_size=$(echo "$vol" | jq -r '.Size')
+
+ local latest_snap_time
+ latest_snap_time=$(aws_cmd ec2 describe-snapshots \
+ --owner-ids "$owner_id" \
+ --filters "Name=volume-id,Values=${v_id}" "Name=status,Values=completed" \
+ --query 'sort_by(Snapshots, &StartTime)[-1].StartTime' \
+ --output text 2>/dev/null) || latest_snap_time="None"
+
+ if [[ "$latest_snap_time" == "None" || -z "$latest_snap_time" ]]; then
+ echo -e " ${RED}✗${RESET} ${v_id} (${v_size} GiB) — ${RED}no snapshots${RESET}"
+ else
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_snap_time" +%s 2>/dev/null) || snap_epoch=0
+ local cutoff_epoch
+ cutoff_epoch=$(date -d "-${RETENTION_DAYS} days" +%s 2>/dev/null) || cutoff_epoch=0
+
+ if [[ $snap_epoch -lt $cutoff_epoch ]]; then
+ local age=$(( ($(date +%s) - snap_epoch) / 86400 ))
+ echo -e " ${YELLOW}!${RESET} ${v_id} (${v_size} GiB) — last snapshot ${age}d ago"
+ fi
+ fi
+ done
+
+ # Prometheus output
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ echo ""
+ echo "# HELP ebs_snapshots_total Total EBS snapshots"
+ echo "# TYPE ebs_snapshots_total gauge"
+ echo "ebs_snapshots_total{region=\"${AWS_REGION}\"} ${total}"
+ echo "# HELP ebs_snapshots_managed_total Managed EBS snapshots"
+ echo "# TYPE ebs_snapshots_managed_total gauge"
+ echo "ebs_snapshots_managed_total{region=\"${AWS_REGION}\"} ${managed_count}"
+ echo "# HELP ebs_snapshots_orphaned_total Orphaned EBS snapshots"
+ echo "# TYPE ebs_snapshots_orphaned_total gauge"
+ echo "ebs_snapshots_orphaned_total{region=\"${AWS_REGION}\"} ${orphan_count}"
+ echo "# HELP ebs_snapshots_untagged_total Untagged EBS snapshots"
+ echo "# TYPE ebs_snapshots_untagged_total gauge"
+ echo "ebs_snapshots_untagged_total{region=\"${AWS_REGION}\"} ${untagged_count}"
+ echo "# HELP ebs_snapshots_size_gib_total Total snapshot storage in GiB"
+ echo "# TYPE ebs_snapshots_size_gib_total gauge"
+ echo "ebs_snapshots_size_gib_total{region=\"${AWS_REGION}\"} ${total_size}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RESTORE MODE
+# ══════════════════════════════════════════════════════════════════════
+
+do_restore() {
+ if [[ -z "$TARGET_SNAPSHOT" ]]; then
+ err "Snapshot ID required. Use --restore SNAP_ID"
+ exit 1
+ fi
+
+ log "Restoring volume from snapshot ${TARGET_SNAPSHOT}..."
+
+ # Verify snapshot exists and is completed
+ local snap_info
+ snap_info=$(aws_cmd ec2 describe-snapshots \
+ --snapshot-ids "$TARGET_SNAPSHOT" \
+ --query 'Snapshots[0].{State:State,Size:VolumeSize,Vol:VolumeId}' \
+ --output json 2>/dev/null) || {
+ err "Snapshot ${TARGET_SNAPSHOT} not found"
+ exit 1
+ }
+
+ local snap_state snap_size source_vol
+ snap_state=$(echo "$snap_info" | jq -r '.State')
+ snap_size=$(echo "$snap_info" | jq -r '.Size')
+ source_vol=$(echo "$snap_info" | jq -r '.Vol')
+
+ if [[ "$snap_state" != "completed" ]]; then
+ err "Snapshot state is '${snap_state}' — must be 'completed'"
+ exit 1
+ fi
+
+ # Determine AZ
+ if [[ -z "$RESTORE_AZ" ]]; then
+ RESTORE_AZ=$(aws_cmd ec2 describe-availability-zones \
+ --query 'AvailabilityZones[0].ZoneName' \
+ --output text)
+ log "No AZ specified, using ${RESTORE_AZ}"
+ fi
+
+ # Build create-volume args
+ local create_args=(
+ ec2 create-volume
+ --snapshot-id "$TARGET_SNAPSHOT"
+ --availability-zone "$RESTORE_AZ"
+ --volume-type "$RESTORE_VOLUME_TYPE"
+ --tag-specifications "ResourceType=volume,Tags=[
+ {Key=Name,Value=restored-from-${TARGET_SNAPSHOT}},
+ {Key=CreatedBy,Value=ebs-snapshot-manager},
+ {Key=RestoredFrom,Value=${TARGET_SNAPSHOT}},
+ {Key=SourceVolumeId,Value=${source_vol}}
+ ]"
+ )
+
+ [[ -n "$RESTORE_IOPS" ]] && create_args+=(--iops "$RESTORE_IOPS")
+ [[ -n "$RESTORE_THROUGHPUT" ]] && create_args+=(--throughput "$RESTORE_THROUGHPUT")
+
+ local vol_id
+ vol_id=$(aws_cmd "${create_args[@]}" \
+ --query 'VolumeId' \
+ --output text 2>/dev/null) || {
+ err "Failed to create volume from snapshot"
+ exit 1
+ }
+
+ echo -e " ${GREEN}✓${RESET} Created volume ${vol_id}"
+ echo -e " Source snapshot: ${TARGET_SNAPSHOT}"
+ echo -e " Size: ${snap_size} GiB"
+ echo -e " Type: ${RESTORE_VOLUME_TYPE}"
+ echo -e " AZ: ${RESTORE_AZ}"
+
+ # Wait for volume to become available
+ log "Waiting for volume to become available..."
+ if aws_cmd ec2 wait volume-available --volume-ids "$vol_id" 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} Volume ${vol_id} is available"
+ else
+ warn "Volume did not become available within timeout"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST MODE
+# ══════════════════════════════════════════════════════════════════════
+
+# List the account's snapshots, newest first, optionally restricted to
+# TARGET_VOLUME. Prints raw JSON when OUTPUT_FORMAT is "json", otherwise a table.
+do_list() {
+    local owner_id
+    owner_id=$(get_account_id)
+
+    # One argument array replaces the previous unused `filters` array and the
+    # unquoted ${TARGET_VOLUME:+...} expansion; argument order is unchanged.
+    local -a describe_args=(ec2 describe-snapshots --owner-ids "$owner_id")
+    if [[ -n "$TARGET_VOLUME" ]]; then
+        describe_args+=(--filters "Name=volume-id,Values=${TARGET_VOLUME}")
+    fi
+    describe_args+=(
+        --query 'sort_by(Snapshots, &StartTime) | reverse(@) | [*].{Id:SnapshotId,Vol:VolumeId,Size:VolumeSize,Status:State,Start:StartTime,Desc:Description}'
+        --output json
+    )
+
+    local snapshots_json
+    snapshots_json=$(aws_cmd "${describe_args[@]}")
+
+    local total
+    total=$(echo "$snapshots_json" | jq 'length')
+
+    if [[ "$total" -eq 0 ]]; then
+        log "No snapshots found"
+        return
+    fi
+
+    if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+        echo "$snapshots_json" | jq '.'
+        return
+    fi
+
+    echo ""
+    printf " %-24s %-14s %8s %-12s %-22s %s\n" "SNAPSHOT" "VOLUME" "SIZE" "STATUS" "CREATED" "DESCRIPTION"
+    echo " $(printf '%.0s─' {1..100})"
+
+    echo "$snapshots_json" | jq -c '.[]' | while IFS= read -r snap; do
+        local snap_id vol_id size status start_time desc
+        snap_id=$(echo "$snap" | jq -r '.Id')
+        vol_id=$(echo "$snap" | jq -r '.Vol')
+        size=$(echo "$snap" | jq -r '.Size')
+        status=$(echo "$snap" | jq -r '.Status')
+        # Trim the timestamp to seconds and the description to 40 characters.
+        start_time=$(echo "$snap" | jq -r '.Start' | cut -c1-19)
+        desc=$(echo "$snap" | jq -r '.Desc' | cut -c1-40)
+
+        printf " %-24s %-14s %6s G %-12s %-22s %s\n" \
+            "$snap_id" "$vol_id" "$size" "$status" "$start_time" "$desc"
+    done
+
+    echo ""
+    log "Total: ${total} snapshot(s)"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+show_help() {
+ cat </dev/null || echo 'default')}"
+ echo -e "Mode: ${RUN_MODE}"
+ echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+ echo ""
+
+ check_deps
+
+ case "$RUN_MODE" in
+ snapshot) do_snapshot ;;
+ prune) do_prune ;;
+ copy-region) do_copy_region ;;
+ audit) do_audit ;;
+ restore) do_restore ;;
+ list) do_list ;;
+ esac
+
+ local end_time
+ end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+ echo ""
+ log "Completed in ${duration}s"
+
+ if [[ $WARNINGS -gt 0 ]]; then
+ exit 2
+ fi
+}
+
+main "$@"
diff --git a/ec2-inventory-reporter.sh b/ec2-inventory-reporter.sh
new file mode 100644
index 0000000..3029e75
--- /dev/null
+++ b/ec2-inventory-reporter.sh
@@ -0,0 +1,704 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### ec2-inventory-reporter.sh — AWS EC2 instance inventory and compliance report ####
+#### Instance metadata, uptime, cost estimates, tag compliance, SG audit ####
+#### Requires: bash 4+, aws-cli v2, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./ec2-inventory-reporter.sh ####
+#### ./ec2-inventory-reporter.sh --all-regions --format csv ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+AWS_REGION="${AWS_REGION:-us-east-1}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+REQUIRED_TAGS="${REQUIRED_TAGS:-Name,Environment,Owner}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+SCAN_REGION="$AWS_REGION"
+ALL_REGIONS="false"
+FILTER_STATE=""
+FILTER_TAG_KEY=""
+FILTER_TAG_VALUE=""
+FILTER_TYPE=""
+TAG_CHECK="false"
+SG_AUDIT="false"
+START_TIME=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ return
+ fi
+ if [[ "$COLOR" == "auto" && ! -t 1 ]]; then
+ return
+ fi
+ RED="\033[0;31m"
+ GREEN="\033[0;32m"
+ YELLOW="\033[0;33m"
+ BLUE="\033[0;34m"
+ BOLD="\033[1m"
+ DIM="\033[2m"
+ RESET="\033[0m"
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log_info() { printf "${GREEN}[INFO]${RESET} %s\n" "$*"; }
+log_warn() { printf "${YELLOW}[WARN]${RESET} %s\n" "$*" >&2; }
+log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; }
+log_debug() { [[ "$VERBOSE" == "true" ]] && printf "${DIM}[DEBUG] %s${RESET}\n" "$*"; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+die() { log_error "$@"; exit 1; }
+
+# Abort via die() unless the aws and jq commands are on PATH and the
+# running bash is version 4 or newer.
+check_deps() {
+    local -a absent=()
+    local tool
+
+    for tool in aws jq; do
+        if command -v "$tool" >/dev/null 2>&1; then
+            continue
+        fi
+        # The aws binary is reported under its package name.
+        if [[ "$tool" == "aws" ]]; then
+            absent+=("aws-cli")
+        else
+            absent+=("$tool")
+        fi
+    done
+
+    [[ ${#absent[@]} -eq 0 ]] || die "Missing required tools: ${absent[*]}"
+    [[ "${BASH_VERSINFO[0]}" -ge 4 ]] || die "Requires bash 4+, found ${BASH_VERSION}"
+}
+
+# ── Pricing table (us-east-1 on-demand Linux, $/hr) ──────────────────
+# Associative array keyed by EC2 instance type; each value is an hourly
+# rate in US dollars. estimate_cost() multiplies the rate by 730 hours to
+# get a monthly figure and prints "N/A" for any type not listed here.
+# The same rates are used whichever region is being scanned.
+# NOTE(review): rates are hard-coded — confirm against current AWS pricing
+# before relying on the totals.
+declare -A PRICING=(
+ # General purpose
+ ["t3.nano"]=0.0052 ["t3.micro"]=0.0104 ["t3.small"]=0.0208
+ ["t3.medium"]=0.0416 ["t3.large"]=0.0832 ["t3.xlarge"]=0.1664
+ ["t3.2xlarge"]=0.3328
+ ["t3a.nano"]=0.0047 ["t3a.micro"]=0.0094 ["t3a.small"]=0.0188
+ ["t3a.medium"]=0.0376 ["t3a.large"]=0.0752 ["t3a.xlarge"]=0.1504
+ ["t3a.2xlarge"]=0.3008
+ ["m5.large"]=0.096 ["m5.xlarge"]=0.192 ["m5.2xlarge"]=0.384
+ ["m5.4xlarge"]=0.768 ["m5.8xlarge"]=1.536
+ ["m6i.large"]=0.096 ["m6i.xlarge"]=0.192 ["m6i.2xlarge"]=0.384
+ ["m6i.4xlarge"]=0.768
+ ["m7i.large"]=0.1008 ["m7i.xlarge"]=0.2016 ["m7i.2xlarge"]=0.4032
+ # Compute optimized
+ ["c5.large"]=0.085 ["c5.xlarge"]=0.17 ["c5.2xlarge"]=0.34
+ ["c5.4xlarge"]=0.68 ["c5.9xlarge"]=1.53
+ ["c6i.large"]=0.085 ["c6i.xlarge"]=0.17 ["c6i.2xlarge"]=0.34
+ # Memory optimized
+ ["r5.large"]=0.126 ["r5.xlarge"]=0.252 ["r5.2xlarge"]=0.504
+ ["r5.4xlarge"]=1.008
+ ["r6i.large"]=0.126 ["r6i.xlarge"]=0.252 ["r6i.2xlarge"]=0.504
+ # Storage optimized
+ ["i3.large"]=0.156 ["i3.xlarge"]=0.312 ["i3.2xlarge"]=0.624
+ # Accelerated
+ ["g4dn.xlarge"]=0.526 ["g4dn.2xlarge"]=0.752
+ # Burstable previous gen
+ ["t2.nano"]=0.0058 ["t2.micro"]=0.0116 ["t2.small"]=0.023
+ ["t2.medium"]=0.0464 ["t2.large"]=0.0928
+)
+
+# ── Cost estimation ──────────────────────────────────────────────────
+estimate_cost() {
+ local instance_type="$1" state="$2"
+ if [[ "$state" != "running" ]]; then
+ echo "0.00"
+ return
+ fi
+ local hourly="${PRICING[$instance_type]:-}"
+ if [[ -z "$hourly" ]]; then
+ echo "N/A"
+ return
+ fi
+ printf "%.2f" "$(echo "$hourly * 730" | bc -l)"
+}
+
+# ── Uptime calculation ───────────────────────────────────────────────
+# Render the time since launch as "<d>d <h>h <m>m".
+#
+# Arguments: $1 - launch timestamp, ISO-8601 as returned by the AWS CLI
+#            $2 - instance state name
+# Outputs:   the formatted uptime, or "—" when the instance is not
+#            running, the timestamp is empty/"null"/unparseable, or it
+#            lies in the future.
+format_uptime() {
+    local launch_time="$1" state="$2"
+    if [[ "$state" != "running" || -z "$launch_time" || "$launch_time" == "null" ]]; then
+        echo "—"
+        return
+    fi
+
+    local launch_epoch now_epoch diff_sec
+    if date --version >/dev/null 2>&1; then
+        # GNU date understands the full timestamp, offset included.
+        launch_epoch=$(date -d "$launch_time" +%s 2>/dev/null) || { echo "—"; return; }
+    else
+        # BSD date: keep only "YYYY-MM-DDTHH:MM:SS" (drops fractional
+        # seconds and any Z / +00:00 suffix) and parse it as UTC with -u;
+        # without -u the value is read as local time and the uptime is off
+        # by the local UTC offset.
+        # NOTE(review): assumes the API timestamp is expressed in UTC.
+        launch_epoch=$(date -u -j -f "%Y-%m-%dT%H:%M:%S" "${launch_time:0:19}" +%s 2>/dev/null) || { echo "—"; return; }
+    fi
+    now_epoch=$(date -u +%s)
+    diff_sec=$(( now_epoch - launch_epoch ))
+
+    if (( diff_sec < 0 )); then
+        echo "—"
+        return
+    fi
+
+    local days=$(( diff_sec / 86400 ))
+    local hours=$(( (diff_sec % 86400) / 3600 ))
+    local mins=$(( (diff_sec % 3600) / 60 ))
+    printf "%dd %dh %dm" "$days" "$hours" "$mins"
+}
+
+# ── Tag compliance check ─────────────────────────────────────────────
+check_tag_compliance() {
+ local tags_json="$1"
+ local missing=()
+
+ IFS=',' read -ra required <<< "$REQUIRED_TAGS"
+ for tag in "${required[@]}"; do
+ tag=$(echo "$tag" | xargs)
+ local found
+ found=$(echo "$tags_json" | jq -r --arg key "$tag" '.[] | select(.Key == $key) | .Key' 2>/dev/null)
+ if [[ -z "$found" ]]; then
+ missing+=("$tag")
+ fi
+ done
+
+ if (( ${#missing[@]} == 0 )); then
+ echo "PASS"
+ else
+ echo "MISSING: ${missing[*]}"
+ fi
+}
+
+# ── Security group audit ─────────────────────────────────────────────
+# Report ingress rules that are open to the whole internet.
+#
+# Arguments: $1 - AWS region; $2.. - security group IDs
+# Outputs:   "—" when no group IDs are given, "ERROR" when the groups
+#            cannot be fetched or parsed, "OK" when no offending rule is
+#            found, otherwise "OPEN: <rules>" with the distinct rules
+#            joined by ", ".
+#
+# A rule is reported when it allows 0.0.0.0/0 or ::/0 and is not exactly
+# port 80 or exactly port 443.
+audit_security_groups() {
+    local region="$1"
+    shift
+    local sg_ids=("$@")
+
+    if (( ${#sg_ids[@]} == 0 )); then
+        echo "—"
+        return
+    fi
+
+    local sg_data
+    sg_data=$(aws ec2 describe-security-groups \
+        --region "$region" \
+        --group-ids "${sg_ids[@]}" \
+        --output json 2>/dev/null) || { echo "ERROR"; return; }
+
+    # any() is used for the CIDR tests: with `(gen) or (gen)` an empty
+    # IpRanges list produced no result at all, so rules open only to ::/0
+    # were skipped. De-duplication and joining happen inside jq because
+    # `paste -d ", "` cycles through its delimiter characters rather than
+    # inserting the two-character string.
+    local open_rules
+    open_rules=$(jq -r '
+        [
+            .SecurityGroups[].IpPermissions[]
+            | select(
+                any(.IpRanges[]?; .CidrIp == "0.0.0.0/0") or
+                any(.Ipv6Ranges[]?; .CidrIpv6 == "::/0")
+              )
+            | select(
+                (.FromPort != 80 or .ToPort != 80) and
+                (.FromPort != 443 or .ToPort != 443)
+              )
+            | if .FromPort == .ToPort then
+                "port \(.FromPort // "all")"
+              elif .FromPort == -1 then
+                "all ports"
+              else
+                "ports \(.FromPort)-\(.ToPort)"
+              end
+        ]
+        | unique
+        | join(", ")
+    ' <<< "$sg_data" 2>/dev/null) || { echo "ERROR"; return; }
+
+    if [[ -z "$open_rules" ]]; then
+        echo "OK"
+    else
+        echo "OPEN: $open_rules"
+    fi
+}
+
+# ── Query EC2 instances ──────────────────────────────────────────────
+# Fetch the instances of one region as a single JSON array on stdout.
+#
+# Globals:   FILTER_STATE, FILTER_TAG_KEY, FILTER_TAG_VALUE, FILTER_TYPE (read)
+# Arguments: $1 - AWS region
+# Outputs:   JSON array of instance objects; "[]" (plus a warning on
+#            stderr) when an aws call fails.
+#
+# stdout is the return channel here, so the debug trace is forced to
+# stderr; otherwise verbose mode would prepend a non-JSON line to the
+# result the caller captures.
+get_instances() {
+    local region="$1"
+    local filters=()
+
+    if [[ -n "$FILTER_STATE" ]]; then
+        filters+=("Name=instance-state-name,Values=$FILTER_STATE")
+    fi
+    if [[ -n "$FILTER_TAG_KEY" && -n "$FILTER_TAG_VALUE" ]]; then
+        filters+=("Name=tag:$FILTER_TAG_KEY,Values=$FILTER_TAG_VALUE")
+    fi
+    if [[ -n "$FILTER_TYPE" ]]; then
+        filters+=("Name=instance-type,Values=$FILTER_TYPE")
+    fi
+
+    local cmd=(
+        aws ec2 describe-instances
+        --region "$region"
+        --output json
+    )
+
+    if (( ${#filters[@]} > 0 )); then
+        cmd+=(--filters "${filters[@]}")
+    fi
+
+    # `|| true`: the logger's exit status must never decide this function's fate.
+    log_debug "Running: ${cmd[*]}" >&2 || true
+
+    local result=""
+    local next_token=""
+
+    # Follow NextToken until the response no longer carries one.
+    while true; do
+        local page_cmd=("${cmd[@]}")
+        if [[ -n "$next_token" ]]; then
+            page_cmd+=(--starting-token "$next_token")
+        fi
+
+        local page
+        page=$("${page_cmd[@]}" 2>/dev/null) || { log_warn "Failed to query EC2 in $region"; echo "[]"; return; }
+
+        # Flatten Reservations[].Instances[] into one array per page.
+        local page_instances
+        page_instances=$(echo "$page" | jq '[.Reservations[].Instances[]]')
+
+        if [[ -z "$result" ]]; then
+            result="$page_instances"
+        else
+            result=$(echo "$result $page_instances" | jq -s 'add')
+        fi
+
+        next_token=$(echo "$page" | jq -r '.NextToken // empty')
+        if [[ -z "$next_token" ]]; then
+            break
+        fi
+    done
+
+    echo "$result"
+}
+
+# ── Get all enabled regions ──────────────────────────────────────────
+get_all_regions() {
+ aws ec2 describe-regions \
+ --region "$AWS_REGION" \
+ --query 'Regions[].RegionName' \
+ --output text 2>/dev/null | tr '\t' '\n' | sort
+}
+
+# ── Text table output ────────────────────────────────────────────────
+# Print the human-readable report for one region: a header, one table row
+# per instance, optional tag / security-group findings, and a summary.
+#
+# Globals:   TAG_CHECK, SG_AUDIT, YELLOW, RED, GREEN, RESET (read)
+# Arguments: $1 - region name; $2 - JSON array of instance objects
+# Outputs:   the report on stdout
+#
+# Counters are advanced with plain assignments: `(( n++ ))` returns status
+# 1 while n is 0, which terminates the script under `set -e` on the first
+# instance. The running total is summed with awk so the report does not
+# depend on bc.
+output_text() {
+    local region="$1" instances_json="$2"
+    local count
+    count=$(echo "$instances_json" | jq 'length')
+
+    local account_id
+    account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "unknown")
+
+    echo "EC2 Inventory Reporter"
+    echo "Account: $account_id"
+    echo "Region: $region"
+    echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    echo "Instances: $count"
+    echo ""
+
+    if (( count == 0 )); then
+        echo " No instances found."
+        echo ""
+        return
+    fi
+
+    local divider="─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────"
+
+    printf " %-21s %-14s %-10s %-15s %-16s %-17s %-16s %s\n" \
+        "INSTANCE ID" "TYPE" "STATE" "AZ" "PRIVATE IP" "PUBLIC IP" "UPTIME" "EST \$/MO"
+    printf " %s\n" "$divider"
+
+    local total_cost=0 running=0 stopped=0 other=0
+    local compliance_issues=0 sg_issues=0
+    local iid itype istate iaz pip eip launch_time tags_json sg_json
+    local uptime cost compliance sg_ids_list sg_result sg
+    local -a sg_arr
+
+    # One tab-separated record per instance. jq substitutes placeholders
+    # for absent addresses so that no field is empty (runs of tabs would
+    # be collapsed by read).
+    # NOTE(review): @tsv backslash-escapes its fields, so tag JSON that
+    # contains backslashes may not round-trip through read — confirm.
+    while IFS=$'\t' read -r iid itype istate iaz pip eip launch_time tags_json sg_json; do
+        uptime=$(format_uptime "$launch_time" "$istate")
+        cost=$(estimate_cost "$itype" "$istate")
+
+        if [[ -z "$eip" || "$eip" == "null" ]]; then
+            eip="—"
+        fi
+
+        printf " %-21s %-14s %-10s %-15s %-16s %-17s %-16s %s\n" \
+            "$iid" "$itype" "$istate" "$iaz" "$pip" "$eip" "$uptime" "\$$cost"
+
+        if [[ "$cost" != "N/A" ]]; then
+            total_cost=$(awk -v a="$total_cost" -v b="$cost" 'BEGIN { printf "%.2f", a + b }')
+        fi
+
+        case "$istate" in
+            running) running=$(( running + 1 )) ;;
+            stopped) stopped=$(( stopped + 1 )) ;;
+            *) other=$(( other + 1 )) ;;
+        esac
+
+        if [[ "$TAG_CHECK" == "true" ]]; then
+            compliance=$(check_tag_compliance "$tags_json")
+            if [[ "$compliance" != "PASS" ]]; then
+                compliance_issues=$(( compliance_issues + 1 ))
+                printf " ${YELLOW} ↳ Tag compliance: %s${RESET}\n" "$compliance"
+            fi
+        fi
+
+        # Security groups are audited for running instances only.
+        if [[ "$SG_AUDIT" == "true" && "$istate" == "running" ]]; then
+            sg_ids_list=$(echo "$sg_json" | jq -r '.[].GroupId' 2>/dev/null)
+            if [[ -n "$sg_ids_list" ]]; then
+                sg_arr=()
+                while IFS= read -r sg; do
+                    if [[ -n "$sg" ]]; then
+                        sg_arr+=("$sg")
+                    fi
+                done <<< "$sg_ids_list"
+                sg_result=$(audit_security_groups "$region" "${sg_arr[@]}")
+                if [[ "$sg_result" != "OK" && "$sg_result" != "—" ]]; then
+                    sg_issues=$(( sg_issues + 1 ))
+                    printf " ${RED} ↳ SG audit: %s${RESET}\n" "$sg_result"
+                fi
+            fi
+        fi
+
+    done < <(echo "$instances_json" | jq -r '
+        .[] |
+        [
+            .InstanceId,
+            .InstanceType,
+            (.State.Name),
+            (.Placement.AvailabilityZone),
+            (.PrivateIpAddress // "—"),
+            (.PublicIpAddress // "null"),
+            (.LaunchTime // "null"),
+            (.Tags // [] | tojson),
+            (.SecurityGroups // [] | tojson)
+        ] | @tsv
+    ')
+
+    printf " %s\n" "$divider"
+
+    # Summary, e.g. "TOTAL: 3 instances (2 running, 1 stopped)".
+    local summary="TOTAL: $count instances"
+    local parts=()
+    if (( running > 0 )); then parts+=("$running running"); fi
+    if (( stopped > 0 )); then parts+=("$stopped stopped"); fi
+    if (( other > 0 )); then parts+=("$other other"); fi
+    if (( ${#parts[@]} > 0 )); then
+        local joined
+        joined=$(printf ", %s" "${parts[@]}")
+        summary+=" (${joined:2})"
+    fi
+    printf " %-70s Estimated monthly cost: \$%.2f\n" "$summary" "$total_cost"
+
+    if [[ "$TAG_CHECK" == "true" ]]; then
+        echo ""
+        if (( compliance_issues > 0 )); then
+            printf " ${YELLOW}Tag compliance issues: %d instance(s) missing required tags${RESET}\n" "$compliance_issues"
+        else
+            printf " ${GREEN}Tag compliance: all instances have required tags${RESET}\n"
+        fi
+    fi
+
+    if [[ "$SG_AUDIT" == "true" ]]; then
+        if (( sg_issues > 0 )); then
+            printf " ${RED}Security group issues: %d instance(s) with overly permissive rules${RESET}\n" "$sg_issues"
+        else
+            printf " ${GREEN}Security groups: no overly permissive rules found${RESET}\n"
+        fi
+    fi
+
+    echo ""
+}
+
+# ── CSV output ────────────────────────────────────────────────────────
+output_csv() {
+ local region="$1" instances_json="$2"
+ local count
+ count=$(echo "$instances_json" | jq 'length')
+
+ local header="instance_id,type,state,az,private_ip,public_ip,launch_time,uptime,est_monthly_cost"
+ if [[ "$TAG_CHECK" == "true" ]]; then
+ header+=",tag_compliance"
+ fi
+ if [[ "$SG_AUDIT" == "true" ]]; then
+ header+=",sg_audit"
+ fi
+ header+=",region"
+ echo "$header"
+
+ if (( count == 0 )); then
+ return
+ fi
+
+ while IFS=$'\t' read -r iid itype istate iaz pip eip launch_time tags_json sg_json; do
+ local uptime
+ uptime=$(format_uptime "$launch_time" "$istate")
+
+ local cost
+ cost=$(estimate_cost "$itype" "$istate")
+
+ [[ -z "$eip" || "$eip" == "null" ]] && eip=""
+
+ local line="$iid,$itype,$istate,$iaz,$pip,$eip,$launch_time,\"$uptime\",$cost"
+
+ if [[ "$TAG_CHECK" == "true" ]]; then
+ local compliance
+ compliance=$(check_tag_compliance "$tags_json")
+ line+=",\"$compliance\""
+ fi
+
+ if [[ "$SG_AUDIT" == "true" ]]; then
+ local sg_ids_list
+ sg_ids_list=$(echo "$sg_json" | jq -r '.[].GroupId' 2>/dev/null)
+ if [[ -n "$sg_ids_list" && "$istate" == "running" ]]; then
+ local sg_arr=()
+ while IFS= read -r sg; do
+ [[ -n "$sg" ]] && sg_arr+=("$sg")
+ done <<< "$sg_ids_list"
+ local sg_result
+ sg_result=$(audit_security_groups "$region" "${sg_arr[@]}")
+ line+=",\"$sg_result\""
+ else
+ line+=",\"—\""
+ fi
+ fi
+
+ line+=",$region"
+ echo "$line"
+
+ done < <(echo "$instances_json" | jq -r '
+ .[] |
+ [
+ .InstanceId,
+ .InstanceType,
+ (.State.Name),
+ (.Placement.AvailabilityZone),
+ (.PrivateIpAddress // "—"),
+ (.PublicIpAddress // "null"),
+ (.LaunchTime // "null"),
+ (.Tags // [] | tojson),
+ (.SecurityGroups // [] | tojson)
+ ] | @tsv
+ ')
+}
+
+# ── JSON output ───────────────────────────────────────────────────────
+# Emit one region's inventory as a JSON document holding the account id,
+# region, generation time, instance count and a trimmed record for every
+# instance.
+output_json() {
+    local region="$1" instances_json="$2"
+
+    local count
+    count=$(jq 'length' <<< "$instances_json")
+
+    # Project each instance onto the reported fields; an empty input
+    # array is left as "[]".
+    local items="[]"
+    if (( count > 0 )); then
+        items=$(jq --arg region "$region" '
+            map({
+                instance_id: .InstanceId,
+                type: .InstanceType,
+                state: .State.Name,
+                az: .Placement.AvailabilityZone,
+                private_ip: (.PrivateIpAddress // null),
+                public_ip: (.PublicIpAddress // null),
+                launch_time: (.LaunchTime // null),
+                ami_id: (.ImageId // null),
+                vpc_id: (.VpcId // null),
+                key_name: (.KeyName // null),
+                tags: (.Tags // []),
+                security_groups: (.SecurityGroups // []),
+                region: $region
+            })
+        ' <<< "$instances_json")
+    fi
+
+    local account_id generated_at
+    account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "unknown")
+    generated_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+    jq -n \
+        --arg account "$account_id" \
+        --arg region "$region" \
+        --arg time "$generated_at" \
+        --argjson count "$count" \
+        --argjson instances "$items" \
+        '{
+            account: $account,
+            region: $region,
+            time: $time,
+            instance_count: $count,
+            instances: $instances
+        }'
+}
+
+# ── Process a single region ──────────────────────────────────────────
+# Query one region and print its report in the selected OUTPUT_FORMAT.
+#
+# Arguments: $1 - region name
+#            $2 - "false" to keep the CSV header line; any other value
+#                 drops it (CSV format only)
+# During an --all-regions scan, regions with no instances print nothing.
+process_region() {
+    local region="$1" csv_header_printed="$2"
+
+    log_debug "Querying region: $region"
+
+    local instances_json count
+    instances_json=$(get_instances "$region")
+    count=$(jq 'length' <<< "$instances_json" 2>/dev/null || echo 0)
+
+    if [[ "$ALL_REGIONS" == "true" ]] && (( count == 0 )); then
+        log_debug "No instances in $region, skipping"
+        return
+    fi
+
+    if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+        output_text "$region" "$instances_json"
+    elif [[ "$OUTPUT_FORMAT" == "json" ]]; then
+        output_json "$region" "$instances_json"
+    elif [[ "$OUTPUT_FORMAT" == "csv" ]]; then
+        if [[ "$csv_header_printed" == "false" ]]; then
+            output_csv "$region" "$instances_json"
+        else
+            # Header already emitted for an earlier region: skip line 1.
+            output_csv "$region" "$instances_json" | tail -n +2
+        fi
+    fi
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat < 0 )); do
+ case "$1" in
+ --region)
+ [[ $# -lt 2 ]] && die "--region requires a value"
+ SCAN_REGION="$2"; shift 2 ;;
+ --all-regions)
+ ALL_REGIONS="true"; shift ;;
+ --state)
+ [[ $# -lt 2 ]] && die "--state requires a value"
+ FILTER_STATE="$2"; shift 2 ;;
+ --tag)
+ [[ $# -lt 2 ]] && die "--tag requires KEY=VALUE"
+ [[ "$2" != *"="* ]] && die "--tag value must be KEY=VALUE"
+ FILTER_TAG_KEY="${2%%=*}"; FILTER_TAG_VALUE="${2#*=}"; shift 2 ;;
+ --type)
+ [[ $# -lt 2 ]] && die "--type requires a value"
+ FILTER_TYPE="$2"; shift 2 ;;
+ --format)
+ [[ $# -lt 2 ]] && die "--format requires a value"
+ OUTPUT_FORMAT="$2"; shift 2 ;;
+ --tag-check)
+ TAG_CHECK="true"; shift ;;
+ --sg-audit)
+ SG_AUDIT="true"; shift ;;
+ --verbose)
+ VERBOSE="true"; shift ;;
+ --no-color)
+ COLOR="never"; shift ;;
+ --help|-h)
+ usage ;;
+ *)
+ die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ case "$OUTPUT_FORMAT" in
+ text|csv|json) ;;
+ *) die "Invalid --format: $OUTPUT_FORMAT (expected text, csv, json)" ;;
+ esac
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+main() {
+ parse_args "$@"
+ setup_colors
+ check_deps
+
+ START_TIME=$(date +%s)
+
+ log_debug "Validating AWS credentials..."
+ aws sts get-caller-identity --output text >/dev/null 2>&1 \
+ || die "AWS credentials not configured or expired"
+
+ if [[ "$ALL_REGIONS" == "true" ]]; then
+ log_info "Scanning all enabled regions..."
+ local regions
+ regions=$(get_all_regions)
+
+ if [[ -z "$regions" ]]; then
+ die "Failed to retrieve region list"
+ fi
+
+ local csv_header="false"
+ local json_first="true"
+
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ echo "["
+ fi
+
+ while IFS= read -r region; do
+ [[ -z "$region" ]] && continue
+ log_info "Scanning $region..."
+
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ if [[ "$json_first" == "true" ]]; then
+ json_first="false"
+ else
+ echo ","
+ fi
+ fi
+
+ process_region "$region" "$csv_header"
+ csv_header="true"
+ done <<< "$regions"
+
+ if [[ "$OUTPUT_FORMAT" == "json" ]]; then
+ echo "]"
+ fi
+ else
+ log_info "Scanning region: $SCAN_REGION"
+ process_region "$SCAN_REGION" "false"
+ fi
+
+ local elapsed=$(( $(date +%s) - START_TIME ))
+ log_info "Completed in ${elapsed}s"
+}
+
+main "$@"
diff --git a/elasticsearch-exporter.sh b/elasticsearch-exporter.sh
new file mode 100755
index 0000000..137a9a7
--- /dev/null
+++ b/elasticsearch-exporter.sh
@@ -0,0 +1,424 @@
+#!/usr/bin/env bash
+#
+# Elasticsearch Prometheus Metrics Exporter
+#
+# Prometheus textfile collector exporter for Elasticsearch.
+# Uses the Elasticsearch REST API to collect cluster health,
+# node statistics, index counts, JVM memory, search/indexing
+# throughput, circuit breaker state, and shard status.
+#
+# Usage:
+# ./elasticsearch-exporter.sh
+# ./elasticsearch-exporter.sh --textfile
+# ./elasticsearch-exporter.sh --install
+#
+# Parameters:
+# --textfile Write to textfile collector directory
+# --install Create cron job for automatic collection
+# --help Show usage
+#
+# Environment:
+# ES_URL Elasticsearch REST API URL (default: http://localhost:9200)
+# ES_USER Username for basic auth (optional)
+# ES_PASS Password for basic auth (optional)
+# TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
+# CURL_TIMEOUT API request timeout in seconds (default: 10)
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+# Version: 1.0
+#
+# Metrics Exported:
+# Core:
+# - elasticsearch_up
+# - elasticsearch_exporter_info{version}
+#
+# Cluster Health:
+# - elasticsearch_cluster_health{status}
+# - elasticsearch_cluster_nodes_total
+# - elasticsearch_cluster_data_nodes
+# - elasticsearch_cluster_shards_active
+# - elasticsearch_cluster_shards_relocating
+# - elasticsearch_cluster_shards_initializing
+# - elasticsearch_cluster_shards_unassigned
+# - elasticsearch_cluster_pending_tasks
+#
+# Cluster Stats:
+# - elasticsearch_indices_total
+# - elasticsearch_documents_total
+# - elasticsearch_store_size_bytes
+#
+# Node Stats:
+# - elasticsearch_jvm_heap_used_bytes{node}
+# - elasticsearch_jvm_heap_max_bytes{node}
+# - elasticsearch_search_query_total{node}
+# - elasticsearch_indexing_index_total{node}
+# - elasticsearch_circuit_breaker_tripped{node,breaker}
+#
+# Exporter:
+# - elasticsearch_exporter_duration_seconds
+# - elasticsearch_exporter_last_run_timestamp
+
+set -euo pipefail
+
+# --- Configuration ---
+readonly VERSION="1.0"
+readonly SCRIPT_NAME="$(basename "$0")"
+ES_URL="${ES_URL:-http://localhost:9200}"
+ES_USER="${ES_USER:-}"
+ES_PASS="${ES_PASS:-}"
+TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+TEXTFILE_MODE=false
+OUTPUT=""
+START_TIME=""
+
+# --- Functions ---
+
+usage() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+validate_config() {
+ # Strip trailing slash
+ ES_URL="${ES_URL%/}"
+}
+
+# GET an Elasticsearch endpoint and print the response body.
+#
+# Arguments: $1 - endpoint path, appended to ES_URL
+# Outputs:   the body on success; an empty line when curl fails
+# Returns:   always 0
+# Basic auth is sent only when both ES_USER and ES_PASS are non-empty.
+api_get() {
+    local endpoint="$1"
+    local -a opts
+    opts=(-sf --max-time "$CURL_TIMEOUT")
+
+    if [[ -n "$ES_USER" ]] && [[ -n "$ES_PASS" ]]; then
+        opts+=(-u "${ES_USER}:${ES_PASS}")
+    fi
+
+    if ! curl "${opts[@]}" "${ES_URL}${endpoint}" 2>/dev/null; then
+        echo ""
+    fi
+}
+
+add_metric() {
+ local name="$1"
+ local type="$2"
+ local help="$3"
+ local value="$4"
+ local labels="${5:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name} ${value}
+"
+ fi
+}
+
+add_metric_value() {
+ local name="$1"
+ local value="$2"
+ local labels="${3:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="${name} ${value}
+"
+ fi
+}
+
+collect_cluster_health() {
+ local health_json
+ health_json=$(api_get "/_cluster/health")
+
+ if [[ -z "$health_json" ]]; then
+ add_metric "elasticsearch_up" "gauge" "Elasticsearch reachability (1=up, 0=down)" "0"
+ return 1
+ fi
+
+ add_metric "elasticsearch_up" "gauge" "Elasticsearch reachability (1=up, 0=down)" "1"
+
+ # Cluster health status (green=0, yellow=1, red=2)
+ local status
+ status=$(echo "$health_json" | jq -r '.status // "red"' 2>/dev/null)
+
+ local status_value
+ case "$status" in
+ green) status_value=0 ;;
+ yellow) status_value=1 ;;
+ red) status_value=2 ;;
+ *) status_value=2 ;;
+ esac
+
+ add_metric "elasticsearch_cluster_health" "gauge" "Cluster health status (green=0, yellow=1, red=2)" "$status_value" "status=\"${status}\""
+
+ # Node counts
+ local nodes_total data_nodes
+ nodes_total=$(echo "$health_json" | jq '.number_of_nodes // 0' 2>/dev/null)
+ data_nodes=$(echo "$health_json" | jq '.number_of_data_nodes // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_cluster_nodes_total" "gauge" "Total number of cluster nodes" "${nodes_total:-0}"
+ add_metric "elasticsearch_cluster_data_nodes" "gauge" "Number of data nodes" "${data_nodes:-0}"
+
+ # Shard counts
+ local active_shards relocating initializing unassigned
+ active_shards=$(echo "$health_json" | jq '.active_shards // 0' 2>/dev/null)
+ relocating=$(echo "$health_json" | jq '.relocating_shards // 0' 2>/dev/null)
+ initializing=$(echo "$health_json" | jq '.initializing_shards // 0' 2>/dev/null)
+ unassigned=$(echo "$health_json" | jq '.unassigned_shards // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_cluster_shards_active" "gauge" "Number of active shards" "${active_shards:-0}"
+ add_metric "elasticsearch_cluster_shards_relocating" "gauge" "Number of relocating shards" "${relocating:-0}"
+ add_metric "elasticsearch_cluster_shards_initializing" "gauge" "Number of initializing shards" "${initializing:-0}"
+ add_metric "elasticsearch_cluster_shards_unassigned" "gauge" "Number of unassigned shards" "${unassigned:-0}"
+
+ # Pending tasks
+ local pending_tasks
+ pending_tasks=$(echo "$health_json" | jq '.number_of_pending_tasks // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_cluster_pending_tasks" "gauge" "Number of pending cluster tasks" "${pending_tasks:-0}"
+
+ return 0
+}
+
+collect_cluster_stats() {
+ local stats_json
+ stats_json=$(api_get "/_cluster/stats")
+
+ if [[ -z "$stats_json" ]]; then
+ return
+ fi
+
+ # Indices count
+ local indices_count
+ indices_count=$(echo "$stats_json" | jq '.indices.count // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_indices_total" "gauge" "Total number of indices" "${indices_count:-0}"
+
+ # Document count
+ local doc_count
+ doc_count=$(echo "$stats_json" | jq '.indices.docs.count // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_documents_total" "gauge" "Total number of documents" "${doc_count:-0}"
+
+ # Store size
+ local store_size
+ store_size=$(echo "$stats_json" | jq '.indices.store.size_in_bytes // 0' 2>/dev/null)
+
+ add_metric "elasticsearch_store_size_bytes" "gauge" "Total store size in bytes" "${store_size:-0}"
+}
+
+collect_node_stats() {
+ local nodes_json
+ nodes_json=$(api_get "/_nodes/stats")
+
+ if [[ -z "$nodes_json" ]]; then
+ return
+ fi
+
+ local node_ids
+ node_ids=$(echo "$nodes_json" | jq -r '.nodes | keys[]' 2>/dev/null)
+
+ if [[ -z "$node_ids" ]]; then
+ return
+ fi
+
+ # JVM heap used per node
+ OUTPUT+="# HELP elasticsearch_jvm_heap_used_bytes JVM heap memory used per node
+# TYPE elasticsearch_jvm_heap_used_bytes gauge
+"
+
+ local node_id node_name heap_used
+ for node_id in $node_ids; do
+ node_name=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].name // \"${node_id}\"" 2>/dev/null)
+ heap_used=$(echo "$nodes_json" | jq ".nodes[\"${node_id}\"].jvm.mem.heap_used_in_bytes // 0" 2>/dev/null)
+
+ add_metric_value "elasticsearch_jvm_heap_used_bytes" "${heap_used:-0}" "node=\"${node_name}\""
+ done
+
+ # JVM heap max per node
+ OUTPUT+="# HELP elasticsearch_jvm_heap_max_bytes JVM heap memory max per node
+# TYPE elasticsearch_jvm_heap_max_bytes gauge
+"
+
+ local heap_max
+ for node_id in $node_ids; do
+ node_name=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].name // \"${node_id}\"" 2>/dev/null)
+ heap_max=$(echo "$nodes_json" | jq ".nodes[\"${node_id}\"].jvm.mem.heap_max_in_bytes // 0" 2>/dev/null)
+
+ add_metric_value "elasticsearch_jvm_heap_max_bytes" "${heap_max:-0}" "node=\"${node_name}\""
+ done
+
+ # Search query total per node
+ OUTPUT+="# HELP elasticsearch_search_query_total Total search queries per node
+# TYPE elasticsearch_search_query_total gauge
+"
+
+ local query_total
+ for node_id in $node_ids; do
+ node_name=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].name // \"${node_id}\"" 2>/dev/null)
+ query_total=$(echo "$nodes_json" | jq ".nodes[\"${node_id}\"].indices.search.query_total // 0" 2>/dev/null)
+
+ add_metric_value "elasticsearch_search_query_total" "${query_total:-0}" "node=\"${node_name}\""
+ done
+
+ # Indexing index total per node
+ OUTPUT+="# HELP elasticsearch_indexing_index_total Total indexing operations per node
+# TYPE elasticsearch_indexing_index_total gauge
+"
+
+ local index_total
+ for node_id in $node_ids; do
+ node_name=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].name // \"${node_id}\"" 2>/dev/null)
+ index_total=$(echo "$nodes_json" | jq ".nodes[\"${node_id}\"].indices.indexing.index_total // 0" 2>/dev/null)
+
+ add_metric_value "elasticsearch_indexing_index_total" "${index_total:-0}" "node=\"${node_name}\""
+ done
+
+ # Circuit breaker trips per node per breaker type
+ OUTPUT+="# HELP elasticsearch_circuit_breaker_tripped Circuit breaker trip count per node and breaker
+# TYPE elasticsearch_circuit_breaker_tripped gauge
+"
+
+ local breaker_names breaker_name tripped
+ for node_id in $node_ids; do
+ node_name=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].name // \"${node_id}\"" 2>/dev/null)
+ breaker_names=$(echo "$nodes_json" | jq -r ".nodes[\"${node_id}\"].breakers | keys[]" 2>/dev/null)
+
+ for breaker_name in $breaker_names; do
+ tripped=$(echo "$nodes_json" | jq ".nodes[\"${node_id}\"].breakers[\"${breaker_name}\"].tripped // 0" 2>/dev/null)
+
+ add_metric_value "elasticsearch_circuit_breaker_tripped" "${tripped:-0}" "node=\"${node_name}\",breaker=\"${breaker_name}\""
+ done
+ done
+}
+
+# Deliver the accumulated OUTPUT.
+# Outside textfile mode it is printed to stdout. In textfile mode it is
+# written to a temporary file inside TEXTFILE_DIR and then renamed to
+# elasticsearch.prom, so readers never see a half-written file.
+write_output() {
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local destination="${TEXTFILE_DIR}/elasticsearch.prom"
+    local staging="${destination}.$$"
+
+    mkdir -p "$TEXTFILE_DIR"
+    echo "$OUTPUT" > "$staging"
+    mv "$staging" "$destination"
+}
+
+# Install a cron definition at /etc/cron.d/elasticsearch-exporter.
+# Requires root: exits with status 1 when EUID is not 0.
+# NOTE(review): the here-document that forms the cron file's contents is
+# not intact in this view, so what exactly is written (schedule, command,
+# whether env_vars is included) cannot be confirmed from here.
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ # Absolute, symlink-resolved path of this script.
+ local script_path
+ script_path=$(readlink -f "$0")
+
+ # Assemble NAME=value lines from the current settings; ES_USER and
+ # ES_PASS are included only when non-empty. The "\n" sequences are
+ # literal backslash-n at this point.
+ # NOTE(review): ES_PASS is placed in env_vars in clear text and the file
+ # below is chmod 644 (world-readable). If env_vars ends up in that file,
+ # the password is exposed to every local user — confirm and consider 600.
+ local env_vars=""
+ env_vars+="ES_URL=${ES_URL}\n"
+ if [[ -n "$ES_USER" ]]; then
+ env_vars+="ES_USER=${ES_USER}\n"
+ fi
+ if [[ -n "$ES_PASS" ]]; then
+ env_vars+="ES_PASS=${ES_PASS}\n"
+ fi
+ env_vars+="TEXTFILE_DIR=${TEXTFILE_DIR}"
+
+ cat > /etc/cron.d/elasticsearch-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/elasticsearch-exporter
+ echo "Installed cron job: /etc/cron.d/elasticsearch-exporter"
+ echo "Metrics will be written to: ${TEXTFILE_DIR}/elasticsearch.prom"
+}
+
+# --- Main ---
+
+main() {
+ # Parse arguments
+ for arg in "$@"; do
+ case "$arg" in
+ --textfile) TEXTFILE_MODE=true ;;
+ --install)
+ check_dependencies
+ validate_config
+ install_cron
+ exit 0
+ ;;
+ --help|-h) usage ;;
+ *) echo "Unknown option: $arg" >&2; usage ;;
+ esac
+ done
+
+ check_dependencies
+ validate_config
+
+ START_TIME=$(date +%s%N)
+
+ # Exporter info
+ add_metric "elasticsearch_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+ # Collect metrics
+ if collect_cluster_health; then
+ collect_cluster_stats
+ collect_node_stats
+ fi
+
+ # Exporter performance
+ local end_time duration
+ end_time=$(date +%s%N)
+ duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+ add_metric "elasticsearch_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
+ add_metric "elasticsearch_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+ write_output
+}
+
+main "$@"
diff --git a/exchange-metrics.ps1 b/exchange-metrics.ps1
index 98acaef..e7cd223 100644
--- a/exchange-metrics.ps1
+++ b/exchange-metrics.ps1
@@ -1,6 +1,43 @@
-# Exchange Metrics Collector - Outputs Prometheus-compatible metrics
+# Exchange Metrics Collector - Outputs Prometheus-compatible metrics
# Requires Exchange Management Shell and appropriate permissions
+# Script parameters:
+#   -InstallScheduledTask  register a recurring scheduled task that runs
+#                          this script (see the block below)
+#   -TaskIntervalMinutes   repetition interval for that task, in minutes
+#                          (default 5)
+param(
+ [switch]$InstallScheduledTask,
+ [int]$TaskIntervalMinutes = 5
+)
+
+# Optional self-installation. The task is only created when no task named
+# "ExchangeMetricsExporter" exists yet; an existing task is left untouched.
+# There is no exit/return in this block, so execution continues with the
+# code that follows it.
+if ($InstallScheduledTask) {
+ $taskName = "ExchangeMetricsExporter"
+ $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+
+ if (-not $existingTask) {
+ # The task re-invokes this very script file through powershell.exe.
+ $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$($MyInvocation.MyCommand.Path)`""
+
+ # Reject a zero or negative interval before building the trigger.
+ if (-not $TaskIntervalMinutes -or $TaskIntervalMinutes -le 0) {
+ throw "TaskIntervalMinutes must be a positive integer"
+ }
+
+ # First run one minute from now, then repeat every $TaskIntervalMinutes
+ # minutes for a repetition duration of 365 days.
+ $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365)
+ # Run as the SYSTEM account with the highest run level.
+ $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
+
+ try {
+ Write-Host "Creating scheduled task: $taskName"
+ Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Exports Exchange metrics for Prometheus every $TaskIntervalMinutes minutes"
+
+ # Read the task back to confirm the registration took effect.
+ $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
+ if (-not $createdTask) {
+ throw "Failed to verify scheduled task creation"
+ }
+ Write-Host "Successfully created scheduled task: $taskName" -ForegroundColor Green
+ } catch {
+ # Report the failure, then rethrow so the caller sees it too.
+ Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
+ throw
+ }
+ } else {
+ Write-Host "Scheduled task '$taskName' already exists, skipping creation"
+ }
+}
+
$StartTime = Get-Date
$Hostname = $env:COMPUTERNAME
diff --git a/expand-drive.sh b/expand-drive.sh
index f85c145..d699e3a 100755
--- a/expand-drive.sh
+++ b/expand-drive.sh
@@ -7,10 +7,13 @@
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
-#### Version: 2.3 ####
+#### Version: 2.4 ####
#### ####
#### Usage: sudo ./expand-drive.sh ####
#############################################################
+# v2.4 changes:
+# - Fixed: a grep inside a pipeline aborted the script under "set -euo pipefail" when it found no matches; added a "|| true" guard.
+#############################################################
# Set strict error handling:
# -e: Exit immediately if a command exits with a non-zero status
@@ -189,7 +192,7 @@ process_partition() {
# Extract partition number from device path (e.g., extract "1" from "/dev/sda1")
local part_num
- part_num=$(echo "$partition" | grep -o '[0-9]\+$' | tail -1)
+ part_num=$(echo "$partition" | { grep -o '[0-9]\+$' || true; } | tail -1)
if [ -z "$part_num" ]; then
log_error "Could not extract partition number from $partition"
return 1
@@ -293,7 +296,7 @@ main() {
# Get list of all disk devices in the system using lsblk
# Filter for disk type and extract device names
local devices
- devices=$($LSBLK_PATH -pln -o NAME,TYPE | grep "disk" | cut -d' ' -f1)
+ devices=$($LSBLK_PATH -pln -o NAME,TYPE | { grep "disk" || true; } | cut -d' ' -f1)
# Verify we found at least one disk device
if [ -z "$devices" ]; then
diff --git a/fapolicyd-log-analyzer.sh b/fapolicyd-log-analyzer.sh
new file mode 100644
index 0000000..d6f30c5
--- /dev/null
+++ b/fapolicyd-log-analyzer.sh
@@ -0,0 +1,387 @@
+#!/bin/bash
+
+#############################################################
+#### fapolicyd Log Analyzer Script ####
+#### Parses denial logs and suggests fix commands ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### To use this script chmod it to 755 ####
+#### or simply type bash ####
+#############################################################
+
+# ── Colors ────────────────────────────────────────────────
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m' # No Color
+
+# ── Defaults ──────────────────────────────────────────────
+MODE="recent"
+OUTPUT_FILE=""
+QUIET=0
+TOTAL_DENIALS=0
+UNIQUE_FILES=0
+SUGGESTED_FIXES=0
+
+# ── Functions ─────────────────────────────────────────────
+
+# Print the help text to stdout, then terminate the script with status 0.
+usage() {
+    echo -e "${BOLD}fapolicyd Log Analyzer${NC}"
+    # One argument per output line; printf repeats the format for each of them.
+    printf '%s\n' \
+        "" \
+        "Usage: $0 [OPTIONS]" \
+        "" \
+        "Options:" \
+        " --help Show this help message" \
+        " --recent Analyze denials from the last hour only (default)" \
+        " --all Analyze all denials in the log" \
+        " --output FILE Save suggested fixes to FILE" \
+        " --quiet Show suggestions only, suppress raw denial lines" \
+        "" \
+        "Examples:" \
+        " sudo bash $0 --recent" \
+        " sudo bash $0 --all --output fixes.txt" \
+        " sudo bash $0 --quiet --output /tmp/fixes.txt"
+    exit 0
+}
+
+check_root() {
+ if [[ $EUID -ne 0 ]]; then
+ echo -e "${RED}Error: This script must be run as root.${NC}"
+ echo "Please run with: sudo bash $0"
+ exit 1
+ fi
+}
+
+check_fapolicyd() {
+ if ! command -v fapolicyd-cli &>/dev/null; then
+ echo -e "${RED}Error: fapolicyd does not appear to be installed.${NC}"
+ echo -e "${YELLOW}Install with: dnf install fapolicyd${NC}"
+ exit 1
+ fi
+
+ if ! systemctl is-active --quiet fapolicyd 2>/dev/null; then
+ echo -e "${YELLOW}Warning: fapolicyd service is not currently running.${NC}"
+ echo -e "${CYAN}Continuing to analyze existing log entries...${NC}"
+ echo ""
+ fi
+}
+
+output_line() {
+ local line="$1"
+ echo -e "$line"
+ if [[ -n "$OUTPUT_FILE" ]]; then
+ echo -e "$line" | sed 's/\x1b\[[0-9;]*m//g' >> "$OUTPUT_FILE"
+ fi
+}
+
+# ── fapolicyd Analysis ───────────────────────────────────
+
+# Print the key=value fields found in one denial record, then the suggested fixes.
+# Arguments: $1 - a single denial log line
+parse_fapolicyd_denial() {
+    local line="$1"
+    local dec perm fname exe trust pid
+
+    # Each field is the text after "key=" up to the next space.
+    dec=$(grep -oP 'dec=\K[^ ]+' <<< "$line")
+    perm=$(grep -oP 'perm=\K[^ ]+' <<< "$line")
+    fname=$(grep -oP 'fname=\K[^ ]+' <<< "$line")
+    exe=$(grep -oP 'exe=\K[^ ]+' <<< "$line")
+    trust=$(grep -oP 'trust=\K[^ ]+' <<< "$line")
+    pid=$(grep -oP 'pid=\K[^ ]+' <<< "$line")
+
+    # The raw record is only shown when --quiet was not given.
+    if [[ $QUIET -eq 0 ]]; then
+        output_line "${RED}DENIAL:${NC} $line"
+    fi
+
+    # Fields missing from the record are skipped.
+    if [[ -n "$dec" ]]; then output_line "${CYAN} Decision:${NC} $dec"; fi
+    if [[ -n "$perm" ]]; then output_line "${CYAN} Permission:${NC} $perm"; fi
+    if [[ -n "$fname" ]]; then output_line "${CYAN} File:${NC} $fname"; fi
+    if [[ -n "$exe" ]]; then output_line "${CYAN} Executable:${NC} $exe"; fi
+    if [[ -n "$trust" ]]; then output_line "${CYAN} Trust status:${NC} $trust"; fi
+    if [[ -n "$pid" ]]; then output_line "${CYAN} PID:${NC} $pid"; fi
+
+    suggest_fapolicyd_fix "$fname" "$exe" "$perm" "$trust"
+    output_line ""
+}
+
+# Print remediation commands for one denial and count it in SUGGESTED_FIXES.
+# Arguments:
+#   $1 - denied file path (may be empty)
+#   $2 - executable that triggered the denial (may be empty)
+#   $3 - permission that was requested (may be empty)
+#   $4 - trust status (accepted for callers, not used here)
+# Globals: SUGGESTED_FIXES (incremented on every call)
+suggest_fapolicyd_fix() {
+    local fname="$1" exe="$2" perm="$3" trust="$4"
+
+    ((SUGGESTED_FIXES++))
+
+    if [[ -n "$fname" ]]; then
+        output_line "${GREEN} Suggested fixes:${NC}"
+
+        # Trust the file
+        output_line "${GREEN} 1. Add file to trust database:${NC}"
+        output_line "${GREEN} fapolicyd-cli --file add ${fname}${NC}"
+        output_line "${GREEN} fapolicyd-cli --update${NC}"
+
+        # Check trust
+        output_line "${GREEN} 2. Verify trust status:${NC}"
+        output_line "${GREEN} fapolicyd-cli --check-path ${fname}${NC}"
+
+        # When rpm is available and owns the file, name the package.
+        if command -v rpm &>/dev/null; then
+            local pkg
+            # Test rpm's exit status directly instead of inspecting $? afterwards.
+            if pkg=$(rpm -qf "$fname" 2>/dev/null) && [[ -n "$pkg" ]]; then
+                output_line "${CYAN} Note: File belongs to package: ${pkg}${NC}"
+                output_line "${YELLOW} If the file was modified after install, consider:${NC}"
+                output_line "${GREEN} rpm --restore ${pkg}${NC}"
+                output_line "${GREEN} fapolicyd-cli --update${NC}"
+            fi
+        fi
+
+        # If it looks like a shared library
+        if [[ "$fname" == *.so* ]]; then
+            output_line "${YELLOW} Library denial — also check:${NC}"
+            output_line "${GREEN} ldconfig${NC}"
+            output_line "${GREEN} fapolicyd-cli --update${NC}"
+        fi
+    fi
+
+    # Suggest rule-based approach
+    if [[ -n "$exe" && -n "$perm" ]]; then
+        output_line "${GREEN} 3. Or add a custom rule in /etc/fapolicyd/rules.d/:${NC}"
+        # fapolicyd rules name the permission as "perm=<value>"; a bare value
+        # after the decision is not valid rule syntax.
+        output_line "${GREEN} allow perm=${perm} exe=${exe} : all${NC}"
+    fi
+}
+
+categorize_fapolicyd_denial() {
+ local line="$1"
+ local perm fname
+
+ perm=$(echo "$line" | grep -oP 'perm=\K[^ ]+')
+ fname=$(echo "$line" | grep -oP 'fname=\K[^ ]+')
+
+ case "$perm" in
+ execute)
+ if [[ "$fname" == *.so* ]]; then
+ echo "library"
+ else
+ echo "execute"
+ fi
+ ;;
+ open)
+ echo "open"
+ ;;
+ *)
+ echo "other"
+ ;;
+ esac
+}
+
+# Main analysis routine. Prints a status header, collects FANOTIFY records
+# (via ausearch or directly from /var/log/audit/audit.log), groups them by
+# the category returned from categorize_fapolicyd_denial, prints each denial
+# with its suggestions, then a bulk command list and a rule-file reference.
+# Globals written: TOTAL_DENIALS, UNIQUE_FILES.
+# Returns early (after printing a message) when the audit log is missing or
+# no denials were found.
+analyze_fapolicyd() {
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line "${BOLD} fapolicyd Log Analysis${NC}"
+ output_line "${BOLD}═══════════════════════════════════════════════════${NC}"
+ output_line ""
+
+ # Show daemon status
+ local daemon_status
+ daemon_status=$(systemctl is-active fapolicyd 2>/dev/null)
+ output_line "${CYAN}fapolicyd status:${NC} $daemon_status"
+
+ # Show integrity setting
+ if [[ -f /etc/fapolicyd/fapolicyd.conf ]]; then
+ local integrity
+ integrity=$(grep -oP '^\s*integrity\s*=\s*\K.*' /etc/fapolicyd/fapolicyd.conf 2>/dev/null)
+ [[ -n "$integrity" ]] && output_line "${CYAN}Integrity mode:${NC} $integrity"
+ fi
+
+ # Show trust database stats
+ if command -v fapolicyd-cli &>/dev/null; then
+ local trust_count
+ # Number of lines printed by --dump-db; errors from the CLI are discarded.
+ trust_count=$(fapolicyd-cli --dump-db 2>/dev/null | wc -l)
+ [[ -n "$trust_count" ]] && output_line "${CYAN}Trusted files:${NC} $trust_count"
+ fi
+ output_line ""
+
+ # Gather denials from audit log
+ local denials=""
+
+ if [[ ! -f /var/log/audit/audit.log ]]; then
+ output_line "${RED}Error: Cannot find /var/log/audit/audit.log${NC}"
+ output_line "${YELLOW}Ensure auditd is running: systemctl start auditd${NC}"
+ return
+ fi
+
+ if [[ "$MODE" == "recent" ]]; then
+ # NOTE(review): the ausearch(8) man page describes the "recent" keyword as
+ # ten minutes ago, while the help text and the fallback below use one hour —
+ # confirm which window is intended. Unlike the other two paths, this one is
+ # not filtered on "dec=deny".
+ if command -v ausearch &>/dev/null; then
+ denials=$(ausearch -m FANOTIFY -ts recent 2>/dev/null | grep "type=FANOTIFY")
+ fi
+ # Fallback to manual log parsing
+ if [[ -z "$denials" ]]; then
+ local one_hour_ago
+ one_hour_ago=$(date -d '1 hour ago' '+%s' 2>/dev/null)
+ if [[ -n "$one_hour_ago" ]]; then
+ # Keep deny records whose msg=audit(<epoch>. timestamp is at or after the cutoff.
+ # NOTE(review): the array form of match() is a gawk extension — confirm awk is
+ # gawk on the target hosts.
+ denials=$(awk -v cutoff="$one_hour_ago" '
+ /type=FANOTIFY/ && /dec=deny/ {
+ match($0, /msg=audit\(([0-9]+)\./, arr)
+ if (arr[1] >= cutoff) print
+ }
+ ' /var/log/audit/audit.log)
+ fi
+ fi
+ else
+ denials=$(grep "type=FANOTIFY" /var/log/audit/audit.log | grep "dec=deny")
+ fi
+
+ if [[ -z "$denials" ]]; then
+ output_line "${GREEN}No fapolicyd denials found.${NC}"
+ output_line ""
+ return
+ fi
+
+ # Group denials by category
+ # categories maps a category name to its newline-joined records;
+ # seen_files is used as a set of distinct fname= values.
+ declare -A categories
+ local denial_count=0
+ local -A seen_files
+
+ while IFS= read -r line; do
+ [[ -z "$line" ]] && continue
+ ((denial_count++))
+ local category
+ category=$(categorize_fapolicyd_denial "$line")
+ categories["$category"]+="$line"$'\n'
+
+ local f
+ f=$(echo "$line" | grep -oP 'fname=\K[^ ]+')
+ [[ -n "$f" ]] && seen_files["$f"]=1
+ done <<< "$denials"
+
+ TOTAL_DENIALS=$denial_count
+ UNIQUE_FILES=${#seen_files[@]}
+
+ # Display grouped results
+ # Fixed iteration order so sections always appear in the same sequence.
+ for category in "execute" "library" "open" "other"; do
+ if [[ -n "${categories[$category]}" ]]; then
+ local label
+ case "$category" in
+ execute) label="Execution Denials" ;;
+ library) label="Library Load Denials" ;;
+ open) label="File Open Denials" ;;
+ other) label="Other Denials" ;;
+ esac
+
+ output_line "${BOLD}── ${label} ──────────────────────────────────${NC}"
+ output_line ""
+
+ while IFS= read -r denial_line; do
+ [[ -z "$denial_line" ]] && continue
+ parse_fapolicyd_denial "$denial_line"
+ done <<< "${categories[$category]}"
+ fi
+ done
+
+ # Show bulk fix suggestions
+ if [[ ${#seen_files[@]} -gt 0 ]]; then
+ output_line "${BOLD}── Bulk Fix Commands ────────────────────────────${NC}"
+ output_line ""
+ output_line "${YELLOW}To trust all denied files at once:${NC}"
+ for f in "${!seen_files[@]}"; do
+ output_line "${GREEN} fapolicyd-cli --file add ${f}${NC}"
+ done
+ output_line "${GREEN} fapolicyd-cli --update${NC}"
+ output_line ""
+ fi
+
+ # Rule file reference
+ output_line "${BOLD}── Rule File Reference ──────────────────────────${NC}"
+ output_line ""
+ output_line "${CYAN}Rules are loaded from:${NC}"
+ if [[ -d /etc/fapolicyd/rules.d ]]; then
+ output_line " /etc/fapolicyd/rules.d/ (drop-in directory)"
+ local rule_files
+ rule_files=$(ls /etc/fapolicyd/rules.d/ 2>/dev/null)
+ if [[ -n "$rule_files" ]]; then
+ output_line "${CYAN} Current rule files:${NC}"
+ while IFS= read -r rf; do
+ output_line " $rf"
+ done <<< "$rule_files"
+ fi
+ fi
+ if [[ -f /etc/fapolicyd/fapolicyd.rules ]]; then
+ output_line " /etc/fapolicyd/fapolicyd.rules (compiled rules)"
+ fi
+ output_line ""
+ output_line "${YELLOW}After making changes, restart the daemon:${NC}"
+ output_line "${GREEN} systemctl restart fapolicyd${NC}"
+ output_line ""
+}
+
+# ── Summary ───────────────────────────────────────────────
+
+# Print the closing summary: the three global counters and, when an output
+# file was requested, where the suggestions were saved.
+print_summary() {
+    local banner="${BOLD}═══════════════════════════════════════════════════${NC}"
+
+    output_line "$banner"
+    output_line "${BOLD} Summary${NC}"
+    output_line "$banner"
+    output_line ""
+    output_line " Total denials found: ${BOLD}${TOTAL_DENIALS}${NC}"
+    output_line " Unique files denied: ${BOLD}${UNIQUE_FILES}${NC}"
+    output_line " Suggested fixes: ${BOLD}${SUGGESTED_FIXES}${NC}"
+    output_line ""
+
+    if [[ -z "$OUTPUT_FILE" ]]; then
+        return 0
+    fi
+    output_line "${GREEN}Suggestions saved to: ${OUTPUT_FILE}${NC}"
+    output_line ""
+}
+
+# ── Parse Arguments ───────────────────────────────────────
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --help|-h)
+ usage
+ ;;
+ --recent)
+ MODE="recent"
+ shift
+ ;;
+ --all)
+ MODE="all"
+ shift
+ ;;
+ --output)
+ if [[ -z "$2" || "$2" == --* ]]; then
+ echo -e "${RED}Error: --output requires a filename argument.${NC}"
+ exit 1
+ fi
+ OUTPUT_FILE="$2"
+ shift 2
+ ;;
+ --quiet|-q)
+ QUIET=1
+ shift
+ ;;
+ *)
+ echo -e "${RED}Unknown option: $1${NC}"
+ echo "Use --help for usage information."
+ exit 1
+ ;;
+ esac
+done
+
+# ── Main ──────────────────────────────────────────────────
+
+check_root
+
+# Start from an empty output file when one was requested.
+if [[ -n "$OUTPUT_FILE" ]]; then
+    : > "$OUTPUT_FILE"
+fi
+
+# Banner
+echo -e "${BOLD}fapolicyd Log Analyzer v1.00${NC}"
+echo -e "${CYAN}Mode: ${MODE}${NC}"
+echo ""
+
+# Environment check, analysis, then the closing summary.
+check_fapolicyd
+analyze_fapolicyd
+print_summary
diff --git a/file-permissions-audit.sh b/file-permissions-audit.sh
new file mode 100644
index 0000000..81ab5fb
--- /dev/null
+++ b/file-permissions-audit.sh
@@ -0,0 +1,354 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### file-permissions-audit.sh — Find world-writable files, SUID/SGID binaries, ####
+#### and files owned by nobody or with no valid owner ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./file-permissions-audit.sh ####
+#### ./file-permissions-audit.sh --scan-dirs /usr /bin /home ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+# Strict mode: exit on error, on use of an unset variable, and on a failure
+# in any stage of a pipeline.
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# SCAN_DIRS, VERBOSE and COLOR can be preset through the environment.
+# SCAN_DIRS is a space-separated list of directories.
+SCAN_DIRS="${SCAN_DIRS:-/usr /bin /sbin /var /opt /home /tmp}"
+# Paths whose contents are skipped by build_exclude_args.
+EXCLUDE_PATHS=()
+VERBOSE="${VERBOSE:-false}"
+# COLOR is compared against "never" and "always" in setup_colors; any other
+# value enables color only when stdout is a terminal.
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# Per-scan finding counters, each set by its scan_* function and read by print_summary.
+COUNT_WORLD_WRITABLE=0
+COUNT_SUID=0
+COUNT_SGID=0
+COUNT_NOBODY=0
+COUNT_UNOWNED=0
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ YELLOW='\033[0;33m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ else
+ RED="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${CYAN}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+# Print a section title surrounded by blank lines.
+# Arguments: $1 - title text
+section_header() {
+    local title="$1"
+
+    echo ""
+    echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
+    echo ""
+}
+
+# Print, as one space-separated line, the find(1) predicates that skip every
+# entry of EXCLUDE_PATHS plus /proc and /sys.
+# The output contains glob characters ("/*"), so a consumer must split it
+# without allowing pathname expansion.
+build_exclude_args() {
+    local find_filters=()
+
+    # The nested expansion keeps an empty array from tripping `set -u`.
+    for path in "${EXCLUDE_PATHS[@]+"${EXCLUDE_PATHS[@]}"}"; do
+        find_filters+=(-not -path "${path}/*")
+    done
+
+    # Always exclude /proc and /sys
+    find_filters+=(-not -path "/proc/*")
+    find_filters+=(-not -path "/sys/*")
+
+    echo "${find_filters[@]}"
+}
+
+# Print "<octal> <symbolic> <owner>:<group> <type>" for one path.
+# Arguments: $1 - path to inspect
+# Outputs:   one line on stdout; placeholder fields when stat fails.
+#            <type> is "link", "dir" or "file".
+get_file_info() {
+    local file="$1"
+    local meta ftype
+
+    # One stat call yields all three metadata fields; fall back to the
+    # placeholders when the path cannot be stat'ed.
+    meta=$(stat -c '%a %A %U:%G' "$file" 2>/dev/null) || meta="???? ?????????? UNKNOWN:UNKNOWN"
+
+    # Test -L before -d: [[ -d ]] follows symlinks, so a link pointing at a
+    # directory would otherwise be labelled "dir" although stat (without -L)
+    # reports the link's own mode.
+    if [[ -L "$file" ]]; then
+        ftype="link"
+    elif [[ -d "$file" ]]; then
+        ftype="dir"
+    else
+        ftype="file"
+    fi
+
+    echo "${meta} ${ftype}"
+}
+
+# Print one aligned, colored table row for a path.
+# Arguments:
+#   $1 - color prefix applied to the metadata columns
+#   $2 - path to report
+print_file_entry() {
+    local color="$1"
+    local file="$2"
+    local info octal symbolic ownership ftype
+
+    info=$(get_file_info "$file")
+    # get_file_info prints four space-separated fields.
+    read -r octal symbolic ownership ftype <<< "$info"
+
+    printf " %b%-4s %-11s %-20s %-6s%b %s\n" \
+        "$color" "$octal" "$symbolic" "$ownership" "$ftype" "$RESET" "$file"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SCAN FUNCTIONS
+# ══════════════════════════════════════════════════════════════════════
+
+scan_world_writable() {
+ section_header "World-Writable Files & Directories"
+
+ printf " ${BOLD}%-4s %-11s %-20s %-6s${RESET} %s\n" "PERM" "MODE" "OWNER:GROUP" "TYPE" "PATH"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local count=0
+ local exclude_args
+ exclude_args=$(build_exclude_args)
+
+ for dir in $SCAN_DIRS; do
+ [[ -d "$dir" ]] || continue
+ # shellcheck disable=SC2086
+ while IFS= read -r file; do
+ [[ -z "$file" ]] && continue
+ print_file_entry "$CYAN" "$file"
+ count=$((count + 1))
+ done < <(find "$dir" -xdev -perm -0002 -not -type l $exclude_args 2>/dev/null)
+ done
+
+ COUNT_WORLD_WRITABLE=$count
+ echo ""
+ log "Found ${count} world-writable entries"
+}
+
+scan_suid() {
+ section_header "SUID Binaries"
+
+ printf " ${BOLD}%-4s %-11s %-20s %-6s${RESET} %s\n" "PERM" "MODE" "OWNER:GROUP" "TYPE" "PATH"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local count=0
+ local exclude_args
+ exclude_args=$(build_exclude_args)
+
+ for dir in $SCAN_DIRS; do
+ [[ -d "$dir" ]] || continue
+ # shellcheck disable=SC2086
+ while IFS= read -r file; do
+ [[ -z "$file" ]] && continue
+ local owner
+ owner=$(stat -c '%U' "$file" 2>/dev/null || echo "UNKNOWN")
+ local color="$YELLOW"
+ if [[ "$owner" == "root" ]]; then
+ color="$RED"
+ fi
+ print_file_entry "$color" "$file"
+ count=$((count + 1))
+ done < <(find "$dir" -xdev -type f -perm -4000 $exclude_args 2>/dev/null)
+ done
+
+ COUNT_SUID=$count
+ echo ""
+ log "Found ${count} SUID binaries"
+}
+
+scan_sgid() {
+ section_header "SGID Binaries"
+
+ printf " ${BOLD}%-4s %-11s %-20s %-6s${RESET} %s\n" "PERM" "MODE" "OWNER:GROUP" "TYPE" "PATH"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local count=0
+ local exclude_args
+ exclude_args=$(build_exclude_args)
+
+ for dir in $SCAN_DIRS; do
+ [[ -d "$dir" ]] || continue
+ # shellcheck disable=SC2086
+ while IFS= read -r file; do
+ [[ -z "$file" ]] && continue
+ print_file_entry "$YELLOW" "$file"
+ count=$((count + 1))
+ done < <(find "$dir" -xdev -type f -perm -2000 $exclude_args 2>/dev/null)
+ done
+
+ COUNT_SGID=$count
+ echo ""
+ log "Found ${count} SGID binaries"
+}
+
+scan_nobody() {
+ section_header "Files Owned by nobody/nogroup"
+
+ printf " ${BOLD}%-4s %-11s %-20s %-6s${RESET} %s\n" "PERM" "MODE" "OWNER:GROUP" "TYPE" "PATH"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local count=0
+ local exclude_args
+ exclude_args=$(build_exclude_args)
+
+ for dir in $SCAN_DIRS; do
+ [[ -d "$dir" ]] || continue
+ # shellcheck disable=SC2086
+ while IFS= read -r file; do
+ [[ -z "$file" ]] && continue
+ print_file_entry "$YELLOW" "$file"
+ count=$((count + 1))
+ done < <(find "$dir" -xdev \( -user nobody -o -group nogroup \) $exclude_args 2>/dev/null)
+ done
+
+ COUNT_NOBODY=$count
+ echo ""
+ log "Found ${count} files owned by nobody/nogroup"
+}
+
+scan_unowned() {
+ section_header "Files With No Valid Owner"
+
+ printf " ${BOLD}%-4s %-11s %-20s %-6s${RESET} %s\n" "PERM" "MODE" "OWNER:GROUP" "TYPE" "PATH"
+ printf " %s\n" "$(printf '%.0s─' {1..78})"
+
+ local count=0
+ local exclude_args
+ exclude_args=$(build_exclude_args)
+
+ for dir in $SCAN_DIRS; do
+ [[ -d "$dir" ]] || continue
+ # shellcheck disable=SC2086
+ while IFS= read -r file; do
+ [[ -z "$file" ]] && continue
+ print_file_entry "$RED" "$file"
+ count=$((count + 1))
+ done < <(find "$dir" -xdev \( -nouser -o -nogroup \) $exclude_args 2>/dev/null)
+ done
+
+ COUNT_UNOWNED=$count
+ echo ""
+ log "Found ${count} files with no valid owner"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+
+# Print the per-category counters and their grand total.
+print_summary() {
+    local total=$((COUNT_WORLD_WRITABLE + COUNT_SUID + COUNT_SGID + COUNT_NOBODY + COUNT_UNOWNED))
+    local rule=" ${BOLD}══════════════════════════════════════════${RESET}"
+
+    echo ""
+    echo -e "$rule"
+    echo -e " ${BOLD}Permissions Audit Summary${RESET}"
+    echo -e "$rule"
+    echo ""
+
+    # printf reuses its format for every label/value pair.
+    printf " %-30s %b\n" \
+        "World-writable:" "${CYAN}${COUNT_WORLD_WRITABLE}${RESET}" \
+        "SUID binaries:" "${RED}${COUNT_SUID}${RESET}" \
+        "SGID binaries:" "${YELLOW}${COUNT_SGID}${RESET}" \
+        "Owned by nobody/nogroup:" "${YELLOW}${COUNT_NOBODY}${RESET}" \
+        "No valid owner:" "${RED}${COUNT_UNOWNED}${RESET}"
+
+    echo ""
+    printf " %-30s %d\n" "Total findings:" "$total"
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2
+ exit 1 ;;
+ esac
+ done
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+# Entry point: parse options, set up colors, print the report header, run
+# each scan in order and finish with the summary.
+# Arguments: the script's command-line arguments
+main() {
+    local scan_step
+
+    parse_args "$@"
+    setup_colors
+
+    echo ""
+    echo -e "${BOLD}File Permissions Audit — $(hostname -f 2>/dev/null || hostname)${RESET}"
+    echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"
+    echo -e "${DIM}Scanning: ${SCAN_DIRS}${RESET}"
+
+    for scan_step in scan_world_writable scan_suid scan_sgid scan_nobody scan_unowned; do
+        "$scan_step"
+    done
+    print_summary
+}
+
+main "$@"
diff --git a/firewall-rule-diff.sh b/firewall-rule-diff.sh
new file mode 100644
index 0000000..8c52884
--- /dev/null
+++ b/firewall-rule-diff.sh
@@ -0,0 +1,620 @@
+#!/usr/bin/env bash
+
+######################################################################################
+#### firewall-rule-diff.sh — Detect firewall rule drift against a saved baseline ####
+#### Supports UFW, iptables, and nftables. Saves snapshots, diffs against ####
+#### baseline, exports Prometheus metrics via textfile collector. ####
+#### Requires: bash 4+, diff, coreutils ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### sudo ./firewall-rule-diff.sh --save ####
+#### sudo ./firewall-rule-diff.sh --check ####
+#### ####
+#### See --help for all options. ####
+######################################################################################
+
+# Strict mode: exit on error, on use of an unset variable, and on a failure
+# in any stage of a pipeline.
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Most of these are overridden by parse_args; OUTPUT_FORMAT, JUNIT_FILE,
+# VERBOSE and COLOR can also be preset through the environment.
+MODE="" # save or check
+BACKEND="" # auto-detect: ufw, iptables, nftables
+BASELINE_DIR="/etc/firewall-baseline"
+MAX_AGE_DAYS=30
+TEXTFILE_MODE=false
+PROM_FILE="/var/lib/node_exporter/firewall_drift.prom"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
+JUNIT_FILE="${JUNIT_FILE:-firewall-drift-results.xml}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+# PASS/FAIL/WARN/TOTAL and RESULTS are maintained by the record_* functions;
+# each RESULTS entry has the form "STATUS|name|detail".
+PASS=0
+FAIL=0
+WARN=0
+TOTAL=0
+RESULTS=()
+START_TIME=""
+# Drift figures accumulated while comparing against the baseline.
+RULES_ADDED=0
+RULES_REMOVED=0
+RULES_TOTAL=0
+DRIFT_DETECTED=0
+# Baseline age in seconds (computed in do_check).
+BASELINE_AGE=0
+# Backend actually in use, set by detect_backend.
+DETECTED_BACKEND=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ BOLD='\033[1m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; }
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# Record a passing check and report it in the active output format.
+# Arguments: $1 - check name, $2 - optional detail text
+# Globals:   PASS, TOTAL, RESULTS (updated); OUTPUT_FORMAT (read)
+record_pass() {
+    local name="$1" detail="${2:-}"
+
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+
+    case "$OUTPUT_FORMAT" in
+        tap) echo "ok ${TOTAL} - ${name}" ;;
+        text) echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}" ;;
+    esac
+}
+
+# Record a failing check and report it in the active output format.
+# Arguments: $1 - check name, $2 - optional detail text
+# Globals:   FAIL, TOTAL, RESULTS (updated); OUTPUT_FORMAT (read)
+# Returns:   always 0, so a call never trips `set -e` in the caller.
+record_fail() {
+    local name="$1"
+    local detail="${2:-}"
+    ((FAIL++)) || true
+    ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # A plain `[[ -n "$detail" ]] && echo ...` as the last command would make
+        # the function return 1 when detail is empty and abort the script under
+        # `set -e`; an if statement returns 0 in that case.
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    elif [[ "$OUTPUT_FORMAT" == "text" ]]; then
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+    return 0
+}
+
+record_warn() {
+ local name="$1"
+ local detail="${2:-}"
+ ((WARN++)) || true
+ ((TOTAL++)) || true
+ RESULTS+=("WARN|${name}|${detail}")
+ if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "ok ${TOTAL} - ${name} # SKIP ${detail}"
+ elif [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo -e " ${YELLOW}⊘${RESET} ${name}${detail:+ — ${detail}}"
+ fi
+}
+
+# ── Help ──────────────────────────────────────────────────────────────
+# Print the usage text to stdout and exit with status 0.
+# The heredoc delimiter is quoted, so the text is emitted literally with no
+# variable or command expansion.
+show_help() {
+ cat <<'EOF'
+Usage: firewall-rule-diff.sh [OPTIONS]
+
+Detect firewall rule drift by comparing current state against a saved baseline.
+Supports UFW, iptables, and nftables with auto-detection.
+
+Modes:
+ --save Save current firewall rules as new baseline
+ --check Compare current rules against baseline (default)
+
+Options:
+ --backend BACKEND Force backend: ufw, iptables, nftables (default: auto-detect)
+ --baseline-dir PATH Baseline storage directory (default: /etc/firewall-baseline/)
+ --max-age DAYS Warn if baseline older than N days (default: 30)
+ --textfile Write Prometheus metrics to textfile collector
+ --prom-file PATH Textfile path (default: /var/lib/node_exporter/firewall_drift.prom)
+ --format FORMAT Output: text (default), tap, junit
+ --junit-file FILE JUnit output path (default: firewall-drift-results.xml)
+ --verbose Show debug output
+ --no-color Disable colored output
+ -h, --help Show this help
+
+Examples:
+ sudo ./firewall-rule-diff.sh --save
+ sudo ./firewall-rule-diff.sh --check
+ sudo ./firewall-rule-diff.sh --check --textfile
+ sudo ./firewall-rule-diff.sh --backend iptables --check
+ sudo ./firewall-rule-diff.sh --check --max-age 7
+EOF
+ exit 0
+}
+
+# ── Parse Arguments ───────────────────────────────────────────────────
+parse_args() {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --save) MODE="save"; shift ;;
+ --check) MODE="check"; shift ;;
+ --backend) BACKEND="$2"; shift 2 ;;
+ --baseline-dir) BASELINE_DIR="$2"; shift 2 ;;
+ --max-age) MAX_AGE_DAYS="$2"; shift 2 ;;
+ --textfile) TEXTFILE_MODE=true; shift ;;
+ --prom-file) PROM_FILE="$2"; shift 2 ;;
+ --format) OUTPUT_FORMAT="$2"; shift 2 ;;
+ --junit-file) JUNIT_FILE="$2"; shift 2 ;;
+ --verbose) VERBOSE=true; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ -h|--help) show_help ;;
+ *) err "Unknown option: $1"; echo "Run with --help for usage."; exit 1 ;;
+ esac
+ done
+
+ if [[ -z "$MODE" ]]; then
+ MODE="check"
+ fi
+}
+
+# ── Detect Backend ────────────────────────────────────────────────────
+# Decide which firewall backend to inspect and store it in DETECTED_BACKEND.
+# A non-empty BACKEND is used as-is. Otherwise the first match wins: an
+# active UFW, then nftables with a non-empty ruleset, then iptables-save
+# being installed. Exits 1 when none is found.
+detect_backend() {
+    if [[ -n "$BACKEND" ]]; then
+        DETECTED_BACKEND="$BACKEND"
+        verbose "Backend forced: ${DETECTED_BACKEND}"
+        return
+    fi
+
+    if command -v ufw &>/dev/null && ufw status &>/dev/null; then
+        local ufw_status
+        # The `|| true` guard keeps an early-closed pipe (head exits after one
+        # line) from failing the pipeline under `set -o pipefail`.
+        ufw_status=$({ ufw status 2>/dev/null || true; } | head -1)
+        # "Status: inactive" also contains the substring "active", so it has
+        # to be ruled out explicitly.
+        if [[ "$ufw_status" == *"active"* && "$ufw_status" != *"inactive"* ]]; then
+            DETECTED_BACKEND="ufw"
+            verbose "Detected active UFW"
+            return
+        fi
+    fi
+
+    if command -v nft &>/dev/null; then
+        local nft_rules
+        # Without the guard, a failing `nft` would abort the whole script under
+        # `set -euo pipefail` instead of falling through to the iptables check.
+        nft_rules=$({ nft list ruleset 2>/dev/null || true; } | wc -l)
+        if [[ "$nft_rules" -gt 0 ]]; then
+            DETECTED_BACKEND="nftables"
+            verbose "Detected nftables with ${nft_rules} lines"
+            return
+        fi
+    fi
+
+    if command -v iptables-save &>/dev/null; then
+        DETECTED_BACKEND="iptables"
+        verbose "Detected iptables"
+        return
+    fi
+
+    err "No supported firewall backend found (ufw, nftables, iptables)"
+    exit 1
+}
+
+# ── Snapshot Functions ────────────────────────────────────────────────
+snapshot_ufw() {
+ local dir="$1"
+ ufw status numbered > "${dir}/ufw-status.txt" 2>/dev/null || true
+ ufw status verbose > "${dir}/ufw-verbose.txt" 2>/dev/null || true
+ if [[ -f /etc/ufw/user.rules ]]; then
+ cp /etc/ufw/user.rules "${dir}/user.rules"
+ fi
+ if [[ -f /etc/ufw/user6.rules ]]; then
+ cp /etc/ufw/user6.rules "${dir}/user6.rules"
+ fi
+ # count rules from numbered output (skip header lines)
+ RULES_TOTAL=$(grep -cE '^\[' "${dir}/ufw-status.txt" 2>/dev/null) || RULES_TOTAL=0
+ verbose "UFW snapshot: ${RULES_TOTAL} rules"
+}
+
+snapshot_iptables() {
+ local dir="$1"
+ iptables-save > "${dir}/iptables-v4.rules" 2>/dev/null || true
+ if command -v ip6tables-save &>/dev/null; then
+ ip6tables-save > "${dir}/iptables-v6.rules" 2>/dev/null || true
+ fi
+ # count non-comment, non-empty lines
+ RULES_TOTAL=$(grep -cvE '^(#|$|\*|COMMIT|:)' "${dir}/iptables-v4.rules" 2>/dev/null) || RULES_TOTAL=0
+ if [[ -f "${dir}/iptables-v6.rules" ]]; then
+ local v6_count
+ v6_count=$(grep -cvE '^(#|$|\*|COMMIT|:)' "${dir}/iptables-v6.rules" 2>/dev/null) || v6_count=0
+ RULES_TOTAL=$((RULES_TOTAL + v6_count))
+ fi
+ verbose "iptables snapshot: ${RULES_TOTAL} rules"
+}
+
+# Dump the nftables ruleset into a directory and set RULES_TOTAL to the
+# number of indented lines beginning with "rule", "chain" or "table".
+# Arguments: $1 - destination directory
+snapshot_nftables() {
+    local dir="$1"
+    local ruleset_file="${dir}/nftables.rules"
+
+    nft list ruleset > "$ruleset_file" 2>/dev/null || true
+
+    # grep -c exits non-zero on zero matches, hence the fallback assignment.
+    RULES_TOTAL=$(grep -cE '^\s+(rule|chain|table)' "$ruleset_file" 2>/dev/null) || RULES_TOTAL=0
+    verbose "nftables snapshot: ${RULES_TOTAL} lines"
+}
+
+take_snapshot() {
+ local dir="$1"
+ case "$DETECTED_BACKEND" in
+ ufw) snapshot_ufw "$dir" ;;
+ iptables) snapshot_iptables "$dir" ;;
+ nftables) snapshot_nftables "$dir" ;;
+ esac
+ echo "$DETECTED_BACKEND" > "${dir}/backend.txt"
+ date +%s > "${dir}/timestamp.txt"
+ date -Is > "${dir}/timestamp-human.txt"
+}
+
+# ── Save Mode ─────────────────────────────────────────────────────────
+# Save mode: archive any existing baseline, take a fresh snapshot into
+# "${BASELINE_DIR}/baseline" and report the result.
+# Exits 1 when the snapshot did not produce backend.txt.
+do_save() {
+    local snapshot_dir="${BASELINE_DIR}/baseline"
+
+    mkdir -p "${BASELINE_DIR}"
+
+    # Move a previous baseline aside, named after its own timestamp; if the
+    # move fails, delete it instead.
+    if [[ -d "$snapshot_dir" ]]; then
+        local prev_ts archive_dir
+        prev_ts=$(cat "${snapshot_dir}/timestamp.txt" 2>/dev/null || echo "unknown")
+        archive_dir="${BASELINE_DIR}/archive-${prev_ts}"
+        if ! mv "$snapshot_dir" "$archive_dir" 2>/dev/null; then
+            rm -rf "$snapshot_dir"
+        fi
+        verbose "Archived previous baseline"
+    fi
+
+    mkdir -p "$snapshot_dir"
+
+    verbose "Taking ${DETECTED_BACKEND} snapshot to ${snapshot_dir}"
+    take_snapshot "$snapshot_dir"
+
+    # backend.txt is written by take_snapshot; its absence means the snapshot failed.
+    if [[ ! -f "${snapshot_dir}/backend.txt" ]]; then
+        err "Snapshot failed — ${snapshot_dir}/backend.txt not created"
+        exit 1
+    fi
+
+    case "$OUTPUT_FORMAT" in
+        text)
+            echo -e "${BOLD}Firewall Rule Diff${RESET}"
+            echo "Backend: ${DETECTED_BACKEND}"
+            echo "Baseline: ${snapshot_dir}"
+            echo "Time: $(cat "${snapshot_dir}/timestamp-human.txt")"
+            echo "Rules: ${RULES_TOTAL}"
+            echo ""
+            echo -e " ${GREEN}✓${RESET} Baseline saved — ${RULES_TOTAL} rules (${DETECTED_BACKEND})"
+            ;;
+        tap)
+            echo "1..1"
+            echo "ok 1 - Baseline saved (${RULES_TOTAL} rules, ${DETECTED_BACKEND})"
+            ;;
+    esac
+}
+
+# ── Check Mode ────────────────────────────────────────────────────────
+# Check mode: compare the current firewall state with the saved baseline.
+# Records pass/fail/warn results for baseline presence, backend match and
+# baseline age, snapshots the current rules into a temporary directory,
+# hands both directories to the backend-specific diff function and prints
+# the summary.
+# Globals written: START_TIME, DRIFT_DETECTED, BASELINE_AGE.
+do_check() {
+ START_TIME=$(date +%s)
+ local baseline_dir="${BASELINE_DIR}/baseline"
+
+ if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo -e "${BOLD}Firewall Rule Diff${RESET}"
+ echo "Backend: ${DETECTED_BACKEND}"
+ echo "Baseline: ${baseline_dir}"
+ echo "Time: $(date -Is)"
+ echo ""
+ elif [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+ echo "TAP version 13"
+ fi
+
+ # check baseline exists
+ # Without a baseline there is nothing to compare; this counts as drift.
+ if [[ ! -d "$baseline_dir" ]]; then
+ record_fail "Baseline exists" "no baseline found — run with --save first"
+ DRIFT_DETECTED=1
+ print_summary
+ return
+ fi
+
+ # check backend matches
+ local baseline_backend
+ baseline_backend=$(cat "${baseline_dir}/backend.txt" 2>/dev/null || echo "unknown")
+ if [[ "$baseline_backend" != "$DETECTED_BACKEND" ]]; then
+ record_fail "Backend match" "baseline uses ${baseline_backend}, current is ${DETECTED_BACKEND}"
+ DRIFT_DETECTED=1
+ else
+ record_pass "Backend match" "${DETECTED_BACKEND}"
+ fi
+
+ # check baseline age
+ # A missing timestamp file is read as epoch 0.
+ local baseline_ts
+ baseline_ts=$(cat "${baseline_dir}/timestamp.txt" 2>/dev/null || echo "0")
+ local now
+ now=$(date +%s)
+ BASELINE_AGE=$((now - baseline_ts))
+ local age_days=$((BASELINE_AGE / 86400))
+
+ if [[ $age_days -gt $MAX_AGE_DAYS ]]; then
+ record_warn "Baseline age" "${age_days} days old (threshold: ${MAX_AGE_DAYS})"
+ else
+ record_pass "Baseline age" "${age_days} days old"
+ fi
+
+ # take current snapshot to temp dir
+ local tmp_dir
+ tmp_dir=$(mktemp -d)
+ # The path is embedded into the trap text now, because the local variable
+ # is no longer in scope when the EXIT trap runs.
+ trap 'rm -rf "'"$tmp_dir"'"' EXIT
+ take_snapshot "$tmp_dir"
+
+ if [[ "$OUTPUT_FORMAT" == "text" ]]; then
+ echo ""
+ echo -e "${BOLD}Rule Comparison${RESET}"
+ fi
+
+ # diff based on backend
+ case "$DETECTED_BACKEND" in
+ ufw) diff_ufw "$baseline_dir" "$tmp_dir" ;;
+ iptables) diff_iptables "$baseline_dir" "$tmp_dir" ;;
+ nftables) diff_nftables "$baseline_dir" "$tmp_dir" ;;
+ esac
+
+ print_summary
+}
+
+# ── Diff Functions ────────────────────────────────────────────────────
+# Compare one baseline rules file with its current counterpart.
+# Args:    $1 - human-readable label for the check
+#          $2 - path to the baseline file
+#          $3 - path to the current file
+# Globals: DRIFT_DETECTED, RULES_ADDED, RULES_REMOVED (written);
+#          VERBOSE, OUTPUT_FORMAT, GREEN, RED, RESET (read)
+# Returns: always 0; outcomes are reported via record_pass / record_fail.
+diff_rules_file() {
+    local label="$1"
+    local baseline_file="$2"
+    local current_file="$3"
+
+    if [[ ! -f "$baseline_file" ]] && [[ ! -f "$current_file" ]]; then
+        verbose "Both files missing for ${label} — skipping"
+        return 0
+    fi
+
+    if [[ ! -f "$baseline_file" ]]; then
+        record_fail "${label}" "file missing from baseline but present now"
+        DRIFT_DETECTED=1
+        return 0
+    fi
+
+    if [[ ! -f "$current_file" ]]; then
+        record_fail "${label}" "file present in baseline but missing now"
+        DRIFT_DETECTED=1
+        return 0
+    fi
+
+    local diff_output
+    # diff exits non-zero when the files differ; that is not an error here
+    diff_output=$(diff --unified=0 "$baseline_file" "$current_file" 2>/dev/null) || true
+
+    if [[ -z "$diff_output" ]]; then
+        record_pass "${label}" "no changes"
+        return 0
+    fi
+
+    DRIFT_DETECTED=1
+
+    # Strip the two-line "---"/"+++" file header so every remaining line that
+    # starts with + or - is a real change. Matching on the second character
+    # instead would miss rules that themselves begin with "-" or "+"
+    # (e.g. a removed "-A INPUT ..." line appears as "--A INPUT ...").
+    local diff_body
+    diff_body=$(tail -n +3 <<< "$diff_output")
+
+    local added removed
+    # grep -c prints 0 but exits 1 when nothing matches
+    added=$(grep -c '^+' <<< "$diff_body") || added=0
+    removed=$(grep -c '^-' <<< "$diff_body") || removed=0
+
+    RULES_ADDED=$((RULES_ADDED + added))
+    RULES_REMOVED=$((RULES_REMOVED + removed))
+
+    record_fail "${label}" "${added} added, ${removed} removed"
+
+    if [[ "$VERBOSE" == "true" || "$OUTPUT_FORMAT" == "text" ]]; then
+        # show the actual diff lines (limit to 20 lines)
+        local shown=0 line
+        while IFS= read -r line; do
+            case "$line" in
+                +*) echo -e " ${GREEN}${line}${RESET}" ;;
+                -*) echo -e " ${RED}${line}${RESET}" ;;
+                *)  continue ;;
+            esac
+            shown=$((shown + 1))
+            if (( shown >= 20 )); then
+                echo " ... (truncated)"
+                break
+            fi
+        done <<< "$diff_body"
+    fi
+
+    # Explicit success: a trailing "[[ ... ]] && ..." would otherwise leak a
+    # non-zero status to the caller and abort the run under "set -e".
+    return 0
+}
+
+# Diff the UFW snapshot files and compare the number of status lines that
+# start with "[" between baseline and current snapshot.
+# Args: $1 - baseline snapshot directory, $2 - current snapshot directory
+# Globals written: DRIFT_DETECTED (set to 1 when the counts differ)
+diff_ufw() {
+    local base_dir="$1" cur_dir="$2"
+    local pair
+
+    # "label:file" pairs, compared in this order
+    for pair in \
+        "UFW status:ufw-status.txt" \
+        "UFW IPv4 rules:user.rules" \
+        "UFW IPv6 rules:user6.rules"; do
+        diff_rules_file "${pair%%:*}" "${base_dir}/${pair#*:}" "${cur_dir}/${pair#*:}"
+    done
+
+    # rule count comparison (a missing file counts as zero rules)
+    local n_base n_cur
+    n_base=$(grep -cE '^\[' "${base_dir}/ufw-status.txt" 2>/dev/null) || n_base=0
+    n_cur=$(grep -cE '^\[' "${cur_dir}/ufw-status.txt" 2>/dev/null) || n_cur=0
+
+    if (( n_base == n_cur )); then
+        record_pass "Rule count" "${n_cur} rules"
+    else
+        record_fail "Rule count" "baseline: ${n_base}, current: ${n_cur}"
+        DRIFT_DETECTED=1
+    fi
+}
+
+# Diff the iptables rule files for both address families and compare the
+# number of lines starting with ":" in the IPv4 file.
+# Args: $1 - baseline snapshot directory, $2 - current snapshot directory
+# Globals written: DRIFT_DETECTED (set to 1 when the counts differ)
+diff_iptables() {
+    local base_dir="$1" cur_dir="$2"
+    local fam
+
+    for fam in 4 6; do
+        diff_rules_file "iptables IPv${fam} rules" \
+            "${base_dir}/iptables-v${fam}.rules" \
+            "${cur_dir}/iptables-v${fam}.rules"
+    done
+
+    # chain count comparison (a missing file counts as zero chains)
+    local n_base n_cur
+    n_base=$(grep -cE '^:' "${base_dir}/iptables-v4.rules" 2>/dev/null) || n_base=0
+    n_cur=$(grep -cE '^:' "${cur_dir}/iptables-v4.rules" 2>/dev/null) || n_cur=0
+
+    if (( n_base == n_cur )); then
+        record_pass "Chain count (IPv4)" "${n_cur} chains"
+    else
+        record_fail "Chain count (IPv4)" "baseline: ${n_base}, current: ${n_cur}"
+        DRIFT_DETECTED=1
+    fi
+}
+
+# Diff the nftables ruleset file and compare the number of lines starting
+# with "table" between baseline and current snapshot.
+# Args: $1 - baseline snapshot directory, $2 - current snapshot directory
+# Globals written: DRIFT_DETECTED (set to 1 when the counts differ)
+diff_nftables() {
+    local base_rules="${1}/nftables.rules"
+    local cur_rules="${2}/nftables.rules"
+
+    diff_rules_file "nftables ruleset" "$base_rules" "$cur_rules"
+
+    # table count comparison (a missing file counts as zero tables)
+    local n_base n_cur
+    n_base=$(grep -c '^table' "$base_rules" 2>/dev/null) || n_base=0
+    n_cur=$(grep -c '^table' "$cur_rules" 2>/dev/null) || n_cur=0
+
+    if (( n_base == n_cur )); then
+        record_pass "Table count" "${n_cur} tables"
+    else
+        record_fail "Table count" "baseline: ${n_base}, current: ${n_cur}"
+        DRIFT_DETECTED=1
+    fi
+}
+
+# ── Summary ───────────────────────────────────────────────────────────
+# Emit the end-of-run summary for the selected output format and, when
+# enabled, hand off to the Prometheus textfile writer.
+# Globals read: START_TIME, OUTPUT_FORMAT, TOTAL, PASS, FAIL, WARN,
+#               DRIFT_DETECTED, RULES_ADDED, RULES_REMOVED, DETECTED_BACKEND,
+#               TEXTFILE_MODE and the color variables
+print_summary() {
+    local elapsed
+    elapsed=$(( $(date +%s) - START_TIME ))
+
+    case "$OUTPUT_FORMAT" in
+        tap)
+            # the plan line is printed last, once the number of checks is known
+            echo "1..${TOTAL}"
+            ;;
+        text)
+            local rule="────────────────────────────────────────"
+            echo ""
+            echo "$rule"
+            echo -e "${BOLD}Summary${RESET} ${DETECTED_BACKEND}"
+            echo -e " ${PASS} passed ${FAIL} failed ${WARN} skipped (${elapsed}s)"
+            if [[ $DRIFT_DETECTED -eq 1 ]]; then
+                echo -e " Rules added: ${RULES_ADDED} removed: ${RULES_REMOVED}"
+                echo -e " ${RED}Drift detected.${RESET}"
+            else
+                echo -e " ${GREEN}No drift detected.${RESET}"
+            fi
+            echo "$rule"
+            ;;
+        junit)
+            write_junit
+            ;;
+    esac
+
+    if [[ "$TEXTFILE_MODE" == "true" ]]; then
+        write_prometheus
+    fi
+}
+
+# ── JUnit Output ──────────────────────────────────────────────────────
+# Escape the characters that are special in XML text and attribute values.
+# sed is used instead of ${var//pat/repl} because the treatment of "&" in the
+# replacement string differs between bash versions.
+# Args: $1 - raw string; Outputs: escaped string on stdout
+_junit_xml_escape() {
+    printf '%s' "$1" \
+        | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'
+}
+
+# Write the collected check results to $JUNIT_FILE as a JUnit XML report.
+# Globals read: START_TIME, TOTAL, FAIL, WARN, JUNIT_FILE and RESULTS, whose
+#               entries have the form "STATUS|name|detail".
+# NOTE(review): the element/attribute layout below follows the common JUnit
+# XML shape (testsuites > testsuite > testcase with failure/skipped children);
+# confirm the suite name and attributes against what the CI consumer expects.
+write_junit() {
+    local end_time
+    end_time=$(date +%s)
+    local elapsed=$((end_time - START_TIME))
+
+    {
+        echo '<?xml version="1.0" encoding="UTF-8"?>'
+        echo "<testsuites>"
+        echo "  <testsuite name=\"firewall-rule-diff\" tests=\"${TOTAL}\" failures=\"${FAIL}\" skipped=\"${WARN}\" time=\"${elapsed}\">"
+
+        local result status name detail
+        for result in "${RESULTS[@]}"; do
+            status=$(echo "$result" | cut -d'|' -f1)
+            name=$(echo "$result" | cut -d'|' -f2)
+            detail=$(echo "$result" | cut -d'|' -f3)
+
+            # escape XML
+            name=$(_junit_xml_escape "$name")
+            detail=$(_junit_xml_escape "$detail")
+
+            echo "    <testcase name=\"${name}\" classname=\"firewall\">"
+            if [[ "$status" == "FAIL" ]]; then
+                echo "      <failure message=\"${detail}\"/>"
+            elif [[ "$status" == "WARN" ]]; then
+                echo "      <skipped message=\"${detail}\"/>"
+            fi
+            echo "    </testcase>"
+        done
+
+        echo "  </testsuite>"
+        echo "</testsuites>"
+    } > "$JUNIT_FILE"
+
+    verbose "JUnit report written to ${JUNIT_FILE}"
+}
+
+# ── Prometheus Output ─────────────────────────────────────────────────
+# Write the scan results to $PROM_FILE in Prometheus text format.
+# The metrics are staged in a temporary file next to the target and renamed
+# into place so a reader never sees a partially written file.
+# Globals read: PROM_FILE, DRIFT_DETECTED, RULES_ADDED, RULES_REMOVED,
+#               RULES_TOTAL, BASELINE_AGE, DETECTED_BACKEND
+write_prometheus() {
+    local prom_dir
+    prom_dir=$(dirname "$PROM_FILE")
+    if [[ ! -d "$prom_dir" ]]; then
+        warn "Prometheus textfile directory does not exist: ${prom_dir}"
+        return
+    fi
+
+    # mktemp gives an unpredictable, exclusively created name; a fixed
+    # "<file>.<pid>" name could be pre-created as a symlink by another user.
+    local tmp_file
+    if ! tmp_file=$(mktemp "${PROM_FILE}.XXXXXX"); then
+        warn "Could not create temporary file for ${PROM_FILE}"
+        return
+    fi
+
+    {
+        echo "# HELP firewall_drift_detected Whether firewall rules differ from baseline"
+        echo "# TYPE firewall_drift_detected gauge"
+        echo "firewall_drift_detected ${DRIFT_DETECTED}"
+        echo "# HELP firewall_rules_added Rules added since baseline"
+        echo "# TYPE firewall_rules_added gauge"
+        echo "firewall_rules_added ${RULES_ADDED}"
+        echo "# HELP firewall_rules_removed Rules removed since baseline"
+        echo "# TYPE firewall_rules_removed gauge"
+        echo "firewall_rules_removed ${RULES_REMOVED}"
+        echo "# HELP firewall_rules_total Current total firewall rules"
+        echo "# TYPE firewall_rules_total gauge"
+        echo "firewall_rules_total ${RULES_TOTAL}"
+        echo "# HELP firewall_baseline_age_seconds Seconds since baseline was saved"
+        echo "# TYPE firewall_baseline_age_seconds gauge"
+        # the age is only computed once a baseline was found; default to 0
+        echo "firewall_baseline_age_seconds ${BASELINE_AGE:-0}"
+        echo "# HELP firewall_scan_timestamp Unix timestamp of last scan"
+        echo "# TYPE firewall_scan_timestamp gauge"
+        echo "firewall_scan_timestamp $(date +%s)"
+        echo "# HELP firewall_backend Active firewall backend"
+        echo "# TYPE firewall_backend gauge"
+        echo "firewall_backend{backend=\"${DETECTED_BACKEND}\"} 1"
+    } > "$tmp_file"
+
+    # mktemp creates the file with mode 0600; make it readable for the
+    # process that scrapes the textfile directory before publishing it.
+    chmod 644 "$tmp_file"
+    mv -f "$tmp_file" "$PROM_FILE"
+    verbose "Prometheus metrics written to ${PROM_FILE}"
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+# Entry point: parse options, require root, detect the firewall backend and
+# run the selected mode. Exits 1 when any check failed, 0 otherwise.
+main() {
+    setup_colors
+    parse_args "$@"
+    # re-apply after parse_args, which may have processed --no-color
+    setup_colors
+
+    if (( EUID != 0 )); then
+        err "This script must be run as root."
+        exit 1
+    fi
+
+    detect_backend
+
+    if [[ "$MODE" == "save" ]]; then
+        do_save
+    elif [[ "$MODE" == "check" ]]; then
+        do_check
+    fi
+
+    if (( FAIL > 0 )); then
+        exit 1
+    fi
+    exit 0
+}
+
+main "$@"
diff --git a/fix-code-server-nginx.sh b/fix-code-server-nginx.sh
new file mode 100755
index 0000000..9b4c7fe
--- /dev/null
+++ b/fix-code-server-nginx.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+###############################################################
+#### Fix code-server Nginx Config ####
+#### Applies X-Frame-Options + query-filter fixes ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version: 1.0 ####
+#### ####
+#### Usage: sudo ./fix-code-server-nginx.sh [domain] ####
+###############################################################
+set -euo pipefail
+
+# The nginx config edits and the reload below need root.
+if [[ $EUID -ne 0 ]]; then
+    echo "ERROR: This script must be run as root (sudo)."
+    exit 1
+fi
+
+# Domain comes from the first argument, or interactively when omitted.
+DOMAIN="${1:-}"
+if [[ -z "$DOMAIN" ]]; then
+    read -rp "Enter the code-server domain (e.g. code.mydomain.com): " DOMAIN
+fi
+
+# The domain is embedded in file names and in the grep/sed expressions below,
+# so accept hostname characters only.
+if [[ ! "$DOMAIN" =~ ^[A-Za-z0-9]([A-Za-z0-9.-]*[A-Za-z0-9])?$ ]]; then
+    echo "ERROR: Invalid domain: '${DOMAIN}'"
+    exit 1
+fi
+
+HEADERS_FILE="/etc/nginx/snippets/security-headers-${DOMAIN}.conf"
+SITE_CONF="/etc/nginx/conf.d/code-server.conf"
+
+echo "=== Fix 1: X-Frame-Options DENY -> SAMEORIGIN ==="
+if [[ -f "$HEADERS_FILE" ]]; then
+    if grep -q 'X-Frame-Options "DENY"' "$HEADERS_FILE"; then
+        sed -i 's/X-Frame-Options "DENY"/X-Frame-Options "SAMEORIGIN"/' "$HEADERS_FILE"
+        echo " Updated: $HEADERS_FILE"
+    else
+        echo " Already set to SAMEORIGIN (or not found) in $HEADERS_FILE — skipping."
+    fi
+else
+    echo " WARNING: $HEADERS_FILE not found. Skipping."
+fi
+
+echo ""
+echo "=== Fix 2: Disable query-filter snippet ==="
+QUERY_FILTER="snippets/query-filter-${DOMAIN}.conf"
+# Regex-safe form of the path: dots must match literally, not any character.
+QUERY_FILTER_RE=$(printf '%s' "$QUERY_FILTER" | sed 's/\./\\./g')
+# One expression shared by the detection (grep) and the rewrite (sed) so the
+# "Commented out" message is only printed when the line is really rewritten.
+INCLUDE_RE="^([[:space:]]*)include[[:space:]]+${QUERY_FILTER_RE};"
+if [[ -f "$SITE_CONF" ]]; then
+    if grep -qE "$INCLUDE_RE" "$SITE_CONF"; then
+        sed -i -E "s|${INCLUDE_RE}|\1# Disabled: breaks VS Code extensions\n\1# include ${QUERY_FILTER};|" "$SITE_CONF"
+        echo " Commented out query filter in: $SITE_CONF"
+    else
+        echo " Query filter already disabled (or not found) in $SITE_CONF — skipping."
+    fi
+else
+    echo " WARNING: $SITE_CONF not found. Skipping."
+fi
+
+echo ""
+echo "=== Testing and reloading nginx ==="
+# Only reload when the edited configuration passes nginx's own syntax check.
+if nginx -t; then
+    systemctl reload nginx
+    echo " Nginx reloaded successfully."
+else
+    echo " ERROR: nginx config test failed. Check the files manually."
+    exit 1
+fi
+
+echo ""
+echo "Done! Reload your code-server browser tab to verify."
diff --git a/freeradius-exporter.sh b/freeradius-exporter.sh
new file mode 100644
index 0000000..783c5a5
--- /dev/null
+++ b/freeradius-exporter.sh
@@ -0,0 +1,395 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### freeradius-exporter.sh — Prometheus metrics exporter for FreeRADIUS ####
+#### Exports authentication, accounting, and proxy statistics from the ####
+#### FreeRADIUS status server as Prometheus metrics ####
+#### Requires: bash 4+, radclient ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./freeradius-exporter.sh --http --port 9620 ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -uo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Connection settings for the FreeRADIUS status server; each one can be
+# overridden from the environment.
+RADIUS_HOST="${RADIUS_HOST:-localhost}"
+RADIUS_STATUS_PORT="${RADIUS_STATUS_PORT:-18121}"
+# NOTE(review): the fallback secret is hard-coded — confirm deployments override it.
+RADIUS_SECRET="${RADIUS_SECRET:-adminsecret}"
+# Output settings: a non-empty OUTPUT_FILE selects file output in main(),
+# HTTP_MODE=true selects the built-in HTTP server listening on HTTP_PORT.
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9620
+
+# Version string shown on the HTTP landing page.
+EXPORTER_VERSION="1.00"
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Ask the FreeRADIUS status server for all statistics via radclient.
+# Globals read: RADIUS_HOST, RADIUS_STATUS_PORT, RADIUS_SECRET
+# Outputs: whatever radclient prints on stdout (its stderr is discarded)
+query_status_server() {
+    local request="Message-Authenticator = 0x00, FreeRADIUS-Statistics-Type = All"
+    local target="${RADIUS_HOST}:${RADIUS_STATUS_PORT}"
+
+    radclient "$target" status "${RADIUS_SECRET}" 2>/dev/null <<< "$request"
+}
+
+# Extract a numeric value from radclient output
+# Args: $1 - attribute name, $2 - radclient output ("Name = value" lines)
+# Returns: the value of the first line whose attribute name equals $1 (or
+#          ends with it), or 0 when no such line exists or the value is not
+#          a plain non-negative integer
+extract_value() {
+    local attr="$1"
+    local data="$2"
+    local val
+
+    # Compare against the attribute name itself rather than grepping for a
+    # substring: a substring match can hit several lines, whose values would
+    # be glued together into one bogus number.
+    val=$(awk -F'=' -v attr="$attr" '
+        NF >= 2 {
+            key = $1
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", key)
+            start = length(key) - length(attr) + 1
+            if (start >= 1 && substr(key, start) == attr) {
+                value = $2
+                gsub(/[[:space:]]/, "", value)
+                print value
+                exit
+            }
+        }' <<< "$data")
+
+    # Only plain integers may reach the metrics output
+    [[ "$val" =~ ^[0-9]+$ ]] || val=0
+    echo "$val"
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check radclient is available
+ if ! command -v radclient >/dev/null 2>&1; then
+ echo "ERROR: radclient command not found" >&2
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ while true; do
+ {
+ read -r request
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+FreeRADIUS Exporter v${EXPORTER_VERSION}
+
+FreeRADIUS Prometheus Exporter v${EXPORTER_VERSION}
+Metrics
+Authentication, accounting, and proxy statistics from the FreeRADIUS status server.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Entry point: parse the arguments, then serve over HTTP, write the metrics
+# to OUTPUT_FILE, or print them to stdout.
+main() {
+    parse_args "$@"
+
+    if [[ "$HTTP_MODE" = true ]]; then
+        run_http_server
+    elif [[ -z "$OUTPUT_FILE" ]]; then
+        generate_metrics
+    else
+        local target_dir staging line_count
+
+        target_dir="$(dirname "$OUTPUT_FILE")"
+        mkdir -p "$target_dir"
+
+        # stage next to the target so the final rename stays on one filesystem
+        staging=$(mktemp "${target_dir}/.freeradius_metrics.XXXXXX")
+
+        if ! generate_metrics > "$staging" 2>/dev/null; then
+            rm -f "$staging"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        # refuse to replace the previous file with a suspiciously short one
+        line_count=$(wc -l < "$staging" 2>/dev/null || echo 0)
+        if [[ "$line_count" -lt 5 ]]; then
+            rm -f "$staging"
+            echo "ERROR: Metrics file too small ($line_count lines), keeping previous" >&2
+            exit 1
+        fi
+
+        chmod 644 "$staging"
+        mv -f "$staging" "$OUTPUT_FILE"
+
+        echo "Metrics written to $OUTPUT_FILE ($line_count lines)" >&2
+    fi
+}
+
+main "$@"
diff --git a/game-server-exporter.sh b/game-server-exporter.sh
new file mode 100755
index 0000000..1f66285
--- /dev/null
+++ b/game-server-exporter.sh
@@ -0,0 +1,624 @@
+#!/bin/bash
+################################################################################
+# Script Name: game-server-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for game servers providing operational
+# metrics — Minecraft, Valheim, and Palworld player counts,
+# server status, TPS, query response times, and server version info
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - nmap-ncat (nc) for network queries
+# - curl for REST API queries (Palworld)
+# - python3 with mcstatus (optional, enhanced Minecraft metrics)
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# # Output to stdout
+# ./game-server-exporter.sh
+#
+# # HTTP server mode
+# ./game-server-exporter.sh --http -p 9195
+#
+# # Textfile collector mode
+# ./game-server-exporter.sh --textfile
+#
+# # Custom server addresses
+# ./game-server-exporter.sh --minecraft-host mc.example.com
+#
+# Metrics Exported:
+# - game_server_up{game,server} - Server reachability (1=up, 0=down)
+# - game_server_players_online{game,server} - Online player count
+# - game_server_players_max{game,server} - Maximum player slots
+# - game_server_info{game,server,version,motd} - Server version info
+# - game_server_tps{game="minecraft",server} - Ticks per second (Minecraft)
+# - game_server_query_duration_seconds{game,server} - Query time per server
+# - game_server_exporter_duration_seconds - Total script execution time
+# - game_server_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9195
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Output settings: a non-empty OUTPUT_FILE selects file output in main(),
+# HTTP_MODE=true selects the built-in HTTP server listening on HTTP_PORT.
+TEXTFILE_DIR="/var/lib/node_exporter"
+OUTPUT_FILE=""
+HTTP_MODE=false
+HTTP_PORT=9195
+
+# Server configuration
+# An empty *_HOST means that game is not configured.
+MINECRAFT_HOST=""
+MINECRAFT_QUERY_PORT=25565
+MINECRAFT_RCON_PORT=25575
+# TPS is only queried over RCON when a password is set.
+MINECRAFT_RCON_PASS=""
+VALHEIM_HOST=""
+VALHEIM_QUERY_PORT=2457
+PALWORLD_HOST=""
+PALWORLD_QUERY_PORT=8212
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Verify that the external tools needed for the configured servers exist.
+# Globals read: PALWORLD_HOST
+# Returns: 0 when every required tool is present, 1 otherwise (the missing
+#          tool is reported on stderr)
+check_prerequisites() {
+    command -v nc >/dev/null 2>&1 || {
+        echo "ERROR: nc (nmap-ncat) not found" >&2
+        return 1
+    }
+
+    # curl is only required when a Palworld host is configured
+    if [[ -n "$PALWORLD_HOST" ]]; then
+        if ! command -v curl >/dev/null 2>&1; then
+            echo "ERROR: curl not found (required for Palworld REST API)" >&2
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+# Escape special characters in Prometheus label values
+# Backslashes and double quotes are backslash-escaped; newlines are removed.
+# Args: $1 - string to escape
+# Returns: escaped string safe for Prometheus labels (on stdout)
+prom_escape() {
+    local val="$1"
+    val="${val//\\/\\\\}"
+    val="${val//\"/\\\"}"
+    val="${val//$'\n'/}"
+    # printf instead of echo: echo would treat values such as "-n" or "-e"
+    # as options and print nothing
+    printf '%s\n' "$val"
+}
+
+# ============================================================================
+# GAME SERVER QUERY FUNCTIONS
+# ============================================================================
+
+# Query Minecraft server using python3 mcstatus or basic TCP check
+# Args: $1 - host, $2 - port
+# Globals read: MINECRAFT_RCON_PASS, MINECRAFT_RCON_PORT
+# Sets global variables: mc_up, mc_players_online, mc_players_max, mc_version,
+#   mc_motd, mc_tps, mc_query_duration (whole seconds; sub-second queries
+#   are reported as 0)
+query_minecraft() {
+    local host="$1"
+    local port="$2"
+    local query_start query_end
+    local py_result="" first_line=""
+
+    mc_up=0
+    mc_players_online=0
+    mc_players_max=0
+    mc_version="unknown"
+    mc_motd="unknown"
+    mc_tps=""
+    mc_query_duration=0
+
+    query_start=$(date +%s%N)
+
+    # Try python3 mcstatus first (most reliable)
+    if command -v python3 >/dev/null 2>&1; then
+        # host and port travel as argv so they are never parsed as Python source
+        py_result=$(python3 -c "
+import sys
+try:
+    from mcstatus import JavaServer
+    server = JavaServer.lookup(sys.argv[1] + ':' + sys.argv[2], timeout=5)
+    status = server.status()
+    print('UP')
+    print(status.players.online)
+    print(status.players.max)
+    print(status.version.name)
+    desc = status.description
+    if isinstance(desc, dict):
+        desc = desc.get('text', 'unknown')
+    print(str(desc).replace(chr(10), ' '))
+except ImportError:
+    print('NO_MCSTATUS')
+except Exception as e:
+    print('DOWN')
+" "$host" "$port" 2>/dev/null) || true
+
+        first_line=$(echo "$py_result" | head -1)
+
+        if [ "$first_line" = "UP" ]; then
+            mc_up=1
+            mc_players_online=$(echo "$py_result" | sed -n '2p')
+            mc_players_max=$(echo "$py_result" | sed -n '3p')
+            mc_version=$(echo "$py_result" | sed -n '4p')
+            mc_motd=$(echo "$py_result" | sed -n '5p')
+            # Sanitize numeric values
+            [[ "$mc_players_online" =~ ^[0-9]+$ ]] || mc_players_online=0
+            [[ "$mc_players_max" =~ ^[0-9]+$ ]] || mc_players_max=0
+        elif [ "$first_line" != "NO_MCSTATUS" ]; then
+            # mcstatus available but server is down
+            query_end=$(date +%s%N)
+            mc_query_duration=$(( (query_end - query_start) / 1000000000 ))
+            return
+        fi
+    fi
+
+    # Fallback: basic TCP check. Reached with mc_up=0 only when python3 is
+    # missing or mcstatus is not installed (every other "down" case returned
+    # above).
+    if [ "$mc_up" -eq 0 ]; then
+        if nc -z -w 3 "$host" "$port" 2>/dev/null; then
+            mc_up=1
+            # Try to read SLP response for basic info
+            local slp_response
+            slp_response=$(printf '\xfe\x01' | nc -w 3 "$host" "$port" 2>/dev/null | strings 2>/dev/null) || true
+            if [ -n "$slp_response" ]; then
+                # Legacy SLP response: fields are picked by line position
+                mc_version=$(echo "$slp_response" | tr '\0' '\n' | sed -n '4p' 2>/dev/null) || mc_version="unknown"
+                mc_motd=$(echo "$slp_response" | tr '\0' '\n' | sed -n '5p' 2>/dev/null) || mc_motd="unknown"
+                mc_players_online=$(echo "$slp_response" | tr '\0' '\n' | sed -n '6p' 2>/dev/null) || mc_players_online=0
+                mc_players_max=$(echo "$slp_response" | tr '\0' '\n' | sed -n '7p' 2>/dev/null) || mc_players_max=0
+                # Sanitize numeric values
+                [[ "$mc_players_online" =~ ^[0-9]+$ ]] || mc_players_online=0
+                [[ "$mc_players_max" =~ ^[0-9]+$ ]] || mc_players_max=0
+            fi
+        fi
+    fi
+
+    # Try RCON for TPS if credentials are provided and server is up
+    if [ "$mc_up" -eq 1 ] && [ -n "$MINECRAFT_RCON_PASS" ]; then
+        local tps_result
+        # The password is handed over through the environment: it is neither
+        # spliced into the Python source (quotes or backslashes in it would
+        # break the script) nor visible in the process argument list.
+        tps_result=$(MC_RCON_PASS="$MINECRAFT_RCON_PASS" python3 -c "
+import os, re, sys
+try:
+    from mcrcon import MCRcon
+    with MCRcon(sys.argv[1], os.environ['MC_RCON_PASS'], port=int(sys.argv[2])) as mcr:
+        resp = mcr.command('tps')
+        # Parse TPS from response (e.g., '§6TPS from last 1m, 5m, 15m: §a20.0, §a20.0, §a20.0')
+        nums = re.findall(r'[\d.]+', resp)
+        if nums:
+            print(nums[-1])  # Last TPS value (15m average)
+except Exception:
+    pass
+" "$host" "$MINECRAFT_RCON_PORT" 2>/dev/null) || true
+        if [ -n "$tps_result" ]; then
+            mc_tps="$tps_result"
+        fi
+    fi
+
+    query_end=$(date +%s%N)
+    mc_query_duration=$(( (query_end - query_start) / 1000000000 ))
+}
+
+# Query Valheim server using Steam A2S protocol or TCP fallback
+# Args: $1 - host, $2 - port
+# Sets global variables: vh_up, vh_players_online, vh_players_max, vh_version,
+#   vh_motd, vh_query_duration (whole seconds)
+query_valheim() {
+    local host="$1"
+    local port="$2"
+    local query_start query_end
+
+    vh_up=0
+    vh_players_online=0
+    vh_players_max=0
+    vh_version="unknown"
+    vh_motd="unknown"
+    vh_query_duration=0
+
+    query_start=$(date +%s%N)
+
+    # Try python3 A2S query first (Steam query protocol)
+    if command -v python3 >/dev/null 2>&1; then
+        local py_result
+        # host and port travel as argv so they are never parsed as Python source
+        py_result=$(python3 -c "
+import sys
+try:
+    import a2s
+    address = (sys.argv[1], int(sys.argv[2]))
+    info = a2s.info(address, timeout=5)
+    print('UP')
+    print(info.player_count)
+    print(info.max_players)
+    print(info.version)
+    print(info.server_name.replace(chr(10), ' '))
+except ImportError:
+    print('NO_A2S')
+except Exception:
+    print('DOWN')
+" "$host" "$port" 2>/dev/null) || true
+
+        local first_line
+        first_line=$(echo "$py_result" | head -1)
+
+        if [ "$first_line" = "UP" ]; then
+            vh_up=1
+            vh_players_online=$(echo "$py_result" | sed -n '2p')
+            vh_players_max=$(echo "$py_result" | sed -n '3p')
+            vh_version=$(echo "$py_result" | sed -n '4p')
+            vh_motd=$(echo "$py_result" | sed -n '5p')
+            # Sanitize numeric values
+            [[ "$vh_players_online" =~ ^[0-9]+$ ]] || vh_players_online=0
+            [[ "$vh_players_max" =~ ^[0-9]+$ ]] || vh_players_max=0
+        elif [ "$first_line" != "NO_A2S" ]; then
+            # a2s is available but the query failed: report the server as down
+            query_end=$(date +%s%N)
+            vh_query_duration=$(( (query_end - query_start) / 1000000000 ))
+            return
+        fi
+    fi
+
+    # Fallback: TCP port check on game port (query port - 1 is typically the game port)
+    # NOTE(review): Valheim and A2S normally use UDP, so a TCP probe may never
+    # succeed — confirm against a real server.
+    if [ "$vh_up" -eq 0 ]; then
+        local game_port=$((port - 1))
+        if nc -z -w 3 "$host" "$game_port" 2>/dev/null || nc -z -w 3 "$host" "$port" 2>/dev/null; then
+            vh_up=1
+        fi
+    fi
+
+    query_end=$(date +%s%N)
+    vh_query_duration=$(( (query_end - query_start) / 1000000000 ))
+}
+
+# Query Palworld server using REST API or TCP fallback
+# Args: $1 - host, $2 - port
+# Sets global variables: pw_up, pw_players_online, pw_players_max, pw_version,
+#   pw_motd, pw_query_duration (whole seconds)
+query_palworld() {
+    local host="$1"
+    local port="$2"
+    local query_start query_end
+
+    pw_up=0
+    pw_players_online=0
+    pw_players_max=0
+    pw_version="unknown"
+    pw_motd="unknown"
+    pw_query_duration=0
+
+    query_start=$(date +%s%N)
+
+    # Try REST API query first
+    if command -v curl >/dev/null 2>&1; then
+        local api_response
+        api_response=$(curl -s -m 5 "http://${host}:${port}/v1/api/info" 2>/dev/null) || true
+
+        if [ -n "$api_response" ] && command -v python3 >/dev/null 2>&1; then
+            local parse_result
+            # The response is untrusted remote data: feed it to the parser on
+            # stdin instead of pasting it into the Python source, where it
+            # could terminate the string literal and run arbitrary code (and
+            # where JSON backslash escapes would be mangled).
+            parse_result=$(printf '%s' "$api_response" | python3 -c "
+import json, sys
+try:
+    data = json.load(sys.stdin)
+    print('UP')
+    print(data.get('currentPlayerNum', 0))
+    print(data.get('maxPlayerNum', 0))
+    print(data.get('version', 'unknown'))
+    print(str(data.get('serverName', 'unknown')).replace(chr(10), ' '))
+except Exception:
+    print('PARSE_FAIL')
+" 2>/dev/null) || true
+
+            local first_line
+            first_line=$(echo "$parse_result" | head -1)
+
+            if [ "$first_line" = "UP" ]; then
+                pw_up=1
+                pw_players_online=$(echo "$parse_result" | sed -n '2p')
+                pw_players_max=$(echo "$parse_result" | sed -n '3p')
+                pw_version=$(echo "$parse_result" | sed -n '4p')
+                pw_motd=$(echo "$parse_result" | sed -n '5p')
+                # Sanitize numeric values
+                [[ "$pw_players_online" =~ ^[0-9]+$ ]] || pw_players_online=0
+                [[ "$pw_players_max" =~ ^[0-9]+$ ]] || pw_players_max=0
+            fi
+        elif [ -n "$api_response" ]; then
+            # curl got a response but no python3 to parse JSON
+            pw_up=1
+        fi
+    fi
+
+    # Fallback: TCP port check
+    if [ "$pw_up" -eq 0 ]; then
+        if nc -z -w 3 "$host" "$port" 2>/dev/null; then
+            pw_up=1
+        fi
+    fi
+
+    query_end=$(date +%s%N)
+    pw_query_duration=$(( (query_end - query_start) / 1000000000 ))
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check prerequisites
+ if ! check_prerequisites; then
+ return
+ fi
+
+ # Check that at least one server is configured
+ if [ -z "$MINECRAFT_HOST" ] && [ -z "$VALHEIM_HOST" ] && [ -z "$PALWORLD_HOST" ]; then
+ echo "# No game servers configured. Use --minecraft-host, --valheim-host, or --palworld-host" >&2
+ return
+ fi
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+Game Server Exporter v1.0
+
+Game Server Prometheus Exporter v1.0
+Metrics
+Operational metrics from Minecraft, Valheim, and Palworld servers.
+
+
+EOF
+ fi
+ } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
+ done
+}
+
+# ============================================================================
+# MAIN EXECUTION
+# ============================================================================
+
+# Main entry point: parse the arguments, then serve over HTTP, write the
+# metrics to OUTPUT_FILE (textfile collector mode), or print them to stdout.
+main() {
+    parse_args "$@"
+
+    if [ "$HTTP_MODE" = true ]; then
+        # blocks until the process is killed
+        run_http_server
+    elif [ -z "$OUTPUT_FILE" ]; then
+        # default: plain stdout
+        generate_metrics
+    else
+        local target_dir staging line_count
+
+        target_dir="$(dirname "$OUTPUT_FILE")"
+        mkdir -p "$target_dir"
+
+        # stage in the SAME directory so the final rename is atomic
+        staging=$(mktemp "${target_dir}/.game_server_metrics.XXXXXX")
+
+        if ! generate_metrics > "$staging" 2>/dev/null; then
+            rm -f "$staging"
+            echo "ERROR: Failed to generate metrics" >&2
+            exit 1
+        fi
+
+        # refuse to replace the previous file with a suspiciously short one
+        line_count=$(wc -l < "$staging" 2>/dev/null || echo 0)
+        if [ "$line_count" -lt 10 ]; then
+            rm -f "$staging"
+            echo "ERROR: Metrics file too small ($line_count lines), keeping previous" >&2
+            exit 1
+        fi
+
+        # permissions are fixed before the file becomes visible under its final name
+        chmod 644 "$staging"
+        mv -f "$staging" "$OUTPUT_FILE"
+
+        echo "Metrics written to $OUTPUT_FILE ($line_count lines)" >&2
+    fi
+}
+
+# Execute main function with all script arguments
+main "$@"
diff --git a/gcp-cost-reporter.sh b/gcp-cost-reporter.sh
new file mode 100755
index 0000000..872790d
--- /dev/null
+++ b/gcp-cost-reporter.sh
@@ -0,0 +1,563 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### gcp-cost-reporter.sh — GCP cost breakdown by service, project, or label. ####
+#### Queries BigQuery billing export for spend data with period comparison ####
+#### Requires: bash 4+, gcloud CLI (bq), jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./gcp-cost-reporter.sh --daily ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Each setting keeps a value supplied through the environment and otherwise
+# falls back to the default given here.
+: "${BQ_BILLING_TABLE:=}"
+: "${GCP_PROJECT:=}"
+: "${GROUP_BY:=SERVICE}"
+: "${OUTPUT_FORMAT:=text}"
+: "${SLACK_WEBHOOK_URL:=}"
+: "${LABEL_FILTER_KEY:=}"
+: "${LABEL_FILTER_VALUE:=}"
+: "${VERBOSE:=false}"
+: "${COLOR:=auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+# Filled in later by argument parsing and main.
+RUN_MODE="" CUSTOM_START="" CUSTOM_END="" SLACK_URL="" START_TIME=""
+
+# ── Colors ────────────────────────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "auto" && ! -t 1 ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ RED="\033[0;31m"
+ GREEN="\033[0;32m"
+ YELLOW="\033[0;33m"
+ # shellcheck disable=SC2034
+ BLUE="\033[0;34m"
+ # shellcheck disable=SC2034
+ BOLD="\033[1m"
+ DIM="\033[2m"
+ RESET="\033[0m"
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log_info() { printf "${GREEN}[INFO]${RESET} %s\n" "$*"; }
+log_warn() { printf "${YELLOW}[WARN]${RESET} %s\n" "$*" >&2; }
+log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; }
+log_debug() { [[ "$VERBOSE" == "true" ]] && printf "${DIM}[DEBUG] %s${RESET}\n" "$*"; }
+
+# ── Helpers ───────────────────────────────────────────────────────────
+die() { log_error "$@"; exit 1; }
+
+# Verify that every external tool the script invokes is on PATH and that the
+# running bash is new enough. Dies with a list of all missing tools.
+check_deps() {
+    local tool
+    local -a missing=()
+
+    # bc is included because the formatting and table helpers pipe their
+    # arithmetic through it.
+    for tool in gcloud bq jq curl bc; do
+        command -v "$tool" >/dev/null 2>&1 || missing+=("$tool")
+    done
+    if (( ${#missing[@]} > 0 )); then
+        die "Missing required tools: ${missing[*]}"
+    fi
+
+    # The output_* helpers use namerefs (local -n), which need bash 4.3.
+    local major="${BASH_VERSINFO[0]}" minor="${BASH_VERSINFO[1]}"
+    if (( major < 4 || (major == 4 && minor < 3) )); then
+        die "Requires bash 4.3+, found ${BASH_VERSION}"
+    fi
+}
+
+validate_date() {
+ local d="$1"
+ if [[ ! "$d" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
+ die "Invalid date format: $d (expected YYYY-MM-DD)"
+ fi
+}
+
+# ── Date math (portable) ─────────────────────────────────────────────
+date_offset() {
+ local base="$1" offset="$2"
+ if date --version >/dev/null 2>&1; then
+ date -d "${base} ${offset} days" +%Y-%m-%d
+ else
+ date -j -v"${offset}d" -f "%Y-%m-%d" "$base" +%Y-%m-%d
+ fi
+}
+
+today_utc() { date -u +%Y-%m-%d; }
+
+first_of_month() {
+ local d="$1"
+ echo "${d:0:8}01"
+}
+
+first_of_prev_month() {
+ local d="$1"
+ local year="${d:0:4}"
+ local month="${d:5:2}"
+ month=$((10#$month - 1))
+ if (( month == 0 )); then
+ month=12
+ year=$((year - 1))
+ fi
+ printf "%04d-%02d-01" "$year" "$month"
+}
+
+days_between() {
+ local s="$1" e="$2"
+ local ss se
+ if date --version >/dev/null 2>&1; then
+ ss=$(date -d "$s" +%s)
+ se=$(date -d "$e" +%s)
+ else
+ ss=$(date -j -f "%Y-%m-%d" "$s" +%s)
+ se=$(date -j -f "%Y-%m-%d" "$e" +%s)
+ fi
+ echo $(( (se - ss) / 86400 ))
+}
+
+# ── Compute date ranges ──────────────────────────────────────────────
+compute_ranges() {
+ local today
+ today="$(today_utc)"
+
+ case "$RUN_MODE" in
+ daily)
+ PERIOD_START="$(date_offset "$today" -1)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -2)"
+ PREV_END="$(date_offset "$today" -1)"
+ ;;
+ weekly)
+ PERIOD_START="$(date_offset "$today" -7)"
+ PERIOD_END="$today"
+ PREV_START="$(date_offset "$today" -14)"
+ PREV_END="$(date_offset "$today" -7)"
+ ;;
+ monthly)
+ PERIOD_START="$(first_of_month "$today")"
+ PERIOD_END="$today"
+ local prev_first
+ prev_first="$(first_of_prev_month "$today")"
+ PREV_START="$prev_first"
+ PREV_END="$PERIOD_START"
+ ;;
+ custom)
+ PERIOD_START="$CUSTOM_START"
+ PERIOD_END="$CUSTOM_END"
+ local span
+ span="$(days_between "$CUSTOM_START" "$CUSTOM_END")"
+ PREV_START="$(date_offset "$CUSTOM_START" "-$span")"
+ PREV_END="$CUSTOM_START"
+ ;;
+ *)
+ die "Unknown mode: $RUN_MODE"
+ ;;
+ esac
+
+ log_debug "Current period: $PERIOD_START → $PERIOD_END"
+ log_debug "Previous period: $PREV_START → $PREV_END"
+}
+
+# ── Build BigQuery SQL ────────────────────────────────────────────────
+# Print the SELECT expression that yields `group_key` for the chosen GROUP_BY.
+#
+# Globals read: GROUP_BY, LABEL_FILTER_KEY
+# Dies when GROUP_BY is LABEL without a label key, or is not a known value.
+build_select_column() {
+    case "$GROUP_BY" in
+        SERVICE) echo "service.description AS group_key" ;;
+        PROJECT) echo "project.id AS group_key" ;;
+        LABEL)
+            if [[ -z "$LABEL_FILTER_KEY" ]]; then
+                die "--group-by LABEL requires --label KEY=VALUE"
+            fi
+            # The key ends up inside a single-quoted SQL string literal:
+            # escape backslashes first, then quotes, so a key such as o'brien
+            # cannot terminate the literal.
+            local bs='\' sq="'" key
+            key=${LABEL_FILTER_KEY//"$bs"/"$bs$bs"}
+            key=${key//"$sq"/"$bs$sq"}
+            echo "( SELECT value FROM UNNEST(labels) WHERE key = '${key}' ) AS group_key"
+            ;;
+        *)
+            die "Invalid --group-by value: $GROUP_BY (expected SERVICE, PROJECT, or LABEL)"
+            ;;
+    esac
+}
+
+build_where_clause() {
+ local start="$1" end="$2"
+ local where="usage_start_time >= TIMESTAMP('${start}') AND usage_start_time < TIMESTAMP('${end}')"
+
+ if [[ -n "$LABEL_FILTER_KEY" && -n "$LABEL_FILTER_VALUE" ]]; then
+ where="${where} AND EXISTS( SELECT 1 FROM UNNEST(labels) l WHERE l.key = '${LABEL_FILTER_KEY}' AND l.value = '${LABEL_FILTER_VALUE}' )"
+ fi
+
+ echo "$where"
+}
+
+build_query() {
+ local start="$1" end="$2"
+ local select_col where_clause
+
+ select_col="$(build_select_column)"
+ where_clause="$(build_where_clause "$start" "$end")"
+
+ cat </dev/null
+}
+
+# ── Parse cost data ──────────────────────────────────────────────────
+# Turn the JSON array in $1 into "group_key<TAB>total_cost" lines, dropping
+# rows whose group_key is null or empty. Prints an empty line when jq cannot
+# process the input.
+parse_costs() {
+    local json="$1"
+    jq -r '
+        .[] |
+        select(.group_key != null and .group_key != "") |
+        "\(.group_key)\t\(.total_cost)"
+    ' <<< "$json" 2>/dev/null || echo ""
+}
+
+# ── Format helpers ────────────────────────────────────────────────────
+# Print $1 as a dollar amount with two decimals and no trailing newline.
+fmt_currency() {
+    local amount="$1"
+    printf '$%.2f' "$amount"
+}
+
+# Print the percentage change from $2 (previous) to $1 (current) with one
+# decimal, e.g. "+5.0%" or "-12.3%". Prints "N/A" when the previous value is 0.
+fmt_delta() {
+    local curr="$1" prev="$2"
+    if (( $(echo "$prev == 0" | bc -l) )); then
+        echo "N/A"
+        return
+    fi
+
+    # Multiply before dividing and keep spare digits: bc applies `scale` to
+    # the division, so computing ((curr-prev)/prev) at scale=1 truncates the
+    # ratio itself and a 5% change comes out as 0%.
+    local pct
+    pct=$(echo "scale=4; (($curr) - ($prev)) * 100 / ($prev)" | bc -l)
+
+    local sign=""
+    if (( $(echo "$pct > 0" | bc -l) )); then
+        sign="+"
+    fi
+    # printf also supplies the leading zero that bc omits (".5" -> "0.5").
+    LC_ALL=C printf '%s%.1f%%\n' "$sign" "$pct"
+}
+
+# Print the report preamble: account, project, billing table, mode and a UTC
+# timestamp, plus the period for custom runs, followed by a blank line.
+# Account and project fall back to "unknown" when gcloud cannot supply them.
+print_header() {
+    local account_id project_id
+
+    account_id=$(gcloud config get-value account 2>/dev/null || echo "unknown")
+    if [[ -n "$GCP_PROJECT" ]]; then
+        project_id="$GCP_PROJECT"
+    else
+        project_id=$(gcloud config get-value project 2>/dev/null || echo "unknown")
+    fi
+
+    printf '%s\n' \
+        "GCP Cost Reporter" \
+        "Account: $account_id" \
+        "Project: $project_id" \
+        "Table: $BQ_BILLING_TABLE" \
+        "Mode: $RUN_MODE" \
+        "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+    if [[ "$RUN_MODE" == "custom" ]]; then
+        printf '%s\n' "Period: $PERIOD_START → $PERIOD_END"
+    fi
+    printf '\n'
+}
+
+# ── Text table output ────────────────────────────────────────────────
+output_text_table() {
+ local -n curr_data=$1
+ local -n prev_data=$2
+ local label="SERVICE"
+ case "$GROUP_BY" in
+ PROJECT) label="PROJECT" ;;
+ LABEL) label="LABEL" ;;
+ esac
+ local divider="──────────────────────────────────────────────────────────────────────"
+ printf " %-38s %-12s %-12s %s\n" "$label" "COST" "PREV" "DELTA"
+ printf " %s\n" "$divider"
+ local total_curr=0 total_prev=0
+ for key in "${!curr_data[@]}"; do
+ local cost="${curr_data[$key]}" prev_cost="${prev_data[$key]:-0}"
+ printf " %-38s %-12s %-12s %s\n" \
+ "$key" "$(fmt_currency "$cost")" "$(fmt_currency "$prev_cost")" "$(fmt_delta "$cost" "$prev_cost")"
+ total_curr=$(echo "$total_curr + $cost" | bc -l)
+ total_prev=$(echo "$total_prev + $prev_cost" | bc -l)
+ done
+ printf " %s\n" "$divider"
+ printf " %-38s %-12s %-12s %s\n" \
+ "TOTAL" "$(fmt_currency "$total_curr")" "$(fmt_currency "$total_prev")" "$(fmt_delta "$total_curr" "$total_prev")"
+}
+
+# ── CSV output ────────────────────────────────────────────────────────
+output_csv() {
+ local -n curr_data=$1
+ local -n prev_data=$2
+ local label="service"
+ case "$GROUP_BY" in
+ PROJECT) label="project" ;;
+ LABEL) label="label" ;;
+ esac
+ echo "${label},cost,previous_cost,delta_pct"
+ for key in "${!curr_data[@]}"; do
+ local cost="${curr_data[$key]}" prev_cost="${prev_data[$key]:-0}" pct="0"
+ if (( $(echo "$prev_cost != 0" | bc -l) )); then
+ pct=$(echo "scale=2; (($cost - $prev_cost) / $prev_cost) * 100" | bc -l)
+ fi
+ echo "\"$key\",$cost,$prev_cost,$pct"
+ done
+}
+
+# ── JSON output ───────────────────────────────────────────────────────
+# Print the cost comparison as one compact JSON object:
+#   {mode, period_start, period_end, previous_start, previous_end, group_by,
+#    items: [{<label>: key, cost: n, previous_cost: n}, ...]}
+#
+# Arguments:    $1 name of the associative array key -> current cost
+#               $2 name of the associative array key -> previous cost
+# Globals read: GROUP_BY, RUN_MODE, PERIOD_START, PERIOD_END, PREV_START,
+#               PREV_END
+#
+# jq builds the document so that quotes and backslashes in keys are escaped;
+# assembling it with printf produced invalid JSON for such keys. A cost that
+# is not numeric is emitted as null. Items are in byte-wise key order.
+output_json() {
+    local -n curr_data=$1
+    local -n prev_data=$2
+    local label="service"
+    case "$GROUP_BY" in
+        PROJECT) label="project" ;;
+        LABEL) label="label" ;;
+    esac
+
+    local key
+    {
+        if (( ${#curr_data[@]} > 0 )); then
+            for key in "${!curr_data[@]}"; do
+                printf '%s\t%s\t%s\n' "$key" "${curr_data[$key]}" "${prev_data[$key]:-0}"
+            done
+        fi
+    } | LC_ALL=C sort | jq -Rnc \
+        --arg mode "$RUN_MODE" \
+        --arg ps "$PERIOD_START" \
+        --arg pe "$PERIOD_END" \
+        --arg vs "$PREV_START" \
+        --arg ve "$PREV_END" \
+        --arg gb "$GROUP_BY" \
+        --arg label "$label" '
+        {
+            mode: $mode,
+            period_start: $ps,
+            period_end: $pe,
+            previous_start: $vs,
+            previous_end: $ve,
+            group_by: $gb,
+            items: [
+                inputs
+                | select(length > 0)
+                | split("\t")
+                | {
+                    ($label): .[0],
+                    cost: (.[1] | tonumber? // null),
+                    previous_cost: (.[2] | tonumber? // null)
+                  }
+            ]
+        }'
+}
+
+# ── Render report ─────────────────────────────────────────────────────
+# Build the key -> cost maps for both periods and print the report in the
+# format selected by OUTPUT_FORMAT.
+#
+# Arguments:    $1 raw JSON for the current period
+#               $2 raw JSON for the previous period
+# Globals read: OUTPUT_FORMAT, PERIOD_START, PERIOD_END
+# Dies on an unknown OUTPUT_FORMAT.
+render_report() {
+    local curr_raw="$1" prev_raw="$2"
+
+    declare -A curr_costs
+    declare -A prev_costs
+    # Declared local so the read loops below do not overwrite globals that
+    # happen to be named `key` or `amount`.
+    local key amount
+
+    while IFS=$'\t' read -r key amount; do
+        [[ -z "$key" ]] && continue
+        curr_costs["$key"]="$amount"
+    done <<< "$(parse_costs "$curr_raw")"
+
+    while IFS=$'\t' read -r key amount; do
+        [[ -z "$key" ]] && continue
+        prev_costs["$key"]="$amount"
+    done <<< "$(parse_costs "$prev_raw")"
+
+    # Keys seen only in the previous period still get a row, at cost 0.
+    for key in "${!prev_costs[@]}"; do
+        if [[ -z "${curr_costs[$key]+x}" ]]; then
+            curr_costs["$key"]="0"
+        fi
+    done
+
+    case "$OUTPUT_FORMAT" in
+        text)
+            print_header
+            local title="Cost Breakdown — ${PERIOD_START} → ${PERIOD_END}"
+            echo "$title"
+            output_text_table curr_costs prev_costs
+            echo ""
+            ;;
+        csv)
+            output_csv curr_costs prev_costs
+            ;;
+        json)
+            output_json curr_costs prev_costs
+            ;;
+        *)
+            die "Unknown format: $OUTPUT_FORMAT"
+            ;;
+    esac
+}
+
+# ── Slack webhook ─────────────────────────────────────────────────────
+# Post the report to a Slack incoming webhook as a code-fenced message.
+#
+# Arguments: $1 report text, $2 webhook URL
+# Returns:   0 when Slack answers HTTP 200; 1 when the request cannot be
+#            completed or another status comes back.
+send_slack() {
+    local report="$1" webhook="$2"
+
+    log_info "Posting report to Slack..."
+
+    # Reports longer than max_len characters are cut and marked as truncated.
+    local max_len=3000
+    local body="$report"
+    if (( ${#body} > max_len )); then
+        body="${body:0:$max_len}
+
+... (truncated — full report exceeds Slack message limit)"
+    fi
+
+    # jq produces the JSON so quotes and newlines in the report are escaped.
+    local payload
+    payload=$(jq -n --arg text "\`\`\`${body}\`\`\`" '{ text: $text }')
+
+    # Test curl's own exit status: under `set -e` a bare assignment would
+    # abort the script without any message on a network error. --max-time
+    # keeps an unresponsive endpoint from hanging the run.
+    local http_code
+    if ! http_code=$(curl -sS -o /dev/null -w "%{http_code}" \
+        --max-time 30 \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d "$payload" \
+        "$webhook"); then
+        log_error "Slack webhook request failed (could not reach endpoint)"
+        return 1
+    fi
+
+    if [[ "$http_code" != "200" ]]; then
+        log_error "Slack webhook returned HTTP $http_code"
+        return 1
+    fi
+
+    log_info "Slack message posted"
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────
+usage() {
+ cat < 0 )); do
+ case "$1" in
+ --daily|--weekly|--monthly)
+ RUN_MODE="${1#--}"; shift ;;
+ --custom)
+ RUN_MODE="custom"
+ [[ $# -lt 3 ]] && die "--custom requires START and END dates"
+ CUSTOM_START="$2"; CUSTOM_END="$3"
+ validate_date "$CUSTOM_START"; validate_date "$CUSTOM_END"
+ shift 3 ;;
+ --group-by)
+ [[ $# -lt 2 ]] && die "--group-by requires a value"
+ GROUP_BY="$2"; shift 2 ;;
+ --label)
+ [[ $# -lt 2 ]] && die "--label requires KEY=VALUE"
+ [[ "$2" != *"="* ]] && die "--label value must be KEY=VALUE"
+ LABEL_FILTER_KEY="${2%%=*}"; LABEL_FILTER_VALUE="${2#*=}"; shift 2 ;;
+ --format)
+ [[ $# -lt 2 ]] && die "--format requires a value"
+ OUTPUT_FORMAT="$2"; shift 2 ;;
+ --slack)
+ [[ $# -lt 2 ]] && die "--slack requires a webhook URL"
+ SLACK_URL="$2"; shift 2 ;;
+ --project)
+ [[ $# -lt 2 ]] && die "--project requires a project ID"
+ GCP_PROJECT="$2"; shift 2 ;;
+ --verbose) VERBOSE="true"; shift ;;
+ --no-color) COLOR="never"; shift ;;
+ --help|-h) usage ;;
+ *) die "Unknown option: $1 (see --help)" ;;
+ esac
+ done
+
+ if [[ -z "$RUN_MODE" ]]; then log_error "No mode specified"; echo ""; usage; exit 1; fi
+ [[ -z "$BQ_BILLING_TABLE" ]] && die "BQ_BILLING_TABLE is required (e.g., project.dataset.gcp_billing_export_v1_XXXXXX)"
+ [[ -z "$SLACK_URL" && -n "$SLACK_WEBHOOK_URL" ]] && SLACK_URL="$SLACK_WEBHOOK_URL"
+
+ case "$GROUP_BY" in
+ SERVICE|PROJECT|LABEL) ;;
+ *) die "Invalid --group-by: $GROUP_BY (expected SERVICE, PROJECT, or LABEL)" ;;
+ esac
+ case "$OUTPUT_FORMAT" in
+ text|csv|json) ;;
+ *) die "Invalid --format: $OUTPUT_FORMAT" ;;
+ esac
+}
+
+# ── Main ──────────────────────────────────────────────────────────────
+# Entry point: parse options, check prerequisites and credentials, query both
+# periods, print the report and optionally forward it to Slack.
+main() {
+    parse_args "$@"
+    setup_colors
+    check_deps
+
+    START_TIME=$(date +%s)
+
+    log_debug "Validating GCP credentials..."
+    if ! gcloud auth print-access-token >/dev/null 2>&1; then
+        die "GCP credentials not configured or expired (run gcloud auth login)"
+    fi
+
+    compute_ranges
+
+    log_info "Querying BigQuery billing data ($RUN_MODE, group by $GROUP_BY)..."
+
+    local current_json previous_json
+    current_json="$(query_costs "$PERIOD_START" "$PERIOD_END")"
+    previous_json="$(query_costs "$PREV_START" "$PREV_END")"
+
+    # Nothing to report when the current period came back empty.
+    case "$current_json" in
+        "" | "[]")
+            die "No cost data returned for $PERIOD_START → $PERIOD_END"
+            ;;
+    esac
+
+    local rendered
+    rendered="$(render_report "$current_json" "$previous_json")"
+
+    echo "$rendered"
+
+    if [[ -n "$SLACK_URL" ]]; then
+        send_slack "$rendered" "$SLACK_URL"
+    fi
+
+    local finished
+    finished=$(date +%s)
+    log_info "Completed in $(( finished - START_TIME ))s"
+}
+
+main "$@"
diff --git a/gcp-firewall-auditor.sh b/gcp-firewall-auditor.sh
new file mode 100644
index 0000000..970d528
--- /dev/null
+++ b/gcp-firewall-auditor.sh
@@ -0,0 +1,635 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### gcp-firewall-auditor.sh — Audit GCP VPC firewall rules for risky configs ####
+#### Finds 0.0.0.0/0 rules, dangerous ports, overly permissive access, unused rules ####
+#### Requires: bash 4+, gcloud CLI, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./gcp-firewall-auditor.sh --full ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+
+setup_colors() {
+ if [[ "${COLOR:-auto}" == "never" ]]; then
+ return
+ fi
+ if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+# ── Severity counters ────────────────────────────────────────────────
+# Finding tallies, one per severity, reported by the summary.
+TOTAL_CRIT=0 TOTAL_WARN=0 TOTAL_INFO=0 TOTAL_OK=0
+
+# Each helper adds one to its tally and always returns 0.
+flag_crit() { TOTAL_CRIT=$(( TOTAL_CRIT + 1 )); }
+flag_warn() { TOTAL_WARN=$(( TOTAL_WARN + 1 )); }
+flag_info() { TOTAL_INFO=$(( TOTAL_INFO + 1 )); }
+flag_ok() { TOTAL_OK=$(( TOTAL_OK + 1 )); }
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Set from the command line; these always start empty.
+RUN_MODE="" GCP_PROJECT="" VPC_NETWORK=""
+# Overridable through the environment.
+: "${DANGEROUS_PORTS:=22,3389,3306,5432,1433,6379,27017,9200,8080,8443}"
+: "${VERBOSE:=false}"
+: "${COLOR:=auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+
+# ── Dependency and credential checks ────────────────────────────────
+check_deps() {
+ command -v gcloud &>/dev/null || die "gcloud CLI is required (install: https://cloud.google.com/sdk/docs/install)"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+check_credentials() {
+ local account
+ account=$(gcloud auth list --filter="status:ACTIVE" --format="value(account)" 2>/dev/null)
+ [[ -z "$account" ]] && die "No active gcloud credentials — run 'gcloud auth login'"
+
+ if [[ -n "$GCP_PROJECT" ]]; then
+ gcloud config set project "$GCP_PROJECT" --quiet 2>/dev/null \
+ || die "Cannot set project: ${GCP_PROJECT}"
+ else
+ GCP_PROJECT=$(gcloud config get-value project 2>/dev/null)
+ [[ -z "$GCP_PROJECT" || "$GCP_PROJECT" == "(unset)" ]] && die "No project set — use --project or 'gcloud config set project'"
+ fi
+
+ verbose "Account: ${account}"
+ log "Project: ${GCP_PROJECT}"
+}
+
+# ── gcloud wrapper ───────────────────────────────────────────────────
+gc_cmd() {
+ local args=("$@")
+ [[ -n "$GCP_PROJECT" ]] && args+=(--project "$GCP_PROJECT")
+ verbose "gcloud ${args[*]}"
+ gcloud "${args[@]}"
+}
+
+# ── Port-to-service mapping ─────────────────────────────────────────
+# Print the well-known service name for port $1, or an empty line when the
+# port is not in the table.
+port_to_service() {
+    local port="$1"
+    local name=""
+
+    case "$port" in
+        22) name="SSH" ;;
+        25) name="SMTP" ;;
+        53) name="DNS" ;;
+        80) name="HTTP" ;;
+        443) name="HTTPS" ;;
+        1433) name="MSSQL" ;;
+        2379) name="etcd" ;;
+        3306) name="MySQL" ;;
+        3389) name="RDP" ;;
+        5432) name="PostgreSQL" ;;
+        5900) name="VNC" ;;
+        6379) name="Redis" ;;
+        8080) name="HTTP-Alt" ;;
+        8443) name="HTTPS-Alt" ;;
+        9090) name="Prometheus" ;;
+        9200) name="Elasticsearch" ;;
+        11211) name="Memcached" ;;
+        27017) name="MongoDB" ;;
+    esac
+
+    echo "$name"
+}
+
+# ── Check if port is in dangerous list ───────────────────────────────
+is_dangerous_port() {
+ local port="$1"
+ local IFS=','
+ for dp in $DANGEROUS_PORTS; do
+ if [[ "$port" == "$dp" ]]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+# ── Check if port falls in a range ───────────────────────────────────
+# Return 0 when port $1 matches spec $2. A spec containing '-' is an
+# inclusive START-END range; any other spec must equal the port exactly.
+port_in_range() {
+    local port="$1" range="$2"
+
+    if [[ "$range" != *-* ]]; then
+        [[ "$port" == "$range" ]]
+        return
+    fi
+
+    local low="${range%-*}"
+    local high="${range#*-}"
+    (( port >= low && port <= high ))
+}
+
+# ── Fetch firewall rules ────────────────────────────────────────────
+fetch_rules() {
+ local args=(compute firewall-rules list --format=json)
+ if [[ -n "$VPC_NETWORK" ]]; then
+ args+=(--filter="network~${VPC_NETWORK}")
+ fi
+ gc_cmd "${args[@]}" 2>/dev/null
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OPEN PORTS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_open_ports() {
+ log "Auditing firewall rules for dangerous open ports..."
+ log "Dangerous ports: ${DANGEROUS_PORTS}"
+ echo ""
+
+ printf " %-28s %-14s %-8s %-8s %-18s %s\n" \
+ "RULE_NAME" "NETWORK" "PROTO" "PORT" "SOURCE" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..95})"
+
+ local rules_json
+ rules_json=$(fetch_rules)
+
+ echo "$rules_json" | jq -c '.[] | select(.direction == "INGRESS" and .disabled != true)' 2>/dev/null | while IFS= read -r rule; do
+ local rule_name network
+ rule_name=$(echo "$rule" | jq -r '.name')
+ network=$(echo "$rule" | jq -r '.network' | rev | cut -d/ -f1 | rev)
+
+ local has_open="false"
+ while IFS= read -r src; do
+ if [[ "$src" == "0.0.0.0/0" ]]; then
+ has_open="true"
+ break
+ fi
+ done < <(echo "$rule" | jq -r '.sourceRanges[]? // empty' 2>/dev/null)
+
+ [[ "$has_open" != "true" ]] && continue
+
+ echo "$rule" | jq -c '.allowed[]? // empty' 2>/dev/null | while IFS= read -r allowed; do
+ local protocol
+ protocol=$(echo "$allowed" | jq -r '.IPProtocol')
+
+ local ports
+ ports=$(echo "$allowed" | jq -r '.ports[]? // empty' 2>/dev/null)
+
+ if [[ -z "$ports" ]]; then
+ if [[ "$protocol" == "all" ]]; then
+ printf " %-28s %-14s %-8s %-8s %-18s %b%s%b\n" \
+ "${rule_name:0:27}" "${network:0:13}" "all" "all" \
+ "0.0.0.0/0" "$RED" "CRITICAL" "$RESET"
+ flag_crit
+ else
+ local IFS=','
+ for dp in $DANGEROUS_PORTS; do
+ local svc
+ svc=$(port_to_service "$dp")
+ printf " %-28s %-14s %-8s %-8s %-18s %b%s%b\n" \
+ "${rule_name:0:27}" "${network:0:13}" "$protocol" "$dp" \
+ "0.0.0.0/0" "$RED" "CRITICAL" "$RESET"
+ flag_crit
+ done
+ fi
+ continue
+ fi
+
+ while IFS= read -r port_spec; do
+ [[ -z "$port_spec" ]] && continue
+
+ local IFS=','
+ for dp in $DANGEROUS_PORTS; do
+ if port_in_range "$dp" "$port_spec"; then
+ local svc severity color
+ svc=$(port_to_service "$dp")
+ if [[ "$dp" == "80" || "$dp" == "443" ]]; then
+ severity="INFO"; color="$CYAN"; flag_info
+ else
+ severity="CRITICAL"; color="$RED"; flag_crit
+ fi
+ printf " %-28s %-14s %-8s %-8s %-18s %b%s%b\n" \
+ "${rule_name:0:27}" "${network:0:13}" "$protocol" \
+ "${dp} (${svc})" "0.0.0.0/0" "$color" "$severity" "$RESET"
+ fi
+ done
+ done <<< "$ports"
+ done
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# PERMISSIVE RULES AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Report enabled INGRESS rules that are open to 0.0.0.0/0 and have an allow
+# entry that is either protocol "all" or lists no ports.
+#
+# Globals read: RED, RESET
+# Side effects: raises the critical tally through flag_crit.
+#
+# The rule loop reads from a process substitution rather than a pipeline. A
+# loop at the end of a pipeline runs in a subshell, so every flag_crit
+# increment made inside it was lost to the summary and the exit status.
+audit_permissive() {
+    log "Auditing overly permissive firewall rules..."
+    echo ""
+
+    printf " %-28s %-14s %-14s %-18s %s\n" \
+        "RULE_NAME" "NETWORK" "PROTOCOLS" "SOURCE" "SEVERITY"
+    printf " %s\n" "$(printf '%.0s─' {1..85})"
+
+    local rules_json
+    rules_json=$(fetch_rules)
+
+    local rule rule_name network proto_list
+
+    while IFS= read -r rule; do
+        rule_name=$(jq -r '.name' <<< "$rule")
+        network=$(jq -r '.network' <<< "$rule")
+        network="${network##*/}"
+
+        # Only rules reachable from anywhere are of interest here.
+        if ! jq -e 'any(.sourceRanges[]?; . == "0.0.0.0/0")' <<< "$rule" >/dev/null 2>&1; then
+            continue
+        fi
+
+        # "All traffic": some allow entry is protocol "all" or has no ports.
+        if jq -e 'any(.allowed[]?; .IPProtocol == "all" or ((.ports // []) | length) == 0)' \
+            <<< "$rule" >/dev/null 2>&1; then
+            proto_list=$(jq -r '[.allowed[]?.IPProtocol] | join(",")' <<< "$rule" 2>/dev/null)
+            printf " %-28s %-14s %-14s %-18s %b%s%b\n" \
+                "${rule_name:0:27}" "${network:0:13}" "${proto_list:0:13}" \
+                "0.0.0.0/0" "$RED" "CRITICAL" "$RESET"
+            flag_crit
+        fi
+    done < <(jq -c '.[] | select(.direction == "INGRESS" and .disabled != true)' <<< "$rules_json" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# EGRESS AUDIT
+# ══════════════════════════════════════════════════════════════════════
+# Report enabled EGRESS allow rules whose destination is 0.0.0.0/0.
+# A rule allowing exactly "all" protocols is a WARN, anything else an INFO.
+#
+# Globals read: YELLOW, CYAN, RESET
+# Side effects: raises the tallies through flag_warn / flag_info.
+#
+# The rule loop reads from a process substitution rather than a pipeline. A
+# loop at the end of a pipeline runs in a subshell, so every flag_* increment
+# made inside it was lost to the summary and the exit status.
+audit_egress() {
+    log "Auditing egress firewall rules..."
+    echo ""
+
+    printf " %-28s %-14s %-14s %-18s %s\n" \
+        "RULE_NAME" "NETWORK" "PROTOCOLS" "DESTINATION" "SEVERITY"
+    printf " %s\n" "$(printf '%.0s─' {1..85})"
+
+    local rules_json
+    rules_json=$(fetch_rules)
+
+    local rule rule_name network proto_list severity color
+
+    while IFS= read -r rule; do
+        rule_name=$(jq -r '.name' <<< "$rule")
+        network=$(jq -r '.network' <<< "$rule")
+        network="${network##*/}"
+
+        # Only rules that reach any destination are of interest here.
+        if ! jq -e 'any(.destinationRanges[]?; . == "0.0.0.0/0")' <<< "$rule" >/dev/null 2>&1; then
+            continue
+        fi
+
+        proto_list=$(jq -r '[.allowed[]?.IPProtocol] | join(",")' <<< "$rule" 2>/dev/null)
+
+        # A rule without allow entries permits nothing (e.g. a deny rule), so
+        # it must not be reported as wide-open egress.
+        if [[ -z "$proto_list" ]]; then
+            continue
+        fi
+
+        if [[ "$proto_list" == "all" ]]; then
+            severity="WARN"; color="$YELLOW"; flag_warn
+        else
+            severity="INFO"; color="$CYAN"; flag_info
+        fi
+
+        printf " %-28s %-14s %-14s %-18s %b%s%b\n" \
+            "${rule_name:0:27}" "${network:0:13}" "${proto_list:0:13}" \
+            "0.0.0.0/0" "$color" "$severity" "$RESET"
+    done < <(jq -c '.[] | select(.direction == "EGRESS" and .disabled != true)' <<< "$rules_json" 2>/dev/null)
+
+    echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# UNUSED RULES AUDIT
+# ══════════════════════════════════════════════════════════════════════
+audit_unused() {
+ log "Checking for disabled or potentially unused firewall rules..."
+ echo ""
+
+ printf " %-28s %-14s %-10s %-10s %s\n" \
+ "RULE_NAME" "NETWORK" "DIRECTION" "DISABLED" "SEVERITY"
+ printf " %s\n" "$(printf '%.0s─' {1..80})"
+
+ local rules_json
+ rules_json=$(fetch_rules)
+
+ echo "$rules_json" | jq -c '.[]' 2>/dev/null | while IFS= read -r rule; do
+ local rule_name network direction disabled
+ rule_name=$(echo "$rule" | jq -r '.name')
+ network=$(echo "$rule" | jq -r '.network' | rev | cut -d/ -f1 | rev)
+ direction=$(echo "$rule" | jq -r '.direction')
+ disabled=$(echo "$rule" | jq -r '.disabled // false')
+
+ if [[ "$disabled" == "true" ]]; then
+ printf " %-28s %-14s %-10s %-10s %b%s%b\n" \
+ "${rule_name:0:27}" "${network:0:13}" "$direction" "YES" \
+ "$YELLOW" "WARN — disabled" "$RESET"
+ flag_warn
+ continue
+ fi
+
+ local target_tags
+ target_tags=$(echo "$rule" | jq -r '.targetTags // [] | join(",")' 2>/dev/null)
+
+ if [[ -n "$target_tags" && "$target_tags" != "null" ]]; then
+ local first_tag="${target_tags%%,*}"
+ local instance_count
+ instance_count=$(gcloud compute instances list \
+ --filter="tags.items=${first_tag}" \
+ --format="value(name)" 2>/dev/null | wc -l)
+
+ if [[ "$instance_count" -eq 0 ]]; then
+ printf " %-28s %-14s %-10s %-10s %b%s%b\n" \
+ "${rule_name:0:27}" "${network:0:13}" "$direction" "NO" \
+ "$YELLOW" "WARN — no targets" "$RESET"
+ flag_warn
+ else
+ verbose "Rule ${rule_name}: ${instance_count} matching instance(s)"
+ flag_ok
+ fi
+ else
+ flag_ok
+ fi
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST ALL RULES
+# ══════════════════════════════════════════════════════════════════════
+list_rules() {
+ log "Listing all firewall rules..."
+ echo ""
+
+ printf " %-28s %-14s %-10s %-8s %-12s %-18s %s\n" \
+ "RULE_NAME" "NETWORK" "DIR" "PROTO" "PORTS" "SOURCE/DEST" "PRIORITY"
+ printf " %s\n" "$(printf '%.0s─' {1..105})"
+
+ local rules_json
+ rules_json=$(fetch_rules)
+
+ echo "$rules_json" | jq -c '.[]' 2>/dev/null | while IFS= read -r rule; do
+ local rule_name network direction priority
+ rule_name=$(echo "$rule" | jq -r '.name')
+ network=$(echo "$rule" | jq -r '.network' | rev | cut -d/ -f1 | rev)
+ direction=$(echo "$rule" | jq -r '.direction')
+ priority=$(echo "$rule" | jq -r '.priority')
+
+ local cidr_list
+ if [[ "$direction" == "INGRESS" ]]; then
+ cidr_list=$(echo "$rule" | jq -r '.sourceRanges[0]? // "any"' 2>/dev/null)
+ else
+ cidr_list=$(echo "$rule" | jq -r '.destinationRanges[0]? // "any"' 2>/dev/null)
+ fi
+
+ echo "$rule" | jq -c '.allowed[]? // empty' 2>/dev/null | while IFS= read -r allowed; do
+ local proto port_str
+ proto=$(echo "$allowed" | jq -r '.IPProtocol')
+ port_str=$(echo "$allowed" | jq -r '.ports // ["all"] | join(",")' 2>/dev/null)
+ [[ "$port_str" == "null" ]] && port_str="all"
+
+ local dir_color="$CYAN"
+ [[ "$direction" == "EGRESS" ]] && dir_color="$YELLOW"
+
+ printf " %-28s %-14s %b%-10s%b %-8s %-12s %-18s %s\n" \
+ "${rule_name:0:27}" "${network:0:13}" "$dir_color" "$direction" "$RESET" \
+ "$proto" "${port_str:0:11}" "${cidr_list:0:17}" "$priority"
+ done
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════════════════════════════
+print_summary() {
+ local elapsed
+ elapsed=$(( $(date +%s) - START_TIME ))
+
+ echo ""
+ echo " ══════════════════════════════════════════"
+ echo " Firewall Audit Summary"
+ echo " ══════════════════════════════════════════"
+ printf " %-20s %b%d%b\n" "CRITICAL:" "$RED" "$TOTAL_CRIT" "$RESET"
+ printf " %-20s %b%d%b\n" "WARN:" "$YELLOW" "$TOTAL_WARN" "$RESET"
+ printf " %-20s %b%d%b\n" "INFO:" "$CYAN" "$TOTAL_INFO" "$RESET"
+ printf " %-20s %b%d%b\n" "OK:" "$GREEN" "$TOTAL_OK" "$RESET"
+ echo " ──────────────────────────────────────────"
+ printf " Completed in %ds\n" "$elapsed"
+ echo ""
+
+ if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+ echo -e " ${RED}${BOLD}Action required:${RESET} ${TOTAL_CRIT} critical finding(s)"
+ echo ""
+ echo " Top recommendations:"
+ echo " • Close 0.0.0.0/0 rules on SSH (22), RDP (3389), and database ports"
+ echo " • Replace all-protocol allow rules with specific port lists"
+ echo " • Use target tags or service accounts to scope rules"
+ echo " • Delete disabled rules that are no longer needed"
+ echo ""
+ elif [[ "$TOTAL_WARN" -gt 0 ]]; then
+ echo -e " ${YELLOW}Review recommended:${RESET} ${TOTAL_WARN} warning(s)"
+ echo ""
+ echo " Suggestions:"
+ echo " • Review disabled rules for deletion"
+ echo " • Check rules with no matching target instances"
+ echo " • Restrict egress where applicable"
+ echo ""
+ else
+ echo -e " ${GREEN}All checks passed${RESET}"
+ echo ""
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# USAGE
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat <&2
+ exit 1
+ fi
+
+ RUN_MODE="${modes[*]}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+main() {
+ parse_args "$@"
+ setup_colors
+ check_deps
+ check_credentials
+
+ START_TIME=$(date +%s)
+
+ echo ""
+ echo -e "${BOLD}GCP Firewall Auditor${RESET}"
+ echo -e "Project: ${GCP_PROJECT}"
+ echo -e "Mode: ${RUN_MODE}"
+ if [[ -n "$VPC_NETWORK" ]]; then
+ echo -e "Network: ${VPC_NETWORK}"
+ fi
+ echo -e "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+ echo ""
+
+ for mode in $RUN_MODE; do
+ case "$mode" in
+ open-ports) audit_open_ports ;;
+ permissive) audit_permissive ;;
+ unused) audit_unused ;;
+ egress) audit_egress ;;
+ rules) list_rules ;;
+ esac
+ done
+
+ print_summary
+
+ if [[ "$TOTAL_CRIT" -gt 0 ]]; then
+ exit 2
+ elif [[ "$TOTAL_WARN" -gt 0 ]]; then
+ exit 1
+ fi
+ exit 0
+}
+
+main "$@"
diff --git a/gcp-snapshot-manager.sh b/gcp-snapshot-manager.sh
new file mode 100644
index 0000000..054dbb6
--- /dev/null
+++ b/gcp-snapshot-manager.sh
@@ -0,0 +1,708 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### gcp-snapshot-manager.sh — Create, rotate, list, audit, and restore GCP ####
+#### persistent disk snapshots via gcloud CLI. Automated retention and fleet ops ####
+#### Requires: bash 4+, gcloud CLI, jq ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.01 ####
+#### ####
+#### Usage: ####
+#### ./gcp-snapshot-manager.sh --snapshot --all ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Colors (pre-initialized) ─────────────────────────────────────────
+# Every colour variable starts empty so output is plain text until
+# setup_colors decides otherwise.
+RED=""
+GREEN=""
+YELLOW=""
+BLUE=""
+CYAN=""
+BOLD=""
+DIM=""
+RESET=""
+
+# Enable ANSI colour codes according to COLOR:
+#   never  -> leave all colour variables empty
+#   always -> always set them
+#   other  -> set them only when stdout is a terminal
+setup_colors() {
+    local mode="${COLOR:-auto}"
+
+    case "$mode" in
+        never)
+            return 0
+            ;;
+        always)
+            ;;
+        *)
+            if [[ ! -t 1 ]]; then
+                return 0
+            fi
+            ;;
+    esac
+
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    CYAN='\033[0;36m'
+    BOLD='\033[1m'
+    DIM='\033[2m'
+    RESET='\033[0m'
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${DIM}[DEBUG]${RESET} $*"; fi; }
+die() { err "$*"; exit 1; }
+
+section_header() {
+ echo ""
+ echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
+ echo ""
+}
+
+field() {
+ printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
+}
+
+field_color() {
+ printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
+}
+
+# Print the whole seconds elapsed since START_TIME, suffixed with "s".
+elapsed() {
+    local now
+    now=$(date +%s)
+    local seconds=$(( now - START_TIME ))
+    echo "${seconds}s"
+}
+
+# ── Defaults ──────────────────────────────────────────────────────────
+RUN_MODE=""
+ALSO_ROTATE="false"
+INSTANCE_NAME=""
+ZONE=""
+TARGET_ALL="false"
+SNAPSHOT_NAME=""
+KEEP="${GSM_KEEP:-3}"
+PREFIX="${GSM_PREFIX:-auto}"
+MAX_AGE="${GSM_MAX_AGE:-7}"
+OUTPUT_FORMAT="${GSM_FORMAT:-text}"
+DRY_RUN="true"
+FORCE="false"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+GCP_PROJECT=""
+
+# ── State ─────────────────────────────────────────────────────────────
+SCRIPT_NAME="$(basename "$0")"
+readonly SCRIPT_NAME
+START_TIME=""
+SNAP_CREATED=0
+SNAP_DELETED=0
+SNAP_ERRORS=0
+
+# ── Dependency and credential checks ────────────────────────────────
+check_deps() {
+ command -v gcloud &>/dev/null || die "gcloud CLI is required"
+ command -v jq &>/dev/null || die "jq is required"
+}
+
+check_credentials() {
+ local account
+ account=$(gcloud auth list --filter="status:ACTIVE" --format="value(account)" 2>/dev/null)
+ [[ -z "$account" ]] && die "No active gcloud credentials — run 'gcloud auth login'"
+
+ if [[ -n "$GCP_PROJECT" ]]; then
+ gcloud config set project "$GCP_PROJECT" --quiet 2>/dev/null \
+ || die "Cannot set project: ${GCP_PROJECT}"
+ else
+ GCP_PROJECT=$(gcloud config get-value project 2>/dev/null)
+ [[ -z "$GCP_PROJECT" || "$GCP_PROJECT" == "(unset)" ]] && die "No project set — use --project or 'gcloud config set project'"
+ fi
+
+ log "Project: ${GCP_PROJECT}"
+}
+
+# ── Instance helpers ─────────────────────────────────────────────────
+get_all_instances() {
+ gcloud compute instances list --project "$GCP_PROJECT" --format=json 2>/dev/null
+}
+
+# Print the name of the boot disk of instance $1 in zone $2: the last path
+# component of the `source` URL of the disk whose `boot` flag is true.
+get_boot_disk() {
+    local instance="$1"
+    local zone="$2"
+
+    gcloud compute instances describe "$instance" \
+        --zone "$zone" --project "$GCP_PROJECT" \
+        --format='json(disks)' 2>/dev/null \
+        | jq -r '.disks[] | select(.boot == true) | .source' 2>/dev/null \
+        | sed 's|.*/||'
+}
+
+# Print the short zone name of an instance: $1 is the instance's JSON object,
+# and the result is the last path component of its `.zone` value.
+get_instance_zone() {
+    local instance_json="$1"
+    echo "$instance_json" | jq -r '.zone' | sed 's|.*/||'
+}
+
+# ── Snapshot helpers ─────────────────────────────────────────────────
+list_snapshots() {
+ gcloud compute snapshots list --project "$GCP_PROJECT" --format=json 2>/dev/null
+}
+
+# Print the subset of list_snapshots whose name starts with PREFIX, as a
+# JSON array.
+managed_snapshots() {
+    local by_prefix='[.[] | select(.name | startswith($pfx))]'
+    list_snapshots | jq --arg pfx "$PREFIX" "$by_prefix"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# SNAPSHOT
+# ══════════════════════════════════════════════════════════════════════
+do_snapshot() {
+ local instances_json
+ instances_json=$(get_all_instances)
+
+ local instances
+ if [[ "$TARGET_ALL" == "true" ]]; then
+ instances="$instances_json"
+ elif [[ -n "$INSTANCE_NAME" ]]; then
+ instances=$(echo "$instances_json" | jq --arg n "$INSTANCE_NAME" '[.[] | select(.name == $n)]')
+ else
+ die "Specify --instance NAME or --all"
+ fi
+
+ local count
+ count=$(echo "$instances" | jq 'length')
+ [[ "$count" -eq 0 ]] && die "No instances found"
+
+ local target_label="$INSTANCE_NAME"
+ [[ "$TARGET_ALL" == "true" ]] && target_label="all (${count} instances)"
+
+ section_header "Creating Snapshots"
+ field "Target:" "$target_label"
+ field "Prefix:" "$PREFIX"
+ echo ""
+
+ echo "$instances" | jq -c '.[]' | while IFS= read -r inst; do
+ local name zone disk_name snap_name
+ name=$(echo "$inst" | jq -r '.name')
+ zone=$(get_instance_zone "$inst")
+ disk_name=$(get_boot_disk "$name" "$zone")
+ snap_name="${PREFIX}-${name}-$(date +%Y%m%d-%H%M%S)"
+
+ if [[ -z "$disk_name" ]]; then
+ echo -e " ${RED}✗${RESET} ${name} (${zone}) no boot disk found"
+ ((SNAP_ERRORS++)) || true
+ continue
+ fi
+
+ verbose "Snapshotting ${name} disk ${disk_name} in ${zone}"
+
+ if gcloud compute snapshots create "$snap_name" \
+ --source-disk="$disk_name" \
+ --source-disk-zone="$zone" \
+ --project "$GCP_PROJECT" \
+ --labels="managed-by=gcp-snapshot-manager,source-instance=${name}" \
+ --quiet 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} ${name} (${zone}) ${snap_name}"
+ ((SNAP_CREATED++)) || true
+ else
+ echo -e " ${RED}✗${RESET} ${name} (${zone}) failed"
+ ((SNAP_ERRORS++)) || true
+ fi
+
+ sleep 1
+ done
+
+ echo ""
+ field_color "Created:" "${GREEN}${SNAP_CREATED}${RESET}"
+ if [[ "$SNAP_ERRORS" -gt 0 ]]; then
+ field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}"
+ fi
+
+ if [[ "$ALSO_ROTATE" == "true" ]]; then
+ do_rotate
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# ROTATE
+# ══════════════════════════════════════════════════════════════════════
+do_rotate() {
+ section_header "Rotating Snapshots"
+ field "Keep:" "$KEEP per instance"
+ field "Prefix:" "$PREFIX"
+ if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+ field "Mode:" "DRY RUN (use --force to delete)"
+ else
+ field "Mode:" "LIVE — deletions are permanent"
+ fi
+ echo ""
+
+ local snaps
+ snaps=$(managed_snapshots)
+
+ local instance_names
+ instance_names=$(echo "$snaps" | jq -r '.[].labels["source-instance"] // empty' | sort -u)
+
+ if [[ -z "$instance_names" ]]; then
+ log "No managed snapshots found matching prefix '${PREFIX}'"
+ return
+ fi
+
+ while IFS= read -r inst; do
+ [[ -z "$inst" ]] && continue
+ local inst_snaps
+ inst_snaps=$(echo "$snaps" | jq --arg inst "$inst" \
+ '[.[] | select(.labels["source-instance"] == $inst)] | sort_by(.creationTimestamp) | reverse')
+ local total
+ total=$(echo "$inst_snaps" | jq 'length')
+
+ if (( total <= KEEP )); then
+ verbose "${inst}: ${total} snapshots, keeping all"
+ continue
+ fi
+
+ local to_delete
+ to_delete=$(echo "$inst_snaps" | jq --argjson k "$KEEP" '.[$k:]')
+ local del_count
+ del_count=$(echo "$to_delete" | jq 'length')
+
+ echo "$to_delete" | jq -c '.[]' | while IFS= read -r snap; do
+ local sname
+ sname=$(echo "$snap" | jq -r '.name')
+
+ if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
+ echo -e " ${DIM}[DRY RUN]${RESET} would delete ${sname}"
+ else
+ if gcloud compute snapshots delete "$sname" \
+ --project "$GCP_PROJECT" --quiet 2>/dev/null; then
+ echo -e " ${YELLOW}✓${RESET} deleted ${sname}"
+ ((SNAP_DELETED++)) || true
+ else
+ echo -e " ${RED}✗${RESET} failed to delete ${sname}"
+ ((SNAP_ERRORS++)) || true
+ fi
+ fi
+ done
+
+ log "${inst}: ${total} total, keeping ${KEEP}, removing ${del_count}"
+ done <<< "$instance_names"
+
+ echo ""
+ field_color "Deleted:" "${YELLOW}${SNAP_DELETED}${RESET}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# LIST
+# ══════════════════════════════════════════════════════════════════════
+do_list() {
+ section_header "All Snapshots"
+
+ local snaps
+ snaps=$(list_snapshots)
+ local count
+ count=$(echo "$snaps" | jq 'length')
+
+ if [[ "$count" -eq 0 ]]; then
+ log "No snapshots found"
+ return
+ fi
+
+ printf " %-40s %-10s %-12s %-16s %s\n" \
+ "NAME" "SIZE_GB" "AGE" "SOURCE_DISK" "SOURCE_INSTANCE"
+ printf " %s\n" "$(printf '%.0s─' {1..100})"
+
+ local now
+ now=$(date +%s)
+
+ echo "$snaps" | jq -c '.[]' | while IFS= read -r snap; do
+ local name size_gb created source_disk source_inst age_str
+ name=$(echo "$snap" | jq -r '.name')
+ size_gb=$(echo "$snap" | jq -r '.diskSizeGb // 0')
+ created=$(echo "$snap" | jq -r '.creationTimestamp // ""')
+ source_disk=$(echo "$snap" | jq -r '.sourceDisk // ""' | rev | cut -d/ -f1 | rev)
+ source_inst=$(echo "$snap" | jq -r '.labels["source-instance"] // "manual"')
+
+ if [[ -n "$created" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$created" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ local age_days=$(( (now - snap_epoch) / 86400 ))
+ age_str="${age_days}d"
+ else
+ age_str="unknown"
+ fi
+ else
+ age_str="unknown"
+ fi
+
+ printf " %-40s %-10s %-12s %-16s %s\n" \
+ "${name:0:39}" "$size_gb" "$age_str" "${source_disk:0:15}" "${source_inst:0:20}"
+ done
+
+ echo ""
+ field "Total snapshots:" "$count"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# AUDIT
+# ══════════════════════════════════════════════════════════════════════
+do_audit() {
+ section_header "Snapshot Audit"
+
+ local instances_json
+ instances_json=$(get_all_instances)
+ local snaps
+ snaps=$(list_snapshots)
+ local now
+ now=$(date +%s)
+
+ printf " %-24s %-14s %-24s %-8s %-8s %s\n" \
+ "INSTANCE" "ZONE" "LATEST_SNAPSHOT" "AGE" "COUNT" "STATUS"
+ printf " %s\n" "$(printf '%.0s─' {1..100})"
+
+ echo "$instances_json" | jq -c '.[]' | while IFS= read -r inst; do
+ local name zone
+ name=$(echo "$inst" | jq -r '.name')
+ zone=$(get_instance_zone "$inst")
+
+ local inst_snaps snap_count
+ inst_snaps=$(echo "$snaps" | jq --arg inst "$name" \
+ '[.[] | select(.labels["source-instance"] == $inst)]')
+ snap_count=$(echo "$inst_snaps" | jq 'length')
+
+ if [[ "$snap_count" -eq 0 ]]; then
+ printf " %-24s %-14s %-24s %-8s %-8s %b%s%b\n" \
+ "${name:0:23}" "${zone:0:13}" "(none)" "—" "0" \
+ "$RED" "✗ Unprotected" "$RESET"
+ continue
+ fi
+
+ local latest_name latest_date age_str status color
+ latest_name=$(echo "$inst_snaps" | jq -r 'sort_by(.creationTimestamp) | last | .name // ""')
+ latest_date=$(echo "$inst_snaps" | jq -r 'sort_by(.creationTimestamp) | last | .creationTimestamp // ""')
+
+ if [[ -n "$latest_date" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ local age_days=$(( (now - snap_epoch) / 86400 ))
+ age_str="${age_days}d"
+ if (( age_days > MAX_AGE )); then
+ status="⚠ Stale"; color="$YELLOW"
+ else
+ status="✓ OK"; color="$GREEN"
+ fi
+ else
+ age_str="unknown"; status="✓ OK"; color="$GREEN"
+ fi
+ else
+ age_str="unknown"; status="✓ OK"; color="$GREEN"
+ fi
+
+ printf " %-24s %-14s %-24s %-8s %-8s %b%s%b\n" \
+ "${name:0:23}" "${zone:0:13}" "${latest_name:0:23}" \
+ "$age_str" "$snap_count" "$color" "$status" "$RESET"
+ done
+
+ echo ""
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# RESTORE
+# ══════════════════════════════════════════════════════════════════════
+do_restore() {
+ [[ -z "$INSTANCE_NAME" ]] && die "--restore requires --instance NAME"
+ [[ -z "$SNAPSHOT_NAME" ]] && die "--restore requires --snapshot-name NAME"
+ [[ -z "$ZONE" ]] && die "--restore requires --zone ZONE"
+
+ section_header "Restore from Snapshot"
+ field "Instance:" "$INSTANCE_NAME"
+ field "Snapshot:" "$SNAPSHOT_NAME"
+ field "Zone:" "$ZONE"
+ echo ""
+
+ if [[ "$FORCE" != "true" ]]; then
+ warn "This will stop the instance and replace its boot disk. Use --force to confirm."
+ return
+ fi
+
+ log "Creating disk from snapshot..."
+ local disk_name="restored-${INSTANCE_NAME}-$(date +%Y%m%d-%H%M%S)"
+
+ if gcloud compute disks create "$disk_name" \
+ --source-snapshot="$SNAPSHOT_NAME" \
+ --zone="$ZONE" \
+ --project "$GCP_PROJECT" \
+ --quiet 2>/dev/null; then
+ echo -e " ${GREEN}✓${RESET} Disk created: ${disk_name}"
+ else
+ die "Failed to create disk from snapshot"
+ fi
+
+ log "Stopping instance..."
+ gcloud compute instances stop "$INSTANCE_NAME" \
+ --zone="$ZONE" --project "$GCP_PROJECT" --quiet 2>/dev/null \
+ || die "Failed to stop instance"
+
+ local old_disk
+ old_disk=$(get_boot_disk "$INSTANCE_NAME" "$ZONE")
+
+ log "Detaching old boot disk..."
+ gcloud compute instances detach-disk "$INSTANCE_NAME" \
+ --disk="$old_disk" --zone="$ZONE" --project "$GCP_PROJECT" \
+ --quiet 2>/dev/null || die "Failed to detach old disk"
+
+ log "Attaching restored disk..."
+ gcloud compute instances attach-disk "$INSTANCE_NAME" \
+ --disk="$disk_name" --zone="$ZONE" --boot \
+ --project "$GCP_PROJECT" --quiet 2>/dev/null \
+ || die "Failed to attach restored disk"
+
+ log "Starting instance..."
+ gcloud compute instances start "$INSTANCE_NAME" \
+ --zone="$ZONE" --project "$GCP_PROJECT" --quiet 2>/dev/null
+ echo -e " ${GREEN}✓${RESET} Instance started with restored disk"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# STATUS
+# ══════════════════════════════════════════════════════════════════════
+do_status() {
+ local instances_json
+ instances_json=$(get_all_instances)
+ local snaps
+ snaps=$(list_snapshots)
+ local now
+ now=$(date +%s)
+
+ local total_instances=0 total_snaps=0 total_gb=0
+ local protected=0 stale=0 unprotected=0
+
+ while IFS= read -r inst; do
+ [[ -z "$inst" ]] && continue
+ ((total_instances++)) || true
+
+ local name
+ name=$(echo "$inst" | jq -r '.name')
+
+ local inst_snaps snap_count
+ inst_snaps=$(echo "$snaps" | jq --arg inst "$name" \
+ '[.[] | select(.labels["source-instance"] == $inst)]')
+ snap_count=$(echo "$inst_snaps" | jq 'length')
+ total_snaps=$(( total_snaps + snap_count ))
+
+ local gb
+ gb=$(echo "$inst_snaps" | jq '[.[].diskSizeGb // 0 | tonumber] | add // 0')
+ total_gb=$(( total_gb + gb ))
+
+ if [[ "$snap_count" -eq 0 ]]; then
+ ((unprotected++)) || true
+ continue
+ fi
+
+ local latest_date
+ latest_date=$(echo "$inst_snaps" | jq -r \
+ 'sort_by(.creationTimestamp) | last | .creationTimestamp // ""')
+
+ if [[ -n "$latest_date" ]]; then
+ local snap_epoch
+ snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0)
+ if [[ "$snap_epoch" -gt 0 ]]; then
+ local age_days=$(( (now - snap_epoch) / 86400 ))
+ if (( age_days > MAX_AGE )); then
+ ((stale++)) || true
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ else
+ ((protected++)) || true
+ fi
+ done < <(echo "$instances_json" | jq -c '.[]')
+
+ if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then
+ cat <${MAX_AGE}d):" "${YELLOW}${stale}${RESET}"
+ else
+ field_color "Stale (>${MAX_AGE}d):" "${GREEN}0${RESET}"
+ fi
+ if [[ "$unprotected" -gt 0 ]]; then
+ field_color "Unprotected:" "${RED}${unprotected}${RESET}"
+ else
+ field_color "Unprotected:" "${GREEN}0${RESET}"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# HELP
+# ══════════════════════════════════════════════════════════════════════
+show_help() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+validate_config() {
+ if [[ -z "$GITEA_URL" ]]; then
+ echo "ERROR: GITEA_URL environment variable is required" >&2
+ exit 1
+ fi
+ if [[ -z "$GITEA_TOKEN" ]]; then
+ echo "ERROR: GITEA_TOKEN environment variable is required" >&2
+ exit 1
+ fi
+ # Strip trailing slash
+ GITEA_URL="${GITEA_URL%/}"
+}
+
+api_get() {
+ local endpoint="$1"
+ curl -sf --max-time "$CURL_TIMEOUT" \
+ -H "Authorization: token ${GITEA_TOKEN}" \
+ "${GITEA_URL}${endpoint}" 2>/dev/null || echo ""
+}
+
+api_get_with_headers() {
+ local endpoint="$1"
+ local response
+ response=$(curl -sD - --max-time "$CURL_TIMEOUT" \
+ -H "Authorization: token ${GITEA_TOKEN}" \
+ "${GITEA_URL}${endpoint}" 2>/dev/null) || { echo ""; return; }
+
+ local headers body
+ headers=$(echo "$response" | sed '/^\r$/q')
+ body=$(echo "$response" | sed '1,/^\r$/d')
+
+ local total_count
+ total_count=$(echo "$headers" | grep -i '^X-Total-Count:' | tr -d '\r' | awk '{print $2}')
+
+ echo "${total_count:-0}"
+ echo "$body"
+}
+
+# Print $1 with every character outside [a-zA-Z0-9_/.-] replaced by "_".
+sanitize_label() {
+    local raw_label="$1"
+    sed 's/[^a-zA-Z0-9_\/.-]/_/g' < <(echo "$raw_label")
+}
+
+add_metric() {
+ local name="$1"
+ local type="$2"
+ local help="$3"
+ local value="$4"
+ local labels="${5:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name} ${value}
+"
+ fi
+}
+
+add_metric_value() {
+ local name="$1"
+ local value="$2"
+ local labels="${3:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="${name} ${value}
+"
+ fi
+}
+
+collect_version() {
+ local version_json
+ version_json=$(api_get "/api/v1/version")
+
+ if [[ -z "$version_json" ]]; then
+ add_metric "gitea_up" "gauge" "Gitea reachability (1=up, 0=down)" "0"
+ return 1
+ fi
+
+ add_metric "gitea_up" "gauge" "Gitea reachability (1=up, 0=down)" "1"
+
+ local version
+ version=$(echo "$version_json" | jq -r '.version // empty' 2>/dev/null)
+
+ if [[ -n "$version" ]]; then
+ add_metric "gitea_version_info" "gauge" "Gitea/Forgejo version" "1" "version=\"${version}\""
+ fi
+
+ return 0
+}
+
+# Emit gitea_users_total from the total count reported by the admin users
+# endpoint. Nothing is emitted when the request yields no output or the
+# count is "0".
+collect_users() {
+    local reply
+    reply=$(api_get_with_headers "/api/v1/admin/users?limit=1")
+    [[ -n "$reply" ]] || return 0
+
+    # First line of the reply is the total count.
+    local total_count="${reply%%$'\n'*}"
+
+    if [[ -n "$total_count" && "$total_count" != "0" ]]; then
+        add_metric "gitea_users_total" "gauge" "Total number of users" "$total_count"
+    fi
+}
+
+# Emit gitea_organizations_total from the total count reported by the admin
+# orgs endpoint. Nothing is emitted when the request yields no output.
+collect_organizations() {
+    local reply
+    reply=$(api_get_with_headers "/api/v1/admin/orgs?limit=1")
+    [[ -n "$reply" ]] || return 0
+
+    # First line of the reply is the total count.
+    local total_count="${reply%%$'\n'*}"
+
+    if [[ -n "$total_count" ]]; then
+        add_metric "gitea_organizations_total" "gauge" "Total number of organizations" "$total_count"
+    fi
+}
+
+# Emit gitea_repositories_total from the total count reported by the repo
+# search endpoint. Nothing is emitted when the request yields no output.
+collect_repositories() {
+    local reply
+    reply=$(api_get_with_headers "/api/v1/repos/search?limit=1")
+    [[ -n "$reply" ]] || return 0
+
+    # First line of the reply is the total count.
+    local total_count="${reply%%$'\n'*}"
+
+    if [[ -n "$total_count" ]]; then
+        add_metric "gitea_repositories_total" "gauge" "Total number of repositories" "$total_count"
+    fi
+}
+
+collect_repo_details() {
+ local page=1
+ local per_page=50
+ local collected=0
+ local first_page=true
+
+ # Add HELP/TYPE lines for per-repo metrics
+ OUTPUT+="# HELP gitea_repo_stars Number of stars for the repository
+# TYPE gitea_repo_stars gauge
+# HELP gitea_repo_forks Number of forks for the repository
+# TYPE gitea_repo_forks gauge
+# HELP gitea_repo_open_issues Number of open issues for the repository
+# TYPE gitea_repo_open_issues gauge
+# HELP gitea_repo_open_pull_requests Number of open pull requests for the repository
+# TYPE gitea_repo_open_pull_requests gauge
+# HELP gitea_repo_size_bytes Repository size in bytes
+# TYPE gitea_repo_size_bytes gauge
+# HELP gitea_repo_is_mirror Whether the repository is a mirror (1=yes, 0=no)
+# TYPE gitea_repo_is_mirror gauge
+"
+
+ while [[ $collected -lt $MAX_REPOS ]]; do
+ local remaining=$((MAX_REPOS - collected))
+ local fetch_count=$((remaining < per_page ? remaining : per_page))
+
+ local repos_json
+ repos_json=$(api_get "/api/v1/repos/search?limit=${fetch_count}&page=${page}")
+
+ if [[ -z "$repos_json" ]]; then
+ break
+ fi
+
+ local repo_count
+ repo_count=$(echo "$repos_json" | jq -r '.data | length // 0' 2>/dev/null)
+
+ if [[ "$repo_count" == "0" || -z "$repo_count" ]]; then
+ break
+ fi
+
+ local i
+ for ((i = 0; i < repo_count && collected < MAX_REPOS; i++)); do
+ local full_name stars forks open_issues size mirror has_pull_requests
+ full_name=$(echo "$repos_json" | jq -r ".data[$i].full_name // empty" 2>/dev/null)
+ stars=$(echo "$repos_json" | jq -r ".data[$i].stars_count // 0" 2>/dev/null)
+ forks=$(echo "$repos_json" | jq -r ".data[$i].forks_count // 0" 2>/dev/null)
+ open_issues=$(echo "$repos_json" | jq -r ".data[$i].open_issues_count // 0" 2>/dev/null)
+ size=$(echo "$repos_json" | jq -r ".data[$i].size // 0" 2>/dev/null)
+ mirror=$(echo "$repos_json" | jq -r ".data[$i].mirror // false" 2>/dev/null)
+ has_pull_requests=$(echo "$repos_json" | jq -r ".data[$i].has_pull_requests // true" 2>/dev/null)
+
+ if [[ -z "$full_name" ]]; then
+ continue
+ fi
+
+ local safe_name
+ safe_name=$(sanitize_label "$full_name")
+ local label="repo=\"${safe_name}\""
+
+ # Size: API returns KB, convert to bytes
+ local size_bytes=$((size * 1024))
+
+ # Mirror: convert bool to 0/1
+ local mirror_val=0
+ if [[ "$mirror" == "true" ]]; then
+ mirror_val=1
+ fi
+
+ # Open PRs: fetch from repo API if pull requests are enabled
+ local open_prs=0
+ if [[ "$has_pull_requests" == "true" ]]; then
+ local owner repo_name
+ owner=$(echo "$repos_json" | jq -r ".data[$i].owner.login // empty" 2>/dev/null)
+ repo_name=$(echo "$repos_json" | jq -r ".data[$i].name // empty" 2>/dev/null)
+ if [[ -n "$owner" && -n "$repo_name" ]]; then
+ local pr_response
+ pr_response=$(api_get_with_headers "/api/v1/repos/${owner}/${repo_name}/pulls?state=open&limit=1")
+ if [[ -n "$pr_response" ]]; then
+ open_prs=$(echo "$pr_response" | head -1)
+ fi
+ fi
+ fi
+
+ add_metric_value "gitea_repo_stars" "$stars" "$label"
+ add_metric_value "gitea_repo_forks" "$forks" "$label"
+ add_metric_value "gitea_repo_open_issues" "$open_issues" "$label"
+ add_metric_value "gitea_repo_open_pull_requests" "${open_prs:-0}" "$label"
+ add_metric_value "gitea_repo_size_bytes" "$size_bytes" "$label"
+ add_metric_value "gitea_repo_is_mirror" "$mirror_val" "$label"
+
+ collected=$((collected + 1))
+ done
+
+ # If we got fewer than requested, we've reached the end
+ if [[ $repo_count -lt $fetch_count ]]; then
+ break
+ fi
+
+ page=$((page + 1))
+ done
+}
+
+collect_runners() {
+ local runners_json
+ runners_json=$(api_get "/api/v1/admin/runners")
+
+ # Runner endpoint may 404 if Actions is not enabled — skip gracefully
+ if [[ -z "$runners_json" ]]; then
+ return
+ fi
+
+ # Validate we got a JSON array
+ local is_array
+ is_array=$(echo "$runners_json" | jq -r 'if type == "array" then "yes" else "no" end' 2>/dev/null)
+
+ if [[ "$is_array" != "yes" ]]; then
+ return
+ fi
+
+ local total online offline
+ total=$(echo "$runners_json" | jq 'length' 2>/dev/null)
+ online=$(echo "$runners_json" | jq '[.[] | select(.status == "online")] | length' 2>/dev/null)
+ offline=$(echo "$runners_json" | jq '[.[] | select(.status != "online")] | length' 2>/dev/null)
+
+ add_metric "gitea_runners_total" "gauge" "Total number of registered runners" "${total:-0}"
+ add_metric "gitea_runners_online" "gauge" "Number of online runners" "${online:-0}"
+ add_metric "gitea_runners_offline" "gauge" "Number of offline runners" "${offline:-0}"
+}
+
+# Deliver the accumulated OUTPUT: to stdout by default, or, when
+# TEXTFILE_MODE is true, to ${TEXTFILE_DIR}/gitea.prom via a temporary file
+# in the same directory that is then moved into place.
+write_output() {
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local output_file="${TEXTFILE_DIR}/gitea.prom"
+    local temp_file="${output_file}.$$"
+
+    mkdir -p "$TEXTFILE_DIR"
+    echo "$OUTPUT" > "$temp_file"
+    mv "$temp_file" "$output_file"
+}
+
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ script_path=$(readlink -f "$0")
+
+ cat > /etc/cron.d/gitea-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/gitea-exporter
+ echo "Installed cron job: /etc/cron.d/gitea-exporter"
+ echo "Metrics will be written to: ${TEXTFILE_DIR}/gitea.prom"
+}
+
+# --- Main ---
+
+main() {
+ # Parse arguments
+ for arg in "$@"; do
+ case "$arg" in
+ --textfile) TEXTFILE_MODE=true ;;
+ --install)
+ check_dependencies
+ validate_config
+ install_cron
+ exit 0
+ ;;
+ --help|-h) usage ;;
+ *) echo "Unknown option: $arg" >&2; usage ;;
+ esac
+ done
+
+ check_dependencies
+ validate_config
+
+ START_TIME=$(date +%s%N)
+
+ # Exporter info
+ add_metric "gitea_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+ # Collect metrics
+ if collect_version; then
+ collect_users
+ collect_organizations
+ collect_repositories
+ collect_repo_details
+ collect_runners
+ fi
+
+ # Exporter performance
+ local end_time duration
+ end_time=$(date +%s%N)
+ duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+ add_metric "gitea_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
+ add_metric "gitea_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+ write_output
+}
+
+main "$@"
diff --git a/gitlab-docker-register-runner.sh b/gitlab-docker-register-runner.sh
new file mode 100644
index 0000000..d46ff87
--- /dev/null
+++ b/gitlab-docker-register-runner.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# register-runner.sh — GitLab Runner registration helper
+#
+# Registers the Docker-based GitLab Runner against your GitLab CE instance.
+# Designed to work with the gitlab-docker-compose.yml stack.
+#
+# Usage:
+#   ./register-runner.sh <runner-token>
+# ./register-runner.sh glrt-xxxxxxxxxxxxxxxxxxxx
+#
+# The runner token is obtained from:
+# Admin Area → CI/CD → Runners → New instance runner → Create runner
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# License: MIT
+###############################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+# Each setting keeps a non-empty value supplied through the environment and
+# otherwise takes the default given here.
+: "${GITLAB_HOSTNAME:=gitlab.local}"
+: "${RUNNER_CONTAINER:=gitlab-runner}"
+: "${RUNNER_EXECUTOR:=docker}"
+: "${RUNNER_IMAGE:=alpine:latest}"
+: "${RUNNER_DESCRIPTION:=docker-runner}"
+: "${RUNNER_TAGS:=docker,linux}"
+
+# ── Colors ────────────────────────────────────────────────────────────
+# Plain output unless stdout is a terminal.
+RED=""
+GREEN=""
+YELLOW=""
+BOLD=""
+RESET=""
+if [[ -t 1 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BOLD='\033[1m'
+    RESET='\033[0m'
+fi
+
+# log: success message on stdout.
+log() {
+    echo -e "${GREEN}[OK]${RESET} $*"
+}
+
+# warn: warning message on stderr.
+warn() {
+    echo -e "${YELLOW}[WARN]${RESET} $*" >&2
+}
+
+# err: error message on stderr.
+err() {
+    echo -e "${RED}[ERROR]${RESET} $*" >&2
+}
+
+# ── Validation ────────────────────────────────────────────────────────
+# The runner token is the single required argument. Without it, print the
+# usage text and stop. The usage line now names the argument; it previously
+# ended after the script name.
+if [[ $# -lt 1 ]]; then
+    echo -e "${BOLD}Usage:${RESET} $(basename "$0") <runner-token>"
+    echo ""
+    echo "Get the token from: Admin Area → CI/CD → Runners → New instance runner"
+    echo ""
+    echo "Environment variables:"
+    echo " GITLAB_HOSTNAME GitLab server hostname (default: gitlab.local)"
+    echo " RUNNER_CONTAINER Runner container name (default: gitlab-runner)"
+    echo " RUNNER_EXECUTOR Executor type (default: docker)"
+    echo " RUNNER_IMAGE Default CI image (default: alpine:latest)"
+    echo " RUNNER_DESCRIPTION Runner description (default: docker-runner)"
+    echo " RUNNER_TAGS Comma-separated tags (default: docker,linux)"
+    exit 1
+fi
+
+RUNNER_TOKEN="$1"
+
+# Verify the runner container exists ...
+if ! docker inspect "$RUNNER_CONTAINER" &>/dev/null; then
+    err "Container '${RUNNER_CONTAINER}' not found. Is the stack running?"
+    err "Run: docker compose up -d"
+    exit 1
+fi
+
+# ... and is currently running.
+if [[ "$(docker inspect -f '{{.State.Running}}' "$RUNNER_CONTAINER" 2>/dev/null)" != "true" ]]; then
+    err "Container '${RUNNER_CONTAINER}' is not running."
+    exit 1
+fi
+
+# ── Register ──────────────────────────────────────────────────────────
+echo -e "${BOLD}Registering GitLab Runner...${RESET}"
+echo " GitLab URL: https://${GITLAB_HOSTNAME}"
+echo " Executor: ${RUNNER_EXECUTOR}"
+echo " Default image: ${RUNNER_IMAGE}"
+echo " Tags: ${RUNNER_TAGS}"
+echo " Description: ${RUNNER_DESCRIPTION}"
+echo ""
+
+docker exec "$RUNNER_CONTAINER" gitlab-runner register \
+ --non-interactive \
+ --url "https://${GITLAB_HOSTNAME}" \
+ --token "$RUNNER_TOKEN" \
+ --executor "$RUNNER_EXECUTOR" \
+ --docker-image "$RUNNER_IMAGE" \
+ --description "$RUNNER_DESCRIPTION" \
+ --tag-list "$RUNNER_TAGS" \
+ --docker-network-mode "gitlab-net" \
+ --docker-volumes "/var/run/docker.sock:/var/run/docker.sock"
+
+echo ""
+
+# ── Verify ────────────────────────────────────────────────────────────
+if docker exec "$RUNNER_CONTAINER" gitlab-runner list 2>&1 | grep -q "$RUNNER_DESCRIPTION"; then
+ log "Runner registered successfully."
+ echo ""
+ docker exec "$RUNNER_CONTAINER" gitlab-runner list
+else
+ warn "Registration completed but runner not found in list. Check logs:"
+ echo " docker compose logs gitlab-runner"
+fi
diff --git a/gitlab-metrics-exporter.sh b/gitlab-metrics-exporter.sh
index 5b47027..b2e7c3b 100755
--- a/gitlab-metrics-exporter.sh
+++ b/gitlab-metrics-exporter.sh
@@ -6,7 +6,7 @@
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
-#### Version: 1.00-030426 ####
+#### Version: 1.01-210426 ####
################################################
set -o pipefail
@@ -557,13 +557,13 @@ collect_local_metrics() {
# GitLab version info
local version_patterns="^gitlab_version_info[{ ]"
local version_help="^# (HELP|TYPE) gitlab_version_info"
- metrics+=$(echo "$raw_metrics" | grep -E "$version_help|$version_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$version_help|$version_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Puma metrics
local puma_patterns="^puma_workers[{ ]|^puma_running_workers[{ ]|^puma_running[{ ]|^puma_queued_connections[{ ]|^puma_active_connections[{ ]|^puma_pool_capacity[{ ]|^puma_max_threads[{ ]|^puma_idle_threads[{ ]"
local puma_help="^# (HELP|TYPE) puma_"
- metrics+=$(echo "$raw_metrics" | grep -E "$puma_help|$puma_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$puma_help|$puma_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Sidekiq metrics (served by separate Sidekiq exporter, default localhost:8082)
@@ -574,37 +574,37 @@ collect_local_metrics() {
# Core Sidekiq job metrics
local sidekiq_patterns="^sidekiq_running_jobs[{ ]|^sidekiq_concurrency[{ ]|^sidekiq_mem_total_bytes[{ ]|^sidekiq_jobs_failed_total[{ ]|^sidekiq_jobs_dead_total[{ ]|^sidekiq_enqueued_jobs_total[{ ]|^sidekiq_jobs_completion_seconds[_{ ]|^sidekiq_jobs_queue_duration_seconds[_{ ]|^sidekiq_jobs_cpu_seconds[_{ ]|^sidekiq_jobs_db_seconds[_{ ]|^sidekiq_jobs_gitaly_seconds[_{ ]|^sidekiq_redis_requests_total[{ ]|^sidekiq_redis_requests_duration_seconds[_{ ]"
local sidekiq_help="^# (HELP|TYPE) sidekiq_(running_jobs|concurrency|mem_total_bytes|jobs_failed_total|jobs_dead_total|enqueued_jobs_total|jobs_completion_seconds|jobs_queue_duration_seconds|jobs_cpu_seconds|jobs_db_seconds|jobs_gitaly_seconds|redis_requests_total|redis_requests_duration_seconds)"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$sidekiq_help|$sidekiq_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$sidekiq_help|$sidekiq_patterns" 2>/dev/null || true)
metrics+=$'\n'
# CI/CD pipeline internals
local ci_patterns="^pipelines_created_total[{ ]|^deployments[{ ]|^gitlab_ci_pipeline_creation_duration_seconds[_{ ]|^gitlab_ci_pipeline_failure_reasons[{ ]|^gitlab_ci_active_jobs[_{ ]"
local ci_help="^# (HELP|TYPE) (pipelines_created_total|deployments|gitlab_ci_pipeline_creation_duration_seconds|gitlab_ci_pipeline_failure_reasons|gitlab_ci_active_jobs)"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$ci_help|$ci_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$ci_help|$ci_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Email delivery metrics
local email_patterns="^gitlab_emails_delivered_total[{ ]|^gitlab_emails_delivery_attempts_total[{ ]"
local email_help="^# (HELP|TYPE) gitlab_emails_(delivered_total|delivery_attempts_total)"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$email_help|$email_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$email_help|$email_patterns" 2>/dev/null || true)
metrics+=$'\n'
# External HTTP (webhooks, integrations)
local ext_http_patterns="^gitlab_external_http_total[{ ]|^gitlab_external_http_duration_seconds[_{ ]"
local ext_http_help="^# (HELP|TYPE) gitlab_external_http_(total|duration_seconds)"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$ext_http_help|$ext_http_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$ext_http_help|$ext_http_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Sidekiq SLI apdex/errors
local sli_patterns="^gitlab_sli_sidekiq_execution_apdex_success_total[{ ]|^gitlab_sli_sidekiq_execution_apdex_total[{ ]|^gitlab_sli_sidekiq_execution_error_total[{ ]|^gitlab_sli_sidekiq_execution_total[{ ]"
local sli_help="^# (HELP|TYPE) gitlab_sli_sidekiq_execution"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$sli_help|$sli_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$sli_help|$sli_patterns" 2>/dev/null || true)
metrics+=$'\n'
# DB transaction duration, primary SQL, threads, cache, workers
local extra_patterns="^gitlab_database_transaction_seconds[_{ ]|^gitlab_sql_primary_duration_seconds[_{ ]|^gitlab_ruby_threads_running_threads[{ ]|^gitlab_ruby_threads_max_expected_threads[{ ]|^limited_capacity_worker_running_jobs[{ ]|^limited_capacity_worker_max_running_jobs[{ ]|^limited_capacity_worker_remaining_work_count[{ ]|^redis_hit_miss_operations_total[{ ]"
local extra_help="^# (HELP|TYPE) (gitlab_database_transaction_seconds|gitlab_sql_primary_duration_seconds|gitlab_ruby_threads_running_threads|gitlab_ruby_threads_max_expected_threads|limited_capacity_worker_running_jobs|limited_capacity_worker_max_running_jobs|limited_capacity_worker_remaining_work_count|redis_hit_miss_operations_total)"
- metrics+=$(echo "$sidekiq_raw" | grep -E "$extra_help|$extra_patterns" 2>/dev/null)
+ metrics+=$(echo "$sidekiq_raw" | grep -E "$extra_help|$extra_patterns" 2>/dev/null || true)
metrics+=$'\n'
else
debug_echo "Warning: Could not scrape Sidekiq exporter at $GITLAB_SIDEKIQ_URL (is sidekiq_exporter enabled?)"
@@ -613,31 +613,31 @@ collect_local_metrics() {
# Redis metrics
local redis_patterns="^gitlab_redis_client_requests_total[{ ]|^gitlab_redis_client_exceptions_total[{ ]|^gitlab_redis_client_requests_duration_seconds[_{ ]|^gitlab_redis_client_requests_duration_seconds_sum[{ ]|^gitlab_redis_client_requests_duration_seconds_count[{ ]"
local redis_help="^# (HELP|TYPE) gitlab_redis_client_(requests_total|exceptions_total|requests_duration_seconds)"
- metrics+=$(echo "$raw_metrics" | grep -E "$redis_help|$redis_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$redis_help|$redis_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Database connection pool metrics
local db_patterns="^gitlab_database_connection_pool_"
local db_help="^# (HELP|TYPE) gitlab_database_connection_pool_"
- metrics+=$(echo "$raw_metrics" | grep -E "$db_help|$db_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$db_help|$db_patterns" 2>/dev/null || true)
metrics+=$'\n'
# Process metrics (CPU, memory, file descriptors)
local process_patterns="^ruby_process_resident_memory_bytes[{ ]|^ruby_process_cpu_seconds_total[{ ]|^process_open_fds[{ ]|^process_max_fds[{ ]|^ruby_gc_stat_heap_live_slots[{ ]|^ruby_gc_stat_heap_free_slots[{ ]"
local process_help="^# (HELP|TYPE) (ruby_process_resident_memory_bytes|ruby_process_cpu_seconds_total|process_open_fds|process_max_fds|ruby_gc_stat_heap_live_slots|ruby_gc_stat_heap_free_slots)"
- metrics+=$(echo "$raw_metrics" | grep -E "$process_help|$process_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$process_help|$process_patterns" 2>/dev/null || true)
metrics+=$'\n'
# GitLab transaction/request metrics
local txn_patterns="^gitlab_transaction_duration_seconds[{ _]|^gitlab_sql_duration_seconds[{ _]|^gitlab_cache_operation_duration_seconds[{ _]"
local txn_help="^# (HELP|TYPE) (gitlab_transaction_duration_seconds|gitlab_sql_duration_seconds|gitlab_cache_operation_duration_seconds)"
- metrics+=$(echo "$raw_metrics" | grep -E "$txn_help|$txn_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$txn_help|$txn_patterns" 2>/dev/null || true)
metrics+=$'\n'
# User session and ActionCable metrics
local session_patterns="^user_session_logins_total[{ ]|^action_cable_active_connections[{ ]|^action_cable_pool_current_size[{ ]"
local session_help="^# (HELP|TYPE) (user_session_logins_total|action_cable_active_connections|action_cable_pool_current_size)"
- metrics+=$(echo "$raw_metrics" | grep -E "$session_help|$session_patterns" 2>/dev/null)
+ metrics+=$(echo "$raw_metrics" | grep -E "$session_help|$session_patterns" 2>/dev/null || true)
metrics+=$'\n'
local metric_count
diff --git a/gitlab-smoke-tests.ps1 b/gitlab-smoke-tests.ps1
new file mode 100644
index 0000000..735b1d7
--- /dev/null
+++ b/gitlab-smoke-tests.ps1
@@ -0,0 +1,864 @@
+###############################################################################
+# gitlab-smoke-tests.ps1 - Verify GitLab instance health after upgrades
+#
+# PowerShell port of gitlab-smoke-tests.sh. Zero external dependencies
+# beyond PowerShell 5.1+ and git. Runs on Windows, Linux, and macOS.
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# License: MIT
+# Version 1.00
+#
+# Usage:
+# $env:GITLAB_URL = "https://gitlab.example.com"
+# $env:GITLAB_TOKEN = "glpat-xxxxxxxxxxxx"
+# .\gitlab-smoke-tests.ps1
+# .\gitlab-smoke-tests.ps1 -SkipGit -SkipRegistry
+# .\gitlab-smoke-tests.ps1 -Insecure -Format junit
+# .\gitlab-smoke-tests.ps1 -Format tap
+###############################################################################
+
+# Script parameters. Several fall back to an environment variable when the
+# parameter is not supplied on the command line.
+[CmdletBinding()]
+param(
+ # GitLab base URL; defaults to $env:GITLAB_URL.
+ [string]$GitLabUrl = $env:GITLAB_URL,
+ # API token sent as the PRIVATE-TOKEN header; defaults to $env:GITLAB_TOKEN.
+ [string]$GitLabToken = $env:GITLAB_TOKEN,
+ # User name used when building a fallback clone URL; $env:GITLAB_USER or "root".
+ [string]$GitLabUser = $(if ($env:GITLAB_USER) { $env:GITLAB_USER } else { "root" }),
+ # Optional token appended to health-check URLs; defaults to $env:GITLAB_HEALTH_TOKEN.
+ [string]$HealthToken = $env:GITLAB_HEALTH_TOKEN,
+ # Name prefix of the temporary test project; $env:SMOKE_PROJECT_PREFIX or "smoke-test".
+ [string]$ProjectPrefix = $(if ($env:SMOKE_PROJECT_PREFIX) { $env:SMOKE_PROJECT_PREFIX } else { "smoke-test" }),
+ # Request timeout in seconds; $env:CURL_TIMEOUT (cast to int) or 10.
+ [int]$Timeout = $(if ($env:CURL_TIMEOUT) { [int]$env:CURL_TIMEOUT } else { 10 }),
+ # Skip TLS certificate validation.
+ [switch]$Insecure,
+ # Skip the git clone/push tests.
+ [switch]$SkipGit,
+ # Skip the container registry tests.
+ [switch]$SkipRegistry,
+ # Keep the test project instead of deleting it during cleanup.
+ [switch]$SkipCleanup,
+ # Output format.
+ [ValidateSet("text","tap","junit")]
+ [string]$Format = "text",
+ # Path of the JUnit report written when -Format junit is used.
+ [string]$JunitFile = "smoke-results.xml",
+ # Disable colored output and use ASCII status markers.
+ [switch]$NoColor
+)
+
+# Non-terminating errors are reported but do not stop the run.
+$ErrorActionPreference = "Continue"
+
+# ============================================================================
+# STATE
+# ============================================================================
+
+# Counters updated by Record-Pass / Record-Fail / Record-Skip.
+$script:Pass = 0
+$script:Fail = 0
+$script:Skip = 0
+$script:Total = 0
+# One object per recorded test (Status, Name, Detail); consumed by Write-JunitReport.
+$script:Results = @()
+# ID of the project created by Test-Git; Invoke-Cleanup deletes it when set.
+$script:CleanupProjectId = ""
+# Temporary clone directory created by Test-Git; removed by Invoke-Cleanup.
+$script:TmpDir = ""
+# Set just before the suites run; used to compute the run duration.
+$script:StartTime = $null
+# Set to $true by Test-Git after a successful clone; read by Test-Components.
+$script:GitCloneOk = $false
+
+# ============================================================================
+# COLORS
+# ============================================================================
+
+function Write-Color {
+    <#
+    .SYNOPSIS
+        Write a line to the host, colored unless -NoColor was requested.
+    .PARAMETER Text
+        The text to print.
+    .PARAMETER Color
+        Foreground color name (default: White); ignored when $NoColor is set.
+    #>
+    param([string]$Text, [string]$Color = "White")
+
+    $hostArgs = @{ Object = $Text }
+    if (-not $NoColor) {
+        $hostArgs["ForegroundColor"] = $Color
+    }
+    Write-Host @hostArgs
+}
+
+# Leveled message helpers: each prefixes the message with its level tag and
+# delegates to Write-Color with the level's color.
+function Write-Log {
+    param([string]$Msg)
+    Write-Color -Text "[INFO] $Msg" -Color "Cyan"
+}
+
+function Write-Warn {
+    param([string]$Msg)
+    Write-Color -Text "[WARN] $Msg" -Color "Yellow"
+}
+
+function Write-Err {
+    param([string]$Msg)
+    Write-Color -Text "[ERROR] $Msg" -Color "Red"
+}
+
+# ============================================================================
+# TEST RESULT RECORDING
+# ============================================================================
+
+function Record-Pass {
+    <#
+    .SYNOPSIS
+        Record a passing test and print it in the selected output format.
+    .PARAMETER Name
+        Test name.
+    .PARAMETER Detail
+        Optional detail appended to the text-mode line.
+    #>
+    param([string]$Name, [string]$Detail = "")
+
+    $script:Pass += 1
+    $script:Total += 1
+    $entry = [PSCustomObject]@{ Status = "PASS"; Name = $Name; Detail = $Detail }
+    $script:Results += $entry
+
+    if ($Format -eq "tap") {
+        Write-Host "ok $($script:Total) - $Name"
+        return
+    }
+
+    # ASCII tag when colors are disabled, check mark otherwise.
+    $marker = if ($NoColor) { '[PASS]' } else { [char]0x2713 }
+    $line = " $marker $Name"
+    if ($Detail) { $line = "$line - $Detail" }
+    Write-Color $line "Green"
+}
+
+function Record-Fail {
+    <#
+    .SYNOPSIS
+        Record a failing test and print it in the selected output format.
+    .PARAMETER Name
+        Test name.
+    .PARAMETER Detail
+        Optional failure detail (TAP diagnostic line or text-mode suffix).
+    #>
+    param([string]$Name, [string]$Detail = "")
+
+    $script:Fail += 1
+    $script:Total += 1
+    $entry = [PSCustomObject]@{ Status = "FAIL"; Name = $Name; Detail = $Detail }
+    $script:Results += $entry
+
+    if ($Format -eq "tap") {
+        Write-Host "not ok $($script:Total) - $Name"
+        if ($Detail) { Write-Host " # $Detail" }
+        return
+    }
+
+    # ASCII tag when colors are disabled, ballot X otherwise.
+    $marker = if ($NoColor) { '[FAIL]' } else { [char]0x2717 }
+    $line = " $marker $Name"
+    if ($Detail) { $line = "$line - $Detail" }
+    Write-Color $line "Red"
+}
+
+function Record-Skip {
+    <#
+    .SYNOPSIS
+        Record a skipped test and print it in the selected output format.
+    .PARAMETER Name
+        Test name.
+    .PARAMETER Reason
+        Optional reason; stored as the result's Detail.
+    #>
+    param([string]$Name, [string]$Reason = "")
+
+    $script:Skip += 1
+    $script:Total += 1
+    $entry = [PSCustomObject]@{ Status = "SKIP"; Name = $Name; Detail = $Reason }
+    $script:Results += $entry
+
+    if ($Format -eq "tap") {
+        Write-Host "ok $($script:Total) - $Name # SKIP $Reason"
+        return
+    }
+
+    # ASCII tag when colors are disabled, circled slash otherwise.
+    $marker = if ($NoColor) { '[SKIP]' } else { [char]0x2298 }
+    $line = " $marker $Name"
+    if ($Reason) { $line = "$line - $Reason" }
+    Write-Color $line "Yellow"
+}
+
+# ============================================================================
+# HTTP HELPERS
+# ============================================================================
+
+function Invoke-GitLabApi {
+    <#
+    .SYNOPSIS
+        Call a GitLab REST API v4 endpoint.
+    .PARAMETER Method
+        HTTP method (GET, POST, DELETE, ...).
+    .PARAMETER Endpoint
+        Path below /api/v4, including the leading slash and any query string.
+    .PARAMETER Body
+        Optional JSON request body.
+    .PARAMETER StatusOnly
+        Return the HTTP status code instead of the parsed response.
+    .OUTPUTS
+        Without -StatusOnly: the parsed response, or $null on any failure.
+        With -StatusOnly: the status code as [int], or 0 when no HTTP
+        response was received.
+    .NOTES
+        Failures are never thrown to the caller; the reason is written to the
+        verbose stream so that running the script with -Verbose shows it.
+    #>
+    param(
+        [string]$Method,
+        [string]$Endpoint,
+        [string]$Body = $null,
+        [switch]$StatusOnly
+    )
+
+    $uri = "$GitLabUrl/api/v4$Endpoint"
+    $headers = @{ "Content-Type" = "application/json" }
+    if ($GitLabToken) { $headers["PRIVATE-TOKEN"] = $GitLabToken }
+
+    $params = @{
+        Uri             = $uri
+        Method          = $Method
+        Headers         = $headers
+        TimeoutSec      = $Timeout
+        UseBasicParsing = $true
+        ErrorAction     = "Stop"
+    }
+
+    # -SkipCertificateCheck only exists on PowerShell 7+.
+    if ($Insecure -and $PSVersionTable.PSVersion.Major -ge 7) {
+        $params["SkipCertificateCheck"] = $true
+    }
+
+    if ($Body) {
+        $params["Body"] = $Body
+    }
+
+    try {
+        if ($StatusOnly) {
+            $response = Invoke-WebRequest @params
+            return [int]$response.StatusCode
+        }
+        return Invoke-RestMethod @params
+    } catch {
+        $failure = $_
+        # The token travels in a header, so the URI is safe to log.
+        Write-Verbose "$Method $uri failed: $($failure.Exception.Message)"
+        if (-not $StatusOnly) {
+            return $null
+        }
+        if ($failure.Exception.Response) {
+            return [int]$failure.Exception.Response.StatusCode
+        }
+        return 0
+    }
+}
+
+function Invoke-HealthCheck {
+    <#
+    .SYNOPSIS
+        GET a health endpoint and return its HTTP status code.
+    .PARAMETER Path
+        Path relative to the GitLab base URL, e.g. "/-/health".
+    .OUTPUTS
+        [int] status code, or 0 when no HTTP response was received.
+    #>
+    param([string]$Path)
+
+    $uri = "$GitLabUrl$Path"
+    if ($HealthToken) {
+        # URL-encode the token so characters such as '&', '+' or '#' cannot
+        # truncate or corrupt the query string.
+        $uri += "?token=$([System.Uri]::EscapeDataString($HealthToken))"
+    }
+
+    $params = @{
+        Uri             = $uri
+        Method          = "GET"
+        TimeoutSec      = $Timeout
+        UseBasicParsing = $true
+        ErrorAction     = "Stop"
+    }
+
+    # -SkipCertificateCheck only exists on PowerShell 7+.
+    if ($Insecure -and $PSVersionTable.PSVersion.Major -ge 7) {
+        $params["SkipCertificateCheck"] = $true
+    }
+
+    try {
+        $response = Invoke-WebRequest @params
+        return [int]$response.StatusCode
+    } catch {
+        $failure = $_
+        # Log the path only: the full URI may carry the health token.
+        Write-Verbose "GET $Path failed: $($failure.Exception.Message)"
+        if ($failure.Exception.Response) {
+            return [int]$failure.Exception.Response.StatusCode
+        }
+        return 0
+    }
+}
+
+# ============================================================================
+# TLS HELPER
+# ============================================================================
+
+function Get-TlsCertExpiry {
+    <#
+    .SYNOPSIS
+        Return the expiry date of the TLS certificate presented by a server.
+    .PARAMETER HostName
+        Host to connect to; also passed as the TLS target host.
+    .PARAMETER Port
+        TCP port (default: 443).
+    .OUTPUTS
+        [DateTime] expiry in local time, or $null if the certificate could
+        not be retrieved.
+    .NOTES
+        The validation callback accepts every certificate so that expired or
+        self-signed certificates can still be inspected.
+    #>
+    param([string]$HostName, [int]$Port = 443)
+
+    $tcpClient = $null
+    $sslStream = $null
+    try {
+        $tcpClient = New-Object System.Net.Sockets.TcpClient
+        $tcpClient.ReceiveTimeout = $Timeout * 1000
+        $tcpClient.SendTimeout = $Timeout * 1000
+        $tcpClient.Connect($HostName, $Port)
+
+        $sslStream = New-Object System.Net.Security.SslStream(
+            $tcpClient.GetStream(), $false,
+            { param($s,$c,$ch,$e) return $true }
+        )
+        $sslStream.AuthenticateAsClient($HostName)
+
+        $remoteCert = $sslStream.RemoteCertificate
+        if (-not $remoteCert) {
+            return $null
+        }
+
+        # Read NotAfter directly. GetExpirationDateString() formats the date
+        # with the current culture, while a [DateTime] cast parses with the
+        # invariant culture, so non-US locales failed or swapped day/month.
+        $cert2 = [System.Security.Cryptography.X509Certificates.X509Certificate2]::new($remoteCert)
+        return $cert2.NotAfter
+    } catch {
+        return $null
+    } finally {
+        # Release the stream and socket on every path, including a failed
+        # connect or handshake.
+        if ($sslStream) { $sslStream.Dispose() }
+        if ($tcpClient) { $tcpClient.Dispose() }
+    }
+}
+
+# ============================================================================
+# TEST SUITES
+# ============================================================================
+
+# -- 1. Connectivity --------------------------------------------------------
+
+function Test-Connectivity {
+    <#
+    .SYNOPSIS
+        Probe the health, readiness and liveness endpoints and check how long
+        the TLS certificate remains valid.
+    #>
+    Write-Host ""
+    Write-Color "Connectivity" "White"
+
+    # 1a-1c. Each probe passes only on HTTP 200.
+    $probes = @(
+        @{ Path = "/-/health";    Name = "GitLab health endpoint reachable" },
+        @{ Path = "/-/readiness"; Name = "GitLab readiness check" },
+        @{ Path = "/-/liveness";  Name = "GitLab liveness check" }
+    )
+    foreach ($probe in $probes) {
+        $code = Invoke-HealthCheck $probe.Path
+        if ($code -eq 200) {
+            Record-Pass $probe.Name "HTTP $code"
+        } else {
+            Record-Fail $probe.Name "HTTP $code"
+        }
+    }
+
+    # 1d. TLS certificate
+    if ($GitLabUrl -notmatch "^https://") {
+        Record-Skip "TLS certificate check" "not using HTTPS"
+        return
+    }
+
+    # Let System.Uri extract host and port. Stripping with regexes broke on
+    # IPv6 literals and picked up any ":digits" sequence in the URL.
+    # Uri.Port yields 443 for https URLs without an explicit port.
+    $expiry = $null
+    $parsedUrl = $null
+    if ([System.Uri]::TryCreate($GitLabUrl, [System.UriKind]::Absolute, [ref]$parsedUrl)) {
+        $expiry = Get-TlsCertExpiry -HostName $parsedUrl.DnsSafeHost -Port $parsedUrl.Port
+    }
+
+    if (-not $expiry) {
+        Record-Skip "TLS certificate check" "could not retrieve certificate"
+        return
+    }
+
+    $daysLeft = [math]::Floor(($expiry - (Get-Date)).TotalDays)
+    if ($daysLeft -gt 30) {
+        Record-Pass "TLS certificate valid" "$daysLeft days remaining"
+    } elseif ($daysLeft -gt 0) {
+        Record-Pass "TLS certificate valid" "$daysLeft days remaining (renew soon)"
+    } else {
+        Record-Fail "TLS certificate valid" "expired or expiring in $daysLeft days"
+    }
+}
+
+# -- 2. API ----------------------------------------------------------------
+
+function Test-Api {
+    <#
+    .SYNOPSIS
+        Exercise core REST endpoints: version, authentication, projects,
+        users, Sidekiq metrics, runners and search.
+    #>
+    Write-Host ""
+    Write-Color "API" "White"
+
+    # 2a. Version
+    $ver = Invoke-GitLabApi -Method GET -Endpoint "/version"
+    if (-not $ver -or -not $ver.version) {
+        Record-Fail "API version endpoint" "no version returned"
+    } else {
+        Record-Pass "API version endpoint" "GitLab $($ver.version) ($($ver.revision))"
+    }
+
+    # 2b. Authentication
+    $authCode = Invoke-GitLabApi -Method GET -Endpoint "/user" -StatusOnly
+    switch ($authCode) {
+        200 {
+            $me = Invoke-GitLabApi -Method GET -Endpoint "/user"
+            Record-Pass "API authentication" "authenticated as $($me.username)"
+        }
+        401 {
+            Record-Fail "API authentication" "token rejected (HTTP 401)"
+        }
+        default {
+            Record-Fail "API authentication" "HTTP $authCode"
+        }
+    }
+
+    # 2c. List projects
+    $projCode = Invoke-GitLabApi -Method GET -Endpoint "/projects?per_page=1" -StatusOnly
+    if ($projCode -ne 200) {
+        Record-Fail "API list projects" "HTTP $projCode"
+    } else {
+        Record-Pass "API list projects" "database responding"
+    }
+
+    # 2d. List users
+    $usersCode = Invoke-GitLabApi -Method GET -Endpoint "/users?per_page=1" -StatusOnly
+    if ($usersCode -ne 200) {
+        Record-Fail "API list users" "HTTP $usersCode"
+    } else {
+        Record-Pass "API list users" "user directory accessible"
+    }
+
+    # 2e. Sidekiq
+    $sidekiqInfo = Invoke-GitLabApi -Method GET -Endpoint "/sidekiq/compound_metrics"
+    if (-not $sidekiqInfo -or $sidekiqInfo.error) {
+        Record-Fail "Sidekiq running" "could not query Sidekiq metrics"
+    } else {
+        $workers = 0
+        if ($sidekiqInfo.processes) { $workers = @($sidekiqInfo.processes).Count }
+        Record-Pass "Sidekiq running" "$workers process(es) responding"
+    }
+
+    # 2f. Runners (403 means the token is not an admin token)
+    $runnerCode = Invoke-GitLabApi -Method GET -Endpoint "/runners/all?per_page=1" -StatusOnly
+    switch ($runnerCode) {
+        200 { Record-Pass "API runners endpoint" "runner management accessible" }
+        403 { Record-Skip "API runners endpoint" "token lacks admin scope" }
+        default { Record-Fail "API runners endpoint" "HTTP $runnerCode" }
+    }
+
+    # 2g. Search
+    $searchCode = Invoke-GitLabApi -Method GET -Endpoint "/search?scope=projects&search=test" -StatusOnly
+    switch ($searchCode) {
+        200 { Record-Pass "API search" "search index responding" }
+        403 { Record-Skip "API search" "search disabled or token lacks scope" }
+        default { Record-Fail "API search" "HTTP $searchCode" }
+    }
+}
+
+# -- 3. Git Operations -----------------------------------------------------
+
+function Test-Git {
+    <#
+    .SYNOPSIS
+        Create a private test project, clone it over HTTP(S) and push a commit.
+    .NOTES
+        Sets $script:CleanupProjectId, $script:TmpDir and $script:GitCloneOk
+        for Invoke-Cleanup and Test-Components. Sets GIT_SSL_NO_VERIFY when
+        -Insecure is given.
+    #>
+    Write-Host ""
+    Write-Color "Git Operations" "White"
+
+    if ($SkipGit) {
+        Record-Skip "Git clone" "SkipGit specified"
+        Record-Skip "Git push" "SkipGit specified"
+        return
+    }
+
+    # Create test project
+    $projectName = "$ProjectPrefix-$([DateTimeOffset]::UtcNow.ToUnixTimeSeconds())"
+    $body = @{ name = $projectName; visibility = "private"; initialize_with_readme = $true } | ConvertTo-Json
+    $project = Invoke-GitLabApi -Method POST -Endpoint "/projects" -Body $body
+
+    if (-not $project -or -not $project.id) {
+        Record-Fail "Create test project" "API returned no project ID"
+        Record-Skip "Git clone" "no test project"
+        Record-Skip "Git push" "no test project"
+        return
+    }
+
+    $script:CleanupProjectId = $project.id
+    Record-Pass "Create test project" "$projectName (ID: $($project.id))"
+
+    # Build clone URL
+    $httpUrl = $project.http_url_to_repo
+    if (-not $httpUrl) {
+        $httpUrl = "$GitLabUrl/$GitLabUser/$projectName.git"
+    }
+
+    # Rewrite the origin if the API returns an internal hostname. Only the
+    # scheme://host[:port] part is compared and swapped: comparing against
+    # the whole $GitLabUrl duplicated the path when GitLab is served under
+    # a relative URL root (https://host/gitlab -> .../gitlab/gitlab/...).
+    # Plain string surgery avoids '$' in the URL being read as a regex
+    # substitution token.
+    $apiOrigin = if ($httpUrl -match "^(https?://[^/]+)") { $Matches[1] } else { "" }
+    $wantedOrigin = if ($GitLabUrl -match "^(https?://[^/]+)") { $Matches[1] } else { $GitLabUrl }
+    if ($apiOrigin -and $apiOrigin -ne $wantedOrigin) {
+        $httpUrl = $wantedOrigin + $httpUrl.Substring($apiOrigin.Length)
+    }
+
+    # Inject token
+    if ($httpUrl -match "^https://") {
+        $cloneUrl = $httpUrl -replace "^https://", "https://oauth2:${GitLabToken}@"
+    } elseif ($httpUrl -match "^http://") {
+        $cloneUrl = $httpUrl -replace "^http://", "http://oauth2:${GitLabToken}@"
+    } else {
+        $cloneUrl = $httpUrl
+    }
+
+    # Temp directory
+    $script:TmpDir = Join-Path ([System.IO.Path]::GetTempPath()) "gitlab-smoke-$([guid]::NewGuid().ToString('N').Substring(0,8))"
+    New-Item -ItemType Directory -Path $script:TmpDir -Force | Out-Null
+
+    # Wait for repo init
+    Start-Sleep -Seconds 2
+
+    # Clone
+    if ($Insecure) { $env:GIT_SSL_NO_VERIFY = "true" }
+
+    $repoDir = Join-Path $script:TmpDir "repo"
+    $cloneOutput = & git clone $cloneUrl $repoDir 2>&1
+    $cloneRc = $LASTEXITCODE
+
+    if ($cloneRc -ne 0) {
+        # Never echo the token that is embedded in the clone URL.
+        $shortErr = ($cloneOutput | Select-String -Pattern "fatal|error" | Select-Object -First 1) -replace [regex]::Escape($GitLabToken), "[REDACTED]"
+        Record-Fail "Git clone (HTTPS)" "$shortErr"
+        return
+    }
+
+    $script:GitCloneOk = $true
+    Record-Pass "Git clone (HTTPS)" "Gitaly responding"
+
+    # Push
+    Push-Location $repoDir
+    try {
+        & git config user.email "smoke-test@example.com"
+        & git config user.name "Smoke Test"
+
+        # Convert to UTC before formatting: 'Z' is a literal in .NET format
+        # strings, so formatting local time with it mislabels the timestamp.
+        $stamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-dd'T'HH:mm:ss'Z'", [System.Globalization.CultureInfo]::InvariantCulture)
+        "smoke test $stamp" | Out-File -FilePath "smoke-test.txt" -Encoding utf8
+
+        & git add smoke-test.txt
+        & git commit -m "smoke test commit" 2>&1 | Out-Null
+
+        # Try 'main' first, then fall back to 'master'.
+        $pushOutput = & git push origin main 2>&1
+        $pushRc = $LASTEXITCODE
+        if ($pushRc -ne 0) {
+            $pushOutput = & git push origin master 2>&1
+            $pushRc = $LASTEXITCODE
+        }
+
+        if ($pushRc -eq 0) {
+            Record-Pass "Git push (HTTPS)" "write to Gitaly succeeded"
+        } else {
+            # Surface the first git error line, with the token redacted.
+            $pushErr = ($pushOutput | Select-String -Pattern "fatal|error" | Select-Object -First 1) -replace [regex]::Escape($GitLabToken), "[REDACTED]"
+            $pushDetail = if ($pushErr) { "push failed: $pushErr" } else { "push failed" }
+            Record-Fail "Git push (HTTPS)" $pushDetail
+        }
+    } finally {
+        Pop-Location
+    }
+}
+
+# -- 4. Container Registry -------------------------------------------------
+
+# Checks that the container registry answers on one of several candidate
+# URLs and, when a test project exists, that the project-level registry
+# endpoint is accessible. Skipped entirely when -SkipRegistry is given.
+function Test-Registry {
+ if ($SkipRegistry) {
+ Write-Host ""
+ Write-Color "Container Registry" "White"
+ Record-Skip "Registry API" "SkipRegistry specified"
+ return
+ }
+
+ Write-Host ""
+ Write-Color "Container Registry" "White"
+
+ # Check if registry is enabled
+ # $registryEnabled stays "" when the settings cannot be read, so neither
+ # the -eq $false nor the -eq $true comparison below matches in that case.
+ $registryEnabled = ""
+ $settings = Invoke-GitLabApi -Method GET -Endpoint "/application/settings"
+ if ($settings) {
+ $registryEnabled = $settings.container_registry_enabled
+ }
+
+ if ($registryEnabled -eq $false) {
+ Record-Skip "Registry API reachable" "container registry disabled in application settings"
+ Record-Skip "Registry project endpoint" "container registry disabled in application settings"
+ return
+ }
+
+ # Try registry v2 API
+ $hostPart = $GitLabUrl -replace "^https?://", "" -replace "/.*", ""
+ $registryStatus = 0
+
+ # Candidate locations, tried in order: port 5050 on the GitLab URL,
+ # port 5050 over HTTPS, and a "registry." subdomain.
+ $registryUrls = @(
+ "$GitLabUrl`:5050/v2/",
+ "https://${hostPart}:5050/v2/",
+ "https://registry.${hostPart}/v2/"
+ )
+
+ # Stop at the first URL that answers successfully or with 401; any other
+ # HTTP error status is remembered and the next candidate is tried.
+ foreach ($regUrl in $registryUrls) {
+ try {
+ $params = @{
+ Uri = $regUrl
+ Method = "GET"
+ TimeoutSec = $Timeout
+ UseBasicParsing = $true
+ ErrorAction = "Stop"
+ }
+ if ($Insecure -and $PSVersionTable.PSVersion.Major -ge 7) {
+ $params["SkipCertificateCheck"] = $true
+ }
+ $response = Invoke-WebRequest @params
+ $registryStatus = [int]$response.StatusCode
+ break
+ } catch {
+ if ($_.Exception.Response) {
+ $registryStatus = [int]$_.Exception.Response.StatusCode
+ if ($registryStatus -eq 401) { break }
+ }
+ }
+ }
+
+ # 200 and 401 both count as reachable; 0 means no candidate returned an
+ # HTTP response at all.
+ if ($registryStatus -eq 200 -or $registryStatus -eq 401) {
+ Record-Pass "Registry API reachable" "HTTP $registryStatus"
+ } elseif ($registryStatus -eq 0) {
+ if ($registryEnabled -eq $true) {
+ Record-Fail "Registry API reachable" "enabled in settings but not reachable at standard ports/hosts"
+ } else {
+ Record-Skip "Registry API reachable" "not found at standard ports/hosts (settings unreadable - may need admin token)"
+ }
+ } else {
+ Record-Fail "Registry API reachable" "HTTP $registryStatus"
+ }
+
+ # Project-level registry
+ # Only checked when Test-Git created a project; otherwise nothing is recorded.
+ if ($script:CleanupProjectId) {
+ $regStatus = Invoke-GitLabApi -Method GET -Endpoint "/projects/$($script:CleanupProjectId)/registry/repositories" -StatusOnly
+ if ($regStatus -eq 200) {
+ Record-Pass "Registry project endpoint" "project registry accessible"
+ } elseif ($regStatus -eq 404) {
+ Record-Skip "Registry project endpoint" "container registry not enabled for project"
+ } else {
+ Record-Fail "Registry project endpoint" "HTTP $regStatus"
+ }
+ }
+}
+
+# -- 5. CI/CD --------------------------------------------------------------
+
+function Test-CICD {
+    <#
+    .SYNOPSIS
+        Check that at least one CI/CD runner is online and that the
+        application settings endpoint is readable.
+    #>
+    Write-Host ""
+    Write-Color "CI/CD" "White"
+
+    # Runners
+    $allRunners = Invoke-GitLabApi -Method GET -Endpoint "/runners/all?per_page=100"
+    if ($allRunners -isnot [array]) {
+        Record-Skip "CI/CD runners" "could not query runners (admin token required)"
+    } else {
+        $registered = $allRunners.Count
+        $online = @($allRunners | Where-Object { $_.status -eq "online" }).Count
+
+        if ($online -gt 0) {
+            Record-Pass "CI/CD runners online" "$online/$registered runners online"
+        } elseif ($registered -gt 0) {
+            Record-Fail "CI/CD runners online" "0/$registered runners online"
+        } else {
+            Record-Skip "CI/CD runners online" "no runners registered"
+        }
+    }
+
+    # CI/CD settings
+    $settingsCode = Invoke-GitLabApi -Method GET -Endpoint "/application/settings" -StatusOnly
+    switch ($settingsCode) {
+        200 { Record-Pass "CI/CD settings accessible" "application settings readable" }
+        403 { Record-Skip "CI/CD settings accessible" "admin token required" }
+        default { Record-Fail "CI/CD settings accessible" "HTTP $settingsCode" }
+    }
+}
+
+# -- 6. Background Migrations ----------------------------------------------
+
+function Test-Migrations {
+    <#
+    .SYNOPSIS
+        Report the state of batched background migrations on the main
+        database; failed or paused migrations fail the test.
+    #>
+    Write-Host ""
+    Write-Color "Background Migrations" "White"
+
+    $endpoint = "/admin/batched_background_migrations?database=main"
+    $list = Invoke-GitLabApi -Method GET -Endpoint $endpoint
+
+    if ($list -isnot [array]) {
+        # No list came back: ask for the status code to explain why.
+        $code = Invoke-GitLabApi -Method GET -Endpoint $endpoint -StatusOnly
+        if ($code -eq 403) {
+            Record-Skip "Background migrations" "admin token required"
+        } else {
+            Record-Skip "Background migrations" "could not query (HTTP $code)"
+        }
+        return
+    }
+
+    $all = $list.Count
+    $failed = @($list | Where-Object { $_.status -eq "failed" }).Count
+    $active = @($list | Where-Object { $_.status -eq "active" }).Count
+    $paused = @($list | Where-Object { $_.status -eq "paused" }).Count
+    $finished = @($list | Where-Object { $_.status -eq "finished" }).Count
+
+    if ($failed -gt 0) {
+        Record-Fail "Background migrations" "$failed failed, $active active, $paused paused, $finished finished of $all"
+        return
+    }
+    if ($paused -gt 0) {
+        Record-Fail "Background migrations" "$paused paused, $active active, $finished finished of $all"
+        return
+    }
+    if ($active -gt 0) {
+        Record-Pass "Background migrations" "$active active, $finished finished of $all (in progress)"
+        return
+    }
+    Record-Pass "Background migrations" "all $all finished"
+}
+
+# -- 7. Components ---------------------------------------------------------
+
+function Test-Components {
+    <#
+    .SYNOPSIS
+        Check instance metadata and statistics, and infer Gitaly, PostgreSQL
+        and Redis health from API behaviour.
+    #>
+    Write-Host ""
+    Write-Color "Components" "White"
+
+    # Metadata
+    $meta = Invoke-GitLabApi -Method GET -Endpoint "/metadata"
+    if (-not $meta) {
+        Record-Skip "GitLab metadata" "metadata endpoint not available"
+    } elseif ($meta.version) {
+        $edition = "CE"
+        if ($meta.enterprise -eq $true) { $edition = "EE" }
+        Record-Pass "GitLab metadata" "$($meta.version) $edition"
+    } else {
+        Record-Pass "GitLab metadata" "endpoint reachable"
+    }
+
+    # Statistics
+    $counts = Invoke-GitLabApi -Method GET -Endpoint "/application/statistics"
+    if (-not $counts) {
+        Record-Skip "Instance statistics" "admin token required"
+    } elseif ($counts.active_users) {
+        Record-Pass "Instance statistics" "$($counts.active_users) users, $($counts.projects) projects, $($counts.groups) groups"
+    } else {
+        Record-Pass "Instance statistics" "endpoint reachable"
+    }
+
+    # Gitaly (inferred from the git test; nothing is recorded when no
+    # project was created)
+    if ($script:GitCloneOk) {
+        Record-Pass "Gitaly storage" "project created and cloned successfully"
+    } elseif ($script:CleanupProjectId) {
+        Record-Skip "Gitaly storage" "project created but clone was not tested or failed"
+    }
+
+    # PostgreSQL (inferred)
+    $dbCode = Invoke-GitLabApi -Method GET -Endpoint "/projects?per_page=1&order_by=updated_at" -StatusOnly
+    if ($dbCode -ne 200) {
+        Record-Fail "PostgreSQL" "sorted query failed (HTTP $dbCode)"
+    } else {
+        Record-Pass "PostgreSQL" "database queries succeeding"
+    }
+
+    # Redis (inferred)
+    $cacheCode = Invoke-GitLabApi -Method GET -Endpoint "/user" -StatusOnly
+    if ($cacheCode -ne 200) {
+        Record-Skip "Redis" "cannot verify independently"
+    } else {
+        Record-Pass "Redis" "session/cache operational (auth succeeded)"
+    }
+}
+
+# ============================================================================
+# OUTPUT
+# ============================================================================
+
+function Write-Summary {
+    <#
+    .SYNOPSIS
+        Print the pass/fail/skip totals and the elapsed time for the run.
+    #>
+    $elapsed = (Get-Date) - $script:StartTime
+    $duration = [math]::Floor($elapsed.TotalSeconds)
+
+    # 40-character horizontal rule drawn with U+2500.
+    $rule = [string]::new([char]0x2500, 40)
+
+    Write-Host ""
+    Write-Color $rule "White"
+    Write-Color "Summary $GitLabUrl" "White"
+    Write-Host " $($script:Pass) passed $($script:Fail) failed $($script:Skip) skipped (${duration}s)"
+    Write-Color $rule "White"
+
+    if ($script:Fail -ne 0) {
+        Write-Color "$($script:Fail) test(s) failed." "Red"
+    } else {
+        Write-Color "All tests passed." "Green"
+    }
+}
+
+function Write-TapHeader {
+    # Emit the TAP protocol version line that opens the stream.
+    $versionLine = "TAP version 13"
+    Write-Host $versionLine
+}
+
+function Write-TapFooter {
+    # Emit the TAP plan line followed by pass/fail/skip totals as comments.
+    $footer = @(
+        "1..$($script:Total)",
+        "# pass $($script:Pass)",
+        "# fail $($script:Fail)",
+        "# skip $($script:Skip)"
+    )
+    foreach ($entry in $footer) {
+        Write-Host $entry
+    }
+}
+
+function Write-JunitReport {
+    <#
+    .SYNOPSIS
+        Write all recorded results to $JunitFile as a JUnit XML report.
+    .NOTES
+        One testcase element is written per recorded result:
+          PASS - optional system-out child carrying the detail
+          FAIL - failure child with the detail as message and body
+          SKIP - skipped child with the reason as message
+        Names and details are XML-escaped before being embedded.
+    #>
+    $duration = [math]::Floor(((Get-Date) - $script:StartTime).TotalSeconds)
+    $suiteName = "gitlab-smoke-tests"
+
+    $lines = New-Object System.Collections.Generic.List[string]
+    $lines.Add('<?xml version="1.0" encoding="UTF-8"?>')
+    $lines.Add(('<testsuites tests="{0}" failures="{1}" skipped="{2}" time="{3}">' -f $script:Total, $script:Fail, $script:Skip, $duration))
+    $lines.Add(('  <testsuite name="{0}" tests="{1}" failures="{2}" skipped="{3}" time="{4}">' -f $suiteName, $script:Total, $script:Fail, $script:Skip, $duration))
+
+    foreach ($r in $script:Results) {
+        # SecurityElement.Escape encodes the five XML-reserved characters,
+        # so the values are safe in both attributes and element text.
+        $safeName = [System.Security.SecurityElement]::Escape([string]$r.Name)
+        $safeDetail = [System.Security.SecurityElement]::Escape([string]$r.Detail)
+
+        $lines.Add(('    <testcase name="{0}" classname="{1}">' -f $safeName, $suiteName))
+        switch ($r.Status) {
+            "PASS" {
+                if ($r.Detail) {
+                    $lines.Add(('      <system-out>{0}</system-out>' -f $safeDetail))
+                }
+            }
+            "FAIL" {
+                $lines.Add(('      <failure message="{0}">FAILED: {1} - {0}</failure>' -f $safeDetail, $safeName))
+            }
+            "SKIP" {
+                $lines.Add(('      <skipped message="{0}"/>' -f $safeDetail))
+            }
+        }
+        $lines.Add('    </testcase>')
+    }
+
+    $lines.Add('  </testsuite>')
+    $lines.Add('</testsuites>')
+
+    ($lines -join "`n") | Out-File -FilePath $JunitFile -Encoding utf8
+    Write-Log "JUnit report written to $JunitFile"
+}
+
+# ============================================================================
+# CLEANUP
+# ============================================================================
+
+function Invoke-Cleanup {
+    <#
+    .SYNOPSIS
+        Best-effort teardown: delete the test project (unless -SkipCleanup),
+        remove the temporary clone directory and clear GIT_SSL_NO_VERIFY.
+    #>
+    $deleteProject = $script:CleanupProjectId -and -not $SkipCleanup
+    if ($deleteProject) {
+        try {
+            $null = Invoke-GitLabApi -Method DELETE -Endpoint "/projects/$($script:CleanupProjectId)"
+        } catch { }
+    }
+
+    $scratch = $script:TmpDir
+    if ($scratch -and (Test-Path $scratch)) {
+        Remove-Item -Recurse -Force $scratch -ErrorAction SilentlyContinue
+    }
+
+    if ($env:GIT_SSL_NO_VERIFY) {
+        Remove-Item Env:\GIT_SSL_NO_VERIFY -ErrorAction SilentlyContinue
+    }
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+# Emits the usage/help text as a single string on the output stream.
+# The here-string is expandable, so the '$' characters in the examples are
+# backtick-escaped to print literally.
+function Show-Usage {
+ @"
+Usage: .\gitlab-smoke-tests.ps1 [OPTIONS]
+
+Smoke-test a GitLab instance. PowerShell 5.1+, git only.
+Designed for air-gapped environments.
+
+Required environment variables:
+ GITLAB_URL GitLab base URL (https://gitlab.example.com)
+ GITLAB_TOKEN Personal access token (api scope; admin for full coverage)
+
+Optional environment variables:
+ GITLAB_HEALTH_TOKEN Health check access token
+ GITLAB_USER Username for git operations (default: root)
+
+Parameters:
+ -SkipGit Skip git clone/push tests
+ -SkipRegistry Skip container registry tests
+ -SkipCleanup Don't delete the test project after run
+ -Insecure Allow self-signed TLS certificates
+ -Timeout N HTTP timeout in seconds (default: 10)
+ -Format FORMAT Output: text (default), tap, junit
+ -JunitFile FILE JUnit output path (default: smoke-results.xml)
+ -NoColor Disable colored output
+ -Verbose Show debug output
+
+Examples:
+ `$env:GITLAB_URL = "https://gitlab.example.com"
+ `$env:GITLAB_TOKEN = "glpat-xxxxxxxxxxxx"
+ .\gitlab-smoke-tests.ps1
+
+ .\gitlab-smoke-tests.ps1 -Insecure -Format junit
+ .\gitlab-smoke-tests.ps1 -SkipGit -SkipRegistry
+ .\gitlab-smoke-tests.ps1 -Format tap
+"@
+}
+
+# Handle PS 5.1 TLS and self-signed certs
+# This branch runs only on PowerShell versions below 7. The web helper
+# functions pass -SkipCertificateCheck only on 7+, so -Insecure is
+# implemented here for older versions.
+if ($PSVersionTable.PSVersion.Major -lt 7) {
+ # Use TLS 1.2 for outgoing connections made through ServicePointManager.
+ [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
+
+ if ($Insecure) {
+ # Compile a certificate policy that accepts every certificate and
+ # install it process-wide. Add-Type errors are suppressed (for
+ # example when the type already exists in this session).
+ Add-Type @"
+using System.Net;
+using System.Security.Cryptography.X509Certificates;
+public class TrustAll : ICertificatePolicy {
+ public bool CheckValidationResult(ServicePoint sp, X509Certificate cert,
+ WebRequest req, int problem) { return true; }
+}
+"@ -ErrorAction SilentlyContinue
+ [System.Net.ServicePointManager]::CertificatePolicy = New-Object TrustAll
+ }
+}
+
+# Validate
+# Both settings are mandatory: report the first missing one, show usage, exit 1.
+$requiredSettings = @(
+    @{ Label = "GITLAB_URL";   Value = $GitLabUrl },
+    @{ Label = "GITLAB_TOKEN"; Value = $GitLabToken }
+)
+foreach ($setting in $requiredSettings) {
+    if (-not $setting.Value) {
+        Write-Err "$($setting.Label) is required"
+        Write-Host ""
+        Show-Usage
+        exit 1
+    }
+}
+
+# Normalise the base URL (no trailing slash) and start the run timer.
+$GitLabUrl = $GitLabUrl.TrimEnd("/")
+$script:StartTime = Get-Date
+
+# Print the run header: the TAP version line in TAP mode, otherwise a banner
+# with the target URL and the start time.
+if ($Format -eq "tap") {
+    Write-TapHeader
+} else {
+    # Convert to UTC before formatting: 'Z' is a literal in .NET format
+    # strings, so formatting local time with it mislabels the timestamp.
+    $startedUtc = (Get-Date).ToUniversalTime().ToString("yyyy-MM-dd'T'HH:mm:ss'Z'", [System.Globalization.CultureInfo]::InvariantCulture)
+
+    Write-Host ""
+    Write-Color "GitLab Smoke Tests" "White"
+    Write-Host "Target: $GitLabUrl"
+    Write-Host "Time: $startedUtc"
+    Write-Host ""
+}
+
+# Run every suite in order; cleanup runs even if a suite throws.
+$suites = @(
+    'Test-Connectivity',
+    'Test-Api',
+    'Test-Git',
+    'Test-Registry',
+    'Test-CICD',
+    'Test-Migrations',
+    'Test-Components'
+)
+try {
+    foreach ($suite in $suites) {
+        & $suite
+    }
+} finally {
+    Invoke-Cleanup
+}
+
+# Final report in the selected format.
+switch ($Format) {
+    "tap" {
+        Write-TapFooter
+    }
+    "junit" {
+        Write-Summary
+        Write-JunitReport
+    }
+    default {
+        Write-Summary
+    }
+}
+
+# Exit 0 only when no test failed.
+$exitCode = if ($script:Fail -eq 0) { 0 } else { 1 }
+exit $exitCode
diff --git a/gitlab-smoke-tests.sh b/gitlab-smoke-tests.sh
new file mode 100644
index 0000000..cffd7b3
--- /dev/null
+++ b/gitlab-smoke-tests.sh
@@ -0,0 +1,862 @@
+#!/usr/bin/env bash
+
+#########################################################################################
+#### gitlab-smoke-tests.sh — Verify GitLab instance health after upgrades or changes ####
+#### Zero external dependencies. Runs in air-gapped environments. ####
+#### Requires: bash 4+, curl, git, openssl (optional) ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.02 ####
+#### ####
+#### Usage: ####
+#### export GITLAB_URL="https://gitlab.example.com" ####
+#### export GITLAB_TOKEN="glpat-xxxxxxxxxxxxxxxxxxxx" ####
+#### export GITLAB_HEALTH_TOKEN="your-health-token" # optional ####
+#### ./gitlab-smoke-tests.sh ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+
+set -euo pipefail
+
+# ── Defaults ──────────────────────────────────────────────────────────
+GITLAB_URL="${GITLAB_URL:-}"
+GITLAB_TOKEN="${GITLAB_TOKEN:-}"
+GITLAB_USER="${GITLAB_USER:-root}"
+SMOKE_PROJECT_PREFIX="${SMOKE_PROJECT_PREFIX:-smoke-test}"
+CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
+CURL_INSECURE="${CURL_INSECURE:-false}"
+SKIP_GIT="${SKIP_GIT:-false}"
+SKIP_REGISTRY="${SKIP_REGISTRY:-false}"
+SKIP_CLEANUP="${SKIP_CLEANUP:-false}"
+GITLAB_HEALTH_TOKEN="${GITLAB_HEALTH_TOKEN:-}"
+OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}" # text, tap, junit
+JUNIT_FILE="${JUNIT_FILE:-smoke-results.xml}"
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# ── State ─────────────────────────────────────────────────────────────
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+RESULTS=()
+CLEANUP_PROJECT_ID=""
+TMPDIR_SMOKE=""
+START_TIME=""
+GIT_CLONE_OK="false"
+
+# ── Colors ────────────────────────────────────────────────────────────
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[0;33m'
+ BLUE='\033[0;34m'
+ BOLD='\033[1m'
+ RESET='\033[0m'
+ else
+ RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
+ fi
+}
+
+# ── Logging ───────────────────────────────────────────────────────────
+log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
+err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
+verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; }
+
+# ── Test Result Recording ─────────────────────────────────────────────
+# record_pass NAME [DETAIL]
+# Count a passing test, append "PASS|NAME|DETAIL" to RESULTS and print one
+# line: a TAP "ok" line when OUTPUT_FORMAT=tap, a coloured check mark otherwise.
+record_pass() {
+    local name="$1" detail="${2:-}"
+
+    PASS=$((PASS + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("PASS|${name}|${detail}")
+
+    case "$OUTPUT_FORMAT" in
+        tap)
+            echo "ok ${TOTAL} - ${name}"
+            ;;
+        *)
+            echo -e " ${GREEN}✓${RESET} ${name}${detail:+ — ${detail}}"
+            ;;
+    esac
+}
+
+# record_fail NAME [DETAIL]
+# Count a failing test, append "FAIL|NAME|DETAIL" to RESULTS and print one
+# line (TAP "not ok" plus an optional "# detail" line, or a coloured cross).
+# Always returns 0: the script runs under `set -e`, so a non-zero return from
+# a result recorder would abort the whole run.
+record_fail() {
+    local name="$1"
+    local detail="${2:-}"
+    ((FAIL++)) || true
+    ((TOTAL++)) || true
+    RESULTS+=("FAIL|${name}|${detail}")
+    if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
+        echo "not ok ${TOTAL} - ${name}"
+        # Explicit `if`: a trailing `[[ … ]] && echo` would make the function
+        # return 1 whenever DETAIL is empty.
+        if [[ -n "$detail" ]]; then
+            echo " # ${detail}"
+        fi
+    else
+        echo -e " ${RED}✗${RESET} ${name}${detail:+ — ${detail}}"
+    fi
+}
+
+# record_skip NAME [REASON]
+# Count a skipped test, append "SKIP|NAME|REASON" to RESULTS and print one
+# line: a TAP "ok … # SKIP" line when OUTPUT_FORMAT=tap, a coloured marker
+# otherwise.
+record_skip() {
+    local name="$1" reason="${2:-}"
+
+    SKIP=$((SKIP + 1))
+    TOTAL=$((TOTAL + 1))
+    RESULTS+=("SKIP|${name}|${reason}")
+
+    case "$OUTPUT_FORMAT" in
+        tap)
+            echo "ok ${TOTAL} - ${name} # SKIP ${reason}"
+            ;;
+        *)
+            echo -e " ${YELLOW}⊘${RESET} ${name}${reason:+ — ${reason}}"
+            ;;
+    esac
+}
+
+# ── curl wrapper ──────────────────────────────────────────────────────
+api_curl() {
+ local method="$1"
+ local endpoint="$2"
+ shift 2
+ local curl_opts=(-s -S --max-time "$CURL_TIMEOUT" -X "$method")
+
+ [[ "$CURL_INSECURE" == "true" ]] && curl_opts+=(-k)
+ [[ -n "$GITLAB_TOKEN" ]] && curl_opts+=(-H "PRIVATE-TOKEN: ${GITLAB_TOKEN}")
+ curl_opts+=(-H "Content-Type: application/json")
+
+ local url="${GITLAB_URL}/api/v4${endpoint}"
+ verbose "curl ${method} ${url} $*"
+
+ curl "${curl_opts[@]}" "$@" "$url" 2>/dev/null
+}
+
+# api_curl_status METHOD ENDPOINT [CURL_ARGS...]
+# Call ${GITLAB_URL}/api/v4${ENDPOINT} and print only the HTTP status code.
+# Prints "000" and still returns 0 when no HTTP response was obtained: callers
+# assign the result under `set -e`, so a non-zero status here would end the
+# whole run instead of recording a failed test.
+# Globals read: GITLAB_URL, GITLAB_TOKEN, CURL_TIMEOUT, CURL_INSECURE.
+api_curl_status() {
+    local method="$1"
+    local endpoint="$2"
+    shift 2
+    local curl_opts=(-s -S -o /dev/null -w "%{http_code}" --max-time "$CURL_TIMEOUT" -X "$method")
+
+    if [[ "$CURL_INSECURE" == "true" ]]; then
+        curl_opts+=(-k)
+    fi
+    if [[ -n "$GITLAB_TOKEN" ]]; then
+        curl_opts+=(-H "PRIVATE-TOKEN: ${GITLAB_TOKEN}")
+    fi
+    curl_opts+=(-H "Content-Type: application/json")
+
+    local url="${GITLAB_URL}/api/v4${endpoint}"
+    local code
+    code=$(curl "${curl_opts[@]}" "$@" "$url" 2>/dev/null) || true
+    # Empty output (e.g. curl could not be started) is normalised to 000.
+    echo "${code:-000}"
+}
+
+# ── JSON parsing (no jq required) ────────────────────────────────────
+# Extract a top-level string/number value from flat JSON
+json_value() {
+ local key="$1"
+ local json="$2"
+ echo "$json" | { grep -oP "\"${key}\"\s*:\s*\"?\K[^\",}]+" || true; } | head -1
+}
+
+json_value_string() {
+ local key="$1"
+ local json="$2"
+ echo "$json" | { grep -oP "\"${key}\"\s*:\s*\"\K[^\"]*" || true; } | head -1
+}
+
+# ── Cleanup ───────────────────────────────────────────────────────────
+cleanup() {
+ if [[ -n "$CLEANUP_PROJECT_ID" && "$SKIP_CLEANUP" != "true" ]]; then
+ verbose "Cleaning up smoke test project (ID: ${CLEANUP_PROJECT_ID})"
+ api_curl DELETE "/projects/${CLEANUP_PROJECT_ID}" >/dev/null 2>&1 || true
+ fi
+ if [[ -n "$TMPDIR_SMOKE" && -d "$TMPDIR_SMOKE" ]]; then
+ rm -rf "$TMPDIR_SMOKE"
+ fi
+}
+
+trap cleanup EXIT
+
+# ══════════════════════════════════════════════════════════════════════
+# TEST SUITES
+# ══════════════════════════════════════════════════════════════════════
+
+# ── 1. Connectivity ──────────────────────────────────────────────────
+# test_connectivity — suite 1.
+# Probes /-/health, /-/readiness and /-/liveness (each must return HTTP 200)
+# and, for https:// URLs, reads the server certificate's expiry via openssl.
+# Globals read: GITLAB_URL, GITLAB_HEALTH_TOKEN, CURL_TIMEOUT, CURL_INSECURE.
+# Results are reported through record_pass / record_fail / record_skip.
+test_connectivity() {
+    echo ""
+    echo -e "${BOLD}Connectivity${RESET}"
+
+    local curl_opts=(-s -o /dev/null -w "%{http_code}" --max-time "$CURL_TIMEOUT")
+    if [[ "$CURL_INSECURE" == "true" ]]; then
+        curl_opts+=(-k)
+    fi
+
+    local health_qs=""
+    if [[ -n "$GITLAB_HEALTH_TOKEN" ]]; then
+        health_qs="?token=${GITLAB_HEALTH_TOKEN}"
+    fi
+
+    # 1a-1c. Health, readiness and liveness endpoints ("path|test name").
+    local probe path label http_code
+    for probe in \
+        "health|GitLab health endpoint reachable" \
+        "readiness|GitLab readiness check" \
+        "liveness|GitLab liveness check"; do
+        path="${probe%%|*}"
+        label="${probe#*|}"
+        http_code=$(curl "${curl_opts[@]}" "${GITLAB_URL}/-/${path}${health_qs}" 2>/dev/null) || http_code="000"
+        if [[ "$http_code" == "200" ]]; then
+            record_pass "$label" "HTTP ${http_code}"
+        else
+            record_fail "$label" "HTTP ${http_code}"
+        fi
+    done
+
+    # 1d. TLS certificate validity (if HTTPS)
+    if [[ "$GITLAB_URL" != https://* ]]; then
+        record_skip "TLS certificate check" "not using HTTPS"
+        return 0
+    fi
+
+    # Split "host[:port]" out of the URL. The path is removed first so that a
+    # URL such as https://host:8443/gitlab still yields port 8443.
+    local authority="${GITLAB_URL#https://}"
+    authority="${authority%%/*}"
+    local host="${authority%%:*}"
+    local port="${authority##*:}"
+    if [[ "$authority" != *:* || ! "$port" =~ ^[0-9]+$ ]]; then
+        port="443"
+    fi
+
+    local expiry
+    expiry=$(echo | openssl s_client -servername "$host" -connect "${host}:${port}" 2>/dev/null \
+        | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2) || expiry=""
+
+    if [[ -z "$expiry" ]]; then
+        record_skip "TLS certificate check" "could not retrieve certificate"
+        return 0
+    fi
+
+    # GNU date first, then the BSD form. If neither can parse the date the
+    # check is skipped: treating it as epoch 0 would report a valid
+    # certificate as expired.
+    local expiry_epoch
+    expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null) \
+        || expiry_epoch=$(date -j -f "%b %d %T %Y %Z" "$expiry" +%s 2>/dev/null) \
+        || expiry_epoch=""
+    if [[ -z "$expiry_epoch" ]]; then
+        record_skip "TLS certificate check" "could not parse expiry date (${expiry})"
+        return 0
+    fi
+
+    local now_epoch
+    now_epoch=$(date +%s)
+    local days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
+
+    if [[ $days_left -gt 30 ]]; then
+        record_pass "TLS certificate valid" "${days_left} days remaining"
+    elif [[ $days_left -gt 0 ]]; then
+        record_pass "TLS certificate valid" "${days_left} days remaining (renew soon)"
+    else
+        record_fail "TLS certificate valid" "expired or expiring in ${days_left} days"
+    fi
+}
+
+# ── 2. API ────────────────────────────────────────────────────────────
+test_api() {
+ echo ""
+ echo -e "${BOLD}API${RESET}"
+
+ # 2a. Version endpoint
+ local version_json
+ version_json=$(api_curl GET "/version" 2>/dev/null) || version_json=""
+
+ local gl_version
+ gl_version=$(json_value_string "version" "$version_json")
+ local gl_revision
+ gl_revision=$(json_value_string "revision" "$version_json")
+
+ if [[ -n "$gl_version" ]]; then
+ record_pass "API version endpoint" "GitLab ${gl_version} (${gl_revision})"
+ else
+ record_fail "API version endpoint" "no version returned"
+ fi
+
+ # 2b. Authentication
+ local auth_status
+ auth_status=$(api_curl_status GET "/user")
+ if [[ "$auth_status" == "200" ]]; then
+ local user_json
+ user_json=$(api_curl GET "/user")
+ local username
+ username=$(json_value_string "username" "$user_json")
+ record_pass "API authentication" "authenticated as ${username}"
+ elif [[ "$auth_status" == "401" ]]; then
+ record_fail "API authentication" "token rejected (HTTP 401)"
+ else
+ record_fail "API authentication" "HTTP ${auth_status}"
+ fi
+
+ # 2c. List projects (verify database queries work)
+ local projects_status
+ projects_status=$(api_curl_status GET "/projects?per_page=1")
+ if [[ "$projects_status" == "200" ]]; then
+ record_pass "API list projects" "database responding"
+ else
+ record_fail "API list projects" "HTTP ${projects_status}"
+ fi
+
+ # 2d. List users
+ local users_status
+ users_status=$(api_curl_status GET "/users?per_page=1")
+ if [[ "$users_status" == "200" ]]; then
+ record_pass "API list users" "user directory accessible"
+ else
+ record_fail "API list users" "HTTP ${users_status}"
+ fi
+
+ # 2e. Sidekiq health (job processing)
+ local sidekiq_json
+ sidekiq_json=$(api_curl GET "/sidekiq/compound_metrics" 2>/dev/null) || sidekiq_json=""
+
+ if [[ -n "$sidekiq_json" && "$sidekiq_json" != *"error"* ]]; then
+ local processes
+ processes=$(echo "$sidekiq_json" | { grep -oP '"hostname"\s*:' || true; } | wc -l)
+ record_pass "Sidekiq running" "${processes} process(es) responding"
+ else
+ record_fail "Sidekiq running" "could not query Sidekiq metrics"
+ fi
+
+ # 2f. Runners endpoint
+ local runners_status
+ runners_status=$(api_curl_status GET "/runners/all?per_page=1")
+ if [[ "$runners_status" == "200" ]]; then
+ record_pass "API runners endpoint" "runner management accessible"
+ elif [[ "$runners_status" == "403" ]]; then
+ record_skip "API runners endpoint" "token lacks admin scope"
+ else
+ record_fail "API runners endpoint" "HTTP ${runners_status}"
+ fi
+
+ # 2g. Search endpoint
+ local search_status
+ search_status=$(api_curl_status GET "/search?scope=projects&search=test")
+ if [[ "$search_status" == "200" ]]; then
+ record_pass "API search" "search index responding"
+ elif [[ "$search_status" == "403" ]]; then
+ record_skip "API search" "search disabled or token lacks scope"
+ else
+ record_fail "API search" "HTTP ${search_status}"
+ fi
+}
+
+# ── 3. Git Operations ────────────────────────────────────────────────
+test_git() {
+ if [[ "$SKIP_GIT" == "true" ]]; then
+ echo ""
+ echo -e "${BOLD}Git Operations${RESET}"
+ record_skip "Git clone" "SKIP_GIT=true"
+ record_skip "Git push" "SKIP_GIT=true"
+ return
+ fi
+
+ echo ""
+ echo -e "${BOLD}Git Operations${RESET}"
+
+ # Create a test project via API
+ local project_name
+ project_name="${SMOKE_PROJECT_PREFIX}-$(date +%s)"
+ local create_json
+ create_json=$(api_curl POST "/projects" -d "{\"name\":\"${project_name}\",\"visibility\":\"private\",\"initialize_with_readme\":true}")
+
+ local project_id
+ project_id=$(json_value "id" "$create_json")
+ local http_url
+ http_url=$(json_value_string "http_url_to_repo" "$create_json")
+
+ if [[ -z "$project_id" || "$project_id" == "null" ]]; then
+ record_fail "Create test project" "API returned: $(echo "$create_json" | head -c 200)"
+ record_skip "Git clone" "no test project"
+ record_skip "Git push" "no test project"
+ return
+ fi
+
+ CLEANUP_PROJECT_ID="$project_id"
+ record_pass "Create test project" "${project_name} (ID: ${project_id})"
+
+ # Clone
+ TMPDIR_SMOKE=$(mktemp -d)
+
+ # Fallback: if http_url_to_repo wasn't parsed, construct it
+ if [[ -z "$http_url" ]]; then
+ http_url="${GITLAB_URL}/${GITLAB_USER}/${project_name}.git"
+ verbose "http_url_to_repo not found in API response, constructed: ${http_url}"
+ fi
+ verbose "Clone URL (from API): ${http_url}"
+
+ # Replace the hostname in the API-returned URL with GITLAB_URL
+ # (the API may return an internal hostname that's unreachable remotely)
+ local api_origin
+ api_origin=$(echo "$http_url" | grep -oP 'https?://[^/]+')
+ if [[ -n "$api_origin" && "$api_origin" != "$GITLAB_URL" ]]; then
+ http_url="${http_url/$api_origin/$GITLAB_URL}"
+ verbose "Rewrote clone URL to: ${http_url}"
+ fi
+
+ local clone_url
+ # Inject token into URL for HTTPS clone
+ if [[ "$http_url" == https://* ]]; then
+ clone_url="https://oauth2:${GITLAB_TOKEN}@${http_url#https://}"
+ elif [[ "$http_url" == http://* ]]; then
+ clone_url="http://oauth2:${GITLAB_TOKEN}@${http_url#http://}"
+ else
+ clone_url="$http_url"
+ fi
+
+ local git_opts=()
+ [[ "$CURL_INSECURE" == "true" ]] && git_opts+=(-c http.sslVerify=false)
+
+ # Brief wait for repository initialization (initialize_with_readme is async)
+ sleep 2
+
+ verbose "Running: git clone ${TMPDIR_SMOKE}/repo"
+ local clone_err clone_rc
+ clone_err=$(git ${git_opts[@]+"${git_opts[@]}"} clone "$clone_url" "${TMPDIR_SMOKE}/repo" 2>&1) && clone_rc=0 || clone_rc=$?
+ if [[ "$clone_rc" -eq 0 ]]; then
+ GIT_CLONE_OK="true"
+ record_pass "Git clone (HTTPS)" "Gitaly responding"
+ else
+ local short_err redacted_url
+ short_err=$(echo "$clone_err" | grep -i -E 'fatal|error' | head -1 | sed "s|${GITLAB_TOKEN}|[REDACTED]|g")
+ redacted_url=$(echo "$http_url" | sed "s|${GITLAB_TOKEN}|[REDACTED]|g")
+ verbose "Full clone output: $(echo "$clone_err" | sed "s|${GITLAB_TOKEN}|[REDACTED]|g")"
+ local redacted_clone
+ redacted_clone=$(echo "$clone_url" | sed "s|${GITLAB_TOKEN}|[REDACTED]|g")
+ verbose "Attempted URL: ${redacted_clone}"
+ record_fail "Git clone (HTTPS)" "${short_err:-clone failed (exit $clone_rc)}"
+ return
+ fi
+
+ # Push a commit
+ pushd "${TMPDIR_SMOKE}/repo" >/dev/null
+ git config user.email "smoke-test@example.com"
+ git config user.name "Smoke Test"
+ echo "smoke test $(date -u +%Y-%m-%dT%H:%M:%SZ)" > smoke-test.txt
+ git add smoke-test.txt
+ git commit -m "smoke test commit" >/dev/null 2>&1
+
+ if git ${git_opts[@]+"${git_opts[@]}"} push origin main >/dev/null 2>&1 || \
+ git ${git_opts[@]+"${git_opts[@]}"} push origin master >/dev/null 2>&1; then
+ record_pass "Git push (HTTPS)" "write to Gitaly succeeded"
+ else
+ record_fail "Git push (HTTPS)" "push failed"
+ fi
+ popd >/dev/null
+}
+
+# ── 4. Container Registry ────────────────────────────────────────────
+test_registry() {
+ if [[ "$SKIP_REGISTRY" == "true" ]]; then
+ echo ""
+ echo -e "${BOLD}Container Registry${RESET}"
+ record_skip "Registry API" "SKIP_REGISTRY=true"
+ return
+ fi
+
+ echo ""
+ echo -e "${BOLD}Container Registry${RESET}"
+
+ # Check if registry is enabled via application settings API
+ local registry_enabled=""
+ local settings_json
+ settings_json=$(api_curl GET "/application/settings" 2>/dev/null) || settings_json=""
+
+ if [[ -n "$settings_json" ]]; then
+ registry_enabled=$(json_value "container_registry_enabled" "$settings_json" 2>/dev/null || echo "")
+ fi
+
+ if [[ "$registry_enabled" == "false" ]]; then
+ record_skip "Registry API reachable" "container registry disabled in application settings"
+ record_skip "Registry project endpoint" "container registry disabled in application settings"
+ return
+ fi
+
+ # Try the registry v2 API endpoint
+ local host
+ host=$(echo "$GITLAB_URL" | sed 's|https\?://||' | cut -d/ -f1)
+
+ local curl_opts=(-s -o /dev/null -w "%{http_code}" --max-time "$CURL_TIMEOUT")
+ [[ "$CURL_INSECURE" == "true" ]] && curl_opts+=(-k)
+
+ local registry_status
+ registry_status=$(curl "${curl_opts[@]}" "${GITLAB_URL}:5050/v2/" 2>/dev/null) || \
+ registry_status=$(curl "${curl_opts[@]}" "https://${host}:5050/v2/" 2>/dev/null) || \
+ registry_status=$(curl "${curl_opts[@]}" "https://registry.${host}/v2/" 2>/dev/null) || \
+ registry_status="000"
+
+ if [[ "$registry_status" == "200" || "$registry_status" == "401" ]]; then
+ record_pass "Registry API reachable" "HTTP ${registry_status}"
+ elif [[ "$registry_status" == "000" ]]; then
+ if [[ "$registry_enabled" == "true" ]]; then
+ record_fail "Registry API reachable" "enabled in settings but not reachable at standard ports/hosts"
+ else
+ record_skip "Registry API reachable" "not found at standard ports/hosts (settings unreadable — may need admin token)"
+ fi
+ else
+ record_fail "Registry API reachable" "HTTP ${registry_status}"
+ fi
+
+ # Check registry via GitLab API (project-level)
+ if [[ -n "$CLEANUP_PROJECT_ID" ]]; then
+ local reg_status
+ reg_status=$(api_curl_status GET "/projects/${CLEANUP_PROJECT_ID}/registry/repositories")
+ if [[ "$reg_status" == "200" ]]; then
+ record_pass "Registry project endpoint" "project registry accessible"
+ elif [[ "$reg_status" == "404" ]]; then
+ record_skip "Registry project endpoint" "container registry not enabled for project"
+ else
+ record_fail "Registry project endpoint" "HTTP ${reg_status}"
+ fi
+ fi
+}
+
+# ── 5. CI/CD ──────────────────────────────────────────────────────────
+# test_cicd — suite 5.
+# Counts registered and online runners from /runners/all and checks that
+# /application/settings is readable. A response that is not a JSON array is
+# reported as a skip for the runner check.
+test_cicd() {
+    echo ""
+    echo -e "${BOLD}CI/CD${RESET}"
+
+    # Check runners
+    local runners_json
+    runners_json=$(api_curl GET "/runners/all?per_page=100" 2>/dev/null) || runners_json=""
+
+    if [[ "$runners_json" != "["* ]]; then
+        record_skip "CI/CD runners" "could not query runners (admin token required)"
+    else
+        # One "id" key per runner object; "status":"online" marks live ones.
+        local registered online
+        registered=$(printf '%s\n' "$runners_json" | { grep -oP '"id"\s*:' || true; } | wc -l)
+        online=$(printf '%s\n' "$runners_json" | { grep -oP '"status"\s*:\s*"online"' || true; } | wc -l)
+
+        if (( online > 0 )); then
+            record_pass "CI/CD runners online" "${online}/${registered} runners online"
+        elif (( registered > 0 )); then
+            record_fail "CI/CD runners online" "0/${registered} runners online"
+        else
+            record_skip "CI/CD runners online" "no runners registered"
+        fi
+    fi
+
+    # Check CI/CD settings via API
+    local settings_code
+    settings_code=$(api_curl_status GET "/application/settings")
+    case "$settings_code" in
+        200)
+            record_pass "CI/CD settings accessible" "application settings readable"
+            ;;
+        403)
+            record_skip "CI/CD settings accessible" "admin token required"
+            ;;
+        *)
+            record_fail "CI/CD settings accessible" "HTTP ${settings_code}"
+            ;;
+    esac
+}
+
+# ── 6. Background Migrations ─────────────────────────────────────────
+# test_migrations — suite 6.
+# Reads the batched background migrations of the main database and counts
+# them by status. Any failed or paused migration fails the test; active ones
+# still pass. When the listing is not a JSON array the test is skipped, with
+# the HTTP status of a second request explaining why.
+test_migrations() {
+    echo ""
+    echo -e "${BOLD}Background Migrations${RESET}"
+
+    # Batched background migrations (admin only)
+    local listing
+    listing=$(api_curl GET "/admin/batched_background_migrations?database=main" 2>/dev/null) || listing=""
+
+    if [[ "$listing" != "["* ]]; then
+        local probe_code
+        probe_code=$(api_curl_status GET "/admin/batched_background_migrations?database=main")
+        case "$probe_code" in
+            403) record_skip "Background migrations" "admin token required" ;;
+            *) record_skip "Background migrations" "could not query (HTTP ${probe_code})" ;;
+        esac
+        return 0
+    fi
+
+    local n_total n_failed n_active n_paused n_finished
+    n_total=$(printf '%s\n' "$listing" | { grep -oP '"id"\s*:' || true; } | wc -l)
+    n_failed=$(printf '%s\n' "$listing" | { grep -oP '"status"\s*:\s*"failed"' || true; } | wc -l)
+    n_active=$(printf '%s\n' "$listing" | { grep -oP '"status"\s*:\s*"active"' || true; } | wc -l)
+    n_paused=$(printf '%s\n' "$listing" | { grep -oP '"status"\s*:\s*"paused"' || true; } | wc -l)
+    n_finished=$(printf '%s\n' "$listing" | { grep -oP '"status"\s*:\s*"finished"' || true; } | wc -l)
+
+    if (( n_failed > 0 )); then
+        record_fail "Background migrations" "${n_failed} failed, ${n_active} active, ${n_paused} paused, ${n_finished} finished of ${n_total}"
+    elif (( n_paused > 0 )); then
+        record_fail "Background migrations" "${n_paused} paused, ${n_active} active, ${n_finished} finished of ${n_total}"
+    elif (( n_active > 0 )); then
+        record_pass "Background migrations" "${n_active} active, ${n_finished} finished of ${n_total} (in progress)"
+    else
+        record_pass "Background migrations" "all ${n_total} finished"
+    fi
+}
+
+# ── 7. Storage & Components ──────────────────────────────────────────
+test_components() {
+ echo ""
+ echo -e "${BOLD}Components${RESET}"
+
+ # Metadata endpoint
+ local metadata_json
+ metadata_json=$(api_curl GET "/metadata" 2>/dev/null) || metadata_json=""
+
+ if [[ -n "$metadata_json" ]]; then
+ local gl_version
+ gl_version=$(json_value_string "version" "$metadata_json")
+ local enterprise
+ enterprise=$(json_value "enterprise" "$metadata_json")
+
+ if [[ -n "$gl_version" ]]; then
+ local edition="CE"
+ [[ "$enterprise" == "true" ]] && edition="EE"
+ record_pass "GitLab metadata" "${gl_version} ${edition}"
+ else
+ record_pass "GitLab metadata" "endpoint reachable"
+ fi
+ else
+ record_skip "GitLab metadata" "metadata endpoint not available"
+ fi
+
+ # Statistics (admin)
+ local stats_json
+ stats_json=$(api_curl GET "/application/statistics" 2>/dev/null) || stats_json=""
+
+ if [[ -n "$stats_json" && "$stats_json" != *"error"* && "$stats_json" != *"403"* ]]; then
+ local active_users
+ active_users=$(json_value "active_users" "$stats_json")
+ local projects
+ projects=$(json_value "projects" "$stats_json")
+ local groups
+ groups=$(json_value "groups" "$stats_json")
+
+ if [[ -n "$active_users" ]]; then
+ record_pass "Instance statistics" "${active_users} users, ${projects} projects, ${groups} groups"
+ else
+ record_pass "Instance statistics" "endpoint reachable"
+ fi
+ else
+ record_skip "Instance statistics" "admin token required"
+ fi
+
+ # Gitaly check — only report pass if clone actually succeeded
+ if [[ "$GIT_CLONE_OK" == "true" ]]; then
+ record_pass "Gitaly storage" "project created and cloned successfully"
+ elif [[ -n "$CLEANUP_PROJECT_ID" ]]; then
+ record_skip "Gitaly storage" "project created but clone was not tested or failed"
+ fi
+
+ # PostgreSQL (inferred from API responsiveness)
+ local pg_test
+ pg_test=$(api_curl_status GET "/projects?per_page=1&order_by=updated_at")
+ if [[ "$pg_test" == "200" ]]; then
+ record_pass "PostgreSQL" "database queries succeeding"
+ else
+ record_fail "PostgreSQL" "sorted query failed (HTTP ${pg_test})"
+ fi
+
+ # Redis (inferred from session/cache)
+ local redis_test
+ redis_test=$(api_curl_status GET "/user")
+ if [[ "$redis_test" == "200" ]]; then
+ record_pass "Redis" "session/cache operational (auth succeeded)"
+ else
+ record_skip "Redis" "cannot verify independently"
+ fi
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# OUTPUT
+# ══════════════════════════════════════════════════════════════════════
+
+print_summary() {
+ local end_time
+ end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+
+ echo ""
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+ echo -e "${BOLD}Summary${RESET} ${GITLAB_URL}"
+ echo -e " ${GREEN}${PASS} passed${RESET} ${RED}${FAIL} failed${RESET} ${YELLOW}${SKIP} skipped${RESET} (${duration}s)"
+ echo -e "${BOLD}────────────────────────────────────────${RESET}"
+
+ if [[ $FAIL -eq 0 ]]; then
+ echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
+ else
+ echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"
+ fi
+}
+
+# print_tap_header — emit the TAP version line that opens a TAP stream.
+print_tap_header() {
+    printf '%s\n' "TAP version 13"
+}
+
+print_tap_footer() {
+ echo "1..${TOTAL}"
+ echo "# pass ${PASS}"
+ echo "# fail ${FAIL}"
+ echo "# skip ${SKIP}"
+}
+
+write_junit() {
+ local end_time
+ end_time=$(date +%s)
+ local duration=$(( end_time - START_TIME ))
+
+ cat > "$JUNIT_FILE" <
+
+
+JUNIT_EOF
+
+ for result in "${RESULTS[@]}"; do
+ local status name detail
+ status=$(echo "$result" | cut -d'|' -f1)
+ name=$(echo "$result" | cut -d'|' -f2)
+ detail=$(echo "$result" | cut -d'|' -f3)
+
+ # XML-escape the values
+ name=$(echo "$name" | sed 's/&/\&/g; s/\</g; s/>/\>/g; s/"/\"/g')
+ detail=$(echo "$detail" | sed 's/&/\&/g; s/\</g; s/>/\>/g; s/"/\"/g')
+
+ case "$status" in
+ PASS)
+ echo " " >> "$JUNIT_FILE"
+ [[ -n "$detail" ]] && echo " ${detail}" >> "$JUNIT_FILE"
+ echo " " >> "$JUNIT_FILE"
+ ;;
+ FAIL)
+ echo " " >> "$JUNIT_FILE"
+ echo " FAILED: ${name} — ${detail}" >> "$JUNIT_FILE"
+ echo " " >> "$JUNIT_FILE"
+ ;;
+ SKIP)
+ echo " " >> "$JUNIT_FILE"
+ echo " " >> "$JUNIT_FILE"
+ echo " " >> "$JUNIT_FILE"
+ ;;
+ esac
+ done
+
+ echo " " >> "$JUNIT_FILE"
+ echo "" >> "$JUNIT_FILE"
+
+ log "JUnit report written to ${JUNIT_FILE}"
+}
+
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+
+usage() {
+ cat <&2; }
+err() { printf "${RED}✗ %s${RESET}\n" "$*" >&2; }
+verbose() { [[ "$VERBOSE" == "true" ]] && printf "${DIM} %s${RESET}\n" "$*" >&2 || true; }
+die() { err "$*"; exit 1; }
+
+# ── Help ──────────────────────────────────────────────────────────────────────
+
+show_help() {
+ cat </dev/null 2>&1; then
+ ver=$(gitlab-ctl version 2>/dev/null | grep -oP '\d+\.\d+\.\d+' | head -1 || true)
+ fi
+ if [[ -z "$ver" ]] && command -v dpkg >/dev/null 2>&1; then
+ ver=$(dpkg -l gitlab-ce gitlab-ee 2>/dev/null | awk '/^ii/{print $3}' | grep -oP '\d+\.\d+\.\d+' | head -1 || true)
+ fi
+ if [[ -z "$ver" ]] && command -v rpm >/dev/null 2>&1; then
+ ver=$(rpm -q gitlab-ce gitlab-ee 2>/dev/null | grep -oP '\d+\.\d+\.\d+' | head -1 || true)
+ fi
+ echo "$ver"
+}
+
+detect_pg_version() {
+ local ver=""
+ if command -v gitlab-psql >/dev/null 2>&1; then
+ ver=$(gitlab-psql --version 2>/dev/null | grep -oP '\d+' | head -1 || true)
+ fi
+ if [[ -z "$ver" ]] && command -v psql >/dev/null 2>&1; then
+ ver=$(psql --version 2>/dev/null | grep -oP '\d+' | head -1 || true)
+ fi
+ echo "$ver"
+}
+
+# ── Core Logic ────────────────────────────────────────────────────────────────
+
+get_pg_req() {
+ local gl_major="$1"
+ local entry
+ for entry in "${PG_REQS[@]}"; do
+ IFS='|' read -r req_gl req_min req_max <<< "$entry"
+ if [[ "$req_gl" == "$gl_major" ]]; then
+ echo "${req_min}|${req_max}"
+ return
+ fi
+ done
+ echo "unknown|unknown"
+}
+
+# build_upgrade_path — fill UPGRADE_PATH.
+# Keeps every STOPS entry ("version|conditional|notes") whose version lies in
+# the range (FROM_VERSION, TO_VERSION], dropping conditional stops when
+# SKIP_CONDITIONAL=true, then appends TO_VERSION as the final "Target version"
+# entry unless the last kept stop already is that version.
+build_upgrade_path() {
+    local lower upper
+    lower=$(version_to_int "$FROM_VERSION")
+    upper=$(version_to_int "$TO_VERSION")
+
+    UPGRADE_PATH=()
+    local stop stop_ver stop_int is_conditional stop_notes
+    for stop in "${STOPS[@]}"; do
+        IFS='|' read -r stop_ver is_conditional stop_notes <<< "$stop"
+        stop_int=$(version_to_int "$stop_ver")
+
+        # Outside the (from, to] window.
+        if (( stop_int <= lower || stop_int > upper )); then
+            continue
+        fi
+        if [[ "$is_conditional" == "1" && "$SKIP_CONDITIONAL" == "true" ]]; then
+            verbose "Skipping conditional stop: $stop_ver"
+            continue
+        fi
+
+        UPGRADE_PATH+=("$stop")
+    done
+
+    # Add target if it's not already in the path
+    local tail_ver=""
+    if (( ${#UPGRADE_PATH[@]} > 0 )); then
+        IFS='|' read -r tail_ver _ _ <<< "${UPGRADE_PATH[-1]}"
+    fi
+    if [[ "$tail_ver" != "$TO_VERSION" ]]; then
+        UPGRADE_PATH+=("${TO_VERSION}|0|Target version")
+    fi
+}
+
+# get_pg_warnings — fill PG_WARNINGS.
+# For every GitLab major version from FROM_VERSION's to TO_VERSION's whose
+# minimum PostgreSQL requirement exceeds PG_VERSION, appends an entry
+# "message|action|gl_major|pg_min|pg_max". The action names the last STOPS
+# version of the preceding major, or "<prev>.x" when there is none.
+# Leaves PG_WARNINGS empty when PG_VERSION is empty.
+get_pg_warnings() {
+    PG_WARNINGS=()
+    [[ -n "$PG_VERSION" ]] || return 0
+
+    local first_major last_major
+    first_major=$(version_major "$FROM_VERSION")
+    last_major=$(version_major "$TO_VERSION")
+
+    local major=$first_major
+    while (( major <= last_major )); do
+        local requirement
+        requirement=$(get_pg_req "$major")
+        IFS='|' read -r pg_min pg_max <<< "$requirement"
+
+        if [[ "$pg_min" != "unknown" ]] && (( PG_VERSION < pg_min )); then
+            # Find the last stop before this major version
+            local boundary="" prior=$(( major - 1 ))
+            local stop stop_ver
+            for stop in "${STOPS[@]}"; do
+                IFS='|' read -r stop_ver _ _ <<< "$stop"
+                if [[ "$(version_major "$stop_ver")" == "$prior" ]]; then
+                    boundary="$stop_ver"
+                fi
+            done
+            PG_WARNINGS+=("PostgreSQL ${PG_VERSION} is below minimum for GitLab ${major}.x (requires ${pg_min}+)|Upgrade PostgreSQL to ${pg_min}+ before upgrading past GitLab ${boundary:-${prior}.x}|${major}|${pg_min}|${pg_max}")
+        fi
+
+        major=$(( major + 1 ))
+    done
+}
+
+estimate_downtime() {
+ local stop_count=${#UPGRADE_PATH[@]}
+ local pg_upgrade_count=${#PG_WARNINGS[@]}
+
+ # Software time: package install + gitlab-ctl reconfigure per stop
+ DT_SW_LOW=$(( stop_count * 5 ))
+ DT_SW_HIGH=$(( stop_count * 15 ))
+
+ # Background migration time per stop, based on database size
+ # These run between stops and must complete before proceeding
+ local mig_low=0 mig_high=0
+ case "$DB_SIZE" in
+ small) mig_low=2; mig_high=10 ;; # <10GB: minutes
+ medium) mig_low=10; mig_high=30 ;; # 10-50GB: tens of minutes
+ large) mig_low=30; mig_high=90 ;; # 50-200GB: up to hours
+ xlarge) mig_low=60; mig_high=240 ;; # 200GB+: hours per stop
+ *) mig_low=0; mig_high=0 ;; # unknown: show software only
+ esac
+ DT_MIG_LOW=$(( stop_count * mig_low ))
+ DT_MIG_HIGH=$(( stop_count * mig_high ))
+
+ # PostgreSQL major upgrade time
+ DT_PG_LOW=$(( pg_upgrade_count * 15 ))
+ DT_PG_HIGH=$(( pg_upgrade_count * 60 ))
+
+ DT_GL_LOW=$(( DT_SW_LOW + DT_MIG_LOW ))
+ DT_GL_HIGH=$(( DT_SW_HIGH + DT_MIG_HIGH ))
+ DT_TOTAL_LOW=$(( DT_GL_LOW + DT_PG_LOW ))
+ DT_TOTAL_HIGH=$(( DT_GL_HIGH + DT_PG_HIGH ))
+}
+
+# ── Text Output ───────────────────────────────────────────────────────────────
+
+# print_header — print the tool's title banner: a blank line, the bold
+# title, a rule line and a trailing blank line.
+print_header() {
+    local rule="══════════════════════════════════════════════════════════════"
+    printf "\n${BOLD}GitLab Upgrade Path Calculator${RESET}\n"
+    printf '%s\n\n' "$rule"
+}
+
+format_path_text() {
+ print_header
+
+ printf " ${BOLD}From:${RESET} %s\n" "$FROM_VERSION"
+ printf " ${BOLD}To:${RESET} %s\n" "$TO_VERSION"
+ printf " ${BOLD}Stops:${RESET} %d\n" "${#UPGRADE_PATH[@]}"
+
+ if [[ -n "$PG_VERSION" ]]; then
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ local last_warn="${PG_WARNINGS[-1]}"
+ IFS='|' read -r _ _ _ final_pg_min _ <<< "$last_warn"
+ printf "\n ${BOLD}PostgreSQL:${RESET} ${YELLOW}Currently ${PG_VERSION} → Must upgrade to ${final_pg_min}+ before GitLab ${TO_VERSION}${RESET}\n"
+ else
+ printf "\n ${BOLD}PostgreSQL:${RESET} ${GREEN}${PG_VERSION} — compatible with target${RESET}\n"
+ fi
+ fi
+
+ printf "\n ── Upgrade Path ──────────────────────────────────────────\n\n"
+ printf " ${DIM}Step Version Notes PG Required${RESET}\n"
+ printf " ${DIM}──── ──────────── ─────────────────────────────────────── ──────────${RESET}\n"
+
+ local step=0 entry ver conditional notes
+ for entry in "${UPGRADE_PATH[@]}"; do
+ IFS='|' read -r ver conditional notes <<< "$entry"
+ step=$((step + 1))
+
+ local gl_major pg_range
+ gl_major=$(version_major "$ver")
+ local req
+ req=$(get_pg_req "$gl_major")
+ IFS='|' read -r pg_min pg_max <<< "$req"
+ if [[ "$pg_min" != "unknown" ]]; then
+ pg_range="${pg_min}-${pg_max}"
+ else
+ pg_range="—"
+ fi
+
+ local cond_marker=""
+ if [[ "$conditional" == "1" ]]; then
+ cond_marker=" ⓘ"
+ fi
+
+ local ver_color="$RESET"
+ if [[ "$notes" == "Target version" ]]; then
+ ver_color="$GREEN"
+ fi
+
+ printf " %3d ${ver_color}%-12s${RESET} %-39s %s\n" "$step" "$ver" "${notes}${cond_marker}" "$pg_range"
+ done
+
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ printf "\n ── PostgreSQL Upgrade Required ───────────────────────────\n\n"
+ local warning
+ for warning in "${PG_WARNINGS[@]}"; do
+ IFS='|' read -r msg action gl_major pg_min pg_max <<< "$warning"
+ printf " ${YELLOW}⚠ %s${RESET}\n" "$msg"
+ printf " → %s\n\n" "$action"
+ done
+ fi
+
+ estimate_downtime
+ printf " ── Estimated Downtime ────────────────────────────────────\n\n"
+ printf " Software: %d stops × 5-15 min = %d-%d min\n" "${#UPGRADE_PATH[@]}" "$DT_SW_LOW" "$DT_SW_HIGH"
+ printf " ${DIM}(package install + gitlab-ctl reconfigure)${RESET}\n"
+ if [[ -n "$DB_SIZE" ]]; then
+ printf " Migrations: %d stops × %s db = %d-%d min\n" "${#UPGRADE_PATH[@]}" "$DB_SIZE" "$DT_MIG_LOW" "$DT_MIG_HIGH"
+ printf " ${DIM}(background migrations must complete per stop)${RESET}\n"
+ else
+ printf " Migrations: ${DIM}use --db-size (small/medium/large/xlarge) for estimates${RESET}\n"
+ fi
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ printf " PG upgrades: %d × 15-60 min = %d-%d min\n" "${#PG_WARNINGS[@]}" "$DT_PG_LOW" "$DT_PG_HIGH"
+ fi
+ printf "\n ${BOLD}Total estimate: %d-%d min${RESET}" "$DT_TOTAL_LOW" "$DT_TOTAL_HIGH"
+ if (( DT_TOTAL_HIGH >= 120 )); then
+ local hours_low=$(( DT_TOTAL_LOW / 60 ))
+ local hours_high=$(( DT_TOTAL_HIGH / 60 ))
+ printf " (%d-%d hrs — plan a full maintenance window)" "$hours_low" "$hours_high"
+ fi
+ printf "\n\n"
+
+ if [[ "$SKIP_CONDITIONAL" == "false" ]]; then
+ local has_conditional=false
+ for entry in "${UPGRADE_PATH[@]}"; do
+ IFS='|' read -r _ conditional _ <<< "$entry"
+ if [[ "$conditional" == "1" ]]; then
+ has_conditional=true
+ break
+ fi
+ done
+ if [[ "$has_conditional" == "true" ]]; then
+ printf " ${DIM}ⓘ = conditional stop (may be skippable — use --skip-conditional)${RESET}\n\n"
+ fi
+ fi
+}
+
+format_path_json() {
+ local steps_json="["
+ local step=0 first=true entry ver conditional notes
+ for entry in "${UPGRADE_PATH[@]}"; do
+ IFS='|' read -r ver conditional notes <<< "$entry"
+ step=$((step + 1))
+ local gl_major req pg_min pg_max
+ gl_major=$(version_major "$ver")
+ req=$(get_pg_req "$gl_major")
+ IFS='|' read -r pg_min pg_max <<< "$req"
+
+ [[ "$first" == "true" ]] || steps_json+=","
+ first=false
+ steps_json+=$(printf '{"step":%d,"version":"%s","conditional":%s,"notes":"%s","pg_min":"%s","pg_max":"%s"}' \
+ "$step" "$ver" "$( [[ "$conditional" == "1" ]] && echo "true" || echo "false" )" "$notes" "$pg_min" "$pg_max")
+ done
+ steps_json+="]"
+
+ local pg_upgrades_json="["
+ first=true
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ local warning
+ for warning in "${PG_WARNINGS[@]}"; do
+ IFS='|' read -r msg action gl_major pg_min pg_max <<< "$warning"
+ [[ "$first" == "true" ]] || pg_upgrades_json+=","
+ first=false
+ pg_upgrades_json+=$(printf '{"before_gitlab":"%s.0.0","min_pg":"%s","max_pg":"%s"}' "$gl_major" "$pg_min" "$pg_max")
+ done
+ fi
+ pg_upgrades_json+="]"
+
+ estimate_downtime
+
+ printf '{\n'
+ printf ' "from": "%s",\n' "$FROM_VERSION"
+ printf ' "to": "%s",\n' "$TO_VERSION"
+ printf ' "total_stops": %d,\n' "${#UPGRADE_PATH[@]}"
+ printf ' "pg_current": "%s",\n' "${PG_VERSION:-null}"
+ printf ' "pg_upgrades_needed": %s,\n' "$pg_upgrades_json"
+ printf ' "steps": %s,\n' "$steps_json"
+ printf ' "db_size": "%s",\n' "${DB_SIZE:-unknown}"
+ printf ' "estimated_downtime_min": {"software": {"low": %d, "high": %d}, "migrations": {"low": %d, "high": %d}, "pg_upgrades": {"low": %d, "high": %d}, "total": {"low": %d, "high": %d}}\n' \
+ "$DT_SW_LOW" "$DT_SW_HIGH" "$DT_MIG_LOW" "$DT_MIG_HIGH" "$DT_PG_LOW" "$DT_PG_HIGH" "$DT_TOTAL_LOW" "$DT_TOTAL_HIGH"
+ printf '}\n'
+}
+
+# ── Mode: --path ──────────────────────────────────────────────────────────────
+
+# Handler for the path mode: resolve the source and target versions, check
+# that the target is newer, build the upgrade path and print it in the
+# requested FORMAT.
+run_path() {
+    # Fall back to auto-detection when no source version was supplied.
+    if [[ -z "$FROM_VERSION" ]]; then
+        verbose "Detecting installed GitLab version..."
+        FROM_VERSION=$(detect_gitlab_version)
+        [[ -n "$FROM_VERSION" ]] || die "Could not detect installed GitLab version. Use --from VERSION."
+        log "Detected GitLab version: $FROM_VERSION"
+    fi
+
+    # An empty target or the keyword "latest" both mean LATEST_VERSION.
+    case "$TO_VERSION" in
+        "" | latest) TO_VERSION="$LATEST_VERSION" ;;
+    esac
+
+    validate_version "$FROM_VERSION"
+    validate_version "$TO_VERSION"
+
+    local current_num target_num
+    current_num=$(version_to_int "$FROM_VERSION")
+    target_num=$(version_to_int "$TO_VERSION")
+    if (( current_num >= target_num )); then
+        die "Target version ($TO_VERSION) must be higher than current version ($FROM_VERSION)"
+    fi
+
+    build_upgrade_path
+    get_pg_warnings
+
+    case "$FORMAT" in
+        json) format_path_json ;;
+        *) format_path_text ;;
+    esac
+}
+
+# ── Mode: --check ─────────────────────────────────────────────────────────────
+
+run_check() {
+ verbose "Detecting installed GitLab version..."
+ FROM_VERSION=$(detect_gitlab_version)
+
+ if [[ -z "$FROM_VERSION" ]]; then
+ die "Could not detect installed GitLab version. Use --path --from VERSION instead."
+ fi
+
+ log "Detected GitLab version: $FROM_VERSION"
+
+ if [[ -z "$PG_VERSION" ]]; then
+ verbose "Detecting PostgreSQL version..."
+ PG_VERSION=$(detect_pg_version)
+ if [[ -n "$PG_VERSION" ]]; then
+ log "Detected PostgreSQL version: $PG_VERSION"
+ fi
+ fi
+
+ if [[ -z "$TO_VERSION" || "$TO_VERSION" == "latest" ]]; then
+ TO_VERSION="$LATEST_VERSION"
+ fi
+
+ validate_version "$FROM_VERSION"
+ validate_version "$TO_VERSION"
+
+ build_upgrade_path
+ get_pg_warnings
+
+ if [[ "$FORMAT" == "json" ]]; then
+ format_path_json
+ else
+ format_path_text
+ fi
+}
+
+# ── Mode: --list-stops ────────────────────────────────────────────────────────
+
+# Handler for the list-stops mode: print every entry of STOPS as a table
+# (version, required/conditional, notes, PostgreSQL range), followed by the
+# PG_REQS table of PostgreSQL requirements per GitLab major.
+run_list_stops() {
+ print_header
+ printf " ── All Known Required Upgrade Stops ──────────────────────\n\n"
+ printf " ${DIM}Version Type Notes PG Required${RESET}\n"
+ printf " ${DIM}──────────── ──────────── ─────────────────────────────────────── ──────────${RESET}\n"
+
+ local entry ver conditional notes
+ for entry in "${STOPS[@]}"; do
+ # Each STOPS record is "version|conditional-flag|notes".
+ IFS='|' read -r ver conditional notes <<< "$entry"
+
+ local gl_major req pg_min pg_max pg_range type_label
+ gl_major=$(version_major "$ver")
+ req=$(get_pg_req "$gl_major")
+ # get_pg_req output is split as "pg_min|pg_max".
+ IFS='|' read -r pg_min pg_max <<< "$req"
+ # A pg_min of "unknown" is shown as a dash instead of a range.
+ if [[ "$pg_min" != "unknown" ]]; then
+ pg_range="${pg_min}-${pg_max}"
+ else
+ pg_range="—"
+ fi
+
+ # The label carries colour escapes, hence the %b conversion below.
+ if [[ "$conditional" == "1" ]]; then
+ type_label="${YELLOW}conditional${RESET} "
+ else
+ type_label="${GREEN}required${RESET} "
+ fi
+
+ printf " %-12s %b %-39s %s\n" "$ver" "$type_label" "$notes" "$pg_range"
+ done
+
+ printf "\n ── PostgreSQL Version Requirements ───────────────────────\n\n"
+ printf " ${DIM}GitLab Min PG Max PG${RESET}\n"
+ printf " ${DIM}───────── ──────── ────────${RESET}\n"
+ local pg_entry
+ for pg_entry in "${PG_REQS[@]}"; do
+ # Each PG_REQS record is "gitlab_major|pg_min|pg_max".
+ # NOTE(review): gl_major/pg_min/pg_max are only declared local inside the
+ # first loop; if STOPS were empty they would be assigned as globals here.
+ IFS='|' read -r gl_major pg_min pg_max <<< "$pg_entry"
+ printf " %-9s %-8s %s\n" "${gl_major}.x" "$pg_min" "$pg_max"
+ done
+ printf "\n"
+}
+
+# ── Mode: --db-check ──────────────────────────────────────────────────────────
+
+run_db_check() {
+ if [[ -z "$PG_VERSION" ]]; then
+ verbose "Detecting PostgreSQL version..."
+ PG_VERSION=$(detect_pg_version)
+ if [[ -z "$PG_VERSION" ]]; then
+ die "Could not detect PostgreSQL version. Use --pg-version VERSION."
+ fi
+ log "Detected PostgreSQL version: $PG_VERSION"
+ fi
+
+ if [[ -z "$FROM_VERSION" ]]; then
+ FROM_VERSION=$(detect_gitlab_version)
+ if [[ -z "$FROM_VERSION" ]]; then
+ die "Could not detect GitLab version. Use --from VERSION."
+ fi
+ log "Detected GitLab version: $FROM_VERSION"
+ fi
+
+ if [[ -z "$TO_VERSION" || "$TO_VERSION" == "latest" ]]; then
+ TO_VERSION="$LATEST_VERSION"
+ fi
+
+ validate_version "$FROM_VERSION"
+ validate_version "$TO_VERSION"
+
+ build_upgrade_path
+ get_pg_warnings
+
+ if [[ "$FORMAT" == "json" ]]; then
+ local pg_json="["
+ local first=true
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ local warning
+ for warning in "${PG_WARNINGS[@]}"; do
+ IFS='|' read -r msg action gl_major pg_min pg_max <<< "$warning"
+ [[ "$first" == "true" ]] || pg_json+=","
+ first=false
+ pg_json+=$(printf '{"message":"%s","action":"%s","gitlab_major":"%s","pg_min":"%s","pg_max":"%s"}' \
+ "$msg" "$action" "$gl_major" "$pg_min" "$pg_max")
+ done
+ fi
+ pg_json+="]"
+ printf '{"pg_current":"%s","from":"%s","to":"%s","compatible":%s,"warnings":%s}\n' \
+ "$PG_VERSION" "$FROM_VERSION" "$TO_VERSION" \
+ "$( [[ ${#PG_WARNINGS[@]} -eq 0 ]] && echo "true" || echo "false" )" "$pg_json"
+ return
+ fi
+
+ print_header
+ printf " ${BOLD}PostgreSQL Compatibility Check${RESET}\n\n"
+ printf " Current GitLab: %s\n" "$FROM_VERSION"
+ printf " Target GitLab: %s\n" "$TO_VERSION"
+ printf " Current PostgreSQL: %s\n\n" "$PG_VERSION"
+
+ local from_major to_major
+ from_major=$(version_major "$FROM_VERSION")
+ to_major=$(version_major "$TO_VERSION")
+
+ printf " ── Requirements by GitLab Version ────────────────────────\n\n"
+ printf " ${DIM}GitLab Min PG Max PG Your PG %s Status${RESET}\n" "$PG_VERSION"
+ printf " ${DIM}───────── ──────── ──────── ────────── ──────────${RESET}\n"
+
+ local gl_major
+ for (( gl_major = from_major; gl_major <= to_major; gl_major++ )); do
+ local req pg_min pg_max status
+ req=$(get_pg_req "$gl_major")
+ IFS='|' read -r pg_min pg_max <<< "$req"
+
+ if (( PG_VERSION < pg_min )); then
+ status="${RED}✗ Too low${RESET}"
+ elif (( PG_VERSION > pg_max )); then
+ status="${YELLOW}⚠ Too high${RESET}"
+ else
+ status="${GREEN}✓ OK${RESET}"
+ fi
+ printf " %-9s %-8s %-8s %-10s %b\n" "${gl_major}.x" "$pg_min" "$pg_max" "$PG_VERSION" "$status"
+ done
+
+ if [[ ${#PG_WARNINGS[@]} -gt 0 ]]; then
+ printf "\n ── Action Required ───────────────────────────────────────\n\n"
+ local warning
+ for warning in "${PG_WARNINGS[@]}"; do
+ IFS='|' read -r msg action gl_major pg_min pg_max <<< "$warning"
+ printf " ${YELLOW}⚠ %s${RESET}\n" "$msg"
+ printf " → %s\n\n" "$action"
+ done
+ else
+ printf "\n ${GREEN}✓ PostgreSQL %s is compatible with the full upgrade path.${RESET}\n\n" "$PG_VERSION"
+ fi
+}
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+main() {
+ setup_colors
+ parse_args "$@"
+ setup_colors
+
+ case "$RUN_MODE" in
+ path) run_path ;;
+ check) run_check ;;
+ list-stops) run_list_stops ;;
+ db-check) run_db_check ;;
+ *) die "Unknown mode: $RUN_MODE" ;;
+ esac
+}
+
+main "$@"
diff --git a/gitlab-upgrade.sh b/gitlab-upgrade.sh
index e0ac0ff..d6620d0 100644
--- a/gitlab-upgrade.sh
+++ b/gitlab-upgrade.sh
@@ -7,7 +7,7 @@
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
-#### Version: 1.00-030526 ####
+#### Version: 1.01-051326 ####
################################################
set -o pipefail
@@ -15,7 +15,8 @@ set -o pipefail
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
-# Required version stops (as of 2026)
+# Required version stops (as of May 2026)
+# Source: https://docs.gitlab.com/update/upgrade_paths/
readonly VERSION_STOPS=(
"14.0.12"
"14.3.6"
@@ -24,15 +25,19 @@ readonly VERSION_STOPS=(
"15.0.5"
"15.4.6"
"15.11.13"
- "16.0.9"
- "16.3.8"
- "16.7.9"
+ "16.0.10"
+ "16.3.9"
+ "16.7.10"
"16.11.10"
- "17.0.8"
"17.3.7"
"17.5.5"
"17.8.7"
- "18.0.1"
+ "17.11.7"
+ "18.0.2"
+ "18.2.6"
+ "18.5.2"
+ "18.8.7"
+ "18.11.0"
)
# Default configuration
diff --git a/gitops-bootstrap.sh b/gitops-bootstrap.sh
new file mode 100644
index 0000000..8bfb40f
--- /dev/null
+++ b/gitops-bootstrap.sh
@@ -0,0 +1,652 @@
+#!/usr/bin/env bash
+#########################################################################################
+#### gitops-bootstrap.sh — Bootstrap GitOps on Kubernetes with Flux or ArgoCD ####
+#### Install, configure git source, sync applications, and validate deployments ####
+#### Requires: bash 4+, kubectl, git, flux CLI or argocd CLI ####
+#### ####
+#### Author: Phil Connor ####
+#### Contact: contact@mylinux.work ####
+#### License: MIT ####
+#### Version 1.00 ####
+#### ####
+#### Usage: ####
+#### ./gitops-bootstrap.sh --install flux --repo git@github.com:org/infra.git ####
+#### ####
+#### See --help for all options. ####
+#########################################################################################
+set -euo pipefail
+
+VERSION="1.00"
+
+# --- ANSI color variables (pre-initialized) ---
+RED=""
+GREEN=""
+YELLOW=""
+BLUE=""
+CYAN=""
+BOLD=""
+DIM=""
+RESET=""
+
+# --- Defaults ---
+RUN_MODE=""
+GITOPS_TOOL="${GITOPS_TOOL:-flux}"
+GIT_REPO="${GITOPS_REPO:-}"
+GIT_BRANCH="${GITOPS_BRANCH:-main}"
+GIT_PATH="${GITOPS_PATH:-./clusters/default}"
+NAMESPACE="${GITOPS_NAMESPACE:-}"
+KUBECONFIG_FILE="${KUBECONFIG:-}"
+KUBE_CTX="${KUBE_CONTEXT:-}"
+CONFIRM_YES=false
+VERBOSE="${VERBOSE:-false}"
+COLOR="${COLOR:-auto}"
+
+# --- State ---
+readonly SCRIPT_NAME="${0##*/}"
+START_TIME=$(date +%s)
+
+# --- Source name used for flux commands ---
+SOURCE_NAME="main"
+KUSTOMIZATION_NAME="default"
+APP_NAME=""
+
+# --- Color setup ---
+setup_colors() {
+ if [[ "$COLOR" == "never" ]]; then
+ RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
+ return
+ fi
+ if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[1;33m'
+ BLUE='\033[0;34m'
+ CYAN='\033[0;36m'
+ BOLD='\033[1m'
+ DIM='\033[2m'
+ RESET='\033[0m'
+ fi
+}
+
+# --- Logging ---
+log() { printf "%b\n" "${GREEN}✔${RESET} $*"; }
+warn() { printf "%b\n" "${YELLOW}⚠${RESET} $*" >&2; }
+err() { printf "%b\n" "${RED}✖${RESET} $*" >&2; }
+verbose() { [[ "$VERBOSE" == "true" ]] && printf "%b\n" "${DIM}▸ $*${RESET}" >&2; return 0; }
+die() { err "$*"; exit 1; }
+
+# Print a highlighted section title.
+#   $1 - title text
+section_header() {
+    local title=$1
+    printf "\n%b━━━ %s ━━━%b\n" "${BOLD}${BLUE}" "$title" "${RESET}"
+}
+
+# Print an aligned "label: value" row.
+#   $1 - label, $2 - value
+field() {
+    local label=$1 value=$2
+    printf " %-22s %s\n" "${label}:" "$value"
+}
+
+# Print an aligned "label: value" row with the value wrapped in a colour.
+#   $1 - label, $2 - colour escape, $3 - value
+field_color() {
+    local label=$1 color=$2 value=$3
+    printf " %-22s %b%s%b\n" "${label}:" "$color" "$value" "${RESET}"
+}
+
+# --- Resolve namespace default based on tool ---
+resolve_namespace() {
+ if [[ -z "$NAMESPACE" ]]; then
+ if [[ "$GITOPS_TOOL" == "argocd" ]]; then
+ NAMESPACE="argocd"
+ else
+ NAMESPACE="flux-system"
+ fi
+ fi
+}
+
+# --- kubectl wrapper ---
+# Run kubectl with the optional --kubeconfig / --context selections applied,
+# forwarding all arguments unchanged.
+kubectl_cmd() {
+    local -a cmd=("kubectl")
+    if [[ -n "$KUBECONFIG_FILE" ]]; then
+        cmd+=("--kubeconfig" "$KUBECONFIG_FILE")
+    fi
+    if [[ -n "$KUBE_CTX" ]]; then
+        cmd+=("--context" "$KUBE_CTX")
+    fi
+    "${cmd[@]}" "$@"
+}
+
+# --- Dependency checks ---
+require_kubectl() {
+ command -v kubectl >/dev/null 2>&1 || die "kubectl is required but not found in PATH"
+}
+
+require_flux() {
+ command -v flux >/dev/null 2>&1 || die "flux CLI is required but not found in PATH"
+}
+
+require_argocd() {
+ command -v argocd >/dev/null 2>&1 || die "argocd CLI is required but not found in PATH"
+}
+
+require_git() {
+ command -v git >/dev/null 2>&1 || die "git is required but not found in PATH"
+}
+
+# --- Confirm prompt ---
+confirm_action() {
+ local prompt="${1:-Continue?}"
+ if [[ "$CONFIRM_YES" == "true" ]]; then
+ return 0
+ fi
+ printf "%s [y/N] " "$prompt"
+ read -r answer
+ [[ "$answer" =~ ^[Yy]$ ]] || die "Aborted"
+}
+
+# --- Wait for pods ready in namespace ---
+wait_for_pods() {
+ local ns="$1"
+ local timeout="${2:-120}"
+ log "Waiting for pods in namespace ${CYAN}${ns}${RESET} (timeout ${timeout}s)"
+
+ local deadline=$(($(date +%s) + timeout))
+ while true; do
+ local not_ready
+ not_ready=$(kubectl_cmd get pods -n "$ns" --no-headers 2>/dev/null \
+ | grep -cvE 'Running|Completed|Succeeded' || true)
+ if [[ "$not_ready" -eq 0 ]]; then
+ local total
+ total=$(kubectl_cmd get pods -n "$ns" --no-headers 2>/dev/null | wc -l)
+ if [[ "$total" -gt 0 ]]; then
+ log "All ${total} pod(s) ready in ${ns}"
+ return 0
+ fi
+ fi
+ if [[ $(date +%s) -ge $deadline ]]; then
+ warn "Timeout waiting for pods in ${ns}"
+ kubectl_cmd get pods -n "$ns" --no-headers 2>/dev/null || true
+ return 1
+ fi
+ sleep 5
+ done
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Install
+# ─────────────────────────────────────────────────────────────────────
+
+# Install Flux into NAMESPACE, wait for its pods and, when GIT_REPO is set,
+# create the GitRepository source and the Kustomization that tracks it.
+do_install_flux() {
+    section_header "Installing Flux"
+    require_kubectl
+    require_flux
+
+    log "Running pre-flight checks"
+    verbose "flux check --pre"
+    if ! flux check --pre; then
+        die "Flux pre-flight checks failed"
+    fi
+
+    log "Installing Flux components into namespace ${CYAN}${NAMESPACE}${RESET}"
+    verbose "flux install --namespace=${NAMESPACE}"
+    flux install --namespace="$NAMESPACE"
+
+    wait_for_pods "$NAMESPACE"
+
+    if [[ -n "$GIT_REPO" ]]; then
+        local -a source_opts=(
+            --url="$GIT_REPO"
+            --branch="$GIT_BRANCH"
+            --namespace="$NAMESPACE"
+        )
+        local -a kustomization_opts=(
+            --source="$SOURCE_NAME"
+            --path="$GIT_PATH"
+            --namespace="$NAMESPACE"
+            --prune=true
+        )
+
+        log "Configuring GitRepository source"
+        verbose "flux create source git ${SOURCE_NAME} --url=${GIT_REPO} --branch=${GIT_BRANCH} --namespace=${NAMESPACE}"
+        flux create source git "$SOURCE_NAME" "${source_opts[@]}"
+
+        log "Creating Kustomization"
+        verbose "flux create kustomization ${KUSTOMIZATION_NAME} --source=${SOURCE_NAME} --path=${GIT_PATH} --namespace=${NAMESPACE} --prune=true"
+        flux create kustomization "$KUSTOMIZATION_NAME" "${kustomization_opts[@]}"
+    fi
+
+    section_header "Flux Installation Summary"
+    field "Namespace" "$NAMESPACE"
+    field "Git repository" "${GIT_REPO:-not configured}"
+    field "Branch" "$GIT_BRANCH"
+    field "Path" "$GIT_PATH"
+    log "Flux installation complete"
+}
+
+# Install Argo CD by applying the upstream "stable" manifests into NAMESPACE,
+# wait for the pods and print how to fetch the initial admin password.
+do_install_argocd() {
+    local manifests_url
+    manifests_url="https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml"
+
+    section_header "Installing Argo CD"
+    require_kubectl
+
+    # Render the namespace client-side and apply it, so an existing
+    # namespace does not make the step fail.
+    log "Creating namespace ${CYAN}${NAMESPACE}${RESET}"
+    kubectl_cmd create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl_cmd apply -f -
+
+    log "Applying Argo CD manifests"
+    verbose "kubectl apply -n ${NAMESPACE} -f ${manifests_url}"
+    kubectl_cmd apply -n "$NAMESPACE" -f "$manifests_url"
+
+    wait_for_pods "$NAMESPACE"
+
+    section_header "Argo CD Installation Summary"
+    field "Namespace" "$NAMESPACE"
+    field "Manifests" "$manifests_url"
+    printf "\n"
+    log "Retrieve initial admin password:"
+    printf " %bkubectl -n %s get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d%b\n" \
+        "${CYAN}" "$NAMESPACE" "${RESET}"
+    log "Argo CD installation complete"
+}
+
+do_install() {
+ case "$GITOPS_TOOL" in
+ flux) do_install_flux ;;
+ argocd) do_install_argocd ;;
+ *) die "Unknown GitOps tool: ${GITOPS_TOOL}. Use 'flux' or 'argocd'." ;;
+ esac
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Status
+# ─────────────────────────────────────────────────────────────────────
+
+do_status_flux() {
+ section_header "Flux Status"
+ require_kubectl
+ require_flux
+
+ log "Sources"
+ flux get sources all --namespace="$NAMESPACE" 2>/dev/null || warn "No sources found"
+
+ printf "\n"
+ log "Kustomizations"
+ flux get kustomizations --namespace="$NAMESPACE" 2>/dev/null || warn "No kustomizations found"
+
+ printf "\n"
+ log "Helm releases"
+ flux get helmreleases --all-namespaces 2>/dev/null || verbose "No helm releases found"
+}
+
+do_status_argocd() {
+ section_header "Argo CD Status"
+ require_kubectl
+
+ log "Applications"
+ kubectl_cmd get applications -n "$NAMESPACE" -o wide 2>/dev/null || warn "No applications found"
+
+ printf "\n"
+ log "App Projects"
+ kubectl_cmd get appprojects -n "$NAMESPACE" -o wide 2>/dev/null || verbose "No app projects found"
+}
+
+do_status() {
+ case "$GITOPS_TOOL" in
+ flux) do_status_flux ;;
+ argocd) do_status_argocd ;;
+ *) die "Unknown GitOps tool: ${GITOPS_TOOL}" ;;
+ esac
+
+ section_header "Pod Status (${NAMESPACE})"
+ kubectl_cmd get pods -n "$NAMESPACE" -o wide 2>/dev/null || warn "No pods found"
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Add Source
+# ─────────────────────────────────────────────────────────────────────
+
+do_add_source_flux() {
+ section_header "Adding Git Source (Flux)"
+ require_flux
+
+ [[ -z "$GIT_REPO" ]] && die "--repo is required to add a source"
+
+ log "Creating GitRepository source ${CYAN}${SOURCE_NAME}${RESET}"
+ verbose "flux create source git ${SOURCE_NAME} --url=${GIT_REPO} --branch=${GIT_BRANCH} --namespace=${NAMESPACE}"
+ flux create source git "$SOURCE_NAME" \
+ --url="$GIT_REPO" \
+ --branch="$GIT_BRANCH" \
+ --namespace="$NAMESPACE"
+
+ log "Source added successfully"
+ flux get sources git --namespace="$NAMESPACE"
+}
+
+do_add_source_argocd() {
+ section_header "Adding Git Source (Argo CD)"
+ require_argocd
+
+ [[ -z "$GIT_REPO" ]] && die "--repo is required to add a source"
+
+ log "Adding repository ${CYAN}${GIT_REPO}${RESET}"
+ verbose "argocd repo add ${GIT_REPO}"
+ argocd repo add "$GIT_REPO" || die "Failed to add repository"
+
+ log "Repository added successfully"
+}
+
+do_add_source() {
+ case "$GITOPS_TOOL" in
+ flux) do_add_source_flux ;;
+ argocd) do_add_source_argocd ;;
+ *) die "Unknown GitOps tool: ${GITOPS_TOOL}" ;;
+ esac
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Sync / Reconcile
+# ─────────────────────────────────────────────────────────────────────
+
+# Reconcile the git source first, then the kustomization built from it.
+do_sync_flux() {
+    local ns_opt="--namespace=${NAMESPACE}"
+
+    section_header "Reconciling (Flux)"
+    require_flux
+
+    log "Reconciling source git/${SOURCE_NAME}"
+    verbose "flux reconcile source git ${SOURCE_NAME} --namespace=${NAMESPACE}"
+    flux reconcile source git "$SOURCE_NAME" "$ns_opt"
+
+    log "Reconciling kustomization ${KUSTOMIZATION_NAME}"
+    verbose "flux reconcile kustomization ${KUSTOMIZATION_NAME} --namespace=${NAMESPACE}"
+    flux reconcile kustomization "$KUSTOMIZATION_NAME" "$ns_opt"
+
+    log "Reconciliation triggered"
+}
+
+do_sync_argocd() {
+ section_header "Syncing (Argo CD)"
+ require_argocd
+
+ if [[ -n "$APP_NAME" ]]; then
+ log "Syncing application ${CYAN}${APP_NAME}${RESET}"
+ verbose "argocd app sync ${APP_NAME}"
+ argocd app sync "$APP_NAME"
+ else
+ log "Syncing all applications"
+ local apps
+ apps=$(kubectl_cmd get applications -n "$NAMESPACE" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)
+ if [[ -z "$apps" ]]; then
+ warn "No applications found to sync"
+ return
+ fi
+ while IFS= read -r app; do
+ [[ -z "$app" ]] && continue
+ log "Syncing ${app}"
+ argocd app sync "$app" || warn "Failed to sync ${app}"
+ done <<< "$apps"
+ fi
+
+ log "Sync triggered"
+}
+
+do_sync() {
+ case "$GITOPS_TOOL" in
+ flux) do_sync_flux ;;
+ argocd) do_sync_argocd ;;
+ *) die "Unknown GitOps tool: ${GITOPS_TOOL}" ;;
+ esac
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Validate (pre-flight)
+# ─────────────────────────────────────────────────────────────────────
+
+# Pre-flight validation: cluster access, namespace, CRDs, git repository
+# reachability and the tool CLI. Each check prints one result row; a missing
+# namespace or missing CRDs are shown in yellow but still count as passed.
+# Dies when at least one check failed.
+do_validate() {
+    section_header "Pre-flight Validation"
+    require_kubectl
+
+    local checks_passed=0 checks_failed=0
+
+    # Cluster connectivity
+    log "Checking kubectl connectivity"
+    if ! kubectl_cmd cluster-info >/dev/null 2>&1; then
+        field_color "Cluster access" "${RED}" "FAILED"
+        checks_failed=$((checks_failed + 1))
+    else
+        field_color "Cluster access" "${GREEN}" "OK"
+        checks_passed=$((checks_passed + 1))
+    fi
+
+    # Namespace: informational, passes either way
+    log "Checking namespace ${CYAN}${NAMESPACE}${RESET}"
+    if kubectl_cmd get namespace "$NAMESPACE" >/dev/null 2>&1; then
+        field_color "Namespace" "${GREEN}" "${NAMESPACE} exists"
+    else
+        field_color "Namespace" "${YELLOW}" "${NAMESPACE} does not exist (will be created)"
+    fi
+    checks_passed=$((checks_passed + 1))
+
+    # CRDs: informational, passes either way; skipped for an unknown tool
+    log "Checking CRDs"
+    local crd_name="" crd_label=""
+    case "$GITOPS_TOOL" in
+        flux)
+            crd_name="gitrepositories.source.toolkit.fluxcd.io"
+            crd_label="Flux CRDs"
+            ;;
+        argocd)
+            crd_name="applications.argoproj.io"
+            crd_label="ArgoCD CRDs"
+            ;;
+    esac
+    if [[ -n "$crd_name" ]]; then
+        if kubectl_cmd get crd "$crd_name" >/dev/null 2>&1; then
+            field_color "$crd_label" "${GREEN}" "installed"
+        else
+            field_color "$crd_label" "${YELLOW}" "not installed"
+        fi
+        checks_passed=$((checks_passed + 1))
+    fi
+
+    # Git repository reachability (only when a repository was given)
+    if [[ -z "$GIT_REPO" ]]; then
+        verbose "No git repository specified, skipping connectivity check"
+    else
+        log "Checking git repository accessibility"
+        require_git
+        if git ls-remote "$GIT_REPO" HEAD >/dev/null 2>&1; then
+            field_color "Git repository" "${GREEN}" "accessible"
+            checks_passed=$((checks_passed + 1))
+        else
+            field_color "Git repository" "${RED}" "not accessible"
+            checks_failed=$((checks_failed + 1))
+        fi
+    fi
+
+    # Tool CLI presence and version
+    log "Checking CLI tools"
+    local cli_version
+    case "$GITOPS_TOOL" in
+        flux)
+            if command -v flux >/dev/null 2>&1; then
+                cli_version=$(flux version --client 2>/dev/null | head -1 || echo "unknown")
+                field_color "flux CLI" "${GREEN}" "$cli_version"
+                checks_passed=$((checks_passed + 1))
+            else
+                field_color "flux CLI" "${RED}" "not found"
+                checks_failed=$((checks_failed + 1))
+            fi
+            ;;
+        argocd)
+            if command -v argocd >/dev/null 2>&1; then
+                cli_version=$(argocd version --client --short 2>/dev/null || echo "unknown")
+                field_color "argocd CLI" "${GREEN}" "$cli_version"
+                checks_passed=$((checks_passed + 1))
+            else
+                field_color "argocd CLI" "${RED}" "not found"
+                checks_failed=$((checks_failed + 1))
+            fi
+            ;;
+    esac
+
+    section_header "Validation Summary"
+    field_color "Passed" "${GREEN}" "$checks_passed"
+    if [[ "$checks_failed" -gt 0 ]]; then
+        field_color "Failed" "${RED}" "$checks_failed"
+        die "Validation failed with ${checks_failed} error(s)"
+    fi
+    field_color "Failed" "${GREEN}" "0"
+    log "All pre-flight checks passed"
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Teardown
+# ─────────────────────────────────────────────────────────────────────
+
+# Uninstall Flux from NAMESPACE after confirmation. --silent is passed to
+# flux because confirm_action already handled the prompt.
+do_teardown_flux() {
+    section_header "Tearing Down Flux"
+    require_flux
+
+    confirm_action "Remove Flux from the cluster?"
+
+    log "Uninstalling Flux"
+    verbose "flux uninstall --namespace=${NAMESPACE} --silent"
+    flux uninstall \
+        --namespace="$NAMESPACE" \
+        --silent
+
+    log "Flux has been removed from the cluster"
+}
+
+# Remove Argo CD by deleting its whole namespace, after confirmation.
+do_teardown_argocd() {
+    local question="Remove Argo CD from the cluster (delete namespace ${NAMESPACE})?"
+
+    section_header "Tearing Down Argo CD"
+    require_kubectl
+
+    confirm_action "$question"
+
+    log "Deleting namespace ${CYAN}${NAMESPACE}${RESET}"
+    kubectl_cmd delete namespace "$NAMESPACE" --wait=true
+
+    log "Argo CD has been removed from the cluster"
+}
+
+do_teardown() {
+ case "$GITOPS_TOOL" in
+ flux) do_teardown_flux ;;
+ argocd) do_teardown_argocd ;;
+ *) die "Unknown GitOps tool: ${GITOPS_TOOL}" ;;
+ esac
+}
+
+# ─────────────────────────────────────────────────────────────────────
+# Help
+# ─────────────────────────────────────────────────────────────────────
+
+show_help() {
+ cat </dev/null; then
+ missing+=("$cmd")
+ fi
+ done
+ if [[ ${#missing[@]} -gt 0 ]]; then
+ echo "# ERROR: Missing required commands: ${missing[*]}" >&2
+ echo "# Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
+ exit 1
+ fi
+}
+
+validate_config() {
+ if [[ -z "$GLPI_URL" ]]; then
+ echo "# ERROR: GLPI_URL environment variable is required" >&2
+ exit 1
+ fi
+ if [[ -z "$GLPI_USER_TOKEN" ]]; then
+ echo "# ERROR: GLPI_USER_TOKEN environment variable is required" >&2
+ exit 1
+ fi
+ GLPI_URL="${GLPI_URL%/}"
+}
+
+# Open a GLPI API session with the user token (plus the optional app token)
+# and store the returned token in SESSION_TOKEN.
+# Returns 0 on success, 1 when the request fails or no token is returned.
+init_session() {
+    local -a request_headers=(-H "Authorization: user_token ${GLPI_USER_TOKEN}")
+    [[ -z "$GLPI_APP_TOKEN" ]] || request_headers+=(-H "App-Token: ${GLPI_APP_TOKEN}")
+
+    local reply
+    if ! reply=$(curl -sf --max-time "$CURL_TIMEOUT" \
+        "${request_headers[@]}" \
+        "${GLPI_URL}/apirest.php/initSession" 2>/dev/null); then
+        echo ""
+        return 1
+    fi
+
+    SESSION_TOKEN=$(echo "$reply" | jq -r '.session_token // empty' 2>/dev/null)
+
+    if [[ -n "$SESSION_TOKEN" ]]; then
+        return 0
+    fi
+    return 1
+}
+
+kill_session() {
+ if [[ -n "$SESSION_TOKEN" ]]; then
+ local headers=(-H "Session-Token: ${SESSION_TOKEN}")
+ if [[ -n "$GLPI_APP_TOKEN" ]]; then
+ headers+=(-H "App-Token: ${GLPI_APP_TOKEN}")
+ fi
+ curl -sf --max-time "$CURL_TIMEOUT" \
+ "${headers[@]}" \
+ "${GLPI_URL}/apirest.php/killSession" &>/dev/null || true
+ SESSION_TOKEN=""
+ fi
+}
+
+# GET an API endpoint with the current session and print the response body.
+#   $1 - path below apirest.php/ (may include a query string)
+# Prints an empty line when the request fails.
+api_get() {
+    local endpoint="$1"
+    local -a request_headers=(
+        -H "Session-Token: ${SESSION_TOKEN}"
+        -H "Content-Type: application/json"
+    )
+    [[ -z "$GLPI_APP_TOKEN" ]] || request_headers+=(-H "App-Token: ${GLPI_APP_TOKEN}")
+
+    if ! curl -sf --max-time "$CURL_TIMEOUT" \
+        "${request_headers[@]}" \
+        "${GLPI_URL}/apirest.php/${endpoint}" 2>/dev/null; then
+        echo ""
+    fi
+}
+
+api_get_count() {
+ local endpoint="$1"
+ local range_header
+ range_header=$(curl -sI --max-time "$CURL_TIMEOUT" \
+ -H "Session-Token: ${SESSION_TOKEN}" \
+ -H "Content-Type: application/json" \
+ ${GLPI_APP_TOKEN:+-H "App-Token: ${GLPI_APP_TOKEN}"} \
+ "${GLPI_URL}/apirest.php/${endpoint}?range=0-0" 2>/dev/null | grep -i '^Content-Range:' | tr -d '\r')
+
+ if [[ -n "$range_header" ]]; then
+ echo "$range_header" | sed 's|.*/||'
+ else
+ echo "0"
+ fi
+}
+
+# Make a string safe to embed as a Prometheus label value: every character
+# other than letters, digits, underscore, space, slash, dot and hyphen is
+# replaced by "_".
+#   $1 - raw value
+# Double quotes and newlines are covered by the same replacement, so no
+# separate escaping pass is needed. printf is used instead of echo so values
+# such as "-n" are printed literally.
+sanitize_label() {
+    local value="$1"
+    # Keep the pattern in a variable: it contains "/" which would otherwise
+    # end the pattern part of the ${var//pattern/replacement} expansion.
+    local unsafe='[^a-zA-Z0-9_ /.-]'
+    printf '%s\n' "${value//$unsafe/_}"
+}
+
+add_metric() {
+ local name="$1"
+ local type="$2"
+ local help="$3"
+ local value="$4"
+ local labels="${5:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="# HELP ${name} ${help}
+# TYPE ${name} ${type}
+${name} ${value}
+"
+ fi
+}
+
+add_metric_value() {
+ local name="$1"
+ local value="$2"
+ local labels="${3:-}"
+
+ if [[ -n "$labels" ]]; then
+ OUTPUT+="${name}{${labels}} ${value}
+"
+ else
+ OUTPUT+="${name} ${value}
+"
+ fi
+}
+
+# --- Collectors ---
+
+collect_tickets() {
+ # Total tickets (open = not closed)
+ local total
+ total=$(api_get_count "Ticket")
+ add_metric "glpi_tickets_total" "gauge" "Total number of tickets" "${total:-0}"
+
+ # Tickets by status
+ # GLPI status codes: 1=New, 2=Assigned, 3=Planned, 4=Waiting, 5=Solved, 6=Closed
+ local status_names=("new" "assigned" "planned" "waiting" "solved" "closed")
+ local status_codes=(1 2 3 4 5 6)
+
+ for i in "${!status_codes[@]}"; do
+ local code="${status_codes[$i]}"
+ local name="${status_names[$i]}"
+ local count
+ count=$(api_get_count "Ticket?searchText[status]=${code}")
+ add_metric "glpi_tickets_${name}" "gauge" "Tickets in ${name} status" "${count:-0}"
+ done
+
+ # Tickets by urgency
+ # GLPI urgency: 1=Very low, 2=Low, 3=Medium, 4=High, 5=Very high
+ OUTPUT+="# HELP glpi_tickets_by_urgency Number of tickets by urgency level
+# TYPE glpi_tickets_by_urgency gauge
+"
+ local urgency_names=("very_low" "low" "medium" "high" "very_high")
+ local urgency_codes=(1 2 3 4 5)
+
+ for i in "${!urgency_codes[@]}"; do
+ local code="${urgency_codes[$i]}"
+ local uname="${urgency_names[$i]}"
+ local count
+ count=$(api_get_count "Ticket?searchText[urgency]=${code}")
+ add_metric_value "glpi_tickets_by_urgency" "${count:-0}" "urgency=\"${uname}\""
+ done
+
+ # Tickets by category (top categories)
+ local cat_json
+ cat_json=$(api_get "ITILCategory?range=0-49")
+
+ if [[ -n "$cat_json" ]]; then
+ local is_array
+ is_array=$(echo "$cat_json" | jq -r 'if type == "array" then "yes" else "no" end' 2>/dev/null)
+
+ if [[ "$is_array" == "yes" ]]; then
+ local cat_count
+ cat_count=$(echo "$cat_json" | jq 'length' 2>/dev/null)
+
+ if [[ "$cat_count" -gt 0 ]]; then
+ OUTPUT+="# HELP glpi_tickets_by_category Number of tickets per category
+# TYPE glpi_tickets_by_category gauge
+"
+ local j
+ for ((j = 0; j < cat_count && j < 30; j++)); do
+ local cat_name cat_id
+ cat_name=$(echo "$cat_json" | jq -r ".[$j].completename // .[$j].name // empty" 2>/dev/null)
+ cat_id=$(echo "$cat_json" | jq -r ".[$j].id // empty" 2>/dev/null)
+
+ if [[ -n "$cat_name" && -n "$cat_id" ]]; then
+ local ticket_count
+ ticket_count=$(api_get_count "Ticket?searchText[itilcategories_id]=${cat_id}")
+ local safe_name
+ safe_name=$(sanitize_label "$cat_name")
+ add_metric_value "glpi_tickets_by_category" "${ticket_count:-0}" "category=\"${safe_name}\""
+ fi
+ done
+ fi
+ fi
+ fi
+}
+
+# Publish one gauge per GLPI asset item type holding its total item count.
+# Each count comes from api_get_count and is recorded through add_metric;
+# an empty count is reported as 0.
+collect_assets() {
+    local asset_row asset_type asset_metric asset_help asset_total
+
+    # item type | metric name | help text (emitted in this order)
+    local -a asset_table=(
+        "Computer|glpi_computers_total|Total number of computers"
+        "Monitor|glpi_monitors_total|Total number of monitors"
+        "NetworkEquipment|glpi_network_devices_total|Total number of network devices"
+        "Phone|glpi_phones_total|Total number of phones"
+        "Printer|glpi_printers_total|Total number of printers"
+        "Software|glpi_software_total|Total number of software entries"
+    )
+
+    for asset_row in "${asset_table[@]}"; do
+        IFS='|' read -r asset_type asset_metric asset_help <<< "$asset_row"
+        asset_total=$(api_get_count "$asset_type")
+        add_metric "$asset_metric" "gauge" "$asset_help" "${asset_total:-0}"
+    done
+}
+
+# Publish one gauge per GLPI organisational item type (users, groups,
+# entities, locations) holding its total item count. Counts come from
+# api_get_count and are recorded through add_metric; empty counts become 0.
+collect_organization() {
+    local org_row org_type org_metric org_help org_total
+
+    # item type | metric name | help text (emitted in this order)
+    local -a org_table=(
+        "User|glpi_users_total|Total number of users"
+        "Group|glpi_groups_total|Total number of groups"
+        "Entity|glpi_entities_total|Total number of entities"
+        "Location|glpi_locations_total|Total number of locations"
+    )
+
+    for org_row in "${org_table[@]}"; do
+        IFS='|' read -r org_type org_metric org_help <<< "$org_row"
+        org_total=$(api_get_count "$org_type")
+        add_metric "$org_metric" "gauge" "$org_help" "${org_total:-0}"
+    done
+}
+
+# --- Output ---
+
+# Emit the metrics text accumulated in $OUTPUT.
+# With TEXTFILE_MODE=true the text is written to a PID-suffixed temp file
+# next to ${TEXTFILE_DIR}/glpi.prom and then moved into place; otherwise it
+# is printed on stdout.
+write_output() {
+    # Default mode: plain stdout
+    if [[ "$TEXTFILE_MODE" != true ]]; then
+        echo "$OUTPUT"
+        return
+    fi
+
+    local prom_path="${TEXTFILE_DIR}/glpi.prom"
+    local staging_path="${prom_path}.$$"
+
+    mkdir -p "$TEXTFILE_DIR"
+    # Write to the staging file first, then rename over the target
+    echo "$OUTPUT" > "$staging_path"
+    mv "$staging_path" "$prom_path"
+    echo "# Wrote metrics to ${prom_path}" >&2
+}
+
+# Serve the metrics over HTTP with netcat, one connection per loop iteration.
+# Globals: HTTP_PORT, VERSION (read); OUTPUT, START_TIME (written).
+# Exits 1 when neither nc nor ncat is installed; otherwise loops forever.
+#
+# Notes:
+#   - The payload is collected and built BEFORE netcat starts listening, so a
+#     client receives the data gathered when the listener was started.
+#   - The request is never read, so every connection gets the same 200 reply.
+serve_http() {
+    if ! command -v nc &>/dev/null && ! command -v ncat &>/dev/null; then
+        echo "# ERROR: nc (netcat) or ncat required for HTTP mode" >&2
+        exit 1
+    fi
+
+    echo "# GLPI exporter listening on port ${HTTP_PORT}" >&2
+    echo "# Metrics endpoint: http://localhost:${HTTP_PORT}/metrics" >&2
+
+    # Prefer ncat when both are present
+    local nc_cmd="nc"
+    if command -v ncat &>/dev/null; then
+        nc_cmd="ncat"
+    fi
+
+    local end_time duration body content_length response
+    while true; do
+        OUTPUT=""
+        START_TIME=$(date +%s%N)
+
+        add_metric "glpi_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+        if init_session; then
+            add_metric "glpi_up" "gauge" "GLPI API reachability (1=up, 0=down)" "1"
+            collect_tickets
+            collect_assets
+            collect_organization
+            kill_session
+        else
+            add_metric "glpi_up" "gauge" "GLPI API reachability (1=up, 0=down)" "0"
+        fi
+
+        end_time=$(date +%s%N)
+        duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+        add_metric "glpi_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
+        add_metric "glpi_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+        # The exposition format wants the last line newline-terminated.
+        body="$OUTPUT"
+        [[ "$body" == *$'\n' ]] || body+=$'\n'
+
+        # Content-Length is a BYTE count; ${#body} would count characters and
+        # under-report multi-byte UTF-8 label values.
+        content_length=$(printf '%s' "$body" | wc -c)
+        content_length=${content_length//[[:space:]]/}
+
+        # Only the header template is escape-processed; the payload goes in
+        # through %s so backslashes inside label values survive untouched
+        # (echo -e would rewrite \" \\ \n in the metrics text).
+        printf -v response 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %s\r\nConnection: close\r\n\r\n%s' "$content_length" "$body"
+
+        # Try the listen syntaxes of the common netcat variants in turn;
+        # a failure of all three is ignored so the loop keeps running.
+        printf '%s' "$response" | "$nc_cmd" -l -p "$HTTP_PORT" -q 1 2>/dev/null || \
+            printf '%s' "$response" | "$nc_cmd" -l "$HTTP_PORT" -c 2>/dev/null || \
+            printf '%s' "$response" | "$nc_cmd" -l -p "$HTTP_PORT" 2>/dev/null || true
+    done
+}
+
+# Install the exporter as a cron job under /etc/cron.d (root only).
+# Exits 1 with an error on stderr when the effective UID is not 0.
+# On success the cron file is given mode 644 and the installed path plus the
+# metrics destination are reported on stderr.
+install_cron() {
+ if [[ $EUID -ne 0 ]]; then
+ echo "# ERROR: --install requires root" >&2
+ exit 1
+ fi
+
+ local script_path
+ # Absolute path of this script with symlinks resolved
+ script_path=$(readlink -f "$0")
+
+ # NOTE(review): the body of the cron entry is not visible in this view;
+ # presumably it runs ${script_path} in textfile mode on a schedule - confirm.
+ cat > /etc/cron.d/glpi-exporter </dev/null
+EOF
+
+ chmod 644 /etc/cron.d/glpi-exporter
+ echo "# Installed cron job: /etc/cron.d/glpi-exporter" >&2
+ echo "# Metrics will be written to: ${TEXTFILE_DIR}/glpi.prom" >&2
+}
+
+# --- Main ---
+
+# Entry point: parse the command line, then either serve metrics over HTTP
+# or run a single collection pass and hand the result to write_output.
+#
+# Options:
+#   --textfile        set TEXTFILE_MODE=true
+#   --http            set HTTP_MODE=true
+#   -p, --port PORT   set HTTP_PORT (default kept when PORT is missing/empty)
+#   --install         check dependencies/config, install the cron job, exit 0
+#   -h, --help        call usage
+# Unrecognised arguments are ignored.
+main() {
+    # Walk the positional parameters directly: a `for arg in "$@"` loop does
+    # not follow `shift`, so the word after -p/--port would be misread.
+    while (($# > 0)); do
+        case "$1" in
+            --textfile) TEXTFILE_MODE=true ;;
+            --http) HTTP_MODE=true ;;
+            -p|--port)
+                if (($# > 1)); then
+                    HTTP_PORT="${2:-$HTTP_PORT}"
+                    shift   # consume the port value
+                fi
+                ;;
+            --install)
+                check_dependencies
+                validate_config
+                install_cron
+                exit 0
+                ;;
+            --help|-h) usage ;;
+            *) ;;
+        esac
+        shift
+    done
+
+    check_dependencies
+    validate_config
+
+    if [[ "$HTTP_MODE" == true ]]; then
+        serve_http
+        exit 0
+    fi
+
+    START_TIME=$(date +%s%N)
+
+    add_metric "glpi_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""
+
+    # glpi_up reflects whether an API session could be opened
+    if init_session; then
+        add_metric "glpi_up" "gauge" "GLPI API reachability (1=up, 0=down)" "1"
+        collect_tickets
+        collect_assets
+        collect_organization
+        kill_session
+    else
+        add_metric "glpi_up" "gauge" "GLPI API reachability (1=up, 0=down)" "0"
+    fi
+
+    local end_time duration
+    end_time=$(date +%s%N)
+    # Nanosecond delta converted to seconds; falls back to 0 when bc fails
+    duration=$(echo "scale=2; ($end_time - $START_TIME) / 1000000000" | bc 2>/dev/null || echo "0")
+    add_metric "glpi_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
+    add_metric "glpi_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"
+
+    write_output
+}
+
+main "$@"
diff --git a/gpu-exporter.sh b/gpu-exporter.sh
new file mode 100755
index 0000000..411e4a0
--- /dev/null
+++ b/gpu-exporter.sh
@@ -0,0 +1,440 @@
+#!/bin/bash
+################################################################################
+# Script Name: gpu-exporter.sh
+# Version: 1.0
+# Description: Prometheus exporter for NVIDIA GPU metrics — temperature,
+# utilization, VRAM usage, power draw, fan speed, clock speeds,
+# performance state, and per-process GPU memory via nvidia-smi
+#
+# Author: Phil Connor
+# Contact: contact@mylinux.work
+# Website: https://mylinux.work
+# License: MIT
+#
+# Prerequisites:
+# - NVIDIA GPU with drivers installed
+# - nvidia-smi available in PATH
+# - netcat (nc) for HTTP mode
+#
+# Usage:
+# ./gpu-exporter.sh # stdout
+# ./gpu-exporter.sh --http -p 9195 # HTTP server
+# ./gpu-exporter.sh --textfile # node_exporter textfile
+#
+# Metrics Exported:
+# - gpu_info{gpu,name,driver_version,cuda_version} - GPU info
+# - gpu_count - Number of GPUs detected
+# - gpu_temperature_celsius{gpu} - Temperature
+# - gpu_utilization_percent{gpu} - GPU utilization
+# - gpu_memory_utilization_percent{gpu} - Memory utilization
+# - gpu_memory_used_bytes{gpu} - VRAM used
+# - gpu_memory_total_bytes{gpu} - Total VRAM
+# - gpu_memory_free_bytes{gpu} - Free VRAM
+# - gpu_power_draw_watts{gpu} - Power draw
+# - gpu_power_limit_watts{gpu} - Power limit
+# - gpu_fan_speed_percent{gpu} - Fan speed
+# - gpu_clock_speed_mhz{gpu} - GPU clock
+# - gpu_memory_clock_speed_mhz{gpu} - Memory clock
+# - gpu_pstate{gpu} - Performance state
+# - gpu_process_memory_bytes{gpu,pid,process_name} - Per-process memory
+# - gpu_exporter_duration_seconds - Script execution time
+# - gpu_exporter_last_run_timestamp - Last run timestamp
+#
+# Configuration:
+# Default HTTP port: 9195
+# Textfile directory: /var/lib/node_exporter
+#
+################################################################################
+
+set -euo pipefail
+
+# ============================================================================
+# CONFIGURATION VARIABLES
+# ============================================================================
+
+# Directory used for node_exporter textfile output
+TEXTFILE_DIR="/var/lib/node_exporter"
+# Output file path; empty by default
+# NOTE(review): presumably filled in when --textfile is given - confirm
+OUTPUT_FILE=""
+# Whether to run as an HTTP server instead of printing once
+HTTP_MODE=false
+# Default HTTP port
+HTTP_PORT=9195
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+show_usage() {
+ cat <&2; exit 1 ;;
+ esac
+ done
+}
+
+# Escape a string for use inside a Prometheus label value.
+# Backslashes and double quotes are backslash-escaped; newlines are removed.
+# Args: $1 - string to escape
+# Returns: escaped string on stdout, followed by a newline
+prom_escape() {
+    local val="$1"
+    val="${val//\\/\\\\}"    # backslash first, so later escapes are not doubled
+    val="${val//\"/\\\"}"
+    val="${val//$'\n'/}"
+    # printf rather than echo: echo would treat values such as "-n" or "-e"
+    # as options and print nothing.
+    printf '%s\n' "$val"
+}
+
+# ============================================================================
+# METRIC GENERATION
+# ============================================================================
+
+# Generate all Prometheus metrics
+# Returns: Prometheus text format metrics on stdout
+generate_metrics() {
+ local script_start
+ script_start=$(date +%s)
+
+ # Check nvidia-smi exists
+ if ! command -v nvidia-smi >/dev/null 2>&1; then
+ cat </dev/null | head -1)
+ gpu_count=${gpu_count:-0}
+
+ # Strip whitespace
+ gpu_count=$(echo "$gpu_count" | tr -d '[:space:]')
+
+ if [ "$gpu_count" -eq 0 ] 2>/dev/null; then
+ cat </dev/null | head -1)
+ driver_version=$(echo "$driver_version" | tr -d '[:space:]')
+ driver_version=${driver_version:-"unknown"}
+
+ cuda_version=$(nvidia-smi --query-gpu=cuda_version --format=csv,noheader 2>/dev/null | head -1)
+ cuda_version=$(echo "$cuda_version" | tr -d '[:space:]')
+
+ # Fallback: parse from nvidia-smi header if query fails
+ if [ -z "$cuda_version" ] || [ "$cuda_version" = "[N/A]" ]; then
+ cuda_version=$(nvidia-smi 2>/dev/null | grep -oP 'CUDA Version:\s*\K[0-9.]+' || echo "unknown")
+ fi
+
+ cat </dev/null)
+
+ if [ -n "$info_lines" ]; then
+ while IFS= read -r info_line; do
+ [ -z "$info_line" ] && continue
+ local g_idx g_name
+ g_idx=$(echo "$info_line" | cut -d',' -f1 | tr -d '[:space:]')
+ g_name=$(echo "$info_line" | cut -d',' -f2- | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+ echo "gpu_info{gpu=\"$g_idx\",name=\"$(prom_escape "$g_name")\",driver_version=\"$(prom_escape "$driver_version")\",cuda_version=\"$(prom_escape "$cuda_version")\"} 1"
+ done <<< "$info_lines"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # OUTPUT PER-GPU METRICS (with HELP/TYPE headers)
+ # ========================================================================
+
+ # Helper: emit one gauge block (HELP/TYPE header plus one sample per GPU).
+ # Args: $1=metric_name, $2=help_text, $3=nvidia-smi --query-gpu field
+ # Output: Prometheus text on stdout, terminated by a blank line.
+ # Rows without a GPU index are skipped. Any non-numeric value ("[N/A]",
+ # "[Not Supported]", ...) is reported as 0 so the scrape stays parseable.
+ # A failing nvidia-smi query yields the header only instead of aborting.
+ emit_gpu_metric() {
+     local metric="$1" help="$2" query="$3"
+     local num_re='^-?[0-9]+([.][0-9]+)?$'
+     local lines g_idx g_val
+
+     echo "# HELP $metric $help"
+     echo "# TYPE $metric gauge"
+
+     # On failure the output may be an error message, so discard it.
+     lines=$(nvidia-smi --query-gpu=index,"$query" --format=csv,noheader,nounits 2>/dev/null) || lines=""
+
+     while IFS=',' read -r g_idx g_val; do
+         g_idx=${g_idx//[[:space:]]/}
+         g_val=${g_val//[[:space:]]/}
+         # An empty here-string still produces one empty line
+         [[ -n "$g_idx" ]] || continue
+         [[ "$g_val" =~ $num_re ]] || g_val=0
+         echo "${metric}{gpu=\"$g_idx\"} $g_val"
+     done <<< "$lines"
+     echo ""
+ }
+
+ # Helper: emit one memory gauge block, converting MiB to bytes per GPU.
+ # Args: $1=metric_name, $2=help_text, $3=nvidia-smi --query-gpu field
+ # Output: Prometheus text on stdout, terminated by a blank line.
+ # Rows without a GPU index are skipped. Any non-numeric value ("[N/A]",
+ # "[Not Supported]", ...) is reported as 0 bytes.
+ # A failing nvidia-smi query yields the header only instead of aborting.
+ emit_gpu_mem_metric() {
+     local metric="$1" help="$2" query="$3"
+     local num_re='^-?[0-9]+([.][0-9]+)?$'
+     local lines g_idx g_val bytes
+
+     echo "# HELP $metric $help"
+     echo "# TYPE $metric gauge"
+
+     # On failure the output may be an error message, so discard it.
+     lines=$(nvidia-smi --query-gpu=index,"$query" --format=csv,noheader,nounits 2>/dev/null) || lines=""
+
+     while IFS=',' read -r g_idx g_val; do
+         g_idx=${g_idx//[[:space:]]/}
+         g_val=${g_val//[[:space:]]/}
+         # An empty here-string still produces one empty line
+         [[ -n "$g_idx" ]] || continue
+         [[ "$g_val" =~ $num_re ]] || g_val=0
+         # Pass the value with -v rather than splicing it into the awk
+         # program text, where a stray token would be a syntax error.
+         bytes=$(awk -v mib="$g_val" 'BEGIN { printf "%.0f", mib * 1048576 }')
+         echo "${metric}{gpu=\"$g_idx\"} $bytes"
+     done <<< "$lines"
+     echo ""
+ }
+
+ emit_gpu_metric "gpu_temperature_celsius" "GPU temperature in degrees Celsius" "temperature.gpu"
+ emit_gpu_metric "gpu_utilization_percent" "GPU core utilization percentage" "utilization.gpu"
+ emit_gpu_metric "gpu_memory_utilization_percent" "GPU memory utilization percentage" "utilization.memory"
+ emit_gpu_mem_metric "gpu_memory_used_bytes" "GPU memory used in bytes" "memory.used"
+ emit_gpu_mem_metric "gpu_memory_total_bytes" "GPU total memory in bytes" "memory.total"
+ emit_gpu_mem_metric "gpu_memory_free_bytes" "GPU free memory in bytes" "memory.free"
+ emit_gpu_metric "gpu_power_draw_watts" "GPU power draw in watts" "power.draw"
+ emit_gpu_metric "gpu_power_limit_watts" "GPU power limit in watts" "power.limit"
+ emit_gpu_metric "gpu_fan_speed_percent" "GPU fan speed percentage" "fan.speed"
+ emit_gpu_metric "gpu_clock_speed_mhz" "GPU graphics clock speed in MHz" "clocks.current.graphics"
+ emit_gpu_metric "gpu_memory_clock_speed_mhz" "GPU memory clock speed in MHz" "clocks.current.memory"
+
+ # Performance state needs special handling (P0 → 0, P8 → 8, etc.)
+ echo "# HELP gpu_pstate GPU performance state (0=max, 12=min)"
+ echo "# TYPE gpu_pstate gauge"
+ local pstate_lines
+ pstate_lines=$(nvidia-smi --query-gpu=index,pstate --format=csv,noheader,nounits 2>/dev/null)
+ while IFS=', ' read -r g_idx g_pstate; do
+ g_idx=$(echo "$g_idx" | tr -d '[:space:]')
+ g_pstate=$(echo "$g_pstate" | tr -d '[:space:]')
+ local pnum=0
+ if [[ "$g_pstate" =~ ^P([0-9]+)$ ]]; then
+ pnum="${BASH_REMATCH[1]}"
+ fi
+ echo "gpu_pstate{gpu=\"$g_idx\"} $pnum"
+ done <<< "$pstate_lines"
+
+ echo ""
+
+ # ========================================================================
+ # PER-PROCESS GPU MEMORY
+ # ========================================================================
+
+ # Build UUID-to-index mapping
+ declare -A uuid_to_index
+ local uuid_lines
+ uuid_lines=$(nvidia-smi --query-gpu=index,uuid --format=csv,noheader 2>/dev/null)
+
+ if [ -n "$uuid_lines" ]; then
+ while IFS=', ' read -r g_idx g_uuid; do
+ g_idx=$(echo "$g_idx" | tr -d '[:space:]')
+ g_uuid=$(echo "$g_uuid" | tr -d '[:space:]')
+ uuid_to_index["$g_uuid"]="$g_idx"
+ done <<< "$uuid_lines"
+ fi
+
+ cat </dev/null)
+
+ if [ -n "$process_lines" ]; then
+ while IFS= read -r proc_line; do
+ [ -z "$proc_line" ] && continue
+
+ # Parse: uuid, pid, process_name, used_memory_mib
+ local proc_uuid proc_pid proc_name proc_mem_mib
+ proc_uuid=$(echo "$proc_line" | cut -d',' -f1 | tr -d '[:space:]')
+ proc_pid=$(echo "$proc_line" | cut -d',' -f2 | tr -d '[:space:]')
+ proc_name=$(echo "$proc_line" | cut -d',' -f3 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+ proc_mem_mib=$(echo "$proc_line" | rev | cut -d',' -f1 | rev | tr -d '[:space:]')
+
+ # Resolve UUID to GPU index
+ local proc_gpu_idx="${uuid_to_index[$proc_uuid]:-0}"
+
+ # Handle [N/A] memory
+ if [ "$proc_mem_mib" = "[N/A]" ]; then
+ proc_mem_mib=0
+ fi
+
+ # Convert MiB to bytes
+ local proc_mem_bytes
+ proc_mem_bytes=$(awk "BEGIN { printf \"%.0f\", $proc_mem_mib * 1048576 }")
+
+ # Extract short process name from full path
+ local short_name
+ short_name=$(basename "$proc_name" 2>/dev/null || echo "$proc_name")
+
+ echo "gpu_process_memory_bytes{gpu=\"$proc_gpu_idx\",pid=\"$proc_pid\",process_name=\"$(prom_escape "$short_name")\"} $proc_mem_bytes"
+ done <<< "$process_lines"
+ fi
+
+ echo ""
+
+ # ========================================================================
+ # EXPORTER RUNTIME
+ # ========================================================================
+
+ local script_end script_duration
+ script_end=$(date +%s)
+ script_duration=$((script_end - script_start))
+
+ cat <&2
+
+ if ! command -v nc >/dev/null 2>&1; then
+ echo "ERROR: netcat (nc) required for HTTP mode" >&2
+ exit 1
+ fi
+
+ # Infinite loop accepting HTTP requests
+ while true; do
+ {
+ read -r request
+ # Check if request is for /metrics endpoint
+ if [[ "$request" =~ ^GET\ /metrics ]]; then
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
+ generate_metrics
+ else # Serve HTML landing page for other requests
+ echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
+ cat <
+
+GPU Exporter v1.0
+
+GPU Prometheus Exporter v1.0
+