# .SYNOPSIS SQL Server Always On Availability Group Prometheus Metrics Exporter .DESCRIPTION Prometheus exporter for SQL Server Always On - replica roles, sync health, replication lag, database sync state, listener status, and failover readiness. Exports metrics as Prometheus-compatible text format. .PARAMETER Mode Output mode: 'stdout' (default), 'textfile', or 'http' .PARAMETER Port HTTP port for http mode (default: 9518) .PARAMETER TextfileDir Directory for textfile collector output (default: C:\ProgramData\node_exporter) .PARAMETER SqlInstance SQL Server instance to connect to (default: localhost) .PARAMETER InstallScheduledTask Switch to create a scheduled task for auto-start on system boot .PARAMETER TaskIntervalMinutes Interval in minutes for the scheduled task (default: 2) .NOTES Author: Phil Connor Contact: contact@mylinux.work Website: https://mylinux.work License: MIT Version: 1.0 Metrics Exported: Core Status: - sqlag_up - sqlag_exporter_info{version} Replica State: - sqlag_replica_role{replica,ag} - sqlag_replica_sync_health{replica,ag} - sqlag_replica_connected{replica,ag} - sqlag_failover_readiness{replica,ag} Database State: - sqlag_database_sync_state{database,ag} - sqlag_log_send_queue_kb{database,ag} - sqlag_redo_queue_kb{database,ag} - sqlag_last_commit_lag_seconds{database,ag} - sqlag_database_suspended{database,ag} Listeners: - sqlag_listener_state{listener,ag} Exporter: - sqlag_exporter_duration_seconds - sqlag_exporter_last_run_timestamp #> param( [ValidateSet('stdout', 'textfile', 'http')] [string]$Mode = 'stdout', [int]$Port = 9518, [string]$TextfileDir = 'C:\ProgramData\node_exporter', [string]$SqlInstance = 'localhost', [switch]$InstallScheduledTask, [int]$TaskIntervalMinutes = 2 ) # Create a scheduled task to run this script every $TaskIntervalMinutes minutes if ($InstallScheduledTask) { $taskName = "SqlAlwaysOnExporter" $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue if (-not $existingTask) { $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$($MyInvocation.MyCommand.Path)`" -Mode textfile -SqlInstance `"$SqlInstance`"" if (-not $TaskIntervalMinutes -or $TaskIntervalMinutes -le 0) { throw "TaskIntervalMinutes must be a positive integer" } $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365) $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest try { Write-Host "Creating scheduled task: $taskName" Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Exports SQL Server Always On AG metrics for Prometheus every $TaskIntervalMinutes minutes" $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue if (-not $createdTask) { throw "Failed to verify scheduled task creation" } Write-Host "Successfully created scheduled task: $taskName" -ForegroundColor Green } catch { Write-Error "Failed to create scheduled task: $($_.Exception.Message)" throw } } else { Write-Host "Scheduled task '$taskName' already exists, skipping creation" } } $ErrorActionPreference = 'SilentlyContinue' # ============================================================================ # HELPER FUNCTIONS # ============================================================================ function Get-UnixTimestamp { [int][double]::Parse((Get-Date -UFormat '%s')) } function Format-MetricValue { param([double]$Value, [int]$Decimals = 2) [math]::Round($Value, $Decimals) } function Invoke-AgQuery { param([string]$Query, [string]$Instance) try { if (Get-Command Invoke-Sqlcmd -ErrorAction SilentlyContinue) { Invoke-Sqlcmd -ServerInstance $Instance -Query $Query -ErrorAction Stop } else { $connection = New-Object System.Data.SqlClient.SqlConnection $connection.ConnectionString = "Server=$Instance;Integrated Security=True;TrustServerCertificate=True" $connection.Open() $command = $connection.CreateCommand() $command.CommandText = $Query $adapter = New-Object System.Data.SqlClient.SqlDataAdapter $command $dataset = New-Object System.Data.DataSet [void]$adapter.Fill($dataset) $connection.Close() $dataset.Tables[0] } } catch { Write-Warning "SQL query failed: $_" $null } } # ============================================================================ # AG METRICS # ============================================================================ function Get-AgMetrics { $sb = [System.Text.StringBuilder]::new() # Test connectivity $connected = $false try { $testResult = Invoke-AgQuery -Query "SELECT 1 AS test" -Instance $SqlInstance if ($testResult) { $connected = $true } } catch {} # --- sqlag_up --- [void]$sb.AppendLine('# HELP sqlag_up SQL Server AG reachability (1=up, 0=down)') [void]$sb.AppendLine('# TYPE sqlag_up gauge') $upVal = if ($connected) { 1 } else { 0 } [void]$sb.AppendLine("sqlag_up $upVal") [void]$sb.AppendLine('') # --- sqlag_exporter_info --- [void]$sb.AppendLine('# HELP sqlag_exporter_info Exporter version information') [void]$sb.AppendLine('# TYPE sqlag_exporter_info gauge') [void]$sb.AppendLine('sqlag_exporter_info{version="1.0"} 1') [void]$sb.AppendLine('') if (-not $connected) { return $sb.ToString() } # --- Replica state metrics --- $replicaQuery = @" SELECT ar.replica_server_name AS replica, ag.name AS ag_name, ars.role AS role, ars.synchronization_health AS sync_health, ars.connected_state AS connected_state, ars.is_local AS is_local FROM sys.dm_hadr_availability_replica_states ars JOIN sys.availability_replicas ar ON ars.replica_id = ar.replica_id JOIN sys.availability_groups ag ON ar.group_id = ag.group_id "@ $replicas = Invoke-AgQuery -Query $replicaQuery -Instance $SqlInstance if ($replicas) { # --- sqlag_replica_role --- [void]$sb.AppendLine('# HELP sqlag_replica_role Replica role (1=primary, 2=secondary)') [void]$sb.AppendLine('# TYPE sqlag_replica_role gauge') foreach ($r in $replicas) { $replicaName = $r.replica -replace '[\\"]', '' $agName = $r.ag_name -replace '[\\"]', '' $roleVal = if ($r.role -ne $null) { $r.role } else { 0 } [void]$sb.AppendLine("sqlag_replica_role{replica=`"$replicaName`",ag=`"$agName`"} $roleVal") } [void]$sb.AppendLine('') # --- sqlag_replica_sync_health --- [void]$sb.AppendLine('# HELP sqlag_replica_sync_health Sync health (0=not healthy, 1=partially, 2=healthy)') [void]$sb.AppendLine('# TYPE sqlag_replica_sync_health gauge') foreach ($r in $replicas) { $replicaName = $r.replica -replace '[\\"]', '' $agName = $r.ag_name -replace '[\\"]', '' $healthVal = if ($r.sync_health -ne $null) { $r.sync_health } else { 0 } [void]$sb.AppendLine("sqlag_replica_sync_health{replica=`"$replicaName`",ag=`"$agName`"} $healthVal") } [void]$sb.AppendLine('') # --- sqlag_replica_connected --- [void]$sb.AppendLine('# HELP sqlag_replica_connected Replica connected state (1=connected, 0=disconnected)') [void]$sb.AppendLine('# TYPE sqlag_replica_connected gauge') foreach ($r in $replicas) { $replicaName = $r.replica -replace '[\\"]', '' $agName = $r.ag_name -replace '[\\"]', '' $connVal = if ($r.connected_state -eq 1) { 1 } else { 0 } [void]$sb.AppendLine("sqlag_replica_connected{replica=`"$replicaName`",ag=`"$agName`"} $connVal") } [void]$sb.AppendLine('') # --- sqlag_failover_readiness --- [void]$sb.AppendLine('# HELP sqlag_failover_readiness Failover readiness (1=ready, 0=not ready)') [void]$sb.AppendLine('# TYPE sqlag_failover_readiness gauge') foreach ($r in $replicas) { $replicaName = $r.replica -replace '[\\"]', '' $agName = $r.ag_name -replace '[\\"]', '' $readyVal = if ($r.sync_health -eq 2 -and $r.connected_state -eq 1) { 1 } else { 0 } [void]$sb.AppendLine("sqlag_failover_readiness{replica=`"$replicaName`",ag=`"$agName`"} $readyVal") } [void]$sb.AppendLine('') } # --- Database replica state metrics --- $dbQuery = @" SELECT db.name AS database_name, ag.name AS ag_name, drs.synchronization_state AS sync_state, drs.log_send_queue_size AS log_send_queue_kb, drs.redo_queue_size AS redo_queue_kb, DATEDIFF(SECOND, drs.last_commit_time, GETUTCDATE()) AS last_commit_lag_seconds, drs.is_suspended AS is_suspended FROM sys.dm_hadr_database_replica_states drs JOIN sys.databases db ON drs.database_id = db.database_id JOIN sys.availability_groups ag ON drs.group_id = ag.group_id WHERE drs.is_local = 1 "@ $databases = Invoke-AgQuery -Query $dbQuery -Instance $SqlInstance if ($databases) { # --- sqlag_database_sync_state --- [void]$sb.AppendLine('# HELP sqlag_database_sync_state Database sync state (0=not syncing, 1=syncing, 2=synchronised)') [void]$sb.AppendLine('# TYPE sqlag_database_sync_state gauge') foreach ($d in $databases) { $dbName = $d.database_name -replace '[\\"]', '' $agName = $d.ag_name -replace '[\\"]', '' $syncVal = if ($d.sync_state -ne $null) { $d.sync_state } else { 0 } [void]$sb.AppendLine("sqlag_database_sync_state{database=`"$dbName`",ag=`"$agName`"} $syncVal") } [void]$sb.AppendLine('') # --- sqlag_log_send_queue_kb --- [void]$sb.AppendLine('# HELP sqlag_log_send_queue_kb Log send queue size in KB') [void]$sb.AppendLine('# TYPE sqlag_log_send_queue_kb gauge') foreach ($d in $databases) { $dbName = $d.database_name -replace '[\\"]', '' $agName = $d.ag_name -replace '[\\"]', '' $logVal = if ($d.log_send_queue_kb -ne $null) { $d.log_send_queue_kb } else { 0 } [void]$sb.AppendLine("sqlag_log_send_queue_kb{database=`"$dbName`",ag=`"$agName`"} $logVal") } [void]$sb.AppendLine('') # --- sqlag_redo_queue_kb --- [void]$sb.AppendLine('# HELP sqlag_redo_queue_kb Redo queue size in KB') [void]$sb.AppendLine('# TYPE sqlag_redo_queue_kb gauge') foreach ($d in $databases) { $dbName = $d.database_name -replace '[\\"]', '' $agName = $d.ag_name -replace '[\\"]', '' $redoVal = if ($d.redo_queue_kb -ne $null) { $d.redo_queue_kb } else { 0 } [void]$sb.AppendLine("sqlag_redo_queue_kb{database=`"$dbName`",ag=`"$agName`"} $redoVal") } [void]$sb.AppendLine('') # --- sqlag_last_commit_lag_seconds --- [void]$sb.AppendLine('# HELP sqlag_last_commit_lag_seconds Seconds since last commit was replicated') [void]$sb.AppendLine('# TYPE sqlag_last_commit_lag_seconds gauge') foreach ($d in $databases) { $dbName = $d.database_name -replace '[\\"]', '' $agName = $d.ag_name -replace '[\\"]', '' $lagVal = if ($d.last_commit_lag_seconds -ne $null) { $d.last_commit_lag_seconds } else { 0 } [void]$sb.AppendLine("sqlag_last_commit_lag_seconds{database=`"$dbName`",ag=`"$agName`"} $lagVal") } [void]$sb.AppendLine('') # --- sqlag_database_suspended --- [void]$sb.AppendLine('# HELP sqlag_database_suspended Database suspended state (1=suspended, 0=normal)') [void]$sb.AppendLine('# TYPE sqlag_database_suspended gauge') foreach ($d in $databases) { $dbName = $d.database_name -replace '[\\"]', '' $agName = $d.ag_name -replace '[\\"]', '' $suspVal = if ($d.is_suspended -eq $true) { 1 } else { 0 } [void]$sb.AppendLine("sqlag_database_suspended{database=`"$dbName`",ag=`"$agName`"} $suspVal") } [void]$sb.AppendLine('') } # --- Listener metrics --- $listenerQuery = @" SELECT agl.dns_name AS listener_name, ag.name AS ag_name, agl.state AS listener_state FROM sys.availability_group_listeners agl JOIN sys.availability_groups ag ON agl.group_id = ag.group_id "@ $listeners = Invoke-AgQuery -Query $listenerQuery -Instance $SqlInstance if ($listeners) { [void]$sb.AppendLine('# HELP sqlag_listener_state Listener state (1=online, 0=offline)') [void]$sb.AppendLine('# TYPE sqlag_listener_state gauge') foreach ($l in $listeners) { $listenerName = $l.listener_name -replace '[\\"]', '' $agName = $l.ag_name -replace '[\\"]', '' $stateVal = if ($l.listener_state -eq 1) { 1 } else { 0 } [void]$sb.AppendLine("sqlag_listener_state{listener=`"$listenerName`",ag=`"$agName`"} $stateVal") } [void]$sb.AppendLine('') } $sb.ToString() } # ============================================================================ # COLLECT ALL METRICS # ============================================================================ function Get-AllMetrics { $scriptStart = Get-Date $sb = [System.Text.StringBuilder]::new() # Collect AG metrics [void]$sb.Append((Get-AgMetrics)) # Exporter runtime $scriptEnd = Get-Date $duration = Format-MetricValue ($scriptEnd - $scriptStart).TotalSeconds $timestamp = Get-UnixTimestamp [void]$sb.AppendLine('# HELP sqlag_exporter_duration_seconds Time to generate all metrics') [void]$sb.AppendLine('# TYPE sqlag_exporter_duration_seconds gauge') [void]$sb.AppendLine("sqlag_exporter_duration_seconds $duration") [void]$sb.AppendLine('') [void]$sb.AppendLine('# HELP sqlag_exporter_last_run_timestamp Unix timestamp of last successful run') [void]$sb.AppendLine('# TYPE sqlag_exporter_last_run_timestamp gauge') [void]$sb.AppendLine("sqlag_exporter_last_run_timestamp $timestamp") [void]$sb.AppendLine('') $sb.ToString() } # ============================================================================ # HTTP SERVER MODE # ============================================================================ function Start-HttpServer { param([int]$ListenPort) $prefix = "http://+:$ListenPort/" $listener = [System.Net.HttpListener]::new() $listener.Prefixes.Add($prefix) try { $listener.Start() Write-Host "Starting SQL Server Always On exporter on port $ListenPort..." -ForegroundColor Green Write-Host "Metrics available at http://localhost:$ListenPort/metrics" while ($listener.IsListening) { $context = $listener.GetContext() $request = $context.Request $response = $context.Response if ($request.Url.AbsolutePath -eq '/metrics') { $metrics = Get-AllMetrics $buffer = [System.Text.Encoding]::UTF8.GetBytes($metrics) $response.ContentType = 'text/plain; version=0.0.4; charset=utf-8' } else { $html = @"