<#
.SYNOPSIS
    Monitors Salt Minion service status and exports metrics for Prometheus windows_exporter.

.DESCRIPTION
    This script checks the status of the Salt Minion service and creates Prometheus-formatted metrics.
    The metrics are written to a text file that can be consumed by the windows_exporter.
    It can also create a scheduled task to run periodically.

.PARAMETER MetricsFilePath
    The path where the Prometheus metrics file will be written.
    Validation (ValidateNotNullOrEmpty): the value must not be null or empty.
    Validation (ValidateScript): the value must be a valid Windows path (drive-letter form, e.g. C:\...)
    and its parent directory must already exist.

.PARAMETER InstallScheduledTask
    Switch to create a scheduled task for periodic monitoring.

.PARAMETER TaskIntervalMinutes
    The interval in minutes for the scheduled task. Default is 15 minutes.

.PARAMETER TimeoutSeconds
    Timeout in seconds for the salt-call test.ping check. Default is 30 seconds.

.PARAMETER SaltMasterPort
    The port number for the Salt Master. Default is 4505.

.PARAMETER DryRun
    Switch to output metrics to console instead of writing to file.

.PARAMETER Verbose
    Switch to enable verbose debug output for troubleshooting.

.PARAMETER Quiet
    Switch to suppress non-error output (useful for scheduled tasks).

.PARAMETER NoCron
    Switch to skip scheduled task installation.

.PARAMETER Version
    Switch to display script version and exit.

.NOTES
    Version: 3.3.0-20250915
    Author: Phil Connor, contact@mylinux.work
    License: MIT
    Created: 2025-01-24
    Loosely based on my salt_status.sh used with the Linux servers.
#>
param(
    # Destination .prom file. Must be an absolute drive-letter path whose
    # parent directory already exists.
    [ValidateNotNullOrEmpty()]
    [ValidateScript({
        $parentPath = Split-Path $_ -Parent
        if ($parentPath -and -not (Test-Path $parentPath)) {
            throw "Directory does not exist: $parentPath"
        }
        if ($_ -match '^[A-Za-z]:\\') {
            return $true
        }
        throw "Invalid file path format"
    })]
    [string]$MetricsFilePath = "$env:ProgramFiles\windows_exporter\textfile_inputs\salt_status.prom",

    [switch]$InstallScheduledTask = $false,

    # Interval for the scheduled task: between 1 and 1440 minutes.
    [ValidateRange(1, 1440)]
    [int]$TaskIntervalMinutes = 15,

    # Timeout for the salt-call test.ping check: between 1 and 300 seconds.
    [ValidateRange(1, 300)]
    [int]$TimeoutSeconds = 30,

    # TCP port of the Salt Master; restricted to the valid TCP port range.
    [ValidateRange(1, 65535)]
    [int]$SaltMasterPort = 4505,

    [switch]$DryRun = $false,   # Output metrics to console instead of file
    [switch]$Verbose = $false,  # Enable verbose debug output
    [switch]$Quiet = $false,    # Suppress non-error output
    [switch]$NoCron = $false,   # Skip scheduled task installation
    [switch]$Version = $false   # Show version and exit
)

# Handle version display
if ($Version) {
    Write-Host "Salt Status Monitor PowerShell Script"
    Write-Host "Version: 3.3.0-20250915"
    Write-Host "Author: Phil Connor pconnor@ara.com"
    exit 0
}

# Set up logging preferences based on Verbose/Quiet flags.
# Quiet is applied last, so it wins when both switches are given.
if ($Verbose) {
    $VerbosePreference = 'Continue'
    $InformationPreference = 'Continue'
}
if ($Quiet) {
    $VerbosePreference = 'SilentlyContinue'
    $InformationPreference = 'SilentlyContinue'
    $WarningPreference = 'SilentlyContinue'
}

# Writes a timestamped debug line to the host, only when -Verbose was given.
function Write-VerboseLog {
    param([string]$Message)

    if ($Verbose) {
        Write-Host "[VERBOSE] $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss') $Message" -ForegroundColor Cyan
    }
}

# Writes a timestamped informational line to the host unless -Quiet was given.
function Write-InfoLog {
    param([string]$Message)

    if (-not $Quiet) {
        Write-Host "[INFO] $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss') $Message" -ForegroundColor Green
    }
}

# Create a scheduled task that re-runs this script every $TaskIntervalMinutes minutes.
if ($InstallScheduledTask -and -not $NoCron) {
    $taskName = "SaltMinionStatusCheck"
    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue

    if (-not $existingTask) {
        # Forward the effective settings so the scheduled run does not silently
        # fall back to the defaults when the installer was given custom values.
        $taskArguments = "-NoProfile -ExecutionPolicy Bypass -File `"$($MyInvocation.MyCommand.Path)`"" +
            " -MetricsFilePath `"$MetricsFilePath`"" +
            " -TimeoutSeconds $TimeoutSeconds" +
            " -SaltMasterPort $SaltMasterPort"
        if ($Quiet) {
            $taskArguments += " -Quiet"
        }

        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument $taskArguments

        # $TaskIntervalMinutes is already constrained to 1..1440 by ValidateRange.
        $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) `
            -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) `
            -RepetitionDuration (New-TimeSpan -Days 365)
        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest

        try {
            Write-InfoLog "Creating scheduled task: $taskName"

            # -ErrorAction Stop routes registration failures into the catch below;
            # Out-Null keeps the returned task object off the console.
            Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger `
                -Principal $taskPrincipal `
                -Description "Monitors Salt Minion status every $TaskIntervalMinutes minutes" `
                -ErrorAction Stop | Out-Null

            # Verify the task was created
            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
            if (-not $createdTask) {
                throw "Failed to verify scheduled task creation"
            }
            Write-InfoLog "Successfully created scheduled task: $taskName"
        }
        catch {
            Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
            throw
        }
    }
    else {
        Write-InfoLog "Scheduled task $taskName already exists. Skipping creation."
    }
}

# Returns $true when $Command resolves via Get-Command; otherwise warns and returns $false.
function Test-CommandAvailability {
    param([string]$Command)

    try {
        Get-Command $Command -ErrorAction Stop | Out-Null
        return $true
    }
    catch {
        Write-Warning "Required command '$Command' is not available"
        return $false
    }
}

# Returns $true when netstat lists any socket whose address column ends in
# :$SaltMasterPort, $false otherwise (including on any failure).
function Test-Port4505Connection {
    try {
        # Use netstat to check for active connections on the salt-master port
        $portCheck = netstat -an 2>$null | Select-String "\s+[^:]+:$SaltMasterPort\s+"

        if ($null -ne $portCheck) {
            Write-VerboseLog "Port $SaltMasterPort is in use and has active connections"
            return $true
        }

        Write-VerboseLog "No active connections found on port $SaltMasterPort"
        return $false
    }
    catch [System.Management.Automation.ActionPreferenceStopException] {
        # Deliberately not logged, but still report "not connected" explicitly
        # instead of returning nothing.
        return $false
    }
    catch {
        # Log any other unexpected errors and return failure status
        Write-Warning "Failed to check port $SaltMasterPort : $($_.Exception.Message)"
        return $false
    }
}

# Runs "salt-call test.ping --local" in a background job with a timeout.
# Returns $true when the output reports True, $false on timeout, empty output,
# unexpected output, or any error.
function Test-SaltPing {
    param(
        [int]$TimeoutSeconds = $script:TimeoutSeconds
    )

    if (-not (Test-CommandAvailability "salt-call")) {
        Write-Warning "Salt-call command not found"
        return $false
    }

    $job = $null
    try {
        $job = Start-Job -ScriptBlock { salt-call test.ping --local 2>$null } -ErrorAction Stop

        $completed = $job | Wait-Job -Timeout $TimeoutSeconds
        if (-not $completed) {
            Write-Warning "Salt-call test.ping timed out after $TimeoutSeconds seconds"
            return $false
        }

        $saltTest = $job | Receive-Job -ErrorAction SilentlyContinue
        if ($null -eq $saltTest) {
            # Write-Warning (not Write-Host) so -Quiet is honoured like the other failure paths.
            Write-Warning "No response from salt-call test.ping"
            return $false
        }

        # Collapse multi-line output into one string so the regexes see all of it.
        $saltText = @($saltTest) -join "`n"

        if ($saltText -match "local:\s*True" -or $saltText -match "^\s*True\s*$") {
            Write-VerboseLog "Salt-call test.ping returned True"
            return $true
        }

        Write-VerboseLog "Salt-call test.ping failed or returned unexpected output: $saltText"
        return $false
    }
    catch {
        Write-Warning "Salt-Call failed: $($_.Exception.Message)"
        return $false
    }
    finally {
        # Always stop and remove the job, including after a timeout.
        if ($null -ne $job) {
            try {
                if ($job.State -eq 'Running') {
                    $job | Stop-Job -Force -ErrorAction SilentlyContinue
                }
            }
            finally {
                $job | Remove-Job -Force -ErrorAction SilentlyContinue
            }
        }
    }
}

# Returns $true when $MetricName matches the Prometheus metric-name grammar
# [a-zA-Z_:][a-zA-Z0-9_:]*, otherwise $false.
function Test-PrometheusMetricName {
    param([string]$MetricName)

    return ($MetricName -match '^[a-zA-Z_:][a-zA-Z0-9_:]*$')
}

# Appends the "# HELP", "# TYPE" and sample lines for one metric to the array
# referenced by $MetricsArray. Metrics with an invalid name are skipped with a warning.
function Add-PrometheusMetric {
    param(
        [string]$Name,
        [string]$Help,
        [string]$Type,
        [object]$Value,
        [ref]$MetricsArray
    )

    if (-not (Test-PrometheusMetricName $Name)) {
        Write-Warning "Invalid metric name: $Name"
        return
    }

    $MetricsArray.Value += "# HELP $Name $Help"
    $MetricsArray.Value += "# TYPE $Name $Type"
    $MetricsArray.Value += "$Name $Value"
}

# Returns 1 when the salt-minion service is running, 0 when it is not running
# (or the check failed), and 2 when the service does not exist.
function Test-SaltMinionService {
    try {
        $service = Get-Service -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $service) {
            Write-Warning "Salt-minion service not found"
            return 2  # Service not found
        }

        if ($service.Status -eq 'Running') {
            return 1  # Service is running
        }
        return 0      # Service is not running
    }
    catch {
        Write-Warning "Failed to check salt-minion service status: $($_.Exception.Message)"
        return 0
    }
}

# Returns the first "<digits>.<digits>" token printed by "salt-call --version",
# or "0" when salt-call is unavailable, prints no version, or fails.
function Get-SaltVersion {
    if (-not (Test-CommandAvailability "salt-call")) {
        return "0"
    }

    try {
        $versionOutput = & salt-call --version 2>$null

        # Join first: -match against an array filters instead of populating $matches.
        $versionText = @($versionOutput) -join "`n"
        $versionMatch = [regex]::Match($versionText, '(\d+\.\d+)')
        if ($versionMatch.Success) {
            return $versionMatch.Groups[1].Value
        }
        return "0"
    }
    catch {
        Write-Warning "Failed to get Salt version: $($_.Exception.Message)"
        return "0"
    }
}

# Returns the summed WorkingSet64 (bytes) of all processes named "salt-minion",
# or 0 when none are found or the query fails.
function Get-SaltMemoryUsage {
    try {
        $saltProcesses = Get-Process -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $saltProcesses) {
            return 0
        }

        $totalMemory = 0
        foreach ($process in $saltProcesses) {
            $totalMemory += $process.WorkingSet64
        }
        return $totalMemory
    }
    catch {
        Write-Warning "Failed to get salt-minion memory usage: $($_.Exception.Message)"
        return 0
    }
}

# Returns the current Unix time (UTC seconds) when the Salt ping succeeds, else 0.
#   -PingResult  Optional result of an earlier Test-SaltPing call. When supplied,
#                the ping is not repeated; when omitted, Test-SaltPing is run here.
function Get-LastCommunicationTimestamp {
    param(
        [object]$PingResult = $null
    )

    try {
        if ($null -eq $PingResult) {
            if (-not (Test-CommandAvailability "salt-call")) {
                return 0
            }
            $PingResult = Test-SaltPing
        }

        if (-not $PingResult) {
            return 0
        }

        # Compute the epoch from UTC directly: "Get-Date -UFormat %s" is culture- and
        # time-zone-sensitive on Windows PowerShell, and [int] overflows in 2038.
        $epoch = New-Object System.DateTime -ArgumentList 1970, 1, 1, 0, 0, 0, ([System.DateTimeKind]::Utc)
        return [long][Math]::Floor(([DateTime]::UtcNow - $epoch).TotalSeconds)
    }
    catch {
        Write-Warning "Failed to get last communication timestamp: $($_.Exception.Message)"
        return 0
    }
}

# Counts Salt errors: Error-level Application-log events from provider
# "salt-minion" in the last 24 hours. If the event log cannot be queried,
# falls back to counting "[ERROR]" lines in the last 1000 lines of the minion log.
function Get-SaltErrorCount {
    try {
        $since = (Get-Date).AddHours(-24)

        # ProviderName is the FilterHashtable key for the event source.
        $errorEvents = Get-WinEvent -FilterHashtable @{
            LogName      = 'Application'
            ProviderName = 'salt-minion'
            Level        = 2  # Error level
            StartTime    = $since
        } -ErrorAction Stop

        return @($errorEvents).Count
    }
    catch {
        # "No matching events" is reported as an error but simply means zero errors.
        if ($_.FullyQualifiedErrorId -like 'NoMatchingEventsFound*') {
            return 0
        }

        # Fallback: try to read from salt log file if it exists
        $logPath = "${env:ProgramData}\Salt Project\Salt\var\log\salt\minion"
        if (Test-Path $logPath) {
            try {
                $logContent = Get-Content $logPath -Tail 1000 -ErrorAction SilentlyContinue
                $errorLines = $logContent | Where-Object { $_ -match "\[ERROR\]" }
                return @($errorLines).Count
            }
            catch {
                return 0
            }
        }
        return 0
    }
}

# Runs every check and returns the Prometheus exposition lines as a string array.
# A failing check contributes a 0 sample and is counted in
# windows_salt_script_errors_total instead of aborting the export.
function Export-PrometheusMetrics {
    $startTime = Get-Date
    $metrics = @()
    $errors = @()

    try {
        # Connection status metric (Salt Master port): 1 connected, 0 not, 2 netstat missing
        $connectionStatus = 0
        try {
            if (-not (Test-CommandAvailability "netstat")) {
                $errors += "netstat command not found"
                $connectionStatus = 2
            }
            else {
                $connectionStatus = if (Test-Port4505Connection) { 1 } else { 0 }
            }
        }
        catch {
            $errors += "Port 4505 check failed: $($_.Exception.Message)"
            $connectionStatus = 0
        }
        Add-PrometheusMetric -Name "minion_connection_status" -Help "Shows if Salt-Minion is connected to Salt-Master." -Type "gauge" -Value $connectionStatus -MetricsArray ([ref]$metrics)

        # Salt ping metric: 1 ok, 0 failed, 2 salt-call missing.
        # The boolean is kept so the timestamp metric does not ping a second time.
        $pingSucceeded = $false
        $pingStatus = 0
        try {
            if (-not (Test-CommandAvailability "salt-call")) {
                $errors += "salt-call command not found"
                $pingStatus = 2
            }
            else {
                $pingSucceeded = [bool](Test-SaltPing)
                $pingStatus = if ($pingSucceeded) { 1 } else { 0 }
            }
        }
        catch {
            $errors += "Salt ping check failed: $($_.Exception.Message)"
            $pingSucceeded = $false
            $pingStatus = 0
        }
        Add-PrometheusMetric -Name "minion_ping_status" -Help "Shows if Salt-Minion is able to ping Salt-Master." -Type "gauge" -Value $pingStatus -MetricsArray ([ref]$metrics)

        # Service status metric
        $serviceStatus = 0
        try {
            $serviceStatus = Test-SaltMinionService
        }
        catch {
            $errors += "Service status check failed: $($_.Exception.Message)"
            $serviceStatus = 0
        }
        Add-PrometheusMetric -Name "minion_service_status" -Help "Shows if Salt-Minion service is active." -Type "gauge" -Value $serviceStatus -MetricsArray ([ref]$metrics)

        # Last communication timestamp (reuses the ping result from above)
        $lastComm = 0
        try {
            $lastComm = Get-LastCommunicationTimestamp -PingResult $pingSucceeded
        }
        catch {
            $errors += "Last communication check failed: $($_.Exception.Message)"
            $lastComm = 0
        }
        Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help "Timestamp of last successful communication with Salt-Master." -Type "gauge" -Value $lastComm -MetricsArray ([ref]$metrics)

        # Salt version metric
        $version = 0
        try {
            $version = Get-SaltVersion
        }
        catch {
            $errors += "Version check failed: $($_.Exception.Message)"
            $version = 0
        }
        Add-PrometheusMetric -Name "minion_version" -Help "Salt-Minion version number." -Type "gauge" -Value $version -MetricsArray ([ref]$metrics)

        # Memory usage metric
        $memoryUsage = 0
        try {
            $memoryUsage = Get-SaltMemoryUsage
        }
        catch {
            $errors += "Memory usage check failed: $($_.Exception.Message)"
            $memoryUsage = 0
        }
        Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help "Salt-Minion process memory usage in bytes." -Type "gauge" -Value $memoryUsage -MetricsArray ([ref]$metrics)

        # Error count metric
        # NOTE(review): this is a sliding-window count that can decrease, yet it is
        # exported as a counter; type left unchanged for existing dashboards.
        $errorCount = 0
        try {
            $errorCount = Get-SaltErrorCount
        }
        catch {
            $errors += "Error count check failed: $($_.Exception.Message)"
            $errorCount = 0
        }
        Add-PrometheusMetric -Name "minion_error_count" -Help "Number of error entries in Salt-Minion log file." -Type "counter" -Value $errorCount -MetricsArray ([ref]$metrics)

        # Windows-specific: Script execution error count
        Add-PrometheusMetric -Name "windows_salt_script_errors_total" -Help "Total number of errors during script execution" -Type "counter" -Value $errors.Count -MetricsArray ([ref]$metrics)

        # Windows-specific: Script runtime
        $scriptRuntime = (Get-Date) - $startTime
        Add-PrometheusMetric -Name "windows_salt_script_runtime_seconds" -Help "Total script execution time in seconds" -Type "gauge" -Value $scriptRuntime.TotalSeconds -MetricsArray ([ref]$metrics)
    }
    finally {
        # Report the number of collected errors regardless of success/failure
        if ($errors.Count -gt 0) {
            Write-Warning "Script completed with $($errors.Count) errors"
        }
    }

    return $metrics
}

# Output metrics to console or file
try {
    # Export metrics as an array of strings
    $exportedMetrics = Export-PrometheusMetrics
    if ($null -eq $exportedMetrics) {
        throw "Export-PrometheusMetrics returned null"
    }

    if ($DryRun) {
        # Dry run mode: output to console
        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsFilePath ===" -ForegroundColor Yellow
        $exportedMetrics | ForEach-Object { Write-Host $_ }
        Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
    }
    else {
        # Normal mode: write to file with retry mechanism.
        # The Prometheus text format wants LF-separated lines ending in a final LF.
        # Out-File -Encoding UTF8 on Windows PowerShell adds a BOM and CRLF, so the
        # payload is written explicitly as BOM-less UTF-8.
        $payload = (@($exportedMetrics) -join "`n") + "`n"
        $utf8NoBom = New-Object System.Text.UTF8Encoding($false)

        $retryCount = 0
        $maxRetries = 3
        do {
            try {
                [System.IO.File]::WriteAllText($MetricsFilePath, $payload, $utf8NoBom)
                break
            }
            catch [System.IO.IOException] {
                # Typically the file is momentarily locked by a reader; retry briefly.
                $retryCount++
                if ($retryCount -ge $maxRetries) {
                    throw
                }
                # Wait 100ms before retrying
                Start-Sleep -Milliseconds 100
            }
        } while ($retryCount -lt $maxRetries)
    }
}
catch {
    Write-Error "Failed to export metrics: $($_.Exception.Message)"
    exit 1
}

# Uncomment the following line to write metrics to the console
# $exportedMetrics = Export-PrometheusMetrics