<#
.SYNOPSIS
    Snake Charmer - Python Process Monitor and Controller

.DESCRIPTION
    This script monitors and manages Python processes to prevent system resource
    exhaustion. It checks for excessive Python processes, restarts the Salt Minion
    service when needed, and exports metrics for Prometheus monitoring.
    This version runs once and exits.

.PARAMETER ProcessName
    The name of the process to monitor (default: python)

.PARAMETER MaxPythonProcesses
    Maximum number of allowed Python processes (default: 50)

.PARAMETER ServiceName
    Name of the service to restart when needed (default: salt-minion)

.PARAMETER MetricsFilePath
    Path to export Prometheus metrics
    (default: C:\Program Files\windows_exporter\textfile_inputs\snake_charmer.prom)

.PARAMETER InstallScheduledTask
    Switch to create a scheduled task for auto-start on system boot.
    Enabled by default; pass -InstallScheduledTask:$false to skip installation.

.PARAMETER TaskIntervalMinutes
    Interval in minutes for the scheduled task (default: 5)

.PARAMETER RestartCooldownMinutes
    Cooldown period between service restarts in minutes (default: 5).
    NOTE(review): this value is only shown in the configuration summary; this
    single-run version does not enforce a cooldown between restarts.

.PARAMETER MaxRetries
    Maximum retry attempts for service restart (default: 3)

.NOTES
    Version: 2.0.0-20250725
    Author: Phil Connor contact@mylinux.work
    Created: 2025-07-24
#>
[CmdletBinding()]
param(
    [string]$ProcessName = "python",
    [int]$MaxPythonProcesses = 50,
    [string]$ServiceName = "salt-minion",
    [string]$MetricsFilePath = "C:\Program Files\windows_exporter\textfile_inputs\snake_charmer.prom",
    # Default of $true is kept for backward compatibility with existing invocations.
    [switch]$InstallScheduledTask = $true,
    [int]$TaskIntervalMinutes = 5,
    [int]$RestartCooldownMinutes = 5,
    [int]$MaxRetries = 3
)

# ---------------------------------------------------------------------------
# Scheduled task installation
# Registers a SYSTEM task that runs this script at boot and then every
# $TaskIntervalMinutes minutes. Nothing is changed if the task already exists.
# ---------------------------------------------------------------------------
if ($InstallScheduledTask) {
    $taskName = "SnakeCharmerAutoStart"
    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue

    if (-not $existingTask) {
        if ($TaskIntervalMinutes -le 0) {
            throw "TaskIntervalMinutes must be a positive integer"
        }

        # The task needs a real file to execute; refuse to register a task that
        # would point at an empty path (e.g. when the code was pasted into a console).
        $scriptPath = $PSCommandPath
        if ([string]::IsNullOrEmpty($scriptPath)) {
            $scriptPath = $MyInvocation.MyCommand.Path
        }
        if ([string]::IsNullOrEmpty($scriptPath)) {
            throw "Cannot determine the script path; run this script from a file to install the scheduled task"
        }

        # Forward explicitly supplied settings so scheduled runs use the same
        # configuration as this invocation instead of silently reverting to defaults.
        $taskArguments = "-NoProfile -ExecutionPolicy Bypass -File `"$scriptPath`""
        $forwardedNames = @(
            'ProcessName', 'MaxPythonProcesses', 'ServiceName', 'MetricsFilePath',
            'TaskIntervalMinutes', 'RestartCooldownMinutes', 'MaxRetries'
        )
        foreach ($name in $forwardedNames) {
            if ($PSBoundParameters.ContainsKey($name)) {
                $taskArguments += " -$name `"$($PSBoundParameters[$name])`""
            }
        }

        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument $taskArguments

        # Two triggers: one fires at boot, the other repeats on the interval.
        # NOTE(review): the repetition window is 365 days, after which only the
        # boot trigger remains active.
        $startupTrigger = New-ScheduledTaskTrigger -AtStartup
        $repeatTriggerArgs = @{
            Once               = $true
            At                 = (Get-Date).AddMinutes(1)
            RepetitionInterval = New-TimeSpan -Minutes $TaskIntervalMinutes
            RepetitionDuration = New-TimeSpan -Days 365
        }
        $repeatTrigger = New-ScheduledTaskTrigger @repeatTriggerArgs

        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest

        try {
            Write-Host "Creating scheduled task: $taskName"

            $registerArgs = @{
                TaskName    = $taskName
                Action      = $taskAction
                Trigger     = @($startupTrigger, $repeatTrigger)
                Principal   = $taskPrincipal
                Description = "Monitors Salt Minion status every $TaskIntervalMinutes minutes"
                ErrorAction = 'Stop'   # make registration failures reach the catch block
            }
            Register-ScheduledTask @registerArgs | Out-Null

            # Verify the task was created
            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
            if (-not $createdTask) {
                throw "Failed to verify scheduled task creation"
            }

            Write-Host "Successfully created scheduled task: $taskName"
        }
        catch {
            Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
            throw
        }
    }
}

# ---------------------------------------------------------------------------
# Parameter validation
# ---------------------------------------------------------------------------
if ($MaxPythonProcesses -lt 1) {
    throw "MaxPythonProcesses must be greater than 0"
}

if (-not (Get-Service -Name $ServiceName -ErrorAction SilentlyContinue)) {
    throw "Service '$ServiceName' does not exist"
}

if (-not [System.IO.Path]::IsPathRooted($MetricsFilePath)) {
    throw "MetricsFilePath must be an absolute path"
}

# Configuration summary logged
$ConfigSummary = @"
Snake Charmer Monitor (Single Execution)
Configuration:
- Process Name: $ProcessName
- Max Processes: $MaxPythonProcesses
- Service Name: $ServiceName
- Metrics File: $MetricsFilePath
- Restart Cooldown: $RestartCooldownMinutes minutes
- Started: $(Get-Date)
"@
Write-Host $ConfigSummary

# ---------------------------------------------------------------------------
# Ensure the metrics directory exists (best effort: failures are logged and
# the run continues, so the export step reports the final outcome).
# ---------------------------------------------------------------------------
try {
    $metricsDir = Split-Path $MetricsFilePath -Parent

    if (-not (Test-Path $metricsDir)) {
        # -ErrorAction Stop turns a failed creation into a catchable error.
        New-Item -Path $metricsDir -ItemType Directory -Force -ErrorAction Stop | Out-Null
        Write-Verbose "Created metrics directory: $metricsDir"
    }
}
catch {
    # Capture the error record first: inside 'switch', $_ is rebound to the
    # value being matched, so $_.Exception would no longer be the exception.
    $dirError = $_
    switch ($dirError.Exception.GetType().Name) {
        'ArgumentException' {
            Write-Error "MetricsFilePath cannot be null or empty"
        }
        'IOException' {
            Write-Error "Failed to create metrics directory: $($dirError.Exception.Message)"
        }
        default {
            Write-Error "Unexpected error occurred: $($dirError.Exception.Message)"
        }
    }
}

# Counts running processes with the given name, ignoring ones that are not
# responding, whose window title contains "idle" or "debug", or that use
# 10MB of memory or less.
#   ProcessName - process name to look for (must not be $null; throws otherwise)
#   Returns     - number of matching processes; 0 when none exist or the lookup fails
function Get-PythonProcessCount {
    param($ProcessName)

    if ($null -eq $ProcessName) {
        throw "ProcessName is null"
    }

    try {
        $pythonProcesses = @(Get-Process -Name $ProcessName -ErrorAction Stop | Where-Object {
            $_.Responding -and
            $_.MainWindowTitle -notmatch "idle|debug" -and
            # WorkingSet64 instead of the obsolete 32-bit WorkingSet property,
            # which cannot represent working sets of 2GB and above.
            $_.WorkingSet64 -gt 10MB -and
            $_.ProcessName -eq $ProcessName
        })

        return $pythonProcesses.Count
    }
    catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
        # No process with that name is running
        return 0
    }
    catch {
        Write-Error "Unexpected error occurred: $($_.Exception.Message)"
        return 0
    }
}

# Restarts a service, retrying on service-control failures.
#   ServiceName - name of the service to restart
#   MaxRetries  - maximum number of restart attempts
#   Returns     - $true once the service reports 'Running' after a restart,
#                 $false if every attempt failed or an unexpected error stopped the loop
function Restart-ServiceWithRetry {
    param($ServiceName, $MaxRetries)

    $success = $false

    for ($attempt = 1; $attempt -le $MaxRetries; $attempt++) {
        try {
            Write-Host "Attempting service restart (attempt $attempt/$MaxRetries)"

            $service = Get-Service -Name $ServiceName -ErrorAction Stop
            $service | Restart-Service -Force -ErrorAction Stop

            # Give the service time to settle before checking its state
            Start-Sleep -Seconds 10

            $service = Get-Service -Name $ServiceName -ErrorAction Stop
            if ($service.Status -eq 'Running') {
                Write-Host "Service restarted successfully"
                $success = $true
                break
            }

            Write-Warning "Service not running after restart attempt $attempt"
        }
        catch [Microsoft.PowerShell.Commands.ServiceCommandException] {
            # Failure reported by the service cmdlets: warn and fall through to retry
            Write-Warning "Service control error: $($_.Exception.Message)"
        }
        catch [System.TimeoutException] {
            Write-Warning "Service restart timed out: $($_.Exception.Message)"
        }
        catch {
            # Anything else is not expected to be fixed by retrying
            Write-Error "Unexpected error during service restart: $($_.Exception.Message)"
            break
        }

        if ($attempt -lt $MaxRetries) {
            # Short pause before the next attempt
            Start-Sleep -Seconds 5
        }
    }

    if (-not $success) {
        Write-Error "Failed to restart service after $MaxRetries attempts"
    }

    return $success
}

# Escapes a string for use as a Prometheus label value
# (backslash, double quote and line feed must be escaped in the text format).
function ConvertTo-PrometheusLabelValue {
    param([string]$Value)

    return (($Value -replace '\\', '\\') -replace '"', '\"') -replace "`n", '\n'
}

# ---------------------------------------------------------------------------
# Main execution
# ---------------------------------------------------------------------------
$startTime = Get-Date
$processCount = 0

try {
    $processCount = Get-PythonProcessCount -ProcessName $ProcessName
}
catch {
    # The count stays at 0 so the run can still export metrics
    Write-Error "Unexpected error occurred when getting process count: $($_.Exception.Message)"
}

Write-Host "Found $processCount Python processes"

# Restart the service only when the process count exceeds the limit
$restartPerformed = $false

if ($processCount -gt $MaxPythonProcesses) {
    Write-Warning "Excessive Python processes detected: $processCount > $MaxPythonProcesses"

    try {
        $restartPerformed = Restart-ServiceWithRetry -ServiceName $ServiceName -MaxRetries $MaxRetries
    }
    catch {
        Write-Error "Unexpected error during service restart: $($_.Exception.Message)"
    }
}
else {
    Write-Host "System healthy: $processCount Python processes"
}

# ---------------------------------------------------------------------------
# Build metrics
# ---------------------------------------------------------------------------
# Unix epoch seconds taken directly from the UTC clock as a 64-bit integer,
# rather than parsing the formatted string from 'Get-Date -UFormat %s' into [int].
$timestamp = [DateTimeOffset]::UtcNow.ToUnixTimeSeconds()
$runtimeSeconds = [math]::Round(((Get-Date) - $startTime).TotalSeconds)

$processLabel = ConvertTo-PrometheusLabelValue -Value $ProcessName
$serviceLabel = ConvertTo-PrometheusLabelValue -Value $ServiceName

# NOTE(review): windows_snake_charmer_service_restart_total is declared as a
# counter but carries only this run's value (1 if a restart succeeded, else 0);
# it is not accumulated across runs.
$metricsOutput = @"
# HELP windows_snake_charmer_python_process_count Number of Python processes running
# TYPE windows_snake_charmer_python_process_count gauge
windows_snake_charmer_python_process_count{process_name="$processLabel",service="$serviceLabel"} $processCount
# HELP windows_snake_charmer_service_restart_total Total number of service restarts
# TYPE windows_snake_charmer_service_restart_total counter
windows_snake_charmer_service_restart_total $(if ($restartPerformed) { 1 } else { 0 })
# HELP windows_snake_charmer_last_check_timestamp Timestamp of last check
# TYPE windows_snake_charmer_last_check_timestamp gauge
windows_snake_charmer_last_check_timestamp $timestamp
# HELP windows_snake_charmer_script_runtime_seconds How long the script ran in seconds
# TYPE windows_snake_charmer_script_runtime_seconds gauge
windows_snake_charmer_script_runtime_seconds $runtimeSeconds
"@

# ---------------------------------------------------------------------------
# Export metrics: write to a temporary file, then move it over the final path
# so a reader never sees a partially written file.
# ---------------------------------------------------------------------------
$tempMetricsFile = "$MetricsFilePath.tmp"

try {
    # -ErrorAction Stop is required for write/move failures to reach the catch
    # block; otherwise the success message below would be printed regardless.
    $metricsOutput | Out-File -LiteralPath $tempMetricsFile -Force -Encoding UTF8 -ErrorAction Stop
    Move-Item -LiteralPath $tempMetricsFile -Destination $MetricsFilePath -Force -ErrorAction Stop

    Write-Host "Metrics exported to: $MetricsFilePath"
}
catch {
    Write-Error "Failed to export metrics: $($_.Exception.Message)"

    # Do not leave a stale temporary file behind
    if (Test-Path -LiteralPath $tempMetricsFile) {
        Remove-Item -LiteralPath $tempMetricsFile -Force -ErrorAction SilentlyContinue
    }
}

Write-Host "Snake Charmer execution completed at $(Get-Date)"