Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
Executable
+440
@@ -0,0 +1,440 @@
|
||||
#!/bin/bash
|
||||
################################################################################
# Script Name: gpu-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for NVIDIA GPU metrics — temperature,
#              utilization, VRAM usage, power draw, fan speed, clock speeds,
#              performance state, and per-process GPU memory via nvidia-smi
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - NVIDIA GPU with drivers installed
#   - nvidia-smi available in PATH
#   - netcat (nc) for HTTP mode
#
# Usage:
#   ./gpu-exporter.sh                      # stdout
#   ./gpu-exporter.sh --http -p 9195       # HTTP server
#   ./gpu-exporter.sh --textfile           # node_exporter textfile
#
# Metrics Exported:
#   - gpu_info{gpu,name,driver_version,cuda_version}  - GPU info
#   - gpu_count                                       - Number of GPUs detected
#   - gpu_temperature_celsius{gpu}                    - Temperature
#   - gpu_utilization_percent{gpu}                    - GPU utilization
#   - gpu_memory_utilization_percent{gpu}             - Memory utilization
#   - gpu_memory_used_bytes{gpu}                      - VRAM used
#   - gpu_memory_total_bytes{gpu}                     - Total VRAM
#   - gpu_memory_free_bytes{gpu}                      - Free VRAM
#   - gpu_power_draw_watts{gpu}                       - Power draw
#   - gpu_power_limit_watts{gpu}                      - Power limit
#   - gpu_fan_speed_percent{gpu}                      - Fan speed
#   - gpu_clock_speed_mhz{gpu}                        - GPU clock
#   - gpu_memory_clock_speed_mhz{gpu}                 - Memory clock
#   - gpu_pstate{gpu}                                 - Performance state
#   - gpu_process_memory_bytes{gpu,pid,process_name}  - Per-process memory
#   - gpu_exporter_duration_seconds                   - Script execution time
#   - gpu_exporter_last_run_timestamp                 - Last run timestamp
#
# Configuration:
#   Default HTTP port: 9195
#   Textfile directory: /var/lib/node_exporter
#
################################################################################
|
||||
|
||||
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

# Directory used for the output file when --textfile is given.
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file for the metrics; empty means "print to stdout".
OUTPUT_FILE=""
# Switched to true by --http to serve metrics over HTTP instead.
HTTP_MODE=false
# TCP port used in HTTP mode; replaced by the value of -p/--port.
HTTP_PORT=9195
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print the command-line help text and terminate the script.
# Globals: HTTP_PORT (read) - shown as the port used by --http
# Outputs: usage text on stdout
# Returns: does not return; exits with status 0
show_usage() {
    # Unquoted delimiter on purpose: $0 and $HTTP_PORT are expanded in the text.
    cat <<EOF
Usage: $0 [OPTIONS]

Export NVIDIA GPU statistics as Prometheus metrics via nvidia-smi.
With no mode option, metrics are printed to stdout.

MODES:
    --textfile          Write to node_exporter textfile collector
    --http              Run HTTP server on port $HTTP_PORT

OPTIONS:
    -p, --port PORT     HTTP port (default: 9195)
    -o, --output FILE   Output file path
    -h, --help          Show this help and exit

EXAMPLES:
    $0 --textfile              # Write to textfile collector
    $0 --http --port 9195      # Run HTTP server
    $0 -o /tmp/gpu.prom        # Write to custom file

EOF
    exit 0
}
|
||||
|
||||
# Parse command-line options into the global configuration variables.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value, or an invalid port; -h/--help hands over to show_usage
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/gpu.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port)
                # Check the count first: touching "$2" when it is absent
                # would abort with "unbound variable" under `set -u`.
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: $1 requires a port number" >&2
                    exit 1
                fi
                if ! [[ "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "ERROR: invalid port: $2" >&2
                    exit 1
                fi
                # 10# forces base 10 so a leading zero is not read as octal.
                HTTP_PORT=$(( 10#$2 ))
                shift 2
                ;;
            -o|--output)
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: $1 requires a file path" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Escape special characters in Prometheus label values
# Args: $1 - string to escape (treated as empty when omitted)
# Returns: on stdout, the string with backslashes and double quotes
#          backslash-escaped and newlines removed
prom_escape() {
    # ${1-} keeps a call without arguments from aborting under `set -u`.
    local val="${1-}"
    # Backslashes must be doubled first so the backslashes added for quotes
    # below are not escaped a second time.
    val="${val//\\/\\\\}"
    val="${val//\"/\\\"}"
    # Newlines are dropped rather than escaped.
    val="${val//$'\n'/}"
    # printf, not echo: echo would treat values such as "-n" or "-e" as options.
    printf '%s\n' "$val"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC GENERATION
|
||||
# ============================================================================
|
||||
|
||||
# Strip leading and trailing whitespace from a string.
# Args: $1 - string to trim
# Returns: trimmed string on stdout
_gpu_trim() {
    local s="${1-}"
    s="${s#"${s%%[![:space:]]*}"}"
    s="${s%"${s##*[![:space:]]}"}"
    printf '%s\n' "$s"
}

# Convert a MiB quantity to bytes.
# Args: $1 - numeric MiB value (integer or decimal)
# Returns: byte count on stdout
_gpu_mib_to_bytes() {
    local mib="$1"
    if [[ "$mib" =~ ^[0-9]+$ ]]; then
        # Integer fast path; 10# keeps a leading zero from meaning octal.
        printf '%s\n' "$(( 10#$mib * 1048576 ))"
    else
        # The value is handed over with -v so awk sees it as data, never as
        # program text.
        awk -v mib="$mib" 'BEGIN { printf "%.0f\n", mib * 1048576 }'
    fi
}

# Shared implementation behind emit_gpu_metric and emit_gpu_mem_metric:
# prints HELP/TYPE headers and one sample per GPU for one query field.
# Args: $1=metric_name, $2=help_text, $3=query_field,
#       $4="mib" to convert each value from MiB to bytes (optional)
_gpu_emit_samples() {
    local metric="$1" help="$2" query="$3" unit="${4:-}"
    local num_re='^-?[0-9]+([.][0-9]+)?$'
    local rows g_idx g_val

    printf '# HELP %s %s\n' "$metric" "$help"
    printf '# TYPE %s gauge\n' "$metric"

    # A failing query must not abort the exporter under `set -e`; it just
    # produces no samples for this metric.
    rows=$(nvidia-smi --query-gpu=index,"$query" --format=csv,noheader,nounits 2>/dev/null) || true

    while IFS=',' read -r g_idx g_val; do
        g_idx="${g_idx//[[:space:]]/}"
        g_val="${g_val//[[:space:]]/}"
        # Skips the empty line a here-string yields for empty output, and
        # any non-CSV text printed in place of data.
        [[ "$g_idx" =~ ^[0-9]+$ ]] || continue
        if [[ "$g_val" == "[N/A]" ]]; then
            g_val=0
        fi
        # Any other non-numeric value (e.g. "[Not Supported]") would make an
        # invalid sample line, so it is dropped.
        [[ "$g_val" =~ $num_re ]] || continue
        if [[ "$unit" == "mib" ]]; then
            g_val=$(_gpu_mib_to_bytes "$g_val")
        fi
        printf '%s{gpu="%s"} %s\n' "$metric" "$g_idx" "$g_val"
    done <<< "$rows"

    echo ""
}

# Helper: emit a metric block for all GPUs
# Args: $1=metric_name, $2=help_text, $3=query_field
emit_gpu_metric() {
    _gpu_emit_samples "$1" "$2" "$3"
}

# Helper: emit a memory metric (MiB → bytes) for all GPUs
# Args: $1=metric_name, $2=help_text, $3=query_field
emit_gpu_mem_metric() {
    _gpu_emit_samples "$1" "$2" "$3" "mib"
}

# Generate all Prometheus metrics
# Calls:   nvidia-smi, prom_escape
# Returns: Prometheus text format metrics on stdout. When nvidia-smi is
#          missing, fails, or reports no GPUs, only `gpu_count 0` is printed.
generate_metrics() {
    local script_start
    script_start=$(date +%s)

    local nl=$'\n'
    local raw

    # ========================================================================
    # GPU COUNT
    # ========================================================================

    local gpu_count=""
    if command -v nvidia-smi >/dev/null 2>&1; then
        # `|| true`: a failing nvidia-smi is handled by the validation below
        # instead of killing the script under `set -e`.
        raw=$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits 2>/dev/null) || true
        gpu_count="${raw%%"$nl"*}"          # first line only
        gpu_count="${gpu_count//[[:space:]]/}"
    fi

    # Anything that is not a plain integer (empty output, error text) counts
    # as "no GPUs".
    if [[ "$gpu_count" =~ ^[0-9]+$ ]]; then
        gpu_count=$(( 10#$gpu_count ))
    else
        gpu_count=0
    fi

    printf '%s\n' \
        '# HELP gpu_count Number of NVIDIA GPUs detected' \
        '# TYPE gpu_count gauge' \
        "gpu_count $gpu_count"

    if (( gpu_count == 0 )); then
        return 0
    fi

    echo ""

    # ========================================================================
    # GPU INFO (driver + CUDA version)
    # ========================================================================

    local driver_version cuda_version
    raw=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits 2>/dev/null) || true
    driver_version="${raw%%"$nl"*}"
    driver_version="${driver_version//[[:space:]]/}"
    driver_version="${driver_version:-unknown}"

    raw=$(nvidia-smi --query-gpu=cuda_version --format=csv,noheader 2>/dev/null) || true
    cuda_version="${raw%%"$nl"*}"
    cuda_version="${cuda_version//[[:space:]]/}"

    # Fallback: parse from the nvidia-smi header whenever the query did not
    # return something that looks like a version (empty, "[N/A]", or an
    # error message).
    if ! [[ "$cuda_version" =~ ^[0-9]+([.][0-9]+)*$ ]]; then
        local cuda_re='CUDA Version:[[:space:]]*([0-9.]+)'
        raw=$(nvidia-smi 2>/dev/null) || true
        if [[ "$raw" =~ $cuda_re ]]; then
            cuda_version="${BASH_REMATCH[1]}"
        else
            cuda_version="unknown"
        fi
    fi

    printf '%s\n' \
        '# HELP gpu_info GPU information labels' \
        '# TYPE gpu_info gauge'

    # ========================================================================
    # GPU INFO LABELS
    # ========================================================================

    # Identical for every GPU, so escape them once outside the loop.
    local driver_label cuda_label
    driver_label=$(prom_escape "$driver_version")
    cuda_label=$(prom_escape "$cuda_version")

    local info_lines info_line g_idx g_name
    info_lines=$(nvidia-smi --query-gpu=index,name --format=csv,noheader 2>/dev/null) || true

    while IFS= read -r info_line; do
        [[ -n "$info_line" ]] || continue
        g_idx="${info_line%%,*}"
        g_idx="${g_idx//[[:space:]]/}"
        [[ "$g_idx" =~ ^[0-9]+$ ]] || continue
        # Everything after the first comma is the name.
        g_name=$(_gpu_trim "${info_line#*,}")
        printf 'gpu_info{gpu="%s",name="%s",driver_version="%s",cuda_version="%s"} 1\n' \
            "$g_idx" "$(prom_escape "$g_name")" "$driver_label" "$cuda_label"
    done <<< "$info_lines"

    echo ""

    # ========================================================================
    # OUTPUT PER-GPU METRICS (with HELP/TYPE headers)
    # ========================================================================

    emit_gpu_metric "gpu_temperature_celsius" "GPU temperature in degrees Celsius" "temperature.gpu"
    emit_gpu_metric "gpu_utilization_percent" "GPU core utilization percentage" "utilization.gpu"
    emit_gpu_metric "gpu_memory_utilization_percent" "GPU memory utilization percentage" "utilization.memory"
    emit_gpu_mem_metric "gpu_memory_used_bytes" "GPU memory used in bytes" "memory.used"
    emit_gpu_mem_metric "gpu_memory_total_bytes" "GPU total memory in bytes" "memory.total"
    emit_gpu_mem_metric "gpu_memory_free_bytes" "GPU free memory in bytes" "memory.free"
    emit_gpu_metric "gpu_power_draw_watts" "GPU power draw in watts" "power.draw"
    emit_gpu_metric "gpu_power_limit_watts" "GPU power limit in watts" "power.limit"
    emit_gpu_metric "gpu_fan_speed_percent" "GPU fan speed percentage" "fan.speed"
    emit_gpu_metric "gpu_clock_speed_mhz" "GPU graphics clock speed in MHz" "clocks.current.graphics"
    emit_gpu_metric "gpu_memory_clock_speed_mhz" "GPU memory clock speed in MHz" "clocks.current.memory"

    # Performance state needs special handling (P0 → 0, P8 → 8, etc.)
    echo "# HELP gpu_pstate GPU performance state (0=max, 12=min)"
    echo "# TYPE gpu_pstate gauge"

    local pstate_lines g_pstate pnum
    pstate_lines=$(nvidia-smi --query-gpu=index,pstate --format=csv,noheader,nounits 2>/dev/null) || true

    while IFS=',' read -r g_idx g_pstate; do
        g_idx="${g_idx//[[:space:]]/}"
        g_pstate="${g_pstate//[[:space:]]/}"
        [[ "$g_idx" =~ ^[0-9]+$ ]] || continue
        # A state that is not of the form P<number> is reported as 0.
        pnum=0
        if [[ "$g_pstate" =~ ^P([0-9]+)$ ]]; then
            pnum="${BASH_REMATCH[1]}"
        fi
        printf 'gpu_pstate{gpu="%s"} %s\n' "$g_idx" "$pnum"
    done <<< "$pstate_lines"

    echo ""

    # ========================================================================
    # PER-PROCESS GPU MEMORY
    # ========================================================================

    # Build UUID-to-index mapping
    local -A uuid_to_index=()
    local uuid_lines g_uuid
    uuid_lines=$(nvidia-smi --query-gpu=index,uuid --format=csv,noheader 2>/dev/null) || true

    while IFS=',' read -r g_idx g_uuid; do
        g_idx="${g_idx//[[:space:]]/}"
        g_uuid="${g_uuid//[[:space:]]/}"
        # An empty key is not a valid associative-array subscript.
        [[ "$g_idx" =~ ^[0-9]+$ && -n "$g_uuid" ]] || continue
        uuid_to_index["$g_uuid"]="$g_idx"
    done <<< "$uuid_lines"

    printf '%s\n' \
        '# HELP gpu_process_memory_bytes Per-process GPU memory usage in bytes' \
        '# TYPE gpu_process_memory_bytes gauge'

    local mem_re='^[0-9]+([.][0-9]+)?$'
    local process_lines proc_line rest
    local proc_uuid proc_pid proc_name proc_mem_mib proc_gpu_idx proc_mem_bytes short_name
    process_lines=$(nvidia-smi --query-compute-apps=gpu_uuid,pid,process_name,used_gpu_memory --format=csv,noheader,nounits 2>/dev/null) || true

    while IFS= read -r proc_line; do
        # Need at least four comma-separated fields: uuid, pid, name, memory.
        [[ "$proc_line" == *,*,*,* ]] || continue

        proc_uuid="${proc_line%%,*}"
        rest="${proc_line#*,}"
        proc_pid="${rest%%,*}"
        rest="${rest#*,}"
        # Memory is the last field; the name is whatever lies between the pid
        # and that last field, so a comma inside the name survives.
        proc_mem_mib="${rest##*,}"
        proc_name=$(_gpu_trim "${rest%,*}")

        proc_uuid="${proc_uuid//[[:space:]]/}"
        proc_pid="${proc_pid//[[:space:]]/}"
        proc_mem_mib="${proc_mem_mib//[[:space:]]/}"

        [[ -n "$proc_uuid" && "$proc_pid" =~ ^[0-9]+$ ]] || continue

        # Resolve UUID to GPU index (an unmapped UUID falls back to index 0).
        proc_gpu_idx="${uuid_to_index[$proc_uuid]:-0}"

        # Handle [N/A] memory
        if [[ "$proc_mem_mib" == "[N/A]" ]]; then
            proc_mem_mib=0
        fi
        [[ "$proc_mem_mib" =~ $mem_re ]] || continue

        # Convert MiB to bytes
        proc_mem_bytes=$(_gpu_mib_to_bytes "$proc_mem_mib")

        # Extract short process name from full path
        short_name="${proc_name##*/}"

        printf 'gpu_process_memory_bytes{gpu="%s",pid="%s",process_name="%s"} %s\n' \
            "$proc_gpu_idx" "$proc_pid" "$(prom_escape "$short_name")" "$proc_mem_bytes"
    done <<< "$process_lines"

    echo ""

    # ========================================================================
    # EXPORTER RUNTIME
    # ========================================================================

    local script_end script_duration
    script_end=$(date +%s)
    script_duration=$(( script_end - script_start ))

    printf '%s\n' \
        '# HELP gpu_exporter_duration_seconds Time to generate all metrics' \
        '# TYPE gpu_exporter_duration_seconds gauge' \
        "gpu_exporter_duration_seconds $script_duration" \
        '' \
        '# HELP gpu_exporter_last_run_timestamp Unix timestamp of last successful run' \
        '# TYPE gpu_exporter_last_run_timestamp gauge' \
        "gpu_exporter_last_run_timestamp $script_end"

    echo ""
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTP SERVER MODE
|
||||
# ============================================================================
|
||||
|
||||
# Write one complete HTTP response to stdout for a request line.
# Args: $1 - first line of the HTTP request (e.g. "GET /metrics HTTP/1.1")
_http_respond() {
    local request="$1"

    if [[ "$request" =~ ^GET\ /metrics ]]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
        generate_metrics
    else
        # Serve HTML landing page for other requests
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
        cat <<EOF
<!DOCTYPE html>
<html>
<head><title>GPU Exporter v1.0</title></head>
<body>
<h1>GPU Prometheus Exporter v1.0</h1>
<p><a href="/metrics">Metrics</a></p>
<p>NVIDIA GPU metrics via nvidia-smi.</p>
</body>
</html>
EOF
    fi
}

# Run simple HTTP server using netcat
# Serves metrics on /metrics endpoint and an HTML landing page elsewhere.
# Globals: HTTP_PORT (read)
# Returns: loops forever; exits 1 if nc is missing or the FIFO cannot be made
run_http_server() {
    echo "Starting GPU exporter on port $HTTP_PORT..." >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # nc's stdout (the client's request) is piped into the responder, and the
    # responder's output travels back to nc's stdin through this FIFO. Reading
    # the request inside `{ ...; } | nc` would read the script's own stdin
    # instead of the connection.
    local fifo_dir fifo
    if ! fifo_dir=$(mktemp -d); then
        echo "ERROR: cannot create temporary directory" >&2
        exit 1
    fi
    fifo="$fifo_dir/response"
    if ! mkfifo "$fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: cannot create FIFO $fifo" >&2
        exit 1
    fi
    # The path is expanded now (shell-quoted) so the trap does not depend on
    # this function's locals still being in scope when the shell exits.
    trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT

    local request header

    # Infinite loop accepting HTTP requests, one connection per iteration
    while true; do
        # Running the pipeline as an `if` condition keeps a non-zero nc exit
        # from terminating the server under `set -e` / `pipefail`.
        if ! nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
            # Blocks until a client connects and sends its request line, so
            # metrics are generated at request time.
            IFS= read -r request || request=""
            if [[ -n "$request" ]]; then
                # Consume the remaining request headers up to the blank line;
                # the timeout keeps a client that never sends one from
                # stalling the server.
                while IFS= read -r -t 2 header; do
                    header="${header%$'\r'}"
                    [[ -n "$header" ]] || break
                done
                _http_respond "$request"
            fi
        } > "$fifo"; then
            # Avoid a busy loop when nc fails immediately (e.g. port in use).
            sleep 1
        fi
    done
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
# Main entry point - routes to appropriate output mode
# Globals:   HTTP_MODE, OUTPUT_FILE (read, after parse_args has set them)
# Arguments: the script's command-line arguments
# Outputs:   metrics on stdout in the default mode; status and errors on stderr
# Returns:   0 on success; exits 1 when the metrics file cannot be produced
main() {
    parse_args "$@"

    if [[ "$HTTP_MODE" == true ]]; then
        # Run HTTP server (blocks until killed)
        run_http_server
        return
    fi

    if [[ -z "$OUTPUT_FILE" ]]; then
        # Default: output to stdout
        generate_metrics
        return
    fi

    # Textfile collector mode: write atomically using temp file
    local output_dir temp_file file_lines
    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
        echo "ERROR: Cannot create directory $output_dir" >&2
        exit 1
    fi

    # Create temp file in SAME directory for atomic rename (same filesystem)
    if ! temp_file=$(mktemp "${output_dir}/.gpu_metrics.XXXXXX"); then
        echo "ERROR: Cannot create temporary file in $output_dir" >&2
        exit 1
    fi
    # Remove the temp file on every exit path until it has been renamed. The
    # path is expanded now (shell-quoted) so the trap does not rely on this
    # function's locals when it fires.
    trap "rm -f -- $(printf '%q' "$temp_file")" EXIT

    # Generate metrics to temp file
    if ! generate_metrics > "$temp_file" 2>/dev/null; then
        echo "ERROR: Failed to generate metrics" >&2
        exit 1
    fi

    # Validate: file must exist, have content
    # NOTE(review): the "gpu_count 0" output that generate_metrics prints when
    # no GPU is found is shorter than this threshold, so it is never written
    # in this mode - confirm that keeping the previous file is intended.
    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
    # Some wc implementations pad the count with spaces.
    file_lines="${file_lines//[[:space:]]/}"

    if ! [[ "$file_lines" =~ ^[0-9]+$ ]] || (( file_lines < 10 )); then
        echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
        exit 1
    fi

    # Set permissions before move
    if ! chmod 644 "$temp_file"; then
        echo "ERROR: Cannot set permissions on $temp_file" >&2
        exit 1
    fi

    # Atomic rename — no gap where file is missing
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
        echo "ERROR: Cannot move metrics into place at $OUTPUT_FILE" >&2
        exit 1
    fi
    trap - EXIT

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}
|
||||
|
||||
# Execute main function with all script arguments
|
||||
# Kept as the last statement so every function is defined before it runs;
# "$@" forwards each command-line argument as its own word.
main "$@"
|
||||
Reference in New Issue
Block a user