Files
linux-scripts/memory-pressure-exporter.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

724 lines
27 KiB
Bash

#!/bin/bash
################################################################################
# Script Name: memory-pressure-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for memory and swap pressure metrics.
# Exports PSI stall information, OOM kill events, swap activity
# rates, NUMA memory balance, slab pressure, transparent hugepage
# stats, and zone watermark proximity.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
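# - Standard Unix tools (awk, grep, cat, date, sleep, mktemp)
# - GNU grep with PCRE support (grep -P), used to extract the last OOM victim
# - netcat (nc) accepting "-l -p PORT -q SECONDS", for HTTP mode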
# - Optional: journalctl (OOM tracking), kernel 4.20+ (PSI),
# multi-node NUMA. Each section is skipped gracefully if unavailable.
#
# Usage:
# # Output to stdout
# ./memory-pressure-exporter.sh
#
# # HTTP server mode
# ./memory-pressure-exporter.sh --http -p 9198
#
# # Textfile collector mode
# ./memory-pressure-exporter.sh --textfile
#
# Metrics Exported:
# Core Status:
# - memory_pressure_up - Exporter status (1=up, 0=down)
# - memory_pressure_exporter_info{version} - Exporter version
#
# PSI Memory (if /proc/pressure/memory exists):
# - memory_pressure_psi_some_avg10 - PSI memory some avg10
# - memory_pressure_psi_some_avg60 - PSI memory some avg60
# - memory_pressure_psi_some_avg300 - PSI memory some avg300
# - memory_pressure_psi_some_total_microseconds - PSI memory some total
# - memory_pressure_psi_full_avg10 - PSI memory full avg10
# - memory_pressure_psi_full_avg60 - PSI memory full avg60
# - memory_pressure_psi_full_avg300 - PSI memory full avg300
# - memory_pressure_psi_full_total_microseconds - PSI memory full total
#
# PSI I/O (if /proc/pressure/io exists):
# - memory_pressure_psi_io_some_avg10 - PSI I/O some avg10
# - memory_pressure_psi_io_some_avg60 - PSI I/O some avg60
# - memory_pressure_psi_io_some_avg300 - PSI I/O some avg300
# - memory_pressure_psi_io_some_total_microseconds - PSI I/O some total
# - memory_pressure_psi_io_full_avg10 - PSI I/O full avg10
# - memory_pressure_psi_io_full_avg60 - PSI I/O full avg60
# - memory_pressure_psi_io_full_avg300 - PSI I/O full avg300
# - memory_pressure_psi_io_full_total_microseconds - PSI I/O full total
#
# OOM Kills (if journalctl available):
# - memory_pressure_oom_kills_24h - OOM kills in last 24 hours
# - memory_pressure_oom_last_kill_timestamp - Unix timestamp of last OOM
# - memory_pressure_oom_last_victim{process} - Last killed process (1)
#
# Swap Activity:
# - memory_pressure_swap_in_pages_per_sec - Swap in pages/sec
# - memory_pressure_swap_out_pages_per_sec - Swap out pages/sec
# - memory_pressure_swap_in_bytes_per_sec - Swap in bytes/sec
# - memory_pressure_swap_out_bytes_per_sec - Swap out bytes/sec
#
# NUMA (if multi-node):
# - memory_pressure_numa_total_bytes{node} - Total memory per node
# - memory_pressure_numa_free_bytes{node} - Free memory per node
# - memory_pressure_numa_used_percent{node} - Usage percentage per node
#
# Transparent Hugepages:
# - memory_pressure_thp_fault_alloc_total - THP fault allocations
# - memory_pressure_thp_collapse_alloc_total - THP collapse allocations
# - memory_pressure_thp_fault_fallback_total - THP fault fallbacks
# - memory_pressure_compact_stall_total - Compaction stalls
#
# Slab:
# - memory_pressure_slab_reclaimable_bytes - Reclaimable slab
# - memory_pressure_slab_unreclaimable_bytes - Unreclaimable slab
# - memory_pressure_slab_total_bytes - Total slab
# - memory_pressure_slab_unreclaimable_percent - Unreclaimable percentage
#
# Zone Watermarks:
# - memory_pressure_zone_free_pages{zone} - Current free pages
# - memory_pressure_zone_min_pages{zone} - Min watermark
# - memory_pressure_zone_low_pages{zone} - Low watermark
# - memory_pressure_zone_high_pages{zone} - High watermark
# - memory_pressure_zone_free_above_low{zone} - 1 if free > low
#
# Exporter:
# - memory_pressure_exporter_duration_seconds - Script execution time
# - memory_pressure_exporter_last_run_timestamp - Last run timestamp
#
# Configuration:
# Default HTTP port: 9198
# Textfile directory: /var/lib/node_exporter
# SAMPLE_INTERVAL: seconds between swap activity samples (default: 1)
#
################################################################################
# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
# Directory used for --textfile output (node_exporter textfile collector).
TEXTFILE_DIR='/var/lib/node_exporter'
# Destination file for the metrics; empty means "print to stdout".
OUTPUT_FILE=''
# Switched to "true" by --http to serve metrics over HTTP.
HTTP_MODE='false'
# TCP port for HTTP mode; overridden by -p/--port.
HTTP_PORT='9198'
# Seconds between the two swap counter samples; a value already present in
# the environment wins over the default of 1.
: "${SAMPLE_INTERVAL:=1}"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print the command-line help text to stdout and terminate the script.
# Globals:   HTTP_PORT (read) - shown as the port used by --http
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 0
show_usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    'Export memory and swap pressure statistics as Prometheus metrics (v1.0).' \
    'MODES:' \
    '--textfile Write to node_exporter textfile collector' \
    "--http Run HTTP server on port $HTTP_PORT" \
    'OPTIONS:' \
    '-p, --port HTTP port (default: 9198)' \
    '-o, --output Output file path' \
    'EXAMPLES:' \
    "$0 --textfile # Write to textfile collector" \
    "$0 --http --port 9198 # Run HTTP server" \
    "$0 -o /tmp/memory_pressure.prom # Write to custom file" \
    'SECTIONS (auto-detected, skipped if unavailable):' \
    '- PSI memory and I/O pressure (requires kernel 4.20+)' \
    '- OOM kill tracking (requires journalctl)' \
    '- Swap activity rates (always available)' \
    '- NUMA memory balance (requires multi-node system)' \
    '- Transparent hugepage stats (always available)' \
    '- Slab pressure (always available)' \
    '- Zone watermark proximity (always available)'
  exit 0
}
# Parse the command line into the global configuration variables.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: "$@" - the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   0 when every argument was consumed; exits 1 on an unknown
#            option, a missing option value, or an invalid port
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        # show_usage terminates the script, so no shift is needed here.
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/memory_pressure.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port)
        # Guard before "shift 2": with only one positional parameter left the
        # shift fails WITHOUT shifting, and the loop would spin forever.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        if [[ ! "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
          echo "Invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
# ============================================================================
# PSI PRESSURE
# ============================================================================
# Extract the pressure averages and cumulative stall time from a PSI file.
# Arguments: $1 - PSI file to read (e.g. /proc/pressure/memory, /proc/pressure/io)
# Outputs:   one line per input line, "type avg10 avg60 avg300 total", where
#            type is the line's first field ("some" or "full" in a PSI file)
#            and any key missing from the line is reported as 0
# Returns:   1 (with no output) when the file does not exist
get_psi_stats() {
  local source_file="$1"
  if [ ! -f "$source_file" ]; then
    return 1
  fi
  awk '
    {
      # Start every record from zeroed defaults, then overlay key=value pairs.
      stat["avg10"] = stat["avg60"] = stat["avg300"] = stat["total"] = 0
      for (col = 2; col <= NF; col++) {
        split($col, pair, "=")
        if (pair[1] in stat) stat[pair[1]] = pair[2]
      }
      print $1, stat["avg10"], stat["avg60"], stat["avg300"], stat["total"]
    }
  ' "$source_file"
}
# ============================================================================
# OOM KILL TRACKING
# ============================================================================
# Count kernel journal entries matching "Out of memory" from the last 24 hours.
# Arguments: none
# Outputs:   the number of matching journal lines; "0" when journalctl is
#            not installed
get_oom_kill_count() {
  command -v journalctl >/dev/null 2>&1 || { echo "0"; return; }

  local matches
  matches=$(
    journalctl -k --grep="Out of memory" --since "24 hours ago" --no-pager -q 2>/dev/null \
      | wc -l
  )
  echo "${matches:-0}"
}
# Report when the newest "Out of memory" kernel journal entry of the last
# 24 hours was logged.
# Arguments: none
# Outputs:   the entry's timestamp as whole Unix seconds (no trailing
#            newline); "0" when there is no such entry or journalctl is
#            not installed
get_oom_last_timestamp() {
  command -v journalctl >/dev/null 2>&1 || { echo "0"; return; }

  local newest_entry
  newest_entry=$(
    journalctl -k --grep="Out of memory" --since "24 hours ago" --no-pager -q -o short-unix 2>/dev/null \
      | tail -n 1
  )
  if [ -z "$newest_entry" ]; then
    echo "0"
    return
  fi
  # short-unix output starts with "seconds.microseconds"; %d drops the fraction.
  awk '{printf "%d", $1}' <<<"$newest_entry"
}
# Report the process name from the newest "Killed process" kernel journal
# entry of the last 24 hours.
# Arguments: none
# Outputs:   the name found between the parentheses of
#            "Killed process <pid> (<name>)"; nothing when there is no such
#            entry or journalctl is not installed
get_oom_last_victim() {
  command -v journalctl >/dev/null 2>&1 || return 0

  # \K discards the matched prefix so only the name inside "(...)" is printed.
  local victim_pattern='Killed process \d+ \(\K[^)]+'
  journalctl -k --grep="Killed process" --since "24 hours ago" --no-pager -q 2>/dev/null \
    | tail -n 1 \
    | grep -oP "$victim_pattern" \
    | head -n 1
}
# ============================================================================
# SWAP ACTIVITY
# ============================================================================
# Read the cumulative swap-in / swap-out page counters from /proc/vmstat.
# Arguments: none
# Outputs:   "pswpin pswpout" on one line (a counter that is absent from the
#            file is printed as an empty field)
get_swap_counters() {
  awk '
    /^pswp(in|out) / { pages[$1] = $2 }
    END { print pages["pswpin"], pages["pswpout"] }
  ' /proc/vmstat 2>/dev/null
}
# ============================================================================
# NUMA MEMORY
# ============================================================================
# Tell whether the system exposes a second NUMA node in sysfs.
# Arguments: none
# Returns:   0 when /sys/devices/system/node/node1 is a directory, 1 otherwise
is_numa_multi_node() {
  [[ -d /sys/devices/system/node/node1 ]]
}
# Report total and free memory for every NUMA node directory.
# Arguments: $1 - (optional) directory holding the nodeN subdirectories;
#                 defaults to /sys/devices/system/node
# Outputs:   one line per node that has a meminfo file:
#            "nodeN total_kb free_kb" (0 for a value missing from the file)
# Returns:   1 when the directory does not exist, 0 otherwise
get_numa_memory() {
  local node_dir="${1:-/sys/devices/system/node}"
  # Declared local so the loop does not leak node_path into the caller's scope.
  local node_path meminfo total free

  [ -d "$node_dir" ] || return 1

  for node_path in "$node_dir"/node[0-9]*; do
    [ -d "$node_path" ] || continue
    meminfo="$node_path/meminfo"
    [ -f "$meminfo" ] || continue

    # Per-node meminfo lines look like "Node 0 MemTotal:  123 kB", so the key
    # is field 3 and the value field 4. Matching the key exactly avoids
    # picking up any other line that merely contains the same substring.
    total=""
    free=""
    read -r total free < <(
      awk '
        $3 == "MemTotal:" { total = $4 }
        $3 == "MemFree:"  { free = $4 }
        END { print total + 0, free + 0 }
      ' "$meminfo" 2>/dev/null
    )
    echo "${node_path##*/} ${total:-0} ${free:-0}"
  done
  return 0
}
# ============================================================================
# TRANSPARENT HUGEPAGES & COMPACTION
# ============================================================================
# Read transparent-hugepage and compaction counters from /proc/vmstat.
# Arguments: none
# Outputs:   "thp_fault_alloc thp_collapse_alloc thp_fault_fallback compact_stall"
#            on one line; a counter absent from the file is printed as 0
get_thp_stats() {
  awk '
    /^(thp_fault_alloc|thp_collapse_alloc|thp_fault_fallback|compact_stall) / {
      counter[$1] = $2
    }
    END {
      print counter["thp_fault_alloc"] + 0, \
            counter["thp_collapse_alloc"] + 0, \
            counter["thp_fault_fallback"] + 0, \
            counter["compact_stall"] + 0
    }
  ' /proc/vmstat 2>/dev/null
}
# ============================================================================
# SLAB MEMORY
# ============================================================================
# Read the slab memory sizes from /proc/meminfo.
# Arguments: none
# Outputs:   "reclaimable_kb unreclaimable_kb" on one line, taken from the
#            SReclaimable and SUnreclaim entries (0 when an entry is absent)
get_slab_stats() {
  awk '
    /^(SReclaimable|SUnreclaim):/ {
      key = $0
      sub(/:.*/, "", key)   # keep only the name in front of the colon
      kb[key] = $2
    }
    END { print kb["SReclaimable"] + 0, kb["SUnreclaim"] + 0 }
  ' /proc/meminfo 2>/dev/null
}
# ============================================================================
# ZONE WATERMARKS
# ============================================================================
# Report free pages and min/low/high watermarks for the Normal and DMA32
# memory zones.
#
# A zone name can occur once per NUMA node in zoneinfo (e.g. "Node 0, zone
# Normal" and "Node 1, zone Normal"). Because callers key their output on the
# zone name alone, the values of same-named zones are summed so that exactly
# one line per zone name is produced. On a single-node system the output is
# simply that node's values.
#
# Arguments: $1 - (optional) zoneinfo file to parse; defaults to /proc/zoneinfo
# Outputs:   one line per zone name, in order of first appearance:
#            "zone free min low high" (all values in pages)
get_zone_watermarks() {
  local zoneinfo="${1:-/proc/zoneinfo}"
  awk '
    /^Node [0-9]+, zone +[A-Za-z0-9]+/ {
      zone = $NF
      tracked = (zone == "Normal" || zone == "DMA32")
      # Start each zone from zero so a missing line cannot inherit the
      # previous zone'"'"'s value.
      free = min_wm = low_wm = 0
      next
    }
    !tracked { next }
    $1 == "pages" && $2 == "free" { free = $3 }
    $1 == "min" { min_wm = $2 }
    $1 == "low" { low_wm = $2 }
    $1 == "high" {
      # "high" is the last watermark line of a zone: fold the zone into the
      # per-name totals and ignore the rest of its block.
      if (!(zone in seen)) {
        seen[zone] = 1
        order[++zone_count] = zone
      }
      sum_free[zone] += free
      sum_min[zone]  += min_wm
      sum_low[zone]  += low_wm
      sum_high[zone] += $2
      tracked = 0
    }
    END {
      for (i = 1; i <= zone_count; i++) {
        z = order[i]
        print z, sum_free[z] + 0, sum_min[z] + 0, sum_low[z] + 0, sum_high[z] + 0
      }
    }
  ' "$zoneinfo" 2>/dev/null
}
# ============================================================================
# METRICS GENERATION
# ============================================================================
# Print the "# HELP" / "# TYPE" header pair of one metric family.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text
_mp_metric_header() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Print a complete unlabelled metric (header, sample, blank separator line).
# Arguments: $1 - metric name, $2 - metric type, $3 - help text, $4 - value
_mp_metric() {
  _mp_metric_header "$1" "$2" "$3"
  printf '%s %s\n\n' "$1" "$4"
}

# Print a labelled metric family: the header exactly once, followed by all
# of its pre-rendered sample lines and a blank separator line.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text,
#            $4 - sample lines, each terminated by a newline
_mp_metric_family() {
  _mp_metric_header "$1" "$2" "$3"
  printf '%s\n' "$4"
}

# Escape a string for use as a label value (backslash and double quote).
# Arguments: $1 - raw value
_mp_escape_label() {
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# Print the avg10/avg60/avg300/total metrics for each PSI line.
# Arguments: $1 - metric name prefix (e.g. memory_pressure_psi)
#            $2 - resource name used in the help text (e.g. memory, I/O)
#            $3 - lines of "type avg10 avg60 avg300 total"
_mp_emit_psi() {
  local prefix="$1" resource="$2" data="$3"
  local type avg10 avg60 avg300 total
  while read -r type avg10 avg60 avg300 total; do
    # The type becomes part of the metric name, so accept known values only.
    [[ "$type" == "some" || "$type" == "full" ]] || continue
    _mp_metric "${prefix}_${type}_avg10" gauge \
      "PSI ${resource} ${type} avg10 percentage" "$avg10"
    _mp_metric "${prefix}_${type}_avg60" gauge \
      "PSI ${resource} ${type} avg60 percentage" "$avg60"
    _mp_metric "${prefix}_${type}_avg300" gauge \
      "PSI ${resource} ${type} avg300 percentage" "$avg300"
    _mp_metric "${prefix}_${type}_total_microseconds" counter \
      "PSI ${resource} ${type} total stall time in microseconds" "$total"
  done <<<"$data"
}

# Collect every metric section and print it in Prometheus text format.
#
# Sections: exporter status, PSI memory and I/O (only when the
# /proc/pressure files exist), OOM kills, swap rates (two samples taken
# SAMPLE_INTERVAL seconds apart), NUMA memory (multi-node systems only),
# THP/compaction counters, slab memory, zone watermarks, exporter runtime.
#
# Each metric family's HELP/TYPE header is printed exactly once and all of
# its samples follow it directly, including the per-node and per-zone
# families that carry several samples.
#
# Globals:   SAMPLE_INTERVAL (read) - seconds between swap samples; anything
#            that is not a positive number falls back to 1
# Arguments: none
# Outputs:   metrics on stdout
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # ------------------------------------------------------------------------
  # Exporter status
  # ------------------------------------------------------------------------
  _mp_metric memory_pressure_up gauge "Exporter status (1=up)" 1
  _mp_metric_family memory_pressure_exporter_info gauge \
    "Exporter version information" \
    'memory_pressure_exporter_info{version="1.0"} 1'$'\n'

  # ------------------------------------------------------------------------
  # PSI memory and I/O pressure
  # ------------------------------------------------------------------------
  if [ -f /proc/pressure/memory ]; then
    _mp_emit_psi memory_pressure_psi "memory" "$(get_psi_stats /proc/pressure/memory)"
  fi
  if [ -f /proc/pressure/io ]; then
    _mp_emit_psi memory_pressure_psi_io "I/O" "$(get_psi_stats /proc/pressure/io)"
  fi

  # ------------------------------------------------------------------------
  # OOM kill events
  # ------------------------------------------------------------------------
  local oom_count oom_timestamp oom_victim
  oom_count=$(get_oom_kill_count)
  oom_timestamp=$(get_oom_last_timestamp)
  oom_victim=$(get_oom_last_victim)
  _mp_metric memory_pressure_oom_kills_24h gauge \
    "OOM kills in the last 24 hours" "${oom_count:-0}"
  _mp_metric memory_pressure_oom_last_kill_timestamp gauge \
    "Unix timestamp of last OOM kill (0 if none)" "${oom_timestamp:-0}"
  if [ -n "$oom_victim" ]; then
    # The name comes from the kernel log; escape it so a quote or backslash
    # cannot break the label syntax.
    _mp_metric_family memory_pressure_oom_last_victim gauge \
      "Last OOM-killed process (value is always 1)" \
      "memory_pressure_oom_last_victim{process=\"$(_mp_escape_label "$oom_victim")\"} 1"$'\n'
  fi

  # ------------------------------------------------------------------------
  # Swap activity (two-sample delta)
  # ------------------------------------------------------------------------
  # Validate the interval: it is used as a divisor, and it is handed to awk
  # as data (-v) rather than spliced into the awk program text.
  local interval="${SAMPLE_INTERVAL:-1}"
  if ! [[ "$interval" =~ ^[0-9]*[.]?[0-9]+$ ]] \
    || ! awk -v secs="$interval" 'BEGIN { exit !(secs + 0 > 0) }'; then
    interval=1
  fi
  # pswpin/pswpout count pages, so bytes depend on the system page size.
  local page_size
  page_size=$(getconf PAGESIZE 2>/dev/null)
  [[ "$page_size" =~ ^[0-9]+$ ]] || page_size=4096

  local pin1 pout1 pin2 pout2
  read -r pin1 pout1 <<<"$(get_swap_counters)"
  sleep "$interval"
  read -r pin2 pout2 <<<"$(get_swap_counters)"

  local swap_in_rate swap_out_rate swap_in_bytes swap_out_bytes
  read -r swap_in_rate swap_out_rate swap_in_bytes swap_out_bytes < <(
    awk -v in1="${pin1:-0}" -v in2="${pin2:-0}" \
        -v out1="${pout1:-0}" -v out2="${pout2:-0}" \
        -v secs="$interval" -v page="$page_size" '
      BEGIN {
        pages_in = in2 - in1
        pages_out = out2 - out1
        printf "%.2f %.2f %.2f %.2f\n", \
          pages_in / secs, pages_out / secs, \
          (pages_in * page) / secs, (pages_out * page) / secs
      }'
  )
  _mp_metric memory_pressure_swap_in_pages_per_sec gauge \
    "Pages swapped in per second" "${swap_in_rate:-0.00}"
  _mp_metric memory_pressure_swap_out_pages_per_sec gauge \
    "Pages swapped out per second" "${swap_out_rate:-0.00}"
  _mp_metric memory_pressure_swap_in_bytes_per_sec gauge \
    "Bytes swapped in per second" "${swap_in_bytes:-0.00}"
  _mp_metric memory_pressure_swap_out_bytes_per_sec gauge \
    "Bytes swapped out per second" "${swap_out_bytes:-0.00}"

  # ------------------------------------------------------------------------
  # NUMA memory balance
  # ------------------------------------------------------------------------
  if is_numa_multi_node; then
    local numa_data node total_kb free_kb used_pct
    local numa_total="" numa_free="" numa_used=""
    numa_data=$(get_numa_memory)
    # Collect the samples per family first so each header is printed once.
    while read -r node total_kb free_kb; do
      [[ -n "$node" && "$total_kb" =~ ^[0-9]+$ && "$free_kb" =~ ^[0-9]+$ ]] || continue
      if [ "$total_kb" -gt 0 ]; then
        used_pct=$(awk -v total="$total_kb" -v free="$free_kb" \
          'BEGIN { printf "%.2f", ((total - free) / total) * 100 }')
      else
        used_pct="0.00"
      fi
      numa_total+="memory_pressure_numa_total_bytes{node=\"${node}\"} $(( 10#$total_kb * 1024 ))"$'\n'
      numa_free+="memory_pressure_numa_free_bytes{node=\"${node}\"} $(( 10#$free_kb * 1024 ))"$'\n'
      numa_used+="memory_pressure_numa_used_percent{node=\"${node}\"} ${used_pct}"$'\n'
    done <<<"$numa_data"
    if [ -n "$numa_total" ]; then
      _mp_metric_family memory_pressure_numa_total_bytes gauge \
        "Total memory per NUMA node in bytes" "$numa_total"
      _mp_metric_family memory_pressure_numa_free_bytes gauge \
        "Free memory per NUMA node in bytes" "$numa_free"
      _mp_metric_family memory_pressure_numa_used_percent gauge \
        "Memory usage percentage per NUMA node" "$numa_used"
    fi
  fi

  # ------------------------------------------------------------------------
  # Transparent hugepages & compaction
  # ------------------------------------------------------------------------
  local thp_stats
  thp_stats=$(get_thp_stats)
  if [ -n "$thp_stats" ]; then
    local thp_fault thp_collapse thp_fallback compact_stall
    read -r thp_fault thp_collapse thp_fallback compact_stall <<<"$thp_stats"
    _mp_metric memory_pressure_thp_fault_alloc_total counter \
      "THP fault allocations" "${thp_fault:-0}"
    _mp_metric memory_pressure_thp_collapse_alloc_total counter \
      "THP collapse allocations" "${thp_collapse:-0}"
    _mp_metric memory_pressure_thp_fault_fallback_total counter \
      "THP fault fallbacks to regular pages" "${thp_fallback:-0}"
    _mp_metric memory_pressure_compact_stall_total counter \
      "Memory compaction stalls" "${compact_stall:-0}"
  fi

  # ------------------------------------------------------------------------
  # Slab memory
  # ------------------------------------------------------------------------
  local slab_stats
  slab_stats=$(get_slab_stats)
  if [ -n "$slab_stats" ]; then
    local slab_reclaim_kb slab_unreclaim_kb
    local slab_reclaim_bytes slab_unreclaim_bytes slab_total_bytes slab_unreclaim_pct
    read -r slab_reclaim_kb slab_unreclaim_kb <<<"$slab_stats"
    [[ "$slab_reclaim_kb" =~ ^[0-9]+$ ]] || slab_reclaim_kb=0
    [[ "$slab_unreclaim_kb" =~ ^[0-9]+$ ]] || slab_unreclaim_kb=0
    slab_reclaim_bytes=$(( 10#$slab_reclaim_kb * 1024 ))
    slab_unreclaim_bytes=$(( 10#$slab_unreclaim_kb * 1024 ))
    slab_total_bytes=$(( slab_reclaim_bytes + slab_unreclaim_bytes ))
    if [ "$slab_total_bytes" -gt 0 ]; then
      slab_unreclaim_pct=$(awk -v part="$slab_unreclaim_bytes" -v whole="$slab_total_bytes" \
        'BEGIN { printf "%.2f", (part / whole) * 100 }')
    else
      slab_unreclaim_pct="0.00"
    fi
    _mp_metric memory_pressure_slab_reclaimable_bytes gauge \
      "Reclaimable slab memory in bytes" "$slab_reclaim_bytes"
    _mp_metric memory_pressure_slab_unreclaimable_bytes gauge \
      "Unreclaimable slab memory in bytes" "$slab_unreclaim_bytes"
    _mp_metric memory_pressure_slab_total_bytes gauge \
      "Total slab memory in bytes" "$slab_total_bytes"
    _mp_metric memory_pressure_slab_unreclaimable_percent gauge \
      "Percentage of slab memory that is unreclaimable" "$slab_unreclaim_pct"
  fi

  # ------------------------------------------------------------------------
  # Zone watermarks
  # ------------------------------------------------------------------------
  local zone_data zone free min_wm low_wm high_wm above_low
  local zone_free="" zone_min="" zone_low="" zone_high="" zone_above=""
  zone_data=$(get_zone_watermarks)
  while read -r zone free min_wm low_wm high_wm; do
    [[ -n "$zone" && "$free" =~ ^[0-9]+$ && "$low_wm" =~ ^[0-9]+$ ]] || continue
    above_low=1
    if [ "$free" -le "$low_wm" ]; then
      above_low=0
    fi
    zone_free+="memory_pressure_zone_free_pages{zone=\"${zone}\"} ${free}"$'\n'
    zone_min+="memory_pressure_zone_min_pages{zone=\"${zone}\"} ${min_wm}"$'\n'
    zone_low+="memory_pressure_zone_low_pages{zone=\"${zone}\"} ${low_wm}"$'\n'
    zone_high+="memory_pressure_zone_high_pages{zone=\"${zone}\"} ${high_wm}"$'\n'
    zone_above+="memory_pressure_zone_free_above_low{zone=\"${zone}\"} ${above_low}"$'\n'
  done <<<"$zone_data"
  if [ -n "$zone_free" ]; then
    _mp_metric_family memory_pressure_zone_free_pages gauge \
      "Current free pages per zone" "$zone_free"
    _mp_metric_family memory_pressure_zone_min_pages gauge \
      "Min watermark pages per zone" "$zone_min"
    _mp_metric_family memory_pressure_zone_low_pages gauge \
      "Low watermark pages per zone" "$zone_low"
    _mp_metric_family memory_pressure_zone_high_pages gauge \
      "High watermark pages per zone" "$zone_high"
    _mp_metric_family memory_pressure_zone_free_above_low gauge \
      "Whether free pages are above the low watermark (1=above, 0=below)" "$zone_above"
  fi

  # ------------------------------------------------------------------------
  # Exporter runtime
  # ------------------------------------------------------------------------
  local script_end
  script_end=$(date +%s)
  _mp_metric memory_pressure_exporter_duration_seconds gauge \
    "Time to generate all metrics" "$(( script_end - script_start ))"
  _mp_metric memory_pressure_exporter_last_run_timestamp gauge \
    "Unix timestamp of last successful run" "$script_end"
}
# ============================================================================
# HTTP SERVER MODE
# ============================================================================
# Answer a single HTTP request.
# Reads the request from stdin (request line plus headers up to the blank
# line) and writes the response to stdout: the metrics for a request line
# starting with "GET /metrics", the HTML index page for anything else.
_mp_http_respond() {
  local request="" header
  # Time-limited reads so a silent client cannot block the server forever.
  read -r -t 5 request
  # Consume the remaining request headers so the peer is not left with
  # unread data when the response is sent.
  while IFS= read -r -t 5 header; do
    header=${header%$'\r'}
    [[ -n "$header" ]] || break
  done

  if [[ "$request" =~ ^GET\ /metrics ]]; then
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
    generate_metrics
  else
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
    cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Memory Pressure Exporter v1.0</title></head>
<body>
<h1>Memory Pressure Exporter v1.0</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Sections (auto-detected)</h2>
<ul>
<li>PSI memory and I/O pressure (requires kernel 4.20+)</li>
<li>OOM kill tracking (requires journalctl)</li>
<li>Swap activity rates</li>
<li>NUMA memory balance (requires multi-node system)</li>
<li>Transparent hugepage and compaction stats</li>
<li>Slab memory pressure</li>
<li>Zone watermark proximity</li>
</ul>
</body>
</html>
EOF
  fi
}

# Serve metrics over HTTP with netcat, one connection at a time, forever.
#
# netcat prints what the client sent on its stdout and sends its stdin to the
# client. To let the responder actually see the request, netcat's stdout is
# looped back to the responder's stdin through a FIFO; a plain
# "{ read ...; } | nc" would read from the script's own stdin instead.
#
# Globals:   HTTP_PORT (read); _MP_HTTP_FIFO_DIR (written, used for cleanup)
# Arguments: none
# Returns:   does not return; exits 1 when nc is missing or the FIFO cannot
#            be created
run_http_server() {
  echo "Starting memory pressure exporter on port $HTTP_PORT..." >&2
  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  local request_fifo
  if ! _MP_HTTP_FIFO_DIR=$(mktemp -d); then
    echo "ERROR: Cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  request_fifo="$_MP_HTTP_FIFO_DIR/request"
  # Remove the FIFO directory on every exit path, including signals.
  trap 'rm -rf -- "${_MP_HTTP_FIFO_DIR:?}"' EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM
  if ! mkfifo "$request_fifo"; then
    echo "ERROR: Cannot create FIFO $request_fifo" >&2
    exit 1
  fi

  while true; do
    # Pause briefly when nc fails (e.g. port in use) instead of busy-looping.
    _mp_http_respond < "$request_fifo" \
      | nc -l -p "$HTTP_PORT" -q 1 > "$request_fifo" 2>/dev/null \
      || sleep 1
  done
}
# ============================================================================
# MAIN EXECUTION
# ============================================================================
# Script entry point: parse the arguments, then run in the selected mode.
#   - HTTP mode (--http): serve metrics until killed.
#   - File mode (--textfile / -o): write the metrics to a temporary file in
#     the destination directory and rename it over OUTPUT_FILE, so a reader
#     never sees a partially written file. Output with fewer than 10 lines
#     is rejected and the previous file is kept.
#   - Otherwise: print the metrics to stdout.
# Globals:   HTTP_MODE, OUTPUT_FILE (read, set by parse_args);
#            _MP_TEMP_FILE (written, used for cleanup)
# Arguments: "$@" - the script's command-line arguments
# Returns:   exits 1 when the output directory or temp file cannot be
#            created, metric generation fails, the output is too small, or
#            the final rename fails
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
    return
  fi
  if [ -z "$OUTPUT_FILE" ]; then
    generate_metrics
    return
  fi

  local output_dir file_lines
  output_dir="$(dirname -- "$OUTPUT_FILE")"
  if ! mkdir -p -- "$output_dir"; then
    echo "ERROR: Cannot create output directory $output_dir" >&2
    exit 1
  fi
  if ! _MP_TEMP_FILE=$(mktemp "${output_dir}/.memory_pressure_metrics.XXXXXX"); then
    echo "ERROR: Cannot create temporary file in $output_dir" >&2
    exit 1
  fi
  # From here on every exit path removes the temp file; the trap is cleared
  # again once the file has been renamed into place.
  trap 'rm -f -- "$_MP_TEMP_FILE"' EXIT

  if ! generate_metrics > "$_MP_TEMP_FILE" 2>/dev/null; then
    echo "ERROR: Failed to generate metrics" >&2
    exit 1
  fi

  file_lines=$(wc -l < "$_MP_TEMP_FILE" 2>/dev/null || echo 0)
  if [ "$file_lines" -lt 10 ]; then
    echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
    exit 1
  fi

  chmod 644 "$_MP_TEMP_FILE"
  if ! mv -f -- "$_MP_TEMP_FILE" "$OUTPUT_FILE"; then
    echo "ERROR: Cannot write $OUTPUT_FILE" >&2
    exit 1
  fi
  trap - EXIT
  echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}
# Run the exporter, forwarding the script's command-line arguments to main.
main "$@"