Files
linux-scripts/disk-io-exporter.sh
T

355 lines
12 KiB
Bash

#!/usr/bin/env bash
# disk-io-exporter.sh — Prometheus exporter for per-disk I/O performance
#
# Reads /proc/diskstats and calculates per-disk IOPS, throughput,
# latency, utilization, and queue depth. Takes two samples with a
# configurable interval to compute rates from the cumulative counters.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# License: MIT
# Date: 2026-03-03
# Version: 1.0.0
# Strict mode: abort on command failure, unset variables, and failures
# anywhere in a pipeline.
set -euo pipefail
# ── Configuration ───────────────────────────────────────────────────
# Values are assigned first (environment overrides are honoured where a
# default is given) and then frozen together with a single readonly.
VERSION="1.0.0"
SCRIPT_NAME="${0##*/}"
NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE="${NODE_DIR}/disk_io.prom"
# Scratch file lives next to OUTPUT_FILE and is suffixed with this shell's PID.
TMP_FILE="${OUTPUT_FILE}.$$"
SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}"
DISK_FILTER="${DISK_FILTER:-}"
readonly VERSION SCRIPT_NAME NODE_DIR OUTPUT_FILE TMP_FILE SAMPLE_INTERVAL DISK_FILTER
# Runtime flags (mutable; the command-line parser in main may change them)
DRY_RUN=false
DEBUG="${DEBUG:-}"
# ── Helpers ─────────────────────────────────────────────────────────
# Write a "[DEBUG] ..." line to stderr, but only when DEBUG is non-empty.
# All arguments are joined with spaces into a single message.
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
# Write an "[ERROR] ..." line to stderr; arguments are joined with spaces.
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
# Remove the scratch file named by TMP_FILE; -f keeps this silent when the
# file does not exist.
cleanup() { rm -f "$TMP_FILE"; }
# Run cleanup whenever the shell exits.
trap 'cleanup' EXIT
# Print the usage text to stdout and terminate the script with status 0.
# The here-document delimiter is unquoted on purpose so that SCRIPT_NAME and
# SAMPLE_INTERVAL are expanded into the text.
show_help() {
  cat <<USAGE_TEXT
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for per-disk I/O performance.
Reads /proc/diskstats, takes two samples ${SAMPLE_INTERVAL}s apart, and
calculates rates per disk.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
ENVIRONMENT VARIABLES:
DISK_FILTER Regex of disk names to include (default: all real disks)
Example: DISK_FILTER="^sd[a-z]+$|^nvme[0-9]+n[0-9]+$"
NODE_DIR Textfile collector directory (default: /var/lib/node_exporter)
SAMPLE_INTERVAL Seconds between the two samples (default: 1)
DEBUG Enable debug output when set to any value
EXAMPLES:
$SCRIPT_NAME --dry-run
DISK_FILTER="^sda$" $SCRIPT_NAME
SAMPLE_INTERVAL=2 $SCRIPT_NAME
DEBUG=1 $SCRIPT_NAME --dry-run
FILTERED DEVICES:
loop*, ram* devices are excluded by default. Use DISK_FILTER to
restrict to specific disks (e.g. only sd* or nvme* devices).
USAGE_TEXT
  exit 0
}
# Print "<script name> version <version>" to stdout and terminate the script
# with status 0.
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
# ── Snapshot /proc/diskstats ────────────────────────────────────────
#
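# Fields from /proc/diskstats (the first 14 columns; kernels 4.18+ append
# discard fields and 5.5+ append flush fields, which this script ignores):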
# $1 major
# $2 minor
# $3 device name
# $4 reads completed
# $5 reads merged
# $6 sectors read
# $7 time reading (ms)
# $8 writes completed
# $9 writes merged
# $10 sectors written
# $11 time writing (ms)
# $12 I/Os in progress (instantaneous)
# $13 time doing I/Os (ms)
# $14 weighted time doing I/Os (ms)
# Read one sample of the per-device counters into an associative array.
#
# Arguments:
#   $1 - name of an associative array (declared by the caller) to fill.
#        Keys are "<device>_<field>" for the fields reads, sectors_read,
#        read_ms, writes, sectors_written, write_ms, inflight, io_ms and
#        weighted_ms.
#   $2 - optional path of the stats file to parse (default: /proc/diskstats).
# Globals:
#   DISK_FILTER (read) - optional regex; when non-empty only matching device
#                        names are kept.
# Returns:
#   non-zero if the stats file cannot be opened.
take_snapshot() {
  local -n _snapshot=$1
  local stats_file=${2:-/proc/diskstats}
  # Keep the loop variables local so they do not leak into the caller.
  local dev reads sectors_read read_ms writes sectors_written write_ms
  local inflight io_ms weighted_ms
  # Memory-backed / pseudo devices that are never reported.
  local -r pseudo_re='^(z?ram|loop|fd)[0-9]'
  # Partitions: sda1 / hdb2 / vda1 / xvda1 style, or any "<digit>p<digits>"
  # suffix (nvme0n1p1, mmcblk0p1, md0p1). Whole disks whose own name ends in
  # a digit (mmcblk0, md0, nvme0n1) deliberately do not match.
  local -r partition_re='^(h|s|v|xv)d[a-z]+[0-9]+$|[0-9]p[0-9]+$'
  local -r user_filter=${DISK_FILTER:-}

  # Columns: major minor name reads merged sectors ms writes merged sectors
  # ms in-flight io-ms weighted-ms [anything newer kernels append].
  while read -r _ _ dev reads _ sectors_read read_ms writes _ sectors_written write_ms inflight io_ms weighted_ms _; do
    [[ "$dev" =~ $pseudo_re ]] && continue
    [[ "$dev" =~ $partition_re ]] && continue
    # Apply user filter if set
    if [[ -n "$user_filter" ]] && ! [[ "$dev" =~ $user_filter ]]; then
      continue
    fi
    _snapshot["${dev}_reads"]="$reads"
    _snapshot["${dev}_sectors_read"]="$sectors_read"
    _snapshot["${dev}_read_ms"]="$read_ms"
    _snapshot["${dev}_writes"]="$writes"
    _snapshot["${dev}_sectors_written"]="$sectors_written"
    _snapshot["${dev}_write_ms"]="$write_ms"
    _snapshot["${dev}_inflight"]="$inflight"
    _snapshot["${dev}_io_ms"]="$io_ms"
    _snapshot["${dev}_weighted_ms"]="$weighted_ms"
  done < "$stats_file"
}
# ── Collect device list from a snapshot ─────────────────────────────
# Print the device names found in a snapshot, one per line, sorted.
#
# Arguments:
#   $1 - name of an associative array keyed "<device>_<field>".
# A device is recognised by its "<device>_reads" key; keys with any other
# suffix are ignored.
get_devices() {
  local -n _snap_ref=$1
  local entry
  for entry in "${!_snap_ref[@]}"; do
    [[ "$entry" == *_reads ]] || continue
    printf '%s\n' "${entry%_reads}"
  done | sort
}
# ── Metrics Collection ─────────────────────────────────────────────
# Print the "# HELP" / "# TYPE" preamble of one linux_disk_io_* gauge.
#   $1 - metric suffix (e.g. read_iops)   $2 - help text
_disk_io_header() {
  echo "# HELP linux_disk_io_${1} ${2}"
  echo "# TYPE linux_disk_io_${1} gauge"
}

# Print one sample line for a disk and mirror the value to the debug log.
#   $1 - metric suffix   $2 - device name   $3 - formatted value
_disk_io_sample() {
  echo "linux_disk_io_${1}{disk=\"${2}\"} ${3}"
  debug_echo "${2} ${1}=${3}"
}

# Print (num * num_scale) / (den * den_scale) with two decimals.
#   $1 - num   $2 - den   $3 - num_scale (default 1)   $4 - den_scale (default 1)
# Values reach awk through -v rather than being spliced into the program
# text, and LC_ALL=C keeps "." as the decimal separator.
_disk_io_ratio() {
  LC_ALL=C awk -v n="$1" -v d="$2" -v ns="${3:-1}" -v ds="${4:-1}" \
    'BEGIN { printf "%.2f", (n * ns) / (d * ds) }'
}

# Take two snapshots SAMPLE_INTERVAL seconds apart and print per-disk rate
# metrics plus exporter metadata in Prometheus text format on stdout.
#
# Globals:   SAMPLE_INTERVAL (read)
# Calls:     take_snapshot, get_devices, debug_echo, log_error
# Outputs:   metrics on stdout; diagnostics on stderr
# Returns:   1 if SAMPLE_INTERVAL is not a positive decimal number;
#            0 otherwise (including the "no disks" case, which prints only
#            a "# No disks found" comment).
collect_metrics() {
  local start_time
  start_time=$(date +%s%N)

  # Reject an unusable interval up front: it is both the sleep duration and
  # the divisor of every rate below.
  local interval="$SAMPLE_INTERVAL"
  local -r number_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
  if ! [[ "$interval" =~ $number_re ]] \
    || ! LC_ALL=C awk -v i="$interval" 'BEGIN { exit !(i > 0) }'; then
    log_error "SAMPLE_INTERVAL must be a positive number of seconds (got '${interval}')"
    return 1
  fi

  # First snapshot
  local -A snap1=()
  take_snapshot snap1
  debug_echo "First snapshot taken"
  sleep "$interval"
  # Second snapshot
  local -A snap2=()
  take_snapshot snap2
  debug_echo "Second snapshot taken after ${interval}s interval"

  local devices
  devices=$(get_devices snap2)

  # Only disks present in BOTH snapshots have a baseline. A disk that showed
  # up between the samples would otherwise have its lifetime counters
  # reported as if they had accumulated within one interval.
  local -a disks=()
  local dev
  while IFS= read -r dev; do
    [[ -n "$dev" ]] || continue
    if [[ -z "${snap1[${dev}_reads]+set}" ]]; then
      debug_echo "$dev has no baseline sample; skipping"
      continue
    fi
    disks+=("$dev")
  done <<< "$devices"

  if [[ ${#disks[@]} -eq 0 ]]; then
    log_error "No disks found after filtering"
    echo "# No disks found"
    return
  fi

  # Counter differences per disk and field. A negative difference means the
  # counter was reset or wrapped between the samples; report 0 instead of a
  # negative rate.
  local -A delta=()
  local field diff
  for dev in "${disks[@]}"; do
    for field in reads writes sectors_read sectors_written read_ms write_ms io_ms weighted_ms; do
      diff=$(( ${snap2[${dev}_${field}]:-0} - ${snap1[${dev}_${field}]:-0} ))
      if (( diff < 0 )); then
        diff=0
      fi
      delta["${dev}_${field}"]=$diff
    done
  done

  # ── HELP/TYPE headers and metric values ──
  local value

  _disk_io_header read_iops "Read operations per second"
  for dev in "${disks[@]}"; do
    value=$(_disk_io_ratio "${delta[${dev}_reads]}" "$interval")
    _disk_io_sample read_iops "$dev" "$value"
  done

  _disk_io_header write_iops "Write operations per second"
  for dev in "${disks[@]}"; do
    value=$(_disk_io_ratio "${delta[${dev}_writes]}" "$interval")
    _disk_io_sample write_iops "$dev" "$value"
  done

  # Sector counts are converted to bytes at 512 bytes per sector.
  _disk_io_header read_bytes_per_sec "Bytes read per second"
  for dev in "${disks[@]}"; do
    value=$(_disk_io_ratio "${delta[${dev}_sectors_read]}" "$interval" 512)
    _disk_io_sample read_bytes_per_sec "$dev" "$value"
  done

  _disk_io_header write_bytes_per_sec "Bytes written per second"
  for dev in "${disks[@]}"; do
    value=$(_disk_io_ratio "${delta[${dev}_sectors_written]}" "$interval" 512)
    _disk_io_sample write_bytes_per_sec "$dev" "$value"
  done

  # Time spent on reads+writes divided by the number of reads+writes;
  # reported as 0.00 when no operation completed in the interval.
  _disk_io_header await_ms "Average I/O latency in milliseconds"
  local total_ops total_ms
  for dev in "${disks[@]}"; do
    total_ops=$(( ${delta[${dev}_reads]} + ${delta[${dev}_writes]} ))
    total_ms=$(( ${delta[${dev}_read_ms]} + ${delta[${dev}_write_ms]} ))
    if (( total_ops > 0 )); then
      value=$(_disk_io_ratio "$total_ms" "$total_ops")
    else
      value="0.00"
    fi
    _disk_io_sample await_ms "$dev" "$value"
  done

  # io_ms is milliseconds spent doing I/O; interval is in seconds. The
  # result is capped at 100.
  _disk_io_header util_percent "Disk utilization percentage"
  for dev in "${disks[@]}"; do
    value=$(LC_ALL=C awk -v d="${delta[${dev}_io_ms]}" -v i="$interval" \
      'BEGIN { v = (d / (i * 1000)) * 100; if (v > 100) v = 100; printf "%.2f", v }')
    _disk_io_sample util_percent "$dev" "$value"
  done

  _disk_io_header queue_depth "Weighted number of I/Os in progress (avgqu-sz)"
  for dev in "${disks[@]}"; do
    value=$(_disk_io_ratio "${delta[${dev}_weighted_ms]}" "$interval" 1 1000)
    _disk_io_sample queue_depth "$dev" "$value"
  done

  # ── Script metadata metrics ──
  local end_time runtime
  end_time=$(date +%s%N)
  runtime=$(LC_ALL=C awk -v s="$start_time" -v e="$end_time" \
    'BEGIN { printf "%.3f", (e - s) / 1000000000 }')
  echo ""
  echo "# HELP linux_disk_io_exporter_duration_seconds Script execution time"
  echo "# TYPE linux_disk_io_exporter_duration_seconds gauge"
  echo "linux_disk_io_exporter_duration_seconds ${runtime}"
  echo "# HELP linux_disk_io_exporter_last_run_timestamp Last successful run"
  echo "# TYPE linux_disk_io_exporter_last_run_timestamp gauge"
  echo "linux_disk_io_exporter_last_run_timestamp $(date +%s)"
  echo "# HELP linux_disk_io_exporter_success Whether the exporter ran successfully"
  echo "# TYPE linux_disk_io_exporter_success gauge"
  echo "linux_disk_io_exporter_success 1"
}
# ── Main ────────────────────────────────────────────────────────────
# Entry point: parse command-line options, then either print the metrics to
# stdout (--dry-run) or write them to OUTPUT_FILE via TMP_FILE.
#
# Arguments: the script's command-line options (see show_help).
# Globals:   DRY_RUN, DEBUG (written); NODE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Exits:     1 on an unknown option, a missing /proc/diskstats, or a missing
#            NODE_DIR; 0 after --dry-run, --help or --version.
main() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run) DRY_RUN=true ;;
      --debug) DEBUG=1 ;;
      -h | --help) show_help ;;
      -v | --version) show_version ;;
      *)
        log_error "Unknown option: $arg"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  [[ -f /proc/diskstats ]] || {
    log_error "/proc/diskstats not found — this script requires a Linux system"
    exit 1
  }

  # Dry run: metrics go straight to stdout and nothing is written to disk.
  if [[ "$DRY_RUN" == true ]]; then
    collect_metrics
    exit 0
  fi

  [[ -d "$NODE_DIR" ]] || {
    log_error "Textfile collector directory does not exist: $NODE_DIR"
    exit 1
  }

  # Write to the scratch file first and rename it into place, so the output
  # file is replaced in a single step.
  collect_metrics >"$TMP_FILE"
  chmod 644 "$TMP_FILE"
  mv -f "$TMP_FILE" "$OUTPUT_FILE"
  debug_echo "Metrics written to $OUTPUT_FILE"
}
# Run the exporter, forwarding every command-line argument unchanged.
main "$@"