#!/usr/bin/env bash # disk-io-exporter.sh — Prometheus exporter for per-disk I/O performance # # Reads /proc/diskstats and calculates per-disk IOPS, throughput, # latency, utilization, and queue depth. Takes two samples with a # configurable interval to compute rates from the cumulative counters. # # Author: Phil Connor # Contact: contact@mylinux.work # License: MIT # Date: 2026-03-03 # Version: 1.0.0 set -euo pipefail # ── Configuration ─────────────────────────────────────────────────── readonly VERSION="1.0.0" readonly SCRIPT_NAME="${0##*/}" readonly NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}" readonly OUTPUT_FILE="${NODE_DIR}/disk_io.prom" readonly TMP_FILE="${OUTPUT_FILE}.$$" readonly SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}" readonly DISK_FILTER="${DISK_FILTER:-}" # Runtime flags DRY_RUN=false DEBUG=${DEBUG:-} # ── Helpers ───────────────────────────────────────────────────────── debug_echo() { if [[ -n "$DEBUG" ]]; then echo "[DEBUG] $*" >&2 fi } log_error() { echo "[ERROR] $*" >&2 } cleanup() { rm -f "$TMP_FILE" } trap cleanup EXIT show_help() { cat < 100) v = 100; printf \"%.2f\", v}") echo "linux_disk_io_util_percent{disk=\"${dev}\"} ${value}" debug_echo "$dev util_percent=$value" done <<< "$devices" echo "# HELP linux_disk_io_queue_depth Weighted number of I/Os in progress (avgqu-sz)" echo "# TYPE linux_disk_io_queue_depth gauge" while read -r dev; do local m1 m2 delta m1="${snap1[${dev}_weighted_ms]:-0}" m2="${snap2[${dev}_weighted_ms]:-0}" delta=$((m2 - m1)) local value value=$(awk "BEGIN {printf \"%.2f\", $delta / ($interval * 1000)}") echo "linux_disk_io_queue_depth{disk=\"${dev}\"} ${value}" debug_echo "$dev queue_depth=$value" done <<< "$devices" # ── Script metadata metrics ── local end_time runtime end_time=$(date +%s%N) runtime=$(awk "BEGIN {printf \"%.3f\", ($end_time - $start_time) / 1000000000}") echo "" echo "# HELP linux_disk_io_exporter_duration_seconds Script execution time" echo "# TYPE linux_disk_io_exporter_duration_seconds gauge" echo "linux_disk_io_exporter_duration_seconds ${runtime}" echo "# HELP linux_disk_io_exporter_last_run_timestamp Last successful run" echo "# TYPE linux_disk_io_exporter_last_run_timestamp gauge" echo "linux_disk_io_exporter_last_run_timestamp $(date +%s)" echo "# HELP linux_disk_io_exporter_success Whether the exporter ran successfully" echo "# TYPE linux_disk_io_exporter_success gauge" echo "linux_disk_io_exporter_success 1" } # ── Main ──────────────────────────────────────────────────────────── main() { while [[ $# -gt 0 ]]; do case "$1" in --dry-run) DRY_RUN=true shift ;; --debug) DEBUG=1 shift ;; --help|-h) show_help ;; --version|-v) show_version ;; *) log_error "Unknown option: $1" echo "Use --help for usage information" >&2 exit 1 ;; esac done if [[ ! -f /proc/diskstats ]]; then log_error "/proc/diskstats not found — this script requires a Linux system" exit 1 fi if [[ "$DRY_RUN" == true ]]; then collect_metrics exit 0 fi if [[ ! -d "$NODE_DIR" ]]; then log_error "Textfile collector directory does not exist: $NODE_DIR" exit 1 fi collect_metrics > "$TMP_FILE" chmod 644 "$TMP_FILE" mv -f "$TMP_FILE" "$OUTPUT_FILE" debug_echo "Metrics written to $OUTPUT_FILE" } main "$@"