linux-scripts/ollama-exporter.sh

#!/bin/bash
################################################################################
# Script Name: ollama-exporter.sh
# Version: 1.1
# Description: Prometheus exporter for Ollama LLM — model inventory, loaded
#              model status, VRAM/RAM usage, GPU count, API health, and
#              exporter diagnostics
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - Ollama installed and running
#   - curl for API calls
#   - jq for JSON parsing
#   - netcat (nc) for HTTP mode
#
# Usage:
#   ./ollama-exporter.sh                          # stdout
#   ./ollama-exporter.sh --http -p 9194           # HTTP server
#   ./ollama-exporter.sh --textfile               # node_exporter textfile
#   ./ollama-exporter.sh --ollama-url http://host:11434
#
# Metrics Exported:
#   - ollama_up - API reachability (1=up, 0=down)
#   - ollama_info{version} - Ollama version info
#   - ollama_models_available - Total models pulled
#   - ollama_models_loaded - Currently loaded models
#   - ollama_model_size_bytes{model} - Model file size
#   - ollama_model_vram_bytes{model} - VRAM usage per loaded model
#   - ollama_model_ram_bytes{model} - RAM usage per loaded model
#   - ollama_model_loaded{model} - Model loaded status (1/0)
#   - ollama_gpu_count - GPUs detected
#   - ollama_exporter_duration_seconds - Script execution time
#   - ollama_exporter_last_run_timestamp - Last run timestamp
#
# Configuration:
#   Default HTTP port: 9194
#   Default Ollama URL: http://localhost:11434
#   Textfile directory: /var/lib/node_exporter
#
################################################################################
# v1.1 changes:
#   - Fixed: curl in variable assignments crashes under set -euo pipefail when connection fails. Added || true guards
################################################################################

# Strict mode: abort on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

# Base URL of the Ollama API (overridden by --ollama-url)
OLLAMA_URL='http://localhost:11434'

# HTTP server mode: off unless --http is given; listening port set by -p/--port
HTTP_MODE=false
HTTP_PORT=9194

# Directory used for the default output file when --textfile is given
TEXTFILE_DIR='/var/lib/node_exporter'

# Destination file for the metrics; empty means "write to stdout"
OUTPUT_FILE=''

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print the command-line help text to stdout and terminate the script.
# Globals: HTTP_PORT (read) - shown in the --http description
# Outputs: usage text on stdout
# Returns: never returns; exits the shell with status 0
show_usage() {
    local prog="$0"

    # One argument per output line; the trailing empty string reproduces the
    # blank line that ends the help text.
    printf '%s\n' \
        "Usage: ${prog} [OPTIONS]" \
        "" \
        "Export Ollama LLM statistics as Prometheus metrics via the Ollama API." \
        "" \
        "MODES:" \
        "    --textfile      Write to node_exporter textfile collector" \
        "    --http          Run HTTP server on port ${HTTP_PORT}" \
        "" \
        "OPTIONS:" \
        "    -p, --port        HTTP port (default: 9194)" \
        "    -o, --output      Output file path" \
        "    --ollama-url      Ollama API URL (default: http://localhost:11434)" \
        "" \
        "EXAMPLES:" \
        "    ${prog} --textfile                                  # Write to textfile collector" \
        "    ${prog} --http --port 9194                          # Run HTTP server" \
        "    ${prog} --ollama-url http://remote:11434            # Custom Ollama URL" \
        "    ${prog} -o /tmp/ollama.prom                         # Write to custom file" \
        ""
    exit 0
}

# Parse command-line options into the global configuration variables.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT,
#            OLLAMA_URL (written)
# Arguments: the script's command-line arguments ("$@")
# Returns:   0 on success; exits the script with status 1 on an unknown
#            option, an option missing its value, or an invalid port.
#            -h/--help prints usage via show_usage, which exits 0.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/ollama.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port|-o|--output|--ollama-url)
                # Without this check "$2" is unset: under `set -u` that is an
                # opaque "unbound variable" abort, and without it `shift 2`
                # fails without shifting, so the loop would never terminate.
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: option $1 requires an argument" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port)
                        # 10# forces base 10 so a leading zero is not read as octal
                        if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                            echo "ERROR: invalid port: $2" >&2
                            exit 1
                        fi
                        HTTP_PORT="$2"
                        ;;
                    -o|--output)
                        OUTPUT_FILE="$2"
                        ;;
                    --ollama-url)
                        OLLAMA_URL="$2"
                        ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

# Verify that the external tools needed for metric collection are on PATH.
# Tools are checked in order (curl, then jq); the first missing one is
# reported on stderr and stops the check.
# Returns: 0 if both tools are available, 1 otherwise
check_prerequisites() {
    local tool

    for tool in curl jq; do
        if command -v "$tool" >/dev/null 2>&1; then
            continue
        fi

        case "$tool" in
            jq) echo "ERROR: jq not found (required for JSON parsing)" >&2 ;;
            *)  echo "ERROR: $tool not found" >&2 ;;
        esac
        return 1
    done

    return 0
}

# Escape a string for use as a Prometheus label value.
# Backslash becomes \\, double quote becomes \", and line feed becomes the
# two-character sequence \n, as the Prometheus text exposition format requires.
# Args:    $1 - string to escape
# Outputs: the escaped string followed by a newline, on stdout
prom_escape() {
    local val="$1"
    # Backslashes first, so the backslashes added below are not doubled again
    val="${val//\\/\\\\}"
    val="${val//\"/\\\"}"
    val="${val//$'\n'/\\n}"
    # printf instead of echo: echo would treat values such as "-n" or "-e"
    # as options and print nothing
    printf '%s\n' "$val"
}

# ============================================================================
# METRIC GENERATION
# ============================================================================

# Generate all Prometheus metrics
# Globals: OLLAMA_URL (read)
# Calls:   check_prerequisites, prom_escape, curl, jq and, when present,
#          nvidia-smi
# Outputs: Prometheus text format metrics on stdout
# Returns: 0. When curl/jq are missing or the health check does not answer
#          HTTP 200, only "ollama_up 0" is emitted.
generate_metrics() {
    local script_start
    script_start=$(date +%s)

    # Loop variables are declared here so they stay local to the function
    local model_name model_size model_vram model loaded gpu_id line
    local ram_usage is_loaded

    cat <<EOF
# HELP ollama_up Ollama API reachability (1=up, 0=down)
# TYPE ollama_up gauge
EOF

    # Check prerequisites
    if ! check_prerequisites; then
        echo "ollama_up 0"
        return 0
    fi

    # Check API health
    local health_response
    health_response=$(curl -s -o /dev/null -w "%{http_code}" "${OLLAMA_URL}/" 2>/dev/null) || health_response=""

    if [ "$health_response" != "200" ]; then
        echo "ollama_up 0"
        return 0
    fi

    echo "ollama_up 1"

    echo ""

    # ========================================================================
    # VERSION INFO
    # ========================================================================

    local version_response ollama_version
    version_response=$(curl -s "${OLLAMA_URL}/api/version" 2>/dev/null || true)
    # jq exits non-zero on unparsable input; the guard keeps that from
    # aborting the script under `set -e` / pipefail
    ollama_version=$(printf '%s\n' "$version_response" | jq -r '.version // "unknown"' 2>/dev/null) || ollama_version=""
    ollama_version=${ollama_version:-unknown}

    cat <<EOF
# HELP ollama_info Ollama version information
# TYPE ollama_info gauge
ollama_info{version="$(prom_escape "$ollama_version")"} 1
EOF

    echo ""

    # ========================================================================
    # MODEL INVENTORY
    # ========================================================================

    local tags_response
    tags_response=$(curl -s "${OLLAMA_URL}/api/tags" 2>/dev/null || true)

    local models_available=0
    if [ -n "$tags_response" ] && [ "$tags_response" != "null" ]; then
        models_available=$(printf '%s\n' "$tags_response" | jq '[.models // [] | .[]] | length' 2>/dev/null) || models_available=0
        models_available=${models_available:-0}
    fi

    cat <<EOF
# HELP ollama_models_available Total number of models pulled
# TYPE ollama_models_available gauge
ollama_models_available $models_available
EOF

    echo ""

    # Per-model file sizes
    cat <<EOF
# HELP ollama_model_size_bytes Model file size in bytes
# TYPE ollama_model_size_bytes gauge
EOF

    local all_model_names=()

    if [ -n "$tags_response" ] && [ "$tags_response" != "null" ]; then
        while IFS=$'\t' read -r model_name model_size; do
            [ -z "$model_name" ] && continue
            all_model_names+=("$model_name")
            echo "ollama_model_size_bytes{model=\"$(prom_escape "$model_name")\"} $model_size"
        done < <(printf '%s\n' "$tags_response" | jq -r '
            .models // [] | .[] |
            "\(.name)\t\(.size // 0)"
        ' 2>/dev/null)
    fi

    echo ""

    # ========================================================================
    # RUNNING MODELS
    # ========================================================================

    local ps_response
    ps_response=$(curl -s "${OLLAMA_URL}/api/ps" 2>/dev/null || true)

    local models_loaded=0
    if [ -n "$ps_response" ] && [ "$ps_response" != "null" ]; then
        models_loaded=$(printf '%s\n' "$ps_response" | jq '[.models // [] | .[]] | length' 2>/dev/null) || models_loaded=0
        models_loaded=${models_loaded:-0}
    fi

    cat <<EOF
# HELP ollama_models_loaded Currently loaded models in memory
# TYPE ollama_models_loaded gauge
ollama_models_loaded $models_loaded
EOF

    echo ""

    # Per-model VRAM and RAM usage
    cat <<EOF
# HELP ollama_model_vram_bytes VRAM usage per loaded model in bytes
# TYPE ollama_model_vram_bytes gauge
EOF

    local loaded_names=()

    if [ -n "$ps_response" ] && [ "$ps_response" != "null" ]; then
        while IFS=$'\t' read -r model_name model_vram; do
            [ -z "$model_name" ] && continue
            loaded_names+=("$model_name")
            echo "ollama_model_vram_bytes{model=\"$(prom_escape "$model_name")\"} $model_vram"
        done < <(printf '%s\n' "$ps_response" | jq -r '
            .models // [] | .[] |
            "\(.name)\t\(.size_vram // 0)"
        ' 2>/dev/null)
    fi

    echo ""

    cat <<EOF
# HELP ollama_model_ram_bytes RAM usage per loaded model in bytes
# TYPE ollama_model_ram_bytes gauge
EOF

    if [ -n "$ps_response" ] && [ "$ps_response" != "null" ]; then
        while IFS=$'\t' read -r model_name model_size model_vram; do
            [ -z "$model_name" ] && continue
            # RAM share = total size minus the VRAM part, clamped at zero
            ram_usage=$(( model_size - model_vram ))
            if [ "$ram_usage" -lt 0 ]; then
                ram_usage=0
            fi
            echo "ollama_model_ram_bytes{model=\"$(prom_escape "$model_name")\"} $ram_usage"
        done < <(printf '%s\n' "$ps_response" | jq -r '
            .models // [] | .[] |
            "\(.name)\t\(.size // 0)\t\(.size_vram // 0)"
        ' 2>/dev/null)
    fi

    echo ""

    # ========================================================================
    # MODEL LOADED STATUS
    # ========================================================================

    cat <<EOF
# HELP ollama_model_loaded Model loaded status (1=loaded, 0=not loaded)
# TYPE ollama_model_loaded gauge
EOF

    # The ${arr[@]+...} form expands to nothing for an empty array; a plain
    # "${arr[@]}" on an empty array is an "unbound variable" error under
    # `set -u` on bash older than 4.4
    for model in ${all_model_names[@]+"${all_model_names[@]}"}; do
        is_loaded=0
        for loaded in ${loaded_names[@]+"${loaded_names[@]}"}; do
            if [ "$model" = "$loaded" ]; then
                is_loaded=1
                break
            fi
        done
        echo "ollama_model_loaded{model=\"$(prom_escape "$model")\"} $is_loaded"
    done

    echo ""

    # ========================================================================
    # GPU COUNT
    # ========================================================================

    local gpu_count=0 smi_output="" smi_ok=false

    if command -v nvidia-smi >/dev/null 2>&1; then
        # Only trust the listing when nvidia-smi exits successfully: a failing
        # run may still print diagnostic text that must not be counted as
        # GPUs, and its non-zero status must not abort the script
        if smi_output=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null); then
            smi_ok=true
            while IFS= read -r line; do
                if [ -n "$line" ]; then
                    gpu_count=$((gpu_count + 1))
                fi
            done <<< "$smi_output"
        fi
    fi

    # Fallback when nvidia-smi is absent or failed: estimate from /api/ps
    if [ "$smi_ok" = false ] && [ -n "$ps_response" ] && [ "$ps_response" != "null" ]; then
        local max_gpu=0
        while read -r gpu_id; do
            [ -z "$gpu_id" ] && continue
            if [ "$gpu_id" -gt "$max_gpu" ] 2>/dev/null; then
                max_gpu=$gpu_id
            fi
        done < <(printf '%s\n' "$ps_response" | jq -r '
            .models // [] | .[] |
            .details.gpu_id // empty
        ' 2>/dev/null)
        if [ "$max_gpu" -gt 0 ] 2>/dev/null; then
            gpu_count=$((max_gpu + 1))
        elif [ "$models_loaded" -gt 0 ]; then
            # No usable gpu_id: report one GPU if any loaded model uses VRAM
            local has_vram
            has_vram=$(printf '%s\n' "$ps_response" | jq '[.models // [] | .[] | select(.size_vram > 0)] | length' 2>/dev/null) || has_vram=0
            if [ "${has_vram:-0}" -gt 0 ]; then
                gpu_count=1
            fi
        fi
    fi

    cat <<EOF
# HELP ollama_gpu_count Number of GPUs detected
# TYPE ollama_gpu_count gauge
ollama_gpu_count $gpu_count
EOF

    echo ""

    # ========================================================================
    # EXPORTER RUNTIME
    # ========================================================================

    local script_end script_duration
    script_end=$(date +%s)
    script_duration=$((script_end - script_start))

    cat <<EOF
# HELP ollama_exporter_duration_seconds Time to generate all metrics
# TYPE ollama_exporter_duration_seconds gauge
ollama_exporter_duration_seconds $script_duration

# HELP ollama_exporter_last_run_timestamp Unix timestamp of last successful run
# TYPE ollama_exporter_last_run_timestamp gauge
ollama_exporter_last_run_timestamp $script_end
EOF

    echo ""
}

# ============================================================================
# HTTP SERVER MODE
# ============================================================================

# Run a simple single-connection HTTP server using netcat.
# "GET /metrics..." requests receive the output of generate_metrics; any
# other request receives a small HTML landing page.
# Globals: HTTP_PORT (read)
# Returns: does not return; loops until the script is killed. Exits 1 if nc
#          is missing or the FIFO cannot be created.
run_http_server() {
    echo "Starting Ollama exporter on port $HTTP_PORT..." >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # In "handler | nc" the handler's stdin is the script's own stdin, not the
    # socket, so it can never see the client's request. A FIFO closes the
    # loop: nc's output (the request) is piped into the handler, and the
    # handler's output (the response) reaches nc's stdin through the FIFO.
    local fifo_dir fifo
    if ! fifo_dir=$(mktemp -d); then
        echo "ERROR: cannot create temporary directory for HTTP mode" >&2
        exit 1
    fi
    fifo="$fifo_dir/response"
    if ! mkfifo "$fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: cannot create FIFO $fifo" >&2
        exit 1
    fi
    # The path is expanded now (hence the double quotes) so the trap does not
    # depend on this function's local variables when it fires.
    # shellcheck disable=SC2064
    trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT

    local request

    # Infinite loop accepting HTTP requests, one connection per iteration
    while true; do
        nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
            request=""
            # First request line, e.g. "GET /metrics HTTP/1.1"; a client that
            # sends nothing is served the landing page
            read -r request || true
            # Check if request is for /metrics endpoint
            if [[ "$request" =~ ^GET\ /metrics ]]; then
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
                generate_metrics
            else  # Serve HTML landing page for other requests
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
                cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Ollama Exporter v1.1</title></head>
<body>
<h1>Ollama Prometheus Exporter v1.1</h1>
<p><a href="/metrics">Metrics</a></p>
<p>Operational metrics from the Ollama API.</p>
</body>
</html>
EOF
            fi
        } > "$fifo" || sleep 1  # keep serving after a failed request; the pause avoids a busy loop if nc cannot bind
    done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Main entry point - parses arguments and routes to the selected output mode.
# Globals:   HTTP_MODE, OUTPUT_FILE (read, after parse_args has set them)
# Arguments: the script's command-line arguments ("$@")
# Modes:     --http           run the HTTP server (blocks until killed)
#            --textfile / -o  write metrics atomically to OUTPUT_FILE
#            otherwise        print metrics to stdout
# Returns:   0 on success; exits 1 if the metrics file cannot be produced
main() {
    parse_args "$@"

    if [ "$HTTP_MODE" = true ]; then
        # Run HTTP server (blocks until killed)
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        # Textfile collector mode: write atomically using temp file
        local output_dir
        output_dir="$(dirname "$OUTPUT_FILE")"
        mkdir -p "$output_dir"

        # Create temp file in SAME directory for atomic rename (same filesystem)
        local temp_file
        temp_file=$(mktemp "${output_dir}/.ollama_metrics.XXXXXX")

        # Remove the temp file on every early exit, including `set -e` aborts
        # in chmod/mv. The path is expanded now (hence the double quotes)
        # because the local variable is gone once main has returned.
        # shellcheck disable=SC2064
        trap "rm -f -- $(printf '%q' "$temp_file")" EXIT

        # Generate metrics to temp file
        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            echo "ERROR: Failed to generate metrics" >&2
            exit 1
        fi

        # Validate on content rather than size: the "API down" output is only
        # three lines long, and rejecting it would leave a stale "ollama_up 1"
        # in place while Ollama is unreachable.
        if ! grep -Eq '^ollama_up [01]$' "$temp_file"; then
            echo "ERROR: Metrics output has no ollama_up sample, keeping previous" >&2
            exit 1
        fi

        local file_lines
        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

        # Set permissions before move
        chmod 644 "$temp_file"

        # Atomic rename - no gap where file is missing
        mv -f "$temp_file" "$OUTPUT_FILE"
        trap - EXIT

        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
    else
        # Default: output to stdout
        generate_metrics
    fi
}

# Script entry point: forward every command-line argument, unmodified, to main
main "$@"