#!/usr/bin/env bash
#
# Jenkins Prometheus Metrics Exporter
#
# Prometheus textfile collector exporter for Jenkins.
# Uses the Jenkins JSON API to collect build queue length, executor
# utilization, job success/failure rates, build duration, node status,
# disk usage, and memory statistics of the built-in node.
#
# Usage:
#   JENKINS_URL="https://jenkins.example.com" JENKINS_USER="admin" JENKINS_TOKEN="xxx" ./jenkins-exporter.sh
#   JENKINS_URL="https://jenkins.example.com" JENKINS_USER="admin" JENKINS_TOKEN="xxx" ./jenkins-exporter.sh --textfile
#   JENKINS_URL="https://jenkins.example.com" JENKINS_USER="admin" JENKINS_TOKEN="xxx" ./jenkins-exporter.sh --install
#
# Parameters:
#   --textfile    Write to textfile collector directory
#   --install     Create cron job for automatic collection
#   --help        Show usage
#
# Environment:
#   JENKINS_URL     Jenkins base URL (required)
#   JENKINS_USER    Jenkins username (required)
#   JENKINS_TOKEN   Jenkins API token (required)
#   TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
#   CURL_TIMEOUT    API request timeout in seconds (default: 10)
#
# Requirements: bash 4+, curl, jq
#
# Author:  Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Version: 1.0
#
# Metrics Exported:
#   Core:
#     - jenkins_up
#     - jenkins_exporter_info{version}
#
#   Build Queue:
#     - jenkins_build_queue_length
#
#   Executors:
#     - jenkins_executors_total
#     - jenkins_executors_busy
#     - jenkins_executors_idle
#
#   Jobs:
#     - jenkins_jobs_total
#     - jenkins_job_last_build_duration_seconds{job}
#     - jenkins_job_last_build_result{job}
#
#   Nodes:
#     - jenkins_nodes_total
#     - jenkins_nodes_online
#     - jenkins_nodes_offline
#
#   Disk:
#     - jenkins_disk_usage_bytes
#
#   JVM:
#     - jenkins_jvm_heap_used_bytes
#     - jenkins_jvm_heap_max_bytes
#
#   Exporter:
#     - jenkins_exporter_duration_seconds
#     - jenkins_exporter_last_run_timestamp

set -euo pipefail

# --- Configuration ---

readonly VERSION="1.0"
# Declared and marked readonly in two steps so a failing expansion is not
# masked by the exit status of `readonly`.
SCRIPT_NAME="${0##*/}"
readonly SCRIPT_NAME

JENKINS_URL="${JENKINS_URL:-}"
JENKINS_USER="${JENKINS_USER:-}"
JENKINS_TOKEN="${JENKINS_TOKEN:-}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"

TEXTFILE_MODE=false   # set to true by --textfile
OUTPUT=""             # accumulated exposition-format text
START_TIME=""         # nanosecond timestamp taken when collection starts

# --- Functions ---

#######################################
# Print usage information and exit.
# NOTE(review): the original help text was lost when the file was mangled;
# this text is rebuilt from the header comment of the script.
# Globals:   SCRIPT_NAME (read)
# Arguments: $1 - exit status (optional, default 0)
# Outputs:   usage text on stdout
#######################################
usage() {
  cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Prometheus textfile collector exporter for Jenkins.

Options:
  --textfile    Write to textfile collector directory
  --install     Create cron job for automatic collection
  --help        Show usage

Environment:
  JENKINS_URL     Jenkins base URL (required)
  JENKINS_USER    Jenkins username (required)
  JENKINS_TOKEN   Jenkins API token (required)
  TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
  CURL_TIMEOUT    API request timeout in seconds (default: 10)
EOF
  exit "${1:-0}"
}

#######################################
# Abort with an error when an external command the script relies on is absent.
# NOTE(review): the original command list was lost; curl and jq are the only
# external tools this version calls besides coreutils.
# Outputs: error text on stderr
# Returns: exits 1 when something is missing
#######################################
check_dependencies() {
  local -a missing=()
  local cmd
  for cmd in curl jq; do
    if ! command -v "$cmd" &>/dev/null; then
      missing+=("$cmd")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    echo "ERROR: Missing required commands: ${missing[*]}" >&2
    echo "Install with: apt install ${missing[*]}  OR  dnf install ${missing[*]}" >&2
    exit 1
  fi
}

#######################################
# Verify the required environment variables and normalise JENKINS_URL.
# Globals: JENKINS_URL (read/written), JENKINS_USER, JENKINS_TOKEN,
#          CURL_TIMEOUT (read)
# Returns: exits 1 on invalid configuration
#######################################
validate_config() {
  if [[ -z "$JENKINS_URL" ]]; then
    echo "ERROR: JENKINS_URL environment variable is required" >&2
    exit 1
  fi
  if [[ -z "$JENKINS_USER" ]]; then
    echo "ERROR: JENKINS_USER environment variable is required" >&2
    exit 1
  fi
  if [[ -z "$JENKINS_TOKEN" ]]; then
    echo "ERROR: JENKINS_TOKEN environment variable is required" >&2
    exit 1
  fi
  if [[ ! "$CURL_TIMEOUT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "ERROR: CURL_TIMEOUT must be a number of seconds" >&2
    exit 1
  fi

  # Strip trailing slash so endpoints can always be appended with a leading /
  JENKINS_URL="${JENKINS_URL%/}"
}

#######################################
# GET a Jenkins endpoint and print the response body.
# Prints nothing (and still returns 0) on any transport or HTTP error so
# callers can simply test for an empty result.
# Arguments: $1 - endpoint path beginning with "/"
# Outputs:   response body on stdout
#######################################
api_get() {
  local endpoint="$1"

  # Credentials go through a curl config read from stdin instead of -u so
  # the API token never appears in the process list. Backslash and double
  # quote are the characters that need escaping inside a quoted config value.
  local cred="${JENKINS_USER}:${JENKINS_TOKEN}"
  cred="${cred//\\/\\\\}"
  cred="${cred//\"/\\\"}"

  # --globoff: "tree=jobs[name,...]" contains [] which curl would otherwise
  # interpret as a URL glob range and reject before sending the request.
  curl -sf --globoff --max-time "$CURL_TIMEOUT" \
    --config - \
    "${JENKINS_URL}${endpoint}" 2>/dev/null <<<"user = \"${cred}\"" || true
}

#######################################
# Run a jq filter against a JSON string without ever failing the script.
# Arguments: $1 - JSON text
#            $2 - jq filter
#            $3 - fallback printed when jq fails or yields nothing/null
# Outputs:   filter result (or fallback) on stdout
#######################################
json_query() {
  local json="$1" filter="$2" fallback="${3:-}"
  local out
  out=$(jq -r "$filter" <<<"$json" 2>/dev/null) || out=""
  if [[ -z "$out" || "$out" == "null" ]]; then
    out="$fallback"
  fi
  printf '%s\n' "$out"
}

#######################################
# Like json_query, but guarantees a non-negative integer (0 on anything else)
# so the result is safe to use inside $(( )).
# Arguments: $1 - JSON text, $2 - jq filter
#######################################
json_uint() {
  local out
  out=$(json_query "$1" "$2" "0")
  if [[ ! "$out" =~ ^[0-9]+$ ]]; then
    out=0
  fi
  printf '%s\n' "$out"
}

#######################################
# Append the HELP and TYPE lines of a metric family to OUTPUT.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text
#######################################
add_metric_header() {
  local name="$1" type="$2" help="$3"
  OUTPUT+="# HELP ${name} ${help}"$'\n'
  OUTPUT+="# TYPE ${name} ${type}"$'\n'
}

#######################################
# Append one sample line (no HELP/TYPE) to OUTPUT.
# Arguments: $1 - metric name, $2 - value,
#            $3 - label list without braces (optional)
#######################################
add_metric_value() {
  local name="$1"
  local value="$2"
  local labels="${3:-}"

  if [[ -n "$labels" ]]; then
    OUTPUT+="${name}{${labels}} ${value}"$'\n'
  else
    OUTPUT+="${name} ${value}"$'\n'
  fi
}

#######################################
# Append a complete single-sample metric (HELP, TYPE and value) to OUTPUT.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text,
#            $4 - value, $5 - label list without braces (optional)
#######################################
add_metric() {
  local name="$1"
  local type="$2"
  local help="$3"
  local value="$4"
  local labels="${5:-}"

  add_metric_header "$name" "$type" "$help"
  add_metric_value "$name" "$value" "$labels"
}

#######################################
# Emit jenkins_up.
# Returns: 0 when the root API answered with a JSON object, 1 otherwise
#######################################
collect_health() {
  local api_json
  api_json=$(api_get "/api/json")

  # A non-JSON body (proxy error page, login form) counts as "down" too.
  if [[ -n "$api_json" ]] && jq -e 'type == "object"' <<<"$api_json" >/dev/null 2>&1; then
    add_metric "jenkins_up" "gauge" "Jenkins reachability (1=up, 0=down)" "1"
    return 0
  fi

  add_metric "jenkins_up" "gauge" "Jenkins reachability (1=up, 0=down)" "0"
  return 1
}

#######################################
# Emit jenkins_build_queue_length from /queue/api/json.
#######################################
collect_queue() {
  local queue_json
  queue_json=$(api_get "/queue/api/json")
  if [[ -z "$queue_json" ]]; then
    return 0
  fi

  add_metric "jenkins_build_queue_length" "gauge" "Number of items in the build queue" \
    "$(json_uint "$queue_json" '.items | length')"
}

#######################################
# Emit executor, node and built-in-node memory metrics from /computer/api/json.
#######################################
collect_executors() {
  local computer_json
  computer_json=$(api_get "/computer/api/json")
  if [[ -z "$computer_json" ]]; then
    return 0
  fi

  # Executors
  local total_executors busy_executors idle_executors
  total_executors=$(json_uint "$computer_json" '.totalExecutors')
  busy_executors=$(json_uint "$computer_json" '.busyExecutors')
  idle_executors=$((total_executors - busy_executors))

  add_metric "jenkins_executors_total" "gauge" "Total number of executors" "$total_executors"
  add_metric "jenkins_executors_busy" "gauge" "Number of busy executors" "$busy_executors"
  add_metric "jenkins_executors_idle" "gauge" "Number of idle executors" "$idle_executors"

  # Node status
  local nodes_total nodes_online nodes_offline
  nodes_total=$(json_uint "$computer_json" '.computer | length')
  nodes_offline=$(json_uint "$computer_json" '[.computer[]? | select(.offline == true)] | length')
  nodes_online=$((nodes_total - nodes_offline))

  add_metric "jenkins_nodes_total" "gauge" "Total number of nodes" "$nodes_total"
  add_metric "jenkins_nodes_online" "gauge" "Number of online nodes" "$nodes_online"
  add_metric "jenkins_nodes_offline" "gauge" "Number of offline nodes" "$nodes_offline"

  # Memory of the controller ("Built-In Node", formerly "master").
  # The JSON API exposes no JVM heap figures; SwapSpaceMonitor reports the
  # host's physical memory. The metric names are kept for compatibility, but
  # "used" is now total - available (it previously exported the *available*
  # figure under the "used" name).
  local controller='.computer[]?
    | select(.displayName == "Built-In Node"
             or .displayName == "master"
             or ._class == "hudson.model.Hudson$MasterComputer")
    | .monitorData["hudson.node_monitors.SwapSpaceMonitor"]'
  local mem_total mem_available
  mem_total=$(json_query "$computer_json" "first(${controller} | .totalPhysicalMemory) // empty")
  mem_available=$(json_query "$computer_json" "first(${controller} | .availablePhysicalMemory) // empty")

  if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_available" =~ ^[0-9]+$ ]] \
    && ((mem_total >= mem_available)); then
    add_metric "jenkins_jvm_heap_used_bytes" "gauge" \
      "Memory in use on the built-in node (host physical memory from SwapSpaceMonitor)" \
      "$((mem_total - mem_available))"
  fi
  if [[ "$mem_total" =~ ^[0-9]+$ ]]; then
    add_metric "jenkins_jvm_heap_max_bytes" "gauge" \
      "Total memory of the built-in node (host physical memory from SwapSpaceMonitor)" \
      "$mem_total"
  fi
}

#######################################
# Emit jenkins_jobs_total plus per-job last-build duration and result.
# All samples of one metric family are written as one contiguous group, as
# required by the Prometheus text exposition format.
#######################################
collect_jobs() {
  local jobs_json
  jobs_json=$(api_get "/api/json?tree=jobs[name,lastBuild[duration,result]]")
  if [[ -z "$jobs_json" ]]; then
    return 0
  fi

  add_metric "jenkins_jobs_total" "gauge" "Total number of jobs" \
    "$(json_uint "$jobs_json" '.jobs | length')"

  # One jq pass produces "<kind>\t<value>\t<label>" rows:
  #   kind  d = duration in seconds, r = result (1=SUCCESS, 0=anything else)
  #   label job name with every character outside [A-Za-z0-9_-] replaced
  #         by "_" (done by code point, so it is locale independent and a
  #         backslash can no longer leak into the label value).
  # All three fields are always non-empty, so tab-splitting is safe.
  local rows
  rows=$(jq -r '
    def label_safe:
      explode
      | map(if (. >= 48 and . <= 57) or (. >= 65 and . <= 90)
               or (. >= 97 and . <= 122) or . == 95 or . == 45
            then . else 95 end)
      | implode;
    .jobs[]?
    | select((.name | type) == "string" and .name != "")
    | (.name | label_safe) as $job
    | .lastBuild as $build
    | (select($build.duration != null)
         | "d\t\($build.duration / 1000)\t\($job)"),
      (select($build.result != null)
         | "r\t\(if $build.result == "SUCCESS" then 1 else 0 end)\t\($job)")
  ' <<<"$jobs_json" 2>/dev/null) || rows=""

  local -A seen=()
  local -a duration_jobs=() duration_values=() result_jobs=() result_values=()
  local kind value job
  while IFS=$'\t' read -r kind value job; do
    if [[ -z "$kind" || -z "$value" || -z "$job" ]]; then
      continue
    fi
    # Two job names can sanitise to the same label; a duplicate series would
    # make the textfile collector reject the whole file, so keep the first.
    if [[ -n "${seen["${kind}:${job}"]:-}" ]]; then
      continue
    fi
    seen["${kind}:${job}"]=1

    case "$kind" in
      d)
        duration_jobs+=("$job")
        duration_values+=("$value")
        ;;
      r)
        result_jobs+=("$job")
        result_values+=("$value")
        ;;
    esac
  done <<<"$rows"

  local i
  if [[ ${#duration_jobs[@]} -gt 0 ]]; then
    add_metric_header "jenkins_job_last_build_duration_seconds" "gauge" "Last build duration per job"
    for i in "${!duration_jobs[@]}"; do
      add_metric_value "jenkins_job_last_build_duration_seconds" \
        "${duration_values[i]}" "job=\"${duration_jobs[i]}\""
    done
  fi
  if [[ ${#result_jobs[@]} -gt 0 ]]; then
    add_metric_header "jenkins_job_last_build_result" "gauge" "Last build result (1=SUCCESS, 0=FAILURE)"
    for i in "${!result_jobs[@]}"; do
      add_metric_value "jenkins_job_last_build_result" \
        "${result_values[i]}" "job=\"${result_jobs[i]}\""
    done
  fi
}

#######################################
# Emit jenkins_disk_usage_bytes when the disk-usage plugin endpoint answers.
#######################################
collect_disk_usage() {
  local disk_json
  disk_json=$(api_get "/disk-usage/api/json")
  if [[ -z "$disk_json" ]]; then
    return 0
  fi

  local disk_usage
  disk_usage=$(json_query "$disk_json" '.diskUsage // empty')
  if [[ -n "$disk_usage" ]]; then
    add_metric "jenkins_disk_usage_bytes" "gauge" "Total disk usage (disk-usage plugin)" "$disk_usage"
  fi
}

#######################################
# Print elapsed seconds between two nanosecond timestamps, two decimals.
# Prints 0 when either value is not numeric (e.g. a date(1) without %N).
# Arguments: $1 - start (ns), $2 - end (ns)
#######################################
elapsed_seconds() {
  local start="$1" end="$2"
  if [[ "$start" =~ ^[0-9]+$ && "$end" =~ ^[0-9]+$ ]] && ((end >= start)); then
    local ns=$((end - start))
    printf '%d.%02d\n' "$((ns / 1000000000))" "$((ns % 1000000000 / 10000000))"
  else
    printf '0\n'
  fi
}

#######################################
# Write OUTPUT to stdout, or atomically to ${TEXTFILE_DIR}/jenkins.prom when
# TEXTFILE_MODE is true.
# Returns: 1 when the textfile could not be written
#######################################
write_output() {
  if [[ "$TEXTFILE_MODE" != true ]]; then
    printf '%s' "$OUTPUT"
    return 0
  fi

  local output_file="${TEXTFILE_DIR}/jenkins.prom"
  local temp_file
  mkdir -p -- "$TEXTFILE_DIR"
  # The temp file lives in the target directory so mv is an atomic rename.
  # Its name does not end in .prom, so the collector never reads a partial
  # file.
  temp_file=$(mktemp "${output_file}.XXXXXX")

  # mktemp creates the file 0600; node_exporter usually runs as another
  # user, so restore the world-readable mode a plain redirect would give.
  if printf '%s' "$OUTPUT" >"$temp_file" \
    && chmod 644 "$temp_file" \
    && mv -f -- "$temp_file" "$output_file"; then
    return 0
  fi

  rm -f -- "$temp_file"
  echo "ERROR: could not write ${output_file}" >&2
  return 1
}

#######################################
# Install /etc/cron.d/jenkins-exporter to run this script in textfile mode.
# NOTE(review): the original cron file body was lost when the file was
# mangled (only a trailing ">/dev/null" survived). The schedule and the
# environment lines below are a reconstruction - confirm before release.
# Returns: exits 1 when not run as root
#######################################
install_cron() {
  if [[ $EUID -ne 0 ]]; then
    echo "ERROR: --install requires root" >&2
    exit 1
  fi

  local script_path
  script_path=$(readlink -f "$0")
  local cron_file="/etc/cron.d/jenkins-exporter"
  local schedule='* * * * *'   # NOTE(review): assumed once per minute

  # The file carries the API token, so it is truncated and restricted to
  # root *before* the secret is written into it.
  : >"$cron_file"
  chmod 600 "$cron_file"
  cat >"$cron_file" <<EOF
# Jenkins Prometheus exporter - generated by ${SCRIPT_NAME} --install
JENKINS_URL="${JENKINS_URL}"
JENKINS_USER="${JENKINS_USER}"
JENKINS_TOKEN="${JENKINS_TOKEN}"
TEXTFILE_DIR="${TEXTFILE_DIR}"
CURL_TIMEOUT="${CURL_TIMEOUT}"
${schedule} root "${script_path}" --textfile >/dev/null
EOF

  echo "Installed cron job: ${cron_file}"
  echo "Metrics will be written to: ${TEXTFILE_DIR}/jenkins.prom"
}

# --- Main ---

#######################################
# Entry point: parse options, collect every metric group, write the result.
# Arguments: command-line options (--textfile, --install, --help)
#######################################
main() {
  # Parse all arguments first so an invalid option is reported before
  # anything is installed or collected.
  local arg do_install=false
  for arg in "$@"; do
    case "$arg" in
      --textfile)
        TEXTFILE_MODE=true
        ;;
      --install)
        do_install=true
        ;;
      --help | -h)
        usage 0
        ;;
      *)
        echo "Unknown option: $arg" >&2
        usage 1 >&2
        ;;
    esac
  done

  check_dependencies
  validate_config

  if [[ "$do_install" == true ]]; then
    install_cron
    exit 0
  fi

  START_TIME=$(date +%s%N)

  # Exporter info
  add_metric "jenkins_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""

  # Collect metrics. A failing collector must not discard the metrics that
  # were already gathered, so each one is isolated.
  if collect_health; then
    local collector
    for collector in collect_queue collect_executors collect_jobs collect_disk_usage; do
      "$collector" || echo "WARNING: ${collector} failed" >&2
    done
  fi

  # Exporter performance
  local end_time duration
  end_time=$(date +%s%N)
  duration=$(elapsed_seconds "$START_TIME" "$end_time")
  add_metric "jenkins_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
  add_metric "jenkins_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"

  write_output
}

main "$@"