#!/bin/bash
################################################
#### GitLab Metrics Collector               ####
#### for Prometheus node_exporter textfile  ####
####                                        ####
#### Author: Phil Connor                    ####
#### Contact: contact@mylinux.work          ####
#### Version: 1.00-030426                   ####
################################################
#
# Collects GitLab metrics through the REST API (and optionally by scraping the
# local /-/metrics and Sidekiq exporter endpoints) and writes them atomically
# to "$NODE_DIR/gitlab_metrics.prom" for the node_exporter textfile collector.
# Run with --help for options and environment variables.

set -o pipefail

SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Default configuration
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter"
readonly DEFAULT_COLLECTION_INTERVAL=60
readonly DEFAULT_MAX_PROJECTS=100
readonly DEFAULT_CURL_TIMEOUT=30
readonly DEFAULT_METRICS_URL="http://localhost/-/metrics"
readonly DEFAULT_SIDEKIQ_URL="http://localhost:8082/metrics"

# Configuration variables (can be overridden by environment)
GITLAB_URL=${GITLAB_URL:-}
GITLAB_TOKEN=${GITLAB_TOKEN:-}
GITLAB_METRICS_URL=${GITLAB_METRICS_URL:-$DEFAULT_METRICS_URL}
GITLAB_SIDEKIQ_URL=${GITLAB_SIDEKIQ_URL:-$DEFAULT_SIDEKIQ_URL}
NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR}
COLLECTION_INTERVAL=${COLLECTION_INTERVAL:-$DEFAULT_COLLECTION_INTERVAL}
MAX_PROJECTS=${MAX_PROJECTS:-$DEFAULT_MAX_PROJECTS}
DEBUG=${DEBUG:-}

# Runtime flags
RUN_MODE="once"
LOCAL_MODE=false

# Error tracking (reset at the start of every collection run)
ERRORS_TOTAL=0

# Field separator (ASCII unit separator) for rows produced by json_rows. A
# non-whitespace IFS character keeps empty fields from collapsing in `read`.
readonly FIELD_SEP=$'\x1f'
# jq tail that turns an array of fields into one FIELD_SEP-joined row. Newlines
# and separators embedded in a value are replaced so a row is always one line.
readonly JQ_ROW_JOIN='map(tostring | split("\n") | join("_") | split("\u001f") | join("_")) | join("\u001f")'
# jq field list used by every per-project collector: id, name, namespace name.
readonly JQ_PROJECT_FIELDS='[.id, (.name // "unknown"), (.namespace.name // "unknown")]'

#######################################
# ERR trap handler: report the failing line and exit with the same status.
# Arguments: $1 - exit code, $2 - line number
#######################################
handle_error() {
  local exit_code=$1
  local line_number=$2
  echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2
  exit "$exit_code"
}

trap 'handle_error $? $LINENO' ERR

#######################################
# Print a debug message to stderr when DEBUG is non-empty.
# Always returns 0 so it is safe as the last command before a bare return.
#######################################
debug_echo() {
  if [[ -n "$DEBUG" ]]; then
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}

#######################################
# Print usage information to stdout.
#######################################
show_help() {
  cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

GitLab metrics collector for Prometheus node_exporter textfile directory.

OPTIONS:
    --once          Run collection once and exit (default)
    --daemon        Run continuously at COLLECTION_INTERVAL
    --local         Scrape /-/metrics endpoint for server-side metrics
                    (Puma, Sidekiq, Redis, DB)
    --help, -h      Show this help message

ENVIRONMENT VARIABLES:
    GITLAB_URL            GitLab base URL (required, e.g. https://gitlab.example.com)
    GITLAB_TOKEN          GitLab private access token (required for API metrics)
    GITLAB_METRICS_URL    Local metrics endpoint URL (default: $DEFAULT_METRICS_URL)
    GITLAB_SIDEKIQ_URL    Sidekiq exporter endpoint URL (default: $DEFAULT_SIDEKIQ_URL)
    NODE_DIR              Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
    COLLECTION_INTERVAL   Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
    MAX_PROJECTS          Maximum number of projects to collect per-project metrics for (default: $DEFAULT_MAX_PROJECTS)
    DEBUG                 Enable debug output

EXAMPLES:
    GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --once
    GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --daemon
    $SCRIPT_NAME --local --once
    GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --local --daemon
    DEBUG=1 GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME

EOF
}

#######################################
# Make a string safe for use as a label value by replacing every character
# outside [a-zA-Z0-9_] with an underscore.
# Arguments: $1 - raw value
# Outputs:   sanitized value on stdout
#######################################
sanitize_label() {
  local value="$1"
  printf '%s\n' "${value//[^a-zA-Z0-9_]/_}"
}

#######################################
# Print the "# HELP" and "# TYPE" lines for one metric family.
# Arguments: $1 - metric name, $2 - help text, $3 - type (default: gauge)
#######################################
emit_metric_header() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$2" "$1" "${3:-gauge}"
}

#######################################
# Build the project/namespace label pair from raw names.
# Arguments: $1 - project name, $2 - namespace name
# Outputs:   project="...",namespace="..." on stdout
#######################################
project_labels() {
  printf 'project="%s",namespace="%s"\n' \
    "$(sanitize_label "$1")" "$(sanitize_label "$2")"
}

#######################################
# Turn a JSON array into FIELD_SEP-separated rows with a single jq process.
# Arguments: $1 - JSON array text
#            $2 - jq expression yielding an array of fields per element
# Outputs:   one row per element on stdout (nothing if jq fails)
#######################################
json_rows() {
  local json=$1
  local fields=$2
  jq -r ".[] | ${fields} | ${JQ_ROW_JOIN}" <<<"$json" 2>/dev/null
}

#######################################
# Print one "<metric>{<labels>,status="..."} <count>" sample per distinct
# .status value found in a JSON array.
# Arguments: $1 - metric name, $2 - label pairs, $3 - JSON array text
#######################################
emit_status_counts() {
  local metric=$1
  local labels=$2
  local json=$3
  local count status

  # `uniq -c` prints "<count> <status>"; read trims its leading padding.
  while read -r count status; do
    if [[ -n "$status" ]]; then
      printf '%s{%s,status="%s"} %s\n' "$metric" "$labels" "$status" "$count"
    fi
  done < <(jq -r '.[].status // empty' <<<"$json" 2>/dev/null | sort | uniq -c)
}

#######################################
# Print every line of a metrics dump that matches any of the given filters.
# Arguments: $1 - raw metrics text, $2.. - extended regular expressions
#######################################
filter_metric_families() {
  local raw=$1
  local pattern
  shift
  for pattern in "$@"; do
    grep -E -- "$pattern" <<<"$raw" 2>/dev/null
  done
}

#######################################
# Call a GitLab endpoint with the configured token.
# Globals:   GITLAB_URL, GITLAB_TOKEN (read)
# Arguments: $1 - endpoint path (appended to GITLAB_URL)
#            $2 - "true" to prepend the response headers (default: false)
# Outputs:   response (optionally headers + blank line + body) on stdout
# Returns:   curl's exit status (non-zero on HTTP errors because of -f)
#######################################
gitlab_api() {
  local endpoint="$1"
  local include_headers="${2:-false}"
  local url="${GITLAB_URL}${endpoint}"
  local -a curl_args=(
    -sf
    --max-time "$DEFAULT_CURL_TIMEOUT"
    --header "PRIVATE-TOKEN: $GITLAB_TOKEN"
  )

  debug_echo "API call: $url"

  if [[ "$include_headers" == "true" ]]; then
    curl_args+=(-D -)
  fi
  curl "${curl_args[@]}" "$url" 2>/dev/null
}

#######################################
# Fetch all pages of a list endpoint and merge them into one JSON array.
# Arguments: $1 - endpoint path (may already contain a query string)
#            $2 - page size (default: 100)
#            $3 - maximum number of pages (default: 50)
# Outputs:   merged JSON array on stdout ("[]" when nothing was fetched)
# Returns:   1 when the very first page cannot be fetched, 0 otherwise
#            (a failure on a later page yields the partial result)
#######################################
gitlab_api_paginated() {
  local endpoint="$1"
  local per_page="${2:-100}"
  local max_pages="${3:-50}"
  local page=1
  local all_results="[]"
  local separator="?"
  local response headers body count merged next_page

  if [[ "$endpoint" == *"?"* ]]; then
    separator="&"
  fi

  while [[ $page -le $max_pages ]]; do
    if ! response=$(gitlab_api "${endpoint}${separator}per_page=${per_page}&page=${page}" "true"); then
      if [[ $page -eq 1 ]]; then
        # Nothing could be fetched at all: let the caller count the error.
        printf '%s\n' "$all_results"
        return 1
      fi
      break
    fi

    # Headers end at the first empty (or CR-only) line; the body follows.
    headers=$(sed '/^\r\{0,1\}$/q' <<<"$response")
    body=$(sed '1,/^\r\{0,1\}$/d' <<<"$response")

    if [[ -z "$body" ]] || ! jq -e '.' <<<"$body" >/dev/null 2>&1; then
      break
    fi

    count=$(jq 'length' <<<"$body" 2>/dev/null) || break
    if [[ "$count" -eq 0 ]]; then
      break
    fi

    # Merge into a scratch variable so a jq failure keeps earlier pages.
    merged=$(jq -s '.[0] + .[1]' <<<"${all_results} ${body}" 2>/dev/null) || break
    all_results=$merged

    next_page=$(grep -i '^x-next-page:' <<<"$headers" | tr -d '[:space:]' | cut -d: -f2)
    if [[ -z "$next_page" ]]; then
      break
    fi

    page=$((page + 1))
  done

  printf '%s\n' "$all_results"
}

#######################################
# Emit gitlab_instance_up and gitlab_instance_version_info.
# Globals:   ERRORS_TOTAL (incremented when the version call fails)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_instance_health() {
  debug_echo "Collecting instance health metrics"

  # Check if instance is reachable
  local up=0
  if gitlab_api "/-/health" >/dev/null 2>&1; then
    up=1
  fi

  emit_metric_header gitlab_instance_up "Whether the GitLab instance is reachable"
  printf 'gitlab_instance_up %s\n' "$up"

  # Version info
  local version_json version revision
  if version_json=$(gitlab_api "/api/v4/version"); then
    version=$(jq -r '.version // "unknown"' <<<"$version_json" 2>/dev/null)
    revision=$(jq -r '.revision // "unknown"' <<<"$version_json" 2>/dev/null)

    emit_metric_header gitlab_instance_version_info "GitLab version information"
    printf 'gitlab_instance_version_info{version="%s",revision="%s"} 1\n' \
      "$version" "$revision"
    debug_echo "GitLab version: $version ($revision)"
  else
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to collect version info"
  fi
}

#######################################
# Emit per-project star/fork/issue counts and storage statistics.
# Globals:   ERRORS_TOTAL (incremented when the project list cannot be fetched)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_project_statistics() {
  debug_echo "Collecting project statistics"

  local projects
  if ! projects=$(gitlab_api_paginated "/api/v4/projects?statistics=true" 100); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to collect project statistics"
    return 0
  fi

  local project_count
  project_count=$(jq 'length' <<<"$projects" 2>/dev/null) || project_count=0
  debug_echo "Found $project_count projects"

  emit_metric_header gitlab_project_stars_count "Number of stars for a project"
  emit_metric_header gitlab_project_forks_count "Number of forks for a project"
  emit_metric_header gitlab_project_open_issues_count "Number of open issues for a project"
  emit_metric_header gitlab_project_commit_count "Number of commits in default branch"
  emit_metric_header gitlab_project_storage_size_bytes "Total storage size in bytes"
  emit_metric_header gitlab_project_repository_size_bytes "Repository size in bytes"
  emit_metric_header gitlab_project_lfs_objects_size_bytes "LFS objects size in bytes"
  emit_metric_header gitlab_project_job_artifacts_size_bytes "Job artifacts size in bytes"
  emit_metric_header gitlab_project_packages_size_bytes "Packages size in bytes"
  emit_metric_header gitlab_project_wiki_size_bytes "Wiki size in bytes"
  emit_metric_header gitlab_project_snippets_size_bytes "Snippets size in bytes"
  emit_metric_header gitlab_project_uploads_size_bytes "Uploads size in bytes"

  local fields='[
    (.name // "unknown"), (.namespace.name // "unknown"),
    (.star_count // 0), (.forks_count // 0), (.open_issues_count // 0),
    (.statistics.commit_count // 0), (.statistics.storage_size // 0),
    (.statistics.repository_size // 0), (.statistics.lfs_objects_size // 0),
    (.statistics.job_artifacts_size // 0), (.statistics.packages_size // 0),
    (.statistics.wiki_size // 0), (.statistics.snippets_size // 0),
    (.statistics.uploads_size // 0)
  ]'

  local name namespace labels stars forks issues
  local commit_count storage_size repo_size lfs_size artifacts_size
  local packages_size wiki_size snippets_size uploads_size

  # One jq process yields every row; the loop runs in this shell (no pipeline)
  # so its output and state are not lost in a subshell.
  while IFS="$FIELD_SEP" read -r name namespace stars forks issues \
    commit_count storage_size repo_size lfs_size artifacts_size \
    packages_size wiki_size snippets_size uploads_size; do
    labels=$(project_labels "$name" "$namespace")
    printf '%s{%s} %s\n' \
      gitlab_project_stars_count "$labels" "$stars" \
      gitlab_project_forks_count "$labels" "$forks" \
      gitlab_project_open_issues_count "$labels" "$issues" \
      gitlab_project_commit_count "$labels" "$commit_count" \
      gitlab_project_storage_size_bytes "$labels" "$storage_size" \
      gitlab_project_repository_size_bytes "$labels" "$repo_size" \
      gitlab_project_lfs_objects_size_bytes "$labels" "$lfs_size" \
      gitlab_project_job_artifacts_size_bytes "$labels" "$artifacts_size" \
      gitlab_project_packages_size_bytes "$labels" "$packages_size" \
      gitlab_project_wiki_size_bytes "$labels" "$wiki_size" \
      gitlab_project_snippets_size_bytes "$labels" "$snippets_size" \
      gitlab_project_uploads_size_bytes "$labels" "$uploads_size"
  done < <(json_rows "$projects" "$fields")
}

#######################################
# Emit pipeline counts by status (latest 20 pipelines) and the duration of the
# latest pipeline for up to MAX_PROJECTS projects.
# Globals:   MAX_PROJECTS (read), ERRORS_TOTAL (incremented on API failures)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_pipeline_metrics() {
  debug_echo "Collecting pipeline metrics"

  local projects
  if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true"); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to fetch projects for pipeline metrics"
    return 0
  fi

  emit_metric_header gitlab_pipeline_status "Count of pipelines by status"
  emit_metric_header gitlab_pipeline_duration_seconds "Duration of the latest pipeline"

  local project_id name namespace labels pipelines duration latest_id detail
  while IFS="$FIELD_SEP" read -r project_id name namespace; do
    labels=$(project_labels "$name" "$namespace")

    if ! pipelines=$(gitlab_api "/api/v4/projects/${project_id}/pipelines?per_page=20"); then
      ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
      debug_echo "Failed to fetch pipelines for project $project_id"
      continue
    fi

    # Count pipelines per status
    emit_status_counts gitlab_pipeline_status "$labels" "$pipelines"

    # Latest pipeline duration. The list endpoint does not appear to include
    # "duration" in its entries, so fall back to the single-pipeline resource.
    duration=$(jq -r '.[0].duration // empty' <<<"$pipelines" 2>/dev/null)
    if [[ -z "$duration" ]]; then
      latest_id=$(jq -r '.[0].id // empty' <<<"$pipelines" 2>/dev/null)
      if [[ -n "$latest_id" ]] \
        && detail=$(gitlab_api "/api/v4/projects/${project_id}/pipelines/${latest_id}"); then
        duration=$(jq -r '.duration // empty' <<<"$detail" 2>/dev/null)
      fi
    fi
    if [[ -n "$duration" && "$duration" != "null" ]]; then
      printf 'gitlab_pipeline_duration_seconds{%s} %s\n' "$labels" "$duration"
    fi
  done < <(json_rows "$projects" "$JQ_PROJECT_FIELDS")
}

#######################################
# Emit per-runner active/online gauges and runner totals.
# Globals:   ERRORS_TOTAL (incremented when the runner list cannot be fetched)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_runner_metrics() {
  debug_echo "Collecting runner metrics"

  local runners
  if ! runners=$(gitlab_api_paginated "/api/v4/runners" 100); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to collect runner metrics"
    return 0
  fi

  local total online_total
  total=$(jq 'length' <<<"$runners" 2>/dev/null) || total=0
  online_total=$(jq '[.[] | select(.status == "online")] | length' <<<"$runners" 2>/dev/null) \
    || online_total=0

  emit_metric_header gitlab_runner_active "Whether a runner is active"
  emit_metric_header gitlab_runner_online "Whether a runner is online"
  emit_metric_header gitlab_runners_total "Total number of runners"
  emit_metric_header gitlab_runners_online_total "Total number of online runners"

  local fields='[
    (.description // "unknown"), (.runner_type // "unknown"),
    (.active // false), (.status // "unknown")
  ]'
  local description runner_type active status labels active_val online_val
  while IFS="$FIELD_SEP" read -r description runner_type active status; do
    labels="runner_name=\"$(sanitize_label "$description")\",runner_type=\"$runner_type\""

    active_val=0
    if [[ "$active" == "true" ]]; then
      active_val=1
    fi
    online_val=0
    if [[ "$status" == "online" ]]; then
      online_val=1
    fi

    printf 'gitlab_runner_active{%s} %s\n' "$labels" "$active_val"
    printf 'gitlab_runner_online{%s} %s\n' "$labels" "$online_val"
  done < <(json_rows "$runners" "$fields")

  printf 'gitlab_runners_total %s\n' "$total"
  printf 'gitlab_runners_online_total %s\n' "$online_total"
}

#######################################
# Emit gitlab_users_total, read from the X-Total response header.
# Globals:   ERRORS_TOTAL (incremented on API or parse failure)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_user_metrics() {
  debug_echo "Collecting user metrics"

  local response
  if ! response=$(gitlab_api "/api/v4/users?per_page=1" "true"); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to collect user metrics"
    return 0
  fi

  local total
  total=$(grep -i '^x-total:' <<<"$response" | tr -d '[:space:]' | cut -d: -f2)

  if [[ -n "$total" ]]; then
    emit_metric_header gitlab_users_total "Total number of GitLab users"
    printf 'gitlab_users_total %s\n' "$total"
    debug_echo "Total users: $total"
  else
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to parse user count from X-Total header"
  fi
}

#######################################
# Emit gitlab_groups_total.
# Globals:   ERRORS_TOTAL (incremented when the group list cannot be fetched)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_group_metrics() {
  debug_echo "Collecting group metrics"

  local groups
  if ! groups=$(gitlab_api_paginated "/api/v4/groups" 100); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to collect group metrics"
    return 0
  fi

  local total
  total=$(jq 'length' <<<"$groups" 2>/dev/null) || total=0

  emit_metric_header gitlab_groups_total "Total number of GitLab groups"
  printf 'gitlab_groups_total %s\n' "$total"
  debug_echo "Total groups: $total"
}

#######################################
# Emit job counts by status (latest 20 jobs) for up to MAX_PROJECTS projects.
# Globals:   MAX_PROJECTS (read), ERRORS_TOTAL (incremented on API failures)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_job_metrics() {
  debug_echo "Collecting job metrics"

  local projects
  if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true"); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to fetch projects for job metrics"
    return 0
  fi

  emit_metric_header gitlab_jobs_by_status "Count of jobs by status"

  local project_id name namespace jobs
  while IFS="$FIELD_SEP" read -r project_id name namespace; do
    if ! jobs=$(gitlab_api "/api/v4/projects/${project_id}/jobs?per_page=20"); then
      ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
      debug_echo "Failed to fetch jobs for project $project_id"
      continue
    fi
    emit_status_counts gitlab_jobs_by_status \
      "$(project_labels "$name" "$namespace")" "$jobs"
  done < <(json_rows "$projects" "$JQ_PROJECT_FIELDS")
}

#######################################
# Emit the number of open merge requests (first page, at most 100) for up to
# MAX_PROJECTS projects.
# Globals:   MAX_PROJECTS (read), ERRORS_TOTAL (incremented on API failures)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_merge_request_metrics() {
  debug_echo "Collecting merge request metrics"

  local projects
  if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true"); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to fetch projects for merge request metrics"
    return 0
  fi

  emit_metric_header gitlab_open_merge_requests "Number of open merge requests"

  local project_id name namespace mrs count
  while IFS="$FIELD_SEP" read -r project_id name namespace; do
    if ! mrs=$(gitlab_api "/api/v4/projects/${project_id}/merge_requests?state=opened&per_page=100"); then
      ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
      debug_echo "Failed to fetch merge requests for project $project_id"
      continue
    fi

    count=$(jq 'length' <<<"$mrs" 2>/dev/null) || count=0
    printf 'gitlab_open_merge_requests{%s} %s\n' \
      "$(project_labels "$name" "$namespace")" "$count"
  done < <(json_rows "$projects" "$JQ_PROJECT_FIELDS")
}

#######################################
# Emit the number of environments (first page, at most 100) for up to
# MAX_PROJECTS projects.
# Globals:   MAX_PROJECTS (read), ERRORS_TOTAL (incremented on API failures)
# Outputs:   metrics in Prometheus text format on stdout
#######################################
collect_environment_metrics() {
  debug_echo "Collecting environment metrics"

  local projects
  if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true"); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    debug_echo "Failed to fetch projects for environment metrics"
    return 0
  fi

  emit_metric_header gitlab_environments_total "Number of environments per project"

  local project_id name namespace envs count
  while IFS="$FIELD_SEP" read -r project_id name namespace; do
    if ! envs=$(gitlab_api "/api/v4/projects/${project_id}/environments?per_page=100"); then
      ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
      debug_echo "Failed to fetch environments for project $project_id"
      continue
    fi

    count=$(jq 'length' <<<"$envs" 2>/dev/null) || count=0
    printf 'gitlab_environments_total{%s} %s\n' \
      "$(project_labels "$name" "$namespace")" "$count"
  done < <(json_rows "$projects" "$JQ_PROJECT_FIELDS")
}

#######################################
# Scrape GITLAB_METRICS_URL (and GITLAB_SIDEKIQ_URL when reachable) and pass
# through a fixed selection of metric families unchanged.
# Globals:   GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL (read)
#            ERRORS_TOTAL (incremented when the main endpoint fails)
# Outputs:   selected metric lines on stdout, byte-for-byte as scraped
#######################################
collect_local_metrics() {
  debug_echo "Scraping local metrics from $GITLAB_METRICS_URL"

  local raw_metrics
  if ! raw_metrics=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_METRICS_URL" 2>/dev/null); then
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    echo "Error: Failed to scrape $GITLAB_METRICS_URL" >&2
    echo "Ensure this host's IP is in gitlab_rails['monitoring_whitelist']" >&2
    return 0
  fi

  # GitLab version info
  local version_patterns="^gitlab_version_info[{ ]"
  local version_help="^# (HELP|TYPE) gitlab_version_info"

  # Puma metrics
  local puma_patterns="^puma_workers[{ ]|^puma_running_workers[{ ]|^puma_running[{ ]|^puma_queued_connections[{ ]|^puma_active_connections[{ ]|^puma_pool_capacity[{ ]|^puma_max_threads[{ ]|^puma_idle_threads[{ ]"
  local puma_help="^# (HELP|TYPE) puma_"

  # Core Sidekiq job metrics
  local sidekiq_patterns="^sidekiq_running_jobs[{ ]|^sidekiq_concurrency[{ ]|^sidekiq_mem_total_bytes[{ ]|^sidekiq_jobs_failed_total[{ ]|^sidekiq_jobs_dead_total[{ ]|^sidekiq_enqueued_jobs_total[{ ]|^sidekiq_jobs_completion_seconds[_{ ]|^sidekiq_jobs_queue_duration_seconds[_{ ]|^sidekiq_jobs_cpu_seconds[_{ ]|^sidekiq_jobs_db_seconds[_{ ]|^sidekiq_jobs_gitaly_seconds[_{ ]|^sidekiq_redis_requests_total[{ ]|^sidekiq_redis_requests_duration_seconds[_{ ]"
  local sidekiq_help="^# (HELP|TYPE) sidekiq_(running_jobs|concurrency|mem_total_bytes|jobs_failed_total|jobs_dead_total|enqueued_jobs_total|jobs_completion_seconds|jobs_queue_duration_seconds|jobs_cpu_seconds|jobs_db_seconds|jobs_gitaly_seconds|redis_requests_total|redis_requests_duration_seconds)"

  # CI/CD pipeline internals
  local ci_patterns="^pipelines_created_total[{ ]|^deployments[{ ]|^gitlab_ci_pipeline_creation_duration_seconds[_{ ]|^gitlab_ci_pipeline_failure_reasons[{ ]|^gitlab_ci_active_jobs[_{ ]"
  local ci_help="^# (HELP|TYPE) (pipelines_created_total|deployments|gitlab_ci_pipeline_creation_duration_seconds|gitlab_ci_pipeline_failure_reasons|gitlab_ci_active_jobs)"

  # Email delivery metrics
  local email_patterns="^gitlab_emails_delivered_total[{ ]|^gitlab_emails_delivery_attempts_total[{ ]"
  local email_help="^# (HELP|TYPE) gitlab_emails_(delivered_total|delivery_attempts_total)"

  # External HTTP (webhooks, integrations)
  local ext_http_patterns="^gitlab_external_http_total[{ ]|^gitlab_external_http_duration_seconds[_{ ]"
  local ext_http_help="^# (HELP|TYPE) gitlab_external_http_(total|duration_seconds)"

  # Sidekiq SLI apdex/errors
  local sli_patterns="^gitlab_sli_sidekiq_execution_apdex_success_total[{ ]|^gitlab_sli_sidekiq_execution_apdex_total[{ ]|^gitlab_sli_sidekiq_execution_error_total[{ ]|^gitlab_sli_sidekiq_execution_total[{ ]"
  local sli_help="^# (HELP|TYPE) gitlab_sli_sidekiq_execution"

  # DB transaction duration, primary SQL, threads, cache, workers
  local extra_patterns="^gitlab_database_transaction_seconds[_{ ]|^gitlab_sql_primary_duration_seconds[_{ ]|^gitlab_ruby_threads_running_threads[{ ]|^gitlab_ruby_threads_max_expected_threads[{ ]|^limited_capacity_worker_running_jobs[{ ]|^limited_capacity_worker_max_running_jobs[{ ]|^limited_capacity_worker_remaining_work_count[{ ]|^redis_hit_miss_operations_total[{ ]"
  local extra_help="^# (HELP|TYPE) (gitlab_database_transaction_seconds|gitlab_sql_primary_duration_seconds|gitlab_ruby_threads_running_threads|gitlab_ruby_threads_max_expected_threads|limited_capacity_worker_running_jobs|limited_capacity_worker_max_running_jobs|limited_capacity_worker_remaining_work_count|redis_hit_miss_operations_total)"

  # Redis metrics
  local redis_patterns="^gitlab_redis_client_requests_total[{ ]|^gitlab_redis_client_exceptions_total[{ ]|^gitlab_redis_client_requests_duration_seconds[_{ ]|^gitlab_redis_client_requests_duration_seconds_sum[{ ]|^gitlab_redis_client_requests_duration_seconds_count[{ ]"
  local redis_help="^# (HELP|TYPE) gitlab_redis_client_(requests_total|exceptions_total|requests_duration_seconds)"

  # Database connection pool metrics
  local db_patterns="^gitlab_database_connection_pool_"
  local db_help="^# (HELP|TYPE) gitlab_database_connection_pool_"

  # Process metrics (CPU, memory, file descriptors)
  local process_patterns="^ruby_process_resident_memory_bytes[{ ]|^ruby_process_cpu_seconds_total[{ ]|^process_open_fds[{ ]|^process_max_fds[{ ]|^ruby_gc_stat_heap_live_slots[{ ]|^ruby_gc_stat_heap_free_slots[{ ]"
  local process_help="^# (HELP|TYPE) (ruby_process_resident_memory_bytes|ruby_process_cpu_seconds_total|process_open_fds|process_max_fds|ruby_gc_stat_heap_live_slots|ruby_gc_stat_heap_free_slots)"

  # GitLab transaction/request metrics
  local txn_patterns="^gitlab_transaction_duration_seconds[{ _]|^gitlab_sql_duration_seconds[{ _]|^gitlab_cache_operation_duration_seconds[{ _]"
  local txn_help="^# (HELP|TYPE) (gitlab_transaction_duration_seconds|gitlab_sql_duration_seconds|gitlab_cache_operation_duration_seconds)"

  # User session and ActionCable metrics
  local session_patterns="^user_session_logins_total[{ ]|^action_cable_active_connections[{ ]|^action_cable_pool_current_size[{ ]"
  local session_help="^# (HELP|TYPE) (user_session_logins_total|action_cable_active_connections|action_cable_pool_current_size)"

  local out="" section

  # Families taken from the main endpoint that are emitted first.
  section=$(filter_metric_families "$raw_metrics" \
    "$version_help|$version_patterns" \
    "$puma_help|$puma_patterns")
  if [[ -n "$section" ]]; then
    out+="${section}"$'\n'
  fi

  # Sidekiq metrics (served by separate Sidekiq exporter, default localhost:8082)
  local sidekiq_raw
  if sidekiq_raw=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_SIDEKIQ_URL" 2>/dev/null); then
    debug_echo "Scraped Sidekiq metrics from $GITLAB_SIDEKIQ_URL"
    section=$(filter_metric_families "$sidekiq_raw" \
      "$sidekiq_help|$sidekiq_patterns" \
      "$ci_help|$ci_patterns" \
      "$email_help|$email_patterns" \
      "$ext_http_help|$ext_http_patterns" \
      "$sli_help|$sli_patterns" \
      "$extra_help|$extra_patterns")
    if [[ -n "$section" ]]; then
      out+="${section}"$'\n'
    fi
  else
    debug_echo "Warning: Could not scrape Sidekiq exporter at $GITLAB_SIDEKIQ_URL (is sidekiq_exporter enabled?)"
  fi

  # Remaining families from the main endpoint.
  section=$(filter_metric_families "$raw_metrics" \
    "$redis_help|$redis_patterns" \
    "$db_help|$db_patterns" \
    "$process_help|$process_patterns" \
    "$txn_help|$txn_patterns" \
    "$session_help|$session_patterns")
  if [[ -n "$section" ]]; then
    out+="${section}"$'\n'
  fi

  # Count sample lines only (skip comments and blanks); grep -c exits 1 on 0.
  local metric_count
  metric_count=$(grep -Ecv '^(#|$)' <<<"$out" 2>/dev/null) || metric_count=0
  debug_echo "Extracted $metric_count local metrics"

  # %s keeps backslash escapes in scraped label values and HELP text intact.
  printf '%s' "$out"
}

#######################################
# Run every enabled collector and publish the result atomically.
# Collectors run in the current shell with stdout redirected to a temp file,
# so their ERRORS_TOTAL increments are visible here.
# Globals:   NODE_DIR, GITLAB_URL, GITLAB_TOKEN, LOCAL_MODE (read)
#            ERRORS_TOTAL (reset, then read)
# Returns:   0 on success, 1 when the output file cannot be written
#######################################
run_collection() {
  local start_time end_time duration tmp_file
  local target="${NODE_DIR}/gitlab_metrics.prom"

  start_time=$(date +%s)
  ERRORS_TOTAL=0

  debug_echo "Starting metrics collection"

  if ! mkdir -p "$NODE_DIR"; then
    echo "Error: Cannot create textfile directory $NODE_DIR" >&2
    return 1
  fi
  # Same directory as the target so the final mv is an atomic rename; the
  # suffix keeps the unfinished file from matching *.prom.
  if ! tmp_file=$(mktemp "${target}.XXXXXX"); then
    echo "Error: Cannot create temporary file in $NODE_DIR" >&2
    return 1
  fi

  {
    # API-based metrics (require GITLAB_URL and GITLAB_TOKEN)
    if [[ -n "$GITLAB_URL" && -n "$GITLAB_TOKEN" ]]; then
      collect_instance_health
      collect_project_statistics
      collect_pipeline_metrics
      collect_runner_metrics
      collect_user_metrics
      collect_group_metrics
      collect_job_metrics
      collect_merge_request_metrics
      collect_environment_metrics
    fi

    # Local server-side metrics (scraped from /-/metrics)
    if [[ "$LOCAL_MODE" == true ]]; then
      collect_local_metrics
    fi

    end_time=$(date +%s)
    duration=$((end_time - start_time))

    emit_metric_header gitlab_collector_duration_seconds "Time taken to collect all metrics"
    printf 'gitlab_collector_duration_seconds %s\n' "$duration"
    emit_metric_header gitlab_collector_last_run_timestamp "Unix timestamp of last collection run"
    printf 'gitlab_collector_last_run_timestamp %s\n' "$end_time"
    emit_metric_header gitlab_collector_errors_total "Number of errors during collection"
    printf 'gitlab_collector_errors_total %s\n' "$ERRORS_TOTAL"
  } > "$tmp_file"

  # mktemp creates the file with mode 0600; widen it so a node_exporter running
  # as a different user can read the published file.
  if ! chmod 0644 "$tmp_file" || ! mv -f "$tmp_file" "$target"; then
    rm -f -- "$tmp_file"
    echo "Error: Failed to publish $target" >&2
    return 1
  fi

  debug_echo "Collection complete in ${duration}s with $ERRORS_TOTAL errors"
}

#######################################
# Parse command-line options into RUN_MODE and LOCAL_MODE.
# Arguments: the script's positional parameters
#######################################
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --once)
        RUN_MODE="once"
        shift
        ;;
      --daemon)
        RUN_MODE="daemon"
        shift
        ;;
      --local)
        LOCAL_MODE=true
        shift
        ;;
      --help|-h)
        show_help
        exit 0
        ;;
      *)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Validate configuration and required tools; exits 1 on any problem.
# Globals:   GITLAB_URL, GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL (normalized)
#######################################
validate_requirements() {
  # API credentials only required when not running local-only
  if [[ -z "$GITLAB_URL" && "$LOCAL_MODE" != true ]]; then
    echo "Error: GITLAB_URL is required (or use --local for server-side only)" >&2
    exit 1
  fi

  if [[ -z "$GITLAB_TOKEN" && "$LOCAL_MODE" != true ]]; then
    echo "Error: GITLAB_TOKEN is required (or use --local for server-side only)" >&2
    exit 1
  fi

  # MAX_PROJECTS is interpolated into API URLs, so insist on a plain integer.
  if [[ ! "$MAX_PROJECTS" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: MAX_PROJECTS must be a positive integer (got '$MAX_PROJECTS')" >&2
    exit 1
  fi

  # Strip trailing slash from URLs
  GITLAB_URL="${GITLAB_URL%/}"
  GITLAB_METRICS_URL="${GITLAB_METRICS_URL%/}"
  GITLAB_SIDEKIQ_URL="${GITLAB_SIDEKIQ_URL%/}"

  if ! command -v curl >/dev/null 2>&1; then
    echo "Error: curl is required but not installed" >&2
    exit 1
  fi

  if [[ -n "$GITLAB_TOKEN" ]]; then
    if ! command -v jq >/dev/null 2>&1; then
      echo "Error: jq is required but not installed" >&2
      exit 1
    fi
  fi
}

#######################################
# Entry point: parse options, validate, then collect once or in a loop.
# Returns:   non-zero when a one-shot collection fails or the daemon loop
#            cannot sleep (invalid COLLECTION_INTERVAL)
#######################################
main() {
  parse_arguments "$@"
  validate_requirements

  debug_echo "GitLab URL: $GITLAB_URL"
  debug_echo "Metrics URL: $GITLAB_METRICS_URL"
  debug_echo "Sidekiq URL: $GITLAB_SIDEKIQ_URL"
  debug_echo "Node exporter dir: $NODE_DIR"
  debug_echo "Run mode: $RUN_MODE"
  debug_echo "Local mode: $LOCAL_MODE"
  debug_echo "Max projects: $MAX_PROJECTS"

  if [[ "$RUN_MODE" == "daemon" ]]; then
    debug_echo "Running in daemon mode with ${COLLECTION_INTERVAL}s interval"
    while true; do
      if ! run_collection; then
        echo "Warning: collection run failed; retrying after ${COLLECTION_INTERVAL}s" >&2
      fi
      # A failing sleep (e.g. invalid interval) would otherwise turn this loop
      # into a busy loop hammering the GitLab API.
      if ! sleep "$COLLECTION_INTERVAL"; then
        echo "Error: sleep failed for COLLECTION_INTERVAL='$COLLECTION_INTERVAL'" >&2
        return 1
      fi
    done
  else
    run_collection || return 1
  fi

  debug_echo "Script completed successfully"
}

# Execute main function if script is run directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi