#!/bin/bash
#############################################################
####                 cAdvisor Installer                  ####
####  Download, install, and configure Google cAdvisor   ####
####  for Docker container metrics with Prometheus       ####
####                                                     ####
####  Supports: Docker (container) or binary (systemd)   ####
####                                                     ####
####  Author:  Phil Connor                               ####
####  Contact: contact@mylinux.work                      ####
####  Version: 1.0.0.20260308                            ####
#############################################################

set -euo pipefail

SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Defaults
readonly DEFAULT_CADVISOR_VERSION="0.49.1"
readonly DEFAULT_LISTEN_PORT="8080"
readonly DEFAULT_PROMETHEUS_CONFIG="/etc/prometheus/prometheus.yml"
readonly DEFAULT_BIND_ADDRESS="0.0.0.0"

# Configuration (overridable by environment or flags)
CADVISOR_VERSION="${CADVISOR_VERSION:-$DEFAULT_CADVISOR_VERSION}"
LISTEN_PORT="${LISTEN_PORT:-$DEFAULT_LISTEN_PORT}"
BIND_ADDRESS="${BIND_ADDRESS:-$DEFAULT_BIND_ADDRESS}"
PROMETHEUS_CONFIG="${PROMETHEUS_CONFIG:-$DEFAULT_PROMETHEUS_CONFIG}"
INSTALL_MODE="docker"
RESTART_POLICY="unless-stopped"
CONTAINER_NAME="cadvisor"
ADD_TO_PROMETHEUS=false
DRY_RUN=false
UNINSTALL=false

#########################
###      Logging      ###
#########################

# Timestamped informational message on stdout.
log_info() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] INFO: $1"
}

# Timestamped error message on stderr.
log_error() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $1" >&2
}

# Timestamped warning on stderr, so diagnostics never mix with stdout output.
log_warn() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] WARN: $1" >&2
}

#######################################
# Print usage information and exit.
# Arguments: $1 - exit status (optional, default 0)
#######################################
show_help() {
  cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

Download, install, and configure Google cAdvisor for Docker container monitoring.

INSTALL MODES:
  --docker                   Run cAdvisor as a Docker container (default)
  --binary                   Download and install as a systemd service

OPTIONS:
  --version VERSION          cAdvisor version to install (default: $DEFAULT_CADVISOR_VERSION)
  --port PORT                Metrics listen port (default: $DEFAULT_LISTEN_PORT)
  --bind ADDRESS             Bind address (default: $DEFAULT_BIND_ADDRESS)
  --name NAME                Docker container name (default: cadvisor)
  --restart POLICY           Docker restart policy (default: unless-stopped)
  --add-to-prometheus        Add scrape target to Prometheus config
  --prometheus-config PATH   Path to prometheus.yml (default: $DEFAULT_PROMETHEUS_CONFIG)
  --uninstall                Remove cAdvisor installation
  --dry-run                  Show what would be done without executing
  --help, -h                 Show this help message

ENVIRONMENT VARIABLES:
  CADVISOR_VERSION           cAdvisor version (default: $DEFAULT_CADVISOR_VERSION)
  LISTEN_PORT                Metrics listen port (default: $DEFAULT_LISTEN_PORT)
  BIND_ADDRESS               Bind address (default: $DEFAULT_BIND_ADDRESS)
  PROMETHEUS_CONFIG          Path to prometheus.yml (default: $DEFAULT_PROMETHEUS_CONFIG)

EXAMPLES:
  $SCRIPT_NAME --docker
  $SCRIPT_NAME --docker --port 9080 --add-to-prometheus
  $SCRIPT_NAME --binary --version 0.49.1 --add-to-prometheus
  $SCRIPT_NAME --uninstall
EOF
  exit "${1:-0}"
}

#########################
###  Parse Arguments  ###
#########################

#######################################
# Abort unless a value-taking option is followed by a value.
# Without this guard, "set -u" turns a trailing "--port" into an opaque
# "unbound variable" failure.
# Arguments: $1 - option name
#            $2 - number of remaining arguments (including the option)
#            $3 - candidate value (may be empty)
#######################################
require_value() {
  local option=$1
  local remaining=$2
  local value=${3-}

  if (( remaining < 2 )) || [[ -z "$value" || "$value" == --* ]]; then
    log_error "Option $option requires a value"
    exit 1
  fi
}

#######################################
# Parse command-line flags into the global configuration variables.
# Globals written: INSTALL_MODE, CADVISOR_VERSION, LISTEN_PORT, BIND_ADDRESS,
#                  CONTAINER_NAME, RESTART_POLICY, ADD_TO_PROMETHEUS,
#                  PROMETHEUS_CONFIG, UNINSTALL, DRY_RUN
# Arguments: the script's positional parameters
#######################################
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --docker)
        INSTALL_MODE="docker"
        shift
        ;;
      --binary)
        INSTALL_MODE="binary"
        shift
        ;;
      --version)
        require_value "$1" "$#" "${2-}"
        CADVISOR_VERSION="$2"
        shift 2
        ;;
      --port)
        require_value "$1" "$#" "${2-}"
        LISTEN_PORT="$2"
        shift 2
        ;;
      --bind)
        require_value "$1" "$#" "${2-}"
        BIND_ADDRESS="$2"
        shift 2
        ;;
      --name)
        require_value "$1" "$#" "${2-}"
        CONTAINER_NAME="$2"
        shift 2
        ;;
      --restart)
        require_value "$1" "$#" "${2-}"
        RESTART_POLICY="$2"
        shift 2
        ;;
      --add-to-prometheus)
        ADD_TO_PROMETHEUS=true
        shift
        ;;
      --prometheus-config)
        require_value "$1" "$#" "${2-}"
        PROMETHEUS_CONFIG="$2"
        shift 2
        ;;
      --uninstall)
        UNINSTALL=true
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --help | -h)
        show_help
        ;;
      *)
        log_error "Unknown option: $1"
        # A usage error must not report success to the caller.
        show_help 1
        ;;
    esac
  done
}

#######################################
# Validate settings that are later interpolated into commands and files.
# Globals read: LISTEN_PORT
#######################################
validate_settings() {
  # 10# forces base 10 so a value such as "08080" is not parsed as octal.
  if ! [[ "$LISTEN_PORT" =~ ^[0-9]+$ ]] \
    || (( 10#$LISTEN_PORT < 1 || 10#$LISTEN_PORT > 65535 )); then
    log_error "Invalid port: '$LISTEN_PORT' (expected an integer between 1 and 65535)"
    exit 1
  fi
}

#########################
### Permission Check  ###
#########################

# Exit unless the script is running as root.
check_permissions() {
  if [[ $EUID -ne 0 ]]; then
    log_error "This script must be run as root"
    exit 1
  fi
}

#########################
### Pre-flight Checks ###
#########################

# Exit unless the docker CLI is installed and "docker info" succeeds.
check_docker() {
  if ! command -v docker &> /dev/null; then
    log_error "Docker is not installed. Install Docker first or use --binary mode."
    exit 1
  fi

  if ! docker info &> /dev/null; then
    log_error "Docker daemon is not running"
    exit 1
  fi
}

#######################################
# Test whether CONTAINER_NAME appears in "docker ps" output.
# The listing is captured before matching: piping straight into "grep -q"
# under "pipefail" can report a false negative when grep exits early and the
# producer is killed by SIGPIPE. -F -x matches the name literally and whole.
# Arguments: extra flags for "docker ps" (e.g. -a)
# Returns:   0 if listed, non-zero otherwise (including docker failures)
#######################################
container_listed() {
  local names
  names=$(docker ps "$@" --format '{{.Names}}' 2> /dev/null) || return 1
  grep -Fxq -- "$CONTAINER_NAME" <<< "$names"
}

#######################################
# Test whether something is listening on LISTEN_PORT (TCP).
# Consults "ss" and "netstat", whichever are available; if neither is
# present the port is reported as free.
# Returns: 0 if a listener was found, non-zero otherwise
#######################################
port_in_use() {
  local listeners=""
  local netstat_out=""

  if command -v ss &> /dev/null; then
    listeners=$(ss -tln 2> /dev/null) || listeners=""
  fi
  if command -v netstat &> /dev/null; then
    netstat_out=$(netstat -tln 2> /dev/null) || netstat_out=""
  fi

  grep -Eq ":${LISTEN_PORT}[[:space:]]" <<< "${listeners}"$'\n'"${netstat_out}"
}

#######################################
# Check whether LISTEN_PORT is free.
# Returns: 0 if the port is free;
#          1 if it is taken and cAdvisor (container or service) is running.
# Exits 1 if the port is taken and neither cAdvisor form is running.
#######################################
check_port_available() {
  if ! port_in_use; then
    return 0
  fi

  log_warn "Port $LISTEN_PORT is already in use"

  # Check if it's cAdvisor already running
  if container_listed; then
    log_warn "cAdvisor container '$CONTAINER_NAME' is already running"
    return 1
  fi

  if systemctl is-active --quiet cadvisor 2> /dev/null; then
    log_warn "cAdvisor systemd service is already running"
    return 1
  fi

  log_error "Port $LISTEN_PORT is in use by another process"
  exit 1
}

#######################################
# Map "uname -m" to the architecture suffix used in release asset names.
# Outputs: amd64 | arm64 | arm on stdout
# Exits 1 on an unsupported architecture.
#######################################
detect_arch() {
  local arch
  arch=$(uname -m)

  case "$arch" in
    x86_64 | amd64) echo "amd64" ;;
    aarch64 | arm64) echo "arm64" ;;
    # 32-bit ARM assets are believed to use the Go GOARCH name "arm", not
    # "armv7"; confirm against the releases page for the target version.
    armv7l) echo "arm" ;;
    *)
      log_error "Unsupported architecture: $arch"
      exit 1
      ;;
  esac
}

#######################################
# Host to use when reaching cAdvisor from this machine.
# Wildcard binds are reachable through localhost; a specific bind address
# must be contacted directly because nothing listens on loopback then.
# Globals read: BIND_ADDRESS
# Outputs: host (IPv6 literals bracketed) on stdout
#######################################
metrics_probe_host() {
  case "$BIND_ADDRESS" in
    "" | 0.0.0.0 | :: | "[::]") echo "localhost" ;;
    \[*\]) echo "$BIND_ADDRESS" ;;
    *:*) echo "[${BIND_ADDRESS}]" ;;
    *) echo "$BIND_ADDRESS" ;;
  esac
}

#########################
###  Docker Install   ###
#########################

#######################################
# Run cAdvisor as a Docker container named CONTAINER_NAME.
# Globals read: CADVISOR_VERSION, CONTAINER_NAME, RESTART_POLICY,
#               BIND_ADDRESS, LISTEN_PORT, DRY_RUN
#######################################
install_docker_mode() {
  check_docker

  if ! check_port_available; then
    log_info "cAdvisor is already running — nothing to do"
    return
  fi

  log_info "Installing cAdvisor v${CADVISOR_VERSION} as Docker container"

  local image="gcr.io/cadvisor/cadvisor:v${CADVISOR_VERSION}"
  local docker_cmd=(
    docker run -d
    --name "$CONTAINER_NAME"
    --restart "$RESTART_POLICY"
    -p "${BIND_ADDRESS}:${LISTEN_PORT}:8080"
    -v /:/rootfs:ro
    -v /var/run:/var/run:ro
    -v /sys:/sys:ro
    -v /var/lib/docker/:/var/lib/docker:ro
    -v /dev/disk/:/dev/disk:ro
    --privileged
    --device /dev/kmsg
    "$image"
  )

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "[DRY RUN] Would execute:"
    echo "  ${docker_cmd[*]}"
    return
  fi

  # Pull image first
  log_info "Pulling image: $image"
  docker pull "$image"

  # Remove existing stopped container if present
  if container_listed -a; then
    log_info "Removing existing stopped container: $CONTAINER_NAME"
    docker rm -f "$CONTAINER_NAME" 2> /dev/null || true
  fi

  # Run container
  "${docker_cmd[@]}"
  log_info "Container '$CONTAINER_NAME' started"

  # A slow start is only a warning (logged by wait_for_metrics); it must not
  # abort the script under "set -e" and skip the remaining steps.
  wait_for_metrics || true
}

#########################
###  Binary Install   ###
#########################

#######################################
# Download the release binary and run it as the "cadvisor" systemd service.
# Globals read: CADVISOR_VERSION, DRY_RUN
#######################################
install_binary_mode() {
  local arch
  arch=$(detect_arch)

  local download_url="https://github.com/google/cadvisor/releases/download/v${CADVISOR_VERSION}/cadvisor-v${CADVISOR_VERSION}-linux-${arch}"
  local binary_path="/usr/local/bin/cadvisor"

  log_info "Installing cAdvisor v${CADVISOR_VERSION} as systemd service (${arch})"

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "[DRY RUN] Would download: $download_url"
    log_info "[DRY RUN] Would install to: $binary_path"
    log_info "[DRY RUN] Would create systemd unit: /etc/systemd/system/cadvisor.service"
    return
  fi

  if ! command -v curl &> /dev/null; then
    log_error "curl is required to download the cAdvisor binary"
    exit 1
  fi

  # Download binary
  log_info "Downloading cAdvisor binary..."
  local temp_file
  temp_file=$(mktemp)

  # -s also rejects an empty body, which would otherwise be installed as-is.
  if ! curl -fsSL -o "$temp_file" "$download_url" || [[ ! -s "$temp_file" ]]; then
    rm -f -- "$temp_file"
    log_error "Failed to download cAdvisor from $download_url"
    exit 1
  fi

  # Install binary with an explicit mode instead of inheriting mktemp's 0600,
  # and make sure the temp file never outlives this function.
  if ! install -m 0755 -- "$temp_file" "$binary_path"; then
    rm -f -- "$temp_file"
    log_error "Failed to install binary to $binary_path"
    exit 1
  fi
  rm -f -- "$temp_file"
  log_info "Installed binary to $binary_path"

  # Create systemd unit
  create_systemd_unit

  # Enable and start
  systemctl daemon-reload
  systemctl enable cadvisor
  systemctl start cadvisor
  log_info "cAdvisor systemd service started"

  # See install_docker_mode: a slow start is a warning, not a fatal error.
  wait_for_metrics || true
}

#######################################
# Write /etc/systemd/system/cadvisor.service.
# Globals read: LISTEN_PORT, BIND_ADDRESS
#######################################
create_systemd_unit() {
  # Unquoted delimiter: variables expand, and each "\\" becomes a single
  # backslash, i.e. a line continuation inside the unit file.
  cat > /etc/systemd/system/cadvisor.service << EOF
[Unit]
Description=cAdvisor - Container Advisor
Documentation=https://github.com/google/cadvisor
After=network.target docker.service
Wants=docker.service

[Service]
Type=simple
ExecStart=/usr/local/bin/cadvisor \\
  --port=${LISTEN_PORT} \\
  --listen_ip=${BIND_ADDRESS} \\
  --docker_only=true \\
  --housekeeping_interval=30s \\
  --storage_duration=2m0s
Restart=on-failure
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

  log_info "Created systemd unit: /etc/systemd/system/cadvisor.service"
}

#########################
###   Post-install    ###
#########################

#######################################
# Poll the metrics endpoint until it answers.
# Makes up to 15 attempts, sleeping 2 seconds after each failure.
# Globals read: LISTEN_PORT, BIND_ADDRESS
# Returns: 0 once the endpoint responds, 1 otherwise
#######################################
wait_for_metrics() {
  log_info "Waiting for metrics endpoint..."

  if ! command -v curl &> /dev/null; then
    log_warn "curl not found — cannot probe the metrics endpoint"
    return 1
  fi

  local host
  host=$(metrics_probe_host)

  local attempt
  local max_attempts=15

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if curl -sf "http://${host}:${LISTEN_PORT}/metrics" > /dev/null 2>&1; then
      log_info "cAdvisor is responding on port $LISTEN_PORT"
      return 0
    fi
    sleep 2
  done

  log_warn "cAdvisor did not respond within ${max_attempts} attempts — check logs"
  return 1
}

#######################################
# Append a cAdvisor scrape job to the Prometheus configuration.
# The job is appended to the end of the file, so this only proceeds when the
# last top-level key is a bare "scrape_configs:"; otherwise the list item
# would land under an unrelated section. A timestamped backup is taken, and
# if promtool is installed the result is validated and rolled back on error.
# Whenever the step is skipped or rolled back, ADD_TO_PROMETHEUS is reset to
# false so the summary prints the manual snippet instead.
# NOTE(review): assumes existing jobs are indented two spaces under
# scrape_configs; a file using a different indent needs a manual edit.
# Globals read:    PROMETHEUS_CONFIG, INSTALL_MODE, CONTAINER_NAME,
#                  LISTEN_PORT, DRY_RUN
# Globals written: ADD_TO_PROMETHEUS
#######################################
add_prometheus_scrape_config() {
  if [[ "$ADD_TO_PROMETHEUS" != "true" ]]; then
    return
  fi

  if [[ ! -f "$PROMETHEUS_CONFIG" ]]; then
    log_warn "Prometheus config not found at $PROMETHEUS_CONFIG — skipping"
    ADD_TO_PROMETHEUS=false
    return
  fi

  # Check if cadvisor job already exists
  if grep -q "job_name.*cadvisor" "$PROMETHEUS_CONFIG" 2> /dev/null; then
    log_info "cAdvisor scrape target already exists in $PROMETHEUS_CONFIG"
    return
  fi

  # Last top-level mapping key in the file (a line starting at column 0).
  local last_section
  last_section=$(awk '/^[A-Za-z_][A-Za-z0-9_]*:/ { key = $0 } END { print key }' \
    "$PROMETHEUS_CONFIG")
  if ! [[ "$last_section" =~ ^scrape_configs:[[:space:]]*(#.*)?$ ]]; then
    log_warn "scrape_configs is not the last section of $PROMETHEUS_CONFIG — skipping automatic edit"
    ADD_TO_PROMETHEUS=false
    return
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "[DRY RUN] Would add cAdvisor scrape config to $PROMETHEUS_CONFIG"
    return
  fi

  # Backup config
  local backup
  backup="${PROMETHEUS_CONFIG}.bak.$(date +%s)"
  cp -p -- "$PROMETHEUS_CONFIG" "$backup"

  # Determine the target address
  local target_host
  if [[ "$INSTALL_MODE" == "docker" ]]; then
    target_host="${CONTAINER_NAME}:8080"
  else
    target_host="localhost:${LISTEN_PORT}"
  fi

  # The leading blank line also terminates a final line lacking a newline.
  cat >> "$PROMETHEUS_CONFIG" << EOF

  - job_name: 'cadvisor'
    scrape_interval: 15s
    static_configs:
      - targets: ['${target_host}']
EOF

  # Never leave Prometheus with a config it cannot load.
  if command -v promtool &> /dev/null \
    && ! promtool check config "$PROMETHEUS_CONFIG" &> /dev/null; then
    cp -p -- "$backup" "$PROMETHEUS_CONFIG"
    log_warn "promtool rejected the updated config — restored $PROMETHEUS_CONFIG from $backup"
    ADD_TO_PROMETHEUS=false
    return
  fi

  log_info "Added cAdvisor scrape target to $PROMETHEUS_CONFIG"

  # Reload Prometheus if running
  if systemctl is-active --quiet prometheus 2> /dev/null; then
    if systemctl reload prometheus 2> /dev/null; then
      log_info "Prometheus configuration reloaded"
    elif systemctl restart prometheus 2> /dev/null; then
      log_info "Prometheus restarted"
    else
      log_warn "Could not reload or restart Prometheus — apply the new config manually"
    fi
  fi
}

#########################
###     Uninstall     ###
#########################

#######################################
# Remove the container, the systemd unit and the binary, whichever exist.
# Globals read: CONTAINER_NAME, DRY_RUN
#######################################
uninstall_cadvisor() {
  log_info "Uninstalling cAdvisor..."

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "[DRY RUN] Would stop and remove Docker container '$CONTAINER_NAME'"
    log_info "[DRY RUN] Would stop and remove systemd service 'cadvisor'"
    log_info "[DRY RUN] Would remove /usr/local/bin/cadvisor"
    return
  fi

  # Remove Docker container
  if container_listed -a; then
    docker stop "$CONTAINER_NAME" 2> /dev/null || true
    if docker rm "$CONTAINER_NAME" 2> /dev/null; then
      log_info "Removed Docker container: $CONTAINER_NAME"
    else
      log_warn "Could not remove Docker container: $CONTAINER_NAME"
    fi
  fi

  # Remove systemd service
  if [[ -f /etc/systemd/system/cadvisor.service ]]; then
    systemctl stop cadvisor 2> /dev/null || true
    systemctl disable cadvisor 2> /dev/null || true
    rm -f /etc/systemd/system/cadvisor.service
    systemctl daemon-reload
    log_info "Removed systemd service"
  fi

  # Remove binary
  if [[ -f /usr/local/bin/cadvisor ]]; then
    rm -f /usr/local/bin/cadvisor
    log_info "Removed /usr/local/bin/cadvisor"
  fi

  log_info "cAdvisor uninstalled"
}

#########################
###      Verify       ###
#########################

#######################################
# Print a post-install summary (nothing in dry-run mode).
# Globals read: DRY_RUN, INSTALL_MODE, CADVISOR_VERSION, LISTEN_PORT,
#               BIND_ADDRESS, CONTAINER_NAME, ADD_TO_PROMETHEUS,
#               PROMETHEUS_CONFIG
#######################################
verify_installation() {
  if [[ "$DRY_RUN" == "true" ]]; then
    return
  fi

  local host
  host=$(metrics_probe_host)

  # Capture the status separately: "systemctl is-active" prints the state AND
  # exits non-zero when inactive, so an inline "|| echo unknown" would print
  # two values.
  local status=""

  echo
  echo "=== cAdvisor Installation Summary ==="
  echo "  Mode:        $INSTALL_MODE"
  echo "  Version:     $CADVISOR_VERSION"
  echo "  Metrics URL: http://${host}:${LISTEN_PORT}/metrics"
  echo "  Web UI:      http://${host}:${LISTEN_PORT}/"
  echo

  if [[ "$INSTALL_MODE" == "docker" ]]; then
    status=$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME" 2> /dev/null) || true
    echo "  Container:   $CONTAINER_NAME"
  else
    status=$(systemctl is-active cadvisor 2> /dev/null) || true
    echo "  Service:     cadvisor.service"
  fi
  echo "  Status:      ${status:-unknown}"

  echo
  echo "Verify with:"
  echo "  curl -s http://${host}:${LISTEN_PORT}/metrics | head -20"
  echo

  if [[ "$ADD_TO_PROMETHEUS" == "true" ]]; then
    echo "Prometheus scrape target configured in: $PROMETHEUS_CONFIG"
    echo
  else
    echo "To add to Prometheus, add this to your scrape_configs:"
    echo
    echo "  - job_name: 'cadvisor'"
    echo "    scrape_interval: 15s"
    echo "    static_configs:"
    if [[ "$INSTALL_MODE" == "docker" ]]; then
      echo "      - targets: ['${CONTAINER_NAME}:8080']"
    else
      echo "      - targets: ['localhost:${LISTEN_PORT}']"
    fi
    echo
  fi
}

#########################
###       Main        ###
#########################

#######################################
# Entry point: parse flags, then uninstall or install in the chosen mode.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_arguments "$@"

  # Accept "v0.49.1" as well as "0.49.1"; the "v" is added back wherever the
  # version is interpolated into a URL or image tag.
  CADVISOR_VERSION="${CADVISOR_VERSION#v}"

  if [[ "$UNINSTALL" == "true" ]]; then
    check_permissions
    uninstall_cadvisor
    exit 0
  fi

  validate_settings
  check_permissions

  log_info "Installing cAdvisor v${CADVISOR_VERSION} (mode: ${INSTALL_MODE})"

  case "$INSTALL_MODE" in
    docker) install_docker_mode ;;
    binary) install_binary_mode ;;
    *)
      log_error "Unknown install mode: $INSTALL_MODE"
      exit 1
      ;;
  esac

  add_prometheus_scrape_config
  verify_installation
}

main "$@"