#!/bin/bash
################################################################################
# Script Name: add-prometheus-tls.sh
# Version:     1.01
# Description: Add TLS encryption to Prometheus and node_exporter
#              Auto-detects whether this is the Prometheus server (generates
#              a CA + server cert) or a target node (configures node_exporter
#              with a provided or generated cert signed by the Prometheus CA).
#
# Author:      Phil Connor
# Contact:     contact@mylinux.work
# Website:     https://mylinux.work
# License:     MIT
#
# Role Detection:
#   - "server" — Prometheus is installed: generates CA, server cert,
#                configures prometheus.yml for TLS scraping, and optionally
#                configures the local node_exporter too.
#   - "node"   — Only node_exporter found: configures node_exporter with
#                TLS using a cert signed by the Prometheus CA (CA cert must
#                be provided or copied from the server).
#
# Usage:
#   sudo ./add-prometheus-tls.sh                        # Auto-detect role
#   sudo ./add-prometheus-tls.sh --role server          # Force server mode
#   sudo ./add-prometheus-tls.sh --role node            # Force node mode
#   sudo ./add-prometheus-tls.sh --role node --ca-cert /path/to/ca.crt --ca-key /path/to/ca.key
#   sudo ./add-prometheus-tls.sh --deploy host1,host2   # Push TLS to remote nodes
#   sudo ./add-prometheus-tls.sh --deploy-file hosts.txt # Push TLS to nodes from file
#   sudo ./add-prometheus-tls.sh --status               # Show TLS status
#   sudo ./add-prometheus-tls.sh --remove               # Remove TLS config
#
################################################################################

set -euo pipefail

SCRIPT_VERSION="1.01"

# Paths
PROM_DIR="/etc/prometheus"
PROM_TLS_DIR="${PROM_DIR}/tls"
NODE_EXPORTER_DIR="/etc/node_exporter"
NODE_EXPORTER_TLS_DIR="${NODE_EXPORTER_DIR}/tls"
BACKUP_DIR="/var/backups/prometheus-tls"

# CA defaults
CA_DAYS=3650
CERT_DAYS=825
KEY_BITS=4096

# Runtime
ROLE=""              # "server" or "node"
CA_CERT=""           # path to existing CA cert (node mode)
CA_KEY=""            # path to existing CA key (node mode)
PROM_USER="prometheus"
NODE_USER="node_exporter"
HOSTNAME_FQDN=""
DEPLOY_TARGETS=""    # comma-separated hosts for --deploy
DEPLOY_FILE=""       # file containing hosts for --deploy-file
SSH_USER="root"      # SSH user for deploy
SSH_KEY=""           # optional SSH key path
DRY_RUN=false
DEBUG=${DEBUG:-}
ACTION=""            # "status" or "remove"; dispatched after all options are parsed
BACKUP_TIMESTAMP=""  # set on first backup so one run shares one backup directory
TEMP_PATHS=()        # temp files/dirs removed by cleanup_temp on exit
SSH_OPTS=()          # ssh/scp options, filled by build_ssh_opts

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print the command-line help and exit successfully.
show_usage() {
    cat <<EOF
Usage: sudo $0 [OPTIONS]

Add TLS encryption to Prometheus and node_exporter (v${SCRIPT_VERSION}).
With no options the role (server or node) is auto-detected.

Options:
  --role server|node     Force the role instead of auto-detecting it
  --ca-cert PATH         Existing CA certificate (node mode)
  --ca-key PATH          Existing CA private key (node mode)
  --hostname FQDN        Hostname for the certificate (default: hostname -f)
  --deploy HOST[,HOST]   Push TLS to the listed remote nodes over SSH
  --deploy-file FILE     Push TLS to the hosts listed in FILE
                         (one host per line, '#' starts a comment)
  --ssh-user USER        SSH user for deploy (default: root)
  --ssh-key PATH         SSH private key for deploy
  --dry-run              Show what would be done without changing anything
  --status               Show TLS status and exit
  --remove               Remove the TLS configuration (certificates are kept)
  -h, --help             Show this help and exit

Environment:
  DEBUG                  Set to any non-empty value for debug output
EOF
    exit 0
}

# Print an error message ($1) to stderr and abort the script.
die() {
    echo "ERROR: $1" >&2
    exit 1
}

# Print a warning ($1) to stderr.
warn() {
    echo "WARNING: $1" >&2
}

# Print an informational message ($1) to stdout.
info() {
    echo "[INFO] $1"
}

# Print a debug message to stderr when DEBUG is non-empty.
debug_echo() {
    if [[ -n "$DEBUG" ]]; then
        echo "[DEBUG] $*" >&2
    fi
}

# Remove every path registered in TEMP_PATHS. Installed as the EXIT trap so
# temporary CSRs/configs do not survive a failed run.
cleanup_temp() {
    local path
    if (( ${#TEMP_PATHS[@]} > 0 )); then
        for path in "${TEMP_PATHS[@]}"; do
            [[ -n "$path" ]] && rm -rf -- "$path"
        done
    fi
    return 0
}
trap cleanup_temp EXIT

# Ask a yes/no question.
# Arguments: $1 - prompt text, $2 - default answer, "y" or "n" (default "n")
# Returns:   0 for yes, 1 for no
confirm() {
    local prompt="$1"
    local default="${2:-n}"
    local answer=""

    # read fails on EOF (no terminal); use the default instead of letting
    # `set -e` abort the script in the middle of a run.
    read -r -p "$prompt" answer || answer=""

    if [[ "$default" == "y" ]]; then
        [[ ! "$answer" =~ ^[Nn]$ ]]
    else
        [[ "$answer" =~ ^[Yy]$ ]]
    fi
}

# Print $1 with leading and trailing whitespace removed.
trim_whitespace() {
    local text="$1"
    text="${text#"${text%%[![:space:]]*}"}"
    text="${text%"${text##*[![:space:]]}"}"
    printf '%s' "$text"
}

# Run openssl quietly; on failure abort and show what openssl reported.
# Arguments: $1 - description used in the error message, rest - openssl args
run_openssl() {
    local description="$1"
    shift
    local output
    if ! output=$(openssl "$@" 2>&1); then
        die "openssl failed while ${description}: ${output}"
    fi
}

# Copy $1 to $2 preserving attributes, unless both already name the same file
# (cp refuses to copy a file onto itself, which would abort under `set -e`).
copy_unless_same() {
    local src="$1"
    local dest="$2"
    if [[ "$src" -ef "$dest" ]]; then
        return 0
    fi
    cp -a "$src" "$dest"
}

# Print the path of the unit file backing systemd unit $1 (empty if unknown).
unit_file_path() {
    local path=""
    path=$(systemctl show -p FragmentPath "$1" 2>/dev/null | cut -d= -f2-) || path=""
    printf '%s' "$path"
}

# Restart a service and report whether it came back up. Never aborts the
# script: a failed restart is reported as a warning.
# Arguments: $1 - systemd unit name, $2 - name used in messages
restart_service() {
    local unit="$1"
    local label="$2"
    info "Restarting ${label}..."
    if systemctl restart "$unit" && systemctl is-active --quiet "$unit"; then
        info "${label} restarted successfully"
    else
        warn "${label} failed to start — check: journalctl -u ${unit}"
    fi
}

# Copy a file into this run's backup directory before it is modified.
# The source path is recreated below the backup directory so that files with
# the same basename (both services have a web.yml) cannot overwrite each other.
# Arguments: $1 - file to back up (missing files are silently skipped)
backup_file() {
    local file="$1"

    if [[ ! -f "$file" ]]; then
        return 0
    fi

    if [[ -z "$BACKUP_TIMESTAMP" ]]; then
        BACKUP_TIMESTAMP=$(date +%F_%H%M%S)
    fi
    local backup_path="${BACKUP_DIR}/${BACKUP_TIMESTAMP}"
    local dest="${backup_path}/${file#/}"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would backup $file -> ${dest}"
        return 0
    fi

    mkdir -p "$(dirname "$dest")"
    cp -a "$file" "$dest"
    info "Backed up $file -> ${dest}"
}

# ============================================================================
# ROLE DETECTION
# ============================================================================

# Set ROLE to "server" when a prometheus.service unit exists, otherwise to
# "node" when only node_exporter.service exists; abort when neither is found.
detect_role() {
    local has_prometheus=false
    local has_node_exporter=false

    if systemctl list-unit-files prometheus.service &>/dev/null && \
       systemctl cat prometheus.service &>/dev/null; then
        has_prometheus=true
    fi

    if systemctl list-unit-files node_exporter.service &>/dev/null && \
       systemctl cat node_exporter.service &>/dev/null; then
        has_node_exporter=true
    fi

    if [[ "$has_prometheus" == true ]]; then
        ROLE="server"
        info "Detected role: server (Prometheus installed)"
    elif [[ "$has_node_exporter" == true ]]; then
        ROLE="node"
        info "Detected role: node (node_exporter only)"
    else
        die "Neither Prometheus nor node_exporter detected. Install them first."
    fi
}

# Fill HOSTNAME_FQDN from `hostname -f` (falling back to `hostname`) unless
# it was already set with --hostname.
detect_hostname() {
    if [[ -n "$HOSTNAME_FQDN" ]]; then
        return 0
    fi

    HOSTNAME_FQDN=$(hostname -f 2>/dev/null || hostname)
    info "Using hostname: ${HOSTNAME_FQDN}"
}

# ============================================================================
# CERTIFICATE GENERATION
# ============================================================================

# Create the CA key and self-signed CA certificate in PROM_TLS_DIR, or keep
# the existing pair when the user declines to regenerate it.
# Sets CA_CERT and CA_KEY for the signing steps that follow.
generate_ca() {
    local ca_dir="${PROM_TLS_DIR}"
    local ca_cert="${ca_dir}/ca.crt"
    local ca_key="${ca_dir}/ca.key"

    if [[ -f "$ca_cert" && -f "$ca_key" ]]; then
        echo ""
        echo "  CA certificate already exists at ${ca_cert}"
        if ! confirm "  Regenerate CA? (will invalidate all existing certs) [y/N]: "; then
            info "Keeping existing CA"
            CA_CERT="$ca_cert"
            CA_KEY="$ca_key"
            return 0
        fi
        backup_file "$ca_cert"
        backup_file "$ca_key"
    fi

    info "Generating Certificate Authority..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would generate CA cert at ${ca_cert}"
        CA_CERT="$ca_cert"
        CA_KEY="$ca_key"
        return 0
    fi

    mkdir -p "$ca_dir"

    # Create the private key with a restrictive umask so it is never
    # world-readable, not even briefly before the chmod below.
    local previous_umask
    previous_umask=$(umask)
    umask 077
    run_openssl "generating the CA key" genrsa -out "$ca_key" "$KEY_BITS"
    umask "$previous_umask"

    run_openssl "creating the CA certificate" req -x509 -new -nodes \
        -key "$ca_key" \
        -sha256 \
        -days "$CA_DAYS" \
        -out "$ca_cert" \
        -subj "/CN=Prometheus CA/O=Prometheus/OU=Monitoring"

    chmod 644 "$ca_cert"
    chmod 600 "$ca_key"

    CA_CERT="$ca_cert"
    CA_KEY="$ca_key"

    info "CA certificate created: ${ca_cert}"
    info "CA key created: ${ca_key} (keep this safe!)"
}

# Generate a key and a CA-signed certificate with subjectAltName entries for
# HOSTNAME_FQDN, localhost, 127.0.0.1 and the first address from `hostname -I`.
# Arguments: $1 - base name, e.g. "prometheus" or "node_exporter"
#            $2 - directory that receives <name>.crt and <name>.key
#            $3 - user that should own the files (skipped if it does not exist)
# Globals:   CA_CERT, CA_KEY (read) must point at the signing CA
generate_cert() {
    local name="$1"
    local cert_dir="$2"
    local owner="$3"
    local cert_file="${cert_dir}/${name}.crt"
    local key_file="${cert_dir}/${name}.key"

    if [[ -f "$cert_file" && -f "$key_file" ]]; then
        echo ""
        echo "  Certificate for ${name} already exists."
        if ! confirm "  Regenerate? [y/N]: "; then
            info "Keeping existing ${name} certificate"
            return 0
        fi
        backup_file "$cert_file"
        backup_file "$key_file"
    fi

    info "Generating certificate for ${name}..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would generate cert at ${cert_file}"
        return 0
    fi

    if [[ ! -f "$CA_CERT" || ! -f "$CA_KEY" ]]; then
        die "CA certificate and key are required to sign the ${name} certificate"
    fi

    mkdir -p "$cert_dir"

    # All scratch files live in one directory that the EXIT trap removes even
    # when an openssl step fails.
    local work_dir
    work_dir=$(mktemp -d)
    TEMP_PATHS+=("$work_dir")
    local csr_conf="${work_dir}/csr.conf"
    local ext_conf="${work_dir}/ext.conf"
    local csr_file="${work_dir}/request.csr"

    # An empty address would produce an invalid "IP.2 =" line, so the extra
    # IP entry is only added when a usable, non-loopback address was found.
    local primary_ip=""
    primary_ip=$(hostname -I 2>/dev/null | awk '{print $1}') || primary_ip=""

    local alt_names
    alt_names="DNS.1 = ${HOSTNAME_FQDN}
DNS.2 = localhost
IP.1 = 127.0.0.1"
    if [[ -n "$primary_ip" && "$primary_ip" != "127.0.0.1" ]]; then
        alt_names+=$'\n'"IP.2 = ${primary_ip}"
    fi

    # Create CSR config with SANs
    cat > "$csr_conf" <<CSREOF
[req]
prompt = no
default_md = sha256
distinguished_name = dn
req_extensions = v3_req

[dn]
CN = ${HOSTNAME_FQDN}
O = Prometheus
OU = Monitoring

[v3_req]
subjectAltName = @alt_names

[alt_names]
${alt_names}
CSREOF

    # Create ext config for signing (x509 -req does not copy CSR extensions)
    cat > "$ext_conf" <<EXTEOF
basicConstraints = CA:FALSE
keyUsage = critical, digitalSignature, keyEncipherment
extendedKeyUsage = serverAuth
subjectAltName = @alt_names

[alt_names]
${alt_names}
EXTEOF

    # Generate key (restrictive umask: see generate_ca)
    local previous_umask
    previous_umask=$(umask)
    umask 077
    run_openssl "generating the ${name} key" genrsa -out "$key_file" "$KEY_BITS"
    umask "$previous_umask"

    # Generate CSR
    run_openssl "creating the ${name} signing request" req -new \
        -key "$key_file" \
        -out "$csr_file" \
        -config "$csr_conf"

    # Sign with CA
    run_openssl "signing the ${name} certificate" x509 -req \
        -in "$csr_file" \
        -CA "$CA_CERT" \
        -CAkey "$CA_KEY" \
        -CAcreateserial \
        -out "$cert_file" \
        -days "$CERT_DAYS" \
        -sha256 \
        -extfile "$ext_conf"

    # Set ownership
    chmod 644 "$cert_file"
    chmod 600 "$key_file"
    if id "$owner" &>/dev/null; then
        chown "${owner}:${owner}" "$cert_file" "$key_file"
    fi

    # Cleanup temp files
    rm -rf -- "$work_dir"

    info "Certificate created: ${cert_file}"
    info "Key created: ${key_file}"
}

# ============================================================================
# SYSTEMD UNIT PATCHING
# ============================================================================

# Make sure the unit's ExecStart passes --web.config.file.
# When the ExecStart line already continues with a trailing backslash the flag
# is inserted as the next continuation line; otherwise a continuation is
# started. Appending blindly would leave a flag line without a backslash in
# the middle of a multi-line command and cut the remaining arguments off.
# Arguments: $1 - service name without ".service" (also matched in ExecStart)
#            $2 - name used in messages
#            $3 - path of the web config file
ensure_web_config_flag() {
    local name="$1"
    local label="$2"
    local web_config="$3"
    local flag="--web.config.file=${web_config}"

    local service_file
    service_file=$(unit_file_path "${name}.service")

    if [[ -z "$service_file" || ! -f "$service_file" ]]; then
        warn "Could not find ${name}.service unit file"
        warn "Manually add '${flag}' to ${label} startup"
        return 0
    fi

    if grep -qF -- "web.config.file" "$service_file"; then
        debug_echo "${label} service already has --web.config.file flag"
        return 0
    fi

    info "Updating ${label} systemd service to use web.yml..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would add --web.config.file to ${service_file}"
        return 0
    fi

    if ! grep -qE "^ExecStart=.*${name}" "$service_file"; then
        warn "Could not auto-patch service file. Add manually:"
        warn "  ${flag}"
        return 0
    fi

    backup_file "$service_file"

    local patched_file
    patched_file=$(mktemp)
    TEMP_PATHS+=("$patched_file")

    awk -v name="$name" -v flag="$flag" '
        !patched && $0 ~ ("^ExecStart=.*" name) {
            if ($0 ~ /\\[ \t]*$/) {
                # Multi-line command: slot the flag in as a continuation line
                print
                print "    " flag " \\"
            } else {
                # Single-line command: start a continuation for the flag
                print $0 " \\"
                print "    " flag
            }
            patched = 1
            next
        }
        { print }
    ' "$service_file" > "$patched_file"

    # Write through the existing file so its owner and mode are kept
    cat "$patched_file" > "$service_file"
    rm -f -- "$patched_file"

    systemctl daemon-reload
    info "Added --web.config.file to ${label} service"
}

# Remove --web.config.file (and its value) from a unit file.
# A continuation line that held only the flag is dropped; when that line ended
# the command, the trailing backslash of the line before it is removed too so
# the command does not swallow the following directive. A flag written inline
# is cut out without deleting the rest of its line.
# Arguments: $1 - path of the unit file
strip_web_config_flag() {
    local service_file="$1"

    local stripped_file
    stripped_file=$(mktemp)
    TEMP_PATHS+=("$stripped_file")

    awk '
        function flush() { if (have) print held; have = 0 }
        index($0, "--web.config.file") {
            line = $0
            gsub(/[ \t]*--web\.config\.file(=|[ \t]+)[^ \t\\]+/, "", line)
            if (line ~ /^[ \t]*\\?[ \t]*$/) {
                if (line !~ /\\[ \t]*$/ && have) {
                    sub(/[ \t]*\\[ \t]*$/, "", held)
                }
                next
            }
            flush()
            held = line
            have = 1
            next
        }
        { flush(); held = $0; have = 1 }
        END { flush() }
    ' "$service_file" > "$stripped_file"

    cat "$stripped_file" > "$service_file"
    rm -f -- "$stripped_file"
}

# ============================================================================
# PROMETHEUS SERVER CONFIGURATION
# ============================================================================

# Write ${PROM_DIR}/web.yml so Prometheus serves HTTPS with the generated
# certificate, then make sure the service is started with that file.
configure_prometheus_tls() {
    local web_config="${PROM_DIR}/web.yml"

    if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
        echo ""
        echo "  Prometheus web.yml already has TLS config."
        if ! confirm "  Overwrite? [y/N]: "; then
            info "Keeping existing Prometheus TLS config"
            # The kept file is only used if the service is told about it
            update_prometheus_service
            return 0
        fi
        backup_file "$web_config"
    fi

    info "Configuring Prometheus TLS (web.yml)..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would create ${web_config}"
        return 0
    fi

    cat > "$web_config" <<WEBEOF
tls_server_config:
  cert_file: ${PROM_TLS_DIR}/prometheus.crt
  key_file: ${PROM_TLS_DIR}/prometheus.key
WEBEOF

    if id "$PROM_USER" &>/dev/null; then
        chown "${PROM_USER}:${PROM_USER}" "$web_config"
    fi
    chmod 644 "$web_config"

    # Ensure --web.config.file is in the systemd unit
    update_prometheus_service

    info "Prometheus web.yml created: ${web_config}"
}

# Add --web.config.file=${PROM_DIR}/web.yml to prometheus.service if missing.
update_prometheus_service() {
    ensure_web_config_flag "prometheus" "Prometheus" "${PROM_DIR}/web.yml"
}

# Switch scrape jobs in prometheus.yml from "scheme: http" to https and add a
# tls_config that trusts the generated CA. Only jobs with an explicit
# "scheme: http" line are touched; a reference snippet is written for the rest.
update_prometheus_scrape_configs() {
    local prom_config="${PROM_DIR}/prometheus.yml"

    if [[ ! -f "$prom_config" ]]; then
        warn "prometheus.yml not found at ${prom_config} — skipping scrape config update"
        return 0
    fi

    info "Updating prometheus.yml scrape configs for TLS..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would update scrape configs in ${prom_config}"
        return 0
    fi

    # Check if tls_config already exists for node targets
    if grep -q "tls_config" "$prom_config" 2>/dev/null; then
        info "prometheus.yml already contains tls_config entries"
        echo "  Review ${prom_config} to ensure all scrape jobs use TLS."
        return 0
    fi

    # Create a TLS snippet file that can be used as a reference
    local tls_snippet="${PROM_DIR}/tls_scrape.yml"
    cat > "$tls_snippet" <<SNIPEOF
# Reference TLS settings for scrape jobs (generated by add-prometheus-tls.sh).
# Prometheus does not load this file. Copy the two settings below into each
# scrape job in prometheus.yml, for example:
#
#   - job_name: 'node'
#     scheme: https
#     tls_config:
#       ca_file: ${PROM_TLS_DIR}/ca.crt
#     static_configs:
#       - targets: ['node1.example.com:9100']
#
scheme: https
tls_config:
  ca_file: ${PROM_TLS_DIR}/ca.crt
SNIPEOF

    if id "$PROM_USER" &>/dev/null; then
        chown "${PROM_USER}:${PROM_USER}" "$tls_snippet"
    fi

    # Auto-patch: rewrite the first "scheme: http" of every job
    local tmpfile
    tmpfile=$(mktemp)
    TEMP_PATHS+=("$tmpfile")

    local job_re='^[[:space:]]*-[[:space:]]*job_name:'
    local scheme_re='^([[:space:]]*)scheme:[[:space:]]*http[[:space:]]*$'
    local in_job=false
    local job_patched=false
    local patched_any=false
    local line indent

    # "|| [[ -n ... ]]" keeps a final line that has no trailing newline
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" =~ $job_re ]]; then
            in_job=true
            job_patched=false
        fi

        if [[ "$in_job" == true && "$job_patched" == false && "$line" =~ $scheme_re ]]; then
            # Reuse the line's own indentation so the YAML nesting is kept
            indent="${BASH_REMATCH[1]}"
            printf '%s\n' "${indent}scheme: https" >> "$tmpfile"
            printf '%s\n' "${indent}tls_config:" >> "$tmpfile"
            printf '%s\n' "${indent}  ca_file: ${PROM_TLS_DIR}/ca.crt" >> "$tmpfile"
            job_patched=true
            patched_any=true
        else
            printf '%s\n' "$line" >> "$tmpfile"
        fi
    done < "$prom_config"

    if [[ "$patched_any" != true ]]; then
        info "No 'scheme: http' lines found to auto-patch."
        info "Reference TLS snippet created at: ${tls_snippet}"
        info "Manually update your scrape jobs to use scheme: https with tls_config."
        rm -f -- "$tmpfile"
        return 0
    fi

    backup_file "$prom_config"

    # Write through the existing file so its owner and mode are kept
    cat "$tmpfile" > "$prom_config"
    rm -f -- "$tmpfile"

    info "Updated scrape configs in ${prom_config}"
    info "TLS reference snippet saved to: ${tls_snippet}"
}

# ============================================================================
# NODE EXPORTER CONFIGURATION
# ============================================================================

# Generate the node_exporter certificate, keep a copy of the CA certificate
# next to it, write ${NODE_EXPORTER_DIR}/web.yml and point the service at it.
configure_node_exporter_tls() {
    local tls_dir="$NODE_EXPORTER_TLS_DIR"
    local web_config="${NODE_EXPORTER_DIR}/web.yml"

    if [[ "$DRY_RUN" != true ]]; then
        mkdir -p "$tls_dir" "$NODE_EXPORTER_DIR"
    fi

    # Generate cert for this node
    generate_cert "node_exporter" "$tls_dir" "$NODE_USER"

    # Copy CA cert to node_exporter dir for reference
    if [[ "$DRY_RUN" != true && -f "$CA_CERT" ]]; then
        copy_unless_same "$CA_CERT" "${tls_dir}/ca.crt"
        if id "$NODE_USER" &>/dev/null; then
            chown "${NODE_USER}:${NODE_USER}" "${tls_dir}/ca.crt"
        fi
    fi

    if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
        echo ""
        echo "  node_exporter web.yml already has TLS config."
        if ! confirm "  Overwrite? [y/N]: "; then
            info "Keeping existing node_exporter TLS config"
            update_node_exporter_service
            return 0
        fi
        backup_file "$web_config"
    fi

    info "Configuring node_exporter TLS (web.yml)..."

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would create ${web_config}"
        return 0
    fi

    cat > "$web_config" <<WEBEOF
tls_server_config:
  cert_file: ${tls_dir}/node_exporter.crt
  key_file: ${tls_dir}/node_exporter.key
WEBEOF

    if id "$NODE_USER" &>/dev/null; then
        chown "${NODE_USER}:${NODE_USER}" "$web_config"
    fi
    chmod 644 "$web_config"

    update_node_exporter_service

    info "node_exporter web.yml created: ${web_config}"
}

# Add --web.config.file=${NODE_EXPORTER_DIR}/web.yml to node_exporter.service
# if missing.
update_node_exporter_service() {
    ensure_web_config_flag "node_exporter" "node_exporter" "${NODE_EXPORTER_DIR}/web.yml"
}

# ============================================================================
# STATUS
# ============================================================================

# Print the notAfter date of certificate $1, or "unknown" if it cannot be read.
cert_expiry() {
    local expiry=""
    expiry=$(openssl x509 -enddate -noout -in "$1" 2>/dev/null | cut -d= -f2) || expiry=""
    printf '%s' "${expiry:-unknown}"
}

# Report whether a local endpoint answers over HTTPS or still over plain HTTP.
# Arguments: $1 - host:port, $2 - request path
probe_https() {
    local base="$1"
    local path="$2"

    if ! command -v curl &>/dev/null; then
        echo "  HTTPS:        ? curl not installed — cannot probe"
    elif curl -sk --max-time 3 "https://${base}${path}" &>/dev/null; then
        echo "  HTTPS:        ✓ responding on https://${base}"
    elif curl -s --max-time 3 "http://${base}${path}" &>/dev/null; then
        echo "  HTTPS:        ✗ still serving plain HTTP"
    else
        echo "  HTTPS:        ? could not connect"
    fi
}

# Print the TLS state of Prometheus and node_exporter on this host.
show_status() {
    echo ""
    echo "=========================================="
    echo "Prometheus TLS Status (v${SCRIPT_VERSION})"
    echo "=========================================="
    echo ""

    # Check Prometheus
    echo "--- Prometheus Server ---"
    if systemctl cat prometheus.service &>/dev/null; then
        local prom_status="installed"
        if systemctl is-active --quiet prometheus 2>/dev/null; then
            prom_status="running"
        fi
        echo "  Service:      ${prom_status}"

        if [[ -f "${PROM_DIR}/web.yml" ]] && grep -q "tls_server_config" "${PROM_DIR}/web.yml" 2>/dev/null; then
            echo "  TLS:          ✓ enabled (web.yml)"
        else
            echo "  TLS:          ✗ not configured"
        fi

        if [[ -f "${PROM_TLS_DIR}/ca.crt" ]]; then
            echo "  CA cert:      ✓ present (expires: $(cert_expiry "${PROM_TLS_DIR}/ca.crt"))"
        else
            echo "  CA cert:      ✗ not found"
        fi

        if [[ -f "${PROM_TLS_DIR}/prometheus.crt" ]]; then
            echo "  Server cert:  ✓ present (expires: $(cert_expiry "${PROM_TLS_DIR}/prometheus.crt"))"
        else
            echo "  Server cert:  ✗ not found"
        fi

        # Verify Prometheus is actually serving HTTPS
        probe_https "localhost:9090" "/-/healthy"
    else
        echo "  Not installed"
    fi

    echo ""

    # Check node_exporter
    echo "--- node_exporter ---"
    if systemctl cat node_exporter.service &>/dev/null; then
        local node_status="installed"
        if systemctl is-active --quiet node_exporter 2>/dev/null; then
            node_status="running"
        fi
        echo "  Service:      ${node_status}"

        if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]] && grep -q "tls_server_config" "${NODE_EXPORTER_DIR}/web.yml" 2>/dev/null; then
            echo "  TLS:          ✓ enabled (web.yml)"
        else
            echo "  TLS:          ✗ not configured"
        fi

        if [[ -f "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt" ]]; then
            echo "  Cert:         ✓ present (expires: $(cert_expiry "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt"))"
        else
            echo "  Cert:         ✗ not found"
        fi

        # Verify node_exporter is actually serving HTTPS
        probe_https "localhost:9100" "/metrics"
    else
        echo "  Not installed"
    fi

    echo ""
}

# ============================================================================
# REMOVE
# ============================================================================

# Remove one service's web.yml and its --web.config.file flag, then restart
# the service. Does nothing when the web.yml does not exist.
# Arguments: $1 - service name, $2 - name used in messages, $3 - web.yml path
remove_service_tls() {
    local name="$1"
    local label="$2"
    local web_config="$3"

    if [[ ! -f "$web_config" ]]; then
        return 0
    fi

    backup_file "$web_config"
    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would remove ${label} web.yml"
    else
        rm -f -- "$web_config"
        info "Removed ${label} web.yml"
    fi

    # Remove --web.config.file from service
    local service_file
    service_file=$(unit_file_path "${name}.service")
    if [[ -n "$service_file" && -f "$service_file" ]] && grep -qF -- "--web.config.file" "$service_file"; then
        backup_file "$service_file"
        if [[ "$DRY_RUN" == true ]]; then
            info "[DRY RUN] Would remove --web.config.file from ${name}.service"
        else
            strip_web_config_flag "$service_file"
            systemctl daemon-reload
            info "Removed --web.config.file from ${name}.service"
        fi
    fi

    if [[ "$DRY_RUN" != true ]]; then
        systemctl restart "$name" 2>/dev/null || warn "Could not restart ${label}"
    fi
}

# Undo the TLS configuration of both services. Certificates and keys are
# left in place.
do_remove() {
    echo ""
    echo "=========================================="
    echo "Remove Prometheus TLS Configuration"
    echo "=========================================="
    echo ""

    remove_service_tls "prometheus" "Prometheus" "${PROM_DIR}/web.yml"
    remove_service_tls "node_exporter" "node_exporter" "${NODE_EXPORTER_DIR}/web.yml"

    echo ""
    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] No changes were made."
    else
        info "TLS configuration removed. Backups saved to: ${BACKUP_DIR}"
    fi

    # Scrape jobs are not reverted automatically; say so instead of leaving
    # Prometheus silently failing to scrape plain-HTTP targets.
    if [[ -f "${PROM_DIR}/prometheus.yml" ]] && grep -q "scheme: https" "${PROM_DIR}/prometheus.yml" 2>/dev/null; then
        warn "${PROM_DIR}/prometheus.yml still contains 'scheme: https' scrape jobs — revert them or restore a backup"
    fi

    info "Note: Certificate files in ${PROM_TLS_DIR} and ${NODE_EXPORTER_TLS_DIR} were NOT deleted."
    info "Remove them manually if no longer needed."
}

# ============================================================================
# SERVER SETUP
# ============================================================================

# Full server workflow: CA, server certificate, web.yml, scrape configs,
# optional local node_exporter, service restarts and a summary.
setup_server() {
    echo ""
    echo "=========================================="
    echo "Prometheus Server TLS Setup"
    echo "Version: ${SCRIPT_VERSION}"
    echo "=========================================="
    echo ""

    detect_hostname

    if [[ "$DRY_RUN" != true ]]; then
        mkdir -p "$PROM_TLS_DIR" "$BACKUP_DIR"
    fi

    # Step 1: Generate CA
    echo ""
    echo "=== Step 1: Certificate Authority ==="
    generate_ca

    # Step 2: Generate Prometheus server cert
    echo ""
    echo "=== Step 2: Prometheus Server Certificate ==="
    generate_cert "prometheus" "$PROM_TLS_DIR" "$PROM_USER"

    # Step 3: Configure Prometheus web.yml
    echo ""
    echo "=== Step 3: Prometheus TLS Configuration ==="
    configure_prometheus_tls

    # Step 4: Update scrape configs
    echo ""
    echo "=== Step 4: Scrape Configuration ==="
    update_prometheus_scrape_configs

    # Step 5: Optionally configure local node_exporter
    if systemctl cat node_exporter.service &>/dev/null; then
        echo ""
        echo "=== Step 5: Local node_exporter ==="
        echo "  node_exporter detected on this server."
        if confirm "  Configure TLS for local node_exporter too? [Y/n]: " y; then
            configure_node_exporter_tls
        fi
    fi

    # Step 6: Restart services
    echo ""
    echo "=== Restarting Services ==="

    if [[ "$DRY_RUN" != true ]]; then
        restart_service "prometheus" "Prometheus"
        if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]]; then
            restart_service "node_exporter" "node_exporter"
        fi
    else
        info "[DRY RUN] Would restart prometheus and node_exporter"
    fi

    # Summary
    echo ""
    echo "=========================================="
    echo "TLS Setup Complete!"
    echo "=========================================="
    echo ""
    echo "CA Certificate:     ${PROM_TLS_DIR}/ca.crt"
    echo "CA Key:             ${PROM_TLS_DIR}/ca.key"
    echo "Server Certificate: ${PROM_TLS_DIR}/prometheus.crt"
    echo "Backups:            ${BACKUP_DIR}"
    echo ""
    echo "To configure remote nodes, copy the CA cert and key to each node:"
    echo ""
    echo "  scp ${PROM_TLS_DIR}/ca.crt ${PROM_TLS_DIR}/ca.key user@node:/tmp/"
    echo "  ssh user@node 'sudo ./add-prometheus-tls.sh --role node --ca-cert /tmp/ca.crt --ca-key /tmp/ca.key'"
    echo ""
    echo "To verify: curl -s --cacert ${PROM_TLS_DIR}/ca.crt https://localhost:9090/-/healthy"
    echo ""
}

# ============================================================================
# NODE SETUP
# ============================================================================

# Make CA_CERT and CA_KEY point at existing files: use the --ca-cert/--ca-key
# values, else a CA already present on this host, else ask for the paths.
resolve_node_ca() {
    if [[ -z "$CA_CERT" || -z "$CA_KEY" ]]; then
        # Check if they exist locally (maybe copied from server)
        if [[ -f "${NODE_EXPORTER_TLS_DIR}/ca.crt" && -f "${NODE_EXPORTER_TLS_DIR}/ca.key" ]]; then
            CA_CERT="${NODE_EXPORTER_TLS_DIR}/ca.crt"
            CA_KEY="${NODE_EXPORTER_TLS_DIR}/ca.key"
            info "Found existing CA files in ${NODE_EXPORTER_TLS_DIR}"
            return 0
        fi
        if [[ -f "${PROM_TLS_DIR}/ca.crt" && -f "${PROM_TLS_DIR}/ca.key" ]]; then
            CA_CERT="${PROM_TLS_DIR}/ca.crt"
            CA_KEY="${PROM_TLS_DIR}/ca.key"
            info "Found existing CA files in ${PROM_TLS_DIR}"
            return 0
        fi

        echo "  No CA certificate found. You need the CA cert and key from your"
        echo "  Prometheus server to sign this node's certificate."
        echo ""
        echo "  Copy them from the Prometheus server:"
        echo "    scp prometheus-server:${PROM_TLS_DIR}/ca.crt /tmp/"
        echo "    scp prometheus-server:${PROM_TLS_DIR}/ca.key /tmp/"
        echo ""
        # EOF leaves the variable empty; the checks below then report it
        read -r -p "  Path to CA certificate: " CA_CERT || true
        read -r -p "  Path to CA key: " CA_KEY || true
    fi

    # Validate provided paths
    if [[ ! -f "$CA_CERT" ]]; then
        die "CA certificate not found: ${CA_CERT}"
    fi
    if [[ ! -f "$CA_KEY" ]]; then
        die "CA key not found: ${CA_KEY}"
    fi
}

# Full node workflow: locate the CA, sign a node_exporter certificate,
# configure and restart node_exporter, print a summary.
setup_node() {
    echo ""
    echo "=========================================="
    echo "Node Exporter TLS Setup"
    echo "Version: ${SCRIPT_VERSION}"
    echo "=========================================="
    echo ""

    detect_hostname

    if [[ "$DRY_RUN" != true ]]; then
        mkdir -p "$NODE_EXPORTER_TLS_DIR" "$BACKUP_DIR"
    fi

    # Check for CA cert/key
    resolve_node_ca

    # Copy CA files to node_exporter tls dir.
    # NOTE(review): this leaves a copy of the CA private key on every node so
    # that later runs can re-sign; anyone who can read it can mint trusted
    # certificates. Consider removing ca.key from nodes after setup.
    if [[ "$DRY_RUN" != true ]]; then
        copy_unless_same "$CA_CERT" "${NODE_EXPORTER_TLS_DIR}/ca.crt"
        copy_unless_same "$CA_KEY" "${NODE_EXPORTER_TLS_DIR}/ca.key"
        chmod 644 "${NODE_EXPORTER_TLS_DIR}/ca.crt"
        chmod 600 "${NODE_EXPORTER_TLS_DIR}/ca.key"
    fi

    # Generate cert and configure
    echo ""
    echo "=== Generating node_exporter Certificate ==="
    configure_node_exporter_tls

    # Restart
    echo ""
    echo "=== Restarting node_exporter ==="
    if [[ "$DRY_RUN" != true ]]; then
        restart_service "node_exporter" "node_exporter"
    else
        info "[DRY RUN] Would restart node_exporter"
    fi

    # Summary
    echo ""
    echo "=========================================="
    echo "node_exporter TLS Setup Complete!"
    echo "=========================================="
    echo ""
    echo "Certificate: ${NODE_EXPORTER_TLS_DIR}/node_exporter.crt"
    echo "Key:         ${NODE_EXPORTER_TLS_DIR}/node_exporter.key"
    echo "Backups:     ${BACKUP_DIR}"
    echo ""
    echo "Add this node to your Prometheus server's prometheus.yml:"
    echo ""
    echo "  - job_name: 'node'"
    echo "    scheme: https"
    echo "    tls_config:"
    echo "      ca_file: ${PROM_TLS_DIR}/ca.crt"
    echo "    static_configs:"
    echo "      - targets: ['${HOSTNAME_FQDN}:9100']"
    echo ""
    echo "To verify: curl -s --cacert ${NODE_EXPORTER_TLS_DIR}/ca.crt https://localhost:9100/metrics | head"
    echo ""
}

# ============================================================================
# REMOTE DEPLOY
# ============================================================================

# Fill SSH_OPTS with the options shared by ssh and scp. An array keeps a key
# path containing spaces intact as a single argument.
build_ssh_opts() {
    SSH_OPTS=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
    if [[ -n "$SSH_KEY" ]]; then
        SSH_OPTS+=(-i "$SSH_KEY")
    fi
}

# Copy the CA and this script to one host and run it there in node mode.
# Every remote step is checked explicitly so that a failure on one host is
# reported and does not abort the whole deployment.
# Arguments: $1 - host, $2 - absolute path of this script
# Returns:   0 on success, 1 on any failure
deploy_to_host() {
    local host="$1"
    local script_path="$2"
    local target="${SSH_USER}@${host}"
    local script_name
    script_name=$(basename "$script_path")

    # Test SSH connectivity
    if ! ssh "${SSH_OPTS[@]}" "$target" "echo ok" &>/dev/null; then
        warn "Cannot connect to ${host} — skipping"
        return 1
    fi

    # Create temp dir on remote
    local remote_tmp=""
    if ! remote_tmp=$(ssh "${SSH_OPTS[@]}" "$target" "mktemp -d /tmp/prom-tls-XXXXXX") || [[ -z "$remote_tmp" ]]; then
        warn "Could not create a temporary directory on ${host} — skipping"
        return 1
    fi

    # The remote script insists on root; use sudo for any other SSH user
    local remote_sudo=""
    if [[ "$SSH_USER" != "root" ]]; then
        remote_sudo="sudo -n "
    fi

    local remote_dir remote_script
    printf -v remote_dir '%q' "$remote_tmp"
    printf -v remote_script '%q' "${remote_tmp}/${script_name}"

    local status=0

    # Copy CA cert, CA key, and this script
    if ! scp "${SSH_OPTS[@]}" "${PROM_TLS_DIR}/ca.crt" "${PROM_TLS_DIR}/ca.key" "$script_path" \
            "${target}:${remote_tmp}/" 2>/dev/null; then
        warn "Failed to copy files to ${host} — skipping"
        status=1
    else
        # Run the script in node mode on the remote host
        info "Running node setup on ${host}..."
        if ssh "${SSH_OPTS[@]}" "$target" \
                "chmod +x ${remote_script} && ${remote_sudo}${remote_script} --role node --ca-cert ${remote_dir}/ca.crt --ca-key ${remote_dir}/ca.key"; then
            info "${host}: TLS configured successfully"
        else
            warn "${host}: Setup failed — check logs on that host"
            status=1
        fi
    fi

    # Cleanup temp files on remote (always: the directory holds the CA key)
    if ! ssh "${SSH_OPTS[@]}" "$target" "rm -rf ${remote_dir}" &>/dev/null; then
        warn "Could not remove ${remote_tmp} on ${host} — it contains the CA key, remove it manually"
    fi

    return "$status"
}

# Push TLS to every host named by --deploy and/or --deploy-file.
# Returns: 0 when all hosts succeeded, 1 when at least one failed
deploy_to_nodes() {
    local hosts=()
    local entry line

    # Build host list from --deploy and/or --deploy-file
    if [[ -n "$DEPLOY_TARGETS" ]]; then
        local target_hosts=()
        IFS=',' read -ra target_hosts <<< "$DEPLOY_TARGETS"
        for entry in "${target_hosts[@]}"; do
            entry=$(trim_whitespace "$entry")
            [[ -n "$entry" ]] && hosts+=("$entry")
        done
    fi

    if [[ -n "$DEPLOY_FILE" ]]; then
        if [[ ! -f "$DEPLOY_FILE" ]]; then
            die "Deploy file not found: ${DEPLOY_FILE}"
        fi
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Skip blank lines and comments
            line="${line%%#*}"
            line=$(trim_whitespace "$line")
            [[ -z "$line" ]] && continue
            hosts+=("$line")
        done < "$DEPLOY_FILE"
    fi

    if [[ ${#hosts[@]} -eq 0 ]]; then
        die "No target hosts specified"
    fi

    # Verify CA exists (must run server setup first)
    if [[ ! -f "${PROM_TLS_DIR}/ca.crt" || ! -f "${PROM_TLS_DIR}/ca.key" ]]; then
        die "CA not found at ${PROM_TLS_DIR}/. Run server setup first: $0 --role server"
    fi

    local script_path
    script_path=$(readlink -f "$0")

    build_ssh_opts

    echo ""
    echo "=========================================="
    echo "Deploy TLS to Remote Nodes"
    echo "=========================================="
    echo ""
    echo "  CA:       ${PROM_TLS_DIR}/ca.crt"
    echo "  SSH user: ${SSH_USER}"
    echo "  Targets:  ${hosts[*]}"
    echo ""

    local succeeded_hosts=()
    local failed_hosts=()
    local host

    for host in "${hosts[@]}"; do
        echo "--- ${host} ---"

        if [[ "$DRY_RUN" == true ]]; then
            info "[DRY RUN] Would deploy TLS to ${host}"
            succeeded_hosts+=("$host")
            continue
        fi

        if deploy_to_host "$host" "$script_path"; then
            succeeded_hosts+=("$host")
        else
            failed_hosts+=("$host")
        fi
        echo ""
    done

    # Summary
    echo "=========================================="
    echo "Deploy Complete"
    echo "=========================================="
    echo ""
    echo "  Succeeded: ${#succeeded_hosts[@]}"
    echo "  Failed:    ${#failed_hosts[@]}"
    if [[ ${#failed_hosts[@]} -gt 0 ]]; then
        echo "  Failed hosts: ${failed_hosts[*]}"
    fi

    # Print prometheus.yml snippet for all successful hosts
    if [[ ${#succeeded_hosts[@]} -gt 0 ]]; then
        local target_list=""
        for host in "${succeeded_hosts[@]}"; do
            target_list+="${target_list:+, }'${host}:9100'"
        done

        echo ""
        echo "Add these targets to your prometheus.yml:"
        echo ""
        echo "  - job_name: 'node'"
        echo "    scheme: https"
        echo "    tls_config:"
        echo "      ca_file: ${PROM_TLS_DIR}/ca.crt"
        echo "    static_configs:"
        echo "      - targets: [${target_list}]"
    fi
    echo ""

    if [[ ${#failed_hosts[@]} -gt 0 ]]; then
        return 1
    fi
    return 0
}

# ============================================================================
# ARGUMENT PARSING
# ============================================================================

# Abort with a clear message when an option that needs a value is last.
# Arguments: $1 - option name, $2 - number of arguments still to be parsed
require_value() {
    if [[ "$2" -lt 2 ]]; then
        die "Option $1 requires a value. Use --help for usage."
    fi
}

# Parse the command line into the runtime globals. --status and --remove are
# only recorded in ACTION and run by main once everything is parsed, so that
# options given after them (such as --dry-run) still take effect.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --role)
                require_value "$1" "$#"
                ROLE="$2"
                if [[ "$ROLE" != "server" && "$ROLE" != "node" ]]; then
                    die "Invalid role: ${ROLE}. Must be 'server' or 'node'"
                fi
                shift 2
                ;;
            --ca-cert)
                require_value "$1" "$#"
                CA_CERT="$2"
                shift 2
                ;;
            --ca-key)
                require_value "$1" "$#"
                CA_KEY="$2"
                shift 2
                ;;
            --hostname)
                require_value "$1" "$#"
                HOSTNAME_FQDN="$2"
                shift 2
                ;;
            --deploy)
                require_value "$1" "$#"
                DEPLOY_TARGETS="$2"
                shift 2
                ;;
            --deploy-file)
                require_value "$1" "$#"
                DEPLOY_FILE="$2"
                shift 2
                ;;
            --ssh-user)
                require_value "$1" "$#"
                SSH_USER="$2"
                shift 2
                ;;
            --ssh-key)
                require_value "$1" "$#"
                SSH_KEY="$2"
                shift 2
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --status)
                ACTION="status"
                shift
                ;;
            --remove)
                ACTION="remove"
                shift
                ;;
            -h|--help)
                show_usage
                ;;
            *)
                die "Unknown option: $1. Use --help for usage."
                ;;
        esac
    done
}

# ============================================================================
# MAIN
# ============================================================================

# Entry point: parse options, then run status, remove, deploy or the setup
# for the selected or detected role.
main() {
    # Parse first so that --help works without root
    parse_arguments "$@"

    if [[ $EUID -ne 0 ]]; then
        die "This script must be run as root"
    fi

    case "$ACTION" in
        status)
            show_status
            exit 0
            ;;
        remove)
            do_remove
            exit 0
            ;;
    esac

    # Check openssl is available
    if ! command -v openssl &>/dev/null; then
        die "openssl is required but not installed"
    fi

    # Deploy mode — push TLS to remote nodes from the Prometheus server
    if [[ -n "$DEPLOY_TARGETS" || -n "$DEPLOY_FILE" ]]; then
        deploy_to_nodes
        exit $?
    fi

    # Auto-detect role if not specified
    if [[ -z "$ROLE" ]]; then
        detect_role
    fi

    case "$ROLE" in
        server)
            setup_server
            ;;
        node)
            setup_node
            ;;
        *)
            die "Invalid role: ${ROLE}. Must be 'server' or 'node'"
            ;;
    esac
}

main "$@"