Files
linux-scripts/add-prometheus-tls.sh
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

1235 lines
39 KiB
Bash

#!/bin/bash
################################################################################
# Script Name: add-prometheus-tls.sh
# Version: 1.01
# Description: Add TLS encryption to Prometheus and node_exporter
# Auto-detects whether this is the Prometheus server (generates
# a CA + server cert) or a target node (configures node_exporter
# with a provided or generated cert signed by the Prometheus CA).
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Role Detection:
# - "server" — Prometheus is installed: generates CA, server cert,
# configures prometheus.yml for TLS scraping, and optionally
# configures the local node_exporter too.
# - "node" — Only node_exporter found: configures node_exporter with
# TLS using a cert signed by the Prometheus CA (CA cert must
# be provided or copied from the server).
#
# Usage:
# sudo ./add-prometheus-tls.sh # Auto-detect role
# sudo ./add-prometheus-tls.sh --role server # Force server mode
# sudo ./add-prometheus-tls.sh --role node # Force node mode
# sudo ./add-prometheus-tls.sh --role node --ca-cert /path/to/ca.crt --ca-key /path/to/ca.key
# sudo ./add-prometheus-tls.sh --deploy host1,host2 # Push TLS to remote nodes
# sudo ./add-prometheus-tls.sh --deploy-file hosts.txt # Push TLS to nodes from file
# sudo ./add-prometheus-tls.sh --status # Show TLS status
# sudo ./add-prometheus-tls.sh --remove # Remove TLS config
#
################################################################################
# Strict mode: abort on command failure (-e), on use of unset variables (-u)
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Version string shown in usage/status output and in generated config headers.
# Matches the "Version:" field of the file header.
SCRIPT_VERSION="1.01"

# Paths
PROM_DIR="/etc/prometheus"
PROM_TLS_DIR="${PROM_DIR}/tls"
NODE_EXPORTER_DIR="/etc/node_exporter"
NODE_EXPORTER_TLS_DIR="${NODE_EXPORTER_DIR}/tls"
BACKUP_DIR="/var/backups/prometheus-tls"

# CA defaults
CA_DAYS=3650    # validity of the CA certificate, in days
CERT_DAYS=825   # validity of issued server/node certificates, in days
KEY_BITS=4096   # RSA key size used for every generated key

# Runtime (mostly overridden by parse_arguments)
ROLE=""            # "server" or "node"
CA_CERT=""         # path to existing CA cert (node mode)
CA_KEY=""          # path to existing CA key (node mode)
PROM_USER="prometheus"
NODE_USER="node_exporter"
HOSTNAME_FQDN=""
DEPLOY_TARGETS=""  # comma-separated hosts for --deploy
DEPLOY_FILE=""     # file containing hosts for --deploy-file
SSH_USER="root"    # SSH user for deploy
SSH_KEY=""         # optional SSH key path
DRY_RUN=false
DEBUG=${DEBUG:-}   # any non-empty value enables debug_echo output
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#######################################
# Print the command-line help text and terminate the script.
# Globals:   SCRIPT_VERSION (read)
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 0
#######################################
show_usage() {
  local prog="$0"
  printf '%s\n' \
    "Usage: ${prog} [OPTIONS]" \
    "Add TLS to Prometheus and node_exporter (v${SCRIPT_VERSION})." \
    'Auto-detects server vs node role based on installed services.' \
    'OPTIONS:' \
    '--role ROLE Force role: "server" or "node" (default: auto-detect)' \
    '--ca-cert FILE Path to existing CA certificate (node mode)' \
    '--ca-key FILE Path to existing CA private key (node mode)' \
    '--hostname NAME Override hostname/FQDN for certificate CN/SAN' \
    '--deploy HOSTS Deploy TLS to remote nodes (comma-separated hostnames/IPs)' \
    '--deploy-file F Deploy TLS to remote nodes listed in file (one per line)' \
    '--ssh-user USER SSH user for deploy (default: root)' \
    '--ssh-key FILE SSH private key for deploy (default: agent/default key)' \
    '--dry-run Show what would be done without making changes' \
    '--status Show current TLS status' \
    '--remove Remove TLS configuration (restore plain HTTP)' \
    '-h, --help Show this help' \
    'EXAMPLES:' \
    "${prog} # Auto-detect and configure" \
    "${prog} --role server # Set up Prometheus server with TLS" \
    "${prog} --role node --ca-cert ca.crt --ca-key ca.key # Configure node with existing CA" \
    "${prog} --deploy web1,web2,db1 # Push CA & configure remote nodes" \
    "${prog} --deploy-file /etc/prometheus/targets.txt # Deploy from a hosts file" \
    "${prog} --status # Check TLS status" \
    "${prog} --remove # Remove TLS config"
  exit 0
}
#######################################
# Report a fatal error and terminate the script.
# Arguments: $1 - error message
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}
#######################################
# Print a non-fatal warning.
# Arguments: $1 - warning message
# Outputs:   "WARNING: <message>" on stderr
#######################################
warn() {
  printf 'WARNING: %s\n' "$1" >&2
}
#######################################
# Print an informational progress message.
# Arguments: $1 - message text
# Outputs:   "[INFO] <message>" on stdout
#######################################
info() {
  printf '[INFO] %s\n' "$1"
}
#######################################
# Print a debug trace; silent unless DEBUG is non-empty.
# Globals:   DEBUG (read)
# Arguments: $@ - words of the message (joined with spaces)
# Outputs:   "[DEBUG] <message>" on stderr when enabled
#######################################
debug_echo() {
  [[ -n "$DEBUG" ]] || return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
#######################################
# Copy a file into a timestamped directory below BACKUP_DIR.
# A missing source file is not an error (nothing to back up).
# In dry-run mode nothing is created on disk; the intended action is only
# reported.
# Globals:   BACKUP_DIR (read), DRY_RUN (read)
# Arguments: $1 - path of the file to back up
# Outputs:   progress message via info
# Returns:   0 on success or when the file does not exist
#######################################
backup_file() {
  local file="$1"
  [[ -f "$file" ]] || return 0

  local timestamp backup_path target
  timestamp=$(date +%F_%H%M%S)
  backup_path="${BACKUP_DIR}/${timestamp}"
  target="${backup_path}/$(basename -- "$file")"

  if [[ "$DRY_RUN" == true ]]; then
    # Dry run must not touch the filesystem, so the backup directory is
    # deliberately not created here.
    info "[DRY RUN] Would backup $file -> ${target}"
    return 0
  fi

  mkdir -p -- "$backup_path"
  cp -a -- "$file" "${backup_path}/"
  info "Backed up $file -> ${target}"
}
# ============================================================================
# ROLE DETECTION
# ============================================================================
#######################################
# Decide whether this host is the Prometheus server or a plain node by
# asking systemd which units exist. Prometheus takes precedence when both
# units are present.
# Globals:   ROLE (written: "server" or "node")
# Outputs:   detected role via info
# Returns:   0 on success; exits through die when neither unit exists
#######################################
detect_role() {
  local unit
  local installed=""

  # Probe both units up front; a unit counts as installed only when systemd
  # both lists it and can print its definition.
  for unit in prometheus node_exporter; do
    if systemctl list-unit-files "${unit}.service" &>/dev/null \
      && systemctl cat "${unit}.service" &>/dev/null; then
      installed+=" ${unit}"
    fi
  done

  case "$installed" in
    *" prometheus"*)
      ROLE="server"
      info "Detected role: server (Prometheus installed)"
      ;;
    *" node_exporter"*)
      ROLE="node"
      info "Detected role: node (node_exporter only)"
      ;;
    *)
      die "Neither Prometheus nor node_exporter detected. Install them first."
      ;;
  esac
}
#######################################
# Fill in HOSTNAME_FQDN unless the caller already supplied one.
# Tries `hostname -f` first and falls back to plain `hostname`.
# Globals:   HOSTNAME_FQDN (read, written when empty)
# Outputs:   chosen hostname via info (only when it was detected here)
#######################################
detect_hostname() {
  if [[ -z "$HOSTNAME_FQDN" ]]; then
    HOSTNAME_FQDN="$(hostname -f 2>/dev/null || hostname)"
    info "Using hostname: ${HOSTNAME_FQDN}"
  fi
}
# ============================================================================
# CERTIFICATE GENERATION
# ============================================================================
#######################################
# Create (or keep) the Certificate Authority used to sign all certificates.
# When a CA already exists the user is asked before it is regenerated; the
# old files are backed up first. A prompt that cannot be answered (EOF, no
# terminal) counts as "no".
# Globals:   PROM_TLS_DIR, KEY_BITS, CA_DAYS, DRY_RUN (read)
#            CA_CERT, CA_KEY (written: paths of the CA files)
# Outputs:   progress via info; prompt on the terminal
# Returns:   0 on success; exits through die when openssl fails
#######################################
generate_ca() {
  local ca_dir="${PROM_TLS_DIR}"
  local ca_cert="${ca_dir}/ca.crt"
  local ca_key="${ca_dir}/ca.key"
  local confirm=""
  local err=""

  if [[ -f "$ca_cert" && -f "$ca_key" ]]; then
    echo ""
    echo " CA certificate already exists at ${ca_cert}"
    # `read` fails on EOF; without the fallback `set -e` would abort here.
    read -r -p " Regenerate CA? (will invalidate all existing certs) [y/N]: " confirm || confirm=""
    if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
      info "Keeping existing CA"
      CA_CERT="$ca_cert"
      CA_KEY="$ca_key"
      return 0
    fi
    backup_file "$ca_cert"
    backup_file "$ca_key"
  fi

  info "Generating Certificate Authority..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would generate CA cert at ${ca_cert}"
    CA_CERT="$ca_cert"
    CA_KEY="$ca_key"
    return 0
  fi

  mkdir -p -- "$ca_dir"

  # Generate the key under a restrictive umask so the private key is never
  # readable by other users, not even before the chmod below. The command
  # substitution is a subshell, so the umask change does not leak out.
  # openssl output is captured and only shown when the command fails.
  if ! err=$(umask 077 && openssl genrsa -out "$ca_key" "$KEY_BITS" 2>&1); then
    die "Failed to generate CA key ${ca_key}: ${err}"
  fi

  if ! err=$(openssl req -x509 -new -nodes \
    -key "$ca_key" \
    -sha256 \
    -days "$CA_DAYS" \
    -out "$ca_cert" \
    -subj "/CN=Prometheus CA/O=Prometheus/OU=Monitoring" 2>&1); then
    die "Failed to generate CA certificate ${ca_cert}: ${err}"
  fi

  chmod 644 "$ca_cert"
  chmod 600 "$ca_key"
  CA_CERT="$ca_cert"
  CA_KEY="$ca_key"
  info "CA certificate created: ${ca_cert}"
  info "CA key created: ${ca_key} (keep this safe!)"
}
#######################################
# Generate a private key and a CA-signed certificate for one service.
# The certificate carries SANs for HOSTNAME_FQDN, localhost, 127.0.0.1 and,
# when one can be determined, the first address reported by `hostname -I`.
# Existing files are only replaced after confirmation (and a backup).
# Globals:   HOSTNAME_FQDN, KEY_BITS, CERT_DAYS, CA_CERT, CA_KEY, DRY_RUN (read)
# Arguments: $1 - base name of the files, e.g. "prometheus" or "node_exporter"
#            $2 - directory that receives <name>.crt and <name>.key
#            $3 - user that should own the files (skipped if the user is unknown)
# Outputs:   progress via info; prompt on the terminal
# Returns:   0 on success; exits through die when a step fails
#######################################
generate_cert() {
  local name="$1"
  local cert_dir="$2"
  local owner="$3"
  local cert_file="${cert_dir}/${name}.crt"
  local key_file="${cert_dir}/${name}.key"
  local confirm="" err="" primary_ip="" alt_names=""
  local workdir csr_conf ext_conf csr_file

  if [[ -f "$cert_file" && -f "$key_file" ]]; then
    echo ""
    echo " Certificate for ${name} already exists."
    # `read` fails on EOF; treat an unanswerable prompt as "no".
    read -r -p " Regenerate? [y/N]: " confirm || confirm=""
    if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
      info "Keeping existing ${name} certificate"
      return 0
    fi
    backup_file "$cert_file"
    backup_file "$key_file"
  fi

  info "Generating certificate for ${name}..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would generate cert at ${cert_file}"
    return 0
  fi

  if [[ ! -f "$CA_CERT" || ! -f "$CA_KEY" ]]; then
    die "CA files not available for signing (cert: '${CA_CERT}', key: '${CA_KEY}')"
  fi

  mkdir -p -- "$cert_dir"

  # Build the SAN list once; both the CSR and the signing extensions use it.
  # `hostname -I` can succeed with empty output (no configured address); an
  # empty "IP.2 =" entry would make openssl reject the config, so the entry
  # is only added when a real, non-loopback address was found.
  primary_ip=$(hostname -I 2>/dev/null | awk '{print $1}') || primary_ip=""
  alt_names="DNS.1 = ${HOSTNAME_FQDN}"$'\n'"DNS.2 = localhost"$'\n'"IP.1 = 127.0.0.1"
  if [[ -n "$primary_ip" && "$primary_ip" != "127.0.0.1" ]]; then
    alt_names+=$'\n'"IP.2 = ${primary_ip}"
  fi

  # All scratch files live in one private directory so that a single rm
  # cleans up on every exit path.
  workdir=$(mktemp -d) || die "Could not create temporary directory"
  csr_conf="${workdir}/csr.cnf"
  ext_conf="${workdir}/ext.cnf"
  csr_file="${workdir}/request.csr"

  # CSR config with SANs
  cat > "$csr_conf" <<CSREOF
[req]
default_bits = ${KEY_BITS}
prompt = no
distinguished_name = dn
req_extensions = v3_req

[dn]
CN = ${HOSTNAME_FQDN}
O = Prometheus
OU = ${name}

[v3_req]
subjectAltName = @alt_names

[alt_names]
${alt_names}
CSREOF

  # Extension config applied by the CA when signing
  cat > "$ext_conf" <<EXTEOF
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, keyEncipherment
extendedKeyUsage = serverAuth, clientAuth
subjectAltName = @alt_names

[alt_names]
${alt_names}
EXTEOF

  # Private key: created under umask 077 (scoped to the subshell) so it is
  # never world-readable. openssl output is shown only on failure.
  if ! err=$(umask 077 && openssl genrsa -out "$key_file" "$KEY_BITS" 2>&1); then
    rm -rf -- "$workdir"
    die "Failed to generate key ${key_file}: ${err}"
  fi

  # Certificate signing request
  if ! err=$(openssl req -new \
    -key "$key_file" \
    -out "$csr_file" \
    -config "$csr_conf" 2>&1); then
    rm -rf -- "$workdir"
    die "Failed to create CSR for ${name}: ${err}"
  fi

  # Sign with the CA
  if ! err=$(openssl x509 -req \
    -in "$csr_file" \
    -CA "$CA_CERT" \
    -CAkey "$CA_KEY" \
    -CAcreateserial \
    -out "$cert_file" \
    -days "$CERT_DAYS" \
    -sha256 \
    -extfile "$ext_conf" 2>&1); then
    rm -rf -- "$workdir"
    die "Failed to sign certificate ${cert_file}: ${err}"
  fi

  rm -rf -- "$workdir"

  # Permissions and ownership
  chmod 644 "$cert_file"
  chmod 600 "$key_file"
  if id "$owner" &>/dev/null; then
    chown "${owner}:${owner}" "$cert_file" "$key_file"
  fi

  info "Certificate created: ${cert_file}"
  info "Key created: ${key_file}"
}
# ============================================================================
# PROMETHEUS SERVER CONFIGURATION
# ============================================================================
#######################################
# Write the Prometheus web configuration (web.yml) that enables TLS and
# make sure the systemd unit passes it via --web.config.file.
# An existing TLS config is only overwritten after confirmation.
# Globals:   PROM_DIR, PROM_TLS_DIR, PROM_USER, SCRIPT_VERSION, DRY_RUN (read)
# Outputs:   progress via info; prompt on the terminal
# Returns:   0 on success
#######################################
configure_prometheus_tls() {
  local web_config="${PROM_DIR}/web.yml"
  local confirm=""

  if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
    echo ""
    echo " Prometheus web.yml already has TLS config."
    # `read` fails on EOF; treat an unanswerable prompt as "no".
    read -r -p " Overwrite? [y/N]: " confirm || confirm=""
    if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
      info "Keeping existing Prometheus TLS config"
      return 0
    fi
    backup_file "$web_config"
  fi

  info "Configuring Prometheus TLS (web.yml)..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would create ${web_config}"
    return 0
  fi

  # cert_file/key_file/client_auth_type are keys of the tls_server_config
  # mapping, so they must be indented beneath it to form valid YAML.
  cat > "$web_config" <<WEBEOF
# Prometheus TLS configuration
# Generated by add-prometheus-tls.sh v${SCRIPT_VERSION}
# Date: $(date -Iseconds)
tls_server_config:
  cert_file: ${PROM_TLS_DIR}/prometheus.crt
  key_file: ${PROM_TLS_DIR}/prometheus.key
  client_auth_type: "NoClientCert"
WEBEOF

  if id "$PROM_USER" &>/dev/null; then
    chown "${PROM_USER}:${PROM_USER}" "$web_config"
  fi
  chmod 644 "$web_config"

  # Ensure --web.config.file is in the systemd unit
  update_prometheus_service
  info "Prometheus web.yml created: ${web_config}"
}
#######################################
# Add "--web.config.file=<PROM_DIR>/web.yml" to the ExecStart command of
# prometheus.service, on a line of its own so it can be removed again.
# Handles both a single-line ExecStart and one that is already split over
# several backslash-continued lines.
# Globals:   PROM_DIR, DRY_RUN (read)
# Outputs:   progress via info/warn/debug_echo
# Returns:   0 (a unit that cannot be patched only produces warnings)
#######################################
update_prometheus_service() {
  local service_file=""
  local tmp_unit=""
  local flag="--web.config.file=${PROM_DIR}/web.yml"

  # -f2- keeps paths that themselves contain '='.
  service_file=$(systemctl show -p FragmentPath prometheus.service 2>/dev/null | cut -d= -f2-) \
    || service_file=""
  if [[ -z "$service_file" || ! -f "$service_file" ]]; then
    warn "Could not find prometheus.service unit file"
    warn "Manually add '--web.config.file=${PROM_DIR}/web.yml' to Prometheus startup"
    return 0
  fi

  if grep -q "web.config.file" "$service_file" 2>/dev/null; then
    debug_echo "Prometheus service already has --web.config.file flag"
    return 0
  fi

  info "Updating Prometheus systemd service to use web.yml..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would add --web.config.file to ${service_file}"
    return 0
  fi

  backup_file "$service_file"

  if ! grep -qE '^ExecStart=.*prometheus' "$service_file"; then
    warn "Could not auto-patch service file. Add manually:"
    warn " --web.config.file=${PROM_DIR}/web.yml"
    return 0
  fi

  if ! tmp_unit=$(mktemp); then
    warn "Could not create a temporary file. Add manually:"
    warn " --web.config.file=${PROM_DIR}/web.yml"
    return 0
  fi

  # ExecStart already continued with a trailing backslash: insert the flag
  # as the next continuation line (keeping the chain intact).
  # Single-line ExecStart: add a backslash and put the flag on the next line.
  # The flag is passed through the environment so awk does not interpret
  # escape sequences in the path.
  if WEB_FLAG="$flag" awk '
      /^ExecStart=.*prometheus/ {
        if ($0 ~ /\\[ \t]*$/) {
          print
          print "    " ENVIRON["WEB_FLAG"] " \\"
        } else {
          print $0 " \\"
          print "    " ENVIRON["WEB_FLAG"]
        }
        next
      }
      { print }
    ' "$service_file" > "$tmp_unit" && cat "$tmp_unit" > "$service_file"; then
    rm -f -- "$tmp_unit"
    systemctl daemon-reload
    info "Added --web.config.file to Prometheus service"
  else
    rm -f -- "$tmp_unit"
    warn "Could not auto-patch service file. Add manually:"
    warn " --web.config.file=${PROM_DIR}/web.yml"
  fi
}
#######################################
# Switch scrape jobs in prometheus.yml that explicitly use "scheme: http"
# to https and add a tls_config pointing at the CA certificate. Also writes
# a commented reference snippet (tls_scrape.yml) next to the config.
# Jobs without an explicit "scheme: http" line are left untouched; the
# user is pointed at the snippet instead.
# Globals:   PROM_DIR, PROM_TLS_DIR, PROM_USER, DRY_RUN (read)
# Outputs:   progress via info/warn
# Returns:   0
#######################################
update_prometheus_scrape_configs() {
  local prom_config="${PROM_DIR}/prometheus.yml"
  local tls_snippet="${PROM_DIR}/tls_scrape.yml"
  local tmpfile=""
  local rc=0

  if [[ ! -f "$prom_config" ]]; then
    warn "prometheus.yml not found at ${prom_config} — skipping scrape config update"
    return 0
  fi

  info "Updating prometheus.yml scrape configs for TLS..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would update scrape configs in ${prom_config}"
    return 0
  fi

  backup_file "$prom_config"

  # Check if tls_config already exists for node targets
  if grep -q "tls_config" "$prom_config" 2>/dev/null; then
    info "prometheus.yml already contains tls_config entries"
    echo " Review ${prom_config} to ensure all scrape jobs use TLS."
    return 0
  fi

  # Reference snippet the user can copy from
  cat > "$tls_snippet" <<TLSEOF
# TLS scrape configuration snippet
# Include this in your scrape_configs jobs:
#
#   - job_name: 'node'
#     scheme: https
#     tls_config:
#       ca_file: ${PROM_TLS_DIR}/ca.crt
#     static_configs:
#       - targets: ['target1:9100', 'target2:9100']
#
# For the local Prometheus job:
#
#   - job_name: 'prometheus'
#     scheme: https
#     tls_config:
#       ca_file: ${PROM_TLS_DIR}/ca.crt
#     static_configs:
#       - targets: ['localhost:9090']
TLSEOF
  if id "$PROM_USER" &>/dev/null; then
    chown "${PROM_USER}:${PROM_USER}" "$tls_snippet"
  fi

  # Auto-patch in a single pass. For the first "scheme: http" line of each
  # job the whole line is rewritten (so any amount of whitespace after the
  # colon is handled) and a tls_config block is added at the same indent.
  # awk exits 3 when nothing was patched, so that case can be told apart
  # from a real failure.
  tmpfile=$(mktemp)
  CA_FILE="${PROM_TLS_DIR}/ca.crt" awk '
    /^[ \t]*-[ \t]*job_name:/ { in_job = 1; job_patched = 0 }
    in_job && !job_patched && /^[ \t]*scheme:[ \t]*http[ \t]*$/ {
      indent = $0
      sub(/[^ \t].*$/, "", indent)
      print indent "scheme: https"
      print indent "tls_config:"
      print indent "  ca_file: " ENVIRON["CA_FILE"]
      job_patched = 1
      patched_total++
      next
    }
    { print }
    END { exit (patched_total > 0 ? 0 : 3) }
  ' "$prom_config" > "$tmpfile" || rc=$?

  case "$rc" in
    0) ;;
    3)
      info "No 'scheme: http' lines found to auto-patch."
      info "Reference TLS snippet created at: ${tls_snippet}"
      info "Manually update your scrape jobs to use scheme: https with tls_config."
      rm -f -- "$tmpfile"
      return 0
      ;;
    *)
      rm -f -- "$tmpfile"
      warn "Could not process ${prom_config} — scrape configs left unchanged"
      info "Reference TLS snippet created at: ${tls_snippet}"
      return 0
      ;;
  esac

  # Overwrite in place so the existing file mode is preserved.
  cat "$tmpfile" > "$prom_config"
  rm -f -- "$tmpfile"
  if id "$PROM_USER" &>/dev/null; then
    chown "${PROM_USER}:${PROM_USER}" "$prom_config"
  fi

  info "Updated scrape configs in ${prom_config}"
  info "TLS reference snippet saved to: ${tls_snippet}"
}
# ============================================================================
# NODE EXPORTER CONFIGURATION
# ============================================================================
#######################################
# Give node_exporter a signed certificate, a copy of the CA certificate and
# a web.yml that enables TLS, then make sure the systemd unit loads it.
# An existing TLS config is only overwritten after confirmation.
# Globals:   NODE_EXPORTER_DIR, NODE_EXPORTER_TLS_DIR, NODE_USER, CA_CERT,
#            SCRIPT_VERSION, DRY_RUN (read)
# Outputs:   progress via info; prompt on the terminal
# Returns:   0 on success
#######################################
configure_node_exporter_tls() {
  local tls_dir="$NODE_EXPORTER_TLS_DIR"
  local web_config="${NODE_EXPORTER_DIR}/web.yml"
  local confirm=""

  # A dry run must not create directories.
  if [[ "$DRY_RUN" != true ]]; then
    mkdir -p -- "$tls_dir" "$NODE_EXPORTER_DIR"
  fi

  # Generate cert for this node
  generate_cert "node_exporter" "$tls_dir" "$NODE_USER"

  # Copy CA cert to node_exporter dir for reference
  if [[ "$DRY_RUN" != true && -f "$CA_CERT" ]]; then
    # CA_CERT may already be the file inside tls_dir; cp refuses to copy a
    # file onto itself and would abort the script under `set -e`.
    if [[ ! "$CA_CERT" -ef "${tls_dir}/ca.crt" ]]; then
      cp -a -- "$CA_CERT" "${tls_dir}/ca.crt"
    fi
    if id "$NODE_USER" &>/dev/null; then
      chown "${NODE_USER}:${NODE_USER}" "${tls_dir}/ca.crt"
    fi
  fi

  if [[ -f "$web_config" ]] && grep -q "tls_server_config" "$web_config" 2>/dev/null; then
    echo ""
    echo " node_exporter web.yml already has TLS config."
    # `read` fails on EOF; treat an unanswerable prompt as "no".
    read -r -p " Overwrite? [y/N]: " confirm || confirm=""
    if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
      info "Keeping existing node_exporter TLS config"
      update_node_exporter_service
      return 0
    fi
    backup_file "$web_config"
  fi

  info "Configuring node_exporter TLS (web.yml)..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would create ${web_config}"
    return 0
  fi

  # cert_file/key_file are keys of the tls_server_config mapping, so they
  # must be indented beneath it to form valid YAML.
  cat > "$web_config" <<NODEWEBEOF
# node_exporter TLS configuration
# Generated by add-prometheus-tls.sh v${SCRIPT_VERSION}
# Date: $(date -Iseconds)
tls_server_config:
  cert_file: ${tls_dir}/node_exporter.crt
  key_file: ${tls_dir}/node_exporter.key
NODEWEBEOF

  if id "$NODE_USER" &>/dev/null; then
    chown "${NODE_USER}:${NODE_USER}" "$web_config"
  fi
  chmod 644 "$web_config"

  update_node_exporter_service
  info "node_exporter web.yml created: ${web_config}"
}
#######################################
# Add "--web.config.file=<NODE_EXPORTER_DIR>/web.yml" to the ExecStart
# command of node_exporter.service, on a line of its own so it can be
# removed again. Handles both a single-line ExecStart and one that is
# already split over several backslash-continued lines.
# Globals:   NODE_EXPORTER_DIR, DRY_RUN (read)
# Outputs:   progress via info/warn/debug_echo
# Returns:   0 (a unit that cannot be patched only produces warnings)
#######################################
update_node_exporter_service() {
  local service_file=""
  local tmp_unit=""
  local flag="--web.config.file=${NODE_EXPORTER_DIR}/web.yml"

  # -f2- keeps paths that themselves contain '='.
  service_file=$(systemctl show -p FragmentPath node_exporter.service 2>/dev/null | cut -d= -f2-) \
    || service_file=""
  if [[ -z "$service_file" || ! -f "$service_file" ]]; then
    warn "Could not find node_exporter.service unit file"
    warn "Manually add '--web.config.file=${NODE_EXPORTER_DIR}/web.yml' to node_exporter startup"
    return 0
  fi

  if grep -q "web.config.file" "$service_file" 2>/dev/null; then
    debug_echo "node_exporter service already has --web.config.file flag"
    return 0
  fi

  info "Updating node_exporter systemd service to use web.yml..."
  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would add --web.config.file to ${service_file}"
    return 0
  fi

  backup_file "$service_file"

  if ! grep -qE '^ExecStart=.*node_exporter' "$service_file"; then
    warn "Could not auto-patch service file. Add manually:"
    warn " --web.config.file=${NODE_EXPORTER_DIR}/web.yml"
    return 0
  fi

  if ! tmp_unit=$(mktemp); then
    warn "Could not create a temporary file. Add manually:"
    warn " --web.config.file=${NODE_EXPORTER_DIR}/web.yml"
    return 0
  fi

  # ExecStart already continued with a trailing backslash: insert the flag
  # as the next continuation line (keeping the chain intact).
  # Single-line ExecStart: add a backslash and put the flag on the next line.
  # The flag is passed through the environment so awk does not interpret
  # escape sequences in the path.
  if WEB_FLAG="$flag" awk '
      /^ExecStart=.*node_exporter/ {
        if ($0 ~ /\\[ \t]*$/) {
          print
          print "    " ENVIRON["WEB_FLAG"] " \\"
        } else {
          print $0 " \\"
          print "    " ENVIRON["WEB_FLAG"]
        }
        next
      }
      { print }
    ' "$service_file" > "$tmp_unit" && cat "$tmp_unit" > "$service_file"; then
    rm -f -- "$tmp_unit"
    systemctl daemon-reload
    info "Added --web.config.file to node_exporter service"
  else
    rm -f -- "$tmp_unit"
    warn "Could not auto-patch service file. Add manually:"
    warn " --web.config.file=${NODE_EXPORTER_DIR}/web.yml"
  fi
}
# ============================================================================
# STATUS
# ============================================================================
#######################################
# Helper: print the notAfter date of a certificate, or "unknown" when the
# file cannot be parsed. Never fails, so a damaged certificate cannot abort
# the status report under `set -e` / pipefail.
# Arguments: $1 - certificate path
# Outputs:   expiry text on stdout
#######################################
_status_cert_expiry() {
  local expiry=""
  expiry=$(openssl x509 -enddate -noout -in "$1" 2>/dev/null | cut -d= -f2-) || expiry=""
  printf '%s\n' "${expiry:-unknown}"
}

#######################################
# Helper: report whether a local endpoint answers over HTTPS or plain HTTP.
# Arguments: $1 - TCP port, $2 - request path (with leading slash)
# Outputs:   one " HTTPS: ..." status line on stdout
#######################################
_status_probe_https() {
  local port="$1"
  local path="$2"

  if ! command -v curl &>/dev/null; then
    # Without curl nothing can be probed; say so instead of implying that
    # the service is unreachable.
    echo " HTTPS: ? curl not installed — cannot probe"
  elif curl -sk --max-time 3 "https://localhost:${port}${path}" &>/dev/null; then
    echo " HTTPS: ✓ responding on https://localhost:${port}"
  elif curl -s --max-time 3 "http://localhost:${port}${path}" &>/dev/null; then
    echo " HTTPS: ✗ still serving plain HTTP"
  else
    echo " HTTPS: ? could not connect"
  fi
}

#######################################
# Print a TLS status report for Prometheus and node_exporter: service
# state, presence of web.yml TLS config, certificate expiry dates and a
# live HTTPS probe of the local endpoints.
# Globals:   SCRIPT_VERSION, PROM_DIR, PROM_TLS_DIR, NODE_EXPORTER_DIR,
#            NODE_EXPORTER_TLS_DIR (read)
# Outputs:   report on stdout
# Returns:   0
#######################################
show_status() {
  local prom_status node_status

  echo ""
  echo "=========================================="
  echo "Prometheus TLS Status (v${SCRIPT_VERSION})"
  echo "=========================================="
  echo ""

  # Check Prometheus
  echo "--- Prometheus Server ---"
  if systemctl cat prometheus.service &>/dev/null; then
    prom_status="installed"
    if systemctl is-active --quiet prometheus 2>/dev/null; then
      prom_status="running"
    fi
    echo " Service: ${prom_status}"

    if [[ -f "${PROM_DIR}/web.yml" ]] && grep -q "tls_server_config" "${PROM_DIR}/web.yml" 2>/dev/null; then
      echo " TLS: ✓ enabled (web.yml)"
    else
      echo " TLS: ✗ not configured"
    fi

    if [[ -f "${PROM_TLS_DIR}/ca.crt" ]]; then
      echo " CA cert: ✓ present (expires: $(_status_cert_expiry "${PROM_TLS_DIR}/ca.crt"))"
    else
      echo " CA cert: ✗ not found"
    fi

    if [[ -f "${PROM_TLS_DIR}/prometheus.crt" ]]; then
      echo " Server cert: ✓ present (expires: $(_status_cert_expiry "${PROM_TLS_DIR}/prometheus.crt"))"
    else
      echo " Server cert: ✗ not found"
    fi

    # Verify Prometheus is actually serving HTTPS
    _status_probe_https 9090 "/-/healthy"
  else
    echo " Not installed"
  fi
  echo ""

  # Check node_exporter
  echo "--- node_exporter ---"
  if systemctl cat node_exporter.service &>/dev/null; then
    node_status="installed"
    if systemctl is-active --quiet node_exporter 2>/dev/null; then
      node_status="running"
    fi
    echo " Service: ${node_status}"

    if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]] && grep -q "tls_server_config" "${NODE_EXPORTER_DIR}/web.yml" 2>/dev/null; then
      echo " TLS: ✓ enabled (web.yml)"
    else
      echo " TLS: ✗ not configured"
    fi

    if [[ -f "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt" ]]; then
      echo " Cert: ✓ present (expires: $(_status_cert_expiry "${NODE_EXPORTER_TLS_DIR}/node_exporter.crt"))"
    else
      echo " Cert: ✗ not found"
    fi

    # Verify node_exporter is actually serving HTTPS
    _status_probe_https 9100 "/metrics"
  else
    echo " Not installed"
  fi
  echo ""
}
# ============================================================================
# REMOVE
# ============================================================================
do_remove() {
echo ""
echo "=========================================="
echo "Remove Prometheus TLS Configuration"
echo "=========================================="
echo ""
# Remove Prometheus TLS
if [[ -f "${PROM_DIR}/web.yml" ]]; then
backup_file "${PROM_DIR}/web.yml"
if [[ "$DRY_RUN" != true ]]; then
rm -f "${PROM_DIR}/web.yml"
fi
info "Removed Prometheus web.yml"
# Remove --web.config.file from service
local prom_service
prom_service=$(systemctl show -p FragmentPath prometheus.service 2>/dev/null | cut -d= -f2)
if [[ -n "$prom_service" && -f "$prom_service" ]] && grep -q "web.config.file" "$prom_service"; then
backup_file "$prom_service"
if [[ "$DRY_RUN" != true ]]; then
sed -i '/--web.config.file/d' "$prom_service"
# Clean up trailing backslash if left dangling
sed -i '${/^[[:space:]]*\\[[:space:]]*$/d}' "$prom_service"
systemctl daemon-reload
fi
info "Removed --web.config.file from prometheus.service"
fi
if [[ "$DRY_RUN" != true ]]; then
systemctl restart prometheus 2>/dev/null || warn "Could not restart Prometheus"
fi
fi
# Remove node_exporter TLS
if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]]; then
backup_file "${NODE_EXPORTER_DIR}/web.yml"
if [[ "$DRY_RUN" != true ]]; then
rm -f "${NODE_EXPORTER_DIR}/web.yml"
fi
info "Removed node_exporter web.yml"
local node_service
node_service=$(systemctl show -p FragmentPath node_exporter.service 2>/dev/null | cut -d= -f2)
if [[ -n "$node_service" && -f "$node_service" ]] && grep -q "web.config.file" "$node_service"; then
backup_file "$node_service"
if [[ "$DRY_RUN" != true ]]; then
sed -i '/--web.config.file/d' "$node_service"
sed -i '${/^[[:space:]]*\\[[:space:]]*$/d}' "$node_service"
systemctl daemon-reload
fi
info "Removed --web.config.file from node_exporter.service"
fi
if [[ "$DRY_RUN" != true ]]; then
systemctl restart node_exporter 2>/dev/null || warn "Could not restart node_exporter"
fi
fi
echo ""
info "TLS configuration removed. Backups saved to: ${BACKUP_DIR}"
info "Note: Certificate files in ${PROM_TLS_DIR} and ${NODE_EXPORTER_TLS_DIR} were NOT deleted."
info "Remove them manually if no longer needed."
}
# ============================================================================
# SERVER SETUP
# ============================================================================
#######################################
# Full TLS setup on the Prometheus server: create the CA and the server
# certificate, write web.yml, patch scrape configs, optionally secure the
# local node_exporter, restart the services and print a summary.
# Globals:   SCRIPT_VERSION, PROM_TLS_DIR, BACKUP_DIR, PROM_USER,
#            NODE_EXPORTER_DIR, DRY_RUN (read)
# Outputs:   progress and summary on stdout; warnings via warn
# Returns:   0 (service start problems are reported as warnings)
#######################################
setup_server() {
  local configure_node=""

  echo ""
  echo "=========================================="
  echo "Prometheus Server TLS Setup"
  echo "Version: ${SCRIPT_VERSION}"
  echo "=========================================="
  echo ""

  detect_hostname

  # A dry run must not create directories.
  if [[ "$DRY_RUN" != true ]]; then
    mkdir -p -- "$PROM_TLS_DIR" "$BACKUP_DIR"
  fi

  # Step 1: Generate CA
  echo ""
  echo "=== Step 1: Certificate Authority ==="
  generate_ca

  # Step 2: Generate Prometheus server cert
  echo ""
  echo "=== Step 2: Prometheus Server Certificate ==="
  generate_cert "prometheus" "$PROM_TLS_DIR" "$PROM_USER"

  # Step 3: Configure Prometheus web.yml
  echo ""
  echo "=== Step 3: Prometheus TLS Configuration ==="
  configure_prometheus_tls

  # Step 4: Update scrape configs
  echo ""
  echo "=== Step 4: Scrape Configuration ==="
  update_prometheus_scrape_configs

  # Step 5: Optionally configure local node_exporter
  if systemctl cat node_exporter.service &>/dev/null; then
    echo ""
    echo "=== Step 5: Local node_exporter ==="
    echo " node_exporter detected on this server."
    # `read` fails on EOF; an unanswerable prompt selects the default (yes).
    read -r -p " Configure TLS for local node_exporter too? [Y/n]: " configure_node || configure_node=""
    if [[ ! "$configure_node" =~ ^[Nn]$ ]]; then
      configure_node_exporter_tls
    fi
  fi

  # Step 6: Restart services
  echo ""
  echo "=== Restarting Services ==="
  if [[ "$DRY_RUN" != true ]]; then
    info "Restarting Prometheus..."
    # The restart is part of the condition: a failing `systemctl restart`
    # must lead to the warning below instead of aborting under `set -e`.
    if systemctl restart prometheus && systemctl is-active --quiet prometheus; then
      info "Prometheus restarted successfully"
    else
      warn "Prometheus failed to start — check: journalctl -u prometheus"
    fi

    if [[ -f "${NODE_EXPORTER_DIR}/web.yml" ]]; then
      info "Restarting node_exporter..."
      if systemctl restart node_exporter && systemctl is-active --quiet node_exporter; then
        info "node_exporter restarted successfully"
      else
        warn "node_exporter failed to start — check: journalctl -u node_exporter"
      fi
    fi
  else
    info "[DRY RUN] Would restart prometheus and node_exporter"
  fi

  # Summary
  echo ""
  echo "=========================================="
  echo "TLS Setup Complete!"
  echo "=========================================="
  echo ""
  echo "CA Certificate: ${PROM_TLS_DIR}/ca.crt"
  echo "CA Key: ${PROM_TLS_DIR}/ca.key"
  echo "Server Certificate: ${PROM_TLS_DIR}/prometheus.crt"
  echo "Backups: ${BACKUP_DIR}"
  echo ""
  echo "To configure remote nodes, copy the CA cert and key to each node:"
  echo ""
  echo " scp ${PROM_TLS_DIR}/ca.crt ${PROM_TLS_DIR}/ca.key user@node:/tmp/"
  echo " ssh user@node 'sudo ./add-prometheus-tls.sh --role node --ca-cert /tmp/ca.crt --ca-key /tmp/ca.key'"
  echo ""
  echo "To verify: curl -s --cacert ${PROM_TLS_DIR}/ca.crt https://localhost:9090/-/healthy"
  echo ""
}
# ============================================================================
# NODE SETUP
# ============================================================================
#######################################
# TLS setup on a monitored node: locate the CA certificate and key
# (options, known directories or interactive prompt), store them in the
# node_exporter TLS directory, issue the node certificate, restart
# node_exporter and print a summary.
# Globals:   SCRIPT_VERSION, NODE_EXPORTER_TLS_DIR, PROM_TLS_DIR,
#            BACKUP_DIR, HOSTNAME_FQDN, DRY_RUN (read)
#            CA_CERT, CA_KEY (read; written when discovered here)
# Outputs:   progress and summary on stdout; warnings via warn
# Returns:   0; exits through die when the CA files cannot be found
#######################################
setup_node() {
  local dest_cert="${NODE_EXPORTER_TLS_DIR}/ca.crt"
  local dest_key="${NODE_EXPORTER_TLS_DIR}/ca.key"

  echo ""
  echo "=========================================="
  echo "Node Exporter TLS Setup"
  echo "Version: ${SCRIPT_VERSION}"
  echo "=========================================="
  echo ""

  detect_hostname

  # A dry run must not create directories.
  if [[ "$DRY_RUN" != true ]]; then
    mkdir -p -- "$NODE_EXPORTER_TLS_DIR" "$BACKUP_DIR"
  fi

  # Check for CA cert/key
  if [[ -z "$CA_CERT" || -z "$CA_KEY" ]]; then
    # Check if they exist locally (maybe copied from server)
    if [[ -f "$dest_cert" && -f "$dest_key" ]]; then
      CA_CERT="$dest_cert"
      CA_KEY="$dest_key"
      info "Found existing CA files in ${NODE_EXPORTER_TLS_DIR}"
    elif [[ -f "${PROM_TLS_DIR}/ca.crt" && -f "${PROM_TLS_DIR}/ca.key" ]]; then
      CA_CERT="${PROM_TLS_DIR}/ca.crt"
      CA_KEY="${PROM_TLS_DIR}/ca.key"
      info "Found existing CA files in ${PROM_TLS_DIR}"
    else
      echo " No CA certificate found. You need the CA cert and key from your"
      echo " Prometheus server to sign this node's certificate."
      echo ""
      echo " Copy them from the Prometheus server:"
      echo " scp prometheus-server:${PROM_TLS_DIR}/ca.crt /tmp/"
      echo " scp prometheus-server:${PROM_TLS_DIR}/ca.key /tmp/"
      echo ""
      # `read` fails on EOF; fall through to the checks below so the user
      # gets a clear error instead of a silent `set -e` abort.
      read -r -p " Path to CA certificate: " CA_CERT || true
      read -r -p " Path to CA key: " CA_KEY || true
    fi
  fi

  # Validate whichever paths are in effect now
  if [[ ! -f "$CA_CERT" ]]; then
    die "CA certificate not found: ${CA_CERT}"
  fi
  if [[ ! -f "$CA_KEY" ]]; then
    die "CA key not found: ${CA_KEY}"
  fi

  # Copy CA files to node_exporter tls dir
  # NOTE(review): this keeps a copy of the CA private key on every node —
  # consider signing node certificates on the server instead.
  if [[ "$DRY_RUN" != true ]]; then
    # When the CA files were found in this very directory, source and
    # destination are the same file; cp would fail and abort under `set -e`.
    if [[ ! "$CA_CERT" -ef "$dest_cert" ]]; then
      cp -a -- "$CA_CERT" "$dest_cert"
    fi
    if [[ ! "$CA_KEY" -ef "$dest_key" ]]; then
      cp -a -- "$CA_KEY" "$dest_key"
    fi
    chmod 644 "$dest_cert"
    chmod 600 "$dest_key"
  fi

  # Generate cert and configure
  echo ""
  echo "=== Generating node_exporter Certificate ==="
  configure_node_exporter_tls

  # Restart
  echo ""
  echo "=== Restarting node_exporter ==="
  if [[ "$DRY_RUN" != true ]]; then
    # The restart is part of the condition: a failing `systemctl restart`
    # must lead to the warning below instead of aborting under `set -e`.
    if systemctl restart node_exporter && systemctl is-active --quiet node_exporter; then
      info "node_exporter restarted successfully"
    else
      warn "node_exporter failed to start — check: journalctl -u node_exporter"
    fi
  else
    info "[DRY RUN] Would restart node_exporter"
  fi

  # Summary
  echo ""
  echo "=========================================="
  echo "node_exporter TLS Setup Complete!"
  echo "=========================================="
  echo ""
  echo "Certificate: ${NODE_EXPORTER_TLS_DIR}/node_exporter.crt"
  echo "Key: ${NODE_EXPORTER_TLS_DIR}/node_exporter.key"
  echo "Backups: ${BACKUP_DIR}"
  echo ""
  echo "Add this node to your Prometheus server's prometheus.yml:"
  echo ""
  echo " - job_name: 'node'"
  echo " scheme: https"
  echo " tls_config:"
  echo " ca_file: ${PROM_TLS_DIR}/ca.crt"
  echo " static_configs:"
  echo " - targets: ['${HOSTNAME_FQDN}:9100']"
  echo ""
  echo "To verify: curl -s --cacert ${NODE_EXPORTER_TLS_DIR}/ca.crt https://localhost:9100/metrics | head"
  echo ""
}
# ============================================================================
# REMOTE DEPLOY
# ============================================================================
#######################################
# Compose the ssh command line used for remote deployment.
# Globals:   SSH_KEY (read; adds "-i <key>" when non-empty)
# Outputs:   the command as a single space-separated string on stdout
#######################################
build_ssh_cmd() {
  local identity=""
  [[ -z "$SSH_KEY" ]] || identity=" -i ${SSH_KEY}"
  printf '%s\n' "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10${identity}"
}
#######################################
# Compose the scp command line used for remote deployment.
# Globals:   SSH_KEY (read; adds "-i <key>" when non-empty)
# Outputs:   the command as a single space-separated string on stdout
#######################################
build_scp_cmd() {
  local identity=""
  [[ -z "$SSH_KEY" ]] || identity=" -i ${SSH_KEY}"
  printf '%s\n' "scp -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10${identity}"
}
#######################################
# Helper: print $1 with leading and trailing whitespace removed.
#######################################
_deploy_trim() {
  local text="$1"
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s\n' "$text"
}

#######################################
# Push the CA files and this script to remote hosts over SSH and run the
# node setup there. Hosts come from DEPLOY_TARGETS (comma-separated) and/or
# DEPLOY_FILE (one per line, '#' starts a comment). A host that fails at
# any step is recorded and the remaining hosts are still processed.
# Globals:   DEPLOY_TARGETS, DEPLOY_FILE, PROM_TLS_DIR, SSH_USER, DRY_RUN (read)
# Outputs:   per-host progress, a summary and a prometheus.yml snippet
# Returns:   0 when every host succeeded, 1 when at least one failed;
#            exits through die when there are no hosts or no CA
#######################################
deploy_to_nodes() {
  local -a hosts=()
  local -a target_hosts=()
  local -a failed_hosts=()
  local line="" host="" fh="" entry=""
  local script_path script_name ssh_cmd scp_cmd remote_tmp
  local succeeded=0
  local failed=0
  local first=true
  local is_failed=false

  # Build host list from --deploy and/or --deploy-file
  if [[ -n "$DEPLOY_TARGETS" ]]; then
    IFS=',' read -ra target_hosts <<< "$DEPLOY_TARGETS"
    for entry in ${target_hosts[@]+"${target_hosts[@]}"}; do
      # Tolerate "a, b" and stray commas.
      entry=$(_deploy_trim "$entry")
      [[ -z "$entry" ]] || hosts+=("$entry")
    done
  fi

  if [[ -n "$DEPLOY_FILE" ]]; then
    if [[ ! -f "$DEPLOY_FILE" ]]; then
      die "Deploy file not found: ${DEPLOY_FILE}"
    fi
    # The extra test keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Skip blank lines and comments
      line=$(_deploy_trim "${line%%#*}")
      [[ -z "$line" ]] && continue
      hosts+=("$line")
    done < "$DEPLOY_FILE"
  fi

  if [[ ${#hosts[@]} -eq 0 ]]; then
    die "No target hosts specified"
  fi

  # Verify CA exists (must run server setup first)
  if [[ ! -f "${PROM_TLS_DIR}/ca.crt" || ! -f "${PROM_TLS_DIR}/ca.key" ]]; then
    die "CA not found at ${PROM_TLS_DIR}/. Run server setup first: $0 --role server"
  fi

  script_path=$(readlink -f "$0")
  script_name=$(basename -- "$script_path")
  ssh_cmd=$(build_ssh_cmd)
  scp_cmd=$(build_scp_cmd)

  echo ""
  echo "=========================================="
  echo "Deploy TLS to Remote Nodes"
  echo "=========================================="
  echo ""
  echo " CA: ${PROM_TLS_DIR}/ca.crt"
  echo " SSH user: ${SSH_USER}"
  echo " Targets: ${hosts[*]}"
  echo ""

  # $ssh_cmd / $scp_cmd are deliberately unquoted below: they hold a command
  # plus its options and rely on word splitting.
  # Every remote command is used as a condition so that a failure is
  # handled here instead of aborting the whole run under `set -e`.
  for host in "${hosts[@]}"; do
    echo "--- ${host} ---"

    if [[ "$DRY_RUN" == true ]]; then
      info "[DRY RUN] Would deploy TLS to ${host}"
      succeeded=$((succeeded + 1))
      continue
    fi

    # Test SSH connectivity
    # shellcheck disable=SC2086
    if ! $ssh_cmd "${SSH_USER}@${host}" "echo ok" &>/dev/null; then
      warn "Cannot connect to ${host} — skipping"
      failed=$((failed + 1))
      failed_hosts+=("$host")
      echo ""
      continue
    fi

    # Create temp dir on remote
    remote_tmp=""
    # shellcheck disable=SC2086
    if ! remote_tmp=$($ssh_cmd "${SSH_USER}@${host}" "mktemp -d /tmp/prom-tls-XXXXXX") \
      || [[ -z "$remote_tmp" ]]; then
      warn "Could not create a temporary directory on ${host} — skipping"
      failed=$((failed + 1))
      failed_hosts+=("$host")
      echo ""
      continue
    fi

    # Copy CA cert, CA key, and this script
    # shellcheck disable=SC2086
    if ! $scp_cmd "${PROM_TLS_DIR}/ca.crt" "${PROM_TLS_DIR}/ca.key" "$script_path" \
      "${SSH_USER}@${host}:${remote_tmp}/" 2>/dev/null; then
      warn "Failed to copy files to ${host} — skipping"
      # A partial copy may have left the CA key behind; try to remove it.
      # shellcheck disable=SC2086
      $ssh_cmd "${SSH_USER}@${host}" "rm -rf ${remote_tmp}" 2>/dev/null \
        || warn "Could not clean up ${remote_tmp} on ${host}"
      failed=$((failed + 1))
      failed_hosts+=("$host")
      echo ""
      continue
    fi

    # Run the script in node mode on the remote host
    info "Running node setup on ${host}..."
    # shellcheck disable=SC2086
    if $ssh_cmd "${SSH_USER}@${host}" \
      "chmod +x ${remote_tmp}/${script_name} && \
       ${remote_tmp}/${script_name} \
       --role node \
       --ca-cert ${remote_tmp}/ca.crt \
       --ca-key ${remote_tmp}/ca.key"; then
      info "${host}: TLS configured successfully"
      succeeded=$((succeeded + 1))
    else
      warn "${host}: Setup failed — check logs on that host"
      failed=$((failed + 1))
      failed_hosts+=("$host")
    fi

    # Cleanup temp files on remote (they include the CA private key)
    # shellcheck disable=SC2086
    $ssh_cmd "${SSH_USER}@${host}" "rm -rf ${remote_tmp}" 2>/dev/null \
      || warn "Could not clean up ${remote_tmp} on ${host}"
    echo ""
  done

  # Summary
  echo "=========================================="
  echo "Deploy Complete"
  echo "=========================================="
  echo ""
  echo " Succeeded: ${succeeded}"
  echo " Failed: ${failed}"
  if [[ ${#failed_hosts[@]} -gt 0 ]]; then
    echo " Failed hosts: ${failed_hosts[*]}"
  fi

  # Print prometheus.yml snippet for all successful hosts
  echo ""
  echo "Add these targets to your prometheus.yml:"
  echo ""
  echo " - job_name: 'node'"
  echo " scheme: https"
  echo " tls_config:"
  echo " ca_file: ${PROM_TLS_DIR}/ca.crt"
  echo " static_configs:"
  echo -n " - targets: ["
  for host in "${hosts[@]}"; do
    # Skip failed hosts. The ${arr[@]+...} form keeps an empty array from
    # tripping `set -u` on bash releases older than 4.4.
    is_failed=false
    for fh in ${failed_hosts[@]+"${failed_hosts[@]}"}; do
      if [[ "$fh" == "$host" ]]; then
        is_failed=true
      fi
    done
    if [[ "$is_failed" == true ]]; then
      continue
    fi
    if [[ "$first" == true ]]; then
      echo -n "'${host}:9100'"
      first=false
    else
      echo -n ", '${host}:9100'"
    fi
  done
  echo "]"
  echo ""

  if [[ $failed -gt 0 ]]; then
    return 1
  fi
  return 0
}
# ============================================================================
# ARGUMENT PARSING
# ============================================================================
#######################################
# Parse the command line into the global runtime settings.
# --status and --remove are only remembered while parsing and executed
# after ALL options have been processed, so modifiers such as --dry-run
# take effect regardless of their position on the command line.
# Globals:   ROLE, CA_CERT, CA_KEY, HOSTNAME_FQDN, DEPLOY_TARGETS,
#            DEPLOY_FILE, SSH_USER, SSH_KEY, DRY_RUN (written)
# Arguments: $@ - the script's command-line arguments
# Returns:   0 for normal options; exits 0 after --status/--remove/--help;
#            exits through die on invalid input
#######################################
parse_arguments() {
  local action=""

  while [[ $# -gt 0 ]]; do
    # Options that take a value: make sure the value is really there, so
    # the user gets a clear message instead of an "unbound variable" error.
    case "$1" in
      --role|--ca-cert|--ca-key|--hostname|--deploy|--deploy-file|--ssh-user|--ssh-key)
        if [[ $# -lt 2 ]]; then
          die "Option $1 requires an argument. Use --help for usage."
        fi
        ;;
    esac

    case "$1" in
      --role)
        ROLE="$2"
        if [[ "$ROLE" != "server" && "$ROLE" != "node" ]]; then
          die "Invalid role: ${ROLE}. Must be 'server' or 'node'"
        fi
        shift 2
        ;;
      --ca-cert)
        CA_CERT="$2"
        shift 2
        ;;
      --ca-key)
        CA_KEY="$2"
        shift 2
        ;;
      --hostname)
        HOSTNAME_FQDN="$2"
        shift 2
        ;;
      --deploy)
        DEPLOY_TARGETS="$2"
        shift 2
        ;;
      --deploy-file)
        DEPLOY_FILE="$2"
        shift 2
        ;;
      --ssh-user)
        SSH_USER="$2"
        shift 2
        ;;
      --ssh-key)
        SSH_KEY="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --status)
        # First action on the command line wins.
        [[ -n "$action" ]] || action="status"
        shift
        ;;
      --remove)
        [[ -n "$action" ]] || action="remove"
        shift
        ;;
      -h|--help)
        show_usage
        ;;
      *)
        die "Unknown option: $1. Use --help for usage."
        ;;
    esac
  done

  # Run the deferred action now that every option has been applied.
  case "$action" in
    status)
      show_status
      exit 0
      ;;
    remove)
      do_remove
      exit 0
      ;;
  esac
}
# ============================================================================
# MAIN
# ============================================================================
#######################################
# Script entry point: validate the environment, parse the command line and
# dispatch to deploy, server or node setup.
# Globals:   DEPLOY_TARGETS, DEPLOY_FILE, ROLE (read)
# Arguments: $@ - the script's command-line arguments
# Returns:   exit status of the selected operation
#######################################
main() {
  local arg

  # Showing the help text needs no privileges, so handle it before the
  # root check. show_usage exits the script.
  for arg in "$@"; do
    case "$arg" in
      -h|--help) show_usage ;;
    esac
  done

  if [[ $EUID -ne 0 ]]; then
    die "This script must be run as root"
  fi

  parse_arguments "$@"

  # Check openssl is available
  if ! command -v openssl &>/dev/null; then
    die "openssl is required but not installed"
  fi

  # Deploy mode — push TLS to remote nodes from the Prometheus server
  if [[ -n "$DEPLOY_TARGETS" || -n "$DEPLOY_FILE" ]]; then
    deploy_to_nodes
    exit $?
  fi

  # Auto-detect role if not specified
  if [[ -z "$ROLE" ]]; then
    detect_role
  fi

  case "$ROLE" in
    server) setup_server ;;
    node) setup_node ;;
  esac
}
# Entry point: run main with all command-line arguments, each preserved as
# a separate word.
main "$@"