Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
@@ -0,0 +1,687 @@
|
||||
#!/bin/bash
###############################################################################
# configure-openshift-metrics.sh
#
# Configure an external Prometheus server to receive metrics from OpenShift.
# Supports federation (pull) and remote write (push) modes.
#
# Usage:
#   sudo ./configure-openshift-metrics.sh --method federation \
#     --openshift-url ROUTE --cluster-name NAME
#
#   sudo ./configure-openshift-metrics.sh --method remote-write \
#     --prometheus-url URL --cluster-name NAME
#
# Requirements:
#   - Root or sudo access on the Prometheus server
#   - oc CLI logged in with cluster-admin (unless --skip-openshift)
#   - Prometheus installed via binary (not containerized)
#
# Environment overrides (settings that have no command-line flag):
#   PROMETHEUS_SERVICE  systemd unit name            (default: prometheus)
#   PROMETHEUS_USER     owner of generated files     (default: prometheus)
#   OC_NAMESPACE        namespace of the service account
#                                                    (default: openshift-monitoring)
#   SA_NAME             service account name         (default: prometheus-external)
#   TOKEN_DURATION      lifetime of the bearer token (default: 8760h)
#   Example: sudo SA_NAME=prom-federate ./configure-openshift-metrics.sh ...
#
# https://mylinux.work/guides/openshift-metrics-to-external-prometheus/
###############################################################################

# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

VERSION="1.0"

#------------------------------------------------------------------------------
# Defaults
#------------------------------------------------------------------------------
# Settings that are changed through command-line options.
METHOD="federation"
OPENSHIFT_URL=""
PROMETHEUS_URL=""
CLUSTER_NAME="openshift"
PROMETHEUS_CONFIG="/etc/prometheus/prometheus.yml"
RULES_DIR="/etc/prometheus/rules"
TOKEN_FILE="/etc/prometheus/openshift-token"
SKIP_OPENSHIFT=false
SKIP_RULES=false
DRY_RUN=false

# Settings without a command-line option: a value already present in the
# environment wins, otherwise the historical default is used.
PROMETHEUS_SERVICE="${PROMETHEUS_SERVICE:-prometheus}"
PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"
OC_NAMESPACE="${OC_NAMESPACE:-openshift-monitoring}"
SA_NAME="${SA_NAME:-prometheus-external}"
TOKEN_DURATION="${TOKEN_DURATION:-8760h}"
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Colors and logging
|
||||
#------------------------------------------------------------------------------
|
||||
# ANSI colour sequences, stored unexpanded; printf's %b turns them into the
# actual escape bytes at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. Each prints "[openshift-metrics] MESSAGE" with a coloured
# tag. Only the colour codes are escape-expanded (%b); the message itself is
# printed verbatim (%s), so backslashes in paths or generated values are not
# reinterpreted. "${1-}" keeps a call without a message safe under `set -u`.
#   $1 - message text
# log, warn and info write to stdout; error writes to stderr.
log() { printf '%b[openshift-metrics]%b %s\n' "$GREEN" "$NC" "${1-}"; }
warn() { printf '%b[openshift-metrics]%b %s\n' "$YELLOW" "$NC" "${1-}"; }
error() { printf '%b[openshift-metrics]%b %s\n' "$RED" "$NC" "${1-}" >&2; }
info() { printf '%b[openshift-metrics]%b %s\n' "$BLUE" "$NC" "${1-}"; }
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Usage
|
||||
#------------------------------------------------------------------------------
|
||||
# Print the help text to stdout and terminate the script successfully.
# Reads: VERSION, $0. Never returns (exits with status 0).
usage() {
  # Unquoted delimiter: ${VERSION} and $0 are expanded, and each "\\" prints a
  # single backslash so the examples show shell line continuations.
  cat <<EOF
configure-openshift-metrics.sh v${VERSION}

Configure an external Prometheus to receive OpenShift metrics.

Usage:
  sudo $0 [OPTIONS]

Options:
  --method METHOD           federation or remote-write (default: federation)
  --openshift-url URL       OpenShift Prometheus route hostname (federation)
  --prometheus-url URL      External Prometheus URL (remote-write)
  --cluster-name NAME       Label for metrics (default: openshift)
  --prometheus-config PATH  Path to prometheus.yml (default: /etc/prometheus/prometheus.yml)
  --rules-dir PATH          Directory for rule files (default: /etc/prometheus/rules)
  --token-file PATH         Bearer token file path (default: /etc/prometheus/openshift-token)
  --skip-openshift          Skip oc commands (use existing token)
  --skip-rules              Skip recording/alert rule generation
  --dry-run                 Show what would be done without making changes
  --help                    Show this help message

Federation example:
  sudo $0 --method federation \\
    --openshift-url prometheus-k8s-openshift-monitoring.apps.cluster.example.com \\
    --cluster-name production

Remote write example:
  sudo $0 --method remote-write \\
    --prometheus-url https://prometheus.example.com:9090 \\
    --cluster-name production
EOF
  exit 0
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Parse arguments
|
||||
#------------------------------------------------------------------------------
|
||||
# Abort with a readable message when a value-taking option is the last word on
# the command line. Without this check, reading "$2" under `set -u` kills the
# script with a bare "unbound variable" error.
#   $1 - option name as typed by the user
#   $2 - number of command-line words left, including the option itself
_require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    error "Option $1 requires a value"
    echo "Use --help for usage."
    exit 1
  fi
}

# Translate command-line options into the global settings.
#   $@ - the script's command-line arguments
# Writes: METHOD, OPENSHIFT_URL, PROMETHEUS_URL, CLUSTER_NAME,
#         PROMETHEUS_CONFIG, RULES_DIR, TOKEN_FILE, SKIP_OPENSHIFT,
#         SKIP_RULES, DRY_RUN
# Exits 1 on an unknown option or a missing option value; --help/-h hands over
# to usage, which exits 0.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --method)
        _require_option_value "$1" "$#"
        METHOD="$2"
        shift 2
        ;;
      --openshift-url)
        _require_option_value "$1" "$#"
        OPENSHIFT_URL="$2"
        shift 2
        ;;
      --prometheus-url)
        _require_option_value "$1" "$#"
        PROMETHEUS_URL="$2"
        shift 2
        ;;
      --cluster-name)
        _require_option_value "$1" "$#"
        CLUSTER_NAME="$2"
        shift 2
        ;;
      --prometheus-config)
        _require_option_value "$1" "$#"
        PROMETHEUS_CONFIG="$2"
        shift 2
        ;;
      --rules-dir)
        _require_option_value "$1" "$#"
        RULES_DIR="$2"
        shift 2
        ;;
      --token-file)
        _require_option_value "$1" "$#"
        TOKEN_FILE="$2"
        shift 2
        ;;
      --skip-openshift)
        SKIP_OPENSHIFT=true
        shift
        ;;
      --skip-rules)
        SKIP_RULES=true
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --help | -h)
        usage
        ;;
      *)
        error "Unknown option: $1"
        echo "Use --help for usage."
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Validation
|
||||
#------------------------------------------------------------------------------
|
||||
# Check the parsed settings and the local environment before anything is
# changed. Exits 1 with an error message on the first failed check.
# Reads: METHOD, OPENSHIFT_URL, PROMETHEUS_URL, PROMETHEUS_CONFIG,
#        SKIP_OPENSHIFT, EUID
validate() {
  if [[ "$METHOD" != "federation" && "$METHOD" != "remote-write" ]]; then
    error "Invalid method: $METHOD (must be federation or remote-write)"
    exit 1
  fi

  if [[ "$METHOD" == "federation" && -z "$OPENSHIFT_URL" ]]; then
    error "--openshift-url is required for federation mode"
    exit 1
  fi

  # The value is written verbatim into static_configs.targets, which takes
  # host[:port] only; the scrape job supplies scheme and metrics path itself.
  if [[ "$METHOD" == "federation" && "$OPENSHIFT_URL" == */* ]]; then
    error "--openshift-url must be a bare hostname[:port] without scheme or path (got: $OPENSHIFT_URL)"
    exit 1
  fi

  if [[ "$METHOD" == "remote-write" && -z "$PROMETHEUS_URL" ]]; then
    error "--prometheus-url is required for remote-write mode"
    exit 1
  fi

  # The value is used as the prefix of the remote write endpoint
  # (URL/api/v1/write), so it has to be an absolute http(s) URL.
  if [[ "$METHOD" == "remote-write" &&
    "$PROMETHEUS_URL" != http://* && "$PROMETHEUS_URL" != https://* ]]; then
    error "--prometheus-url must start with http:// or https:// (got: $PROMETHEUS_URL)"
    exit 1
  fi

  if [[ "$EUID" -ne 0 ]]; then
    error "This script must be run as root or with sudo"
    exit 1
  fi

  if [[ ! -f "$PROMETHEUS_CONFIG" ]]; then
    error "Prometheus config not found: $PROMETHEUS_CONFIG"
    exit 1
  fi

  # promtool is optional: without it the later validation step is skipped.
  if ! command -v promtool &>/dev/null; then
    warn "promtool not found — config validation will be skipped"
  fi

  if [[ "$SKIP_OPENSHIFT" == false ]] && ! command -v oc &>/dev/null; then
    error "oc CLI not found. Install it or use --skip-openshift with an existing token"
    exit 1
  fi
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Backup existing config
|
||||
#------------------------------------------------------------------------------
|
||||
# Copy the current Prometheus config to
# <config dir>/backups/prometheus.yml.<YYYY-MM-DD_HHMMSS>.
# Reads: PROMETHEUS_CONFIG, DRY_RUN
# In dry-run mode nothing is touched on disk (the backups directory is not
# created either); the function only reports what it would do.
backup_config() {
  local backup_dir timestamp backup_file
  backup_dir="$(dirname "$PROMETHEUS_CONFIG")/backups"
  timestamp=$(date +%F_%H%M%S)
  backup_file="${backup_dir}/prometheus.yml.${timestamp}"

  if [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would backup $PROMETHEUS_CONFIG to $backup_file"
    return 0
  fi

  mkdir -p "$backup_dir"
  # Plain cp on purpose: the copy receives a fresh modification time.
  cp "$PROMETHEUS_CONFIG" "$backup_file"
  log "Backed up config to $backup_file"
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# OpenShift: Create service account and token
|
||||
#------------------------------------------------------------------------------
|
||||
# Make sure a bearer token for the OpenShift monitoring stack exists in
# TOKEN_FILE.
# Reads: SKIP_OPENSHIFT, TOKEN_FILE, DRY_RUN, SA_NAME, OC_NAMESPACE,
#        TOKEN_DURATION, PROMETHEUS_USER
# With --skip-openshift an existing token file is required and reused.
# Otherwise the logged-in `oc` session is used to create the service account,
# grant it cluster-monitoring-view and mint a token.
# Exits 1 when no token is available, oc is not logged in, or the token
# cannot be written.
setup_openshift_sa() {
  if [[ "$SKIP_OPENSHIFT" == true ]]; then
    if [[ ! -f "$TOKEN_FILE" ]]; then
      error "No token found at $TOKEN_FILE. Provide a token or remove --skip-openshift."
      exit 1
    fi
    log "Using existing token from $TOKEN_FILE"
    return 0
  fi

  log "Setting up OpenShift service account..."

  # Check oc is logged in
  if ! oc whoami &>/dev/null; then
    error "Not logged into OpenShift. Run: oc login <cluster-url>"
    exit 1
  fi

  local cluster_info
  cluster_info=$(oc whoami --show-server 2>/dev/null || echo "unknown")
  log "Connected to: $cluster_info"

  if [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would create service account $SA_NAME in $OC_NAMESPACE"
    info "[dry-run] Would grant cluster-monitoring-view role"
    info "[dry-run] Would generate token with duration $TOKEN_DURATION"
    return 0
  fi

  # Create service account (ignore if exists)
  if oc get serviceaccount "$SA_NAME" -n "$OC_NAMESPACE" &>/dev/null; then
    warn "Service account $SA_NAME already exists in $OC_NAMESPACE"
  else
    oc create serviceaccount "$SA_NAME" -n "$OC_NAMESPACE"
    log "Created service account: $SA_NAME"
  fi

  # Grant cluster-monitoring-view role
  # NOTE(review): the binding name probed here is never passed to the grant
  # command below, so this lookup presumably only matches a binding that was
  # created by hand under that name — confirm against the cluster.
  if oc get clusterrolebinding "${SA_NAME}-monitoring-view" &>/dev/null; then
    warn "Role binding already exists"
  else
    oc adm policy add-cluster-role-to-user cluster-monitoring-view \
      -z "$SA_NAME" -n "$OC_NAMESPACE"
    log "Granted cluster-monitoring-view role"
  fi

  # Generate token
  local token
  token=$(oc create token "$SA_NAME" -n "$OC_NAMESPACE" --duration="$TOKEN_DURATION")
  if [[ -z "$token" ]]; then
    error "oc create token returned an empty token for $SA_NAME"
    exit 1
  fi

  # Write the secret through a private temp file (mktemp creates it with mode
  # 0600) and rename it into place, so the token is never readable by other
  # users — not even for the moment between creating the file and chmod.
  local tmp_token
  tmp_token=$(mktemp "${TOKEN_FILE}.XXXXXX")
  if printf '%s\n' "$token" > "$tmp_token" &&
    chmod 600 "$tmp_token" &&
    chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$tmp_token" &&
    mv -f "$tmp_token" "$TOKEN_FILE"; then
    log "Token saved to $TOKEN_FILE (expires in $TOKEN_DURATION)"
  else
    rm -f -- "$tmp_token"
    error "Failed to write token to $TOKEN_FILE"
    exit 1
  fi
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Generate federation scrape config
|
||||
#------------------------------------------------------------------------------
|
||||
# Print the YAML for the "openshift-federate" scrape job on stdout.
# Reads: TOKEN_FILE, OPENSHIFT_URL, CLUSTER_NAME
# The output starts with an empty line and the job entry is indented by two
# spaces so it can be appended below an existing scrape_configs list.
# NOTE(review): that only yields valid YAML when scrape_configs is the last
# section of prometheus.yml and uses the same list indent — confirm per host.
# The token path and the cluster label are emitted as quoted scalars so values
# such as "123", "true" or text containing "#" stay plain strings.
# NOTE(review): insecure_skip_verify disables TLS certificate checks for the
# route; kept as in the original, consider supplying a ca_file instead.
generate_federation_config() {
  cat <<YAML

  - job_name: "openshift-federate"
    honor_labels: true
    metrics_path: /federate
    scrape_interval: 30s
    scrape_timeout: 25s
    params:
      'match[]':
        - '{job="node-exporter"}'
        - '{job="kube-state-metrics"}'
        - '{__name__=~"container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_.*_bytes_total|container_fs_.*_bytes"}'
        - '{__name__=~"etcd_server_leader_changes_seen_total|etcd_disk_wal_fsync_duration_seconds_bucket|etcd_mvcc_db_total_size_in_bytes"}'
        - '{__name__=~"apiserver_request_total|apiserver_request_duration_seconds_bucket"}'
        - '{__name__="up"}'
    scheme: https
    bearer_token_file: "${TOKEN_FILE}"
    tls_config:
      insecure_skip_verify: true
    static_configs:
      - targets:
          - "${OPENSHIFT_URL}"
        labels:
          cluster: "${CLUSTER_NAME}"
YAML
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Generate recording rules
|
||||
#------------------------------------------------------------------------------
|
||||
# Print the recording rule file (YAML) for the cluster on stdout.
# Reads: CLUSTER_NAME (used as the value of every cluster="..." matcher)
# The running-pod rule aggregates with sum(): kube_pod_status_phase exposes
# one 0/1 series per pod and phase, so counting the series would count every
# pod regardless of its phase.
generate_recording_rules() {
  cat <<YAML
# OpenShift recording rules — generated by configure-openshift-metrics.sh
# Cluster: ${CLUSTER_NAME}
groups:
  - name: openshift_recording_rules
    interval: 30s
    rules:
      - record: openshift:node_cpu_utilization:ratio
        expr: |
          1 - avg by(instance, cluster) (
            rate(node_cpu_seconds_total{mode="idle", cluster="${CLUSTER_NAME}"}[5m])
          )

      - record: openshift:node_memory_utilization:ratio
        expr: |
          1 - (
            node_memory_MemAvailable_bytes{cluster="${CLUSTER_NAME}"}
            / node_memory_MemTotal_bytes{cluster="${CLUSTER_NAME}"}
          )

      - record: openshift:namespace_pod_count:sum
        expr: |
          sum by(namespace, cluster) (
            kube_pod_status_phase{phase="Running", cluster="${CLUSTER_NAME}"}
          )

      - record: openshift:namespace_cpu_usage:sum
        expr: |
          sum by(namespace, cluster) (
            rate(container_cpu_usage_seconds_total{cluster="${CLUSTER_NAME}", container!=""}[5m])
          )

      - record: openshift:namespace_memory_usage:sum
        expr: |
          sum by(namespace, cluster) (
            container_memory_working_set_bytes{cluster="${CLUSTER_NAME}", container!=""}
          )
YAML
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Generate alert rules
|
||||
#------------------------------------------------------------------------------
|
||||
# Print the alerting rule file (YAML) for the cluster on stdout.
# Reads: CLUSTER_NAME
# "\$" keeps the Prometheus template variables ($labels, $value) literal in
# the unquoted here-document; only ${CLUSTER_NAME} is expanded by the shell.
# The crash-loop alert uses increase(...[15m]) so that \$value is the restart
# count over the 15 minutes named in its description, and the replica alert
# compares against kube_deployment_status_replicas_ready, the name
# kube-state-metrics publishes for the ready replica count.
generate_alert_rules() {
  cat <<YAML
# OpenShift alert rules — generated by configure-openshift-metrics.sh
# Cluster: ${CLUSTER_NAME}
groups:
  - name: openshift_alerts
    rules:
      - alert: OpenShiftFederationDown
        expr: up{job="openshift-federate", cluster="${CLUSTER_NAME}"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "OpenShift federation target is down"
          description: "Cannot scrape metrics from OpenShift cluster {{ \$labels.cluster }} for 5 minutes."

      - alert: OpenShiftNodeHighCPU
        expr: openshift:node_cpu_utilization:ratio > 0.9
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High CPU on OpenShift node {{ \$labels.instance }}"
          description: "CPU usage above 90% for 10 minutes (current: {{ \$value | humanizePercentage }})."

      - alert: OpenShiftNodeHighMemory
        expr: openshift:node_memory_utilization:ratio > 0.9
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High memory on OpenShift node {{ \$labels.instance }}"
          description: "Memory usage above 90% for 10 minutes (current: {{ \$value | humanizePercentage }})."

      - alert: OpenShiftPodCrashLooping
        expr: increase(kube_pod_container_status_restarts_total{cluster="${CLUSTER_NAME}"}[15m]) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Pod {{ \$labels.namespace }}/{{ \$labels.pod }} is crash looping"
          description: "Pod has restarted {{ \$value | humanize }} times in the last 15 minutes."

      - alert: OpenShiftDeploymentReplicasMismatch
        expr: |
          kube_deployment_spec_replicas{cluster="${CLUSTER_NAME}"}
          != kube_deployment_status_replicas_ready{cluster="${CLUSTER_NAME}"}
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Deployment {{ \$labels.namespace }}/{{ \$labels.deployment }} replica mismatch"
          description: "Deployment does not have expected number of ready replicas."

      - alert: OpenShiftEtcdLeaderChanges
        expr: increase(etcd_server_leader_changes_seen_total{cluster="${CLUSTER_NAME}"}[1h]) > 3
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Frequent etcd leader changes on {{ \$labels.cluster }}"
          description: "etcd leader changed {{ \$value | humanize }} times in the last hour."
YAML
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Apply federation configuration
|
||||
#------------------------------------------------------------------------------
|
||||
# Configure federation mode: obtain the bearer token, back up prometheus.yml,
# append the "openshift-federate" scrape job, then generate rules and reload.
# Reads: OPENSHIFT_URL, PROMETHEUS_CONFIG, PROMETHEUS_USER, DRY_RUN, SKIP_RULES
# Returns 1, before touching the cluster or the file system, when the scrape
# job is already present in the config. The check also applies in dry-run
# mode so the preview matches what a real run would do.
apply_federation() {
  log "Configuring federation from $OPENSHIFT_URL..."

  # Check if the job already exists. This has to happen first: otherwise a
  # new token would be minted and a backup written for a run that then stops.
  if grep -qF 'job_name: "openshift-federate"' "$PROMETHEUS_CONFIG" 2>/dev/null; then
    warn "Federation job 'openshift-federate' already exists in $PROMETHEUS_CONFIG"
    warn "Remove the existing job first or edit it manually."
    return 1
  fi

  # Set up OpenShift service account and token
  setup_openshift_sa

  # Backup existing config
  backup_config

  # Generate and append federation scrape config
  local federation_config
  federation_config=$(generate_federation_config)

  if [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would append to $PROMETHEUS_CONFIG:"
    printf '%s\n' "$federation_config"
  else
    printf '%s\n' "$federation_config" >> "$PROMETHEUS_CONFIG"
    chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$PROMETHEUS_CONFIG"
    log "Federation scrape job added to $PROMETHEUS_CONFIG"
  fi

  # Generate rules
  if [[ "$SKIP_RULES" == false ]]; then
    generate_rules
  fi

  # Validate and reload
  validate_and_reload
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Apply remote write configuration
|
||||
#------------------------------------------------------------------------------
|
||||
# Configure remote-write mode on the receiving Prometheus host and print the
# commands that have to be run on the OpenShift side.
# Reads: PROMETHEUS_URL, PROMETHEUS_SERVICE, PROMETHEUS_CONFIG,
#        PROMETHEUS_USER, CLUSTER_NAME, DRY_RUN, SKIP_RULES
# Steps: back up the config, point out the receiver flag if it is missing,
# generate basic-auth credentials, create web.yml (unless it exists), print
# the `oc` commands, then generate rules and reload.
apply_remote_write() {
  log "Configuring remote write to $PROMETHEUS_URL..."

  # Backup existing config
  backup_config

  # Enable remote write receiver (the unit file is only inspected, never edited)
  local service_file="/etc/systemd/system/${PROMETHEUS_SERVICE}.service"
  if [[ -f "$service_file" ]]; then
    if grep -q "web.enable-remote-write-receiver" "$service_file"; then
      log "Remote write receiver already enabled"
    elif [[ "$DRY_RUN" == true ]]; then
      info "[dry-run] Would add --web.enable-remote-write-receiver to $service_file"
    else
      warn "You need to add --web.enable-remote-write-receiver to your Prometheus service."
      warn "Edit $service_file and add the flag to ExecStart, then run:"
      warn "  sudo systemctl daemon-reload && sudo systemctl restart ${PROMETHEUS_SERVICE}"
      echo ""
    fi
  fi

  # Generate basic auth credentials (openssl preferred, /dev/urandom fallback)
  local rw_password
  rw_password=$(openssl rand -base64 24 2>/dev/null || head -c 24 /dev/urandom | base64)
  local rw_user="openshift"

  # The password is shown on purpose: it is needed for the secret below.
  log "Generated remote write credentials:"
  log "  Username: $rw_user"
  log "  Password: $rw_password"
  echo ""

  # Generate web.yml with basic auth
  local web_config_file
  web_config_file="$(dirname "$PROMETHEUS_CONFIG")/web.yml"

  if ! command -v htpasswd &>/dev/null; then
    warn "htpasswd not found — install apache2-utils (Debian) or httpd-tools (RHEL)"
    warn "Then generate a hash: htpasswd -nbBC 12 '' 'PASSWORD'"
  elif [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would create $web_config_file with basic_auth_users"
  else
    # htpasswd prints ":<hash>" for the empty user name; strip the colon and
    # the line breaks to keep only the bcrypt hash.
    local hash
    hash=$(htpasswd -nbBC 12 "" "$rw_password" | tr -d ':\n')

    if [[ -f "$web_config_file" ]]; then
      warn "$web_config_file already exists — add this entry manually:"
      echo "  $rw_user: \"$hash\""
    else
      # Create the file under a restrictive umask so the hash is never
      # readable by other users, not even before the chmod below.
      (
        umask 077
        printf 'basic_auth_users:\n  %s: "%s"\n' "$rw_user" "$hash" > "$web_config_file"
      )
      chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$web_config_file"
      chmod 600 "$web_config_file"
      log "Created $web_config_file"
    fi
  fi

  # Print OpenShift-side commands
  echo ""
  log "Run the following on your OpenShift cluster:"
  echo ""
  echo "  # Create the auth secret"
  echo "  oc create secret generic remote-write-auth \\"
  echo "    -n openshift-monitoring \\"
  echo "    --from-literal=username=${rw_user} \\"
  echo "    --from-literal=password='${rw_password}'"
  echo ""
  echo "  # Apply the remote write config"
  echo "  oc apply -f - <<'OCEOF'"

  # The last writeRelabelConfigs entry stamps cluster="<cluster name>" on
  # every sample: the generated rules and the suggested test query select on
  # that label, and nothing else in this config would set it.
  # NOTE(review): applying this ConfigMap replaces an existing
  # cluster-monitoring-config; merge by hand if one is already in use.
  # NOTE(review): insecureSkipVerify disables TLS verification of the
  # receiving Prometheus; kept as in the original.
  cat <<OCEOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-config
  namespace: openshift-monitoring
data:
  config.yaml: |
    prometheusK8s:
      remoteWrite:
        - url: "${PROMETHEUS_URL}/api/v1/write"
          basicAuth:
            username:
              name: remote-write-auth
              key: username
            password:
              name: remote-write-auth
              key: password
          tlsConfig:
            insecureSkipVerify: true
          writeRelabelConfigs:
            - sourceLabels: [__name__]
              regex: "node_.*|kube_.*|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_.*_bytes_total|container_fs_.*_bytes|etcd_.*|apiserver_request_total|apiserver_request_duration_seconds_bucket|up"
              action: keep
            - regex: "prometheus_replica"
              action: labeldrop
            - targetLabel: cluster
              replacement: "${CLUSTER_NAME}"
              action: replace
          queueConfig:
            maxSamplesPerSend: 5000
            batchSendDeadline: 5s
            maxShards: 10
OCEOF
  echo "OCEOF"
  echo ""

  # Generate rules
  if [[ "$SKIP_RULES" == false ]]; then
    generate_rules
  fi

  # Validate and reload
  validate_and_reload
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Generate recording and alert rules
|
||||
#------------------------------------------------------------------------------
|
||||
# Write the recording and alert rule files into RULES_DIR, or print them in
# dry-run mode.
# Reads: RULES_DIR, PROMETHEUS_USER, PROMETHEUS_CONFIG, DRY_RUN
# Files: $RULES_DIR/openshift-rules.yml and $RULES_DIR/openshift-alerts.yml
# Nothing in this script registers the files in prometheus.yml, so a warning
# is printed when the config has no rule_files section at all.
generate_rules() {
  log "Generating recording and alert rules..."

  local rules_file="$RULES_DIR/openshift-rules.yml"
  local alerts_file="$RULES_DIR/openshift-alerts.yml"

  if [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would create $rules_file"
    info "[dry-run] Would create $alerts_file"
    echo ""
    info "Recording rules:"
    generate_recording_rules
    echo ""
    info "Alert rules:"
    generate_alert_rules
    return 0
  fi

  mkdir -p "$RULES_DIR"

  generate_recording_rules > "$rules_file"
  chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$rules_file"
  log "Created $rules_file"

  generate_alert_rules > "$alerts_file"
  chown "$PROMETHEUS_USER":"$PROMETHEUS_USER" "$alerts_file"
  log "Created $alerts_file"

  # Without a rule_files entry Prometheus never loads the generated files.
  if ! grep -q '^rule_files:' "$PROMETHEUS_CONFIG" 2>/dev/null; then
    warn "No rule_files section found in $PROMETHEUS_CONFIG — add \"$RULES_DIR/*.yml\" under rule_files so Prometheus loads the generated rules"
  fi
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Validate config and reload Prometheus
|
||||
#------------------------------------------------------------------------------
|
||||
# Validate the configuration with promtool (when installed) and reload the
# Prometheus service.
# Reads: DRY_RUN, PROMETHEUS_CONFIG, SKIP_RULES, RULES_DIR, PROMETHEUS_SERVICE
# When the config check fails, the most recent backup is copied back over the
# config and the script exits 1. A failing rule file also exits 1.
validate_and_reload() {
  if [[ "$DRY_RUN" == true ]]; then
    info "[dry-run] Would validate config and reload Prometheus"
    return 0
  fi

  # Validate with promtool
  if command -v promtool &>/dev/null; then
    log "Validating Prometheus configuration..."

    if ! promtool check config "$PROMETHEUS_CONFIG"; then
      error "Config validation failed. Restoring backup..."

      # Backups are named prometheus.yml.<YYYY-MM-DD_HHMMSS>, so the glob's
      # sorted expansion is chronological and the last match is the newest.
      # This avoids parsing `ls` output and copes with an empty directory.
      local backup_dir candidate latest_backup=""
      backup_dir="$(dirname "$PROMETHEUS_CONFIG")/backups"
      for candidate in "$backup_dir"/prometheus.yml.*; do
        if [[ -f "$candidate" ]]; then
          latest_backup="$candidate"
        fi
      done

      if [[ -n "$latest_backup" ]]; then
        cp "$latest_backup" "$PROMETHEUS_CONFIG"
        log "Restored from $latest_backup"
      fi
      exit 1
    fi
    log "Config validation passed"

    # Validate rules
    if [[ "$SKIP_RULES" == false ]]; then
      local rule_file
      for rule_file in "$RULES_DIR"/openshift-*.yml; do
        # An unmatched glob stays literal; the -f test skips it.
        if [[ -f "$rule_file" ]]; then
          if ! promtool check rules "$rule_file"; then
            error "Rule validation failed: $rule_file"
            exit 1
          fi
        fi
      done
      log "Rule validation passed"
    fi
  fi

  # Reload Prometheus; fall back to a restart when reload fails
  if systemctl is-active --quiet "$PROMETHEUS_SERVICE"; then
    systemctl reload "$PROMETHEUS_SERVICE" 2>/dev/null ||
      systemctl restart "$PROMETHEUS_SERVICE"
    log "Prometheus reloaded"
  else
    warn "Prometheus service is not running. Start it with: sudo systemctl start $PROMETHEUS_SERVICE"
  fi
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Print summary
|
||||
#------------------------------------------------------------------------------
|
||||
# Print a human-readable summary of what was configured, followed by
# verification hints, on stdout.
# Reads: METHOD, CLUSTER_NAME, OPENSHIFT_URL, TOKEN_FILE, PROMETHEUS_URL,
#        PROMETHEUS_CONFIG, RULES_DIR, SKIP_RULES
print_summary() {
  printf '%s\n' \
    "" \
    "============================================" \
    "  OpenShift Metrics Configuration Complete" \
    "============================================" \
    "" \
    "  Method:          $METHOD" \
    "  Cluster name:    $CLUSTER_NAME"

  # Mode-specific endpoint details
  if [[ "$METHOD" == "federation" ]]; then
    printf '%s\n' \
      "  OpenShift URL:   $OPENSHIFT_URL" \
      "  Token file:      $TOKEN_FILE"
  else
    printf '%s\n' "  Prometheus URL:  $PROMETHEUS_URL"
  fi

  printf '%s\n' "  Config file:     $PROMETHEUS_CONFIG"

  if [[ "$SKIP_RULES" == false ]]; then
    printf '%s\n' "  Rules dir:       $RULES_DIR"
  fi

  printf '%s\n' \
    "" \
    "  Verify:" \
    "    - Check targets: http://localhost:9090/targets"

  # Suggest a query that only returns data once metrics of this cluster arrive
  if [[ "$METHOD" == "federation" ]]; then
    printf '%s\n' "    - Test query: node_memory_MemAvailable_bytes{cluster=\"${CLUSTER_NAME}\"}"
  else
    printf '%s\n' "    - Test query: up{cluster=\"${CLUSTER_NAME}\"}"
  fi

  printf '\n'
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Main
|
||||
#------------------------------------------------------------------------------
|
||||
# Entry point: print the banner, validate the settings, run the selected
# method and — unless this is a dry run — print the summary.
# Reads: VERSION, DRY_RUN, METHOD
main() {
  echo ""
  log "configure-openshift-metrics.sh v${VERSION}"
  echo ""

  validate

  if [[ "$DRY_RUN" == true ]]; then
    warn "DRY RUN — no changes will be made"
    echo ""
  fi

  # validate has already rejected every method other than these two.
  case "$METHOD" in
    federation) apply_federation ;;
    remote-write) apply_remote_write ;;
  esac

  if [[ "$DRY_RUN" != true ]]; then
    print_summary
  fi

  log "Done."
}
|
||||
|
||||
main
|
||||
Reference in New Issue
Block a user