#!/bin/bash
#####################################################
###
### Description: Expose Wickr messaging server
###              metrics for monitoring Wickr IO Gateway
###              and service health.
###
### Metrics Generated:
###   Infrastructure Metrics:
###     - wickr_docker_container_status
###     - wickr_api_status
###     - wickr_network_connectivity
###     - wickr_log_errors_total
###     - wickr_log_warnings_total
###     - wickr_docker_cpu_percent
###     - wickr_docker_memory_usage_bytes
###     - wickr_docker_memory_limit_bytes
###
###   Application Metrics:
###     - wickr_messages_sent_total
###     - wickr_messages_received_total
###     - wickr_messages_pending
###     - wickr_messages_queued
###     - wickr_messages_callback_pending
###     - wickr_send_errors_total
###     - wickr_receive_errors_total
###     - wickr_outbox_sync_total
###
### Output:  Prometheus text exposition format on stdout.
###          All diagnostics go to stderr so they can never
###          end up inside the metrics file.
###
### Phil Connor contact@mylinux.work
### Version 1.01.0.20250827
###
#####################################################

# Exit on any error, treat unset variables as errors, and fail pipes on first failure
set -euo pipefail

# Absolute path to this script; used as the command in the cron job
SCRIPT_PATH="$(readlink -f "$0")" || SCRIPT_PATH="$0"
readonly SCRIPT_PATH

# Configuration with defaults - can be overridden by environment variables
readonly CRONTAB_USER="${CRONTAB_USER:-root}"                                # User to install cron job under
readonly NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"    # Directory where Prometheus metrics are stored
readonly PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"                    # User that owns the metrics directory
readonly LOCK_DIR="${LOCK_DIR:-/var/run}"                                    # Directory for the lock file
readonly UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/5 * * * *}"                   # Cron schedule - every 5 minutes by default
readonly WICKR_DOCKER_NAME="${WICKR_DOCKER_NAME:-wickr-io}"                  # Docker container name for Wickr IO Gateway
readonly WICKR_API_URL="${WICKR_API_URL:-http://localhost:8080}"             # Wickr IO REST API URL
readonly WICKR_LOG_PATH="${WICKR_LOG_PATH:-/var/log/wickr}"                  # Wickr logs directory
readonly WICKR_API_KEY="${WICKR_API_KEY:-}"                                  # Wickr IO API key for statistics
readonly WICKR_STATS_ENDPOINT="${WICKR_STATS_ENDPOINT:-/WickrIO/V1/Apps}"    # Statistics API endpoint

# Status codes used in Prometheus metrics
readonly STATUS_SUCCESS=1    # Service is working correctly
readonly STATUS_FAILURE=0    # Service has failed or is not responding
readonly STATUS_NOT_FOUND=2  # Service/command not found on system

#######################################
# Print an error to stderr and terminate.
# Arguments: $1 - message, $2 - exit code (default 1)
#######################################
handle_error() {
  echo "ERROR: $1" >&2
  exit "${2:-1}"
}

#######################################
# Timestamped log line.
# Written to stderr: stdout is reserved for metrics, and anything else
# printed there would corrupt the .prom file.
# Arguments: $1 - level, $2 - message
#######################################
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$1] $2" >&2
}

#######################################
# Locate an executable in PATH or in fallback directories.
# Arguments: $1 - command name, $2.. - fallback directories
# Outputs:   path of the executable on stdout
# Returns:   exits non-zero (via handle_error) when not found; callers
#            invoke this inside $(...) so only the subshell exits.
#######################################
find_command() {
  local cmd="$1"
  shift

  # First try to find command in PATH
  if command -v "$cmd" &>/dev/null; then
    command -v "$cmd"
    return 0
  fi

  # Iterate "$@" directly: unlike an empty array it is safe under
  # `set -u` on every bash version.
  local dir
  for dir in "$@"; do
    if [[ -x "$dir/$cmd" ]]; then
      echo "$dir/$cmd"
      return 0
    fi
  done

  handle_error "Could not find '$cmd' executable"
}

#######################################
# Take an exclusive, non-blocking lock so only one instance runs.
# A single fixed lock file is used; a per-run unique file can never
# conflict with another run and therefore would not lock anything.
# The lock is held on fd 9 and released automatically when the
# process exits. The file itself is intentionally left in place,
# since unlinking a lock file while others may open it is racy.
#######################################
setup_lock() {
  [[ -d "$LOCK_DIR" ]] || handle_error "Lock directory does not exist: $LOCK_DIR"

  # Remove stale per-run lock files left behind by earlier versions
  find "$LOCK_DIR" -maxdepth 1 -name "wicker_info.*" -type f -mmin +60 -delete 2>/dev/null || true

  local lockfile="$LOCK_DIR/wickr_status.lock"
  exec 9>"$lockfile" || handle_error "Failed to create lock file"
  flock -n 9 || handle_error "Script is already running"
}

#######################################
# Ensure the Node Exporter directory exists.
# When missing, root creates it and hands it to PROMETHEUS_USER;
# afterwards it must be writable or the script aborts.
#######################################
setup_directories() {
  [[ -d "$NODE_EXPORTER_DIR" ]] && return 0

  # Create directory if running as root
  if [[ "$(id -u)" == "0" ]]; then
    mkdir -p "$NODE_EXPORTER_DIR"
    chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
  fi

  # `||` form so the function returns 0 on the success path; a trailing
  # `[[ ! -w ]] && ...` would return 1 and trip `set -e` in the caller.
  [[ -w "$NODE_EXPORTER_DIR" ]] || handle_error "$NODE_EXPORTER_DIR is not writable"
}

#######################################
# Install the cron job for CRONTAB_USER unless it is already present.
# The same crontab is read and written, so another user's entries are
# never replaced by the caller's.
#######################################
install_cron_job() {
  local -a crontab_cmd=(crontab)
  # -u usually requires privileges; skip it when targeting ourselves
  [[ "$(id -un)" == "$CRONTAB_USER" ]] || crontab_cmd+=(-u "$CRONTAB_USER")

  local current
  current=$("${crontab_cmd[@]}" -l 2>/dev/null) || current=""

  # Already installed? (fixed-string match: the path is not a regex)
  if grep -qF -- "$SCRIPT_PATH" <<< "$current"; then
    return 0
  fi

  local prom_file="$NODE_EXPORTER_DIR/wickr_status.prom"
  local temp_cron
  temp_cron=$(mktemp) || handle_error "Failed to install cron job"

  {
    [[ -z "$current" ]] || printf '%s\n' "$current"
    # Write to a temp name and rename on success so the collector never
    # reads a half-written file; stderr is left to cron, not the metrics.
    printf '%s\n' "$UPDATE_INTERVAL $SCRIPT_PATH > $prom_file.tmp && mv -f $prom_file.tmp $prom_file"
  } > "$temp_cron"

  if "${crontab_cmd[@]}" "$temp_cron"; then
    rm -f "$temp_cron"
    log "INFO" "Cron job installed successfully"
  else
    rm -f "$temp_cron"
    handle_error "Failed to install cron job"
  fi
}

#######################################
# Report whether the Wickr container is up.
# Outputs: STATUS_SUCCESS / STATUS_FAILURE / STATUS_NOT_FOUND
#######################################
check_docker_container() {
  local docker_path
  docker_path=$(find_command docker /usr/bin /usr/local/bin 2>/dev/null) || {
    echo "$STATUS_NOT_FOUND"
    return
  }

  # Capture first, then match: piping straight into `grep -q` can make
  # the pipeline fail under pipefail when grep exits early.
  local listing
  listing=$("$docker_path" ps --format '{{.Names}}\t{{.Status}}' 2>/dev/null) || listing=""

  if grep -q -- "$WICKR_DOCKER_NAME.*Up" <<< "$listing"; then
    echo "$STATUS_SUCCESS"
  else
    echo "$STATUS_FAILURE"
  fi
}

#######################################
# Print one JSON document with the container's resource usage.
# Returns: non-zero when docker is missing or the stats call fails
#######################################
get_docker_stats() {
  local docker_path
  docker_path=$(find_command docker /usr/bin /usr/local/bin 2>/dev/null) || return

  "$docker_path" stats --no-stream --format "{{json .}}" "$WICKR_DOCKER_NAME" 2>/dev/null
}

#######################################
# Probe the Wickr IO REST API.
# Any HTTP response within the timeout counts as "responding".
# Outputs: STATUS_SUCCESS / STATUS_FAILURE / STATUS_NOT_FOUND
#######################################
check_wickr_api() {
  local curl_path
  curl_path=$(find_command curl /usr/bin /usr/local/bin 2>/dev/null) || {
    echo "$STATUS_NOT_FOUND"
    return
  }

  if "$curl_path" -s --max-time 5 "$WICKR_API_URL/health" >/dev/null 2>&1; then
    echo "$STATUS_SUCCESS"
  else
    echo "$STATUS_FAILURE"
  fi
}

# Count journal lines of the wickr unit from the last hour matching $1.
_count_journal_matches() {
  local n
  # journalctl may exit non-zero (e.g. nothing matched); wc still prints
  # the count, so ignore the pipeline status instead of appending output.
  n=$(journalctl -u wickr --since "1 hour ago" --grep "$1" --no-pager -q 2>/dev/null | wc -l) || true
  n=${n//[[:space:]]/}
  [[ "$n" =~ ^[0-9]+$ ]] || n=0
  echo "$n"
}

#######################################
# Count ERROR and WARN messages.
# Uses the systemd journal when journalctl exists, otherwise
# $WICKR_LOG_PATH/wickr.log. Reports zeros when the log directory
# is absent.
# Outputs: "<errors> <warnings>" - always exactly two numbers
#######################################
count_log_errors() {
  local error_count=0 warn_count=0

  if [[ -d "$WICKR_LOG_PATH" ]]; then
    if command -v journalctl &>/dev/null; then
      error_count=$(_count_journal_matches "ERROR")
      warn_count=$(_count_journal_matches "WARN")
    elif [[ -f "$WICKR_LOG_PATH/wickr.log" ]]; then
      # grep -c prints 0 but exits 1 when nothing matches
      error_count=$(grep -c "ERROR" "$WICKR_LOG_PATH/wickr.log" 2>/dev/null) || true
      warn_count=$(grep -c "WARN" "$WICKR_LOG_PATH/wickr.log" 2>/dev/null) || true
    fi
  fi

  # An unreadable file leaves the value empty; keep the output two fields
  [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
  [[ "$warn_count" =~ ^[0-9]+$ ]] || warn_count=0

  echo "$error_count $warn_count"
}

#######################################
# Test outbound connectivity to api.wickr.com:443.
# Outputs: STATUS_SUCCESS / STATUS_FAILURE / STATUS_NOT_FOUND
#######################################
check_network_connectivity() {
  local nc_path
  nc_path=$(find_command nc /usr/bin /usr/local/bin 2>/dev/null) || {
    echo "$STATUS_NOT_FOUND"
    return
  }

  if echo | "$nc_path" -w 3 api.wickr.com 443 >/dev/null 2>&1; then
    echo "$STATUS_SUCCESS"
  else
    echo "$STATUS_FAILURE"
  fi
}

# Extract the first numeric value of key $2 from JSON text $1 without jq.
# Prints 0 when the key is absent.
_json_number_field() {
  local match
  match=$(grep -oE "\"$2\"[[:space:]]*:[[:space:]]*[0-9]+" <<< "$1" | head -n 1) || true
  match=${match##*:}
  match=${match//[[:space:]]/}
  echo "${match:-0}"
}

#######################################
# Fetch Wickr IO application statistics via the REST API.
# Outputs: eight space-separated numbers, in this order:
#          sent received pending_messages message_count
#          pending_callback_messages sent_errors recv_errors outbox_sync
#          All zeros when the API key is unset, curl is missing, or
#          the request/parse fails.
#######################################
get_wickr_statistics() {
  local zeros="0 0 0 0 0 0 0 0"

  # Return empty values if API key not configured
  if [[ -z "$WICKR_API_KEY" ]]; then
    echo "$zeros"
    return
  fi

  local curl_path
  curl_path=$(find_command curl /usr/bin /usr/local/bin 2>/dev/null) || {
    echo "$zeros"
    return
  }

  local stats_url="$WICKR_API_URL$WICKR_STATS_ENDPOINT/$WICKR_API_KEY/Statistics"

  local stats_json
  stats_json=$("$curl_path" -s --max-time 10 -H "Content-Type: application/json" "$stats_url" 2>/dev/null) || stats_json=""

  if [[ -z "$stats_json" ]]; then
    echo "$zeros"
    return
  fi

  local raw=""
  if command -v jq &>/dev/null; then
    # One jq call for all fields. Accepts the counters either at the top
    # level or nested under a "statistics" object.
    raw=$(jq -r '
      (.statistics? // .) as $s
      | ["sent", "received", "pending_messages", "message_count",
         "pending_callback_messages", "sent_errors", "recv_errors",
         "outbox_sync"]
      | map($s[.] // 0 | tostring)
      | join(" ")' <<< "$stats_json" 2>/dev/null) || raw=""
  else
    # Fallback: basic pattern matching when jq is not available
    local key
    for key in sent received pending_messages message_count \
      pending_callback_messages sent_errors recv_errors outbox_sync; do
      raw+="$(_json_number_field "$stats_json" "$key") "
    done
  fi

  # Normalise to exactly eight numeric tokens so the caller's `read`
  # can never shift values into the wrong metric.
  local -a tokens=()
  read -r -a tokens <<< "$raw" || true
  local out="" i v
  for i in 0 1 2 3 4 5 6 7; do
    v=${tokens[i]:-0}
    [[ "$v" =~ ^[0-9]+(\.[0-9]+)?$ ]] || v=0
    out+="${out:+ }$v"
  done
  echo "$out"
}

#######################################
# Emit one metric in Prometheus exposition format.
# Arguments: $1 name, $2 value, $3 help text, $4 type,
#            $5 optional label list (e.g. 'a="b",c="d"')
#######################################
output_metric() {
  local name="$1" value="$2" help="$3" type="$4" labels="${5:-}"

  # Build the label block separately: `${labels:+{$labels}}` ends at the
  # first `}`, which leaves a stray `}` after the name when no labels
  # are given.
  local label_block=""
  [[ -z "$labels" ]] || label_block="{${labels}}"

  printf '# HELP %s %s\n# TYPE %s %s\n%s%s %s\n' \
    "$name" "$help" "$name" "$type" "$name" "$label_block" "$value"
}

# Convert a docker size string such as "12.5MiB" or "1.944GiB" to whole
# bytes. Prints 0 for anything that does not start with a number.
_docker_size_to_bytes() {
  awk -v s="$1" 'BEGIN {
    gsub(/[ \t]/, "", s)
    if (match(s, /^[0-9]+(\.[0-9]+)?/) == 0) { print 0; exit }
    num  = substr(s, 1, RLENGTH)
    unit = tolower(substr(s, RLENGTH + 1))
    mult = 1
    if      (unit == "kib") mult = 1024
    else if (unit == "mib") mult = 1024 * 1024
    else if (unit == "gib") mult = 1024 * 1024 * 1024
    else if (unit == "tib") mult = 1024 * 1024 * 1024 * 1024
    else if (unit == "kb")  mult = 1000
    else if (unit == "mb")  mult = 1000 * 1000
    else if (unit == "gb")  mult = 1000 * 1000 * 1000
    else if (unit == "tb")  mult = 1000 * 1000 * 1000 * 1000
    printf "%.0f\n", num * mult
  }'
}

#######################################
# Collect all metrics and print them to stdout.
#######################################
main() {
  # Prevent concurrent execution
  setup_lock

  # Ensure output directory exists and is writable
  setup_directories

  # Install cron job for periodic execution
  if [[ -f "$SCRIPT_PATH" ]]; then
    install_cron_job
  fi

  # Collect Wickr status metrics
  local docker_status api_status network_status
  docker_status=$(check_docker_container)
  api_status=$(check_wickr_api)
  network_status=$(check_network_connectivity)

  # Log error counts
  local error_count warn_count
  read -r error_count warn_count <<< "$(count_log_errors)"

  # Wickr application statistics
  local sent received pending queued callback_pending send_errors recv_errors outbox_sync
  read -r sent received pending queued callback_pending send_errors recv_errors outbox_sync \
    <<< "$(get_wickr_statistics)"

  # Infrastructure metrics
  output_metric "wickr_docker_container_status" "$docker_status" \
    "Shows if Wickr IO Docker container is running (1=running, 0=stopped, 2=not_found)" "gauge"
  output_metric "wickr_api_status" "$api_status" \
    "Shows if Wickr IO REST API is responding (1=healthy, 0=unhealthy, 2=not_found)" "gauge"
  output_metric "wickr_network_connectivity" "$network_status" \
    "Shows if server can connect to Wickr external services (1=connected, 0=failed, 2=not_found)" "gauge"
  output_metric "wickr_log_errors_total" "${error_count:-0}" \
    "Total number of ERROR messages in Wickr logs in the last hour" "counter"
  output_metric "wickr_log_warnings_total" "${warn_count:-0}" \
    "Total number of WARN messages in Wickr logs in the last hour" "counter"

  # Application metrics
  output_metric "wickr_messages_sent_total" "${sent:-0}" \
    "Total number of messages sent by Wickr IO client" "counter"
  output_metric "wickr_messages_received_total" "${received:-0}" \
    "Total number of messages received by Wickr IO client" "counter"
  output_metric "wickr_messages_pending" "${pending:-0}" \
    "Number of messages queued to be sent from Wickr IO client" "gauge"
  output_metric "wickr_messages_queued" "${queued:-0}" \
    "Number of received messages currently queued in Wickr IO client" "gauge"
  output_metric "wickr_messages_callback_pending" "${callback_pending:-0}" \
    "Number of messages waiting to be sent to callback process" "gauge"
  output_metric "wickr_send_errors_total" "${send_errors:-0}" \
    "Total number of errors encountered during message transmission" "counter"
  output_metric "wickr_receive_errors_total" "${recv_errors:-0}" \
    "Total number of errors encountered during message reception" "counter"
  output_metric "wickr_outbox_sync_total" "${outbox_sync:-0}" \
    "Total number of outbox sync messages received (multi-device)" "counter"

  # Docker resource usage (only when stats are available and jq can parse them)
  local docker_stats
  if docker_stats=$(get_docker_stats) && command -v jq &>/dev/null; then
    local cpu_percent mem_raw mem_usage mem_limit

    cpu_percent=$(jq -r '.CPUPerc // empty' <<< "$docker_stats" 2>/dev/null) || cpu_percent=""
    cpu_percent=${cpu_percent%\%}
    [[ "$cpu_percent" =~ ^[0-9]+(\.[0-9]+)?$ ]] || cpu_percent=0

    # MemUsage looks like "12.5MiB / 1.944GiB": fractional values with
    # varying units, so shell integer arithmetic cannot be used on it.
    mem_raw=$(jq -r '.MemUsage // empty' <<< "$docker_stats" 2>/dev/null) || mem_raw=""
    mem_usage=$(_docker_size_to_bytes "${mem_raw%%/*}")
    if [[ "$mem_raw" == */* ]]; then
      mem_limit=$(_docker_size_to_bytes "${mem_raw#*/}")
    else
      mem_limit=0
    fi

    output_metric "wickr_docker_cpu_percent" "$cpu_percent" \
      "CPU usage percentage of Wickr Docker container" "gauge"
    output_metric "wickr_docker_memory_usage_bytes" "$mem_usage" \
      "Memory usage of Wickr Docker container in bytes" "gauge"
    output_metric "wickr_docker_memory_limit_bytes" "$mem_limit" \
      "Memory limit of Wickr Docker container in bytes" "gauge"
  fi
}

# Execute main function with all script arguments
main "$@"