#!/bin/bash
#############################################################
####     Expose metrics from PCOIP for Prometheus        ####
####     Redhat/Amazon/Ubuntu Workstations.              ####
####                                                     ####
####     Author: Phil Connor 03/27/2024                  ####
####     Contact: contact@mylinux.work                   ####
####     Version 2.18.20250731                           ####
#############################################################
#
# Extracts metrics from PCoIP (PC over IP) log files and prints them on stdout
# in the Prometheus text exposition format, for use with the node_exporter
# textfile collector. Covers memory, image quality, bandwidth, network latency
# and packet loss.
#
# Must be run as root. All diagnostics go to stderr so that stdout contains
# only metrics. On first run the script installs a cron entry that redirects
# its stdout to $NODE_EXPORTER_DIR/pcoip_metrics.prom.
#
# Environment overrides: DEBUG, CRONTAB_USER, CRON_NAME, UPDATE_INTERVAL,
# NODE_EXPORTER_DIR, PROMETHEUS_USER, LOCK_DIR.

#### System Variables ####
pcoip_log='/var/log/pcoip-agent' # Directory holding the PCoIP server logs
DEBUG="${DEBUG:-0}"              # Set to 1 to enable DEBUG log messages

#### Defaults ####
# Assign default values to every configuration variable.
# Each one can be overridden through the environment.
# Globals written: CRONTAB_USER, CRON_NAME, UPDATE_INTERVAL, NODE_EXPORTER_DIR,
#                  PROMETHEUS_USER, LOCK_DIR, SCRIPT_PATH
declare_defaults() {
  CRONTAB_USER="${CRONTAB_USER:-root}"                             # Owner of the crontab to install into
  CRON_NAME="${CRON_NAME:-pcoip_metric.sh}"                        # Name used in cron log messages
  UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/3 * * * *}"                # Cron schedule (every 3 minutes)
  NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}" # Textfile collector directory
  PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"                 # Owner given to a newly created directory
  LOCK_DIR="${LOCK_DIR:-/var/run}"                                 # Directory holding the lock file
  SCRIPT_PATH="$(readlink -f "$0")"                                # Absolute path of this script
}

#### Error logging functions ####
# Print an error to stderr and exit.
# Arguments: $1 - message, $2 - exit code (default 1)
handle_error() {
  local err_msg="$1"
  local exit_code="${2:-1}"

  printf 'ERROR: %s\n' "$err_msg" >&2
  exit "$exit_code"
}

# Write a timestamped message to stderr.
# DEBUG-level messages are printed only when DEBUG=1.
# Arguments: $1 - level, $2 - message
log() {
  local level="$1"
  local message="$2"

  if [[ "$level" != "DEBUG" || "$DEBUG" -eq 1 ]]; then
    printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$message" >&2
  fi
}

#### Log validation ####
# Check that at least one file in a directory matches a glob pattern.
# Arguments: $1 - directory, $2 - glob pattern, $3 - "true" to suppress the warning
# Returns:   0 if at least one file matches, 1 otherwise
validate_log_file() {
  local log_dir="$1"
  local pattern="$2"
  local silent="${3:-false}"
  local -a files=()

  # nullglob makes an unmatched pattern expand to nothing instead of itself.
  shopt -s nullglob
  # $pattern is intentionally unquoted so that it is glob-expanded.
  # shellcheck disable=SC2206
  files=("$log_dir"/$pattern)
  shopt -u nullglob

  if (( ${#files[@]} == 0 )); then
    if [[ "$silent" != "true" ]]; then
      log "WARNING" "No valid log files found matching $log_dir/$pattern"
    fi
    return 1
  fi
  return 0
}

#### Timeout helper ####
# Run a shell command string under timeout(1).
# Arguments: $1 - timeout in seconds, $2 - command string passed to `bash -c`
# Returns:   0 on success, 1 on timeout or any other failure
timeout_command() {
  local timeout_seconds="$1"
  local command="$2"
  local status=0

  timeout "$timeout_seconds" bash -c "$command" || status=$?

  # timeout(1) exits with 124 when the time limit was hit.
  if (( status == 124 )); then
    log "WARNING" "Command timed out after $timeout_seconds seconds: $command"
    return 1
  elif (( status != 0 )); then
    log "WARNING" "Command failed with status $status: $command"
    return 1
  fi
  return 0
}

#### Code Starts Here ####
# Defaults must be set before anything reads LOCK_DIR, NODE_EXPORTER_DIR, etc.
declare_defaults

### System Check ###
# Root is required for log access and cron installation.
if [[ "$(id -u)" -ne 0 ]]; then
  handle_error "This script must be run as root" 1
fi

#### Create and Check Lock File ####
# A fixed lock path is required: flock only excludes other instances when
# they all lock the same file.
LOCKFILE="${LOCK_DIR}/pcoip_stats.lock"
exec 9>"$LOCKFILE" || handle_error "Failed to create lock file" 1

# Try to acquire an exclusive, non-blocking lock on file descriptor 9.
if ! flock -n 9; then
  handle_error "Script is already running (unable to acquire lock)" 1
fi

# Remove randomly-named lock files (pcoip_stats.XXXXXX) older than 60 minutes
# that earlier versions of this script may have left behind.
find "$LOCK_DIR" -maxdepth 1 -type f -name 'pcoip_stats.??????' -mmin +60 -delete 2>/dev/null

# Release the lock. The lock file itself is kept: removing it would let a
# concurrent instance lock a different inode under the same name.
cleanup() {
  flock -u 9 2>/dev/null
  exec 9>&-
}

# Signals terminate the script; the EXIT trap then performs the cleanup once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
trap 'exit 141' PIPE

# Start time, used for the runtime metric at the end of the script.
SCRIPT_START_TIME=$(date +%s)

# Locate an executable, first through PATH and then in fallback directories.
# Arguments: $1 - command name, $2.. - fallback directories
# Outputs:   the resolved path on stdout
# Note: this is called inside $(...), so handle_error only ends the subshell;
#       callers must check for an empty result.
find_command() {
  local command_name="$1"
  local fallback_paths=("${@:2}")
  local path dir

  path=$(command -v "$command_name")
  if [[ -z "$path" ]]; then
    for dir in "${fallback_paths[@]}"; do
      if [[ -x "$dir/$command_name" ]]; then
        printf '%s\n' "$dir/$command_name"
        return 0
      fi
    done
    handle_error "Could not find '$command_name' executable (even after fallback)." 1
  fi

  if [[ ! -x "$path" ]]; then
    handle_error "Found '$command_name' at '$path' but it's not executable." 1
  fi
  printf '%s\n' "$path"
}

# Resolve every external utility used for log parsing.
AWK_PATH=$(find_command awk /usr/bin)
if [[ -z "$AWK_PATH" ]]; then handle_error "awk path is empty" 1; fi
CUT_PATH=$(find_command cut /usr/bin)
if [[ -z "$CUT_PATH" ]]; then handle_error "cut path is empty" 1; fi
GREP_PATH=$(find_command grep /usr/bin)
if [[ -z "$GREP_PATH" ]]; then handle_error "grep path is empty" 1; fi
REALPATH_PATH=$(find_command realpath /usr/bin)
if [[ -z "$REALPATH_PATH" ]]; then handle_error "realpath path is empty" 1; fi
SED_PATH=$(find_command sed /usr/bin)
if [[ -z "$SED_PATH" ]]; then handle_error "sed path is empty" 1; fi
TAIL_PATH=$(find_command tail /usr/bin)
if [[ -z "$TAIL_PATH" ]]; then handle_error "tail path is empty" 1; fi
TR_PATH=$(find_command tr /usr/bin)
if [[ -z "$TR_PATH" ]]; then handle_error "tr path is empty" 1; fi

# Validation for critical configuration variables.
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
  handle_error "Required environment variables not set" 1
fi

#### Validate Lockfile ####
# Sanity check that the lock file still exists.
if [[ ! -f "$LOCKFILE" ]]; then
  handle_error "No lockfile exists" 1
fi

### Check for .prom Directory ####
# Create the textfile directory when missing (this must happen before the
# directory is validated) and hand it to the configured Prometheus user.
if [[ ! -d "$NODE_EXPORTER_DIR" ]]; then
  mkdir -p "$NODE_EXPORTER_DIR"
  # Best effort: the user/group may not exist on this host.
  chown "$PROMETHEUS_USER:$PROMETHEUS_USER" "$NODE_EXPORTER_DIR" 2>/dev/null || true
fi
if [[ ! -d "$NODE_EXPORTER_DIR" ]]; then
  handle_error "NODE_EXPORTER_DIR is not set or not a valid directory" 1
fi
if [[ ! -w "$NODE_EXPORTER_DIR" ]]; then
  handle_error "$NODE_EXPORTER_DIR is not writable." 1
fi

#### Check for Cron Job ####
# Install the cron entry when the target user's crontab does not yet mention
# this script. The same crontab is used for the check and for the install.
if [[ -f "$SCRIPT_PATH" ]] \
  && ! crontab -u "$CRONTAB_USER" -l 2>/dev/null | "$GREP_PATH" -qF -- "$SCRIPT_PATH"; then
  cron_entry="$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/pcoip_metrics.prom 2>/dev/null"
  # Existing entries are passed through verbatim (no escape interpretation).
  if { crontab -u "$CRONTAB_USER" -l 2>/dev/null; printf '%s\n' "$cron_entry"; } \
    | crontab -u "$CRONTAB_USER" -; then
    log "INFO" "Cron job installed successfully: $CRON_NAME."
  else
    log "WARNING" "Failed to install cron job: $CRON_NAME. Verify permissions for user: $CRONTAB_USER."
  fi
fi

#### Metrics output functions ####
# Strip invalid UTF-8 sequences, carriage returns and control characters.
# Arguments: $1 - text
# Outputs:   sanitized text on stdout
sanitize_utf8() {
  local input="$1"

  # iconv is optional: without it the text is passed through unchanged
  # instead of being replaced by an empty string.
  if command -v iconv >/dev/null 2>&1; then
    printf '%s\n' "$input" | iconv -f utf-8 -t utf-8 -c
  else
    printf '%s\n' "$input"
  fi | "$TR_PATH" -d '\r' | "$SED_PATH" 's/[[:cntrl:]]//g'
}

# Echo a value when it is a plain decimal number, otherwise echo a default.
# Arguments: $1 - value, $2 - default (default 0)
validate_metric_value() {
  local value="$1"
  local default="${2:-0}"

  # An empty value fails the regex as well, so no separate emptiness test.
  if [[ "$value" =~ ^-?[0-9]+\.?[0-9]*$ ]]; then
    printf '%s\n' "$value"
  else
    printf '%s\n' "$default"
  fi
}

# Print one metric (HELP, TYPE and sample line) in Prometheus text format.
# Arguments: $1 - name, $2 - value (non-numeric becomes 0), $3 - help text,
#            $4 - type (default gauge)
output_metric() {
  local metric_name="$1"
  local metric_value="$2"
  local help_text="$3"
  local metric_type="${4:-gauge}"

  metric_name="$(sanitize_utf8 "$metric_name")"
  help_text="$(sanitize_utf8 "$help_text")"
  metric_value="$(validate_metric_value "$metric_value")"

  printf '# HELP %s %s\n' "$metric_name" "$help_text"
  printf '# TYPE %s %s\n' "$metric_name" "$metric_type"
  printf '%s %s\n' "$metric_name" "$metric_value"
}

# Print the last line of a file that matches a grep pattern, after running
# it through a sed script.
# Arguments: $1 - grep pattern, $2 - sed script, $3 - file
last_log_value() {
  local pattern="$1"
  local sed_script="$2"
  local file="$3"

  "$GREP_PATH" -- "$pattern" "$file" | "$SED_PATH" "$sed_script" | "$TAIL_PATH" -n 1
}

# Emit memory metrics (available and total).
# Arguments: $1 - log file, $2 - "true" to emit zeros without reading the log
collect_memory_metrics() {
  local log_pattern="$1"
  local default_values="${2:-false}"
  local mem_stats=""
  local mem_avail="0" mem_total="0"
  local -a fields=()

  if [[ "$default_values" != "true" ]]; then
    # Keep the text after "memory =", drop commas, turn "/" into a separator.
    mem_stats=$(last_log_value memory 's/.*memory =[\t ]*//;s/,//g' "$log_pattern" \
      | "$TR_PATH" "/" " ")
  fi

  if [[ -n "$mem_stats" ]]; then
    read -r -a fields <<<"$mem_stats"
    # First field is available memory, second is total; strip the MB suffix.
    mem_avail="$(validate_metric_value "${fields[0]//MB/}")"
    mem_total="$(validate_metric_value "${fields[1]//MB/}")"
  fi

  output_metric "node_teradici_pcoip_memory_mb_avail" "$mem_avail" "PCoIP Mbytes Available Memory."
  output_metric "node_teradici_pcoip_memory_mb_total" "$mem_total" "PCoIP MBytes Total Memory."
}

# Emit imaging metrics: frame rate, quality, pixel/bit rates and imaging RTT.
# Arguments: $1 - log file, $2 - "true" to emit zeros without reading the log
collect_image_quality_metrics() {
  local log_pattern="$1"
  local default_values="${2:-false}"
  local img_bits="" img_stats="" img_rtt=""
  local img_frames="0" img_quality="0"
  local img_pixel="0" img_imgb="0" img_mpix="0"
  local rtt_last="0" rtt_min="0" rtt_max="0" rtt_avg="0"
  local -a fields=()

  if [[ "$default_values" != "true" ]]; then
    img_bits=$(last_log_value 'bits/pixel' 's/.*(SoftIPC): [\t ]*//;s/,//g' "$log_pattern")
    img_stats=$(last_log_value fps 's/.*(SoftIPC): [\t ]*//;s/,//g' "$log_pattern")
    img_rtt=$(last_log_value Imaging 's/.*:Imaging [\t ]*//;s/,//g' "$log_pattern")
  fi

  # Frame rate is the 4th field, quality the 6th.
  if [[ -n "$img_stats" ]]; then
    read -r -a fields <<<"$img_stats"
    img_frames="$(validate_metric_value "${fields[3]:-}")"
    img_quality="$(validate_metric_value "${fields[5]:-}")"
  fi
  output_metric "node_teradici_pcoip_imaging_frames_persec" "$img_frames" "PCoIP active image Frames Per Second."
  output_metric "node_teradici_pcoip_imaging_active_min_quality" "$img_quality" "PCoIP Min active image quality Percent."

  # Pixel, bit and megapixel values are the 3rd, 6th and 9th fields.
  if [[ -n "$img_bits" ]]; then
    read -r -a fields <<<"$img_bits"
    img_pixel="$(validate_metric_value "${fields[2]:-}")"
    img_imgb="$(validate_metric_value "${fields[5]:-}")"
    img_mpix="$(validate_metric_value "${fields[8]:-}")"
  fi
  output_metric "node_teradici_pcoip_imaging_pixel_persec" "$img_pixel" "PCoIP image Pixel PerSec."
  output_metric "node_teradici_pcoip_imaging_bits_persec" "$img_imgb" "PCoIP image Bits PerSec."
  output_metric "node_teradici_pcoip_imaging_megapixel_persec" "$img_mpix" "PCoIP image MegaPixel PerSec."

  # Imaging RTT last/min/max/avg are the 3rd, 5th, 7th and 9th fields.
  if [[ -n "$img_rtt" ]]; then
    read -r -a fields <<<"$img_rtt"
    rtt_last="$(validate_metric_value "${fields[2]:-}")"
    rtt_min="$(validate_metric_value "${fields[4]:-}")"
    rtt_max="$(validate_metric_value "${fields[6]:-}")"
    rtt_avg="$(validate_metric_value "${fields[8]:-}")"
  fi
  output_metric "node_teradici_pcoip_imaging_rtt_last" "$rtt_last" "PCoIP image RTT Last value."
  output_metric "node_teradici_pcoip_imaging_rtt_min" "$rtt_min" "PCoIP image RTT Min value."
  output_metric "node_teradici_pcoip_imaging_rtt_max" "$rtt_max" "PCoIP image RTT Max value."
  output_metric "node_teradici_pcoip_imaging_rtt_avg" "$rtt_avg" "PCoIP image RTT AVG value."
}

# Emit bandwidth-control metrics parsed from ":ubs-BW-decr:" log entries.
# Arguments: $1 - log file, $2 - "true" to emit zeros without reading the log
collect_bandwidth_metrics() {
  local log_pattern="$1"
  local default_values="${2:-false}"
  local img_decr_stats=""
  local img_decr="0" img_decr_cur="0" img_decr_sact="0"
  local img_decr_ract="0" img_decr_adj="0" img_decr_floor="0"

  if [[ "$default_values" != "true" ]]; then
    img_decr_stats=$(last_log_value ':ubs-BW-decr:' 's/.*:ubs-BW-decr: [\t ]*//;s/,//g' "$log_pattern")
  fi

  if [[ -n "$img_decr_stats" ]]; then
    # Each value is isolated by cutting everything up to its own key (sed)
    # and everything from the next key onwards (awk field separator).
    img_decr=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/.*loss=[\t ]*//;s/,//g' \
      | "$AWK_PATH" -F ' current' '{gsub(/%/, "", $1); print $1}')
    img_decr_cur=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/.*current\[kbit\/s\]=[\t ]*//;s/,//g' \
      | "$AWK_PATH" -F ' active' '{print $1}')
    img_decr_sact=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/.*active\[kbit\/s\]=[\t ]*//;s/,//g' \
      | "$AWK_PATH" -F ' ->' '{print $1}')
    img_decr_ract=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/.*-> [\t ]*//;s/,//g' \
      | "$AWK_PATH" -F ' adjust' '{print $1}')
    img_decr_adj=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/.*factor=[\t ]*//;s/,//g' \
      | "$AWK_PATH" -F ' floor' '{gsub(/%/, "", $1); print $1}')
    img_decr_floor=$(printf '%s\n' "$img_decr_stats" \
      | "$SED_PATH" 's/^.*floor\[kbit\/s\]=[\t ]*//;s/,//g' \
      | "$AWK_PATH" '{print $1}')

    # Percentages default to 0.00, rates to 0.
    img_decr="$(validate_metric_value "$img_decr" "0.00")"
    img_decr_cur="$(validate_metric_value "$img_decr_cur" "0")"
    img_decr_sact="$(validate_metric_value "$img_decr_sact" "0")"
    img_decr_ract="$(validate_metric_value "$img_decr_ract" "0")"
    img_decr_adj="$(validate_metric_value "$img_decr_adj" "0.00")"
    img_decr_floor="$(validate_metric_value "$img_decr_floor" "0")"
  fi

  output_metric "node_teradici_pcoip_imaging_loss_percent" "$img_decr" "PCoIP image Loss value."
  output_metric "node_teradici_pcoip_imaging_loss_current_kbit_persec" "$img_decr_cur" "PCoIP Current image Loss value."
  output_metric "node_teradici_pcoip_imaging_loss_active_send_kbit_persec" "$img_decr_sact" "PCoIP Active Send image Loss value."
  output_metric "node_teradici_pcoip_imaging_loss_active_recv_kbit_persec" "$img_decr_ract" "PCoIP Active Recv image Loss value."
  output_metric "node_teradici_pcoip_imaging_loss_adjust_factor_percent" "$img_decr_adj" "PCoIP image Loss Adjust Factor value."
  output_metric "node_teradici_pcoip_imaging_loss_floor_kbit_persec" "$img_decr_floor" "PCoIP image Loss Floor value."
}

# Emit network metrics: bandwidth limit, average throughput and round trip time.
# The same set of metrics is emitted whether or not log data is available.
# Arguments: $1 - log file, $2 - "true" to emit zeros without reading the log
collect_network_metrics() {
  local log_pattern="$1"
  local default_values="${2:-false}"
  local net_stats="" rtt_stats=""
  local net_limit="0" tx_avg="0" rx_avg="0"
  local rtt_latency="0" rtt_variance="0" rtt_rto="0"
  local rtt_last="0" rtt_min="0" rtt_max="0"
  local -a fields=()

  if [[ "$default_values" != "true" ]]; then
    net_stats=$(last_log_value limit 's/.*limit =[\t ]*//;s/,//g' "$log_pattern")
    rtt_stats=$(last_log_value "round trip" 's/.*(ms) =[\t ]*//;s/,//g' "$log_pattern")
  fi

  # Assumed layout after the sed: "<limit> avg tx = <tx> avg rx = <rx> ..."
  # so the values are fields 1, 5 and 9 — NOTE(review): confirm against a
  # real log line.
  if [[ -n "$net_stats" ]]; then
    read -r -a fields <<<"$net_stats"
    net_limit="$(validate_metric_value "${fields[0]:-}")"
    tx_avg="$(validate_metric_value "${fields[4]:-}")"
    rx_avg="$(validate_metric_value "${fields[8]:-}")"
  fi
  output_metric "node_teradici_pcoip_tx_bw_limit_kbit_persec" "$net_limit" "PCoIP Network TX Limit."
  output_metric "node_teradici_pcoip_avg_tx_kbit_persec" "$tx_avg" "PCoIP Average TX kbits per second."
  output_metric "node_teradici_pcoip_avg_rx_kbit_persec" "$rx_avg" "PCoIP Average RX kbits per second."

  # Assumed layout after the sed: "<rtt> variance = <v> rto = <r> last = <l>
  # max = <m>", i.e. values sit at fields 1, 4, 7, 10 and 13.
  # NOTE(review): field 12 (min) is kept from the original script; with the
  # layout above it is not a number and therefore reports 0 — confirm whether
  # the log carries a minimum at all.
  if [[ -n "$rtt_stats" ]]; then
    read -r -a fields <<<"$rtt_stats"
    rtt_latency="$(validate_metric_value "${fields[0]:-}")"
    rtt_variance="$(validate_metric_value "${fields[3]:-}")"
    rtt_rto="$(validate_metric_value "${fields[6]:-}")"
    rtt_last="$(validate_metric_value "${fields[9]:-}")"
    rtt_min="$(validate_metric_value "${fields[11]:-}")"
    rtt_max="$(validate_metric_value "${fields[12]:-}")"
  fi
  output_metric "node_teradici_pcoip_round_trip_latency_ms" "$rtt_latency" "PCoIP Round Trip Latency."
  output_metric "node_teradici_pcoip_round_trip_variance_ms" "$rtt_variance" "PCoIP Round Trip Variance."
  output_metric "node_teradici_pcoip_round_trip_rto_ms" "$rtt_rto" "PCoIP Round Trip RTO."
  output_metric "node_teradici_pcoip_round_trip_last_ms" "$rtt_last" "PCoIP Round Trip Last."
  output_metric "node_teradici_pcoip_round_trip_min_ms" "$rtt_min" "PCoIP Round Trip Min."
  output_metric "node_teradici_pcoip_round_trip_max_ms" "$rtt_max" "PCoIP Round Trip Max."
  # Legacy name, previously emitted only when logs were missing; kept so the
  # series stays present in every mode.
  output_metric "node_teradici_pcoip_round_trip_latency_max_ms" "$rtt_max" "PCoIP Round Trip Latency Max."
}

# Emit packet loss metrics parsed from VGMAC log entries: overall RX/TX loss
# percentages plus per-type counters (A/I/O) for each direction.
# Arguments: $1 - log file, $2 - "true" to emit zeros without reading the log
collect_vgm_metrics() {
  local log_pattern="$1"
  local default_values="${2:-false}"
  local vgm_stats="" pkt_loss="" vgm_loss_rx="" vgm_loss_tx=""
  local rx_pkt="0" tx_pkt="0"
  local vgm_rx_a="0" vgm_rx_i="0" vgm_rx_o="0"
  local vgm_tx_a="0" vgm_tx_i="0" vgm_tx_o="0"
  local -a fields=() parts=()

  if [[ "$default_values" != "true" ]]; then
    vgm_stats=$(last_log_value VGMAC 's/.*Stat: =[\t ]*//;s/,//g' "$log_pattern")
  fi

  if [[ -n "$vgm_stats" ]]; then
    pkt_loss=$(printf '%s\n' "$vgm_stats" | "$SED_PATH" 's/.*Loss=[\t ]*//;s/,//g')
    vgm_loss_rx=$(printf '%s\n' "$vgm_stats" | "$SED_PATH" 's/.*R=[\t ]*//;s/,//g')
    vgm_loss_tx=$(printf '%s\n' "$vgm_stats" | "$SED_PATH" 's/.*T=[\t ]*//;s/,//g')

    # "rx%/tx%" -> "rx tx": turn both "/" and "%" into separators so that
    # neither value keeps stray whitespace that would fail validation.
    if [[ -n "$pkt_loss" ]]; then
      pkt_loss="${pkt_loss//\// }"
      pkt_loss="${pkt_loss//%/ }"
      read -r -a fields <<<"$pkt_loss"
      rx_pkt="$(validate_metric_value "${fields[0]:-}")"
      tx_pkt="$(validate_metric_value "${fields[1]:-}")"
    fi

    # The first whitespace-delimited token holds "A/I/O" for each direction.
    if [[ -n "$vgm_loss_rx" ]]; then
      read -r -a fields <<<"$vgm_loss_rx"
      IFS='/' read -r -a parts <<<"${fields[0]:-}"
      vgm_rx_a="$(validate_metric_value "${parts[0]:-}")"
      vgm_rx_i="$(validate_metric_value "${parts[1]:-}")"
      vgm_rx_o="$(validate_metric_value "${parts[2]:-}")"
    fi
    if [[ -n "$vgm_loss_tx" ]]; then
      read -r -a fields <<<"$vgm_loss_tx"
      IFS='/' read -r -a parts <<<"${fields[0]:-}"
      vgm_tx_a="$(validate_metric_value "${parts[0]:-}")"
      vgm_tx_i="$(validate_metric_value "${parts[1]:-}")"
      vgm_tx_o="$(validate_metric_value "${parts[2]:-}")"
    fi
  fi

  output_metric "node_teradici_pcoip_rx_packet_loss_percent" "$rx_pkt" "PCoIP RX Packet loss Percent."
  output_metric "node_teradici_pcoip_tx_packet_loss_percent" "$tx_pkt" "PCoIP TX Packet loss Percent."
  output_metric "node_teradici_pcoip_vgm_rx_packet_loss_a" "$vgm_rx_a" "PCoIP VGM RX Packet loss A."
  output_metric "node_teradici_pcoip_vgm_rx_packet_loss_i" "$vgm_rx_i" "PCoIP VGM RX Packet loss I."
  output_metric "node_teradici_pcoip_vgm_rx_packet_loss_o" "$vgm_rx_o" "PCoIP VGM RX Packet loss O."
  output_metric "node_teradici_pcoip_vgm_tx_packet_loss_a" "$vgm_tx_a" "PCoIP VGM TX Packet loss A."
  output_metric "node_teradici_pcoip_vgm_tx_packet_loss_i" "$vgm_tx_i" "PCoIP VGM TX Packet loss I."
  output_metric "node_teradici_pcoip_vgm_tx_packet_loss_o" "$vgm_tx_o" "PCoIP VGM TX Packet loss O."
}

#### Process all PCoIP metrics ####
# Pick the PCoIP server log to read and run every collector against it.
# When no log file exists, a status of 0 and zero-valued metrics are emitted.
# Globals read: pcoip_log
# Returns:      0
process_pcoip_metrics() {
  local -a log_files=()
  local log_pattern candidate

  # nullglob makes an unmatched pattern expand to nothing instead of itself.
  shopt -s nullglob
  log_files=("$pcoip_log"/server*.log)
  # Fall back to any server* file when nothing carries the .log extension.
  if (( ${#log_files[@]} == 0 )); then
    log_files=("$pcoip_log"/server*)
  fi
  shopt -u nullglob

  if (( ${#log_files[@]} == 0 )) || ! validate_log_file "$pcoip_log" "server*" "true"; then
    # Stay silent here so nothing but metrics reaches the output.
    output_metric "node_teradici_pcoip_status" "0" "PCoIP service status (0=unavailable, 1=active)"
    collect_memory_metrics "" "true"
    collect_image_quality_metrics "" "true"
    collect_bandwidth_metrics "" "true"
    collect_network_metrics "" "true"
    collect_vgm_metrics "" "true"
    return 0
  fi

  # Use the most recently modified candidate rather than the first by name.
  log_pattern="${log_files[0]}"
  for candidate in "${log_files[@]}"; do
    if [[ -f "$candidate" && "$candidate" -nt "$log_pattern" ]]; then
      log_pattern="$candidate"
    fi
  done
  log "INFO" "Processing PCoIP metrics from $log_pattern"

  output_metric "node_teradici_pcoip_status" "1" "PCoIP service status (0=unavailable, 1=active)"
  collect_memory_metrics "$log_pattern"
  collect_image_quality_metrics "$log_pattern"
  collect_bandwidth_metrics "$log_pattern"
  collect_network_metrics "$log_pattern"
  collect_vgm_metrics "$log_pattern"
}

#####################
#### PCoIP Stats ####
#####################
# Main execution: emit every PCoIP metric.
process_pcoip_metrics

# Emit the script's own runtime so slow or stuck runs are visible.
SCRIPT_END_TIME=$(date +%s)
SCRIPT_RUNTIME=$((SCRIPT_END_TIME - SCRIPT_START_TIME))
output_metric "node_teradici_pcoip_script_runtime_seconds" "$SCRIPT_RUNTIME" "PCoIP metrics script execution time in seconds"