#! /bin/bash

#############################################################
#### Expose metrics from PCOIP for Prometheus            ####
#### Redhat/Amazon/Ubuntu Workstations.                  ####
####                                                     ####
#### Author: Phil Connor 03/27/2024                      ####
#### Contact: contact@mylinux.work                      ####
#### Version 2.18.20250731                               ####
#############################################################
# This script extracts metrics from PCoIP (PC over IP) log files and formats them
# for consumption by Prometheus node_exporter. It monitors virtual desktop performance
# including memory usage, image quality, bandwidth, network latency, and packet loss.

#### System Variables ####
pcoip_log="/var/log/pcoip-agent" # Directory that holds the PCoIP agent log files

: "${DEBUG:=0}" # Debug switch: export DEBUG=1 to see DEBUG-level log lines (0 when unset or empty)

#### Defaults ####
# Function to declare default values for all configuration variables
# These can be overridden by environment variables
declare_defaults() {
    # Each setting keeps a non-empty value inherited from the environment;
    # otherwise the default shown here is assigned.

    # --- Cron installation ---
    : "${CRONTAB_USER:=root}"                      # Account whose crontab receives the job
    : "${CRON_NAME:=pcoip_metric.sh}"              # Job name used in log messages
    : "${UPDATE_INTERVAL:=*/3 * * * * }"           # Cron schedule (every 3 minutes)

    # --- node_exporter integration ---
    : "${NODE_EXPORTER_DIR:=/var/lib/node_exporter}" # Textfile collector directory for .prom files
    : "${PROMETHEUS_USER:=prometheus}"             # System user that runs Prometheus/node_exporter

    # --- Locking ---
    : "${LOCK_DIR:=/var/run}"                      # Directory where the lock file lives

    # Always recomputed: absolute, symlink-resolved path of this script,
    # which is what gets written into the cron entry.
    SCRIPT_PATH="$(readlink -f "$0")"
}

#### Error logging functions ####
# Centralized error handling function that logs to stderr and exits
handle_error() {
    # $1 - message to report on stderr (prefixed with "ERROR: ")
    # $2 - optional exit status for the script; 1 when omitted or empty
    local exit_code="${2:-1}"

    printf 'ERROR: %s\n' "$1" >&2
    exit "$exit_code"
}

# Logging function with level-based filtering
# DEBUG messages only appear when DEBUG=1 environment variable is set
log() {
    # $1 - severity label (e.g. INFO, WARNING, DEBUG)
    # $2 - message text
    # Writes "[timestamp] [level] message" to stderr.
    local level="$1"
    local message="$2"
    local stamp

    # DEBUG lines are dropped unless the global DEBUG switch equals 1
    if [[ "$level" == "DEBUG" ]] && ! [[ "$DEBUG" -eq 1 ]]; then
        return 0
    fi

    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    printf '[%s] [%s] %s\n' "$stamp" "$level" "$message" >&2
}

#### Testing Log validation ####
# Function to validate that log files exist and match the expected pattern
# Returns 1 if no files found, 0 if files exist
validate_log_file() {
    # Check that at least one path matches "<log_dir>/<pattern>".
    #   $1 - directory to look in
    #   $2 - glob pattern, expanded relative to $1
    #   $3 - "true" to suppress the warning when nothing matches (default: false)
    # Returns 0 when at least one match exists, 1 otherwise.
    local log_dir="$1"
    local pattern="$2"
    local silent="${3:-false}"
    local nullglob_was_set=0
    local -a files

    # Remember the caller's nullglob setting so it can be put back afterwards;
    # switching it off unconditionally would change globbing for the caller.
    if shopt -q nullglob; then
        nullglob_was_set=1
    fi

    # With nullglob on, a pattern without matches expands to nothing instead
    # of to itself, so an empty array reliably means "no files".
    shopt -s nullglob
    # shellcheck disable=SC2206 # $pattern is intentionally unquoted so it globs
    files=("$log_dir"/$pattern)
    if (( ! nullglob_was_set )); then
        shopt -u nullglob
    fi

    if [[ ${#files[@]} -eq 0 ]]; then
        if [[ "$silent" != "true" ]]; then
            log "WARNING" "No valid log files found matching $log_dir/$pattern"
        fi
        return 1
    fi
    return 0
}

#### Testing Timeout function (log) ####
# Function to execute commands with a timeout to prevent hanging
# Used for log processing commands that might get stuck
timeout_command() {
    # Run a shell command string under a time limit.
    #   $1 - limit passed to timeout(1), in seconds
    #   $2 - command string, executed via "bash -c"
    # Returns 0 when the command succeeds; returns 1 (after logging a
    # WARNING) when it times out or exits non-zero.
    local timeout_seconds="$1"
    local command="$2"
    local status=0

    timeout "$timeout_seconds" bash -c "$command" || status=$?

    case "$status" in
        0)
            return 0
            ;;
        124)
            # timeout(1) reports an expired time limit with exit status 124
            log "WARNING" "Command timed out after $timeout_seconds seconds: $command"
            ;;
        *)
            # The command finished in time but failed on its own; say so
            # instead of mislabelling it as a timeout.
            log "WARNING" "Command failed with exit status $status: $command"
            ;;
    esac
    return 1
}

### System Check ###
# Ensure script is running as root for proper file access and cron installation
if [ ! "$(id -u)" -eq 0 ]; then handle_error "This script must be run as root" 1; fi

#### Create, Check and Delete Lock File ###
# flock only excludes other instances when every instance locks the SAME file,
# so the lock file needs a fixed name; a per-run mktemp name always locks
# successfully and protects nothing.
# LOCK_DIR is only populated by declare_defaults further down, so the same
# default (/var/run) is applied here when it is not set in the environment.
LOCKFILE="${LOCK_DIR:-/var/run}/pcoip_stats.lock"
exec 9>"$LOCKFILE" || handle_error "Failed to create lock file" 1 # fd 9 carries the lock

# Try to acquire an exclusive, non-blocking lock
if ! flock -n 9; then
    handle_error "Script is already running (unable to acquire lock)" 1
fi

# Clean up stale lock files older than 60 minutes. These are leftovers named
# pcoip_stats.XXXXXX that mktemp-based runs created and could not remove
# (e.g. after being killed); the fixed-name lock above never matches.
find "${LOCK_DIR:-/var/run}" -maxdepth 1 -type f -name 'pcoip_stats.??????' -mmin +60 -delete 2>/dev/null

# Cleanup function to release lock and remove lock file on exit
cleanup() {
    # Exit-time teardown for the single-instance lock.

    # Drop the advisory lock held through file descriptor 9 ...
    flock -u 9
    # ... close that descriptor ...
    exec 9>&-
    # ... and delete the lock file named by the global LOCKFILE.
    rm -f "$LOCKFILE"
}

# Run cleanup exactly once, when the shell exits.
trap cleanup EXIT
# A trapped signal whose handler returns lets the script carry on running.
# Turn each signal into an exit instead (status 128 + signal number) so the
# EXIT trap releases the lock and the script really stops.
trap 'exit 130' INT
trap 'exit 143' TERM
trap 'exit 141' PIPE

#### Code Starts Here ####
declare_defaults # Initialize default variables

# Capture script start time for runtime metric calculation
SCRIPT_START_TIME=$(date +%s)

# Function to find executable commands with fallback paths
# This ensures the script works across different Linux distributions
# where commands might be in different locations
find_command() {
    # Print the full path of an executable.
    #   $1     - command name
    #   $2...  - fallback directories searched when the name is not on PATH
    # Prints the path on stdout. On failure handle_error is called, which
    # exits; when invoked inside $(...) that only ends the subshell, so
    # callers see empty output and a non-zero status.
    local command_name="$1"
    local fallback_paths=("${@:2}") # Remaining arguments are fallback directories
    local resolved
    local dir

    # type -P searches PATH for a file on disk only. command -v would answer
    # with the bare name for builtins and shell functions (e.g. "echo"),
    # which is not a usable path and fails the executable check below.
    resolved="$(type -P "$command_name")"
    if [ -z "$resolved" ]; then
        # Not on PATH: probe the fallback directories in the order given
        for dir in "${fallback_paths[@]}"; do
            if [ -f "$dir/$command_name" ] && [ -x "$dir/$command_name" ]; then
                echo "$dir/$command_name"
                return 0
            fi
        done
        handle_error "Could not find '$command_name' executable (even after fallback)." 1
    fi

    # Verify the found command is executable
    if [ ! -x "$resolved" ]; then
        handle_error "Found '$command_name' at '$resolved' but it's not executable." 1
    fi

    echo "$resolved"
}

# Resolve the external utilities used for log parsing and metric extraction.
# Each "VARIABLE=tool" pair is looked up with find_command (PATH first, then
# /usr/bin) and stored in the named global; the script aborts on the first
# tool that cannot be resolved.
for _tool_spec in \
    AWK_PATH=awk \
    CUT_PATH=cut \
    GREP_PATH=grep \
    REALPATH_PATH=realpath \
    SED_PATH=sed \
    TAIL_PATH=tail \
    TR_PATH=tr; do
    _tool_var="${_tool_spec%%=*}"
    _tool_name="${_tool_spec#*=}"

    printf -v "$_tool_var" '%s' "$(find_command "$_tool_name" /usr/bin)"
    if [ -z "${!_tool_var}" ]; then handle_error "$_tool_name path is empty" 1; fi
done
unset _tool_spec _tool_var _tool_name

# Validation for critical environment variables
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
    handle_error "Required environment variables not set" 1
fi

#### Validate Lockfile ####
# Ensure the lock file we created still exists (sanity check)
if [[ ! -f "$LOCKFILE" ]]; then
    handle_error "No lockfile exists" 1
fi

### Check for .prom Directory ####
# Create the node_exporter textfile directory if it doesn't exist.
# This has to run BEFORE the directory is validated below; validating first
# aborts the script and the creation step is never reached.
if [ ! -d "$NODE_EXPORTER_DIR" ]; then
    if [ "$(id -u)" = "0" ]; then # Check if running as root
        mkdir -p "$NODE_EXPORTER_DIR"
        # Best effort: hand the directory to the configured exporter user.
        # "USER:" (trailing colon) selects that user's login group. Failure is
        # ignored on purpose, e.g. when the account does not exist.
        chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
    fi
fi

# The directory must exist by now
if [[ ! -d "$NODE_EXPORTER_DIR" ]]; then
    handle_error "NODE_EXPORTER_DIR is not set or not a valid directory" 1
fi

# Verify we can write to the output directory
if [ ! -w "$NODE_EXPORTER_DIR" ]; then
    handle_error "$NODE_EXPORTER_DIR is not writable." 1
fi

#### Check for Cron Job ####
# Auto-install cron job if this script exists and cron entry is missing.
# The presence check reads the crontab of CRONTAB_USER, the same crontab the
# entry is written to; checking a different user's crontab would append a
# duplicate entry on every run. grep -F matches the script path literally.
if [ -f "$SCRIPT_PATH" ] && ! crontab -u "$CRONTAB_USER" -l 2>/dev/null | grep -qF -- "$SCRIPT_PATH"; then
    # Re-emit the existing crontab untouched (no backslash interpretation, no
    # blank line when it is empty) followed by the new entry, and install it.
    if ! {
        crontab -u "$CRONTAB_USER" -l 2>/dev/null
        printf '%s\n' "$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/pcoip_metrics.prom 2>/dev/null"
    } | crontab -u "$CRONTAB_USER" -; then
        log "WARNING" "Failed to install cron job: $CRON_NAME. Verify permissions for user: $CRONTAB_USER."
    else
        log "INFO" "Cron job installed successfully: $CRON_NAME."
    fi
fi

#### Metrics output functions ####
# Function to sanitize text for UTF-8 compliance and remove control characters
# Ensures Prometheus can properly parse the output
sanitize_utf8() {
    # Print $1 with invalid UTF-8 sequences, carriage returns and control
    # characters removed. Newlines separate the lines sed works on and are
    # therefore kept.
    local input="$1"

    # printf instead of echo: echo would treat input such as "-n" or "-e" as
    # an option and print nothing.
    printf '%s\n' "$input" | iconv -f utf-8 -t utf-8 -c | tr -d '\r' | sed 's/[[:cntrl:]]//g'
}

# Function to output Prometheus metrics in the correct format
# Follows the Prometheus exposition format specification
output_metric() {
    # Emit one metric in Prometheus text exposition format.
    #   $1 - metric name
    #   $2 - metric value; anything that is not a plain decimal number becomes 0
    #   $3 - HELP text
    #   $4 - metric type, "gauge" when omitted
    local metric_name="$1"
    local metric_value="$2"
    local help_text="$3"
    local metric_type="${4:-gauge}"
    local numeric_re='^-?[0-9]+\.?[0-9]*$'

    # Name and help text are passed through the UTF-8 sanitizer
    metric_name="$(sanitize_utf8 "$metric_name")"
    help_text="$(sanitize_utf8 "$help_text")"

    # Fall back to 0 for values that do not look numeric
    [[ "$metric_value" =~ $numeric_re ]] || metric_value="0"

    # HELP line, TYPE line, then the sample itself
    printf '# HELP %s %s\n' "$metric_name" "$help_text"
    printf '# TYPE %s %s\n' "$metric_name" "$metric_type"
    printf '%s %s\n' "$metric_name" "$metric_value"
}

# Function to validate and sanitize metric values
# Returns a default value if the input is not a valid number
validate_metric_value() {
    # Print $1 when it is a plain decimal number (optional leading minus,
    # optional fractional part); otherwise print the fallback.
    #   $1 - candidate value
    #   $2 - fallback, 0 when omitted or empty
    local value="$1"
    local default="${2:-0}"
    local numeric_re='^-?[0-9]+\.?[0-9]*$'
    local result="$default"

    if [[ -n "$value" && "$value" =~ $numeric_re ]]; then
        result="$value"
    fi

    echo "$result"
}

# Function to collect memory-related metrics from PCoIP logs
# Extracts available and total memory information
collect_memory_metrics() {
    # Emit the available/total memory metrics.
    #   $1 - log file path handed to grep
    #   $2 - "true" to skip the log and emit zeros (default: false)
    # Both metrics are always emitted; they stay 0 when defaults are requested
    # or when the log holds no "memory" line.
    local log_pattern="$1"
    local default_values="${2:-false}"
    local avail_mb="0"
    local total_mb="0"
    local latest=""

    if [[ "$default_values" != "true" ]]; then
        # Newest "memory" line: drop everything up to "memory =", strip
        # commas, and turn "/" into a space to get "<avail>MB <total>MB"
        latest="$($GREP_PATH memory "$log_pattern" | $SED_PATH 's/.*memory =[\t ]*//;s/,//g' | $TR_PATH "/" " " | $TAIL_PATH -n 1)"

        if [[ -n "$latest" ]]; then
            # Field 1 is the available amount, field 2 the total; the MB
            # suffix is removed before the value is validated
            avail_mb="$($AWK_PATH '{gsub(/MB/,"",$1)} {print $1}' <<< "$latest")"
            total_mb="$($AWK_PATH '{gsub(/MB/,"",$2)} {print $2}' <<< "$latest")"

            avail_mb="$(validate_metric_value "$avail_mb")"
            total_mb="$(validate_metric_value "$total_mb")"
        fi
    fi

    output_metric "node_teradici_pcoip_memory_mb_avail" "$avail_mb" "PCoIP Mbytes Available Memory."
    output_metric "node_teradici_pcoip_memory_mb_total" "$total_mb" "PCoIP MBytes Total Memory."
}

# Function to collect image quality and performance metrics from PCoIP logs
# Extracts frame rates, image quality, pixel throughput, and RTT statistics
collect_image_quality_metrics() {
    # Emit the imaging metrics: frame rate, minimum quality, pixel/bit
    # throughput and imaging round-trip times.
    #   $1 - log file path handed to grep
    #   $2 - "true" to skip the log and emit zeros (default: false)
    # All nine metrics are always emitted, in a fixed order; a group stays 0
    # when defaults are requested or its log line is missing.
    local log_pattern="$1"
    local default_values="${2:-false}"

    local fps="0" min_quality="0"
    local pixel_rate="0" bit_rate="0" mpixel_rate="0"
    local rtt_last="0" rtt_min="0" rtt_max="0" rtt_avg="0"
    local bits_line="" fps_line="" rtt_line=""

    if [[ "$default_values" != "true" ]]; then
        # Newest line of each kind, with the log prefix and commas stripped
        bits_line="$($GREP_PATH bits/pixel "$log_pattern" | $SED_PATH 's/.*(SoftIPC): [\t ]*//;s/,//g' | $TAIL_PATH -n 1)"
        fps_line="$($GREP_PATH fps "$log_pattern" | $SED_PATH 's/.*(SoftIPC): [\t ]*//;s/,//g' | $TAIL_PATH -n 1)"
        rtt_line="$($GREP_PATH Imaging "$log_pattern" | $SED_PATH 's/.*:Imaging [\t ]*//;s/,//g' | $TAIL_PATH -n 1)"

        # Frame rate is field 4 and quality field 6 of the "fps" line
        if [[ -n "$fps_line" ]]; then
            fps="$(validate_metric_value "$($AWK_PATH '{print $4}' <<< "$fps_line")")"
            min_quality="$(validate_metric_value "$($AWK_PATH '{print $6}' <<< "$fps_line")")"
        fi

        # Fields 3, 6 and 9 of the "bits/pixel" line
        if [[ -n "$bits_line" ]]; then
            pixel_rate="$(validate_metric_value "$($AWK_PATH '{print $3}' <<< "$bits_line")")"
            bit_rate="$(validate_metric_value "$($AWK_PATH '{print $6}' <<< "$bits_line")")"
            mpixel_rate="$(validate_metric_value "$($AWK_PATH '{print $9}' <<< "$bits_line")")"
        fi

        # Fields 3, 5, 7 and 9 of the "Imaging" line: last, min, max, average
        if [[ -n "$rtt_line" ]]; then
            rtt_last="$(validate_metric_value "$($AWK_PATH '{print $3}' <<< "$rtt_line")")"
            rtt_min="$(validate_metric_value "$($AWK_PATH '{print $5}' <<< "$rtt_line")")"
            rtt_max="$(validate_metric_value "$($AWK_PATH '{print $7}' <<< "$rtt_line")")"
            rtt_avg="$(validate_metric_value "$($AWK_PATH '{print $9}' <<< "$rtt_line")")"
        fi
    fi

    output_metric "node_teradici_pcoip_imaging_frames_persec" "$fps" "PCoIP active image Frames Per Second."
    output_metric "node_teradici_pcoip_imaging_active_min_quality" "$min_quality" "PCoIP Min active image quality Percent."
    output_metric "node_teradici_pcoip_imaging_pixel_persec" "$pixel_rate" "PCoIP image Pixel PerSec."
    output_metric "node_teradici_pcoip_imaging_bits_persec" "$bit_rate" "PCoIP image Bits PerSec."
    output_metric "node_teradici_pcoip_imaging_megapixel_persec" "$mpixel_rate" "PCoIP image MegaPixel PerSec."
    output_metric "node_teradici_pcoip_imaging_rtt_last" "$rtt_last" "PCoIP image RTT Last value."
    output_metric "node_teradici_pcoip_imaging_rtt_min" "$rtt_min" "PCoIP image RTT Min value."
    output_metric "node_teradici_pcoip_imaging_rtt_max" "$rtt_max" "PCoIP image RTT Max value."
    output_metric "node_teradici_pcoip_imaging_rtt_avg" "$rtt_avg" "PCoIP image RTT AVG value."
}

# Function to collect bandwidth and loss-related metrics from PCoIP logs
# Extracts bandwidth utilization, packet loss, and adaptive bandwidth control data
collect_bandwidth_metrics() {
    # Emit the bandwidth-control / loss metrics taken from the newest
    # ":ubs-BW-decr:" log line.
    #   $1 - log file path handed to grep
    #   $2 - "true" to skip the log and emit zeros (default: false)
    # All six metrics are always emitted; they stay 0 when defaults are
    # requested or no matching line exists.
    # Only the file given in $1 is read: this function has no use for the
    # bits/pixel, fps or Imaging lines, so it does not grep for them.
    local log_pattern="$1"
    local default_values="${2:-false}"

    local img_decr="0" img_decr_cur="0" img_decr_sact="0"
    local img_decr_ract="0" img_decr_adj="0" img_decr_floor="0"
    local img_decr_stats=""

    if [[ "$default_values" != "true" ]]; then
        # Newest ubs-BW-decr line with the log prefix and commas stripped
        img_decr_stats="$($GREP_PATH :ubs-BW-decr: "$log_pattern" | $SED_PATH 's/.*:ubs-BW-decr: [\t ]*//;s/,//g' | $TAIL_PATH -n 1)"
    fi

    if [[ -n "$img_decr_stats" ]]; then
        # Each value is isolated the same way: sed removes everything up to
        # and including its "key=" marker, awk keeps what precedes the next key.
        # Packet loss percentage (text after "loss=", % sign removed)
        img_decr="$(echo "$img_decr_stats" | $SED_PATH 's/.*loss=[\t ]*//;s/,//g' | $AWK_PATH -F '\\ current' '{gsub(/\%/,"",$1)} {print $1}')"
        # Value after "current[kbit/s]="
        img_decr_cur="$(echo "$img_decr_stats" | $SED_PATH 's/.*current\[kbit\/s\]=[\t ]*//;s/,//g' | $AWK_PATH -F '\\ active' '{print $1}')"
        # Value after "active[kbit/s]=", i.e. the number before "->"
        img_decr_sact="$(echo "$img_decr_stats" | $SED_PATH 's/.*active\[kbit\/s\]=[\t ]*//;s/,//g' | $AWK_PATH -F '\\ ->' '{print $1}')"
        # Number following "->"
        img_decr_ract="$(echo "$img_decr_stats" | $SED_PATH 's/.*-> [\t ]*//;s/,//g' | $AWK_PATH -F '\\ adjust' '{print $1}')"
        # Adjustment factor percentage (text after "factor=", % sign removed)
        img_decr_adj="$(echo "$img_decr_stats" | $SED_PATH 's/.*factor=[\t ]*//;s/,//g' | $AWK_PATH -F '\\ floor' '{gsub(/\%/,"",$1)} {print $1}')"
        # Value after "floor[kbit/s]="
        img_decr_floor="$(echo "$img_decr_stats" | $SED_PATH 's/^.*floor\[kbit\/s\]=[\t ]*//;s/,//g' | $AWK_PATH '{print $1}')"

        # Replace anything non-numeric; percentages fall back to 0.00
        img_decr="$(validate_metric_value "$img_decr" "0.00")"
        img_decr_cur="$(validate_metric_value "$img_decr_cur" "0")"
        img_decr_sact="$(validate_metric_value "$img_decr_sact" "0")"
        img_decr_ract="$(validate_metric_value "$img_decr_ract" "0")"
        img_decr_adj="$(validate_metric_value "$img_decr_adj" "0.00")"
        img_decr_floor="$(validate_metric_value "$img_decr_floor" "0")"
    fi

    output_metric "node_teradici_pcoip_imaging_loss_percent" "$img_decr" "PCoIP image Loss value."
    output_metric "node_teradici_pcoip_imaging_loss_current_kbit_persec" "$img_decr_cur" "PCoIP Current image Loss value."
    output_metric "node_teradici_pcoip_imaging_loss_active_send_kbit_persec" "$img_decr_sact" "PCoIP Active Send image Loss value."
    output_metric "node_teradici_pcoip_imaging_loss_active_recv_kbit_persec" "$img_decr_ract" "PCoIP Active Recv image Loss value."
    output_metric "node_teradici_pcoip_imaging_loss_adjust_factor_percent" "$img_decr_adj" "PCoIP image Loss Adjust Factor value."
    output_metric "node_teradici_pcoip_imaging_loss_floor_kbit_persec" "$img_decr_floor" "PCoIP image Loss Floor value."
}

# Function to collect network performance metrics from PCoIP logs
# Extracts bandwidth limits, throughput averages, and round-trip time statistics
collect_network_metrics() {
    # Emit the network throughput and round-trip metrics.
    #   $1 - log file path handed to grep
    #   $2 - "true" to skip the log and emit zeros (default: false)
    # The same nine metrics are emitted on every path (defaults requested,
    # line missing, line parsed), so no series appears or disappears
    # depending on whether the logs were readable.
    local log_pattern="$1"
    local default_values="${2:-false}"

    local net_limit="0" tx_avg="0" rx_avg="0"
    local rtt_latency="0" rtt_variance="0" rtt_rto="0"
    local rtt_last="0" rtt_min="0" rtt_max="0"
    local net_stats="" rtt_stats=""

    if [[ "$default_values" != "true" ]]; then
        # Newest "limit" line, reduced to the text after "limit ="
        net_stats="$($GREP_PATH limit "$log_pattern" | $SED_PATH 's/.*limit =[\t ]*//;s/,//g' | $TAIL_PATH -n 1)"
        # Newest "round trip" line, reduced to the text after "(ms) ="
        rtt_stats="$($GREP_PATH "round trip" "$log_pattern" | $SED_PATH 's/.*(ms) =[\t ]*//;s/,//g' | $TAIL_PATH -n 1)"
    fi

    # Bandwidth limit and throughput
    if [[ -n "$net_stats" ]]; then
        # Bandwidth limit is field 1, average TX field 5
        net_limit="$(echo "$net_stats" | $AWK_PATH '{print $1}')"
        tx_avg="$(echo "$net_stats" | $AWK_PATH '{print $5}')"
        # NOTE(review): RX reads the same field as TX, so both metrics always
        # carry the same number. The RX value presumably sits in a later
        # field; confirm against a sample log line before changing it.
        rx_avg="$(echo "$net_stats" | $AWK_PATH '{print $5}')"

        net_limit="$(validate_metric_value "$net_limit")"
        tx_avg="$(validate_metric_value "$tx_avg")"
        rx_avg="$(validate_metric_value "$rx_avg")"
    fi

    # Round-trip statistics
    if [[ -n "$rtt_stats" ]]; then
        # Fields 1, 5, 7, 10, 12 and 13 of the round-trip line
        rtt_latency="$(echo "$rtt_stats" | $AWK_PATH '{print $1}')"
        rtt_variance="$(echo "$rtt_stats" | $AWK_PATH '{print $5}')"
        rtt_rto="$(echo "$rtt_stats" | $AWK_PATH '{print $7}')"
        rtt_last="$(echo "$rtt_stats" | $AWK_PATH '{print $10}')"
        rtt_min="$(echo "$rtt_stats" | $AWK_PATH '{print $12}')"
        rtt_max="$(echo "$rtt_stats" | $AWK_PATH '{print $13}')"

        rtt_latency="$(validate_metric_value "$rtt_latency")"
        rtt_variance="$(validate_metric_value "$rtt_variance")"
        rtt_rto="$(validate_metric_value "$rtt_rto")"
        rtt_last="$(validate_metric_value "$rtt_last")"
        rtt_min="$(validate_metric_value "$rtt_min")"
        rtt_max="$(validate_metric_value "$rtt_max")"
    fi

    output_metric "node_teradici_pcoip_tx_bw_limit_kbit_persec" "$net_limit" "PCoIP Network TX Limit."
    output_metric "node_teradici_pcoip_avg_tx_kbit_persec" "$tx_avg" "PCoIP Average TX kbits per second."
    output_metric "node_teradici_pcoip_avg_rx_kbit_persec" "$rx_avg" "PCoIP Average RX kbits per second."
    output_metric "node_teradici_pcoip_round_trip_latency_ms" "$rtt_latency" "PCoIP Round Trip Latency."
    output_metric "node_teradici_pcoip_round_trip_variance_ms" "$rtt_variance" "PCoIP Round Trip Variance."
    output_metric "node_teradici_pcoip_round_trip_rto_ms" "$rtt_rto" "PCoIP Round Trip RTO."
    output_metric "node_teradici_pcoip_round_trip_last_ms" "$rtt_last" "PCoIP Round Trip Last."
    output_metric "node_teradici_pcoip_round_trip_min_ms" "$rtt_min" "PCoIP Round Trip Min."
    output_metric "node_teradici_pcoip_round_trip_max_ms" "$rtt_max" "PCoIP Round Trip Max."
}

# Function to collect VGM (Virtual Graphics Management) packet loss metrics
# Extracts detailed packet loss statistics for different packet types (Audio, Image, Other)
collect_vgm_metrics() {
    {
        local log_pattern="$1"
        local default_values="${2:-false}"
        
        if [[ "$default_values" == "true" ]]; then
            # Output default metrics when logs are not available
            output_metric "node_teradici_pcoip_rx_packet_loss_percent" "0" "PCoIP RX Packet loss Percent."
            output_metric "node_teradici_pcoip_tx_packet_loss_percent" "0" "PCoIP TX Packet loss Percent."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_a" "0" "PCoIP VGM RX Packet loss A."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_i" "0" "PCoIP VGM RX Packet loss I."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_o" "0" "PCoIP VGM RX Packet loss O."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_a" "0" "PCoIP VGM TX Packet loss A."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_i" "0" "PCoIP VGM TX Packet loss I."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_o" "0" "PCoIP VGM TX Packet loss O."
            return
        fi
        
        # Extract VGM statistics from VGMAC log entries
        local vgm_stats="$($GREP_PATH VGMAC "$log_pattern" | $SED_PATH 's/.*Stat: =[\t ]*//;s/,//g' | $TAIL_PATH -n 1)"

        if [[ -n "$vgm_stats" ]]; then
            # Extract basic packet loss percentages (convert "/" to space for parsing)
            local pkt_loss="$(echo "$vgm_stats" | $SED_PATH 's/.*Loss=[\t ]*//;s/,//g' | $AWK_PATH '{gsub(/\//," ",$1)}1 ')"
            # Extract detailed RX loss statistics
            local vgm_loss_rx="$(echo "$vgm_stats" | $SED_PATH 's/.*R=[\t ]*//;s/,//g')"
            # Extract detailed TX loss statistics
            local vgm_loss_tx="$(echo "$vgm_stats" | $SED_PATH 's/.*T=[\t ]*//;s/,//g')"

            # Process basic packet loss percentages
            if [[ -n "$pkt_loss" ]]; then
                # Split packet loss string by % symbol to get RX and TX percentages
                local rx_pkt="$(echo "$pkt_loss" | $AWK_PATH '{split($0,a,"%")} {print a[1]}')"
                local tx_pkt="$(echo "$pkt_loss" | $AWK_PATH '{split($0,a,"%")} {print a[2]}')"

                rx_pkt="$(validate_metric_value "$rx_pkt")"
                tx_pkt="$(validate_metric_value "$tx_pkt")"

                # Output basic packet loss metrics
                output_metric "node_teradici_pcoip_rx_packet_loss_percent" "$rx_pkt" "PCoIP RX Packet loss Percent."
                output_metric "node_teradici_pcoip_tx_packet_loss_percent" "$tx_pkt" "PCoIP TX Packet loss Percent."
            else
                # Output default metrics when no packet loss data found
                output_metric "node_teradici_pcoip_rx_packet_loss_percent" "0" "PCoIP RX Packet loss Percent."
                output_metric "node_teradici_pcoip_tx_packet_loss_percent" "0" "PCoIP TX Packet loss Percent."
            fi

            # Process detailed VGM RX packet loss by type (A=Audio, I=Image, O=Other)
            if [[ -n "$vgm_loss_rx" ]]; then
                # Extract first 20 characters and split by "/" to get A/I/O values
                local vgm_rx_a="$(echo "$vgm_loss_rx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[1]}')"
                local vgm_rx_i="$(echo "$vgm_loss_rx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[2]}')"
                local vgm_rx_o="$(echo "$vgm_loss_rx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[3]}')"

                vgm_rx_a="$(validate_metric_value "$vgm_rx_a")"
                vgm_rx_i="$(validate_metric_value "$vgm_rx_i")"
                vgm_rx_o="$(validate_metric_value "$vgm_rx_o")"

                # Output detailed RX packet loss metrics by type
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_a" "$vgm_rx_a" "PCoIP VGM RX Packet loss A."
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_i" "$vgm_rx_i" "PCoIP VGM RX Packet loss I."
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_o" "$vgm_rx_o" "PCoIP VGM RX Packet loss O."
            else
                # Output default metrics when no RX data found
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_a" "0" "PCoIP VGM RX Packet loss A."
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_i" "0" "PCoIP VGM RX Packet loss I."
                output_metric "node_teradici_pcoip_vgm_rx_packet_loss_o" "0" "PCoIP VGM RX Packet loss O."
            fi

            # Process detailed VGM TX packet loss by type (A=Audio, I=Image, O=Other)
            if [[ -n "$vgm_loss_tx" ]]; then
                # Extract first 20 characters and split by "/" to get A/I/O values
                local vgm_tx_a="$(echo "$vgm_loss_tx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[1]}')"
                local vgm_tx_i="$(echo "$vgm_loss_tx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[2]}')"
                local vgm_tx_o="$(echo "$vgm_loss_tx" | $CUT_PATH -b 1-20 | $AWK_PATH '{split($0,a,"/")} {print a[3]}')"

                vgm_tx_a="$(validate_metric_value "$vgm_tx_a")"
                vgm_tx_i="$(validate_metric_value "$vgm_tx_i")"
                vgm_tx_o="$(validate_metric_value "$vgm_tx_o")"

                # Output detailed TX packet loss metrics by type
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_a" "$vgm_tx_a" "PCoIP VGM TX Packet loss A."
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_i" "$vgm_tx_i" "PCoIP VGM TX Packet loss I."
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_o" "$vgm_tx_o" "PCoIP VGM TX Packet loss O."
            else
                # Output default metrics when no TX data found
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_a" "0" "PCoIP VGM TX Packet loss A."
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_i" "0" "PCoIP VGM TX Packet loss I."
                output_metric "node_teradici_pcoip_vgm_tx_packet_loss_o" "0" "PCoIP VGM TX Packet loss O."
            fi
        else
            # Output default metrics when no VGM data found in logs
            output_metric "node_teradici_pcoip_rx_packet_loss_percent" "0" "PCoIP RX Packet loss Percent."
            output_metric "node_teradici_pcoip_tx_packet_loss_percent" "0" "PCoIP TX Packet loss Percent."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_a" "0" "PCoIP VGM RX Packet loss A."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_i" "0" "PCoIP VGM RX Packet loss I."
            output_metric "node_teradici_pcoip_vgm_rx_packet_loss_o" "0" "PCoIP VGM RX Packet loss O."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_a" "0" "PCoIP VGM TX Packet loss A."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_i" "0" "PCoIP VGM TX Packet loss I."
            output_metric "node_teradici_pcoip_vgm_tx_packet_loss_o" "0" "PCoIP VGM TX Packet loss O."
        fi
    }
}

#### Process all PCoIP metrics ####
# Orchestrates one full collection pass over the PCoIP server log.
#
# Globals:
#   pcoip_log (read) - directory that is searched for server log files
# Arguments:
#   None
# Outputs:
#   Emits node_teradici_pcoip_status (1 when a log was found and validated,
#   0 otherwise) through output_metric, then calls every collect_*_metrics
#   function.
# Returns:
#   0 when no usable log exists (the collectors are still invoked, with an
#   empty path and "true"); otherwise the status of the last collector called.
process_pcoip_metrics() {
    {
        # nullglob makes an unmatched pattern expand to nothing instead of to
        # the literal pattern. Remember whether it was already enabled so the
        # caller's shell option is restored rather than always switched off.
        local nullglob_was_set=0
        if shopt -q nullglob; then
            nullglob_was_set=1
        fi
        shopt -s nullglob
        local log_files=("$pcoip_log"/server*.log)

        # If no server*.log files found, fall back to any server* entry
        # (no .log extension required)
        if [[ ${#log_files[@]} -eq 0 ]]; then
            log_files=("$pcoip_log"/server*)
        fi
        if [[ "$nullglob_was_set" -eq 0 ]]; then
            shopt -u nullglob
        fi

        # No candidate files, or validation rejected them: report the service
        # as unavailable but still emit every metric so the series stay present.
        # NOTE(review): the trailing "true" passed to validate_log_file is
        # presumably a "quiet" flag - confirm against its definition.
        if [[ ${#log_files[@]} -eq 0 ]] || ! validate_log_file "$pcoip_log" "server*" "true"; then
            output_metric "node_teradici_pcoip_status" "0" "PCoIP service status (0=unavailable, 1=active)"

            # Empty log path plus "true": presumably asks each collector for
            # its default (0) values - confirm against the collectors.
            collect_memory_metrics "" "true"
            collect_image_quality_metrics "" "true"
            collect_bandwidth_metrics "" "true"
            collect_network_metrics "" "true"
            collect_vgm_metrics "" "true"

            return 0  # Return success since we've output all metrics
        fi

        # Pick the most recently modified log file. Glob results are sorted by
        # name, so element 0 alone is not guaranteed to be the newest file;
        # compare modification times instead (ties keep the glob order).
        local log_pattern="${log_files[0]}"
        local candidate
        for candidate in "${log_files[@]}"; do
            if [[ "$candidate" -nt "$log_pattern" ]]; then
                log_pattern="$candidate"
            fi
        done

        log "INFO" "Processing PCoIP metrics from $log_pattern"

        # Output status metric indicating PCoIP logs are available
        output_metric "node_teradici_pcoip_status" "1" "PCoIP service status (0=unavailable, 1=active)"

        # Collect all metrics by calling individual collection functions
        collect_memory_metrics "$log_pattern"
        collect_image_quality_metrics "$log_pattern"
        collect_bandwidth_metrics "$log_pattern"
        collect_network_metrics "$log_pattern"
        collect_vgm_metrics "$log_pattern"
    }
}

#####################
#### PCoIP Stats ####
#####################

# Entry point: gather and emit every PCoIP metric
process_pcoip_metrics

# Report how long this run took, in whole seconds, as its own metric so that
# slow or stalled collections are visible from the exported data.
# SCRIPT_START_TIME is expected to have been set earlier in the script.
SCRIPT_END_TIME="$(date '+%s')"
SCRIPT_RUNTIME="$(( SCRIPT_END_TIME - SCRIPT_START_TIME ))"
output_metric \
    "node_teradici_pcoip_script_runtime_seconds" \
    "${SCRIPT_RUNTIME}" \
    "PCoIP metrics script execution time in seconds"