Files
linux-scripts/postfix-metrics.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

1078 lines
50 KiB
Bash
Executable File

#!/bin/bash
################################################################################
# Script Name: postfix-metrics.sh
# Version: 1.01
# Description: Prometheus exporter for Postfix mail server metrics
#
# Usage:
#   # Output to stdout
#   ./postfix-metrics.sh
#
#   # Textfile collector mode (atomic write)
#   ./postfix-metrics.sh --textfile
#
#   # Custom output file
#   ./postfix-metrics.sh -o /path/to/metrics.prom
#
#   # Built-in HTTP server mode (default port 9192, change with -p)
#   ./postfix-metrics.sh --http
#
################################################################################
# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
# Value of the hostname="..." label attached to every exported sample.
HOSTNAME="$(hostname)"

# Postfix data sources: the spool directory whose queue subdirectories are
# scanned, and the mail log that the counters are grepped from.
QUEUE_DIR='/var/spool/postfix'
LOG_FILE='/var/log/mail.log'

# Output destination. TEXTFILE_DIR is the directory used by --textfile;
# OUTPUT_FILE stays empty unless --textfile or -o/--output sets it.
TEXTFILE_DIR='/var/lib/node_exporter'
OUTPUT_FILE=''

# HTTP mode is off unless --http is given; -p/--port overrides the port.
HTTP_MODE='false'
HTTP_PORT='9192'
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print the command-line help text to stdout, then terminate the script
# with exit status 0.
# Globals read: TEXTFILE_DIR, HTTP_PORT (interpolated into the help text).
# The --textfile path shown here must match the one parse_args assigns to
# OUTPUT_FILE, i.e. $TEXTFILE_DIR/postfix.prom.
show_usage() {
    cat <<EOF
Usage: $0 [OPTIONS]
Export Postfix statistics as Prometheus metrics.
MODES:
--textfile Write to node_exporter textfile collector
(writes to $TEXTFILE_DIR/postfix.prom)
--http Run HTTP server on port $HTTP_PORT
OPTIONS:
-p, --port HTTP port (default: $HTTP_PORT)
-o, --output Output file path (for custom locations)
-h, --help Show this help message
EXAMPLES:
$0 # Output to stdout
$0 --textfile # Write to textfile collector
$0 --http # Run HTTP server on port $HTTP_PORT
$0 --http -p 9192 # Run HTTP server on custom port
$0 -o /tmp/postfix.prom # Write to custom file
EOF
    exit 0
}
# Parse command-line options into the global configuration variables.
# Arguments:       the script's argument list (pass "$@")
# Globals read:    TEXTFILE_DIR
# Globals written: OUTPUT_FILE, HTTP_MODE, HTTP_PORT
# Exits:           0 through show_usage for -h/--help;
#                  1 on an unknown option or an option missing its value
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/postfix.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port)
                # 'shift 2' with a single argument left fails WITHOUT
                # shifting, so $1 would never change and this loop would
                # spin forever; reject the missing value up front.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                HTTP_PORT="$2"
                shift 2
                ;;
            -o|--output)
                # Same guard as for --port (see above).
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}
# Helper function to count grep matches (returns 0 if no match)
# Count matching lines with `grep -c`, forwarding every argument to grep
# unchanged (options, pattern, file). grep's stderr is discarded, and any
# non-zero grep exit status (no match, unreadable file, ...) yields 0.
# Outputs: the count on stdout.
grep_count() {
    local matches
    if ! matches=$(grep -c "$@" 2>/dev/null); then
        matches=0
    fi
    printf '%s\n' "$matches"
}
# ============================================================================
# METRIC GENERATION
# ============================================================================
generate_metrics() {
local START_TIME
START_TIME=$(date +%s.%N)
# Queue sizes
echo "# HELP postfix_queue_size Number of messages in each Postfix queue"
echo "# TYPE postfix_queue_size gauge"
for queue in incoming active deferred hold corrupt; do
count=$(find "${QUEUE_DIR}/${queue}" -type f 2>/dev/null | wc -l)
echo "postfix_queue_size{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Oldest message in queue (seconds)
echo ""
echo "# HELP postfix_queue_oldest_seconds Age of oldest message in queue"
echo "# TYPE postfix_queue_oldest_seconds gauge"
for queue in deferred hold; do
oldest=$(find "${QUEUE_DIR}/${queue}" -type f -printf '%T@\n' 2>/dev/null | sort -n | head -1)
if [[ -n "$oldest" ]]; then
age=$(echo "$(date +%s) - ${oldest%.*}" | bc)
else
age=0
fi
echo "postfix_queue_oldest_seconds{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${age}"
done
# Message counters by status
echo ""
echo "# HELP postfix_messages_total Total messages by status"
echo "# TYPE postfix_messages_total counter"
for status in sent bounced deferred expired; do
count=$(grep_count "status=${status}" "$LOG_FILE")
echo "postfix_messages_total{status=\"${status}\",hostname=\"${HOSTNAME}\"} ${count}"
done
rejected=$(grep_count 'reject:' "$LOG_FILE")
echo "postfix_messages_total{status=\"rejected\",hostname=\"${HOSTNAME}\"} ${rejected}"
# SMTP connections
echo ""
echo "# HELP postfix_smtp_connections SMTP connection stats"
echo "# TYPE postfix_smtp_connections counter"
connections=$(grep_count 'connect from' "$LOG_FILE")
disconnections=$(grep_count 'disconnect from' "$LOG_FILE")
echo "postfix_smtp_connections{type=\"connect\",hostname=\"${HOSTNAME}\"} ${connections}"
echo "postfix_smtp_connections{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${disconnections}"
# Connection timeouts
echo ""
echo "# HELP postfix_timeout_total Connection timeout events"
echo "# TYPE postfix_timeout_total counter"
timeout_count=$(grep_count 'timeout after' "$LOG_FILE")
echo "postfix_timeout_total{hostname=\"${HOSTNAME}\"} ${timeout_count}"
# SASL authentication
echo ""
echo "# HELP postfix_sasl_auth_total SASL authentication attempts"
echo "# TYPE postfix_sasl_auth_total counter"
sasl_success=$(grep_count 'sasl_username=' "$LOG_FILE")
sasl_fail=$(grep_count 'authentication failed' "$LOG_FILE")
echo "postfix_sasl_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${sasl_success}"
echo "postfix_sasl_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${sasl_fail}"
# Message sizes (bytes)
echo ""
echo "# HELP postfix_message_size_bytes_total Total bytes of messages processed"
echo "# TYPE postfix_message_size_bytes_total counter"
total_bytes=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
echo "postfix_message_size_bytes_total{hostname=\"${HOSTNAME}\"} ${total_bytes}"
echo ""
echo "# HELP postfix_message_size_bytes_avg Average message size"
echo "# TYPE postfix_message_size_bytes_avg gauge"
avg_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print int(sum/count); else print 0}')
echo "postfix_message_size_bytes_avg{hostname=\"${HOSTNAME}\"} ${avg_size}"
echo ""
echo "# HELP postfix_message_size_bytes_max Largest message size"
echo "# TYPE postfix_message_size_bytes_max gauge"
max_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_message_size_bytes_max{hostname=\"${HOSTNAME}\"} ${max_size:-0}"
# Per-recipient domain stats (top domains)
echo ""
echo "# HELP postfix_recipient_domain_total Messages per recipient domain"
echo "# TYPE postfix_recipient_domain_total counter"
grep -oP 'to=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
echo "postfix_recipient_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Sender domain stats
echo ""
echo "# HELP postfix_sender_domain_total Messages per sender domain"
echo "# TYPE postfix_sender_domain_total counter"
grep -oP 'from=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
echo "postfix_sender_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Bounce reasons
echo ""
echo "# HELP postfix_bounce_reason_total Bounces by reason"
echo "# TYPE postfix_bounce_reason_total counter"
bounce_user=$(grep_count 'User unknown' "$LOG_FILE")
bounce_quota=$(grep_count -i 'over quota\|mailbox full' "$LOG_FILE")
bounce_spam=$(grep_count -i 'blocked\|spam\|blacklist' "$LOG_FILE")
bounce_dns=$(grep_count 'Host or domain name not found' "$LOG_FILE")
bounce_refused=$(grep_count 'Connection refused' "$LOG_FILE")
echo "postfix_bounce_reason_total{reason=\"user_unknown\",hostname=\"${HOSTNAME}\"} ${bounce_user}"
echo "postfix_bounce_reason_total{reason=\"over_quota\",hostname=\"${HOSTNAME}\"} ${bounce_quota}"
echo "postfix_bounce_reason_total{reason=\"spam_blocked\",hostname=\"${HOSTNAME}\"} ${bounce_spam}"
echo "postfix_bounce_reason_total{reason=\"dns_error\",hostname=\"${HOSTNAME}\"} ${bounce_dns}"
echo "postfix_bounce_reason_total{reason=\"connection_refused\",hostname=\"${HOSTNAME}\"} ${bounce_refused}"
# Relay stats
echo ""
echo "# HELP postfix_relay_total Messages by relay"
echo "# TYPE postfix_relay_total counter"
grep -oP 'relay=\K[^,\[]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count relay; do
echo "postfix_relay_total{relay=\"${relay}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Client connections (top IPs)
echo ""
echo "# HELP postfix_client_connections_total Connections per client IP"
echo "# TYPE postfix_client_connections_total counter"
grep -oP 'connect from \S+\[\K[^\]]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count ip; do
echo "postfix_client_connections_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# TLS stats
echo ""
echo "# HELP postfix_tls_connections_total TLS connection statistics"
echo "# TYPE postfix_tls_connections_total counter"
tls_in=$(grep_count 'Anonymous TLS connection established from' "$LOG_FILE")
tls_out=$(grep_count 'Anonymous TLS connection established to' "$LOG_FILE")
verified_in=$(grep_count 'Trusted TLS connection established from' "$LOG_FILE")
verified_out=$(grep_count 'Trusted TLS connection established to' "$LOG_FILE")
untrusted_in=$(grep_count 'Untrusted TLS connection established from' "$LOG_FILE")
untrusted_out=$(grep_count 'Untrusted TLS connection established to' "$LOG_FILE")
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_out}"
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_out}"
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_out}"
# TLS protocol versions
echo ""
echo "# HELP postfix_tls_protocol_total TLS protocol version usage"
echo "# TYPE postfix_tls_protocol_total counter"
for proto in TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; do
count=$(grep_count "${proto} with cipher" "$LOG_FILE")
echo "postfix_tls_protocol_total{protocol=\"${proto}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delay stats (queue time)
echo ""
echo "# HELP postfix_delay_seconds_total Total delay time in seconds"
echo "# TYPE postfix_delay_seconds_total counter"
total_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
echo "postfix_delay_seconds_total{hostname=\"${HOSTNAME}\"} ${total_delay}"
echo ""
echo "# HELP postfix_delay_seconds_avg Average delivery delay"
echo "# TYPE postfix_delay_seconds_avg gauge"
avg_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
echo "postfix_delay_seconds_avg{hostname=\"${HOSTNAME}\"} ${avg_delay}"
echo ""
echo "# HELP postfix_delay_seconds_max Maximum delivery delay"
echo "# TYPE postfix_delay_seconds_max gauge"
max_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_delay_seconds_max{hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
# Postfix process count
echo ""
echo "# HELP postfix_processes Number of running postfix processes"
echo "# TYPE postfix_processes gauge"
proc_count=$(pgrep -c -f "postfix" 2>/dev/null) || proc_count=0
echo "postfix_processes{hostname=\"${HOSTNAME}\"} ${proc_count}"
# Mail loop detection
echo ""
echo "# HELP postfix_mail_loop_total Detected mail loops"
echo "# TYPE postfix_mail_loop_total counter"
loops=$(grep_count 'mail forwarding loop' "$LOG_FILE")
echo "postfix_mail_loop_total{hostname=\"${HOSTNAME}\"} ${loops}"
# Service status
echo ""
echo "# HELP postfix_up Postfix service status (1=running, 0=stopped)"
echo "# TYPE postfix_up gauge"
if postfix status &>/dev/null || systemctl is-active postfix &>/dev/null; then
echo "postfix_up{hostname=\"${HOSTNAME}\"} 1"
else
echo "postfix_up{hostname=\"${HOSTNAME}\"} 0"
fi
# Queue age distribution (messages by age bucket)
echo ""
echo "# HELP postfix_queue_age_bucket Messages in deferred queue by age"
echo "# TYPE postfix_queue_age_bucket gauge"
now=$(date +%s)
for mins in 5 15 60 360 1440; do
count=$(find "${QUEUE_DIR}/deferred" -type f -mmin +${mins} 2>/dev/null | wc -l)
echo "postfix_queue_age_bucket{le=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delivery attempts (retries)
echo ""
echo "# HELP postfix_delivery_attempts_total Delivery attempts by result"
echo "# TYPE postfix_delivery_attempts_total counter"
first_attempt=$(grep_count 'delay=.*delays=0/' "$LOG_FILE")
retry_attempt=$(grep -c 'status=deferred.*will be retried' "$LOG_FILE" 2>/dev/null) || retry_attempt=0
echo "postfix_delivery_attempts_total{type=\"first\",hostname=\"${HOSTNAME}\"} ${first_attempt}"
echo "postfix_delivery_attempts_total{type=\"retry\",hostname=\"${HOSTNAME}\"} ${retry_attempt}"
# DSN status codes breakdown
echo ""
echo "# HELP postfix_dsn_total Delivery Status Notification codes"
echo "# TYPE postfix_dsn_total counter"
for dsn in "2.0.0" "4.7.1" "5.1.1" "5.1.2" "5.2.1" "5.2.2" "5.4.1" "5.7.1"; do
count=$(grep_count "dsn=${dsn}" "$LOG_FILE")
echo "postfix_dsn_total{code=\"${dsn}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delay breakdown by phase
echo ""
echo "# HELP postfix_delay_phase_seconds_total Delay time by phase"
echo "# TYPE postfix_delay_phase_seconds_total counter"
grep -oP 'delays=\K[\d.]+/[\d.]+/[\d.]+/[\d.]+' "$LOG_FILE" 2>/dev/null | awk -F'/' '{
before_qmgr+=$1; in_qmgr+=$2; conn_setup+=$3; transmission+=$4
} END {
print "before_qmgr " before_qmgr+0
print "in_qmgr " in_qmgr+0
print "conn_setup " conn_setup+0
print "transmission " transmission+0
}' | while read -r phase total; do
echo "postfix_delay_phase_seconds_total{phase=\"${phase}\",hostname=\"${HOSTNAME}\"} ${total}"
done
# RBL rejections (per blocklist)
echo ""
echo "# HELP postfix_rbl_reject_total Rejections by RBL"
echo "# TYPE postfix_rbl_reject_total counter"
for rbl in "zen.spamhaus.org" "bl.spamcop.net" "b.barracudacentral.org" "dnsbl.sorbs.net"; do
count=$(grep_count "${rbl}" "$LOG_FILE")
echo "postfix_rbl_reject_total{rbl=\"${rbl}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Invalid HELO/EHLO attempts
echo ""
echo "# HELP postfix_helo_invalid_total Invalid HELO/EHLO attempts"
echo "# TYPE postfix_helo_invalid_total counter"
helo_invalid=$(grep_count 'Helo command rejected' "$LOG_FILE")
echo "postfix_helo_invalid_total{hostname=\"${HOSTNAME}\"} ${helo_invalid}"
# Anvil rate limiting
echo ""
echo "# HELP postfix_rate_limited_total Anvil rate limit events"
echo "# TYPE postfix_rate_limited_total counter"
rate_conn=$(grep_count 'anvil.*connection rate' "$LOG_FILE")
rate_msg=$(grep_count 'anvil.*message rate' "$LOG_FILE")
rate_rcpt=$(grep_count 'anvil.*recipient rate' "$LOG_FILE")
echo "postfix_rate_limited_total{type=\"connection\",hostname=\"${HOSTNAME}\"} ${rate_conn}"
echo "postfix_rate_limited_total{type=\"message\",hostname=\"${HOSTNAME}\"} ${rate_msg}"
echo "postfix_rate_limited_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${rate_rcpt}"
# Milter/content filter rejections
echo ""
echo "# HELP postfix_milter_reject_total Milter rejection events"
echo "# TYPE postfix_milter_reject_total counter"
milter_reject=$(grep_count 'milter-reject' "$LOG_FILE")
echo "postfix_milter_reject_total{hostname=\"${HOSTNAME}\"} ${milter_reject}"
# Header/body checks rejections
echo ""
echo "# HELP postfix_header_checks_reject_total Header/body check rejections"
echo "# TYPE postfix_header_checks_reject_total counter"
header_reject=$(grep_count 'header_checks:' "$LOG_FILE")
body_reject=$(grep_count 'body_checks:' "$LOG_FILE")
echo "postfix_header_checks_reject_total{type=\"header\",hostname=\"${HOSTNAME}\"} ${header_reject}"
echo "postfix_header_checks_reject_total{type=\"body\",hostname=\"${HOSTNAME}\"} ${body_reject}"
# Policy daemon deferrals
echo ""
echo "# HELP postfix_policyd_total Policy daemon events"
echo "# TYPE postfix_policyd_total counter"
policyd_defer=$(grep_count 'policy.*DEFER' "$LOG_FILE")
policyd_reject=$(grep_count 'policy.*REJECT' "$LOG_FILE")
echo "postfix_policyd_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${policyd_defer}"
echo "postfix_policyd_total{action=\"reject\",hostname=\"${HOSTNAME}\"} ${policyd_reject}"
# DKIM signing (if OpenDKIM is used)
echo ""
echo "# HELP postfix_dkim_total DKIM signing/verification results"
echo "# TYPE postfix_dkim_total counter"
dkim_signed=$(grep_count 'DKIM-Signature field added' "$LOG_FILE")
dkim_pass=$(grep_count 'dkim=pass' "$LOG_FILE")
dkim_fail=$(grep_count 'dkim=fail' "$LOG_FILE")
echo "postfix_dkim_total{action=\"signed\",hostname=\"${HOSTNAME}\"} ${dkim_signed}"
echo "postfix_dkim_total{result=\"pass\",hostname=\"${HOSTNAME}\"} ${dkim_pass}"
echo "postfix_dkim_total{result=\"fail\",hostname=\"${HOSTNAME}\"} ${dkim_fail}"
# SPF results
echo ""
echo "# HELP postfix_spf_total SPF check results"
echo "# TYPE postfix_spf_total counter"
for result in pass fail softfail neutral none permerror temperror; do
count=$(grep_count -i "spf=${result}\|SPF: ${result}" "$LOG_FILE")
echo "postfix_spf_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# DMARC results (if OpenDMARC is used)
# OpenDMARC logs: "opendmarc[PID]: QUEUEID: domain.com pass/fail/none"
echo ""
echo "# HELP postfix_dmarc_total DMARC check results"
echo "# TYPE postfix_dmarc_total counter"
for result in pass fail none; do
count=$(grep -cE "opendmarc\[.*\]: [A-F0-9]+: [^ ]+ ${result}$" "$LOG_FILE" 2>/dev/null) || count=0
echo "postfix_dmarc_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Hourly volume (traffic patterns)
echo ""
echo "# HELP postfix_hourly_volume Messages processed per hour"
echo "# TYPE postfix_hourly_volume gauge"
current_date=$(date +%b" "%d)
for hour in $(seq -w 0 23); do
count=$(grep_count "^${current_date} ${hour}:" "$LOG_FILE" | grep -c 'status=sent' 2>/dev/null) || count=0
count=$(grep "^${current_date} ${hour}:" "$LOG_FILE" 2>/dev/null | grep -c 'status=sent') || count=0
echo "postfix_hourly_volume{hour=\"${hour}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Recent throughput (last 5/15/60 minutes)
echo ""
echo "# HELP postfix_messages_recent Messages sent in recent time windows"
echo "# TYPE postfix_messages_recent gauge"
for mins in 5 15 60; do
since=$(date -d "${mins} minutes ago" '+%b %d %H:%M' 2>/dev/null) || since=""
if [[ -n "$since" ]]; then
count=$(awk -v since="$since" '$0 >= since && /status=sent/' "$LOG_FILE" 2>/dev/null | wc -l)
else
count=0
fi
echo "postfix_messages_recent{window=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Active SMTP sessions estimate
echo ""
echo "# HELP postfix_smtp_sessions_active Estimated active SMTP sessions"
echo "# TYPE postfix_smtp_sessions_active gauge"
smtp_procs=$(pgrep -c -x smtp 2>/dev/null) || smtp_procs=0
smtpd_procs=$(pgrep -c -x smtpd 2>/dev/null) || smtpd_procs=0
echo "postfix_smtp_sessions_active{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${smtp_procs}"
echo "postfix_smtp_sessions_active{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${smtpd_procs}"
# Qmgr active recipients
echo ""
echo "# HELP postfix_qmgr_recipients Active recipients in queue manager"
echo "# TYPE postfix_qmgr_recipients gauge"
active_recipients=$(find "${QUEUE_DIR}/active" -type f -exec cat {} \; 2>/dev/null | wc -l) || active_recipients=0
echo "postfix_qmgr_recipients{hostname=\"${HOSTNAME}\"} ${active_recipients}"
# Estimated queue memory usage (based on file sizes)
echo ""
echo "# HELP postfix_queue_size_bytes Total size of queue files in bytes"
echo "# TYPE postfix_queue_size_bytes gauge"
for queue in incoming active deferred hold; do
size=$(du -sb "${QUEUE_DIR}/${queue}" 2>/dev/null | cut -f1)
size=${size:-0}
echo "postfix_queue_size_bytes{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${size}"
done
# Warnings and fatal errors
echo ""
echo "# HELP postfix_log_events_total Log events by severity"
echo "# TYPE postfix_log_events_total counter"
warnings=$(grep_count 'warning:' "$LOG_FILE")
fatals=$(grep_count 'fatal:' "$LOG_FILE")
panics=$(grep_count 'panic:' "$LOG_FILE")
echo "postfix_log_events_total{level=\"warning\",hostname=\"${HOSTNAME}\"} ${warnings}"
echo "postfix_log_events_total{level=\"fatal\",hostname=\"${HOSTNAME}\"} ${fatals}"
echo "postfix_log_events_total{level=\"panic\",hostname=\"${HOSTNAME}\"} ${panics}"
# SMTP response codes
echo ""
echo "# HELP postfix_smtp_response_total SMTP response codes"
echo "# TYPE postfix_smtp_response_total counter"
smtp_2xx=$(grep_count 'status=sent' "$LOG_FILE")
smtp_4xx=$(grep_count 'status=deferred' "$LOG_FILE")
smtp_5xx=$(grep_count 'status=bounced' "$LOG_FILE")
echo "postfix_smtp_response_total{code=\"2xx\",hostname=\"${HOSTNAME}\"} ${smtp_2xx}"
echo "postfix_smtp_response_total{code=\"4xx\",hostname=\"${HOSTNAME}\"} ${smtp_4xx}"
echo "postfix_smtp_response_total{code=\"5xx\",hostname=\"${HOSTNAME}\"} ${smtp_5xx}"
# Specific SMTP error codes (check multiple patterns)
# Postfix logs SMTP errors in various formats:
# - "said: 550 5.1.1 User unknown"
# - "status=bounced (host ... said: 550 ...)"
# - "dsn=5.1.1" (DSN codes start with same digit)
# - Remote server responses with just the code
echo ""
echo "# HELP postfix_smtp_error_code_total Specific SMTP error codes"
echo "# TYPE postfix_smtp_error_code_total counter"
for code in 421 450 451 452 500 501 502 503 504 550 551 552 553 554; do
# Multiple patterns: "said: 550", "(550 ", "smtp.*550", host responses
count=$(grep -cE "(said: ${code}|said:${code}|\(${code} |host .*\[.*\].*${code} |smtp.*${code}[^0-9])" "$LOG_FILE" 2>/dev/null) || count=0
echo "postfix_smtp_error_code_total{code=\"${code}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# TLS cipher suites (top 10)
# Requires smtpd_tls_loglevel=1 and smtp_tls_loglevel=1 in main.cf
# Postfix logs: "TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)"
echo ""
echo "# HELP postfix_tls_cipher_total TLS cipher suite usage"
echo "# TYPE postfix_tls_cipher_total counter"
cipher_output=$({
grep -oP 'with cipher \K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
grep -oP 'cipher=\K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
} | sort | uniq -c | sort -rn | head -10)
if [[ -n "$cipher_output" ]]; then
echo "$cipher_output" | while read -r count cipher; do
[[ -n "$cipher" ]] && echo "postfix_tls_cipher_total{cipher=\"${cipher}\",hostname=\"${HOSTNAME}\"} ${count}"
done
else
echo "postfix_tls_cipher_total{cipher=\"unknown\",hostname=\"${HOSTNAME}\"} 0"
fi
# TLS certificate expiry (check multiple locations)
echo ""
echo "# HELP postfix_cert_expiry_seconds Seconds until TLS certificate expires"
echo "# TYPE postfix_cert_expiry_seconds gauge"
CERT_FILE=""
for cert in "/etc/ssl/certs/postfix.pem" \
"/home/user-data/ssl/ssl_certificate.pem" \
"/etc/letsencrypt/live/$(hostname)/fullchain.pem" \
"/etc/letsencrypt/live/$(hostname -f)/fullchain.pem" \
"/etc/ssl/certs/ssl-cert-snakeoil.pem"; do
if [[ -f "$cert" ]]; then
CERT_FILE="$cert"
break
fi
done
cert_seconds=0
if [[ -n "$CERT_FILE" ]] && command -v openssl &>/dev/null; then
expiry=$(openssl x509 -enddate -noout -in "$CERT_FILE" 2>/dev/null | cut -d= -f2)
if [[ -n "$expiry" ]]; then
expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null) || expiry_epoch=0
now=$(date +%s)
cert_seconds=$((expiry_epoch - now))
fi
fi
echo "postfix_cert_expiry_seconds{hostname=\"${HOSTNAME}\"} ${cert_seconds}"
# LMTP delivery metrics (Postfix side)
# Matches: "postfix/lmtp[PID]: ... status=sent"
echo ""
echo "# HELP postfix_lmtp_delivery_total LMTP delivery stats"
echo "# TYPE postfix_lmtp_delivery_total counter"
lmtp_sent=$(grep_count 'postfix/lmtp\[.*status=sent' "$LOG_FILE")
lmtp_deferred=$(grep_count 'postfix/lmtp\[.*status=deferred' "$LOG_FILE")
lmtp_bounced=$(grep_count 'postfix/lmtp\[.*status=bounced' "$LOG_FILE")
echo "postfix_lmtp_delivery_total{status=\"sent\",hostname=\"${HOSTNAME}\"} ${lmtp_sent}"
echo "postfix_lmtp_delivery_total{status=\"deferred\",hostname=\"${HOSTNAME}\"} ${lmtp_deferred}"
echo "postfix_lmtp_delivery_total{status=\"bounced\",hostname=\"${HOSTNAME}\"} ${lmtp_bounced}"
echo ""
echo "# HELP postfix_lmtp_connections_total LMTP connection events"
echo "# TYPE postfix_lmtp_connections_total counter"
lmtp_connect=$(grep_count 'postfix/lmtp\[.*connect' "$LOG_FILE")
lmtp_disconnect=$(grep_count 'postfix/lmtp\[.*disconnect' "$LOG_FILE")
lmtp_timeout=$(grep_count 'postfix/lmtp\[.*timeout' "$LOG_FILE")
lmtp_refused=$(grep_count 'postfix/lmtp\[.*Connection refused' "$LOG_FILE")
echo "postfix_lmtp_connections_total{type=\"connect\",hostname=\"${HOSTNAME}\"} ${lmtp_connect}"
echo "postfix_lmtp_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${lmtp_disconnect}"
echo "postfix_lmtp_connections_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${lmtp_timeout}"
echo "postfix_lmtp_connections_total{type=\"refused\",hostname=\"${HOSTNAME}\"} ${lmtp_refused}"
echo ""
echo "# HELP postfix_lmtp_delay_seconds LMTP delivery delay stats"
echo "# TYPE postfix_lmtp_delay_seconds gauge"
lmtp_avg_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
lmtp_max_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | sort -rn | head -1)
echo "postfix_lmtp_delay_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${lmtp_avg_delay}"
echo "postfix_lmtp_delay_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${lmtp_max_delay:-0}"
# Dovecot LMTP/LDA delivery stats (check multiple log locations)
DOVECOT_LOG=""
for log in "/var/log/dovecot.log" "/var/log/mail.log" "/var/log/syslog"; do
if [[ -f "$log" ]] && grep -q 'dovecot' "$log" 2>/dev/null; then
DOVECOT_LOG="$log"
break
fi
done
if [[ -n "$DOVECOT_LOG" ]]; then
echo ""
echo "# HELP postfix_dovecot_delivery_total Dovecot local delivery stats"
echo "# TYPE postfix_dovecot_delivery_total counter"
lmtp_delivered=$(grep_count 'lmtp.*saved mail' "$DOVECOT_LOG")
lda_delivered=$(grep_count 'lda.*saved mail' "$DOVECOT_LOG")
echo "postfix_dovecot_delivery_total{type=\"lmtp\",hostname=\"${HOSTNAME}\"} ${lmtp_delivered}"
echo "postfix_dovecot_delivery_total{type=\"lda\",hostname=\"${HOSTNAME}\"} ${lda_delivered}"
echo ""
echo "# HELP postfix_dovecot_sieve_total Dovecot sieve filter actions"
echo "# TYPE postfix_dovecot_sieve_total counter"
sieve_fileinto=$(grep_count 'sieve.*fileinto' "$DOVECOT_LOG")
sieve_discard=$(grep_count 'sieve.*discard' "$DOVECOT_LOG")
sieve_redirect=$(grep_count 'sieve.*redirect' "$DOVECOT_LOG")
echo "postfix_dovecot_sieve_total{action=\"fileinto\",hostname=\"${HOSTNAME}\"} ${sieve_fileinto}"
echo "postfix_dovecot_sieve_total{action=\"discard\",hostname=\"${HOSTNAME}\"} ${sieve_discard}"
echo "postfix_dovecot_sieve_total{action=\"redirect\",hostname=\"${HOSTNAME}\"} ${sieve_redirect}"
echo ""
echo "# HELP postfix_dovecot_auth_total Dovecot authentication attempts"
echo "# TYPE postfix_dovecot_auth_total counter"
auth_success=$(grep_count 'auth.*successful' "$DOVECOT_LOG")
auth_fail=$(grep_count 'auth.*failed' "$DOVECOT_LOG")
echo "postfix_dovecot_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${auth_success}"
echo "postfix_dovecot_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${auth_fail}"
echo ""
echo "# HELP postfix_dovecot_imap_connections_total Dovecot IMAP connections"
echo "# TYPE postfix_dovecot_imap_connections_total counter"
imap_login=$(grep_count 'imap-login:.*Login' "$DOVECOT_LOG")
imap_disconnect=$(grep_count 'imap.*Disconnected' "$DOVECOT_LOG")
echo "postfix_dovecot_imap_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${imap_login}"
echo "postfix_dovecot_imap_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${imap_disconnect}"
echo ""
echo "# HELP postfix_dovecot_pop3_connections_total Dovecot POP3 connections"
echo "# TYPE postfix_dovecot_pop3_connections_total counter"
pop3_login=$(grep_count 'pop3-login:.*Login' "$DOVECOT_LOG")
pop3_disconnect=$(grep_count 'pop3.*Disconnected' "$DOVECOT_LOG")
echo "postfix_dovecot_pop3_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${pop3_login}"
echo "postfix_dovecot_pop3_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${pop3_disconnect}"
fi
# SpamAssassin metrics (supports spamd, spampd, and amavis)
SPAM_LOG="/var/log/mail.log"
# Detect which spam daemon is in use (check spampd first as it's more specific)
if grep -q 'spampd' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="spampd"
elif grep -q 'spamd\[' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="spamd"
elif grep -q 'amavis' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="amavis"
else
SPAM_DAEMON=""
fi
if [[ -n "$SPAM_DAEMON" ]]; then
echo ""
echo "# HELP postfix_spamassassin_total SpamAssassin scan results"
echo "# TYPE postfix_spamassassin_total counter"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: "clean message <...> (SCORE/THRESHOLD)" or "identified spam <...> (SCORE/THRESHOLD)"
spam_identified=$(grep_count 'spampd.*identified spam' "$SPAM_LOG")
ham_clean=$(grep_count 'spampd.*clean message' "$SPAM_LOG")
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
spam_identified=$(grep_count 'amavis.*Blocked SPAM' "$SPAM_LOG")
ham_clean=$(grep_count 'amavis.*Passed CLEAN' "$SPAM_LOG")
else
spam_identified=$(grep_count 'spamd.*identified spam' "$SPAM_LOG")
ham_clean=$(grep_count 'spamd.*clean message' "$SPAM_LOG")
fi
echo "postfix_spamassassin_total{result=\"spam\",hostname=\"${HOSTNAME}\"} ${spam_identified}"
echo "postfix_spamassassin_total{result=\"ham\",hostname=\"${HOSTNAME}\"} ${ham_clean}"
echo ""
echo "# HELP postfix_spamassassin_score_total SpamAssassin score distribution"
echo "# TYPE postfix_spamassassin_score_total counter"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: (SCORE/THRESHOLD) like (-0.30/5.00) or (15.2/5.0)
score_neg=$(grep -oP 'spampd.*\(\K-[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | wc -l)
score_0_5=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
score_neg=$(grep -oP 'amavis.*Hits: \K-[\d.]+' "$SPAM_LOG" 2>/dev/null | wc -l)
score_0_5=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
else
score_neg=0
score_0_5=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
fi
echo "postfix_spamassassin_score_total{bucket=\"negative\",hostname=\"${HOSTNAME}\"} ${score_neg:-0}"
echo "postfix_spamassassin_score_total{bucket=\"0-5\",hostname=\"${HOSTNAME}\"} ${score_0_5}"
echo "postfix_spamassassin_score_total{bucket=\"5-10\",hostname=\"${HOSTNAME}\"} ${score_5_10}"
echo "postfix_spamassassin_score_total{bucket=\"10+\",hostname=\"${HOSTNAME}\"} ${score_10_plus}"
echo ""
echo "# HELP postfix_spamassassin_score_avg Average SpamAssassin score"
echo "# TYPE postfix_spamassassin_score_avg gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
avg_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
avg_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
else
avg_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
fi
echo "postfix_spamassassin_score_avg{hostname=\"${HOSTNAME}\"} ${avg_score}"
echo ""
echo "# HELP postfix_spamassassin_score_max Maximum SpamAssassin score seen"
echo "# TYPE postfix_spamassassin_score_max gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
max_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
max_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
else
max_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
fi
echo "postfix_spamassassin_score_max{hostname=\"${HOSTNAME}\"} ${max_score:-0}"
# Messages scanned total
echo ""
echo "# HELP postfix_spamassassin_scanned_total Total messages scanned"
echo "# TYPE postfix_spamassassin_scanned_total counter"
scanned_total=$((spam_identified + ham_clean))
echo "postfix_spamassassin_scanned_total{hostname=\"${HOSTNAME}\"} ${scanned_total}"
echo ""
echo "# HELP postfix_spamassassin_scan_time_seconds SpamAssassin scan time stats"
echo "# TYPE postfix_spamassassin_scan_time_seconds gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: "in 2.15s"
avg_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
max_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
else
avg_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
max_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
fi
echo "postfix_spamassassin_scan_time_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_time:-0}"
echo "postfix_spamassassin_scan_time_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_time:-0}"
# spampd-specific: message size stats
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
echo ""
echo "# HELP postfix_spamassassin_message_size_bytes SpamAssassin processed message sizes"
echo "# TYPE postfix_spamassassin_message_size_bytes gauge"
avg_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}')
max_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
echo "postfix_spamassassin_message_size_bytes{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_size:-0}"
echo "postfix_spamassassin_message_size_bytes{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_size:-0}"
echo ""
echo "# HELP postfix_spamassassin_threshold SpamAssassin spam threshold"
echo "# TYPE postfix_spamassassin_threshold gauge"
threshold=$(grep -oP 'spampd.*/-?\K[\d.]+(?=\))' "$SPAM_LOG" 2>/dev/null | head -1)
echo "postfix_spamassassin_threshold{hostname=\"${HOSTNAME}\"} ${threshold:-5}"
fi
# SpamAssassin rules (only available with spamd or if logging to separate file)
# NOTE: spampd (used by Mail-in-a-Box) does NOT log individual rules to mail.log
# Rules are only available if using standalone spamd with verbose logging or a separate log file
SA_RULES_LOG=""
for log in "/var/log/spamassassin.log" "/var/log/spamd.log" "$SPAM_LOG"; do
if [[ -f "$log" ]] && grep -q 'tests=' "$log" 2>/dev/null; then
SA_RULES_LOG="$log"
break
fi
done
if [[ -n "$SA_RULES_LOG" ]]; then
echo ""
echo "# HELP postfix_spamassassin_rules_total Top SpamAssassin rules triggered"
echo "# TYPE postfix_spamassassin_rules_total counter"
grep -oP 'tests=\K[^,\]\s]+' "$SA_RULES_LOG" 2>/dev/null | tr ',' '\n' | tr -d ' ' | sort | uniq -c | sort -rn | head -15 | while read -r count rule; do
[[ -n "$rule" ]] && echo "postfix_spamassassin_rules_total{rule=\"${rule}\",hostname=\"${HOSTNAME}\"} ${count}"
done
fi
# Daemon status
echo ""
echo "# HELP postfix_spamassassin_up SpamAssassin daemon status"
echo "# TYPE postfix_spamassassin_up gauge"
if pgrep -f "${SPAM_DAEMON}" &>/dev/null; then
echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 1"
else
echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 0"
fi
echo ""
echo "# HELP postfix_spamassassin_processes Number of spam daemon processes"
echo "# TYPE postfix_spamassassin_processes gauge"
spam_procs=$(pgrep -c -f "${SPAM_DAEMON}" 2>/dev/null) || spam_procs=0
echo "postfix_spamassassin_processes{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} ${spam_procs}"
fi
# Greylisting stats (postgrey)
echo ""
echo "# HELP postfix_greylist_total Greylisting events"
echo "# TYPE postfix_greylist_total counter"
greylist_defer=$(grep_count 'action=greylist' "$LOG_FILE")
greylist_pass=$(grep_count 'action=pass.*reason=triplet' "$LOG_FILE")
greylist_whitelist=$(grep_count 'action=pass.*reason=client whitelist\|action=pass, reason=client AWL' "$LOG_FILE")
echo "postfix_greylist_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${greylist_defer}"
echo "postfix_greylist_total{action=\"pass\",hostname=\"${HOSTNAME}\"} ${greylist_pass}"
echo "postfix_greylist_total{action=\"whitelist\",hostname=\"${HOSTNAME}\"} ${greylist_whitelist}"
echo ""
echo "# HELP postfix_greylist_reason_total Greylisting by reason"
echo "# TYPE postfix_greylist_reason_total counter"
grey_new=$(grep_count 'reason=new' "$LOG_FILE")
grey_early=$(grep_count 'reason=early-retry' "$LOG_FILE")
grey_triplet=$(grep_count 'reason=triplet found' "$LOG_FILE")
echo "postfix_greylist_reason_total{reason=\"new\",hostname=\"${HOSTNAME}\"} ${grey_new}"
echo "postfix_greylist_reason_total{reason=\"early_retry\",hostname=\"${HOSTNAME}\"} ${grey_early}"
echo "postfix_greylist_reason_total{reason=\"triplet_found\",hostname=\"${HOSTNAME}\"} ${grey_triplet}"
echo ""
echo "# HELP postfix_greylist_delay_seconds Greylist delay statistics"
echo "# TYPE postfix_greylist_delay_seconds gauge"
avg_delay=$(grep -oP 'delay=\K\d+' "$LOG_FILE" 2>/dev/null | grep -v '^0$' | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}')
max_delay=$(grep -oP 'postgrey.*delay=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_greylist_delay_seconds{type=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_delay:-0}"
echo "postfix_greylist_delay_seconds{type=\"max\",hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
echo ""
echo "# HELP postfix_greylist_clients_total Unique greylisted client IPs"
echo "# TYPE postfix_greylist_clients_total gauge"
grey_clients=$(grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'client_address=\K[^,]+' | sort -u | wc -l)
echo "postfix_greylist_clients_total{hostname=\"${HOSTNAME}\"} ${grey_clients:-0}"
echo ""
echo "# HELP postfix_greylist_top_senders Top greylisted sender domains"
echo "# TYPE postfix_greylist_top_senders counter"
grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'sender=\K[^,]+' | sed 's/.*@//' | sort | uniq -c | sort -rn | head -10 | while read -r count domain; do
[[ -n "$domain" ]] && echo "postfix_greylist_top_senders{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Cleanup daemon stats (total messages entering system)
echo ""
echo "# HELP postfix_cleanup_total Messages processed by cleanup daemon"
echo "# TYPE postfix_cleanup_total counter"
cleanup_count=$(grep_count 'message-id=' "$LOG_FILE")
echo "postfix_cleanup_total{hostname=\"${HOSTNAME}\"} ${cleanup_count}"
# Virtual mailbox errors
echo ""
echo "# HELP postfix_virtual_errors_total Virtual mailbox lookup errors"
echo "# TYPE postfix_virtual_errors_total counter"
virtual_not_found=$(grep_count 'mailbox not found\|User unknown in virtual' "$LOG_FILE")
echo "postfix_virtual_errors_total{hostname=\"${HOSTNAME}\"} ${virtual_not_found}"
# Address verification failures
echo ""
echo "# HELP postfix_address_verify_total Address verification events"
echo "# TYPE postfix_address_verify_total counter"
verify_fail=$(grep_count 'address verification failed' "$LOG_FILE")
verify_success=$(grep_count 'address verification succeeded\|cache hit' "$LOG_FILE")
echo "postfix_address_verify_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${verify_fail}"
echo "postfix_address_verify_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${verify_success}"
# Postfix master process uptime (based on pid file age)
echo ""
echo "# HELP postfix_master_uptime_seconds Postfix master process uptime"
echo "# TYPE postfix_master_uptime_seconds gauge"
MASTER_PID_FILE="/var/spool/postfix/pid/master.pid"
if [[ -f "$MASTER_PID_FILE" ]]; then
master_start=$(stat -c %Y "$MASTER_PID_FILE" 2>/dev/null) || master_start=0
if [[ $master_start -gt 0 ]]; then
uptime_seconds=$(($(date +%s) - master_start))
else
uptime_seconds=0
fi
else
uptime_seconds=0
fi
echo "postfix_master_uptime_seconds{hostname=\"${HOSTNAME}\"} ${uptime_seconds}"
# DNS lookup failures
echo ""
echo "# HELP postfix_dns_errors_total DNS lookup errors"
echo "# TYPE postfix_dns_errors_total counter"
dns_not_found=$(grep_count 'Host not found\|Name service error\|Host or domain name not found' "$LOG_FILE")
dns_timeout=$(grep_count 'DNS lookup.*timeout\|name server.*timeout' "$LOG_FILE")
dns_servfail=$(grep_count 'SERVFAIL\|server failure' "$LOG_FILE")
echo "postfix_dns_errors_total{type=\"not_found\",hostname=\"${HOSTNAME}\"} ${dns_not_found}"
echo "postfix_dns_errors_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${dns_timeout}"
echo "postfix_dns_errors_total{type=\"servfail\",hostname=\"${HOSTNAME}\"} ${dns_servfail}"
# STARTTLS usage - count TLS connections vs total SMTP connections
# "used" = successful TLS connections (inbound + outbound)
# "total" = total SMTP connections for ratio calculation
echo ""
echo "# HELP postfix_starttls_total STARTTLS connection counts"
echo "# TYPE postfix_starttls_total counter"
starttls_inbound=$(grep_count 'TLS connection established from' "$LOG_FILE")
starttls_outbound=$(grep_count 'TLS connection established to' "$LOG_FILE")
echo "postfix_starttls_total{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${starttls_inbound}"
echo "postfix_starttls_total{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${starttls_outbound}"
# Sender/recipient access rejections
echo ""
echo "# HELP postfix_access_reject_total Sender/recipient access rejections"
echo "# TYPE postfix_access_reject_total counter"
sender_reject=$(grep_count 'Sender address rejected' "$LOG_FILE")
recipient_reject=$(grep_count 'Recipient address rejected' "$LOG_FILE")
client_reject=$(grep_count 'Client host rejected' "$LOG_FILE")
echo "postfix_access_reject_total{type=\"sender\",hostname=\"${HOSTNAME}\"} ${sender_reject}"
echo "postfix_access_reject_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${recipient_reject}"
echo "postfix_access_reject_total{type=\"client\",hostname=\"${HOSTNAME}\"} ${client_reject}"
# Queue filesystem usage
echo ""
echo "# HELP postfix_queue_filesystem_usage_percent Queue filesystem usage percentage"
echo "# TYPE postfix_queue_filesystem_usage_percent gauge"
queue_usage=$(df "${QUEUE_DIR}" 2>/dev/null | awk 'NR==2 {gsub(/%/,""); print $5}') || queue_usage=0
echo "postfix_queue_filesystem_usage_percent{hostname=\"${HOSTNAME}\"} ${queue_usage:-0}"
# Postfix file descriptor count (for master process)
echo ""
echo "# HELP postfix_file_descriptors Open file descriptors by postfix"
echo "# TYPE postfix_file_descriptors gauge"
if [[ -f "$MASTER_PID_FILE" ]]; then
master_pid=$(tr -d '[:space:]' < "$MASTER_PID_FILE" 2>/dev/null)
if [[ -n "$master_pid" ]] && [[ -d "/proc/${master_pid}/fd" ]]; then
fd_count=$(find "/proc/${master_pid}/fd" -maxdepth 1 2>/dev/null | wc -l)
else
fd_count=0
fi
else
fd_count=0
fi
echo "postfix_file_descriptors{hostname=\"${HOSTNAME}\"} ${fd_count}"
# Script execution time
# Dovecot IMAP/POP3 login metrics
echo ""
echo "# HELP dovecot_logins_total Successful logins by protocol"
echo "# TYPE dovecot_logins_total counter"
imap_logins=$(grep_count 'imap-login: Info: Login:' "$LOG_FILE")
pop3_logins=$(grep_count 'pop3-login: Info: Login:' "$LOG_FILE")
echo "dovecot_logins_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_logins}"
echo "dovecot_logins_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_logins}"
echo ""
echo "# HELP dovecot_login_auth_method_total Logins by authentication method"
echo "# TYPE dovecot_login_auth_method_total counter"
for method in PLAIN LOGIN CRAM-MD5 DIGEST-MD5; do
count=$(grep_count "Login:.*method=${method}" "$LOG_FILE")
echo "dovecot_login_auth_method_total{method=\"${method}\",hostname=\"${HOSTNAME}\"} ${count}"
done
echo ""
echo "# HELP dovecot_login_tls_total Logins with/without TLS"
echo "# TYPE dovecot_login_tls_total counter"
tls_logins=$(grep -c 'Login:.*TLS' "$LOG_FILE" 2>/dev/null) || tls_logins=0
notls_logins=$(grep 'Login:' "$LOG_FILE" 2>/dev/null | grep -cv 'TLS') || notls_logins=0
echo "dovecot_login_tls_total{tls=\"yes\",hostname=\"${HOSTNAME}\"} ${tls_logins}"
echo "dovecot_login_tls_total{tls=\"no\",hostname=\"${HOSTNAME}\"} ${notls_logins}"
echo ""
echo "# HELP dovecot_login_failed_total Failed login attempts"
echo "# TYPE dovecot_login_failed_total counter"
imap_failed=$(grep_count 'imap-login: Info: Aborted login\|imap-login:.*auth failed' "$LOG_FILE")
pop3_failed=$(grep_count 'pop3-login: Info: Aborted login\|pop3-login:.*auth failed' "$LOG_FILE")
echo "dovecot_login_failed_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_failed}"
echo "dovecot_login_failed_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_failed}"
echo ""
echo "# HELP dovecot_login_user_total Logins per user (top 20)"
echo "# TYPE dovecot_login_user_total counter"
grep -oP 'Login: user=<\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count user; do
echo "dovecot_login_user_total{user=\"${user}\",hostname=\"${HOSTNAME}\"} ${count}"
done
echo ""
echo "# HELP dovecot_login_client_ip_total Logins per client IP (top 20)"
echo "# TYPE dovecot_login_client_ip_total counter"
grep -oP 'Login:.*rip=\K[^,]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count ip; do
echo "dovecot_login_client_ip_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
done
local END_TIME
END_TIME=$(date +%s.%N)
local DURATION
DURATION=$(echo "$END_TIME - $START_TIME" | bc)
echo ""
echo "# HELP postfix_collector_duration_seconds Time taken to collect metrics"
echo "# TYPE postfix_collector_duration_seconds gauge"
echo "postfix_collector_duration_seconds{hostname=\"${HOSTNAME}\"} ${DURATION}"
echo ""
echo "# HELP postfix_collector_last_run_timestamp Unix timestamp of last collection"
echo "# TYPE postfix_collector_last_run_timestamp gauge"
echo "postfix_collector_last_run_timestamp{hostname=\"${HOSTNAME}\"} $(date +%s)"
}
# ============================================================================
# HTTP SERVER MODE
# ============================================================================
#######################################
# Answer one HTTP request: read the request line from stdin and write the
# full response (status line, headers, body) to stdout.
# "GET /metrics..." gets the Prometheus text exposition produced by
# generate_metrics; every other request gets a small HTML landing page.
# Globals:   none directly (generate_metrics reads its own configuration)
# Arguments: none
# Outputs:   HTTP response on stdout
#######################################
_http_respond() {
  local request
  # Only the first line (method + path) is needed; remaining headers are ignored.
  read -r request
  if [[ "$request" =~ ^GET\ /metrics ]]; then
    echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
    # stdin is the request stream here; detach it so no command inside
    # generate_metrics can accidentally consume leftover request bytes.
    generate_metrics </dev/null
  else
    echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
    cat <<'EOF'
<!DOCTYPE html>
<html>
<head><title>Postfix Metrics Exporter</title></head>
<body>
<h1>Postfix Prometheus Exporter</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Available Metrics</h2>
<ul>
<li>Queue sizes and ages</li>
<li>Message counts by status</li>
<li>TLS connection stats</li>
<li>SASL authentication</li>
<li>Bounce reasons</li>
<li>SpamAssassin scores</li>
<li>Dovecot delivery stats</li>
</ul>
</body>
</html>
EOF
  fi
}

#######################################
# Serve metrics over HTTP using netcat, one connection at a time, forever.
#
# The request must be read from the socket, not from the script's own stdin.
# In "handler | nc" the handler's stdin is whatever the script inherited, so
# the request line would never be seen. A FIFO closes the loop:
#   nc stdout (client request)  -> FIFO -> handler stdin
#   handler stdout (response)   -> pipe -> nc stdin (sent to client)
#
# Globals:   HTTP_PORT (read)
# Arguments: none
# Outputs:   startup/error messages on stderr
# Returns:   never returns normally; exits 1 if nc is missing or the FIFO
#            cannot be created
# NOTE: installs an EXIT trap to remove the FIFO directory; this replaces any
#       EXIT trap set earlier in the process.
#######################################
run_http_server() {
  echo "Starting Postfix metrics exporter on port $HTTP_PORT..." >&2
  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  local fifo_dir request_fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  request_fifo="${fifo_dir}/request"
  if ! mkfifo "$request_fifo"; then
    rm -rf -- "${fifo_dir:?}"
    echo "ERROR: cannot create request FIFO for HTTP mode" >&2
    exit 1
  fi
  # Expand the path now: the local variable may be out of scope at exit time.
  trap "rm -rf -- '${fifo_dir}'" EXIT

  while true; do
    # Both sides open the FIFO concurrently (reader and writer rendezvous),
    # then nc blocks in accept() until a client connects.
    _http_respond < "$request_fifo" \
      | nc -l -p "$HTTP_PORT" -q 1 > "$request_fifo" 2>/dev/null
  done
}
# ============================================================================
# MAIN EXECUTION
# ============================================================================
#######################################
# Entry point: parse the command line, then run in exactly one mode.
#   - HTTP_MODE=true        -> run_http_server
#   - OUTPUT_FILE non-empty -> write metrics to that file atomically
#   - otherwise             -> print metrics to stdout
#
# File mode writes to a temp file in the destination directory, validates it,
# and renames it over OUTPUT_FILE, so a reader never sees a partial file and
# a failed run leaves the previous file untouched.
#
# Globals:   HTTP_MODE, OUTPUT_FILE (read; presumably set by parse_args)
# Arguments: the script's command-line arguments
# Outputs:   metrics on stdout (default mode); status/errors on stderr
# Returns:   exits 1 on any failure in file mode
#######################################
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    local output_dir temp_file file_lines

    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
      echo "ERROR: Cannot create output directory $output_dir" >&2
      exit 1
    fi

    # Temp file lives in the SAME directory so the final rename stays on one
    # filesystem and is therefore atomic.
    if ! temp_file=$(mktemp "${output_dir}/.postfix_metrics.XXXXXX"); then
      echo "ERROR: Cannot create temporary file in $output_dir" >&2
      exit 1
    fi

    # Collector stderr is deliberately discarded: missing logs/tools are
    # expected on some hosts and must not pollute cron mail.
    if ! generate_metrics > "$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      echo "ERROR: Failed to generate metrics" >&2
      exit 1
    fi

    # Sanity check: a near-empty file means collection went wrong; keep the
    # previously published metrics instead of replacing them.
    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
    if [ "$file_lines" -lt 10 ]; then
      rm -f "$temp_file"
      echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
      exit 1
    fi

    # mktemp creates the file 0600; make it world-readable before publishing.
    chmod 644 "$temp_file"

    # Atomic rename - no gap where the file is missing. Report failure
    # instead of claiming success, and do not leave the temp file behind.
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
      rm -f "$temp_file"
      echo "ERROR: Failed to move metrics into place at $OUTPUT_FILE" >&2
      exit 1
    fi
    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}
# Script entry point: invoke main, forwarding every command-line argument
# unchanged ("$@" keeps each argument as its own word, preserving spaces).
main "$@"