This commit adds a comprehensive UPS monitoring and management system for the Proxmox cluster, with automated shutdown orchestration and monthly battery health testing.

Features:
- NUT (Network UPS Tools) configuration for INNO TECH USB UPS
- Automated cluster shutdown on power failure (3-minute grace period)
- Monthly automated battery testing with health evaluation
- Email notifications via PVE::Notify system
- WinNUT monitoring client for Windows VM 201

Components added:
- config/: NUT configuration files (ups.conf, upsd.conf, upsmon.conf, etc.)
- scripts/ups-shutdown-cluster.sh: Orchestrated cluster shutdown
- scripts/ups-monthly-test.sh: Monthly battery test with email reports
- scripts/upssched-cmd: Event handler for UPS state changes
- docs/: Complete installation and usage documentation

Key findings:
- UPS battery.charge reporting has 10-40 second delay after test start
- Test must monitor voltage drop (1.5-2V) and charge drop (9-27%)
- Battery health evaluation: EXCELLENT/GOOD/FAIR/POOR based on discharge rate
- Email notifications use Handlebars templates without Unicode emojis for compatibility

Configuration:
- UPS: INNO TECH (Voltronic protocol, vendor 0665:5161)
- Primary node: pvemini (10.0.20.201) with USB connection
- Monthly test: cron 0 0 1 * * /opt/scripts/ups-monthly-test.sh
- Shutdown timer: 180 seconds on battery before cluster shutdown

Documentation includes complete installation guides for NUT server, WinNUT client, and troubleshooting procedures.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
84 lines
2.9 KiB
Bash
84 lines
2.9 KiB
Bash
#!/bin/bash
#
# Orchestrated shutdown script for the Proxmox cluster when the UPS is on critical battery
# Author: auto-generated
# Date: 2025-10-06

# File that every log line of this script is appended to.
LOGFILE="/var/log/ups-shutdown.log"
# Secondary nodes reached over SSH: pve1, pve2 (pvemini, the local node, goes down last).
NODES=(10.0.20.200 10.0.20.202)
|
|
|
|
#######################################
# Print one timestamped line and append the same line to the log file.
# Globals:   LOGFILE (read) - path of the file the line is appended to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
#######################################
log_message() {
    local stamp
    # Take the time at call time; a fixed literal would stamp every entry identically.
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOGFILE"
}
|
|
|
|
# Start-of-run banner: record the UPS status and battery charge as reported by upsc.
# If upsc fails, the literal UNKNOWN is logged in place of the value.
log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
log_message "UPS Status: $(upsc nutdev1 ups.status 2>/dev/null || echo 'UNKNOWN')"
log_message "Battery Charge: $(upsc nutdev1 battery.charge 2>/dev/null || echo 'UNKNOWN')%"
log_message "========================================"

# Safety guard: continue only when the reported status contains OB or LB
# (presumably the NUT "on battery" / "low battery" flags - confirm against the NUT docs).
# Note that an empty status (upsc failed) also takes the abort path below.
UPS_STATUS=$(upsc nutdev1 ups.status 2>/dev/null)
if [[ ! "$UPS_STATUS" =~ (OB|LB) ]]; then
    log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
    exit 0
fi
|
|
|
|
log_message "Step 1: Oprire VM-uri și containere pe toate nodurile..."

# Request a shutdown of every running VM: first on each remote node in NODES, then on this host.
# NOTE(review): the log text mentions containers, but only `qm` (VMs) is invoked here -
# confirm whether `pct` guests need the same treatment.
for node in "${NODES[@]}" localhost; do
    if [[ "$node" == "localhost" ]]; then
        NODE_NAME="pvemini (local)"
    else
        NODE_NAME=$node
    fi

    log_message " - Oprire VM-uri pe $NODE_NAME..."

    if [[ "$node" == "localhost" ]]; then
        # Local node: call qm directly.
        for vmid in $(qm list | awk 'NR>1 {print $1}'); do
            vm_status=$(qm status "$vmid" | awk '{print $2}')
            if [[ "$vm_status" == "running" ]]; then
                log_message " * Oprire VM $vmid pe pvemini..."
                # Backgrounded so the shutdown requests are issued without waiting on each VM.
                qm shutdown "$vmid" --timeout 60 &
            fi
        done
    else
        # Remote node: run the same loop over SSH. Escaped \$ is expanded by the remote
        # shell; the unescaped $node is expanded here, before the command is sent.
        ssh -o ConnectTimeout=5 "root@$node" "
            for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
                vm_status=\$(qm status \$vmid | awk '{print \$2}')
                if [ \"\$vm_status\" == \"running\" ]; then
                    echo ' * Oprire VM '\$vmid' pe $node...'
                    qm shutdown \$vmid --timeout 60 &
                fi
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done
|
|
|
|
# Give the VMs a fixed 90 seconds to finish shutting down before the nodes are halted.
log_message "Step 2: Așteptare 90 secunde pentru oprirea VM-urilor..."
sleep 90

log_message "Step 3: Oprire noduri secundare (pve1, pve2)..."
for node in "${NODES[@]}"; do
    log_message " - Shutdown nod $node..."
    # Each SSH call runs in the background, so the loop does not wait on one node
    # before contacting the next. The remote halt itself is scheduled one minute out (+1).
    ssh -o ConnectTimeout=5 "root@$node" "shutdown -h +1 'UPS on battery critical - shutting down'" 2>&1 | tee -a "$LOGFILE" &
done

# Fixed pause before the local node schedules its own shutdown.
log_message "Step 4: Așteptare 30 secunde pentru shutdown noduri secundare..."
sleep 30
|
|
|
|
# Closing banner, written before the local shutdown is scheduled.
log_message "Step 5: Oprire nod local (pvemini - primary)..."
log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
log_message "Local node will shutdown in 1 minute"
log_message "========================================"

# Halt the local node last, scheduled one minute from now (+1).
shutdown -h +1 "UPS on battery critical - primary node shutting down"

exit 0
|