Add complete UPS monitoring system with monthly battery testing
This commit adds a comprehensive UPS monitoring and management system for the Proxmox cluster with automated shutdown orchestration and monthly battery health testing. Features: - NUT (Network UPS Tools) configuration for INNO TECH USB UPS - Automated cluster shutdown on power failure (3-minute grace period) - Monthly automated battery testing with health evaluation - Email notifications via PVE::Notify system - WinNUT monitoring client for Windows VM 201 Components added: - config/: NUT configuration files (ups.conf, upsd.conf, upsmon.conf, etc.) - scripts/ups-shutdown-cluster.sh: Orchestrated cluster shutdown - scripts/ups-monthly-test.sh: Monthly battery test with email reports - scripts/upssched-cmd: Event handler for UPS state changes - docs/: Complete installation and usage documentation Key findings: - UPS battery.charge reporting has 10-40 second delay after test start - Test must monitor voltage drop (1.5-2V) and charge drop (9-27%) - Battery health evaluation: EXCELLENT/GOOD/FAIR/POOR based on discharge rate - Email notifications use Handlebars templates without Unicode emojis for compatibility Configuration: - UPS: INNO TECH (Voltronic protocol, vendor 0665:5161) - Primary node: pvemini (10.0.20.201) with USB connection - Monthly test: cron 0 0 1 * * /opt/scripts/ups-monthly-test.sh - Shutdown timer: 180 seconds on battery before cluster shutdown Documentation includes complete installation guides for NUT server, WinNUT client, and troubleshooting procedures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
63
proxmox/ups/scripts/ups-shutdown-test.sh
Normal file
63
proxmox/ups/scripts/ups-shutdown-test.sh
Normal file
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# TEST script for the orchestrated shutdown - it does NOT shut anything down
|
||||
#
|
||||
|
||||
# File that receives a copy of every message printed by this dry run.
LOGFILE="/var/log/ups-shutdown-test.log"

# Addresses of the other cluster nodes that are inspected over SSH.
# The local node is handled separately and is not listed here.
NODES=("10.0.20.200" "10.0.20.202")
|
||||
|
||||
#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOGFILE (read) - path of the file the line is appended to
# Arguments: $1 - message text (may be empty)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
#######################################
log_message() {
  local timestamp
  # Evaluate the clock on every call; a literal date here would stamp
  # every log line with the same moment.
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$timestamp" "$1" | tee -a "$LOGFILE"
}
|
||||
|
||||
# UPS identifier passed to upsc for every query below.
UPS_NAME="nutdev1"

#######################################
# Print the current value of one UPS variable.
# Globals:   UPS_NAME (read)
# Arguments: $1 - variable name understood by upsc (e.g. battery.charge)
# Outputs:   the value reported by upsc, or UNKNOWN when upsc fails
#######################################
ups_value() {
  # upsc errors are silenced on purpose: the banner must still be written
  # when the UPS cannot be queried.
  upsc "$UPS_NAME" "$1" 2>/dev/null || echo 'UNKNOWN'
}

# Opening banner: records the UPS state at the moment the dry run starts.
log_message "========================================"
log_message "UPS SHUTDOWN TEST STARTED (DRY RUN)"
log_message "UPS Status: $(ups_value ups.status)"
log_message "Battery Charge: $(ups_value battery.charge)%"
log_message "Input Voltage: $(ups_value input.voltage)V"
log_message "Output Voltage: $(ups_value output.voltage)V"
log_message "========================================"
|
||||
|
||||
log_message "TEST: Ar opri VM-urile de pe toate nodurile..."

# Shell snippet executed on each remote node. It prints one line per VM
# with its id, name and status. The \$ and \" escapes keep the expansions
# from happening locally, so they are evaluated by the remote shell.
remote_vm_listing="
for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
  vm_name=\$(qm config \"\$vmid\" | grep '^name:' | cut -d' ' -f2)
  vm_status=\$(qm status \"\$vmid\" | awk '{print \$2}')
  echo \" * VM \$vmid (\$vm_name): \$vm_status\"
done
"

# Walk the remote nodes first, then the local node. Nothing is stopped:
# the loop only reports which VMs exist and what state they are in.
for node in "${NODES[@]}" localhost; do
  if [[ "$node" == "localhost" ]]; then
    NODE_NAME="pvemini (local)"
  else
    NODE_NAME=$node
  fi

  log_message " - VM-uri pe $NODE_NAME:"

  if [[ "$node" == "localhost" ]]; then
    # Collect the ids first (the awk filter skips the header row of
    # `qm list`) so the loop iterates over a properly quoted array.
    mapfile -t local_vmids < <(qm list | awk 'NR>1 {print $1}')
    for vmid in "${local_vmids[@]}"; do
      vm_name=$(qm config "$vmid" | grep '^name:' | cut -d' ' -f2)
      vm_status=$(qm status "$vmid" | awk '{print $2}')
      log_message " * VM $vmid ($vm_name): $vm_status"
    done
  else
    # stderr is merged into stdout so SSH/connection errors are logged too.
    # Each remote line goes through log_message so that it gets the same
    # timestamp prefix as the lines produced for the local node.
    ssh -o ConnectTimeout=5 "root@${node}" "$remote_vm_listing" 2>&1 \
      | while IFS= read -r remote_line || [[ -n "$remote_line" ]]; do
          log_message "$remote_line"
        done
  fi
done
|
||||
|
||||
# Closing report: the order a real shutdown would follow, then a footer
# stating that this run changed nothing. Each entry is logged as one line;
# empty entries produce the blank separator lines.
summary_lines=(
  ""
  "TEST: Ordinea de shutdown ar fi:"
  " 1. Toate VM-urile de pe toate nodurile (paralel)"
  " 2. Așteptare 90 secunde"
  " 3. Shutdown pve1 (10.0.20.200)"
  " 4. Shutdown pve2 (10.0.20.202)"
  " 5. Așteptare 30 secunde"
  " 6. Shutdown pvemini (10.0.20.201) - PRIMARY/LAST"
  ""
  "========================================"
  "UPS SHUTDOWN TEST COMPLETED (DRY RUN)"
  "NICIUN sistem nu a fost oprit - doar test"
  "========================================"
)

for summary_line in "${summary_lines[@]}"; do
  log_message "$summary_line"
done

# The dry run always reports success.
exit 0
|
||||
Reference in New Issue
Block a user