Files
ROMFASTSQL/proxmox/ups/scripts/ups-shutdown-cluster.sh
Marius 95f76d7ffb Fix: Add LXC container shutdown to UPS emergency shutdown script
The ups-shutdown-cluster.sh script was missing LXC container shutdown
functionality, only shutting down VMs. This could leave containers
running during UPS power failure, causing ungraceful shutdown.

Changes:
- Added Step 2: LXC container shutdown on all cluster nodes
- Uses 'pct list' to find running containers
- Shuts down each container with 60s timeout
- Parallel shutdown with '&' for speed
- Both local (pvemini) and remote nodes (pve1, pveelite)
- Updated step numbers (now 6 steps total vs 5 before)
- Fixed log_message() to use dynamic timestamp
- Fixed node name comment (pve2 → pveelite)

Shutdown order:
1. VMs on all nodes (timeout 60s)
2. LXC containers on all nodes (timeout 60s) [NEW]
3. Wait 90 seconds for graceful shutdown
4. Secondary nodes shutdown (pve1, pveelite)
5. Wait 30 seconds
6. Primary node shutdown (pvemini)

This matches the behavior in ups-maintenance-shutdown.sh which already
had LXC support.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-06 21:49:28 +03:00

114 lines
3.9 KiB
Bash

#!/bin/bash
#
# Orchestrated shutdown of the Proxmox cluster, run when the UPS is on
# critical battery.
# Author: auto-generated
# Date: 2025-10-06
# Updated: 2025-10-06 - added support for LXC containers

# Log file that the script appends its progress messages to.
LOGFILE="/var/log/ups-shutdown.log"

# Remote nodes handled over SSH: pve1, pveelite (pvemini will be last).
NODES=(
  "10.0.20.200"
  "10.0.20.202"
)
#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOGFILE (read) - path of the file the line is appended to;
#            when unset or empty the line only goes to stdout.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
# Returns:   exit status of tee
#######################################
log_message() {
  # The timestamp is taken at call time, so every line carries its own time.
  # LOGFILE is quoted so a path containing whitespace stays one tee argument;
  # the /dev/null fallback keeps the stdout-only behaviour when it is unset.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "${LOGFILE:-/dev/null}"
}
# Opening banner: record what the UPS reports at the moment the run starts.
# Each value falls back to the text UNKNOWN when the upsc query fails.
log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
log_message "UPS Status: $(upsc nutdev1 ups.status 2>/dev/null || echo 'UNKNOWN')"
log_message "Battery Charge: $(upsc nutdev1 battery.charge 2>/dev/null || echo 'UNKNOWN')%"
log_message "========================================"

# Safety guard: continue only when the status string contains OB or LB.
# NOTE(review): an empty status (upsc failed) also takes the abort branch -
# confirm that this is the intended behaviour.
UPS_STATUS=$(upsc nutdev1 ups.status 2>/dev/null)
case "$UPS_STATUS" in
  *OB* | *LB*)
    # Status matches - fall through to the shutdown steps.
    ;;
  *)
    log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
    exit 0
    ;;
esac
log_message "Step 1: Oprire VM-uri pe toate nodurile..."
# Step 1: request shutdown of every running VM, first on each remote node in
# NODES (over SSH), then on the local node (pvemini).
for node in "${NODES[@]}" localhost; do
  if [[ "$node" == "localhost" ]]; then
    log_message " - Oprire VM-uri pe pvemini (local)..."
    # Local node: call qm directly.
    for vmid in $(qm list | awk 'NR>1 {print $1}'); do
      vm_status=$(qm status "$vmid" | awk '{print $2}')
      if [[ "$vm_status" == "running" ]]; then
        log_message " * Oprire VM $vmid pe pvemini..."
        # Backgrounded so the VMs are asked to stop in parallel.
        qm shutdown "$vmid" --timeout 60 &
      fi
    done
  else
    log_message " - Oprire VM-uri pe $node..."
    # Remote node: run the same loop through SSH. Inside the quoted script,
    # \$ is expanded on the remote side and $node is expanded locally.
    # BatchMode=yes makes ssh fail instead of waiting on a password or
    # passphrase prompt, which would stall the emergency shutdown.
    # NOTE(review): ssh presumably stays attached until the backgrounded qm
    # jobs close their output - confirm if per-node timing matters.
    ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" "
      for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
        vm_status=\$(qm status \"\$vmid\" | awk '{print \$2}')
        if [ \"\$vm_status\" = \"running\" ]; then
          echo \" * Oprire VM \$vmid pe $node...\"
          qm shutdown \"\$vmid\" --timeout 60 &
        fi
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
done
log_message "Step 2: Oprire containere LXC pe toate nodurile..."
# Step 2: request shutdown of every running LXC container, first on each
# remote node in NODES (over SSH), then on the local node (pvemini).
for node in "${NODES[@]}" localhost; do
  if [[ "$node" == "localhost" ]]; then
    log_message " - Oprire LXC pe pvemini (local)..."
    # Local node: keep the rows of 'pct list' whose second column is
    # "running" and feed their IDs to the loop. read -r keeps the ID verbatim.
    pct list 2>/dev/null \
      | awk 'NR>1 && $2=="running" {print $1}' \
      | while IFS= read -r ctid; do
          log_message " * Oprire container $ctid pe pvemini..."
          # Backgrounded so the containers are asked to stop in parallel.
          pct shutdown "$ctid" --timeout 60 &
        done
  else
    log_message " - Oprire LXC pe $node..."
    # Remote node: same pipeline through SSH. Inside the quoted script, \$ is
    # expanded on the remote side and $node is expanded locally.
    # BatchMode=yes makes ssh fail instead of waiting on a password or
    # passphrase prompt, which would stall the emergency shutdown.
    ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" "
      pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while IFS= read -r ctid; do
        echo \" * Oprire container \$ctid pe $node...\"
        pct shutdown \"\$ctid\" --timeout 60 &
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
done
# Step 3: fixed pause between the guest shutdown requests and the node
# power-off.
log_message "Step 3: Așteptare 90 secunde pentru oprirea VM-urilor și LXC..."
sleep 90

# Step 4: schedule a halt in one minute on every remote node. Each ssh
# pipeline is backgrounded so an unreachable node does not delay the others.
log_message "Step 4: Oprire noduri secundare (pve1, pveelite)..."
for node in "${NODES[@]}"; do
  log_message " - Shutdown nod $node..."
  # BatchMode=yes makes ssh fail instead of waiting on a password or
  # passphrase prompt.
  ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" \
    "shutdown -h +1 'UPS on battery critical - shutting down'" 2>&1 \
    | tee -a "$LOGFILE" &
done

# Step 5: pause before the local node is scheduled to halt.
log_message "Step 5: Așteptare 30 secunde pentru shutdown noduri secundare..."
sleep 30

log_message "Step 6: Oprire nod local (pvemini - primary)..."
log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
log_message "Local node will shutdown in 1 minute"
log_message "========================================"

# Step 6: halt the local node last, one minute from now.
shutdown -h +1 "UPS on battery critical - primary node shutting down"
exit 0