Files
ROMFASTSQL/proxmox/ups/scripts/ups-shutdown-cluster.sh
Marius ab6ac77d50 Add UPS email notifications and automatic UPS shutdown
- Add email notifications via PVE::Notify for all UPS events:
  - ONBATT: when UPS switches to battery
  - ONLINE: when power is restored
  - LOWBATT: critical battery level
  - SHUTDOWN_START/NODE/PRIMARY: during cluster shutdown
  - COMMBAD: communication lost with UPS

- Add automatic UPS shutdown command after cluster shutdown
  (protects against power surge when power returns)

- Update upssched.conf with ONLINE handler and immediate ONBATT notification

- Add notification templates for HTML and text emails

- Update documentation with new features and timer configuration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 20:11:30 +02:00

217 lines
7.1 KiB
Bash

#!/bin/bash
#
# Script de shutdown orchestrat pentru cluster Proxmox cand UPS este pe baterie critica
# Trimite notificari email via PVE::Notify pentru fiecare pas
#
# Creat: 2025-10-06
# Actualizat: 2026-01-13 - Adaugat notificari email si UPS shutdown
# ----------------------------- Configuration ---------------------------------

# File that receives a copy of every log line.
LOGFILE="/var/log/ups-shutdown.log"

# Secondary nodes, addressed by IP. The local node (pvemini) is not listed
# because it is shut down last.
NODES=(
  "10.0.20.200"
  "10.0.20.202"
)

# Display names used in notifications, in the same order as NODES.
NODE_NAMES=(
  "pve1"
  "pveelite"
)

# UPS name and credentials used with upsc / upscmd.
# NOTE(review): the password is stored in plain text in this script - consider
# restricting the file's permissions or loading it from a protected file.
UPS_NAME="nutdev1"
UPS_USER="admin"
UPS_PASS="parola99"

# Directory of the notification templates.
TEMPLATE_DIR="/etc/pve/notification-templates/default"

# Short and fully-qualified name of the local host; the FQDN falls back to the
# plain hostname when "hostname -f" fails.
HOSTNAME="$(hostname)"
FQDN="$(hostname -f 2>/dev/null || hostname)"
# Emit one log line: "[YYYY-MM-DD HH:MM:SS] <message>" goes to stdout and is
# appended to $LOGFILE; the bare message is also sent to syslog with the
# "ups-shutdown" tag.
# Arguments: $1 - message text
log_message() {
  local msg="$1"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "$stamp" "$msg" | tee -a "$LOGFILE"
  logger -t ups-shutdown "$msg"
}
# Print a three-line snapshot of the UPS named by $UPS_NAME, as reported by
# upsc: status, battery charge and input voltage. When upsc fails, the status
# is printed as UNKNOWN and the two numeric readings as "?".
get_ups_info() {
  local status charge voltage
  status="$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo 'UNKNOWN')"
  charge="$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo '?')"
  voltage="$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo '?')"
  printf 'Status: %s\nBattery: %s%%\nInput: %sV\n' "$status" "$charge" "$voltage"
}
# Send one UPS event notification through PVE::Notify.
#
# Arguments:
#   $1 - event type (callers in this file pass SHUTDOWN_START, SHUTDOWN_NODE,
#        SHUTDOWN_PRIMARY)
#   $2 - event title (also written to the log)
#   $3 - event description
#   $4 - severity handed to PVE::Notify::notify (callers pass "error")
# Globals read: UPS_NAME, FQDN, HOSTNAME, LOGFILE
# Outputs: perl's stdout and stderr are echoed and appended to $LOGFILE.
#
# All values reach perl through environment variables and the heredoc is
# quoted, so the Perl program is a fixed text. Splicing the values into Perl
# string literals would let a quote or backslash in a title, description or
# upsc reading break the program or run as code.
#
# NOTE(review): the call is synchronous; if the configured notification target
# can block (e.g. an unreachable mail server), it delays the shutdown sequence.
send_notification() {
  local event_type="$1"
  local event_title="$2"
  local event_desc="$3"
  local severity="$4"
  local ups_status battery_charge input_voltage event_date

  # Current UPS readings, with placeholders when upsc cannot be queried.
  ups_status="$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo "UNKNOWN")"
  battery_charge="$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo "0")"
  input_voltage="$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo "0")"
  event_date="$(date '+%Y-%m-%d %H:%M:%S')"

  log_message "Sending notification: $event_title"

  UPS_NOTIFY_FQDN="$FQDN" \
  UPS_NOTIFY_NODE="$HOSTNAME" \
  UPS_NOTIFY_EVENT_DATE="$event_date" \
  UPS_NOTIFY_EVENT_TYPE="$event_type" \
  UPS_NOTIFY_EVENT_TITLE="$event_title" \
  UPS_NOTIFY_EVENT_DESC="$event_desc" \
  UPS_NOTIFY_SEVERITY="$severity" \
  UPS_NOTIFY_UPS_STATUS="$ups_status" \
  UPS_NOTIFY_BATTERY_CHARGE="$battery_charge" \
  UPS_NOTIFY_INPUT_VOLTAGE="$input_voltage" \
  perl -I/usr/share/perl5 <<'EOFPERL' 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my $template_data = {
    'hostname'          => $ENV{UPS_NOTIFY_FQDN},
    'event_date'        => $ENV{UPS_NOTIFY_EVENT_DATE},
    'event_type'        => $ENV{UPS_NOTIFY_EVENT_TYPE},
    'event_title'       => $ENV{UPS_NOTIFY_EVENT_TITLE},
    'event_description' => $ENV{UPS_NOTIFY_EVENT_DESC},
    'event_class'       => 'shutdown',
    'alert_type'        => 'danger',
    'ups_status'        => $ENV{UPS_NOTIFY_UPS_STATUS},
    'battery_charge'    => $ENV{UPS_NOTIFY_BATTERY_CHARGE},
    'input_voltage'     => $ENV{UPS_NOTIFY_INPUT_VOLTAGE},
    'action_taken'      => 'Shutdown in curs',
    'next_steps'        => ''
};

my $fields = {
    'hostname' => $ENV{UPS_NOTIFY_NODE},
    'type'     => 'ups-power-event'
};

# A failed notification is reported but must never stop the shutdown.
eval {
    PVE::Notify::notify($ENV{UPS_NOTIFY_SEVERITY}, 'ups-power-event', $template_data, $fields);
    print "Notification sent\n";
};
if ($@) {
    print STDERR "Notification failed: $@\n";
}
EOFPERL
}
# =============================================================================
# Main sequence - executed top to bottom when the script is invoked:
#   - abort unless the UPS status contains OB or LB
#   - Step 1/2: ask running VMs, then running LXC containers, to shut down on
#               every secondary node (over ssh) and on the local node
#   - Step 3:   wait 90 s for the guests
#   - Step 4/5: schedule "shutdown -h +1" on each secondary node, wait 60 s
#   - Step 6/7: send the last email, then tell the UPS to power off
#   - finally:  schedule "shutdown -h +1" on the local node
# =============================================================================

# BatchMode=yes makes ssh fail at once instead of waiting for a password or
# passphrase that nobody is there to type; ConnectTimeout=5 bounds the wait
# for a node that is already down.
SSH_OPTS=(-o BatchMode=yes -o ConnectTimeout=5)

log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
log_message "$(get_ups_info)"
log_message "========================================"

# Safety check: continue only while the UPS status contains OB or LB. An empty
# status (upsc failed) also takes the abort branch.
UPS_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
if [[ ! "$UPS_STATUS" =~ (OB|LB) ]]; then
  log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
  exit 0
fi

# Email: shutdown start
send_notification \
  "SHUTDOWN_START" \
  "Shutdown cluster PORNIT" \
  "UPS pe baterie critica. Se initiaza oprirea ordonata a cluster-ului Proxmox." \
  "error"

log_message "Step 1: Oprire VM-uri pe toate nodurile..."
# Stop the VMs on every node, the local one included. Each "qm shutdown" is
# started in the background so the guests stop in parallel.
for node in "${NODES[@]}" localhost; do
  if [[ "$node" == "localhost" ]]; then
    NODE_NAME="pvemini (local)"
  else
    NODE_NAME=$node
  fi
  log_message " - Oprire VM-uri pe $NODE_NAME..."

  if [[ "$node" == "localhost" ]]; then
    mapfile -t local_vmids < <(qm list | awk 'NR>1 {print $1}')
    for vmid in "${local_vmids[@]}"; do
      vm_status=$(qm status "$vmid" | awk '{print $2}')
      if [[ "$vm_status" == "running" ]]; then
        log_message " * Oprire VM $vmid pe pvemini..."
        qm shutdown "$vmid" --timeout 60 &
      fi
    done
  else
    # The script below runs on the remote node: \$ defers expansion to the
    # remote shell, while $node is filled in locally before sending.
    ssh "${SSH_OPTS[@]}" "root@$node" "
      for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
        vm_status=\$(qm status \"\$vmid\" | awk '{print \$2}')
        if [ \"\$vm_status\" == \"running\" ]; then
          echo ' * Oprire VM '\$vmid' pe $node...'
          qm shutdown \"\$vmid\" --timeout 60 &
        fi
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
done

log_message "Step 2: Oprire containere LXC pe toate nodurile..."
# Stop the LXC containers on every node; "pct list" already reports the state
# in its second column, so only running containers are selected.
for node in "${NODES[@]}" localhost; do
  if [[ "$node" == "localhost" ]]; then
    NODE_NAME="pvemini (local)"
  else
    NODE_NAME=$node
  fi
  log_message " - Oprire LXC pe $NODE_NAME..."

  if [[ "$node" == "localhost" ]]; then
    pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' | while read -r ctid; do
      log_message " * Oprire container $ctid pe pvemini..."
      pct shutdown "$ctid" --timeout 60 &
    done
  else
    ssh "${SSH_OPTS[@]}" "root@$node" "
      pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while read -r ctid; do
        echo ' * Oprire container '\$ctid' pe $node...'
        pct shutdown \"\$ctid\" --timeout 60 &
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
done

log_message "Step 3: Asteptare 90 secunde pentru oprirea VM-urilor si LXC..."
sleep 90

log_message "Step 4: Oprire noduri secundare..."
# Shut down the secondary nodes and send one notification per node.
for i in "${!NODES[@]}"; do
  node="${NODES[$i]}"
  # Fall back to the address when NODE_NAMES has no entry for this index.
  node_name="${NODE_NAMES[$i]:-$node}"
  log_message " - Shutdown nod $node_name ($node)..."

  # Email: secondary node shutdown
  send_notification \
    "SHUTDOWN_NODE" \
    "Shutdown $node_name trimis" \
    "Comanda shutdown a fost trimisa catre nodul $node_name ($node)." \
    "error"

  # Backgrounded so an unreachable node does not hold up the others.
  ssh "${SSH_OPTS[@]}" "root@$node" \
    "shutdown -h +1 'UPS battery critical - shutting down'" 2>&1 | tee -a "$LOGFILE" &
done

log_message "Step 5: Asteptare 60 secunde pentru shutdown noduri secundare..."
sleep 60

log_message "Step 6: Oprire nod local (pvemini - primary)..."
# Email: primary node shutdown (last email before the node goes down)
send_notification \
  "SHUTDOWN_PRIMARY" \
  "Shutdown pvemini (ULTIMUL NOD)" \
  "Se opreste nodul primary pvemini. Acesta este ultimul nod din cluster. UPS-ul se va opri dupa shutdown." \
  "error"

log_message "Step 7: Oprire UPS dupa shutdown..."
# Tell the UPS to switch off, preferring shutdown.stayoff over shutdown.return.
# The list of supported instant commands is fetched once and checked for both.
# NOTE(review): this is issued before the local "shutdown -h +1" below, so it
# presumably relies on the UPS shutdown delay being longer than the time this
# node needs to halt - confirm the configured delay.
ups_commands=$(upscmd -l "$UPS_NAME" 2>/dev/null)
if grep -q "shutdown.stayoff" <<<"$ups_commands"; then
  log_message " - Comanda UPS shutdown.stayoff (oprire completa)..."
  upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.stayoff 2>&1 | tee -a "$LOGFILE"
elif grep -q "shutdown.return" <<<"$ups_commands"; then
  log_message " - Comanda UPS shutdown.return (oprire cu restart la revenire curent)..."
  upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.return 2>&1 | tee -a "$LOGFILE"
else
  log_message " - WARNING: Nu s-a gasit comanda UPS shutdown disponibila"
fi

log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
log_message "$(get_ups_info)"
log_message "Local node will shutdown in 1 minute"
log_message "========================================"

# Shut down the local node (the last one).
shutdown -h +1 "UPS battery critical - primary node shutting down"
exit 0