#!/bin/bash
#
# Orchestrated shutdown of a Proxmox cluster when the UPS is on critical
# battery. An email notification is sent through PVE::Notify at each major
# step.
#
# Sequence:
#   1. stop VMs on every node (remote nodes first, local node last)
#   2. stop LXC containers on every node
#   3. wait 90 s for guests to stop
#   4. schedule shutdown of the secondary nodes
#   5. wait 60 s
#   6. announce shutdown of the local (last) node
#   7. instruct the UPS to power off, then schedule the local shutdown
#
# Strict mode (set -e / pipefail) is deliberately NOT enabled: every step is
# best-effort, and an unreachable node or a failed notification must never
# abort the remaining shutdown sequence.
#
# Created: 2025-10-06
# Updated: 2026-01-13 - added email notifications and UPS shutdown

LOGFILE=/var/log/ups-shutdown.log
NODES=("10.0.20.200" "10.0.20.202") # pve1, pveelite (pvemini goes last)
NODE_NAMES=("pve1" "pveelite")      # names used in notifications, same order as NODES
UPS_NAME="nutdev1"
UPS_USER="admin"
# NOTE(review): credential is hard-coded and is passed to upscmd on argv
# (visible in `ps`); consider restricting this file's permissions or loading
# the value from a root-only file.
UPS_PASS="parola99"
# NOTE(review): not referenced anywhere in this script; kept in case an
# external consumer relies on it - confirm and remove if unused.
TEMPLATE_DIR="/etc/pve/notification-templates/default"
HOSTNAME=$(hostname)
FQDN=$(hostname -f 2>/dev/null || hostname)

#######################################
# Write a timestamped line to stdout and to LOGFILE, and to syslog.
# Globals:   LOGFILE (read)
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and appended to LOGFILE
#######################################
log_message() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOGFILE"
  logger -t ups-shutdown -- "$1"
}

#######################################
# Print one UPS variable, or a fallback when upsc fails.
# Globals:   UPS_NAME (read)
# Arguments: $1 - NUT variable name (e.g. ups.status)
#            $2 - fallback text printed if upsc exits non-zero
# Outputs:   the value (or fallback) on stdout
#######################################
ups_value() {
  upsc "$UPS_NAME" "$1" 2>/dev/null || printf '%s\n' "$2"
}

#######################################
# Print a three-line summary of the UPS state (status, battery, input).
# Outputs:   summary on stdout
#######################################
get_ups_info() {
  printf 'Status: %s\n' "$(ups_value ups.status 'UNKNOWN')"
  printf 'Battery: %s%%\n' "$(ups_value battery.charge '?')"
  printf 'Input: %sV\n' "$(ups_value input.voltage '?')"
}

#######################################
# Send an email notification through PVE::Notify.
# Globals:   FQDN, HOSTNAME, LOGFILE (read)
# Arguments: $1 - event type (e.g. SHUTDOWN_START)
#            $2 - event title
#            $3 - event description
#            $4 - severity passed to PVE::Notify::notify
# Outputs:   Perl output (stdout and stderr) is echoed and appended to LOGFILE
# Returns:   status of the tee stage; notification failures are only logged
#######################################
send_notification() {
  local event_type=$1
  local event_title=$2
  local event_desc=$3
  local severity=$4
  local ups_status battery_charge input_voltage event_date

  ups_status=$(ups_value ups.status 'UNKNOWN')
  battery_charge=$(ups_value battery.charge '0')
  input_voltage=$(ups_value input.voltage '0')
  event_date=$(date '+%Y-%m-%d %H:%M:%S')

  log_message "Sending notification: $event_title"

  # Values are handed to Perl through the environment and the here-document
  # delimiter is quoted, so no shell text is ever spliced into Perl source:
  # quotes or backslashes in a title/description cannot break the program.
  NOTIFY_FQDN="$FQDN" \
  NOTIFY_HOSTNAME="$HOSTNAME" \
  NOTIFY_EVENT_DATE="$event_date" \
  NOTIFY_EVENT_TYPE="$event_type" \
  NOTIFY_EVENT_TITLE="$event_title" \
  NOTIFY_EVENT_DESC="$event_desc" \
  NOTIFY_SEVERITY="$severity" \
  NOTIFY_UPS_STATUS="$ups_status" \
  NOTIFY_BATTERY_CHARGE="$battery_charge" \
  NOTIFY_INPUT_VOLTAGE="$input_voltage" \
  perl -I/usr/share/perl5 <<'EOFPERL' 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my $template_data = {
    'hostname'          => $ENV{NOTIFY_FQDN},
    'event_date'        => $ENV{NOTIFY_EVENT_DATE},
    'event_type'        => $ENV{NOTIFY_EVENT_TYPE},
    'event_title'       => $ENV{NOTIFY_EVENT_TITLE},
    'event_description' => $ENV{NOTIFY_EVENT_DESC},
    'event_class'       => 'shutdown',
    'alert_type'        => 'danger',
    'ups_status'        => $ENV{NOTIFY_UPS_STATUS},
    'battery_charge'    => $ENV{NOTIFY_BATTERY_CHARGE},
    'input_voltage'     => $ENV{NOTIFY_INPUT_VOLTAGE},
    'action_taken'      => 'Shutdown in curs',
    'next_steps'        => ''
};

my $fields = {
    'hostname' => $ENV{NOTIFY_HOSTNAME},
    'type'     => 'ups-power-event'
};

eval {
    PVE::Notify::notify($ENV{NOTIFY_SEVERITY}, 'ups-power-event', $template_data, $fields);
    print "Notification sent\n";
};
if ($@) {
    print STDERR "Notification failed: $@\n";
}
EOFPERL
}

#######################################
# Request shutdown of every running VM on one node.
# Each `qm shutdown` is started in the background so guests stop in parallel.
# Globals:   LOGFILE (read)
# Arguments: $1 - node address, or the literal "localhost" for this node
#######################################
stop_vms_on_node() {
  local node=$1
  local vmid vm_status

  if [[ "$node" == "localhost" ]]; then
    # VMIDs are whitespace-free tokens, so word-splitting the list is intended.
    # shellcheck disable=SC2013
    for vmid in $(qm list | awk 'NR>1 {print $1}'); do
      vm_status=$(qm status "$vmid" | awk '{print $2}')
      if [[ "$vm_status" == "running" ]]; then
        log_message " * Oprire VM $vmid pe pvemini..."
        qm shutdown "$vmid" --timeout 60 &
      fi
    done
  else
    # The escaped \$ parts are evaluated on the remote node; $node is expanded
    # locally so the remote log line names the node it ran on.
    ssh -o ConnectTimeout=5 "root@${node}" "
      for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
        vm_status=\$(qm status \$vmid | awk '{print \$2}')
        if [ \"\$vm_status\" == \"running\" ]; then
          echo ' * Oprire VM '\$vmid' pe $node...'
          qm shutdown \$vmid --timeout 60 &
        fi
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
}

#######################################
# Request shutdown of every running LXC container on one node.
# Each `pct shutdown` is started in the background so guests stop in parallel.
# Globals:   LOGFILE (read)
# Arguments: $1 - node address, or the literal "localhost" for this node
#######################################
stop_containers_on_node() {
  local node=$1
  local ctid

  if [[ "$node" == "localhost" ]]; then
    pct list 2>/dev/null \
      | awk 'NR>1 && $2=="running" {print $1}' \
      | while read -r ctid; do
          log_message " * Oprire container $ctid pe pvemini..."
          pct shutdown "$ctid" --timeout 60 &
        done
  else
    ssh -o ConnectTimeout=5 "root@${node}" "
      pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while read ctid; do
        echo ' * Oprire container '\$ctid' pe $node...'
        pct shutdown \$ctid --timeout 60 &
      done
    " 2>&1 | tee -a "$LOGFILE"
  fi
}

#######################################
# Print the label used in log lines for a node.
# Arguments: $1 - node address, or "localhost"
# Outputs:   "pvemini (local)" for localhost, otherwise the address unchanged
#######################################
node_label() {
  if [[ "$1" == "localhost" ]]; then
    printf '%s\n' "pvemini (local)"
  else
    printf '%s\n' "$1"
  fi
}

#######################################
# Run the full shutdown sequence.
# Exits 0 without doing anything when the UPS status contains neither OB nor LB.
#######################################
main() {
  local ups_status node node_name i ups_commands

  log_message "========================================"
  log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
  log_message "$(get_ups_info)"
  log_message "========================================"

  # Proceed only if the UPS really reports on-battery (OB) or low-battery (LB).
  # An empty status (upsc failed) also aborts here.
  ups_status=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
  if [[ ! $ups_status =~ (OB|LB) ]]; then
    log_message "WARNING: UPS status is $ups_status - not critical. Aborting shutdown."
    exit 0
  fi

  # Email: shutdown started
  send_notification \
    "SHUTDOWN_START" \
    "Shutdown cluster PORNIT" \
    "UPS pe baterie critica. Se initiaza oprirea ordonata a cluster-ului Proxmox." \
    "error"

  log_message "Step 1: Oprire VM-uri pe toate nodurile..."
  # Stop VMs on every node, the local one included (handled last).
  for node in "${NODES[@]}" localhost; do
    log_message " - Oprire VM-uri pe $(node_label "$node")..."
    stop_vms_on_node "$node"
  done

  log_message "Step 2: Oprire containere LXC pe toate nodurile..."
  # Stop LXC containers on every node.
  for node in "${NODES[@]}" localhost; do
    log_message " - Oprire LXC pe $(node_label "$node")..."
    stop_containers_on_node "$node"
  done

  log_message "Step 3: Asteptare 90 secunde pentru oprirea VM-urilor si LXC..."
  sleep 90

  log_message "Step 4: Oprire noduri secundare..."
  # Schedule shutdown of each secondary node and send one notification per node.
  for i in "${!NODES[@]}"; do
    node="${NODES[$i]}"
    node_name="${NODE_NAMES[$i]}"

    log_message " - Shutdown nod $node_name ($node)..."

    # Email: secondary node shutdown
    send_notification \
      "SHUTDOWN_NODE" \
      "Shutdown $node_name trimis" \
      "Comanda shutdown a fost trimisa catre nodul $node_name ($node)." \
      "error"

    # Backgrounded so one slow or unreachable node does not delay the others.
    ssh -o ConnectTimeout=5 "root@${node}" \
      "shutdown -h +1 'UPS battery critical - shutting down'" 2>&1 \
      | tee -a "$LOGFILE" &
  done

  log_message "Step 5: Asteptare 60 secunde pentru shutdown noduri secundare..."
  sleep 60

  log_message "Step 6: Oprire nod local (pvemini - primary)..."

  # Email: primary node shutdown (last email before this host goes down)
  send_notification \
    "SHUTDOWN_PRIMARY" \
    "Shutdown pvemini (ULTIMUL NOD)" \
    "Se opreste nodul primary pvemini. Acesta este ultimul nod din cluster. UPS-ul se va opri dupa shutdown." \
    "error"

  log_message "Step 7: Oprire UPS dupa shutdown..."
  # Ask the UPS to power off; prefer stay-off, fall back to return-on-power.
  # NOTE(review): this is issued BEFORE the local `shutdown -h +1` below, so it
  # presumably relies on the UPS's own shutdown delay being longer than the
  # time this node needs to halt - confirm the UPS delay setting.
  ups_commands=$(upscmd -l "$UPS_NAME" 2>/dev/null)
  if grep -qF "shutdown.stayoff" <<<"$ups_commands"; then
    log_message " - Comanda UPS shutdown.stayoff (oprire completa)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.stayoff 2>&1 \
      | tee -a "$LOGFILE"
  elif grep -qF "shutdown.return" <<<"$ups_commands"; then
    log_message " - Comanda UPS shutdown.return (oprire cu restart la revenire curent)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.return 2>&1 \
      | tee -a "$LOGFILE"
  else
    log_message " - WARNING: Nu s-a gasit comanda UPS shutdown disponibila"
  fi

  log_message "========================================"
  log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
  log_message "$(get_ups_info)"
  log_message "Local node will shutdown in 1 minute"
  log_message "========================================"

  # Shut down the local node (the last one standing).
  shutdown -h +1 "UPS battery critical - primary node shutting down"

  exit 0
}

main "$@"