Reorganize proxmox documentation into subdirectories per LXC/VM
- Create cluster/ for Proxmox cluster infrastructure (SSH guide, HA monitor, UPS)
- Create lxc108-oracle/ for Oracle Database documentation and scripts
- Create vm201-windows/ for Windows 11 VM docs and SSL certificate scripts
- Add SSL certificate monitoring scripts (check-ssl-certificates.ps1, monitor-ssl-certificates.sh)
- Remove archived VM107 references (decommissioned)
- Update all cross-references between files
- Update main README.md with new structure and navigation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
230
proxmox/cluster/ups/scripts/ups-shutdown-cluster.sh
Normal file
230
proxmox/cluster/ups/scripts/ups-shutdown-cluster.sh
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/bin/bash
#
# Orchestrated shutdown script for the Proxmox cluster when the UPS is on
# critical battery.
# Sends an email notification via PVE::Notify for each step.
#
# Created: 2025-10-06
# Updated: 2026-01-14 - Fixed log file permissions, added sudo for PVE::Notify

# Log file that every step appends to.
LOGFILE=/var/log/ups-shutdown.log

# Secondary nodes, addressed by IP. NODES and NODE_NAMES are parallel arrays:
# they are indexed together when the secondary nodes are shut down, so both
# must keep the same length and order.
NODES=("10.0.20.200" "10.0.20.202")  # pve1, pveelite (pvemini goes last)
NODE_NAMES=("pve1" "pveelite")       # Names used in notifications

# UPS name and credentials used with upsc / upscmd.
UPS_NAME="nutdev1"
UPS_USER="admin"
# NOTE(review): credential stored in plain text in a versioned file - consider
# moving it to a root-only file and rotating it.
UPS_PASS="parola99"

# NOTE(review): not referenced anywhere in the visible part of this script.
TEMPLATE_DIR="/etc/pve/notification-templates/default"

# Short and fully-qualified host names used in the notification payload.
HOSTNAME=$(hostname)
FQDN=$(hostname -f 2>/dev/null || hostname)

# Make sure the log file exists and has the right permissions.
# The script may run as the 'nut' user, so the file must be writable by it.
#
# Reads:   LOGFILE
# Effects: creates the file if it is missing (falling back to sudo when the
#          current user cannot), and, if the file is still not writable,
#          hands it to nut:nut with mode 664.
ensure_logfile() {
    # Nothing sensible can be done without a path.
    [ -n "$LOGFILE" ] || return 0

    if [ ! -f "$LOGFILE" ]; then
        # Try as the current user first; escalate only if that fails.
        # 'sudo -n' fails instead of prompting, so a missing NOPASSWD rule
        # can never stall the shutdown sequence on a password prompt.
        touch "$LOGFILE" 2>/dev/null || sudo -n touch "$LOGFILE"
    fi

    if [ ! -w "$LOGFILE" ]; then
        sudo -n chown nut:nut "$LOGFILE" 2>/dev/null
        sudo -n chmod 664 "$LOGFILE" 2>/dev/null
    fi
}

ensure_logfile

# Write one timestamped message to stdout and to the log file, and forward
# the bare message to syslog under the tag "ups-shutdown".
#
# Arguments:
#   $1  message text
# Reads: LOGFILE
log_message() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')

    # tee errors are discarded so an unwritable log file never hides the
    # message from stdout.
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOGFILE" 2>/dev/null

    # '--' keeps a message that starts with '-' from being parsed as an option.
    logger -t ups-shutdown -- "$1"
}

# Print a three-line UPS summary (status, battery charge, input voltage).
# Each value is read with upsc; when a query fails a placeholder is printed
# instead ("UNKNOWN" for the status, "?" for the numeric readings).
#
# Reads: UPS_NAME
get_ups_info() {
    echo "Status: $(upsc "$UPS_NAME" ups.status 2>/dev/null || echo 'UNKNOWN')"
    echo "Battery: $(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo '?')%"
    echo "Input: $(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo '?')V"
}

# Escape a value so it can be embedded inside a Perl single-quoted string
# literal. Only backslash and single quote are special there, so each one is
# prefixed with a backslash (backslashes first, so the ones added for quotes
# are not doubled again).
_perl_sq() {
    local text=$1 bs='\' sq="'"
    text=${text//"$bs"/$bs$bs}
    text=${text//"$sq"/$bs$sq}
    printf '%s' "$text"
}

# Send an email notification via PVE::Notify.
#
# Arguments:
#   $1  event type        (e.g. SHUTDOWN_START)
#   $2  event title
#   $3  event description
#   $4  severity handed to PVE::Notify::notify
#
# Reads: UPS_NAME, FQDN, HOSTNAME, LOGFILE
#
# The current UPS status, battery charge and input voltage are queried with
# upsc and added to the template data; when a query fails, "UNKNOWN" / "0"
# is used instead. The output of the Perl helper (stdout and stderr) is
# appended to the log file.
send_notification() {
    local event_title_raw="$2"

    # Every value below ends up inside a Perl '...' literal, so each one is
    # escaped first; otherwise a quote or backslash in a title, description
    # or UPS reading would break (or alter) the generated Perl program.
    local p_type p_title p_desc p_severity
    p_type=$(_perl_sq "$1")
    p_title=$(_perl_sq "$2")
    p_desc=$(_perl_sq "$3")
    p_severity=$(_perl_sq "$4")

    local p_status p_charge p_voltage p_date p_fqdn p_host
    p_status=$(_perl_sq "$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo "UNKNOWN")")
    p_charge=$(_perl_sq "$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo "0")")
    p_voltage=$(_perl_sq "$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo "0")")
    p_date=$(_perl_sq "$(date '+%Y-%m-%d %H:%M:%S')")
    p_fqdn=$(_perl_sq "$FQDN")
    p_host=$(_perl_sq "$HOSTNAME")

    log_message "Sending notification: $event_title_raw"

    # The heredoc is deliberately unquoted: shell variables are expanded into
    # the Perl source, while Perl's own sigils are written as \$ and Perl's
    # "\n" as \\n. 'sudo -n' fails instead of prompting for a password.
    /usr/bin/sudo -n /usr/bin/perl -I/usr/share/perl5 << EOFPERL 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my \$template_data = {
    'hostname'          => '$p_fqdn',
    'event_date'        => '$p_date',
    'event_type'        => '$p_type',
    'event_title'       => '$p_title',
    'event_description' => '$p_desc',
    'event_class'       => 'shutdown',
    'alert_type'        => 'danger',
    'ups_status'        => '$p_status',
    'battery_charge'    => '$p_charge',
    'input_voltage'     => '$p_voltage',
    'action_taken'      => 'Shutdown in curs',
    'next_steps'        => ''
};

my \$fields = {
    'hostname' => '$p_host',
    'type'     => 'ups-power-event'
};

eval {
    PVE::Notify::notify('$p_severity', 'ups-power-event', \$template_data, \$fields);
    print "Notification sent\\n";
};
if (\$@) {
    print STDERR "Notification failed: \$@\\n";
}
EOFPERL
}

log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
log_message "$(get_ups_info)"
log_message "========================================"

# Check that the UPS really is on battery / low battery before touching
# anything. The status string is matched for the OB or LB flag; if neither is
# present - which includes an empty status when upsc itself fails - the script
# logs a warning and exits without shutting anything down.
UPS_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
if [[ ! $UPS_STATUS =~ (OB|LB) ]]; then
    log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
    exit 0
fi

# Email: shutdown sequence started
send_notification \
    "SHUTDOWN_START" \
    "Shutdown cluster PORNIT" \
    "UPS pe baterie critica. Se initiaza oprirea ordonata a cluster-ului Proxmox." \
    "error"

log_message "Step 1: Oprire VM-uri pe toate nodurile..."

# Stop the VMs on every node: the remote nodes first, then the local one.
# Each 'qm shutdown' is started in the background so the VMs of a node are
# asked to stop in parallel rather than one after another.
for node in "${NODES[@]}" localhost; do
    if [ "$node" == "localhost" ]; then
        NODE_NAME="pvemini (local)"
    else
        NODE_NAME=$node
    fi

    log_message " - Oprire VM-uri pe $NODE_NAME..."

    if [ "$node" == "localhost" ]; then
        # First column of 'qm list' (header line skipped) is the VM id.
        for vmid in $(qm list | awk 'NR>1 {print $1}'); do
            vm_status=$(qm status "$vmid" | awk '{print $2}')
            if [ "$vm_status" == "running" ]; then
                log_message " * Oprire VM $vmid pe pvemini..."
                qm shutdown "$vmid" --timeout 60 &
            fi
        done
    else
        # Same loop, executed on the remote node. Everything escaped with a
        # backslash is evaluated remotely; only $node is expanded locally.
        ssh -o ConnectTimeout=5 root@"$node" "
            for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
                vm_status=\$(qm status \$vmid | awk '{print \$2}')
                if [ \"\$vm_status\" == \"running\" ]; then
                    echo ' * Oprire VM '\$vmid' pe $node...'
                    qm shutdown \$vmid --timeout 60 &
                fi
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done

log_message "Step 2: Oprire containere LXC pe toate nodurile..."

# Stop the LXC containers on every node: the remote nodes first, then the
# local one. Only containers whose second 'pct list' column is "running" are
# selected; each 'pct shutdown' is started in the background.
for node in "${NODES[@]}" localhost; do
    if [ "$node" == "localhost" ]; then
        NODE_NAME="pvemini (local)"
    else
        NODE_NAME=$node
    fi

    log_message " - Oprire LXC pe $NODE_NAME..."

    if [ "$node" == "localhost" ]; then
        pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' | while read -r ctid; do
            log_message " * Oprire container $ctid pe pvemini..."
            pct shutdown "$ctid" --timeout 60 &
        done
    else
        # Same pipeline, executed on the remote node. Everything escaped with
        # a backslash is evaluated remotely; only $node is expanded locally.
        ssh -o ConnectTimeout=5 root@"$node" "
            pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while read ctid; do
                echo ' * Oprire container '\$ctid' pe $node...'
                pct shutdown \$ctid --timeout 60 &
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done

log_message "Step 3: Asteptare 90 secunde pentru oprirea VM-urilor si LXC..."
sleep 90

log_message "Step 4: Oprire noduri secundare..."

# Shut down the secondary nodes, sending one notification per node.
# NODES and NODE_NAMES are walked by index so each IP is paired with its name.
for i in "${!NODES[@]}"; do
    node="${NODES[$i]}"
    node_name="${NODE_NAMES[$i]}"

    log_message " - Shutdown nod $node_name ($node)..."

    # Email: secondary node shutdown
    send_notification \
        "SHUTDOWN_NODE" \
        "Shutdown $node_name trimis" \
        "Comanda shutdown a fost trimisa catre nodul $node_name ($node)." \
        "error"

    # The remote halt is scheduled one minute ahead and the ssh call runs in
    # the background, so the loop moves on to the next node immediately.
    ssh -o ConnectTimeout=5 root@"$node" "shutdown -h +1 'UPS battery critical - shutting down'" 2>&1 | tee -a "$LOGFILE" &
done

log_message "Step 5: Asteptare 60 secunde pentru shutdown noduri secundare..."
sleep 60

log_message "Step 6: Oprire nod local (pvemini - primary)..."

# Email: primary node shutdown (last email before the node goes down)
send_notification \
    "SHUTDOWN_PRIMARY" \
    "Shutdown pvemini (ULTIMUL NOD)" \
    "Se opreste nodul primary pvemini. Acesta este ultimul nod din cluster. UPS-ul se va opri dupa shutdown." \
    "error"

log_message "Step 7: Oprire UPS dupa shutdown..."

# Tell the UPS to power off after a delay (so the OS shutdown can finish).
# The list of instant commands is fetched once; shutdown.stayoff is preferred
# and shutdown.return is the fallback when stayoff is not offered.
#
# NOTE(review): the UPS command is issued before the local node begins to
# halt (the 'shutdown -h +1' below adds another minute). This presumably
# relies on the UPS's own shutdown delay being longer than that minute plus
# the halt time - confirm the configured delay on the device.
# NOTE(review): the password is passed on the upscmd command line and is
# therefore visible in the process list while the command runs.
ups_commands=$(upscmd -l "$UPS_NAME" 2>/dev/null)
if grep -q "shutdown.stayoff" <<< "$ups_commands"; then
    log_message " - Comanda UPS shutdown.stayoff (oprire completa)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.stayoff 2>&1 | tee -a "$LOGFILE"
elif grep -q "shutdown.return" <<< "$ups_commands"; then
    log_message " - Comanda UPS shutdown.return (oprire cu restart la revenire curent)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.return 2>&1 | tee -a "$LOGFILE"
else
    log_message " - WARNING: Nu s-a gasit comanda UPS shutdown disponibila"
fi

log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
log_message "$(get_ups_info)"
log_message "Local node will shutdown in 1 minute"
log_message "========================================"

# Shut down the local node (the last one)
shutdown -h +1 "UPS battery critical - primary node shutting down"

exit 0

Reference in New Issue
Block a user