- Create cluster/ for Proxmox cluster infrastructure (SSH guide, HA monitor, UPS) - Create lxc108-oracle/ for Oracle Database documentation and scripts - Create vm201-windows/ for Windows 11 VM docs and SSL certificate scripts - Add SSL certificate monitoring scripts (check-ssl-certificates.ps1, monitor-ssl-certificates.sh) - Remove archived VM107 references (decommissioned) - Update all cross-references between files - Update main README.md with new structure and navigation Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
231 lines
7.5 KiB
Bash
231 lines
7.5 KiB
Bash
#!/bin/bash
|
|
#
|
|
# Script de shutdown orchestrat pentru cluster Proxmox cand UPS este pe baterie critica
|
|
# Trimite notificari email via PVE::Notify pentru fiecare pas
|
|
#
|
|
# Creat: 2025-10-06
|
|
# Actualizat: 2026-01-14 - Fix permisiuni log file, adaugat sudo pentru PVE::Notify
|
|
|
|
# --- Configuration ------------------------------------------------------------

# File that receives every orchestration log line.
LOGFILE='/var/log/ups-shutdown.log'

# Secondary nodes addressed by IP (pve1, pveelite). The local node, pvemini,
# is not listed here because it is shut down last.
NODES=('10.0.20.200' '10.0.20.202')

# Display names used in notifications, index-aligned with NODES.
NODE_NAMES=('pve1' 'pveelite')

# NUT UPS identifier and the credentials passed to upscmd.
UPS_NAME='nutdev1'
UPS_USER='admin'
UPS_PASS='parola99'

# Notification template directory (not referenced elsewhere in the visible script).
TEMPLATE_DIR='/etc/pve/notification-templates/default'

# Short host name, and the fully qualified name with a fallback to the short
# name when `hostname -f` fails.
HOSTNAME="$(hostname)"
FQDN="$(hostname -f 2>/dev/null || hostname)"
|
|
|
|
# Make sure the log file exists and is writable.
# The script may run as the unprivileged 'nut' user, so the file has to be
# writable by it; sudo is used as a fallback for creation and for fixing
# ownership/permissions.
ensure_logfile() {
    # Create the file when missing; retry through sudo if a plain touch fails.
    [ -f "$LOGFILE" ] || touch "$LOGFILE" 2>/dev/null || sudo touch "$LOGFILE"

    # Already writable: nothing left to fix.
    [ ! -w "$LOGFILE" ] || return 0

    # Hand the file to nut:nut and make it group-writable (errors are ignored).
    sudo chown nut:nut "$LOGFILE" 2>/dev/null
    sudo chmod 664 "$LOGFILE" 2>/dev/null
}
|
|
|
|
ensure_logfile
|
|
|
|
# Print a timestamped message to stdout, append the same line to $LOGFILE,
# and send the raw message to syslog under the tag "ups-shutdown".
# Arguments:
#   $1 - message text
log_message() {
    local msg="$1"

    # The path is quoted so a log location containing spaces or glob
    # characters is not word-split. tee's own errors (e.g. an unwritable
    # file) stay suppressed; the line still reaches stdout.
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $msg" | tee -a "$LOGFILE" 2>/dev/null

    # "--" ends option parsing so a message beginning with "-" is logged
    # verbatim instead of being interpreted as a logger option.
    logger -t ups-shutdown -- "$msg"
}
|
|
|
|
# Print a three-line summary of the UPS state (status, battery charge, input
# voltage) as reported by upsc. A placeholder ('UNKNOWN' or '?') is printed
# for any value upsc fails to return.
get_ups_info() {
    local status charge voltage

    status=$(upsc $UPS_NAME ups.status 2>/dev/null || echo 'UNKNOWN')
    charge=$(upsc $UPS_NAME battery.charge 2>/dev/null || echo '?')
    voltage=$(upsc $UPS_NAME input.voltage 2>/dev/null || echo '?')

    printf 'Status: %s\nBattery: %s%%\nInput: %sV\n' "$status" "$charge" "$voltage"
}
|
|
|
|
# Escape a value for embedding inside a Perl single-quoted string literal.
# Backslash and single quote are the only characters Perl treats specially
# there. Prints the escaped value without a trailing newline.
_perl_sq_escape() {
    local value="$1"
    local bs='\' sq="'"

    value=${value//"$bs"/"$bs$bs"}
    value=${value//"$sq"/"$bs$sq"}
    printf '%s' "$value"
}

# Send an e-mail notification through PVE::Notify.
# The current UPS readings are sampled with upsc and passed to the
# 'ups-power-event' template together with the event details. The Perl
# snippet runs under sudo; its output is echoed and appended to $LOGFILE.
# Arguments:
#   $1 - event type identifier (e.g. SHUTDOWN_START)
#   $2 - event title
#   $3 - event description
#   $4 - severity passed to PVE::Notify::notify (callers use "error")
send_notification() {
    local EVENT_TYPE="$1"
    local EVENT_TITLE="$2"
    local EVENT_DESC="$3"
    local SEVERITY="$4"

    # Note: UPS_STATUS is local here and shadows the global of the same name.
    local UPS_STATUS BATTERY_CHARGE INPUT_VOLTAGE EVENT_DATE
    UPS_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo "UNKNOWN")
    BATTERY_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo "0")
    INPUT_VOLTAGE=$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo "0")
    EVENT_DATE=$(date '+%Y-%m-%d %H:%M:%S')

    log_message "Sending notification: $EVENT_TITLE"

    # Every value below is interpolated by the shell into a Perl
    # single-quoted literal. Escape quotes and backslashes first so that a
    # value such as "it's" cannot break (or inject code into) the Perl
    # program that runs under sudo.
    local q_fqdn q_hostname q_date q_type q_title q_desc q_severity
    local q_status q_charge q_voltage
    q_fqdn=$(_perl_sq_escape "$FQDN")
    q_hostname=$(_perl_sq_escape "$HOSTNAME")
    q_date=$(_perl_sq_escape "$EVENT_DATE")
    q_type=$(_perl_sq_escape "$EVENT_TYPE")
    q_title=$(_perl_sq_escape "$EVENT_TITLE")
    q_desc=$(_perl_sq_escape "$EVENT_DESC")
    q_severity=$(_perl_sq_escape "$SEVERITY")
    q_status=$(_perl_sq_escape "$UPS_STATUS")
    q_charge=$(_perl_sq_escape "$BATTERY_CHARGE")
    q_voltage=$(_perl_sq_escape "$INPUT_VOLTAGE")

    # Unquoted heredoc delimiter: the shell expands the q_* variables, while
    # Perl's own sigils are protected with a backslash.
    /usr/bin/sudo /usr/bin/perl -I/usr/share/perl5 << EOFPERL 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my \$template_data = {
    'hostname' => '$q_fqdn',
    'event_date' => '$q_date',
    'event_type' => '$q_type',
    'event_title' => '$q_title',
    'event_description' => '$q_desc',
    'event_class' => 'shutdown',
    'alert_type' => 'danger',
    'ups_status' => '$q_status',
    'battery_charge' => '$q_charge',
    'input_voltage' => '$q_voltage',
    'action_taken' => 'Shutdown in curs',
    'next_steps' => ''
};

my \$fields = {
    'hostname' => '$q_hostname',
    'type' => 'ups-power-event'
};

eval {
    PVE::Notify::notify('$q_severity', 'ups-power-event', \$template_data, \$fields);
    print "Notification sent\\n";
};
if (\$@) {
    print STDERR "Notification failed: \$@\\n";
}
EOFPERL
}
|
|
|
|
# --- Orchestration start ------------------------------------------------------
log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
log_message "$(get_ups_info)"
log_message "========================================"

# Continue only when the reported UPS status contains OB or LB; for any other
# status (including an empty one when upsc fails) log a warning and exit
# without shutting anything down.
UPS_STATUS=$(upsc $UPS_NAME ups.status 2>/dev/null)
case "$UPS_STATUS" in
    *OB* | *LB*)
        ;;
    *)
        log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
        exit 0
        ;;
esac

# E-mail: shutdown is starting.
send_notification \
    "SHUTDOWN_START" \
    "Shutdown cluster PORNIT" \
    "UPS pe baterie critica. Se initiaza oprirea ordonata a cluster-ului Proxmox." \
    "error"
|
|
|
|
log_message "Step 1: Oprire VM-uri pe toate nodurile..."

# Ask every running VM to shut down: first on the secondary nodes (over ssh),
# then on the local node. Each `qm shutdown` is started in the background.
for node in "${NODES[@]}" localhost; do
    if [ "$node" == "localhost" ]; then
        log_message " - Oprire VM-uri pe pvemini (local)..."

        for vmid in $(qm list | awk 'NR>1 {print $1}'); do
            vm_status=$(qm status "$vmid" | awk '{print $2}')
            if [ "$vm_status" == "running" ]; then
                log_message " * Oprire VM $vmid pe pvemini..."
                qm shutdown "$vmid" --timeout 60 &
            fi
        done
    else
        log_message " - Oprire VM-uri pe $node..."

        # BatchMode=yes makes ssh fail immediately instead of waiting on a
        # password/passphrase prompt nobody can answer during an unattended
        # shutdown. Remote output is echoed and appended to the log.
        # NOTE(review): ssh presumably does not return until the remote
        # background jobs close their output - confirm if timing matters.
        ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" "
            for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
                vm_status=\$(qm status \$vmid | awk '{print \$2}')
                if [ \"\$vm_status\" == \"running\" ]; then
                    echo ' * Oprire VM '\$vmid' pe $node...'
                    qm shutdown \$vmid --timeout 60 &
                fi
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done
|
|
|
|
log_message "Step 2: Oprire containere LXC pe toate nodurile..."

# Ask every running LXC container to shut down: first on the secondary nodes
# (over ssh), then on the local node. Each `pct shutdown` is started in the
# background.
for node in "${NODES[@]}" localhost; do
    if [ "$node" == "localhost" ]; then
        log_message " - Oprire LXC pe pvemini (local)..."

        # Second column of `pct list` is the container status.
        pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' | while read -r ctid; do
            log_message " * Oprire container $ctid pe pvemini..."
            pct shutdown "$ctid" --timeout 60 &
        done
    else
        log_message " - Oprire LXC pe $node..."

        # BatchMode=yes makes ssh fail immediately instead of waiting on a
        # password/passphrase prompt nobody can answer during an unattended
        # shutdown. Remote output is echoed and appended to the log.
        ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" "
            pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while read -r ctid; do
                echo ' * Oprire container '\$ctid' pe $node...'
                pct shutdown \$ctid --timeout 60 &
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done
|
|
|
|
log_message "Step 3: Asteptare 90 secunde pentru oprirea VM-urilor si LXC..."
# Give the guests started shutting down in the previous steps time to stop.
sleep 90

log_message "Step 4: Oprire noduri secundare..."

# Shut down each secondary node and send one notification per node.
# NODES and NODE_NAMES are walked by the same index.
for i in "${!NODES[@]}"; do
    node="${NODES[$i]}"
    node_name="${NODE_NAMES[$i]}"

    log_message " - Shutdown nod $node_name ($node)..."

    # E-mail: shutdown command sent to a secondary node.
    send_notification \
        "SHUTDOWN_NODE" \
        "Shutdown $node_name trimis" \
        "Comanda shutdown a fost trimisa catre nodul $node_name ($node)." \
        "error"

    # Run in the background so one unreachable node does not delay the others.
    # BatchMode=yes makes ssh fail immediately instead of waiting on an
    # authentication prompt.
    ssh -o BatchMode=yes -o ConnectTimeout=5 "root@$node" \
        "shutdown -h +1 'UPS battery critical - shutting down'" 2>&1 | tee -a "$LOGFILE" &
done

log_message "Step 5: Asteptare 60 secunde pentru shutdown noduri secundare..."
sleep 60
|
|
|
|
log_message "Step 6: Oprire nod local (pvemini - primary)..."

# E-mail: primary node shutdown (the last e-mail before the node goes down).
send_notification \
    "SHUTDOWN_PRIMARY" \
    "Shutdown pvemini (ULTIMUL NOD)" \
    "Se opreste nodul primary pvemini. Acesta este ultimul nod din cluster. UPS-ul se va opri dupa shutdown." \
    "error"

log_message "Step 7: Oprire UPS dupa shutdown..."

# Tell the UPS to power off. The list of supported instant commands is
# fetched once; shutdown.stayoff is preferred, shutdown.return is the
# fallback. Credentials are quoted so special characters survive intact.
# NOTE(review): the UPS command is sent before the local `shutdown -h +1`
# below; this assumes the UPS applies its own shutdown delay long enough for
# this node to halt - confirm against the UPS configuration.
ups_commands=$(upscmd -l "$UPS_NAME" 2>/dev/null)
if grep -q "shutdown.stayoff" <<< "$ups_commands"; then
    log_message " - Comanda UPS shutdown.stayoff (oprire completa)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.stayoff 2>&1 | tee -a "$LOGFILE"
elif grep -q "shutdown.return" <<< "$ups_commands"; then
    log_message " - Comanda UPS shutdown.return (oprire cu restart la revenire curent)..."
    upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" shutdown.return 2>&1 | tee -a "$LOGFILE"
else
    log_message " - WARNING: Nu s-a gasit comanda UPS shutdown disponibila"
fi

log_message "========================================"
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
log_message "$(get_ups_info)"
log_message "Local node will shutdown in 1 minute"
log_message "========================================"

# Shut down the local node (the last one) after a one-minute delay.
shutdown -h +1 "UPS battery critical - primary node shutting down"

exit 0
|