Reorganize proxmox documentation into subdirectories per LXC/VM
- Create cluster/ for Proxmox cluster infrastructure (SSH guide, HA monitor, UPS)
- Create lxc108-oracle/ for Oracle Database documentation and scripts
- Create vm201-windows/ for Windows 11 VM docs and SSL certificate scripts
- Add SSL certificate monitoring scripts (check-ssl-certificates.ps1, monitor-ssl-certificates.sh)
- Remove archived VM107 references (decommissioned)
- Update all cross-references between files
- Update main README.md with new structure and navigation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
156
proxmox/cluster/ups/scripts/ups-maintenance-shutdown.sh
Normal file
156
proxmox/cluster/ups/scripts/ups-maintenance-shutdown.sh
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Script de shutdown orchestrat pentru mentenanță UPS
|
||||
# Folosit când trebuie să oprești complet clusterul pentru înlocuire baterie
|
||||
#
|
||||
# Autor: Claude Code
|
||||
# Data: 2025-10-06
|
||||
|
||||
LOGFILE="/var/log/ups-maintenance.log"
|
||||
NODES=("10.0.20.200" "10.0.20.202") # pve1, pveelite (adaptează dacă IP-urile sunt altele)
|
||||
|
||||
# Culori pentru output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
#######################################
# Print a timestamped message to stdout and append it to the log file.
# Globals:   LOGFILE (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
#######################################
log() {
  # LOGFILE is quoted so a path containing spaces is not split into
  # several tee targets.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOGFILE"
}
|
||||
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo -e "${BLUE}UPS MAINTENANCE - CLUSTER SHUTDOWN${NC}"
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo ""
|
||||
echo -e "${YELLOW}⚠️ ATENȚIE: Acest script va opri TOATE nodurile cluster!${NC}"
|
||||
echo -e "${YELLOW}⚠️ Folosit pentru mentenanță UPS (înlocuire baterie)${NC}"
|
||||
echo ""
|
||||
echo -e "${RED}Cluster nodes care vor fi oprite:${NC}"
|
||||
echo " - pve1 (10.0.20.200)"
|
||||
echo " - pveelite (10.0.20.202)"
|
||||
echo " - pvemini (10.0.20.201) - ULTIMUL"
|
||||
echo ""
|
||||
read -p "Continui cu shutdown? (scrie 'DA' pentru confirmare): " confirm
|
||||
|
||||
if [ "$confirm" != "DA" ]; then
|
||||
echo -e "${RED}Anulat de utilizator.${NC}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
log "========================================"
|
||||
log "MAINTENANCE SHUTDOWN - START"
|
||||
log "Initiated by: $(whoami)"
|
||||
log "========================================"
|
||||
|
||||
# 1. Verificare status cluster înainte
|
||||
log "Step 1: Verificare status cluster..."
|
||||
pvecm status | tee -a $LOGFILE
|
||||
echo ""
|
||||
|
||||
# 2. Oprire VM-uri pe toate nodurile
|
||||
log "Step 2: Oprire VM-uri pe TOATE nodurile..."
|
||||
|
||||
# Request a graceful shutdown of every running VM on the local node.
# Each `qm shutdown` is started in the background so the VMs stop in
# parallel; this block does not wait for them to finish.
echo -e "${BLUE}Oprire VM-uri pe pvemini (local)...${NC}"
for vmid in $(qm list | awk 'NR>1 {print $1}'); do
  vm_name=$(qm config "$vmid" | grep '^name:' | cut -d' ' -f2)
  # A `|| echo` fallback on the pipeline never fires (its status is that of
  # `cut`, which succeeds even on empty input), so default the name here.
  vm_name=${vm_name:-VM-$vmid}
  vm_status=$(qm status "$vmid" | awk '{print $2}')

  if [ "$vm_status" == "running" ]; then
    log " Oprire VM $vmid ($vm_name) pe pvemini..."
    qm shutdown "$vmid" --timeout 180 &
  else
    log " VM $vmid ($vm_name) deja oprit"
  fi
done
|
||||
|
||||
# Request a graceful shutdown of every running VM on each secondary node.
# The loop body is sent as one script over SSH: `\$` and `\"` are escaped so
# they are evaluated on the remote host, while $node_ip is expanded locally.
for node_ip in "${NODES[@]}"; do
  echo -e "${BLUE}Oprire VM-uri pe nod $node_ip...${NC}"
  ssh -o ConnectTimeout=5 "root@$node_ip" "
    for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
      vm_name=\$(qm config \"\$vmid\" | grep '^name:' | cut -d' ' -f2)
      # The pipeline status is that of cut, so an || fallback would never
      # run; default the name explicitly when the VM has none.
      [ -n \"\$vm_name\" ] || vm_name=\"VM-\$vmid\"
      vm_status=\$(qm status \"\$vmid\" | awk '{print \$2}')

      if [ \"\$vm_status\" == \"running\" ]; then
        echo \" Oprire VM \$vmid (\$vm_name) pe $node_ip...\"
        qm shutdown \"\$vmid\" --timeout 180 &
      fi
    done
  " 2>&1 | tee -a "$LOGFILE"
done
|
||||
|
||||
log "Așteptare 3 minute pentru shutdown VM-uri..."
|
||||
echo -e "${YELLOW}Aștept 180 secunde pentru oprirea graceful a VM-urilor...${NC}"
|
||||
for i in {180..1}; do
|
||||
echo -ne "\r${YELLOW}Rămas: $i secunde...${NC} "
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
|
||||
# 3. Verificare VM-uri oprite
|
||||
log "Step 3: Verificare VM-uri oprite..."
|
||||
running_vms=$(qm list | awk 'NR>1 && $3=="running" {print $1}')
|
||||
if [ ! -z "$running_vms" ]; then
|
||||
log "WARNING: VM-uri încă pornite pe pvemini: $running_vms"
|
||||
echo -e "${YELLOW}WARNING: Unele VM-uri încă rulează. Oprire forțată în 30 secunde...${NC}"
|
||||
sleep 30
|
||||
for vmid in $running_vms; do
|
||||
log " Force stop VM $vmid"
|
||||
qm stop $vmid
|
||||
done
|
||||
fi
|
||||
|
||||
# 4. Oprire containere (dacă există)
|
||||
log "Step 4: Oprire containere LXC (dacă există)..."
|
||||
pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' | while read ctid; do
|
||||
log " Oprire container $ctid"
|
||||
pct shutdown $ctid --timeout 60 &
|
||||
done
|
||||
sleep 70
|
||||
|
||||
# 5. Oprire noduri secundare
|
||||
log "Step 5: Oprire noduri secundare (pve1, pveelite)..."
|
||||
for node_ip in ${NODES[@]}; do
|
||||
log " Shutdown nod $node_ip în 2 minute..."
|
||||
echo -e "${RED}Shutdown nod $node_ip...${NC}"
|
||||
ssh -o ConnectTimeout=5 root@$node_ip "shutdown -h +2 'UPS Maintenance - Battery Replacement'" 2>&1 | tee -a $LOGFILE &
|
||||
done
|
||||
|
||||
log "Așteptare 150 secunde pentru shutdown noduri secundare..."
|
||||
echo -e "${YELLOW}Aștept 150 secunde pentru oprirea nodurilor secundare...${NC}"
|
||||
for i in {150..1}; do
|
||||
echo -ne "\r${YELLOW}Rămas: $i secunde până la shutdown pvemini...${NC} "
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
|
||||
# 6. Oprire nod local (pvemini) - ULTIMUL
|
||||
log "Step 6: Oprire pvemini (nod PRIMARY - ULTIMUL)..."
|
||||
log "========================================"
|
||||
log "MAINTENANCE SHUTDOWN - COMPLETE"
|
||||
log "Nodurile secundare ar trebui să fie oprite."
|
||||
log "pvemini se va opri în 2 minute."
|
||||
log "========================================"
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}========================================${NC}"
|
||||
echo -e "${GREEN}SHUTDOWN ORCHESTRAT FINALIZAT${NC}"
|
||||
echo -e "${GREEN}========================================${NC}"
|
||||
echo ""
|
||||
echo -e "${YELLOW}URMĂTORII PAȘI:${NC}"
|
||||
echo "1. Așteaptă 2 minute pentru oprirea completă a pvemini"
|
||||
echo "2. Verifică că toate LED-urile nodurilor sunt stinse"
|
||||
echo "3. Deconectează UPS de la priză"
|
||||
echo "4. Înlocuiește bateria UPS"
|
||||
echo "5. Reconectează UPS la priză"
|
||||
echo "6. Pornește nodurile (apasă buton power sau WOL)"
|
||||
echo "7. Verifică cluster cu: pvecm status"
|
||||
echo ""
|
||||
echo -e "${RED}pvemini se va opri în 2 minute!${NC}"
|
||||
echo ""
|
||||
|
||||
shutdown -h +2 "UPS Maintenance - Battery Replacement - Primary Node"
|
||||
|
||||
exit 0
|
||||
435
proxmox/cluster/ups/scripts/ups-monthly-test.sh
Normal file
435
proxmox/cluster/ups/scripts/ups-monthly-test.sh
Normal file
@@ -0,0 +1,435 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Script de test lunar automat baterie UPS
|
||||
# Rulează pe 1 ale fiecărei luni la 00:00
|
||||
# Trimite raport prin notificările Proxmox (PVE::Notify)
|
||||
#
|
||||
# IMPORTANT: Timing-ul de citire este CRITIC!
|
||||
# - Battery.charge scade DOAR între 10-40 secunde după pornirea testului
|
||||
# - UPS actualizează valorile cu delay de 5-10 secunde
|
||||
#
|
||||
# Creat: 2025-10-06
|
||||
# Autor: Claude Code
|
||||
|
||||
LOGFILE="/var/log/ups-monthly-test.log"
|
||||
UPS_NAME="nutdev1"
|
||||
UPS_USER="admin"
|
||||
UPS_PASS="parola99"
|
||||
TEMPLATE_DIR="/etc/pve/notification-templates/default"
|
||||
START_TIME=$(date +%s)
|
||||
HOSTNAME=$(hostname)
|
||||
FQDN=$(hostname -f)
|
||||
|
||||
# Funcție logging
|
||||
#######################################
# Print a timestamped message to stdout and append it to the log file.
# Globals:   LOGFILE (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
#######################################
log() {
  # LOGFILE is quoted so a path containing spaces is not split into
  # several tee targets.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOGFILE"
}
|
||||
|
||||
# Funcție pentru crearea template-urilor de notificare
|
||||
#######################################
# Write the three Handlebars notification templates (subject, plain-text
# body, HTML body) for the "ups-battery-test" notification.
# Globals:   TEMPLATE_DIR (read) - directory the templates are written to
# Arguments: none
# Outputs:   creates/overwrites ups-battery-test-{subject.txt,body.txt,
#            body.html}.hbs in TEMPLATE_DIR and logs the result via log()
# Returns:   1 if TEMPLATE_DIR cannot be created, otherwise the status of
#            the final log call
#######################################
create_templates() {
  # Quoted so a directory containing spaces is created as a single path;
  # bail out early instead of letting the redirections below fail one by one.
  if ! mkdir -p "$TEMPLATE_DIR"; then
    log "ERROR: cannot create template directory $TEMPLATE_DIR"
    return 1
  fi

  # Template: Subject
  # (quoted heredoc delimiter: the {{ ... }} placeholders are written literally)
  cat > "$TEMPLATE_DIR/ups-battery-test-subject.txt.hbs" << 'EOFTEMPLATE'
[{{ hostname }}] UPS Battery Test - {{ health_status }}
EOFTEMPLATE

  # Template: Body Text
  cat > "$TEMPLATE_DIR/ups-battery-test-body.txt.hbs" << 'EOFTEMPLATE'
========================================
UPS MONTHLY BATTERY TEST REPORT
========================================

Hostname: {{ hostname }}
Date: {{ test_date }}
UPS: {{ ups_name }}

BATTERY HEALTH: {{ health_status }}
{{ health_emoji }} {{ health_description }}

TEST RESULTS:
-------------
Battery Charge Drop: {{ charge_drop }}%
Battery Voltage Drop: {{ voltage_drop }}V
Minimum Charge Reached: {{ min_charge }}%
Minimum Voltage: {{ min_voltage }}V
Recovery Time: {{ recovery_time }}s

BEFORE TEST:
- Battery Charge: {{ before_charge }}%
- Battery Voltage: {{ before_voltage }}V
- UPS Load: {{ before_load }}%

AFTER TEST ({{ test_duration }}s):
- Battery Charge: {{ after_charge }}%
- Battery Voltage: {{ after_voltage }}V
- UPS Load: {{ after_load }}%

RECOMMENDATIONS:
{{ recommendations }}

========================================
Script: /opt/scripts/ups-monthly-test.sh
Log: /var/log/ups-monthly-test.log
========================================
EOFTEMPLATE

  # Template: Body HTML
  # The recommendations placeholder uses triple braces because the value
  # passed in is an HTML fragment (<ul>...</ul>).
  cat > "$TEMPLATE_DIR/ups-battery-test-body.html.hbs" << 'EOFTEMPLATE'
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 15px; margin-top: 0; }
.status-badge { display: inline-block; padding: 10px 20px; border-radius: 5px; font-weight: bold; font-size: 18px; margin: 15px 0; }
.status-excellent { background-color: #d4edda; color: #155724; border: 2px solid #28a745; }
.status-good { background-color: #d1ecf1; color: #0c5460; border: 2px solid #17a2b8; }
.status-fair { background-color: #fff3cd; color: #856404; border: 2px solid #ffc107; }
.status-poor { background-color: #f8d7da; color: #721c24; border: 2px solid #dc3545; }
.metrics { display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin: 25px 0; }
.metric { background: #ecf0f1; padding: 20px; border-radius: 5px; border-left: 4px solid #3498db; }
.metric-label { font-size: 13px; color: #7f8c8d; text-transform: uppercase; letter-spacing: 0.5px; }
.metric-value { font-size: 28px; font-weight: bold; color: #2c3e50; margin-top: 8px; }
.section { margin: 25px 0; padding: 20px; background: #f8f9fa; border-radius: 5px; }
.section h2 { color: #34495e; margin-top: 0; font-size: 20px; }
.recommendations { background: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; margin: 20px 0; }
.recommendations ul { margin: 10px 0; padding-left: 20px; }
.footer { margin-top: 30px; padding-top: 20px; border-top: 2px solid #ecf0f1; font-size: 12px; color: #7f8c8d; text-align: center; }
table { width: 100%; border-collapse: collapse; margin: 15px 0; }
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }
th { background-color: #3498db; color: white; font-weight: 600; }
tr:hover { background-color: #f5f5f5; }
</style>
</head>
<body>
<div class="container">
<h1>[BATTERY] UPS Battery Test Report</h1>

<p><strong>Hostname:</strong> {{ hostname }}<br>
<strong>Date:</strong> {{ test_date }}<br>
<strong>UPS:</strong> {{ ups_name }}</p>

<div class="status-badge status-{{ health_class }}">
{{ health_emoji }} Battery Health: {{ health_status }}
</div>

<p style="font-size: 16px; margin-top: 15px;">{{ health_description }}</p>

<h2 style="margin-top: 30px;">Test Metrics</h2>
<div class="metrics">
<div class="metric">
<div class="metric-label">Charge Drop</div>
<div class="metric-value">{{ charge_drop }}%</div>
</div>
<div class="metric">
<div class="metric-label">Voltage Drop</div>
<div class="metric-value">{{ voltage_drop }}V</div>
</div>
<div class="metric">
<div class="metric-label">Min Charge</div>
<div class="metric-value">{{ min_charge }}%</div>
</div>
<div class="metric">
<div class="metric-label">Recovery Time</div>
<div class="metric-value">{{ recovery_time }}s</div>
</div>
</div>

<div class="section">
<h2>Detailed Measurements</h2>
<table>
<tr>
<th>Parameter</th>
<th>Before Test</th>
<th>After Test</th>
</tr>
<tr>
<td>Battery Charge</td>
<td>{{ before_charge }}%</td>
<td>{{ after_charge }}%</td>
</tr>
<tr>
<td>Battery Voltage</td>
<td>{{ before_voltage }}V</td>
<td>{{ after_voltage }}V</td>
</tr>
<tr>
<td>UPS Load</td>
<td>{{ before_load }}%</td>
<td>{{ after_load }}%</td>
</tr>
</table>
</div>

<div class="recommendations">
<h2 style="margin-top: 0;">📋 Recommendations</h2>
{{{ recommendations }}}
</div>

<div class="footer">
<p><strong>Script:</strong> /opt/scripts/ups-monthly-test.sh<br>
<strong>Log File:</strong> /var/log/ups-monthly-test.log</p>
<p style="margin-top: 10px;">Proxmox VE - UPS Monitoring System</p>
</div>
</div>
</body>
</html>
EOFTEMPLATE

  log "Templates created in $TEMPLATE_DIR/"
}
|
||||
|
||||
# Verifică și creează template-urile dacă nu există
|
||||
if [ ! -f "$TEMPLATE_DIR/ups-battery-test-subject.txt.hbs" ]; then
|
||||
log "Creating notification templates..."
|
||||
create_templates
|
||||
fi
|
||||
|
||||
log "========================================"
|
||||
log "UPS MONTHLY BATTERY TEST - START"
|
||||
log "========================================"
|
||||
|
||||
# 1. Verificare status UPS înainte de test
|
||||
log "Step 1: Verificare status UPS înainte de test..."
|
||||
BEFORE_STATUS=$(upsc $UPS_NAME ups.status 2>/dev/null)
|
||||
BEFORE_CHARGE=$(upsc $UPS_NAME battery.charge 2>/dev/null)
|
||||
BEFORE_VOLTAGE=$(upsc $UPS_NAME battery.voltage 2>/dev/null)
|
||||
BEFORE_LOAD=$(upsc $UPS_NAME ups.load 2>/dev/null)
|
||||
|
||||
log " Status: $BEFORE_STATUS"
|
||||
log " Battery Charge: $BEFORE_CHARGE%"
|
||||
log " Battery Voltage: $BEFORE_VOLTAGE V"
|
||||
log " Load: $BEFORE_LOAD%"
|
||||
|
||||
# Verifică dacă UPS este online
|
||||
if [[ $BEFORE_STATUS != *"OL"* ]]; then
|
||||
log "ERROR: UPS nu este online! Status: $BEFORE_STATUS"
|
||||
log "Test ANULAT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verifică încărcare baterie
|
||||
if [ "$BEFORE_CHARGE" -lt 95 ]; then
|
||||
log "WARNING: Baterie nu este complet încărcată ($BEFORE_CHARGE%)"
|
||||
fi
|
||||
|
||||
# 2. Pornire test baterie
|
||||
log ""
|
||||
log "Step 2: Pornire test baterie..."
|
||||
TEST_START_TIME=$(date +%s)
|
||||
|
||||
upscmd -u $UPS_USER -p $UPS_PASS $UPS_NAME test.battery.start.quick 2>&1 | tee -a $LOGFILE
|
||||
|
||||
if [ ${PIPESTATUS[0]} -eq 0 ]; then
|
||||
log "Test baterie pornit cu succes!"
|
||||
else
|
||||
log "ERROR: Nu am putut porni testul de baterie!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 3. TIMING CRITIC: Așteptare 10-15 secunde pentru ca charge să scadă
|
||||
log ""
|
||||
log "Step 3: Monitorizare test baterie (timing critic pentru charge drop)..."
|
||||
|
||||
MIN_CHARGE=$BEFORE_CHARGE
|
||||
MIN_VOLTAGE=$BEFORE_VOLTAGE
|
||||
CHARGE_AT_15S=$BEFORE_CHARGE
|
||||
VOLTAGE_AT_15S=$BEFORE_VOLTAGE
|
||||
|
||||
# Primele 5 secunde - inițializare test
|
||||
sleep 5
|
||||
|
||||
# 10-40 secunde - fereastra critică când charge scade
|
||||
for i in {1..7}; do
|
||||
CURRENT_CHARGE=$(upsc $UPS_NAME battery.charge 2>/dev/null)
|
||||
CURRENT_VOLTAGE=$(upsc $UPS_NAME battery.voltage 2>/dev/null)
|
||||
|
||||
# Capturează minimul
|
||||
if [ ! -z "$CURRENT_CHARGE" ] && [ "$CURRENT_CHARGE" -lt "$MIN_CHARGE" ]; then
|
||||
MIN_CHARGE=$CURRENT_CHARGE
|
||||
fi
|
||||
|
||||
if [ ! -z "$CURRENT_VOLTAGE" ]; then
|
||||
MIN_VOLTAGE=$(echo "$CURRENT_VOLTAGE $MIN_VOLTAGE" | awk '{if ($1 < $2) print $1; else print $2}')
|
||||
fi
|
||||
|
||||
# Citire la 15 secunde (punct optim)
|
||||
if [ $i -eq 2 ]; then
|
||||
CHARGE_AT_15S=$CURRENT_CHARGE
|
||||
VOLTAGE_AT_15S=$CURRENT_VOLTAGE
|
||||
log " [15s CRITICAL] Charge: $CURRENT_CHARGE% | Voltage: $CURRENT_VOLTAGE V"
|
||||
else
|
||||
log " [$((5 + i*5))s] Charge: $CURRENT_CHARGE% | Voltage: $CURRENT_VOLTAGE V"
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
|
||||
TEST_END_TIME=$(date +%s)
|
||||
TEST_DURATION=$((TEST_END_TIME - TEST_START_TIME))
|
||||
|
||||
log " Minimum Charge: $MIN_CHARGE%"
|
||||
log " Minimum Voltage: $MIN_VOLTAGE V"
|
||||
|
||||
# 4. Așteptare recuperare și citire finală
|
||||
log ""
|
||||
log "Step 4: Așteptare recuperare baterie (15 secunde)..."
|
||||
sleep 15
|
||||
|
||||
AFTER_STATUS=$(upsc $UPS_NAME ups.status 2>/dev/null)
|
||||
AFTER_CHARGE=$(upsc $UPS_NAME battery.charge 2>/dev/null)
|
||||
AFTER_VOLTAGE=$(upsc $UPS_NAME battery.voltage 2>/dev/null)
|
||||
AFTER_LOAD=$(upsc $UPS_NAME ups.load 2>/dev/null)
|
||||
|
||||
log " Status: $AFTER_STATUS"
|
||||
log " Battery Charge: $AFTER_CHARGE%"
|
||||
log " Battery Voltage: $AFTER_VOLTAGE V"
|
||||
log " Load: $AFTER_LOAD%"
|
||||
|
||||
# 5. Calcul metrici
|
||||
CHARGE_DROP=$((BEFORE_CHARGE - MIN_CHARGE))
|
||||
VOLTAGE_DROP=$(echo "$BEFORE_VOLTAGE - $MIN_VOLTAGE" | bc 2>/dev/null || echo "0")
|
||||
|
||||
# Rotunjire voltage drop la 2 zecimale
|
||||
VOLTAGE_DROP=$(printf "%.2f" $VOLTAGE_DROP 2>/dev/null || echo $VOLTAGE_DROP)
|
||||
|
||||
log ""
|
||||
log "Step 5: Analiza rezultate test..."
|
||||
log " Durată test: $TEST_DURATION secunde"
|
||||
log " Scădere încărcare: $CHARGE_DROP% (de la $BEFORE_CHARGE% la $MIN_CHARGE%)"
|
||||
log " Scădere tensiune: $VOLTAGE_DROP V (de la $BEFORE_VOLTAGE V la $MIN_VOLTAGE V)"
|
||||
|
||||
# 6. Evaluare sănătate baterie
|
||||
BATTERY_HEALTH="UNKNOWN"
|
||||
HEALTH_CLASS="fair"
|
||||
HEALTH_EMOJI="[INFO]"
|
||||
HEALTH_DESCRIPTION=""
|
||||
RECOMMENDATIONS=""
|
||||
|
||||
if [ "$CHARGE_DROP" -lt 15 ]; then
|
||||
BATTERY_HEALTH="EXCELLENT"
|
||||
HEALTH_CLASS="excellent"
|
||||
HEALTH_EMOJI="[OK]"
|
||||
HEALTH_DESCRIPTION="Battery is in excellent condition with minimal discharge during test."
|
||||
RECOMMENDATIONS="<ul><li>✅ Battery is healthy and functioning normally</li><li>Continue monthly testing</li><li>No action required</li></ul>"
|
||||
log " Sănătate baterie: EXCELENTĂ (scădere < 15%)"
|
||||
elif [ "$CHARGE_DROP" -lt 35 ]; then
|
||||
BATTERY_HEALTH="GOOD"
|
||||
HEALTH_CLASS="good"
|
||||
HEALTH_EMOJI="[OK]"
|
||||
HEALTH_DESCRIPTION="Battery shows normal wear but performs adequately."
|
||||
RECOMMENDATIONS="<ul><li>Battery is functioning well</li><li>Monitor monthly for degradation trends</li><li>No immediate action needed</li></ul>"
|
||||
log " Sănătate baterie: BUNĂ (scădere 15-35%)"
|
||||
elif [ "$CHARGE_DROP" -lt 55 ]; then
|
||||
BATTERY_HEALTH="FAIR"
|
||||
HEALTH_CLASS="fair"
|
||||
HEALTH_EMOJI="[WARNING]"
|
||||
HEALTH_DESCRIPTION="Battery shows significant wear and should be monitored closely."
|
||||
RECOMMENDATIONS="<ul><li>⚠️ Battery is aging</li><li>Plan replacement in 3-6 months</li><li>Increase monitoring frequency</li><li>Order replacement battery soon</li></ul>"
|
||||
log " Sănătate baterie: ACCEPTABILĂ (scădere 35-55%)"
|
||||
else
|
||||
BATTERY_HEALTH="POOR"
|
||||
HEALTH_CLASS="poor"
|
||||
HEALTH_EMOJI="[CRITICAL]"
|
||||
HEALTH_DESCRIPTION="Battery is critically weak and requires immediate replacement!"
|
||||
RECOMMENDATIONS="<ul><li>🔴 <strong>URGENT:</strong> Battery needs immediate replacement!</li><li>Order new battery NOW</li><li>UPS may not provide adequate protection</li><li>Risk of unexpected shutdown</li></ul>"
|
||||
log " Sănătate baterie: SLABĂ (scădere > 55%) - NECESITĂ ÎNLOCUIRE!"
|
||||
fi
|
||||
|
||||
# 7. Monitorizare recuperare (30 secunde)
|
||||
log ""
|
||||
log "Step 6: Monitorizare recuperare baterie..."
|
||||
|
||||
RECOVERY_START=$(date +%s)
|
||||
sleep 30
|
||||
RECOVERY_CHARGE=$(upsc $UPS_NAME battery.charge 2>/dev/null)
|
||||
RECOVERY_TIME=$(($(date +%s) - RECOVERY_START))
|
||||
|
||||
log " Charge după $RECOVERY_TIME secunde: $RECOVERY_CHARGE%"
|
||||
|
||||
# 8. Calculează timpul total
|
||||
END_TIME=$(date +%s)
|
||||
RUNTIME=$((END_TIME - START_TIME))
|
||||
|
||||
# 9. Determină severity pentru notificare
|
||||
if [ "$BATTERY_HEALTH" = "EXCELLENT" ] || [ "$BATTERY_HEALTH" = "GOOD" ]; then
|
||||
SEVERITY="info"
|
||||
elif [ "$BATTERY_HEALTH" = "FAIR" ]; then
|
||||
SEVERITY="warning"
|
||||
else
|
||||
SEVERITY="error"
|
||||
fi
|
||||
|
||||
# 10. Trimite notificarea prin PVE::Notify
|
||||
log ""
|
||||
log "Step 7: Trimitere notificare prin PVE::Notify..."
|
||||
|
||||
# Escape a value so it can be embedded inside a Perl single-quoted string
# in the heredoc below. In such a literal only backslash and the single
# quote need escaping; backslashes are doubled first so the backslash added
# in front of each quote is not doubled again.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
perl_sq_escape() {
  # The second expression is double-quoted for the shell, so sed receives
  # s/'/\\'/g and emits a literal backslash followed by the quote.
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/\\\\'/g"
}

RECOMMENDATIONS_ESCAPED=$(perl_sq_escape "$RECOMMENDATIONS")
|
||||
|
||||
perl -I/usr/share/perl5 << EOFPERL
|
||||
use strict;
|
||||
use warnings;
|
||||
use PVE::Notify;
|
||||
|
||||
my \$template_data = {
|
||||
'hostname' => '$FQDN',
|
||||
'test_date' => '$(date '+%Y-%m-%d %H:%M:%S')',
|
||||
'ups_name' => '$UPS_NAME',
|
||||
'health_status' => '$BATTERY_HEALTH',
|
||||
'health_class' => '$HEALTH_CLASS',
|
||||
'health_emoji' => '$HEALTH_EMOJI',
|
||||
'health_description' => '$HEALTH_DESCRIPTION',
|
||||
'charge_drop' => '$CHARGE_DROP',
|
||||
'voltage_drop' => '$VOLTAGE_DROP',
|
||||
'min_charge' => '$MIN_CHARGE',
|
||||
'min_voltage' => '$MIN_VOLTAGE',
|
||||
'before_charge' => '$BEFORE_CHARGE',
|
||||
'before_voltage' => '$BEFORE_VOLTAGE',
|
||||
'before_load' => '$BEFORE_LOAD',
|
||||
'after_charge' => '$AFTER_CHARGE',
|
||||
'after_voltage' => '$AFTER_VOLTAGE',
|
||||
'after_load' => '$AFTER_LOAD',
|
||||
'test_duration' => '$TEST_DURATION',
|
||||
'recovery_time' => '$RECOVERY_TIME',
|
||||
'recommendations' => '$RECOMMENDATIONS_ESCAPED'
|
||||
};
|
||||
|
||||
my \$fields = {
|
||||
'hostname' => '$HOSTNAME',
|
||||
'type' => 'ups-battery-test',
|
||||
'health' => '$BATTERY_HEALTH'
|
||||
};
|
||||
|
||||
eval {
|
||||
PVE::Notify::notify('$SEVERITY', 'ups-battery-test', \$template_data, \$fields);
|
||||
print "Notification sent successfully\\n";
|
||||
};
|
||||
if (\$@) {
|
||||
print STDERR "Failed to send notification: \$@\\n";
|
||||
exit 1;
|
||||
}
|
||||
EOFPERL
|
||||
|
||||
PERL_EXIT_CODE=$?
|
||||
|
||||
if [ $PERL_EXIT_CODE -eq 0 ]; then
|
||||
log "Notificare trimisă cu succes prin PVE::Notify"
|
||||
else
|
||||
log "ERROR: Notificarea a eșuat (exit code: $PERL_EXIT_CODE)"
|
||||
fi
|
||||
|
||||
# 11. Finalizare
|
||||
log ""
|
||||
log "========================================"
|
||||
log "UPS MONTHLY BATTERY TEST - COMPLETE"
|
||||
log "Sănătate baterie: $BATTERY_HEALTH"
|
||||
log "Scădere încărcare: $CHARGE_DROP%"
|
||||
log "Scădere tensiune: $VOLTAGE_DROP V"
|
||||
log "Timp total: $RUNTIME secunde"
|
||||
log "========================================"
|
||||
|
||||
exit 0
|
||||
230
proxmox/cluster/ups/scripts/ups-shutdown-cluster.sh
Normal file
230
proxmox/cluster/ups/scripts/ups-shutdown-cluster.sh
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Script de shutdown orchestrat pentru cluster Proxmox cand UPS este pe baterie critica
|
||||
# Trimite notificari email via PVE::Notify pentru fiecare pas
|
||||
#
|
||||
# Creat: 2025-10-06
|
||||
# Actualizat: 2026-01-14 - Fix permisiuni log file, adaugat sudo pentru PVE::Notify
|
||||
|
||||
LOGFILE=/var/log/ups-shutdown.log
|
||||
NODES=("10.0.20.200" "10.0.20.202") # pve1, pveelite (pvemini va fi ultimul)
|
||||
NODE_NAMES=("pve1" "pveelite") # Nume pentru notificari
|
||||
UPS_NAME="nutdev1"
|
||||
UPS_USER="admin"
|
||||
UPS_PASS="parola99"
|
||||
TEMPLATE_DIR="/etc/pve/notification-templates/default"
|
||||
HOSTNAME=$(hostname)
|
||||
FQDN=$(hostname -f 2>/dev/null || hostname)
|
||||
|
||||
# Asigura ca fisierul de log exista si are permisiunile corecte
|
||||
# Scriptul poate rula ca user 'nut', deci fisierul trebuie sa fie writable
|
||||
#######################################
# Make sure the log file exists and is writable by the current user.
# Creation is tried directly first and through sudo as a fallback; if the
# file is still not writable, its owner is set to nut:nut and its mode to
# 664 through sudo (errors from those two commands are suppressed).
# Globals:   LOGFILE (read) - path of the log file
# Arguments: none
#######################################
ensure_logfile() {
  [[ -f "$LOGFILE" ]] || touch "$LOGFILE" 2>/dev/null || sudo touch "$LOGFILE"

  # Nothing more to do when the file is already writable.
  if [[ -w "$LOGFILE" ]]; then
    return 0
  fi

  sudo chown nut:nut "$LOGFILE" 2>/dev/null
  sudo chmod 664 "$LOGFILE" 2>/dev/null
}
|
||||
|
||||
ensure_logfile
|
||||
|
||||
#######################################
# Print a timestamped message, append it to the log file and forward the
# raw message to syslog under the tag "ups-shutdown".
# Globals:   LOGFILE (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOGFILE
#######################################
log_message() {
  # LOGFILE is quoted so a path with spaces stays one tee target; tee's own
  # errors (e.g. unwritable log) are suppressed so logging never aborts the
  # shutdown sequence.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOGFILE" 2>/dev/null
  # `--` keeps a message that starts with '-' from being parsed as an option.
  logger -t ups-shutdown -- "$1"
}
|
||||
|
||||
# Obtine status UPS
|
||||
#######################################
# Print a three-line summary of the UPS state as reported by upsc.
# Each value falls back to a placeholder when upsc fails.
# Globals:   UPS_NAME (read) - UPS identifier passed to upsc
# Arguments: none
# Outputs:   "Status: ...", "Battery: ...%" and "Input: ...V" on stdout
#######################################
get_ups_info() {
  # UPS_NAME is quoted so it always reaches upsc as exactly one argument.
  printf 'Status: %s\n' "$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo 'UNKNOWN')"
  printf 'Battery: %s%%\n' "$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo '?')"
  printf 'Input: %sV\n' "$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo '?')"
}
|
||||
|
||||
# Trimite notificare email via PVE::Notify
|
||||
# Escape a value for embedding inside a Perl single-quoted string literal:
# backslashes are doubled first, then each single quote gets a backslash.
_perl_sq() {
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/\\\\'/g"
}

#######################################
# Send a "ups-power-event" notification through PVE::Notify.
# The Perl program is generated by an unquoted heredoc and run through sudo;
# all interpolated values are escaped first so that a single quote or
# backslash in any of them cannot break (or inject code into) that program.
# Globals:   UPS_NAME, FQDN, HOSTNAME, LOGFILE (read)
# Arguments: $1 - event type identifier (e.g. SHUTDOWN_START)
#            $2 - event title
#            $3 - event description
#            $4 - severity passed to PVE::Notify::notify
# Outputs:   Perl stdout/stderr is echoed and appended to LOGFILE
#######################################
send_notification() {
  local EVENT_TYPE="$1"
  local EVENT_TITLE="$2"
  local EVENT_DESC="$3"
  local SEVERITY="$4"

  # Declared separately from the assignments so `local` does not mask the
  # status of the command substitutions.
  local UPS_STATUS BATTERY_CHARGE INPUT_VOLTAGE EVENT_DATE
  UPS_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null || echo "UNKNOWN")
  BATTERY_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null || echo "0")
  INPUT_VOLTAGE=$(upsc "$UPS_NAME" input.voltage 2>/dev/null || echo "0")
  EVENT_DATE=$(date '+%Y-%m-%d %H:%M:%S')

  log_message "Sending notification: $EVENT_TITLE"

  # Perl-safe copies of every value spliced into the program below.
  local q_fqdn q_host q_date q_type q_title q_desc q_severity
  local q_status q_charge q_voltage
  q_fqdn=$(_perl_sq "$FQDN")
  q_host=$(_perl_sq "$HOSTNAME")
  q_date=$(_perl_sq "$EVENT_DATE")
  q_type=$(_perl_sq "$EVENT_TYPE")
  q_title=$(_perl_sq "$EVENT_TITLE")
  q_desc=$(_perl_sq "$EVENT_DESC")
  q_severity=$(_perl_sq "$SEVERITY")
  q_status=$(_perl_sq "$UPS_STATUS")
  q_charge=$(_perl_sq "$BATTERY_CHARGE")
  q_voltage=$(_perl_sq "$INPUT_VOLTAGE")

  # In the heredoc, \$ and \\n are escaped so they reach Perl as $ and \n.
  /usr/bin/sudo /usr/bin/perl -I/usr/share/perl5 << EOFPERL 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my \$template_data = {
    'hostname' => '$q_fqdn',
    'event_date' => '$q_date',
    'event_type' => '$q_type',
    'event_title' => '$q_title',
    'event_description' => '$q_desc',
    'event_class' => 'shutdown',
    'alert_type' => 'danger',
    'ups_status' => '$q_status',
    'battery_charge' => '$q_charge',
    'input_voltage' => '$q_voltage',
    'action_taken' => 'Shutdown in curs',
    'next_steps' => ''
};

my \$fields = {
    'hostname' => '$q_host',
    'type' => 'ups-power-event'
};

eval {
    PVE::Notify::notify('$q_severity', 'ups-power-event', \$template_data, \$fields);
    print "Notification sent\\n";
};
if (\$@) {
    print STDERR "Notification failed: \$@\\n";
}
EOFPERL
}
|
||||
|
||||
log_message "========================================"
|
||||
log_message "UPS SHUTDOWN ORCHESTRATION STARTED"
|
||||
log_message "$(get_ups_info)"
|
||||
log_message "========================================"
|
||||
|
||||
# Verifica daca UPS este intr-adevar pe baterie critica
|
||||
UPS_STATUS=$(upsc $UPS_NAME ups.status 2>/dev/null)
|
||||
if [[ ! $UPS_STATUS =~ (OB|LB) ]]; then
|
||||
log_message "WARNING: UPS status is $UPS_STATUS - not critical. Aborting shutdown."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Email: START SHUTDOWN
|
||||
send_notification \
|
||||
"SHUTDOWN_START" \
|
||||
"Shutdown cluster PORNIT" \
|
||||
"UPS pe baterie critica. Se initiaza oprirea ordonata a cluster-ului Proxmox." \
|
||||
"error"
|
||||
|
||||
log_message "Step 1: Oprire VM-uri pe toate nodurile..."
|
||||
|
||||
# Opreste VM-uri pe toate nodurile (inclusiv local)
|
||||
for node in ${NODES[@]} localhost; do
|
||||
if [ "$node" == "localhost" ]; then
|
||||
NODE_NAME="pvemini (local)"
|
||||
else
|
||||
NODE_NAME=$node
|
||||
fi
|
||||
|
||||
log_message " - Oprire VM-uri pe $NODE_NAME..."
|
||||
|
||||
if [ "$node" == "localhost" ]; then
|
||||
for vmid in $(qm list | awk 'NR>1 {print $1}'); do
|
||||
vm_status=$(qm status $vmid | awk '{print $2}')
|
||||
if [ "$vm_status" == "running" ]; then
|
||||
log_message " * Oprire VM $vmid pe pvemini..."
|
||||
qm shutdown $vmid --timeout 60 &
|
||||
fi
|
||||
done
|
||||
else
|
||||
ssh -o ConnectTimeout=5 root@$node "
|
||||
for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
|
||||
vm_status=\$(qm status \$vmid | awk '{print \$2}')
|
||||
if [ \"\$vm_status\" == \"running\" ]; then
|
||||
echo ' * Oprire VM '\$vmid' pe $node...'
|
||||
qm shutdown \$vmid --timeout 60 &
|
||||
fi
|
||||
done
|
||||
" 2>&1 | tee -a $LOGFILE
|
||||
fi
|
||||
done
|
||||
|
||||
log_message "Step 2: Oprire containere LXC pe toate nodurile..."
|
||||
|
||||
# Opreste containere LXC pe toate nodurile
|
||||
for node in ${NODES[@]} localhost; do
|
||||
if [ "$node" == "localhost" ]; then
|
||||
NODE_NAME="pvemini (local)"
|
||||
else
|
||||
NODE_NAME=$node
|
||||
fi
|
||||
|
||||
log_message " - Oprire LXC pe $NODE_NAME..."
|
||||
|
||||
if [ "$node" == "localhost" ]; then
|
||||
pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' | while read ctid; do
|
||||
log_message " * Oprire container $ctid pe pvemini..."
|
||||
pct shutdown $ctid --timeout 60 &
|
||||
done
|
||||
else
|
||||
ssh -o ConnectTimeout=5 root@$node "
|
||||
pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}' | while read ctid; do
|
||||
echo ' * Oprire container '\$ctid' pe $node...'
|
||||
pct shutdown \$ctid --timeout 60 &
|
||||
done
|
||||
" 2>&1 | tee -a $LOGFILE
|
||||
fi
|
||||
done
|
||||
|
||||
log_message "Step 3: Asteptare 90 secunde pentru oprirea VM-urilor si LXC..."
|
||||
sleep 90
|
||||
|
||||
log_message "Step 4: Oprire noduri secundare..."
|
||||
|
||||
# Opreste nodurile secundare si trimite notificare pentru fiecare
|
||||
for i in "${!NODES[@]}"; do
|
||||
node="${NODES[$i]}"
|
||||
node_name="${NODE_NAMES[$i]}"
|
||||
|
||||
log_message " - Shutdown nod $node_name ($node)..."
|
||||
|
||||
# Email: SHUTDOWN NOD SECUNDAR
|
||||
send_notification \
|
||||
"SHUTDOWN_NODE" \
|
||||
"Shutdown $node_name trimis" \
|
||||
"Comanda shutdown a fost trimisa catre nodul $node_name ($node)." \
|
||||
"error"
|
||||
|
||||
ssh -o ConnectTimeout=5 root@$node "shutdown -h +1 'UPS battery critical - shutting down'" 2>&1 | tee -a $LOGFILE &
|
||||
done
|
||||
|
||||
log_message "Step 5: Asteptare 60 secunde pentru shutdown noduri secundare..."
|
||||
sleep 60
|
||||
|
||||
log_message "Step 6: Oprire nod local (pvemini - primary)..."
|
||||
|
||||
# Email: SHUTDOWN NOD PRIMARY (ultimul email inainte de shutdown)
|
||||
send_notification \
|
||||
"SHUTDOWN_PRIMARY" \
|
||||
"Shutdown pvemini (ULTIMUL NOD)" \
|
||||
"Se opreste nodul primary pvemini. Acesta este ultimul nod din cluster. UPS-ul se va opri dupa shutdown." \
|
||||
"error"
|
||||
|
||||
log_message "Step 7: Oprire UPS dupa shutdown..."
|
||||
|
||||
# Comanda UPS sa se opreasca dupa un delay (permite shutdown-ul sa se finalizeze)
|
||||
# Verifica daca comanda este disponibila
|
||||
if upscmd -l $UPS_NAME 2>/dev/null | grep -q "shutdown.stayoff"; then
|
||||
log_message " - Comanda UPS shutdown.stayoff (oprire completa)..."
|
||||
upscmd -u $UPS_USER -p $UPS_PASS $UPS_NAME shutdown.stayoff 2>&1 | tee -a $LOGFILE
|
||||
elif upscmd -l $UPS_NAME 2>/dev/null | grep -q "shutdown.return"; then
|
||||
log_message " - Comanda UPS shutdown.return (oprire cu restart la revenire curent)..."
|
||||
upscmd -u $UPS_USER -p $UPS_PASS $UPS_NAME shutdown.return 2>&1 | tee -a $LOGFILE
|
||||
else
|
||||
log_message " - WARNING: Nu s-a gasit comanda UPS shutdown disponibila"
|
||||
fi
|
||||
|
||||
log_message "========================================"
|
||||
log_message "UPS SHUTDOWN ORCHESTRATION COMPLETED"
|
||||
log_message "$(get_ups_info)"
|
||||
log_message "Local node will shutdown in 1 minute"
|
||||
log_message "========================================"
|
||||
|
||||
# Opreste nodul local (ultimul)
|
||||
shutdown -h +1 "UPS battery critical - primary node shutting down"
|
||||
|
||||
exit 0
|
||||
63
proxmox/cluster/ups/scripts/ups-shutdown-test.sh
Normal file
63
proxmox/cluster/ups/scripts/ups-shutdown-test.sh
Normal file
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Script de TEST pentru shutdown orchestrat - NU oprește nimic
|
||||
#
|
||||
|
||||
LOGFILE=/var/log/ups-shutdown-test.log
|
||||
NODES=(10.0.20.200 10.0.20.202)
|
||||
|
||||
# Print a timestamped message to stdout and append the same line to $LOGFILE.
#   $1 - message text
# The timestamp is taken at call time (the previous version had a fixed
# date/time baked into the string).
log_message() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOGFILE"
}
|
||||
|
||||
# Dry-run banner: log the current UPS readings before listing what would be
# shut down. Each reading falls back to the text UNKNOWN when upsc fails.
ups_reading() {
    upsc nutdev1 "$1" 2>/dev/null || echo 'UNKNOWN'
}

banner_rule="========================================"

log_message "$banner_rule"
log_message "UPS SHUTDOWN TEST STARTED (DRY RUN)"
log_message "UPS Status: $(ups_reading ups.status)"
log_message "Battery Charge: $(ups_reading battery.charge)%"
log_message "Input Voltage: $(ups_reading input.voltage)V"
log_message "Output Voltage: $(ups_reading output.voltage)V"
log_message "$banner_rule"
|
||||
|
||||
log_message "TEST: Ar opri VM-urile de pe toate nodurile..."

# List every VM and its state on each remote node and on the local node.
# Nothing is stopped here; this only reports what a real shutdown would touch.
for node in "${NODES[@]}" localhost; do
    if [ "$node" == "localhost" ]; then
        NODE_NAME="pvemini (local)"
    else
        NODE_NAME=$node
    fi

    log_message " - VM-uri pe $NODE_NAME:"

    if [ "$node" == "localhost" ]; then
        # Local node: query qm directly. The first line of "qm list" is skipped.
        for vmid in $(qm list | awk 'NR>1 {print $1}'); do
            vm_name=$(qm config "$vmid" | grep '^name:' | cut -d' ' -f2)
            vm_status=$(qm status "$vmid" | awk '{print $2}')
            log_message " * VM $vmid ($vm_name): $vm_status"
        done
    else
        # Remote node: run the same listing over ssh. Every \$ is escaped so it
        # is expanded on the remote side; the output is appended to the log
        # as-is (without a timestamp).
        ssh -o ConnectTimeout=5 "root@$node" "
            for vmid in \$(qm list | awk 'NR>1 {print \$1}'); do
                vm_name=\$(qm config \"\$vmid\" | grep '^name:' | cut -d' ' -f2)
                vm_status=\$(qm status \"\$vmid\" | awk '{print \$2}')
                echo \" * VM \$vmid (\$vm_name): \$vm_status\"
            done
        " 2>&1 | tee -a "$LOGFILE"
    fi
done
|
||||
|
||||
# Summary of the order a real orchestrated shutdown would follow.
# The wait before the primary node is 60 s, matching the sleep in the real
# cluster shutdown script, and 10.0.20.202 is named pveelite as in the
# maintenance script.
# NOTE(review): the 90 s VM wait could not be checked against the real script
# from the part reviewed here - confirm.
log_message ""
log_message "TEST: Ordinea de shutdown ar fi:"
log_message " 1. Toate VM-urile de pe toate nodurile (paralel)"
log_message " 2. Așteptare 90 secunde"
log_message " 3. Shutdown pve1 (10.0.20.200)"
log_message " 4. Shutdown pveelite (10.0.20.202)"
log_message " 5. Așteptare 60 secunde"
log_message " 6. Shutdown pvemini (10.0.20.201) - PRIMARY/LAST"
log_message ""
log_message "========================================"
log_message "UPS SHUTDOWN TEST COMPLETED (DRY RUN)"
log_message "NICIUN sistem nu a fost oprit - doar test"
log_message "========================================"

exit 0
|
||||
436
proxmox/cluster/ups/scripts/upssched-cmd
Normal file
436
proxmox/cluster/ups/scripts/upssched-cmd
Normal file
@@ -0,0 +1,436 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Script apelat de upssched pentru a gestiona evenimentele UPS
|
||||
# Trimite notificari email via PVE::Notify
|
||||
#
|
||||
# Creat: 2025-10-06
|
||||
# Actualizat: 2026-01-14 - Adaugat notificari periodice status baterie
|
||||
|
||||
LOGFILE=/var/log/ups-events.log
|
||||
UPS_NAME="nutdev1"
|
||||
TEMPLATE_DIR="/etc/pve/notification-templates/default"
|
||||
HOSTNAME=$(hostname)
|
||||
FQDN=$(hostname -f 2>/dev/null || hostname)
|
||||
BATTERY_MONITOR_PID="/run/nut/battery-monitor.pid"
|
||||
|
||||
# Make sure the log file exists and can be written by this script.
# The script runs as user 'nut', so the file has to be writable for it.
# Creation is first tried directly and then via sudo; when the file is not
# writable, ownership is set to nut:nut and the mode to 664 via sudo.
ensure_logfile() {
    # Create the file only when it is missing.
    [ -f "$LOGFILE" ] || touch "$LOGFILE" 2>/dev/null || sudo touch "$LOGFILE"

    # Already writable: nothing left to fix.
    if [ -w "$LOGFILE" ]; then
        return 0
    fi

    sudo chown nut:nut "$LOGFILE" 2>/dev/null
    sudo chmod 664 "$LOGFILE" 2>/dev/null
}
|
||||
|
||||
ensure_logfile
|
||||
|
||||
# Append a timestamped line to $LOGFILE.
#   $1 - message text
# When the file cannot be written, the message is sent to syslog instead,
# tagged upssched-cmd and prefixed with LOG_FALLBACK.
log_event() {
    # The group lets 2>/dev/null also cover a failure to open the log file;
    # quoting $LOGFILE keeps paths with spaces working.
    { printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "$LOGFILE"; } 2>/dev/null || \
        logger -t upssched-cmd "LOG_FALLBACK: $1"
}
|
||||
|
||||
# Print the ups.status value reported by upsc for $UPS_NAME.
# Prints UNKNOWN when upsc fails.
get_ups_status() {
    upsc "$UPS_NAME" ups.status 2>/dev/null || echo "UNKNOWN"
}
|
||||
|
||||
# Print the battery.charge value reported by upsc for $UPS_NAME.
# Prints 0 when upsc fails.
get_battery_charge() {
    upsc "$UPS_NAME" battery.charge 2>/dev/null || echo "0"
}
|
||||
|
||||
# Print the input.voltage value reported by upsc for $UPS_NAME.
# Prints 0 when upsc fails.
get_input_voltage() {
    upsc "$UPS_NAME" input.voltage 2>/dev/null || echo "0"
}
|
||||
|
||||
# Print the remaining battery runtime in whole minutes.
# Reads battery.runtime from upsc for $UPS_NAME and divides it by 60.
# Prints an empty line when the value is missing, zero, or not a number,
# which lets the notification templates skip the runtime line.
get_battery_runtime() {
    local runtime
    # Declared separately so that "local" does not mask the upsc exit status.
    runtime=$(upsc "$UPS_NAME" battery.runtime 2>/dev/null)

    # Drop any fractional part (e.g. "1234.00"): shell arithmetic is integer-only.
    runtime=${runtime%%.*}

    if [[ "$runtime" =~ ^[0-9]+$ ]] && [ "$runtime" != "0" ]; then
        # 10# forces base 10 so a leading zero is not read as octal.
        echo $((10#$runtime / 60))
    else
        echo ""
    fi
}
|
||||
|
||||
# Print the battery.voltage value reported by upsc for $UPS_NAME.
# Prints 0 when upsc fails.
get_battery_voltage() {
    upsc "$UPS_NAME" battery.voltage 2>/dev/null || echo "0"
}
|
||||
|
||||
# Print the ups.load value reported by upsc for $UPS_NAME.
# Prints 0 when upsc fails.
get_ups_load() {
    upsc "$UPS_NAME" ups.load 2>/dev/null || echo "0"
}
|
||||
|
||||
# Write the three notification templates (subject, plain-text body, HTML body)
# for the "ups-power-event" notification into $TEMPLATE_DIR, creating the
# directory when needed. Existing template files are overwritten.
# Returns 1 (after logging) when the directory cannot be created.
# NOTE(review): the header of this script says it runs as user 'nut'; confirm
# that this user is allowed to write to $TEMPLATE_DIR.
create_templates() {
    if ! mkdir -p "$TEMPLATE_DIR"; then
        log_event "ERROR: cannot create template directory $TEMPLATE_DIR"
        return 1
    fi

    # The heredoc delimiters are quoted so the {{ ... }} placeholders are
    # written out literally, without shell expansion.
    cat > "$TEMPLATE_DIR/ups-power-event-subject.txt.hbs" << 'EOFTEMPLATE'
[{{ hostname }}] UPS {{ event_type }} - {{ event_title }}
EOFTEMPLATE

    cat > "$TEMPLATE_DIR/ups-power-event-body.txt.hbs" << 'EOFTEMPLATE'
========================================
UPS POWER EVENT - {{ event_title }}
========================================

Hostname: {{ hostname }}
Date: {{ event_date }}
Event: {{ event_type }}

{{ event_description }}

UPS STATUS:
-----------
Status: {{ ups_status }}
Battery Charge: {{ battery_charge }}%
Input Voltage: {{ input_voltage }}V
{{#if battery_runtime}}
Battery Runtime: {{ battery_runtime }} min
{{/if}}

{{#if action_taken}}
ACTION: {{ action_taken }}
{{/if}}

{{#if next_steps}}
NEXT STEPS: {{ next_steps }}
{{/if}}

========================================
Log: /var/log/ups-events.log
========================================
EOFTEMPLATE

    cat > "$TEMPLATE_DIR/ups-power-event-body.html.hbs" << 'EOFTEMPLATE'
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5; }
.container { max-width: 600px; margin: 0 auto; background: white; padding: 25px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
h1 { margin-top: 0; padding-bottom: 15px; border-bottom: 3px solid #3498db; }
.status-onbatt { color: #e67e22; border-color: #e67e22; }
.status-online { color: #27ae60; border-color: #27ae60; }
.status-lowbatt { color: #c0392b; border-color: #c0392b; }
.status-shutdown { color: #8e44ad; border-color: #8e44ad; }
.status-charging { color: #3498db; border-color: #3498db; }
.alert-box { padding: 15px; border-radius: 5px; margin: 20px 0; border-left: 4px solid; }
.alert-warning { background: #fef9e7; border-color: #f39c12; }
.alert-danger { background: #fdedec; border-color: #e74c3c; }
.alert-success { background: #eafaf1; border-color: #2ecc71; }
.alert-info { background: #ebf5fb; border-color: #3498db; }
.metrics { display: grid; grid-template-columns: repeat(2, 1fr); gap: 10px; margin: 20px 0; }
.metric { background: #ecf0f1; padding: 15px; border-radius: 5px; text-align: center; }
.metric-label { font-size: 12px; color: #7f8c8d; text-transform: uppercase; }
.metric-value { font-size: 24px; font-weight: bold; color: #2c3e50; margin-top: 5px; }
.footer { margin-top: 25px; padding-top: 15px; border-top: 1px solid #ecf0f1; font-size: 12px; color: #7f8c8d; }
</style>
</head>
<body>
<div class="container">
<h1 class="status-{{ event_class }}">[UPS] {{ event_title }}</h1>
<p><strong>Hostname:</strong> {{ hostname }}<br>
<strong>Date:</strong> {{ event_date }}<br>
<strong>Event:</strong> {{ event_type }}</p>
<div class="alert-box alert-{{ alert_type }}">
<strong>{{ event_description }}</strong>
</div>
<h3>UPS Status</h3>
<div class="metrics">
<div class="metric">
<div class="metric-label">Status</div>
<div class="metric-value">{{ ups_status }}</div>
</div>
<div class="metric">
<div class="metric-label">Battery</div>
<div class="metric-value">{{ battery_charge }}%</div>
</div>
<div class="metric">
<div class="metric-label">Input Voltage</div>
<div class="metric-value">{{ input_voltage }}V</div>
</div>
</div>
{{#if action_taken}}
<p><strong>Action:</strong> {{ action_taken }}</p>
{{/if}}
{{#if next_steps}}
<p><strong>Next Steps:</strong> {{ next_steps }}</p>
{{/if}}
<div class="footer">
<p>Log: /var/log/ups-events.log<br>Proxmox UPS Monitoring</p>
</div>
</div>
</body>
</html>
EOFTEMPLATE

    log_event "Templates created in $TEMPLATE_DIR/"
}
|
||||
|
||||
# Escape a value so it can be embedded inside a Perl single-quoted string:
# every backslash and every single quote gets a leading backslash.
_perl_sq_escape() {
    local value=$1
    value=${value//\\/\\\\}
    value=${value//\'/\\\'}
    printf '%s' "$value"
}

# Send an e-mail notification through PVE::Notify.
#   $1 - event type          $5 - alert type (CSS class suffix in the HTML body)
#   $2 - event title         $6 - severity passed to PVE::Notify::notify
#   $3 - event description   $7 - action taken (may be empty)
#   $4 - event class         $8 - next steps (may be empty)
# Current UPS readings are collected here and added to the template data.
# The templates are created first when the subject template is missing.
# Output of the Perl helper (stdout and stderr) is appended to $LOGFILE.
send_notification() {
    local EVENT_TYPE="$1"
    local EVENT_TITLE="$2"
    local EVENT_DESC="$3"
    local EVENT_CLASS="$4"
    local ALERT_TYPE="$5"
    local SEVERITY="$6"
    local ACTION="$7"
    local NEXT_STEPS="$8"

    # Create the templates on first use.
    if [ ! -f "$TEMPLATE_DIR/ups-power-event-subject.txt.hbs" ]; then
        create_templates
    fi

    # Snapshot of the UPS state at the time of the notification.
    local UPS_STATUS BATTERY_CHARGE INPUT_VOLTAGE BATTERY_RUNTIME EVENT_DATE
    UPS_STATUS=$(get_ups_status)
    BATTERY_CHARGE=$(get_battery_charge)
    INPUT_VOLTAGE=$(get_input_voltage)
    BATTERY_RUNTIME=$(get_battery_runtime)
    EVENT_DATE=$(date '+%Y-%m-%d %H:%M:%S')

    log_event "Sending $SEVERITY notification: $EVENT_TITLE"

    # Every value is pasted into a Perl single-quoted literal below, so each
    # one is escaped first; a stray ' or \ would otherwise break the generated
    # Perl code or change its meaning.
    local -A q=()
    local field
    for field in FQDN HOSTNAME EVENT_DATE EVENT_TYPE EVENT_TITLE EVENT_DESC \
                 EVENT_CLASS ALERT_TYPE SEVERITY UPS_STATUS BATTERY_CHARGE \
                 INPUT_VOLTAGE BATTERY_RUNTIME ACTION NEXT_STEPS; do
        q[$field]=$(_perl_sq_escape "${!field}")
    done

    # Unquoted heredoc: the shell fills in the ${q[...]} values, while \$ and
    # \\ keep Perl's own sigils and escapes intact.
    /usr/bin/sudo /usr/bin/perl -I/usr/share/perl5 << EOFPERL 2>&1 | tee -a "$LOGFILE"
use strict;
use warnings;
use PVE::Notify;

my \$template_data = {
    'hostname' => '${q[FQDN]}',
    'event_date' => '${q[EVENT_DATE]}',
    'event_type' => '${q[EVENT_TYPE]}',
    'event_title' => '${q[EVENT_TITLE]}',
    'event_description' => '${q[EVENT_DESC]}',
    'event_class' => '${q[EVENT_CLASS]}',
    'alert_type' => '${q[ALERT_TYPE]}',
    'ups_status' => '${q[UPS_STATUS]}',
    'battery_charge' => '${q[BATTERY_CHARGE]}',
    'input_voltage' => '${q[INPUT_VOLTAGE]}',
    'battery_runtime' => '${q[BATTERY_RUNTIME]}',
    'action_taken' => '${q[ACTION]}',
    'next_steps' => '${q[NEXT_STEPS]}'
};

my \$fields = {
    'hostname' => '${q[HOSTNAME]}',
    'type' => 'ups-power-event'
};

eval {
    PVE::Notify::notify('${q[SEVERITY]}', 'ups-power-event', \$template_data, \$fields);
    print "Email notification sent successfully\\n";
};
if (\$@) {
    print STDERR "Failed to send notification: \$@\\n";
}
EOFPERL
}
|
||||
|
||||
# Start the periodic battery monitor (one report every 60 seconds).
# Any monitor that is already running is stopped first. The monitor runs as a
# background subshell whose PID is written to $BATTERY_MONITOR_PID; it ends on
# its own, and removes the PID file, once ups.status no longer contains "OB".
start_battery_monitor() {
    local monitor_pid

    # Stop a previously started monitor, if there is one.
    stop_battery_monitor

    log_event "Starting battery discharge monitor (every 60 seconds)"

    # Background monitoring loop.
    (
        MINUTE=1
        while true; do
            sleep 60

            # Check whether the UPS is still on battery. An empty status
            # (upsc failed) also ends the monitor.
            STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
            if [[ "$STATUS" != *OB* ]]; then
                log_event "Battery monitor: UPS no longer on battery, stopping monitor"
                break
            fi

            CHARGE=$(get_battery_charge)
            VOLTAGE=$(get_battery_voltage)
            RUNTIME=$(get_battery_runtime)
            LOAD=$(get_ups_load)

            log_event "Battery monitor: Minute $MINUTE - Charge: ${CHARGE}%, Voltage: ${VOLTAGE}V, Load: ${LOAD}%"

            # The runtime is appended to the description only when it is known.
            send_notification \
                "BATTERY_STATUS" \
                "Status baterie - ${CHARGE}% (min $MINUTE)" \
                "UPS pe baterie de $MINUTE minute. Baterie: ${CHARGE}%, Voltaj: ${VOLTAGE}V, Load: ${LOAD}%${RUNTIME:+, Runtime: ${RUNTIME} min}" \
                "onbatt" \
                "warning" \
                "warning" \
                "Monitorizare descarcare" \
                "Se trimite status la fiecare minut"

            MINUTE=$((MINUTE + 1))
        done
        rm -f "$BATTERY_MONITOR_PID"
    ) &
    monitor_pid=$!

    # Log the PID from $! directly, so the message is correct even when the
    # PID file could not be written.
    echo "$monitor_pid" > "$BATTERY_MONITOR_PID"
    log_event "Battery monitor started with PID $monitor_pid"
}
|
||||
|
||||
# Stop the periodic battery monitor, if one is running.
# Reads the PID from $BATTERY_MONITOR_PID, sends SIGTERM, waits up to about
# one second for the process to go away and only then falls back to SIGKILL.
# The PID file is removed in every case. Does nothing when there is no PID file.
stop_battery_monitor() {
    local pid attempt

    [ -f "$BATTERY_MONITOR_PID" ] || return 0

    pid=$(cat "$BATTERY_MONITOR_PID")

    # Only act on a numeric PID that still refers to a live process.
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
        log_event "Stopping battery monitor (PID $pid)"
        kill "$pid" 2>/dev/null

        # Give the process a short grace period before forcing it.
        for attempt in 1 2 3 4 5 6 7 8 9 10; do
            kill -0 "$pid" 2>/dev/null || break
            sleep 0.1
        done

        if kill -0 "$pid" 2>/dev/null; then
            kill -9 "$pid" 2>/dev/null
        fi
    fi

    rm -f "$BATTERY_MONITOR_PID"
}
|
||||
|
||||
# Report the battery charging progress.
#   $1 - minutes elapsed since power came back (used in the log line and in
#        the notification text)
# Reads charge, battery voltage, input voltage and load, writes one log line
# and sends a CHARGING_STATUS notification with severity "info".
send_charge_status() {
    local elapsed_min="$1"
    local batt_pct=$(get_battery_charge)
    local batt_volts=$(get_battery_voltage)
    local mains_volts=$(get_input_voltage)
    local load_pct=$(get_ups_load)

    log_event "Charge status at $elapsed_min min: ${batt_pct}%, Voltage: ${batt_volts}V, Input: ${mains_volts}V"

    local title="Incarcare baterie - ${batt_pct}% (+${elapsed_min} min)"
    local details="Status incarcare la $elapsed_min minute dupa revenirea curentului. Baterie: ${batt_pct}%, Voltaj baterie: ${batt_volts}V, Tensiune intrare: ${mains_volts}V, Load: ${load_pct}%"

    send_notification "CHARGING_STATUS" "$title" "$details" \
        "charging" "info" "info" \
        "Monitorizare incarcare" "Bateria se incarca"
}
|
||||
|
||||
# Write the separator line followed by the "UPS EVENT" line to the event log.
#   $1 - event text
log_event_banner() {
    log_event "=========================================="
    log_event "UPS EVENT: $1"
}

# Main handler: act on the event name passed as the first script argument.
#   $1 - event name: onbatt_start, onbatt, online, charge_5min, charge_10min,
#        charge_30min, lowbatt or commbad; anything else is only logged as
#        an unknown event.
handle_ups_event() {
    local event="$1"
    local minutes

    case "$event" in
        onbatt_start)
            log_event_banner "Trecere pe baterie - Timer 3 minute pornit"
            logger -t upssched-cmd "UPS switched to battery - 3 minute timer started"

            send_notification \
                "ONBATT" \
                "Trecere pe baterie" \
                "UPS a trecut pe baterie! Daca curentul nu revine in 3 minute, se va initia shutdown-ul cluster-ului." \
                "onbatt" \
                "warning" \
                "warning" \
                "Timer 3 minute pornit" \
                "Asteptare revenire curent sau shutdown automat"

            # Begin the per-minute battery reports.
            start_battery_monitor
            ;;

        onbatt)
            log_event_banner "Pe baterie de 3 minute - Incepe shutdown orchestrat"
            logger -t upssched-cmd "UPS on battery for 3 minutes - starting orchestrated shutdown"

            # Reports are no longer needed: shutdown follows.
            stop_battery_monitor

            send_notification \
                "ONBATT_TIMEOUT" \
                "Pe baterie 3 min - SHUTDOWN" \
                "UPS a fost pe baterie timp de 3 minute. Se initiaza shutdown-ul orchestrat al cluster-ului." \
                "onbatt" \
                "danger" \
                "error" \
                "Pornire shutdown orchestrat cluster" \
                "Toate nodurile se vor opri in ordine"

            /usr/local/bin/ups-shutdown-cluster.sh &
            ;;

        online)
            log_event_banner "Curent revenit - UPS online"
            logger -t upssched-cmd "Power restored - UPS back online"

            # Stop the discharge reports.
            stop_battery_monitor

            send_notification \
                "ONLINE" \
                "Curent revenit - OK" \
                "Curentul electric a revenit. UPS functioneaza normal pe linia AC. Se vor trimite statusuri de incarcare la 5, 10 si 30 de minute." \
                "online" \
                "success" \
                "info" \
                "Sistem stabil" \
                "Monitorizare incarcare activa"
            ;;

        charge_5min | charge_10min | charge_30min)
            # The minute count is the number between "charge_" and "min".
            minutes=${event#charge_}
            minutes=${minutes%min}
            log_event_banner "Status incarcare la $minutes minute"
            send_charge_status "$minutes"
            ;;

        lowbatt)
            log_event_banner "BATERIE SCAZUTA - Shutdown IMEDIAT"
            logger -t upssched-cmd "UPS LOW BATTERY - immediate shutdown"

            # Stop the discharge reports.
            stop_battery_monitor

            send_notification \
                "LOWBATT" \
                "BATERIE CRITICA - SHUTDOWN IMEDIAT" \
                "UPS raporteaza baterie critica! Shutdown imediat pentru protectia datelor." \
                "lowbatt" \
                "danger" \
                "error" \
                "Shutdown imediat cluster" \
                "Sistemele se opresc ACUM"

            /usr/local/bin/ups-shutdown-cluster.sh &
            ;;

        commbad)
            log_event_banner "Comunicatie pierduta cu UPS de 30 secunde"
            logger -t upssched-cmd "Lost communication with UPS for 30 seconds"

            send_notification \
                "COMMBAD" \
                "Comunicatie pierduta cu UPS" \
                "Nu se poate comunica cu UPS-ul de 30 de secunde. Verificati conexiunea USB." \
                "commbad" \
                "warning" \
                "warning" \
                "Monitorizare activa" \
                "Verificati conexiunea fizica USB a UPS-ului"
            ;;

        *)
            log_event "UPS EVENT: Eveniment necunoscut - $event"
            logger -t upssched-cmd "Unknown UPS event: $event"
            ;;
    esac
}

handle_ups_event "$1"
|
||||
Reference in New Issue
Block a user