This commit adds a comprehensive UPS monitoring and management system for the Proxmox cluster with automated shutdown orchestration and monthly battery health testing. Features: - NUT (Network UPS Tools) configuration for INNO TECH USB UPS - Automated cluster shutdown on power failure (3-minute grace period) - Monthly automated battery testing with health evaluation - Email notifications via PVE::Notify system - WinNUT monitoring client for Windows VM 201 Components added: - config/: NUT configuration files (ups.conf, upsd.conf, upsmon.conf, etc.) - scripts/ups-shutdown-cluster.sh: Orchestrated cluster shutdown - scripts/ups-monthly-test.sh: Monthly battery test with email reports - scripts/upssched-cmd: Event handler for UPS state changes - docs/: Complete installation and usage documentation Key findings: - UPS battery.charge reporting has 10-40 second delay after test start - Test must monitor voltage drop (1.5-2V) and charge drop (9-27%) - Battery health evaluation: EXCELLENT/GOOD/FAIR/POOR based on discharge rate - Email notifications use Handlebars templates without Unicode emojis for compatibility Configuration: - UPS: INNO TECH (Voltronic protocol, vendor 0665:5161) - Primary node: pvemini (10.0.20.201) with USB connection - Monthly test: cron 0 0 1 * * /opt/scripts/ups-monthly-test.sh - Shutdown timer: 180 seconds on battery before cluster shutdown Documentation includes complete installation guides for NUT server, WinNUT client, and troubleshooting procedures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
436 lines
15 KiB
Bash
436 lines
15 KiB
Bash
#!/bin/bash
|
|
#
# Automated monthly UPS battery test script
# Runs on the 1st of every month at 00:00
# Sends a report through the Proxmox notification system (PVE::Notify)
#
# IMPORTANT: the timing of the readings is CRITICAL!
# - battery.charge drops ONLY between 10-40 seconds after the test starts
# - The UPS updates its values with a 5-10 second delay
#
# Created: 2025-10-06
# Author: Claude Code
|
|
|
|
# --- Configuration ---------------------------------------------------------
LOGFILE="/var/log/ups-monthly-test.log"

# UPS identity and upsd credentials. Each value may be overridden from the
# environment (for example from the cron entry); the literals are the defaults
# the script has always used.
# SECURITY NOTE(review): the default password is stored in clear text in this
# file. Prefer supplying UPS_PASS from a root-only environment file and keep
# this script readable by root only.
UPS_NAME="${UPS_NAME:-nutdev1}"
UPS_USER="${UPS_USER:-admin}"
UPS_PASS="${UPS_PASS:-parola99}"

# Directory where the notification templates are written by create_templates.
TEMPLATE_DIR="/etc/pve/notification-templates/default"

# Epoch seconds at script start; used at the end to compute the total runtime.
START_TIME=$(date +%s)

# Short host name (notification metadata) and fully qualified name (report).
HOSTNAME=$(hostname)
FQDN=$(hostname -f 2>/dev/null)
# "hostname -f" fails when the name cannot be resolved; fall back to the
# short name so the report never carries an empty hostname.
[ -n "$FQDN" ] || FQDN=$HOSTNAME
|
|
|
|
# log MESSAGE
# Writes MESSAGE prefixed with a "[YYYY-MM-DD HH:MM:SS]" timestamp to stdout
# and appends the same line to the file named by $LOGFILE.
log() {
    # $LOGFILE is quoted so a path containing spaces is not split into
    # several tee arguments.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOGFILE"
}
|
|
|
|
# create_templates
# Writes the three Handlebars notification templates (subject, plain-text
# body, HTML body) for the "ups-battery-test" notification into $TEMPLATE_DIR,
# creating the directory first when needed. Existing files are overwritten.
# The heredoc delimiters are quoted, so the {{ ... }} placeholders are written
# literally and nothing is expanded by the shell.
# Returns 0 when all three files were written, 1 otherwise.
create_templates() {
    local rc=0

    if ! mkdir -p "$TEMPLATE_DIR"; then
        log "ERROR: cannot create template directory $TEMPLATE_DIR"
        return 1
    fi

    # Template: subject
    cat > "$TEMPLATE_DIR/ups-battery-test-subject.txt.hbs" << 'EOFTEMPLATE' || rc=1
[{{ hostname }}] UPS Battery Test - {{ health_status }}
EOFTEMPLATE

    # Template: plain-text body
    cat > "$TEMPLATE_DIR/ups-battery-test-body.txt.hbs" << 'EOFTEMPLATE' || rc=1
========================================
UPS MONTHLY BATTERY TEST REPORT
========================================

Hostname: {{ hostname }}
Date: {{ test_date }}
UPS: {{ ups_name }}

BATTERY HEALTH: {{ health_status }}
{{ health_emoji }} {{ health_description }}

TEST RESULTS:
-------------
Battery Charge Drop: {{ charge_drop }}%
Battery Voltage Drop: {{ voltage_drop }}V
Minimum Charge Reached: {{ min_charge }}%
Minimum Voltage: {{ min_voltage }}V
Recovery Time: {{ recovery_time }}s

BEFORE TEST:
- Battery Charge: {{ before_charge }}%
- Battery Voltage: {{ before_voltage }}V
- UPS Load: {{ before_load }}%

AFTER TEST ({{ test_duration }}s):
- Battery Charge: {{ after_charge }}%
- Battery Voltage: {{ after_voltage }}V
- UPS Load: {{ after_load }}%

RECOMMENDATIONS:
{{ recommendations }}

========================================
Script: /opt/scripts/ups-monthly-test.sh
Log: /var/log/ups-monthly-test.log
========================================
EOFTEMPLATE

    # Template: HTML body. "recommendations" uses triple braces because the
    # script passes it as ready-made HTML (<ul>...</ul>) that must not be escaped.
    cat > "$TEMPLATE_DIR/ups-battery-test-body.html.hbs" << 'EOFTEMPLATE' || rc=1
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 15px; margin-top: 0; }
.status-badge { display: inline-block; padding: 10px 20px; border-radius: 5px; font-weight: bold; font-size: 18px; margin: 15px 0; }
.status-excellent { background-color: #d4edda; color: #155724; border: 2px solid #28a745; }
.status-good { background-color: #d1ecf1; color: #0c5460; border: 2px solid #17a2b8; }
.status-fair { background-color: #fff3cd; color: #856404; border: 2px solid #ffc107; }
.status-poor { background-color: #f8d7da; color: #721c24; border: 2px solid #dc3545; }
.metrics { display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin: 25px 0; }
.metric { background: #ecf0f1; padding: 20px; border-radius: 5px; border-left: 4px solid #3498db; }
.metric-label { font-size: 13px; color: #7f8c8d; text-transform: uppercase; letter-spacing: 0.5px; }
.metric-value { font-size: 28px; font-weight: bold; color: #2c3e50; margin-top: 8px; }
.section { margin: 25px 0; padding: 20px; background: #f8f9fa; border-radius: 5px; }
.section h2 { color: #34495e; margin-top: 0; font-size: 20px; }
.recommendations { background: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; margin: 20px 0; }
.recommendations ul { margin: 10px 0; padding-left: 20px; }
.footer { margin-top: 30px; padding-top: 20px; border-top: 2px solid #ecf0f1; font-size: 12px; color: #7f8c8d; text-align: center; }
table { width: 100%; border-collapse: collapse; margin: 15px 0; }
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }
th { background-color: #3498db; color: white; font-weight: 600; }
tr:hover { background-color: #f5f5f5; }
</style>
</head>
<body>
<div class="container">
<h1>[BATTERY] UPS Battery Test Report</h1>

<p><strong>Hostname:</strong> {{ hostname }}<br>
<strong>Date:</strong> {{ test_date }}<br>
<strong>UPS:</strong> {{ ups_name }}</p>

<div class="status-badge status-{{ health_class }}">
{{ health_emoji }} Battery Health: {{ health_status }}
</div>

<p style="font-size: 16px; margin-top: 15px;">{{ health_description }}</p>

<h2 style="margin-top: 30px;">Test Metrics</h2>
<div class="metrics">
<div class="metric">
<div class="metric-label">Charge Drop</div>
<div class="metric-value">{{ charge_drop }}%</div>
</div>
<div class="metric">
<div class="metric-label">Voltage Drop</div>
<div class="metric-value">{{ voltage_drop }}V</div>
</div>
<div class="metric">
<div class="metric-label">Min Charge</div>
<div class="metric-value">{{ min_charge }}%</div>
</div>
<div class="metric">
<div class="metric-label">Recovery Time</div>
<div class="metric-value">{{ recovery_time }}s</div>
</div>
</div>

<div class="section">
<h2>Detailed Measurements</h2>
<table>
<tr>
<th>Parameter</th>
<th>Before Test</th>
<th>After Test</th>
</tr>
<tr>
<td>Battery Charge</td>
<td>{{ before_charge }}%</td>
<td>{{ after_charge }}%</td>
</tr>
<tr>
<td>Battery Voltage</td>
<td>{{ before_voltage }}V</td>
<td>{{ after_voltage }}V</td>
</tr>
<tr>
<td>UPS Load</td>
<td>{{ before_load }}%</td>
<td>{{ after_load }}%</td>
</tr>
</table>
</div>

<div class="recommendations">
<h2 style="margin-top: 0;">📋 Recommendations</h2>
{{{ recommendations }}}
</div>

<div class="footer">
<p><strong>Script:</strong> /opt/scripts/ups-monthly-test.sh<br>
<strong>Log File:</strong> /var/log/ups-monthly-test.log</p>
<p style="margin-top: 10px;">Proxmox VE - UPS Monitoring System</p>
</div>
</div>
</body>
</html>
EOFTEMPLATE

    if [ "$rc" -ne 0 ]; then
        log "ERROR: could not write one or more templates in $TEMPLATE_DIR/"
        return 1
    fi

    log "Templates created in $TEMPLATE_DIR/"
}
|
|
|
|
# Create the notification templates when any of the three files is missing.
# Checking only the subject template would leave a missing body template
# unrepaired. Note that create_templates rewrites all three files.
for tpl in ups-battery-test-subject.txt.hbs \
           ups-battery-test-body.txt.hbs \
           ups-battery-test-body.html.hbs; do
    if [ ! -f "$TEMPLATE_DIR/$tpl" ]; then
        log "Creating notification templates..."
        create_templates
        break
    fi
done

log "========================================"
log "UPS MONTHLY BATTERY TEST - START"
log "========================================"
|
|
|
|
# 1. Read the UPS state before the test. upsc errors are discarded, so a
#    failed query simply leaves the corresponding variable empty.
log "Step 1: Verificare status UPS înainte de test..."
BEFORE_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
BEFORE_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null)
BEFORE_VOLTAGE=$(upsc "$UPS_NAME" battery.voltage 2>/dev/null)
BEFORE_LOAD=$(upsc "$UPS_NAME" ups.load 2>/dev/null)

log " Status: $BEFORE_STATUS"
log " Battery Charge: $BEFORE_CHARGE%"
log " Battery Voltage: $BEFORE_VOLTAGE V"
log " Load: $BEFORE_LOAD%"

# Abort unless the status string contains "OL" (on line). An empty status
# (upsc failed) also ends up here.
if [[ $BEFORE_STATUS != *"OL"* ]]; then
    log "ERROR: UPS nu este online! Status: $BEFORE_STATUS"
    log "Test ANULAT"
    exit 1
fi

# Warn (but continue) when the battery is below 95%. The reading is validated
# first because "[ ... -lt ... ]" errors out on an empty or decimal value;
# for a decimal reading only the integer part is compared.
if [[ $BEFORE_CHARGE =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    if [ "${BEFORE_CHARGE%%.*}" -lt 95 ]; then
        log "WARNING: Baterie nu este complet încărcată ($BEFORE_CHARGE%)"
    fi
else
    log "WARNING: battery.charge indisponibil sau nenumeric ('$BEFORE_CHARGE')"
fi
|
|
|
|
# 2. Start the quick battery test
log ""
log "Step 2: Pornire test baterie..."
TEST_START_TIME=$(date +%s)

# All expansions are quoted so credentials containing spaces or glob
# characters reach upscmd as single arguments.
# NOTE(review): passing the password with -p makes it visible in the process
# list while upscmd runs.
upscmd -u "$UPS_USER" -p "$UPS_PASS" "$UPS_NAME" test.battery.start.quick 2>&1 | tee -a "$LOGFILE"
# Capture upscmd's own exit status (not tee's) right away, before any other
# command can overwrite PIPESTATUS.
UPSCMD_STATUS=${PIPESTATUS[0]}

if [ "$UPSCMD_STATUS" -eq 0 ]; then
    log "Test baterie pornit cu succes!"
else
    log "ERROR: Nu am putut porni testul de baterie!"
    exit 1
fi
|
|
|
|
# 3. CRITICAL TIMING: sample charge and voltage while the test is running and
#    remember the lowest value of each.
log ""
log "Step 3: Monitorizare test baterie (timing critic pentru charge drop)..."

# is_number VALUE - succeeds when VALUE is a non-negative integer or decimal.
is_number() {
    [[ $1 =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# lower_of CANDIDATE CURRENT_MIN - prints the numerically smaller argument
# unchanged. A non-numeric argument (e.g. an empty reading after a failed
# upsc call) never wins; on a tie CURRENT_MIN is kept.
lower_of() {
    if ! is_number "$1"; then
        printf '%s' "$2"
    elif ! is_number "$2"; then
        printf '%s' "$1"
    elif awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        printf '%s' "$1"
    else
        printf '%s' "$2"
    fi
}

MIN_CHARGE=$BEFORE_CHARGE
MIN_VOLTAGE=$BEFORE_VOLTAGE
CHARGE_AT_15S=$BEFORE_CHARGE
VOLTAGE_AT_15S=$BEFORE_VOLTAGE

# Wait 10 seconds before the first sample so that the seven samples below are
# taken at roughly 10, 15, ... 40 seconds after the test start. That is the
# window named in the log labels; with a 5 second wait every sample would be
# taken 5 seconds earlier than its label claims.
sleep 10

# 10-40 seconds: the critical window in which the charge drops
for i in {1..7}; do
    CURRENT_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null)
    CURRENT_VOLTAGE=$(upsc "$UPS_NAME" battery.voltage 2>/dev/null)

    # Track the minimum of each reading
    MIN_CHARGE=$(lower_of "$CURRENT_CHARGE" "$MIN_CHARGE")
    MIN_VOLTAGE=$(lower_of "$CURRENT_VOLTAGE" "$MIN_VOLTAGE")

    # Second sample = reading at 15 seconds (the optimal point)
    if [ "$i" -eq 2 ]; then
        CHARGE_AT_15S=$CURRENT_CHARGE
        VOLTAGE_AT_15S=$CURRENT_VOLTAGE
        log " [15s CRITICAL] Charge: $CURRENT_CHARGE% | Voltage: $CURRENT_VOLTAGE V"
    else
        log " [$((5 + i*5))s] Charge: $CURRENT_CHARGE% | Voltage: $CURRENT_VOLTAGE V"
    fi

    sleep 5
done

TEST_END_TIME=$(date +%s)
TEST_DURATION=$((TEST_END_TIME - TEST_START_TIME))

log " Minimum Charge: $MIN_CHARGE%"
log " Minimum Voltage: $MIN_VOLTAGE V"
|
|
|
|
# 4. Give the battery 15 seconds to recover, then take the final readings.
#    As with the earlier reads, upsc errors are discarded and leave the
#    variable empty.
log ""
log "Step 4: Așteptare recuperare baterie (15 secunde)..."
sleep 15

# $UPS_NAME is quoted so the UPS identifier is always passed as one argument.
AFTER_STATUS=$(upsc "$UPS_NAME" ups.status 2>/dev/null)
AFTER_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null)
AFTER_VOLTAGE=$(upsc "$UPS_NAME" battery.voltage 2>/dev/null)
AFTER_LOAD=$(upsc "$UPS_NAME" ups.load 2>/dev/null)

log " Status: $AFTER_STATUS"
log " Battery Charge: $AFTER_CHARGE%"
log " Battery Voltage: $AFTER_VOLTAGE V"
log " Load: $AFTER_LOAD%"
|
|
|
|
# 5. Compute the metrics: how far charge and voltage fell below their
#    pre-test values.
#
# Both differences are computed with awk under LC_ALL=C:
# - shell arithmetic $(( )) aborts on a decimal or empty reading;
# - bc may not be installed, in which case the drop was silently reported
#   as 0;
# - the shell's printf "%.2f" depends on the locale's decimal separator.
NUMERIC_RE='^[0-9]+([.][0-9]+)?$'

if [[ $BEFORE_CHARGE =~ $NUMERIC_RE && $MIN_CHARGE =~ $NUMERIC_RE ]]; then
    # Rounded to a whole percent so later integer comparisons keep working.
    CHARGE_DROP=$(LC_ALL=C awk -v b="$BEFORE_CHARGE" -v m="$MIN_CHARGE" 'BEGIN { printf "%.0f", b - m }')
else
    # Readings missing or malformed: make that visible instead of guessing.
    CHARGE_DROP="N/A"
fi

if [[ $BEFORE_VOLTAGE =~ $NUMERIC_RE && $MIN_VOLTAGE =~ $NUMERIC_RE ]]; then
    # Voltage drop rounded to 2 decimals
    VOLTAGE_DROP=$(LC_ALL=C awk -v b="$BEFORE_VOLTAGE" -v m="$MIN_VOLTAGE" 'BEGIN { printf "%.2f", b - m }')
else
    VOLTAGE_DROP="0.00"
fi

log ""
log "Step 5: Analiza rezultate test..."
log " Durată test: $TEST_DURATION secunde"
log " Scădere încărcare: $CHARGE_DROP% (de la $BEFORE_CHARGE% la $MIN_CHARGE%)"
log " Scădere tensiune: $VOLTAGE_DROP V (de la $BEFORE_VOLTAGE V la $MIN_VOLTAGE V)"
|
|
|
|
# 6. Classify battery health from the charge drop measured during the test:
#      drop < 15   -> EXCELLENT
#      drop < 35   -> GOOD
#      drop < 55   -> FAIR
#      otherwise   -> POOR
#    The recommendations are HTML fragments inserted unescaped into the HTML
#    template.
BATTERY_HEALTH="UNKNOWN"
HEALTH_CLASS="fair"
HEALTH_EMOJI="[INFO]"
HEALTH_DESCRIPTION=""
RECOMMENDATIONS=""

if ! [[ $CHARGE_DROP =~ ^-?[0-9]+$ ]]; then
    # No usable charge reading. Without this guard every "-lt" test below
    # fails on the non-numeric value and the chain falls through to POOR,
    # raising a false "replace the battery now" alarm. Health stays UNKNOWN.
    HEALTH_DESCRIPTION="Battery charge readings were unavailable, so battery health could not be evaluated."
    RECOMMENDATIONS="<ul><li>Check the UPS connection and the NUT driver</li><li>Re-run the battery test manually</li></ul>"
    log " Sănătate baterie: NECUNOSCUTĂ (citiri indisponibile)"
elif [ "$CHARGE_DROP" -lt 15 ]; then
    BATTERY_HEALTH="EXCELLENT"
    HEALTH_CLASS="excellent"
    HEALTH_EMOJI="[OK]"
    HEALTH_DESCRIPTION="Battery is in excellent condition with minimal discharge during test."
    RECOMMENDATIONS="<ul><li>✅ Battery is healthy and functioning normally</li><li>Continue monthly testing</li><li>No action required</li></ul>"
    log " Sănătate baterie: EXCELENTĂ (scădere < 15%)"
elif [ "$CHARGE_DROP" -lt 35 ]; then
    BATTERY_HEALTH="GOOD"
    HEALTH_CLASS="good"
    HEALTH_EMOJI="[OK]"
    HEALTH_DESCRIPTION="Battery shows normal wear but performs adequately."
    RECOMMENDATIONS="<ul><li>Battery is functioning well</li><li>Monitor monthly for degradation trends</li><li>No immediate action needed</li></ul>"
    log " Sănătate baterie: BUNĂ (scădere 15-35%)"
elif [ "$CHARGE_DROP" -lt 55 ]; then
    BATTERY_HEALTH="FAIR"
    HEALTH_CLASS="fair"
    HEALTH_EMOJI="[WARNING]"
    HEALTH_DESCRIPTION="Battery shows significant wear and should be monitored closely."
    RECOMMENDATIONS="<ul><li>⚠️ Battery is aging</li><li>Plan replacement in 3-6 months</li><li>Increase monitoring frequency</li><li>Order replacement battery soon</li></ul>"
    log " Sănătate baterie: ACCEPTABILĂ (scădere 35-55%)"
else
    BATTERY_HEALTH="POOR"
    HEALTH_CLASS="poor"
    HEALTH_EMOJI="[CRITICAL]"
    HEALTH_DESCRIPTION="Battery is critically weak and requires immediate replacement!"
    RECOMMENDATIONS="<ul><li>🔴 <strong>URGENT:</strong> Battery needs immediate replacement!</li><li>Order new battery NOW</li><li>UPS may not provide adequate protection</li><li>Risk of unexpected shutdown</li></ul>"
    log " Sănătate baterie: SLABĂ (scădere > 55%) - NECESITĂ ÎNLOCUIRE!"
fi
|
|
|
|
# Recovery monitoring (logged as "Step 6"): wait 30 seconds, then read the
# charge once more.
log ""
log "Step 6: Monitorizare recuperare baterie..."

RECOVERY_START=$(date +%s)
sleep 30
RECOVERY_CHARGE=$(upsc "$UPS_NAME" battery.charge 2>/dev/null)
# NOTE(review): RECOVERY_TIME is the elapsed time of the fixed wait above plus
# the upsc call (about 30 s). It is not the time the battery actually needed
# to return to its pre-test charge, although the report labels it
# "Recovery Time".
RECOVERY_TIME=$(($(date +%s) - RECOVERY_START))

log " Charge după $RECOVERY_TIME secunde: $RECOVERY_CHARGE%"

# Total wall-clock time since the script started
END_TIME=$(date +%s)
RUNTIME=$((END_TIME - START_TIME))
|
|
|
|
# Map the battery health verdict to the notification severity:
# EXCELLENT/GOOD -> info, FAIR -> warning, anything else -> error.
case "$BATTERY_HEALTH" in
    EXCELLENT|GOOD)
        SEVERITY="info"
        ;;
    FAIR)
        SEVERITY="warning"
        ;;
    *)
        SEVERITY="error"
        ;;
esac
|
|
|
|
# Send the report through PVE::Notify (logged as "Step 7").
log ""
log "Step 7: Trimitere notificare prin PVE::Notify..."

# Every value is handed to Perl through an UPSTEST_* environment variable and
# the heredoc delimiter is quoted, so the shell expands nothing inside the
# Perl program. Pasting shell values into Perl single-quoted literals breaks
# (or injects code) as soon as a value contains a quote or ends in a
# backslash, and the previous sed "escape" left quotes unchanged.
UPSTEST_severity="$SEVERITY" \
UPSTEST_short_hostname="$HOSTNAME" \
UPSTEST_hostname="$FQDN" \
UPSTEST_test_date="$(date '+%Y-%m-%d %H:%M:%S')" \
UPSTEST_ups_name="$UPS_NAME" \
UPSTEST_health_status="$BATTERY_HEALTH" \
UPSTEST_health_class="$HEALTH_CLASS" \
UPSTEST_health_emoji="$HEALTH_EMOJI" \
UPSTEST_health_description="$HEALTH_DESCRIPTION" \
UPSTEST_charge_drop="$CHARGE_DROP" \
UPSTEST_voltage_drop="$VOLTAGE_DROP" \
UPSTEST_min_charge="$MIN_CHARGE" \
UPSTEST_min_voltage="$MIN_VOLTAGE" \
UPSTEST_before_charge="$BEFORE_CHARGE" \
UPSTEST_before_voltage="$BEFORE_VOLTAGE" \
UPSTEST_before_load="$BEFORE_LOAD" \
UPSTEST_after_charge="$AFTER_CHARGE" \
UPSTEST_after_voltage="$AFTER_VOLTAGE" \
UPSTEST_after_load="$AFTER_LOAD" \
UPSTEST_test_duration="$TEST_DURATION" \
UPSTEST_recovery_time="$RECOVERY_TIME" \
UPSTEST_recommendations="$RECOMMENDATIONS" \
perl -I/usr/share/perl5 << 'EOFPERL'
use strict;
use warnings;
use PVE::Notify;

# Placeholder names used by the ups-battery-test templates; each one is read
# from the matching UPSTEST_<name> environment variable (empty when unset).
my @template_keys = qw(
    hostname test_date ups_name
    health_status health_class health_emoji health_description
    charge_drop voltage_drop min_charge min_voltage
    before_charge before_voltage before_load
    after_charge after_voltage after_load
    test_duration recovery_time recommendations
);

my $template_data = {};
for my $key (@template_keys) {
    $template_data->{$key} = $ENV{"UPSTEST_$key"} // '';
}

# Metadata fields passed alongside the template data.
my $fields = {
    'hostname' => $ENV{UPSTEST_short_hostname} // '',
    'type'     => 'ups-battery-test',
    'health'   => $template_data->{health_status},
};

eval {
    PVE::Notify::notify($ENV{UPSTEST_severity}, 'ups-battery-test', $template_data, $fields);
    print "Notification sent successfully\n";
};
if ($@) {
    print STDERR "Failed to send notification: $@\n";
    exit 1;
}
EOFPERL

PERL_EXIT_CODE=$?

if [ "$PERL_EXIT_CODE" -eq 0 ]; then
    log "Notificare trimisă cu succes prin PVE::Notify"
else
    log "ERROR: Notificarea a eșuat (exit code: $PERL_EXIT_CODE)"
fi
|
|
|
|
# Final summary in the log
log ""
log "========================================"
log "UPS MONTHLY BATTERY TEST - COMPLETE"
log "Sănătate baterie: $BATTERY_HEALTH"
log "Scădere încărcare: $CHARGE_DROP%"
log "Scădere tensiune: $VOLTAGE_DROP V"
log "Timp total: $RUNTIME secunde"
log "========================================"

# Exit non-zero when the report could not be delivered, so cron (or whatever
# runs this script) can notice that nobody received the result. The test
# itself completed, hence the full summary above. PERL_EXIT_CODE is the exit
# status of the notification step; it defaults to 0 when unset.
if [ "${PERL_EXIT_CODE:-0}" -ne 0 ]; then
    exit 1
fi

exit 0
|