#!/bin/bash
#
# Oracle DR Weekly Test with Proxmox PVE::Notify
# Automated DR test with notifications via Proxmox notification system
#
# Location: /opt/scripts/weekly-dr-test-proxmox.sh (on Proxmox host)
# Schedule: Add to cron for weekly execution (Saturdays)
#
# This script is SELF-SUFFICIENT:
# - Automatically creates notification templates if they don't exist
# - Uses Proxmox native notification system
# - No email configuration needed - uses existing Proxmox setup
#
# Installation:
#   cp weekly-dr-test-proxmox.sh /opt/scripts/
#   chmod +x /opt/scripts/weekly-dr-test-proxmox.sh
#   /opt/scripts/weekly-dr-test-proxmox.sh --install  # Creates templates
#   crontab -e  # Add: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
#
# Author: Claude (based on ha-monitor.sh pattern)
# Version: 1.0

# Strict mode: abort on unhandled errors, unset variables and failed
# pipeline stages.
set -euo pipefail

# Configuration
readonly DR_VM_ID="109"          # Proxmox VM id passed to qm start/stop
readonly DR_VM_IP="10.0.20.37"   # Address used for SSH into the DR VM
readonly DR_VM_PORT="22122"      # SSH port on the DR VM
readonly DR_VM_USER="romfast"    # SSH login on the DR VM
readonly BACKUP_PATH="/mnt/pve/oracle-backups/ROA/autobackup"
readonly MAX_RESTORE_TIME_MIN=30
readonly TEMPLATE_DIR="/usr/share/pve-manager/templates/default"
readonly LOG_DIR="/var/log/oracle-dr"

# One log file per run, named after the start timestamp.
LOG_FILE="$LOG_DIR/dr_test_$(date +%Y%m%d_%H%M%S).log"
readonly LOG_FILE

# Colors (backslash escapes; expanded by the printing command)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# Create log directory (every log helper appends to a file inside it, so
# stop early with a clear message if it cannot be created).
if ! mkdir -p -- "$LOG_DIR"; then
  printf 'Error: cannot create log directory %s\n' "$LOG_DIR" >&2
  exit 1
fi

# Function to create notification templates
#######################################
# Write the three Handlebars templates (subject, text body, HTML body) used
# for the 'oracle-dr-test' notification into TEMPLATE_DIR, overwriting any
# existing copies.
# Globals:   TEMPLATE_DIR, GREEN, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
create_templates() {
  printf '%bCreating Oracle DR test notification templates...%b\n' "$GREEN" "$NC"

  # Create templates directory if needed
  mkdir -p -- "$TEMPLATE_DIR"

  # Subject template
  cat > "$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
EOF

  # Text body template
  cat > "$TEMPLATE_DIR/oracle-dr-test-body.txt.hbs" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
Severity: {{severity}}

SUMMARY
- Outcome: {{test_result}}
- Duration: {{total_duration}} min (restore {{restore_duration}} min)
- Backups used: {{backup_count}}
- Tables restored: {{tables_restored}}

COMPONENTS
- VM {{vm_id}} ({{vm_ip}}): {{vm_status}}
- NFS: {{nfs_status}}
- Database: {{database_status}}
- Cleanup: {{disk_freed}} GB freed

STEPS
{{#each test_steps}}
- {{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} - {{this.status}}{{/if}}
{{/each}}

{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}

{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}

RESTORE LOG (first 200 lines)
---
{{restore_log}}
---

Log: {{log_file}}
Next test: Saturday 06:00
EOF

  # HTML body template (compact Gmail-friendly layout)
  cat > "$TEMPLATE_DIR/oracle-dr-test-body.html.hbs" <<'EOF'
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Oracle DR Test {{test_result}} | {{date}}</title>
</head>
<body style="margin:0;padding:16px;font-family:Arial,Helvetica,sans-serif;background:#ffffff;color:#2c3e50;">
<table style="width:100%;max-width:640px;margin:0 auto;border-collapse:collapse;">
<tr>
<td style="padding:0 0 12px 0;font-size:18px;font-weight:600;">
Oracle DR Test {{test_result}}
</td>
</tr>
<tr>
<td style="padding:0 0 8px 0;font-size:13px;color:#6c757d;">{{date}} · Severity: {{severity}}</td>
</tr>
<tr>
<td style="padding:12px;border:1px solid #e1e4e8;border-radius:4px;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:4px 0;">Outcome</td><td style="padding:4px 0;text-align:right;">{{test_result}}</td></tr>
<tr><td style="padding:4px 0;">Duration</td><td style="padding:4px 0;text-align:right;">{{total_duration}} min (restore {{restore_duration}} min)</td></tr>
<tr><td style="padding:4px 0;">Backups used</td><td style="padding:4px 0;text-align:right;">{{backup_count}}</td></tr>
<tr><td style="padding:4px 0;">Tables restored</td><td style="padding:4px 0;text-align:right;">{{tables_restored}}</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">Components</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">VM {{vm_id}} ({{vm_ip}}): {{vm_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">NFS: {{nfs_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Database: {{database_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Cleanup: {{disk_freed}} GB freed</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:0 0 6px 0;font-weight:600;">Steps</td></tr>
{{#each test_steps}}
<tr>
<td style="padding:4px 0;border-bottom:1px solid #f1f1f1;">{{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} – {{this.status}}{{/if}}</td>
</tr>
{{/each}}
</table>
</td>
</tr>

{{#if has_errors}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff5f5;border:1px solid #f1b0b7;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#c82333;">Issues</td></tr>
{{#each errors}}
<tr><td style="padding:6px 12px;border-top:1px solid #f8d7da;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

{{#if has_warnings}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff8e5;border:1px solid #ffe8a1;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#856404;">Warnings</td></tr>
{{#each warnings}}
<tr><td style="padding:6px 12px;border-top:1px solid #ffe8a1;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:12px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;font-size:13px;">Restore Log (first 200 lines)</td></tr>
<tr><td style="padding:8px 12px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word;border-top:1px solid #e1e4e8;">{{restore_log}}</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;font-size:12px;color:#6c757d;">
Log: {{log_file}} · Next test: Saturday 06:00
</td>
</tr>
</table>
</body>
</html>
EOF

  # %b expands only the color escapes; the path is printed verbatim via %s.
  printf '%bTemplates created successfully in %s%b\n' "$GREEN" "$TEMPLATE_DIR" "$NC"
}

# Function to send notification via PVE::Notify
#######################################
# Send one notification through PVE::Notify using the 'oracle-dr-test'
# template.
# Arguments:
#   $1 - severity; accepted for interface compatibility only. The Perl
#        helper reads the effective severity from the "severity" key of $2
#        (falling back to 'info').
#   $2 - JSON document; used as the template data and as the source of the
#        matching fields (severity, test_result).
# Outputs:   the helper's status line on stdout
# Returns:   exit status of the Perl helper (1 when PVE::Notify raises)
#######################################
send_pve_notification() {
  local data="$2"
  local perl_code

  # The helper is handed to perl with -e rather than written to a fixed
  # path under /tmp: nothing predictable is created on disk and nothing is
  # left behind when the helper fails.
  perl_code=$(cat <<'PERL_SCRIPT'
use strict;
use warnings;
use PVE::Notify;
use JSON;

my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);

my $severity = $data->{severity} // 'info';
my $template_name = 'oracle-dr-test';

# Add fields for matching rules
my $fields = {
    type => 'oracle-dr-test',
    severity => $severity,
    test_result => $data->{test_result},
};

# Send notification
eval {
    PVE::Notify::notify(
        $severity,
        $template_name,
        $data,
        $fields
    );
};

if ($@) {
    print "Error sending notification: $@\n";
    exit 1;
}

print "Notification sent successfully\n";
PERL_SCRIPT
  )

  # Send notification (JSON on stdin, helper source as the -e argument)
  printf '%s\n' "$data" | perl -e "$perl_code"
}

# Logging functions
#######################################
# Print a timestamped message to stdout and append it to the run log.
# Globals:   LOG_FILE (read; appended to)
# Arguments: $1 - message text
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print an [ERROR]-tagged message to stdout and append it to the run log.
# Only the color codes are escape-expanded; the message is written verbatim,
# so backslashes in it (e.g. Windows paths) are not interpreted.
# Globals:   RED, NC (read), LOG_FILE (read; appended to)
# Arguments: $1 - message text
#######################################
log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print a [WARNING]-tagged message to stdout and append it to the run log.
# Only the color codes are escape-expanded; the message is written verbatim,
# so backslashes in it (e.g. Windows paths) are not interpreted.
# Globals:   YELLOW, NC (read), LOG_FILE (read; appended to)
# Arguments: $1 - message text
#######################################
log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print a [SUCCESS]-tagged message to stdout and append it to the run log.
# Only the color codes are escape-expanded; the message is written verbatim,
# so backslashes in it (e.g. Windows paths) are not interpreted.
# Globals:   GREEN, NC (read), LOG_FILE (read; appended to)
# Arguments: $1 - message text
#######################################
log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1" | tee -a "$LOG_FILE"
}

# Test tracking (filled in while the test runs)
declare -a TEST_STEPS=() # one JSON object per recorded step
declare -a ERRORS=()     # "<step name>: <status>" for every failed step
declare -a WARNINGS=()   # free-form warning messages
TEST_START_TIME=$(date +%s) # epoch seconds; basis for the total duration

# Function to track test steps
#######################################
# Record the outcome of one test step.
# Appends a JSON object {name, status, duration, passed} to TEST_STEPS and,
# when the step did not pass, "<name>: <status>" to ERRORS.
# Globals:   TEST_STEPS, ERRORS (appended to)
# Arguments:
#   $1 - step name
#   $2 - "true" or "false"; any value other than "true" is stored as
#        passed=false, but only the literal "false" is added to ERRORS
#   $3 - human-readable status text
#   $4 - step start time in epoch seconds; duration is measured up to now
#######################################
track_step() {
  local name="$1"
  local passed="$2"
  local status="$3"
  local start_time="$4"
  local end_time duration step_json

  # Declared separately from the assignments so that a failing command
  # substitution is not masked by the exit status of 'local'.
  end_time=$(date +%s)
  duration=$((end_time - start_time))

  # jq does the string escaping, so names/statuses may contain quotes or
  # backslashes.
  step_json=$(jq -n \
    --arg name "$name" \
    --arg status "$status" \
    --argjson duration "$duration" \
    --arg passed "$passed" \
    '{name: $name, status: $status, duration: $duration, passed: ($passed == "true")}')

  TEST_STEPS+=("$step_json")

  if [[ "$passed" == "false" ]]; then
    ERRORS+=("$name: $status")
  fi
}

# Main test workflow
#######################################
# Run the DR drill end to end and send one notification with the result.
# Steps: count backups, start the DR VM, check the backup share inside the
# VM, run the restore, verify the database, collect the restore log, run
# the cleanup script, shut the VM down, then build the JSON payload and
# hand it to send_pve_notification.
#
# Every command whose failure is part of the test outcome is guarded, so
# under 'set -e' a failure is recorded as a step result and still reaches
# the notification instead of aborting the script midway (which could also
# leave the VM running).
#
# Globals:   BACKUP_PATH, DR_VM_ID, DR_VM_IP, DR_VM_PORT, DR_VM_USER,
#            LOG_FILE, TEST_START_TIME (read);
#            TEST_STEPS, ERRORS (via track_step), WARNINGS (appended to)
# Arguments: none
# Outputs:   progress via log/log_warning; restore output is tee'd to
#            LOG_FILE
#######################################
run_dr_test() {
  local test_result="FAILED"
  local severity="error"
  local is_success=false
  local restore_duration=0
  local tables_restored=0
  local db_status="UNKNOWN"
  local nfs_status="Not checked"
  local vm_status_label="Not started"
  local cleanup_freed=0
  local backup_count=0
  local restore_log="Not collected"
  local step_start restore_start restore_end raw_restore_log

  log "=========================================="
  log "Oracle DR Weekly Test - Starting"
  log "=========================================="

  # Step 1: Pre-flight checks
  step_start=$(date +%s)
  log "STEP 1: Pre-flight checks"

  # Check backups exist. find's failure (e.g. backup path missing or not
  # mounted) is tolerated here: with pipefail it would otherwise abort the
  # script before the "no backups" result could be reported.
  backup_count=$({ find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null || true; } | wc -l)
  backup_count=$((backup_count)) # normalise any padding from wc

  if ((backup_count < 2)); then
    track_step "Pre-flight checks" false "Insufficient backups (found: $backup_count)" "$step_start"
    test_result="FAILED - No backups"
  else
    track_step "Pre-flight checks" true "Found $backup_count backups" "$step_start"

    # Step 2: Start VM
    step_start=$(date +%s)
    log "STEP 2: Starting DR VM"

    if qm start "$DR_VM_ID" 2>/dev/null; then
      vm_status_label="Running"
      sleep 180 # Wait for boot
      track_step "VM Startup" true "VM $DR_VM_ID started" "$step_start"

      # Step 3: Verify NFS mount
      step_start=$(date +%s)
      log "STEP 3: Verifying NFS mount"

      nfs_status="Not Mounted"
      # NOTE(review): this tests only the exit status of the remote command.
      # Test-Path normally reports its result as True/False on stdout, so
      # this may pass whenever SSH itself works - confirm on the DR VM.
      if ssh -p "$DR_VM_PORT" -o ConnectTimeout=10 "$DR_VM_USER@$DR_VM_IP" \
        "powershell -Command 'Test-Path F:\\ROA\\autobackup'" 2>/dev/null; then
        nfs_status="Mounted"
        track_step "NFS Mount Check" true "F:\\ drive accessible" "$step_start"
      else
        track_step "NFS Mount Check" false "F:\\ drive not accessible" "$step_start"
        WARNINGS+=("NFS mount may need manual intervention")
      fi

      # Step 4: Run restore
      step_start=$(date +%s)
      restore_start=$step_start
      log "STEP 4: Running database restore"

      # pipefail (set at file level) makes this reflect ssh's status, not
      # tee's.
      if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
        "D:\\oracle\\scripts\\rman_restore_from_zero.cmd" 2>&1 | tee -a "$LOG_FILE"; then

        restore_end=$(date +%s)
        restore_duration=$(((restore_end - restore_start) / 60))

        track_step "Database Restore" true "Restored in $restore_duration minutes" "$step_start"

        # Step 5: Verify database
        step_start=$(date +%s)
        log "STEP 5: Verifying database"

        db_status=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
          "cmd /c 'echo SELECT STATUS FROM V\$INSTANCE; | sqlplus -s / as sysdba' | findstr OPEN" || echo "")

        # NOTE(review): grep and tail are inside the quoted remote command,
        # so they run on the DR VM, not locally - confirm they exist there;
        # if they do not, the fallback below reports 0 tables.
        tables_restored=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
          "cmd /c 'echo SELECT COUNT(*) FROM DBA_TABLES WHERE OWNER NOT IN (''SYS'',''SYSTEM''); | sqlplus -s / as sysdba' | grep -o '[0-9]*' | tail -1" || echo "0")
        tables_restored=$(printf '%s' "$tables_restored" | tr -cd '0-9')
        if [[ -z "$tables_restored" ]]; then
          tables_restored=0
        fi
        # Force base 10 so leading zeros are neither read as octal nor
        # emitted as an invalid JSON number.
        tables_restored=$((10#$tables_restored))

        if [[ "$db_status" =~ "OPEN" ]]; then
          track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
          test_result="PASSED"
          severity="info"
          is_success=true
        else
          track_step "Database Verification" false "Database not OPEN" "$step_start"
        fi

        # Collect restore log from VM. The output is captured first and
        # trimmed afterwards: piping ssh straight into head would, with
        # pipefail, report a failure whenever head closes the pipe early.
        log "Collecting restore log from DR VM..."
        if raw_restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
          "type D:\\oracle\\logs\\restore_from_zero.log 2>nul"); then
          restore_log=$(head -n 200 <<<"$raw_restore_log")
        else
          restore_log="Log not available"
        fi

        # Step 6: Cleanup
        step_start=$(date +%s)
        log "STEP 6: Running cleanup"

        if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
          "D:\\oracle\\scripts\\cleanup_database.cmd" 2>/dev/null; then
          # Fixed estimate; the freed space is not measured.
          cleanup_freed=8
          track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
        else
          track_step "Cleanup" false "Cleanup script failed" "$step_start"
          WARNINGS+=("Cleanup failed; DR VM disk may need manual cleanup")
        fi

      else
        track_step "Database Restore" false "Restore failed" "$step_start"
      fi

      # Step 7: Shutdown VM
      step_start=$(date +%s)
      log "STEP 7: Shutting down VM"

      # A non-zero status here must not stop the run before qm stop.
      if ! ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" "shutdown /s /t 30" 2>/dev/null; then
        log_warning "Guest shutdown request failed; relying on qm stop"
      fi
      sleep 60

      if qm stop "$DR_VM_ID" 2>/dev/null; then
        track_step "VM Shutdown" true "VM stopped" "$step_start"
        vm_status_label="Stopped"
      else
        track_step "VM Shutdown" false "qm stop failed for VM $DR_VM_ID" "$step_start"
        vm_status_label="Stop failed"
        WARNINGS+=("DR VM $DR_VM_ID may still be running")
      fi

    else
      track_step "VM Startup" false "Failed to start VM $DR_VM_ID" "$step_start"
      vm_status_label="Failed to start"
    fi
  fi

  # Calculate total duration
  local test_end_time total_duration
  test_end_time=$(date +%s)
  total_duration=$(((test_end_time - TEST_START_TIME) / 60))

  # Prepare notification data
  local steps_json='[]'
  if ((${#TEST_STEPS[@]} > 0)); then
    steps_json=$(printf '%s\n' "${TEST_STEPS[@]}" | jq -s '.')
  fi

  local errors_json='[]'
  local has_errors=false
  if ((${#ERRORS[@]} > 0)); then
    errors_json=$(printf '%s\n' "${ERRORS[@]}" | jq -R . | jq -s .)
    has_errors=true
  fi

  local warnings_json='[]'
  local has_warnings=false
  if ((${#WARNINGS[@]} > 0)); then
    warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -s .)
    has_warnings=true
  fi

  # A passing run that raised warnings is reported one level up.
  if [[ "$is_success" == true && "$has_warnings" == true ]]; then
    severity="warning"
  fi

  local db_status_clean
  db_status_clean=$(printf '%s\n' "$db_status" | tr -d '\r' | sed 's/^ *//;s/ *$//')

  # Escape restore log for JSON
  local restore_log_json
  restore_log_json=$(printf '%s\n' "$restore_log" | jq -Rs .)

  # Build the payload with jq so every string is escaped. Remote output
  # containing quotes, backslashes or newlines would otherwise yield
  # invalid JSON and the notification would fail to decode.
  local json_data
  json_data=$(jq -n \
    --arg severity "$severity" \
    --arg test_result "$test_result" \
    --arg date "$(date '+%Y-%m-%d %H:%M:%S')" \
    --argjson total_duration "$total_duration" \
    --argjson is_success "$is_success" \
    --argjson has_errors "$has_errors" \
    --argjson has_warnings "$has_warnings" \
    --argjson test_steps "$steps_json" \
    --argjson errors "$errors_json" \
    --argjson warnings "$warnings_json" \
    --argjson backup_count "$backup_count" \
    --argjson restore_duration "$restore_duration" \
    --argjson tables_restored "$tables_restored" \
    --arg database_status "${db_status_clean:-UNKNOWN}" \
    --argjson disk_freed "$cleanup_freed" \
    --arg vm_id "$DR_VM_ID" \
    --arg vm_ip "$DR_VM_IP" \
    --arg vm_status "$vm_status_label" \
    --arg nfs_status "${nfs_status:-Unknown}" \
    --arg log_file "$LOG_FILE" \
    --argjson restore_log "$restore_log_json" \
    '{
      severity: $severity,
      test_result: $test_result,
      date: $date,
      total_duration: $total_duration,
      is_success: $is_success,
      has_errors: $has_errors,
      has_warnings: $has_warnings,
      test_steps: $test_steps,
      errors: $errors,
      warnings: $warnings,
      backup_count: $backup_count,
      restore_duration: $restore_duration,
      tables_restored: $tables_restored,
      database_status: $database_status,
      disk_freed: $disk_freed,
      vm_id: $vm_id,
      vm_ip: $vm_ip,
      vm_status: $vm_status,
      nfs_status: $nfs_status,
      log_file: $log_file,
      restore_log: $restore_log
    }')

  # Send notification
  log "Sending notification..."
  send_pve_notification "$severity" "$json_data"

  # Final summary
  log "=========================================="
  log "Oracle DR Test Complete: $test_result"
  log "Duration: $total_duration minutes"
  log "Log: $LOG_FILE"
  log "=========================================="
}

# Main execution
#######################################
# Command-line entry point.
#   (no argument)  create any missing templates, then run the DR test
#   --install      create the notification templates and print next steps
#   --help, -h     print usage
# Any other argument is rejected: previously it fell through to the default
# branch, so a mistyped option started a full DR test.
# Globals:   TEMPLATE_DIR, GREEN, YELLOW, NC (read)
# Arguments: $1 - optional mode flag
# Returns:   2 on an unknown option; otherwise the status of the last
#            command run in the selected mode
#######################################
main() {
  case "${1:-}" in
    --install)
      create_templates
      echo ""
      echo -e "${GREEN}Installation complete!${NC}"
      echo "Next steps:"
      echo "1. Test the script: /opt/scripts/weekly-dr-test-proxmox.sh"
      echo "2. Add to cron: crontab -e"
      echo "   Add line: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh"
      echo "3. Configure notifications in Proxmox GUI if needed:"
      echo "   Datacenter > Notifications > Add matching rules for 'oracle-dr-test'"
      ;;
    --help | -h)
      echo "Oracle DR Weekly Test for Proxmox"
      echo "Usage:"
      echo "  $0           - Run DR test"
      echo "  $0 --install - Create notification templates"
      echo "  $0 --help    - Show this help"
      ;;
    "")
      # Check if templates exist, create if missing. All three files are
      # checked so a partially written set is repaired too.
      local tpl
      local templates_missing=false
      for tpl in \
        oracle-dr-test-subject.txt.hbs \
        oracle-dr-test-body.txt.hbs \
        oracle-dr-test-body.html.hbs; do
        if [[ ! -f "$TEMPLATE_DIR/$tpl" ]]; then
          templates_missing=true
        fi
      done

      if [[ "$templates_missing" == true ]]; then
        echo -e "${YELLOW}Templates not found, creating...${NC}"
        create_templates
        echo ""
      fi

      # Run DR test
      run_dr_test
      ;;
    *)
      printf 'Unknown option: %s\n' "$1" >&2
      printf 'Run "%s --help" for usage.\n' "$0" >&2
      return 2
      ;;
  esac
}

# Check dependencies (jq is used to build the step and notification JSON)
if ! command -v jq > /dev/null 2>&1; then
  echo -e "${RED}Error: jq is not installed${NC}" >&2
  echo "Install with: apt-get install jq" >&2
  exit 1
fi

main "$@"