#!/bin/bash
#
# Oracle DR Weekly Test with Proxmox PVE::Notify
# Automated DR test with notifications via Proxmox notification system
#
# Location: /opt/scripts/weekly-dr-test-proxmox.sh (on Proxmox host)
# Schedule: Add to cron for weekly execution (Saturdays)
#
# This script is SELF-SUFFICIENT:
# - Automatically creates notification templates if they don't exist
# - Uses Proxmox native notification system
# - No email configuration needed - uses existing Proxmox setup
#
# Installation:
#   cp weekly-dr-test-proxmox.sh /opt/scripts/
#   chmod +x /opt/scripts/weekly-dr-test-proxmox.sh
#   /opt/scripts/weekly-dr-test-proxmox.sh --install   # Creates templates
#   crontab -e   # Add: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
#
# Exit status: 0 when the DR test passed, non-zero otherwise.
#
# Author: Claude (based on ha-monitor.sh pattern)
# Version: 1.0

set -euo pipefail

# Configuration
DR_VM_ID="109"
DR_VM_IP="10.0.20.37"
DR_VM_PORT="22122"
DR_VM_USER="romfast"
BACKUP_PATH="/mnt/pve/oracle-backups/ROA/autobackup"
# NOTE(review): MAX_RESTORE_TIME_MIN is not referenced anywhere in the visible
# script; kept so nothing that may rely on it breaks - confirm before removing.
MAX_RESTORE_TIME_MIN=30
VM_BOOT_WAIT_SEC=180        # time given to the DR VM to boot before the first SSH call
VM_SHUTDOWN_WAIT_SEC=60     # time given to the guest to shut down before a hard stop
TEMPLATE_DIR="/usr/share/pve-manager/templates/default"
LOG_DIR="/var/log/oracle-dr"
LOG_FILE="$LOG_DIR/dr_test_$(date +%Y%m%d_%H%M%S).log"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Create log directory
mkdir -p "$LOG_DIR"

#######################################
# Write the three Handlebars templates (subject, text body, HTML body) used by
# the 'oracle-dr-test' notification into TEMPLATE_DIR, overwriting old copies.
# Globals:   TEMPLATE_DIR, GREEN, NC (read)
# Outputs:   progress messages on stdout
#######################################
create_templates() {
    echo -e "${GREEN}Creating Oracle DR test notification templates...${NC}"

    # Create templates directory if needed
    mkdir -p "$TEMPLATE_DIR"

    # Subject template
    cat > "$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
EOF

    # Text body template
    cat > "$TEMPLATE_DIR/oracle-dr-test-body.txt.hbs" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
Severity: {{severity}}

SUMMARY
- Outcome: {{test_result}}
- Duration: {{total_duration}} min (restore {{restore_duration}} min)
- Backups used: {{backup_count}}
- Tables restored: {{tables_restored}}

COMPONENTS
- VM {{vm_id}} ({{vm_ip}}): {{vm_status}}
- NFS: {{nfs_status}}
- Database: {{database_status}}
- Cleanup: {{disk_freed}} GB freed

STEPS
{{#each test_steps}}
- {{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} - {{this.status}}{{/if}}
{{/each}}

{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}

{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}

RMAN RESTORE LOG (complete)
---
{{restore_log}}
---

BASH SCRIPT LOG (last 100 lines)
---
{{bash_log}}
---

Full log: {{log_file}}
Next test: Saturday 06:00
EOF

    # HTML body template (compact Gmail-friendly layout: tables + inline styles)
    cat > "$TEMPLATE_DIR/oracle-dr-test-body.html.hbs" <<'EOF'
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Oracle DR Test {{test_result}} | {{date}}</title>
</head>
<body style="margin:0;padding:12px;font-family:Arial,Helvetica,sans-serif;font-size:13px;color:#222222;">
<table cellpadding="0" cellspacing="0" width="100%" style="max-width:720px;border-collapse:collapse;">
<tr><td style="padding:0 0 10px 0;border-bottom:2px solid #dddddd;">
<div style="font-size:18px;font-weight:bold;">Oracle DR Test {{test_result}}</div>
<div style="color:#666666;">{{date}} · Severity: {{severity}}</div>
</td></tr>
<tr><td style="padding:10px 0;">
<table cellpadding="3" cellspacing="0" style="border-collapse:collapse;">
<tr><td style="color:#666666;padding-right:16px;">Outcome</td><td><b>{{test_result}}</b></td></tr>
<tr><td style="color:#666666;padding-right:16px;">Duration</td><td>{{total_duration}} min (restore {{restore_duration}} min)</td></tr>
<tr><td style="color:#666666;padding-right:16px;">Backups used</td><td>{{backup_count}}</td></tr>
<tr><td style="color:#666666;padding-right:16px;">Tables restored</td><td>{{tables_restored}}</td></tr>
</table>
</td></tr>
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;">Components</div>
<div>VM {{vm_id}} ({{vm_ip}}): {{vm_status}}</div>
<div>NFS: {{nfs_status}}</div>
<div>Database: {{database_status}}</div>
<div>Cleanup: {{disk_freed}} GB freed</div>
</td></tr>
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;">Steps</div>
<table cellpadding="2" cellspacing="0" style="border-collapse:collapse;">
{{#each test_steps}}
<tr><td>{{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} – {{this.status}}{{/if}}</td></tr>
{{/each}}
</table>
</td></tr>
{{#if has_errors}}
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;color:#b00020;">Issues</div>
{{#each errors}}
<div>• {{this}}</div>
{{/each}}
</td></tr>
{{/if}}
{{#if has_warnings}}
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;color:#b26a00;">Warnings</div>
{{#each warnings}}
<div>• {{this}}</div>
{{/each}}
</td></tr>
{{/if}}
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;">RMAN Restore Log (complete)</div>
<pre style="font-family:Consolas,Menlo,monospace;font-size:11px;background:#f5f5f5;padding:8px;white-space:pre-wrap;">{{restore_log}}</pre>
</td></tr>
<tr><td style="padding:6px 0;">
<div style="font-weight:bold;">Bash Script Log (last 100 lines)</div>
<pre style="font-family:Consolas,Menlo,monospace;font-size:11px;background:#f5f5f5;padding:8px;white-space:pre-wrap;">{{bash_log}}</pre>
</td></tr>
<tr><td style="padding:10px 0 0 0;border-top:1px solid #dddddd;color:#666666;">
Full log: {{log_file}} · Next test: Saturday 06:00
</td></tr>
</table>
</body>
</html>
EOF

    echo -e "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}"
}

#######################################
# Send a notification through PVE::Notify using the 'oracle-dr-test' template.
# Arguments: $1 - severity (kept for interface compatibility; the Perl helper
#                 takes the effective severity from the JSON document)
#            $2 - JSON document with the template data
# Outputs:   helper status message on stdout
# Returns:   0 when the notification was handed over, non-zero otherwise
#######################################
send_pve_notification() {
    local data="$2"
    local notify_script
    local rc=0

    # Unpredictable name instead of a fixed path in /tmp
    notify_script=$(mktemp /tmp/oracle-dr-notify.XXXXXX) || return 1

    # Create Perl script to call PVE::Notify
    cat > "$notify_script" <<'PERL_SCRIPT'
#!/usr/bin/perl
use strict;
use warnings;
use PVE::Notify;
use JSON;

# Slurp the complete JSON document from standard input
my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);

my $severity = $data->{severity} // 'info';
my $template_name = 'oracle-dr-test';

# Add fields for matching rules
my $fields = {
    type => 'oracle-dr-test',
    severity => $severity,
    test_result => $data->{test_result},
};

# Send notification
eval {
    PVE::Notify::notify(
        $severity,
        $template_name,
        $data,
        $fields
    );
};

if ($@) {
    print "Error sending notification: $@\n";
    exit 1;
}

print "Notification sent successfully\n";
PERL_SCRIPT

    # Capture the status instead of letting 'set -e' abort, so the temporary
    # script is always removed.
    printf '%s\n' "$data" | perl "$notify_script" || rc=$?
    rm -f -- "$notify_script"
    return "$rc"
}

# Logging functions: every message goes to stdout and is appended to LOG_FILE.
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" | tee -a "$LOG_FILE"
}

log_warning() {
    printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" | tee -a "$LOG_FILE"
}

log_success() {
    printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1" | tee -a "$LOG_FILE"
}

# Test tracking
TEST_STEPS=()
ERRORS=()
WARNINGS=()
TEST_START_TIME=$(date +%s)

#######################################
# Record the outcome of one test step.
# Globals:   TEST_STEPS, ERRORS (appended)
# Arguments: $1 - step name
#            $2 - "true" or "false"
#            $3 - human readable status text
#            $4 - step start time (epoch seconds)
#######################################
track_step() {
    local name="$1"
    local passed="$2"
    local status="$3"
    local start_time="$4"
    local end_time
    local duration
    local step_json

    end_time=$(date +%s)
    duration=$((end_time - start_time))

    step_json=$(jq -n \
        --arg name "$name" \
        --arg status "$status" \
        --arg duration "$duration" \
        --arg passed "$passed" \
        '{name:$name, status:$status, duration:($duration|tonumber), passed:($passed == "true")}'
    )

    TEST_STEPS+=("$step_json")

    if [ "$passed" = "false" ]; then
        ERRORS+=("$name: $status")
    fi
}

#######################################
# Run a command on the DR VM over SSH.
# Arguments: remote command line, passed to ssh unchanged
# Returns:   exit status of ssh / the remote command
#######################################
dr_ssh() {
    ssh -p "$DR_VM_PORT" -o ConnectTimeout=10 "$DR_VM_USER@$DR_VM_IP" "$@"
}

#######################################
# Fetch the RMAN restore log from the DR VM, trying the known locations.
# Arguments: $1 - "success" or "failure"; on "failure" the tail of the RMAN
#                 scripts is used as a last resort
# Outputs:   the log (or a fallback message) on stdout
#######################################
collect_restore_log() {
    local mode="${1:-success}"
    local candidate
    local content
    local rman_tail

    for candidate in \
        'D:\oracle\logs\restore_from_zero.log' \
        'D:\oracle\temp\restore_from_zero.log'; do
        content=$(dr_ssh "powershell -Command \"Get-Content '$candidate' -ErrorAction SilentlyContinue\"" 2>/dev/null || true)
        if [ -n "$content" ]; then
            printf '%s\n' "$content"
            return 0
        fi
    done

    if [ "$mode" = "failure" ]; then
        # Always try to get some error output from RMAN script
        rman_tail=$(dr_ssh "powershell -Command \"Get-Content 'D:\\oracle\\temp\\*.rman' -Tail 20 -ErrorAction SilentlyContinue\"" 2>/dev/null || true)
        if [ -n "$rman_tail" ]; then
            # printf yields a real line break (a literal "\n" inside a
            # double-quoted string would not)
            printf 'RMAN script content (last 20 lines):\n%s\n' "$rman_tail"
        else
            printf '%s\n' "No restore logs or RMAN scripts found"
        fi
    else
        printf '%s\n' "Restore log not available (file may not exist or was not generated)"
    fi
}

#######################################
# Main test workflow: pre-flight checks, VM start, NFS check, restore,
# verification, cleanup, VM shutdown, then one notification with the results.
# Best-effort commands are guarded so that 'set -e' cannot abort the run
# before the notification has been sent.
# Globals:   configuration variables (read); TEST_STEPS, ERRORS, WARNINGS
# Returns:   0 when the DR test passed, 1 otherwise
#######################################
run_dr_test() {
    local test_result="FAILED"
    local severity="error"
    local is_success=false
    local restore_duration=0
    local tables_restored=0
    local db_status="UNKNOWN"
    local nfs_status="Not checked"
    local vm_status_label="Not started"
    local cleanup_freed=0
    local backup_count=0
    local restore_log="Not collected"
    local step_start
    local restore_start
    local restore_end
    local vm_state

    log "=========================================="
    log "Oracle DR Weekly Test - Starting"
    log "=========================================="

    # Step 1: Pre-flight checks
    step_start=$(date +%s)
    log "STEP 1: Pre-flight checks"

    # Check backups exist. A missing/unreadable directory must count as
    # "0 backups" instead of aborting the script through pipefail.
    backup_count=$({ find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null || true; } | wc -l)

    if [ "$backup_count" -lt 2 ]; then
        track_step "Pre-flight checks" false "Insufficient backups (found: $backup_count)" "$step_start"
        test_result="FAILED - No backups"
    else
        track_step "Pre-flight checks" true "Found $backup_count backups" "$step_start"

        # Step 2: Start VM
        step_start=$(date +%s)
        log "STEP 2: Starting DR VM"

        if qm start "$DR_VM_ID" 2>/dev/null; then
            vm_status_label="Running"
            sleep "$VM_BOOT_WAIT_SEC"   # Wait for boot
            track_step "VM Startup" true "VM $DR_VM_ID started" "$step_start"

            # Step 3: Verify NFS mount
            step_start=$(date +%s)
            log "STEP 3: Verifying NFS mount"

            nfs_status="Not Mounted"
            # Test-Path only prints True/False and leaves the exit code at 0,
            # so the result is mapped to an explicit exit code.
            if dr_ssh "powershell -Command \"if (Test-Path 'F:\\ROA\\autobackup') { exit 0 } else { exit 1 }\"" >/dev/null 2>&1; then
                nfs_status="Mounted"
                track_step "NFS Mount Check" true "F:\\ drive accessible" "$step_start"
            else
                track_step "NFS Mount Check" false "F:\\ drive not accessible" "$step_start"
                WARNINGS+=("NFS mount may need manual intervention")
            fi

            # Step 4: Run restore
            step_start=$(date +%s)
            restore_start=$step_start
            log "STEP 4: Running database restore"

            if dr_ssh "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\rman_restore_from_zero.ps1 -TestMode" 2>&1 | tee -a "$LOG_FILE"; then
                restore_end=$(date +%s)
                restore_duration=$(( (restore_end - restore_start) / 60 ))
                track_step "Database Restore" true "Restored in $restore_duration minutes" "$step_start"

                # Step 5: Verify database
                step_start=$(date +%s)
                log "STEP 5: Verifying database"

                # Use PowerShell to query database status (check if contains READ WRITE anywhere)
                # NOTE(review): this command escapes '$' with a backtick, the
                # table count below does not; which form is right depends on
                # the DR VM's default SSH shell. Both are kept unchanged.
                db_status=$(dr_ssh "powershell -Command \"echo 'SELECT OPEN_MODE FROM V\`\$DATABASE;' | sqlplus -s / as sysdba | Out-String | Select-String -Pattern 'READ WRITE' -Quiet\"" 2>/dev/null || echo "false")

                # Convert PowerShell True/False to bash-friendly value
                if [[ "$db_status" == *"True"* ]]; then
                    db_status="READ WRITE"
                else
                    db_status=""
                fi

                # Use PowerShell to count tables
                tables_restored=$(dr_ssh "powershell -Command \"'SELECT COUNT(*) FROM DBA_TABLES WHERE OWNER NOT IN (''SYS'',''SYSTEM'');' | sqlplus -s / as sysdba | Select-String -Pattern '[0-9]+' | ForEach-Object { \$_.Matches[0].Value } | Select-Object -Last 1\"" || echo "0")
                tables_restored=$(printf '%s' "$tables_restored" | tr -cd '0-9')
                if [ -z "$tables_restored" ]; then
                    tables_restored=0
                fi

                if [ "$db_status" = "READ WRITE" ]; then
                    track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
                    test_result="PASSED"
                    severity="info"
                    is_success=true
                else
                    track_step "Database Verification" false "Database not OPEN" "$step_start"
                fi

                # Collect restore log from VM (always attempt collection - FULL log)
                log "Collecting restore log from DR VM..."
                restore_log=$(collect_restore_log success)

                # Step 6: Cleanup
                step_start=$(date +%s)
                log "STEP 6: Running cleanup"

                if dr_ssh "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT" 2>/dev/null; then
                    cleanup_freed=8
                    track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
                else
                    # Report the failure instead of dying silently under 'set -e'
                    track_step "Cleanup" false "Cleanup script failed" "$step_start"
                    WARNINGS+=("Cleanup failed - DR VM disk space may need manual cleanup")
                fi
            else
                # Collect restore log even when restore fails (FULL log)
                log "Collecting restore log after failure..."
                restore_log=$(collect_restore_log failure)
                track_step "Database Restore" false "Restore failed" "$step_start"
            fi

            # Step 7: Shutdown VM
            step_start=$(date +%s)
            log "STEP 7: Shutting down VM"

            # Both commands are best effort: the guest may already be down
            # when the hard stop is issued.
            dr_ssh "shutdown /s /t 30" 2>/dev/null || true
            sleep "$VM_SHUTDOWN_WAIT_SEC"
            qm stop "$DR_VM_ID" 2>/dev/null || true

            vm_state=$(qm status "$DR_VM_ID" 2>/dev/null || true)
            if [[ "$vm_state" == *"status: stopped"* ]]; then
                track_step "VM Shutdown" true "VM stopped" "$step_start"
                vm_status_label="Stopped"
            else
                track_step "VM Shutdown" false "VM $DR_VM_ID still running after stop request" "$step_start"
                vm_status_label="Still running"
            fi
        else
            track_step "VM Startup" false "Failed to start VM $DR_VM_ID" "$step_start"
            vm_status_label="Failed to start"
        fi
    fi

    # Calculate total duration
    local test_end_time
    local total_duration
    test_end_time=$(date +%s)
    total_duration=$(( (test_end_time - TEST_START_TIME) / 60 ))

    # Prepare notification data
    local steps_json='[]'
    local errors_json='[]'
    local warnings_json='[]'
    local has_errors=false
    local has_warnings=false

    if [ ${#TEST_STEPS[@]} -gt 0 ]; then
        steps_json=$(printf '%s\n' "${TEST_STEPS[@]}" | jq -s '.')
    fi

    if [ ${#ERRORS[@]} -gt 0 ]; then
        has_errors=true
        errors_json=$(printf '%s\n' "${ERRORS[@]}" | jq -R . | jq -s .)
    fi

    if [ ${#WARNINGS[@]} -gt 0 ]; then
        has_warnings=true
        warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -s .)
    fi

    if [ "$is_success" = true ] && [ "$has_warnings" = true ]; then
        severity="warning"
    fi

    local db_status_clean
    db_status_clean=$(printf '%s\n' "$db_status" | tr -d '\r' | sed 's/^ *//;s/ *$//')

    # Collect last 100 lines of bash script log
    local bash_log
    bash_log=$(tail -n 100 "$LOG_FILE" 2>/dev/null || echo "Bash log not available")

    # Build the payload with jq so every value is escaped correctly. The two
    # logs can be large, so they are passed as files (process substitution)
    # rather than as command-line arguments.
    local json_data
    if ! json_data=$(
        jq -n \
            --arg severity "$severity" \
            --arg test_result "$test_result" \
            --arg date "$(date '+%Y-%m-%d %H:%M:%S')" \
            --arg total_duration "$total_duration" \
            --arg restore_duration "$restore_duration" \
            --arg backup_count "$backup_count" \
            --arg tables_restored "$tables_restored" \
            --arg vm_id "$DR_VM_ID" \
            --arg vm_ip "$DR_VM_IP" \
            --arg vm_status "$vm_status_label" \
            --arg nfs_status "$nfs_status" \
            --arg database_status "${db_status_clean:-NOT OPEN}" \
            --arg disk_freed "$cleanup_freed" \
            --arg log_file "$LOG_FILE" \
            --argjson is_success "$is_success" \
            --argjson has_errors "$has_errors" \
            --argjson has_warnings "$has_warnings" \
            --argjson test_steps "$steps_json" \
            --argjson errors "$errors_json" \
            --argjson warnings "$warnings_json" \
            --rawfile restore_log <(printf '%s\n' "$restore_log") \
            --rawfile bash_log <(printf '%s\n' "$bash_log") \
            '{
                severity: $severity,
                test_result: $test_result,
                date: $date,
                is_success: $is_success,
                total_duration: $total_duration,
                restore_duration: $restore_duration,
                backup_count: $backup_count,
                tables_restored: $tables_restored,
                vm_id: $vm_id,
                vm_ip: $vm_ip,
                vm_status: $vm_status,
                nfs_status: $nfs_status,
                database_status: $database_status,
                disk_freed: $disk_freed,
                test_steps: $test_steps,
                has_errors: $has_errors,
                errors: $errors,
                has_warnings: $has_warnings,
                warnings: $warnings,
                restore_log: $restore_log,
                bash_log: $bash_log,
                log_file: $log_file
            }'
    ); then
        log_error "Could not build notification data"
        return 1
    fi

    # Send notification
    log "Sending notification via PVE::Notify (severity: $severity)"
    if send_pve_notification "$severity" "$json_data"; then
        log "Notification sent"
    else
        log_warning "Notification could not be sent"
    fi

    log "=========================================="
    if [ "$is_success" = true ]; then
        log_success "Oracle DR test $test_result in $total_duration minutes"
        return 0
    fi

    log_error "Oracle DR test $test_result (see $LOG_FILE)"
    return 1
}

#######################################
# Entry point.
# Arguments: --install  create the notification templates
#            --help     show usage
#            (none)     run the DR test, creating missing templates first
#######################################
main() {
    local template

    case "${1:-}" in
        --install)
            create_templates
            echo ""
            echo "Configure in Proxmox: Datacenter > Notifications > Add matching rules for 'oracle-dr-test'"
            ;;
        --help)
            echo "Oracle DR Weekly Test for Proxmox"
            echo "Usage:"
            echo "  $0           - Run DR test"
            echo "  $0 --install - Create notification templates"
            echo "  $0 --help    - Show this help"
            ;;
        *)
            # Check if templates exist, create if missing
            for template in subject.txt body.txt body.html; do
                if [ ! -f "$TEMPLATE_DIR/oracle-dr-test-$template.hbs" ]; then
                    echo -e "${YELLOW}Templates not found, creating...${NC}"
                    create_templates
                    echo ""
                    break
                fi
            done

            # Run DR test
            run_dr_test
            ;;
    esac
}

# Check dependencies
if ! command -v jq &> /dev/null; then
    echo -e "${RED}Error: jq is not installed${NC}" >&2
    echo "Install with: apt-get install jq" >&2
    exit 1
fi

main "$@"