diff --git a/oracle/standby-server-scripts/PLAN_TESTARE_MONITORIZARE.md b/oracle/standby-server-scripts/PLAN_TESTARE_MONITORIZARE.md index 9b9323f..a38bed6 100644 --- a/oracle/standby-server-scripts/PLAN_TESTARE_MONITORIZARE.md +++ b/oracle/standby-server-scripts/PLAN_TESTARE_MONITORIZARE.md @@ -53,7 +53,14 @@ 3. Validare log-uri și rapoarte generate 4. Configurare cron pentru execuție automată -### Faza 5: Testare Erori și Edge Cases +### Faza 5: Validare Format Notificări +1. Reinstalare template-uri compacte: `/opt/scripts/oracle-backup-monitor-proxmox.sh --install` +2. Generare notificări reale din scripturi (backup monitor + DR test) și analiză în clienți email +3. Verificare afișare în client email (text + HTML) și în GUI Proxmox +4. Rulare `weekly-dr-test-proxmox.sh` în mediu controlat și validare sumar compact în email (inclusiv componente, pași, timeline) +5. Capturare feedback utilizatori finali (Gmail + Outlook) pentru lizibilitate + +### Faza 6: Testare Erori și Edge Cases 1. Testare fără conectivitate la VM DR 2. Testare director backup-uri gol 3. Testare eșec restaurare database diff --git a/oracle/standby-server-scripts/oracle-backup-monitor-proxmox.sh b/oracle/standby-server-scripts/oracle-backup-monitor-proxmox.sh index 61c2f00..f7d7fae 100644 --- a/oracle/standby-server-scripts/oracle-backup-monitor-proxmox.sh +++ b/oracle/standby-server-scripts/oracle-backup-monitor-proxmox.sh @@ -46,81 +46,167 @@ create_templates() { # Subject template cat > "$TEMPLATE_DIR/oracle-backup-subject.txt.hbs" <<'EOF' -Oracle Backup {{severity}} - {{node}} +Oracle Backup {{status}} | {{node}} EOF # Text body template cat > "$TEMPLATE_DIR/oracle-backup-body.txt.hbs" <<'EOF' -Oracle Backup {{severity}} - {{node}} -{{status}} +Oracle Backup {{status}} | {{node}} +Date: {{date}} -======================================== -{{#if errors}} -CRITICAL ISSUES: +SUMMARY +- Full backup: {{full_backup_age}}h (limit {{full_backup_limit}}h) -> {{#if full_backup_ok}}OK{{else}}CHECK{{/if}} +- Incremental: {{cumulative_backup_age}}h (limit {{cumulative_backup_limit}}h) -> {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}} +- Backups: {{total_backups}} files ({{total_size_label}}) +- Disk usage: {{disk_usage}}% + +{{#if has_errors}} +ISSUES {{#each errors}} - {{this}} {{/each}} {{/if}} -{{#if warnings}} -WARNINGS: +{{#if has_warnings}} +WARNINGS {{#each warnings}} - {{this}} {{/each}} {{/if}} -======================================== -BACKUP STATUS: -FULL: {{full_backup_age}}h old {{#if full_backup_ok}}OK{{else}}TOO OLD{{/if}} (limit: 25h) -CUMULATIVE: {{cumulative_backup_age}}h old {{#if cumulative_backup_ok}}OK{{else}}TOO OLD{{/if}} (limit: 7h) -Total: {{total_backups}} files | Size: {{total_size_gb}}GB | Disk: {{disk_usage}}% - -{{#if backup_list}} -LATEST BACKUPS (last 5): -{{#each backup_list}} +FULL BACKUPS ({{full_backup_count}} files) +{{#if has_full_backups}} +{{#each full_backup_list}} - {{this}} {{/each}} +{{else}} +- none detected {{/if}} -======================================== -Next check: {{date}} + 24h | Proxmox Monitoring +INCREMENTAL BACKUPS ({{incr_backup_count}} files) +{{#if has_incr_backups}} +{{#each incr_backup_list}} +- {{this}} +{{/each}} +{{else}} +- none detected +{{/if}} + +Next check: +24h via Proxmox Monitor EOF - # HTML body template (identical to text for compatibility) + # HTML body template (lightweight Gmail-friendly) cat > "$TEMPLATE_DIR/oracle-backup-body.html.hbs" <<'EOF' -Oracle Backup {{severity}} - {{node}} -{{status}} + + + + + +Oracle Backup {{status}} | {{node}} + + + + + + + + + + + + -======================================== -{{#if errors}} -CRITICAL ISSUES: -{{#each errors}} -- {{this}} -{{/each}} -{{/if}} + {{#if has_errors}} + + + + {{/if}} -{{#if warnings}} -WARNINGS: -{{#each warnings}} -- {{this}} -{{/each}} -{{/if}} + {{#if has_warnings}} + + + + {{/if}} -======================================== -BACKUP STATUS: -FULL: {{full_backup_age}}h old {{#if full_backup_ok}}OK{{else}}TOO OLD{{/if}} (limit: 25h) -CUMULATIVE: {{cumulative_backup_age}}h old {{#if cumulative_backup_ok}}OK{{else}}TOO OLD{{/if}} (limit: 7h) -Total: {{total_backups}} files | Size: {{total_size_gb}}GB | Disk: {{disk_usage}}% + + + -{{#if backup_list}} -LATEST BACKUPS (last 5): -{{#each backup_list}} -- {{this}} -{{/each}} -{{/if}} + + + -======================================== -Next check: {{date}} + 24h | Proxmox Monitoring + + + +
+ Oracle Backup {{status}} | {{node}} +
+ {{date}} +
+ + + + + + + + + + + + + + + + + +
Full backup + {{full_backup_age}}h / {{full_backup_limit}}h · {{#if full_backup_ok}}OK{{else}}CHECK{{/if}} +
Incremental + {{cumulative_backup_age}}h / {{cumulative_backup_limit}}h · {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}} +
Backups{{total_backups}} files ({{total_size_label}})
Disk usage{{disk_usage}}%
+
+ + + {{#each errors}} + + {{/each}} +
Issues
• {{this}}
+
+ + + {{#each warnings}} + + {{/each}} +
Warnings
• {{this}}
+
+ + + {{#if has_full_backups}} + {{#each full_backup_list}} + + {{/each}} + {{else}} + + {{/if}} +
FULL Backups ({{full_backup_count}} files)
• {{this}}
• none detected
+
+ + + {{#if has_incr_backups}} + {{#each incr_backup_list}} + + {{/each}} + {{else}} + + {{/if}} +
INCREMENTAL Backups ({{incr_backup_count}} files)
• {{this}}
• none detected
+
+ Next automated check: +24h via Proxmox Monitor +
+ + EOF echo -e "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}" @@ -150,7 +236,7 @@ my $template_name = 'oracle-backup'; my $fields = { type => 'oracle-backup', severity => $severity, - hostname => $data->{hostname}, + hostname => $data->{node} // 'unknown', }; # Send notification @@ -187,79 +273,147 @@ check_backups() { echo "Checking Oracle backups..." - # Get backup list - local backup_files=$(ls -lth "$BACKUP_PATH"/*.BKP 2>/dev/null | head -10 || echo "") + local total_backups=0 + local total_size_label="0G" + local full_age_hours="N/A" + local cumulative_age_hours="N/A" + local full_backup_ok=false + local cumulative_backup_ok=false + local disk_usage=0 + local -a backup_entries=() - if [ -z "$backup_files" ]; then + if [ ! -d "$BACKUP_PATH" ]; then status="ERROR" - errors+=("No backup files found in $BACKUP_PATH") + errors+=("Backup path $BACKUP_PATH not accessible") else - # Count backups - local total_backups=$(ls "$BACKUP_PATH"/*.BKP 2>/dev/null | wc -l) - local total_size=$(du -shc "$BACKUP_PATH"/*.BKP 2>/dev/null | tail -1 | awk '{print $1}') + if compgen -G "$BACKUP_PATH"/*.BKP > /dev/null; then + total_backups=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' | wc -l) + total_backups=${total_backups//[[:space:]]/} + [ -z "$total_backups" ] && total_backups=0 + local total_size=$(du -shc "$BACKUP_PATH"/*.BKP 2>/dev/null | tail -1 | awk '{print $1}') + [ -z "$total_size" ] && total_size="0G" + total_size_label="$total_size" - # Check FULL backup age - local latest_full=$(ls -t "$BACKUP_PATH"/*FULL*.BKP 2>/dev/null | head -1 || echo "") - local full_age_hours="N/A" - local full_backup_ok=false - - if [ -n "$latest_full" ]; then - local full_timestamp=$(stat -c %Y "$latest_full") - local current_timestamp=$(date +%s) - full_age_hours=$(( (current_timestamp - full_timestamp) / 3600 )) - - if [ "$full_age_hours" -gt "$MAX_FULL_AGE_HOURS" ]; then - status="WARNING" - warnings+=("FULL backup is $full_age_hours hours old (threshold: $MAX_FULL_AGE_HOURS)") + local latest_full=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*FULL*.BKP' -printf '%T@ %p\n' | sort -nr | head -1 | cut -d' ' -f2-) + if [ -n "$latest_full" ]; then + local full_timestamp=$(stat -c %Y "$latest_full") + local current_timestamp=$(date +%s) + full_age_hours=$(( (current_timestamp - full_timestamp) / 3600 )) + if [ "$full_age_hours" -gt "$MAX_FULL_AGE_HOURS" ]; then + status="WARNING" + warnings+=("FULL backup is $full_age_hours hours old (threshold: $MAX_FULL_AGE_HOURS)") + else + full_backup_ok=true + fi else - full_backup_ok=true + status="ERROR" + errors+=("No FULL backup found") + fi + + local latest_cumulative=$(find "$BACKUP_PATH" -maxdepth 1 -type f \( -name '*INCR*.BKP' -o -name '*INCREMENTAL*.BKP' -o -name '*CUMULATIVE*.BKP' \) -printf '%T@ %p\n' | sort -nr | head -1 | cut -d' ' -f2-) + if [ -n "$latest_cumulative" ]; then + local cumulative_timestamp=$(stat -c %Y "$latest_cumulative") + local current_timestamp=$(date +%s) + cumulative_age_hours=$(( (current_timestamp - cumulative_timestamp) / 3600 )) + if [ "$cumulative_age_hours" -gt "$MAX_CUMULATIVE_AGE_HOURS" ]; then + if [ "$status" != "ERROR" ]; then status="WARNING"; fi + warnings+=("CUMULATIVE backup is $cumulative_age_hours hours old (threshold: $MAX_CUMULATIVE_AGE_HOURS)") + else + cumulative_backup_ok=true + fi + fi + + # Collect ALL FULL backups + local -a full_backups=() + local -a full_backup_entries=() + if readarray -t full_backups < <(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*FULL*.BKP' -printf '%T@ %p\n' | sort -nr | cut -d' ' -f2-); then + for backup_file in "${full_backups[@]}"; do + [ -z "$backup_file" ] && continue + local backup_name=$(basename "$backup_file") + local backup_time=$(date -r "$backup_file" '+%Y-%m-%d %H:%M') + local backup_size=$(du -sh "$backup_file" 2>/dev/null | cut -f1) + [ -z "$backup_size" ] && backup_size="N/A" + full_backup_entries+=("$backup_time | $backup_name | $backup_size") + done + fi + + # Collect ALL INCREMENTAL backups + local -a incr_backups=() + local -a incr_backup_entries=() + if readarray -t incr_backups < <(find "$BACKUP_PATH" -maxdepth 1 -type f \( -name '*INCR*.BKP' -o -name '*INCREMENTAL*.BKP' -o -name '*CUMULATIVE*.BKP' \) -printf '%T@ %p\n' | sort -nr | cut -d' ' -f2-); then + for backup_file in "${incr_backups[@]}"; do + [ -z "$backup_file" ] && continue + local backup_name=$(basename "$backup_file") + local backup_time=$(date -r "$backup_file" '+%Y-%m-%d %H:%M') + local backup_size=$(du -sh "$backup_file" 2>/dev/null | cut -f1) + [ -z "$backup_size" ] && backup_size="N/A" + incr_backup_entries+=("$backup_time | $backup_name | $backup_size") + done fi else status="ERROR" - errors+=("No FULL backup found") + errors+=("No backup files found in $BACKUP_PATH") fi - # Check CUMULATIVE backup age - local latest_cumulative=$(ls -t "$BACKUP_PATH"/*INCR*.BKP "$BACKUP_PATH"/*INCREMENTAL*.BKP "$BACKUP_PATH"/*CUMULATIVE*.BKP 2>/dev/null | head -1 || echo "") - local cumulative_age_hours="N/A" - local cumulative_backup_ok=false - - if [ -n "$latest_cumulative" ]; then - local cumulative_timestamp=$(stat -c %Y "$latest_cumulative") - local current_timestamp=$(date +%s) - cumulative_age_hours=$(( (current_timestamp - cumulative_timestamp) / 3600 )) - - if [ "$cumulative_age_hours" -gt "$MAX_CUMULATIVE_AGE_HOURS" ]; then - if [ "$status" != "ERROR" ]; then status="WARNING"; fi - warnings+=("CUMULATIVE backup is $cumulative_age_hours hours old (threshold: $MAX_CUMULATIVE_AGE_HOURS)") - else - cumulative_backup_ok=true - fi + local disk_usage_raw=$(df "$BACKUP_PATH" 2>/dev/null | tail -1 | awk '{print int($5)}') + if [ -n "$disk_usage_raw" ]; then + disk_usage="$disk_usage_raw" + else + if [ "$status" = "OK" ]; then status="WARNING"; fi + warnings+=("Unable to determine disk usage for $BACKUP_PATH") fi + fi - # Check disk usage - local disk_usage=$(df "$BACKUP_PATH" | tail -1 | awk '{print int($5)}') + if [ "$disk_usage" -gt 90 ]; then + status="ERROR" + errors+=("Disk usage critical: ${disk_usage}%") + elif [ "$disk_usage" -gt 80 ]; then + if [ "$status" != "ERROR" ]; then status="WARNING"; fi + warnings+=("Disk usage high: ${disk_usage}%") + fi - if [ "$disk_usage" -gt 90 ]; then - status="ERROR" - errors+=("Disk usage critical: ${disk_usage}%") - elif [ "$disk_usage" -gt 80 ]; then - if [ "$status" != "ERROR" ]; then status="WARNING"; fi - warnings+=("Disk usage high: ${disk_usage}%") - fi + local severity="info" + [ "$status" = "WARNING" ] && severity="warning" + [ "$status" = "ERROR" ] && severity="error" - # Prepare notification data - local severity="info" - [ "$status" = "WARNING" ] && severity="warning" - [ "$status" = "ERROR" ] && severity="error" + local errors_json + if [ ${#errors[@]} -eq 0 ]; then + errors_json='[]' + else + errors_json=$(printf '%s\n' "${errors[@]}" | jq -R . | jq -s .) + fi - # Convert arrays to JSON arrays - local errors_json=$(printf '%s\n' "${errors[@]}" | jq -R . | jq -s .) - local warnings_json=$(printf '%s\n' "${warnings[@]}" | jq -R . | jq -s .) - local backup_list_json=$(echo "$backup_files" | head -5 | jq -R . | jq -s .) + local warnings_json + if [ ${#warnings[@]} -eq 0 ]; then + warnings_json='[]' + else + warnings_json=$(printf '%s\n' "${warnings[@]}" | jq -R . | jq -s .) + fi - # Create JSON data - local json_data=$(cat < "$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs" <<'EOF' -Oracle DR Test {{severity}} - {{test_result}} +Oracle DR Test {{test_result}} | {{date}} EOF # Text body template cat > "$TEMPLATE_DIR/oracle-dr-test-body.txt.hbs" <<'EOF' -Oracle DR Test {{severity}} - {{test_result}} -{{#if is_success}}TEST PASSED✓{{else}}TEST FAILED✗{{/if}} +Oracle DR Test {{test_result}} | {{date}} +Severity: {{severity}} -======================================== -SUMMARY: Duration {{total_duration}}min | Tables {{tables_restored}} | Backups {{backup_count}} +SUMMARY +- Outcome: {{test_result}} +- Duration: {{total_duration}} min (restore {{restore_duration}} min) +- Backups used: {{backup_count}} +- Tables restored: {{tables_restored}} -TEST STEPS: +COMPONENTS +- VM {{vm_id}} ({{vm_ip}}): {{vm_status}} +- NFS: {{nfs_status}} +- Database: {{database_status}} +- Cleanup: {{disk_freed}} GB freed + +STEPS {{#each test_steps}} -- {{#if this.passed}}PASS{{else}}FAIL{{/if}}: {{this.name}} ({{this.duration}}s) -{{#if this.details}} - Details: {{this.details}} +- {{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} - {{this.status}}{{/if}} {{/each}} -======================================== -COMPONENT STATUS: -DR VM: ID {{vm_id}} ({{vm_ip}}) - {{vm_status}} -NFS Mount: {{nfs_status}} - {{#if nfs_ok}}OK{{else}}FAILED{{/if}} -Database: {{database_status}} - {{#if database_ok}}OK{{else}}FAILED{{/if}} -Disk Space: {{disk_freed}}GB freed - OK - -{{#if errors}} -ERRORS: +{{#if has_errors}} +ISSUES {{#each errors}} - {{this}} {{/each}} {{/if}} -======================================== +{{#if has_warnings}} +WARNINGS +{{#each warnings}} +- {{this}} +{{/each}} +{{/if}} + +RESTORE LOG (first 200 lines) +--- +{{restore_log}} +--- + Log: {{log_file}} -Next scheduled test: Next Saturday 06:00 -Proxmox DR Monitoring System +Next test: Saturday 06:00 EOF - # HTML body template (identical to text for compatibility) + # HTML body template (compact Gmail-friendly layout) cat > "$TEMPLATE_DIR/oracle-dr-test-body.html.hbs" <<'EOF' -Oracle DR Test {{severity}} - {{test_result}} -{{#if is_success}}TEST PASSED✓{{else}}TEST FAILED✗{{/if}} - -======================================== -SUMMARY: Duration {{total_duration}}min | Tables {{tables_restored}} | Backups {{backup_count}} - -TEST STEPS: -{{#each test_steps}} -- {{#if this.passed}}PASS{{else}}FAIL{{/if}}: {{this.name}} ({{this.duration}}s) -{{#if this.details}} - Details: {{this.details}} -{{/each}} - -======================================== -COMPONENT STATUS: -DR VM: ID {{vm_id}} ({{vm_ip}}) - {{vm_status}} -NFS Mount: {{nfs_status}} - {{#if nfs_ok}}OK{{else}}FAILED{{/if}} -Database: {{database_status}} - {{#if database_ok}}OK{{else}}FAILED{{/if}} -Disk Space: {{disk_freed}}GB freed - OK - -{{#if errors}} -ERRORS: -{{#each errors}} -- {{this}} -{{/each}} -{{/if}} - -======================================== -Log: {{log_file}} -Next scheduled test: Next Saturday 06:00 -Proxmox DR Monitoring System -EOF - .section { - margin: 20px 0; - padding: 15px; - background-color: #f8f9fa; - border-radius: 5px; - } - .success { color: #28a745; font-weight: bold; } - .error { color: #dc3545; font-weight: bold; } - .warning { color: #ffc107; font-weight: bold; } - .info { color: #17a2b8; } - - .test-steps { - margin: 20px 0; - } - .step { - padding: 10px; - margin: 5px 0; - border-left: 4px solid; - background-color: white; - } - .step.passed { - border-color: #28a745; - } - .step.failed { - border-color: #dc3545; - background-color: #f8d7da; - } - - .metrics { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); - gap: 15px; - margin: 20px 0; - } - .metric-card { - background: white; - padding: 15px; - border-radius: 5px; - text-align: center; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); - } - .metric-value { - font-size: 24px; - font-weight: bold; - color: #495057; - } - .metric-label { - font-size: 14px; - color: #6c757d; - margin-top: 5px; - } - - .timeline { - position: relative; - padding: 20px 0; - } - .timeline-item { - display: flex; - margin-bottom: 20px; - } - .timeline-marker { - width: 20px; - height: 20px; - border-radius: 50%; - margin-right: 15px; - flex-shrink: 0; - } - .timeline-marker.success { - background-color: #28a745; - } - .timeline-marker.failed { - background-color: #dc3545; - } - - table { - width: 100%; - border-collapse: collapse; - margin: 10px 0; - } - th, td { - padding: 10px; - text-align: left; - border-bottom: 1px solid #dee2e6; - } - th { - background-color: #e9ecef; - font-weight: bold; - } - + + + + + +Oracle DR Test {{test_result}} | {{date}} - -
-

Oracle DR Test Report

-

{{#if is_success}}✓ TEST PASSED{{else}}✗ TEST FAILED{{/if}}

-

{{date}} | Duration: {{total_duration}} minutes

-
- -
-

Test Summary

-
-
-
{{test_result}}
-
Test Result
-
-
-
{{restore_duration}}
-
Restore Time (min)
-
-
-
{{tables_restored}}
-
Tables Restored
-
-
-
{{backup_count}}
-
Backups Used
-
-
-
- -
-

Test Steps Timeline

-
- {{#each test_steps}} -
-
-
-
- {{this.name}} - {{this.duration}}s -
- {{#if this.passed}} - ✓ {{this.status}} - {{else}} - ✗ {{this.status}} - {{/if}} -
- {{#if this.details}} -
- {{this.details}} -
- {{/if}} -
-
-
- {{/each}} -
-
- - {{#if errors}} -
-

Errors Encountered

- -
- {{/if}} - - {{#if warnings}} -
-

Warnings

- -
- {{/if}} - -
-

System Details

- - - - - - - - - - - - - - - - - - - - - - - - - - + +
ComponentValueStatus
DR VMID: {{vm_id}} ({{vm_ip}}){{vm_status}}
NFS MountF:\ drive{{nfs_status}}
DatabaseROA{{database_status}}
Disk Space Freed{{disk_freed}} GB
+ + + + + + + + + -
-

- Log File: {{log_file}}
- Next Scheduled Test: Next Saturday 06:00 -

-
+ + + + + + + + + {{#if has_errors}} + + + + {{/if}} + + {{#if has_warnings}} + + + + {{/if}} + + + + + + + + +
+ Oracle DR Test {{test_result}} +
{{date}} · Severity: {{severity}}
+ + + + +
Outcome{{test_result}}
Duration{{total_duration}} min (restore {{restore_duration}} min)
Backups used{{backup_count}}
Tables restored{{tables_restored}}
- +
+ + + + + + +
Components
VM {{vm_id}} ({{vm_ip}}): {{vm_status}}
NFS: {{nfs_status}}
Database: {{database_status}}
Cleanup: {{disk_freed}} GB freed
+
+ + + {{#each test_steps}} + + + + {{/each}} +
Steps
{{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} – {{this.status}}{{/if}}
+
+ + + {{#each errors}} + + {{/each}} +
Issues
• {{this}}
+
+ + + {{#each warnings}} + + {{/each}} +
Warnings
• {{this}}
+
+ + + +
Restore Log (first 200 lines)
{{restore_log}}
+
+ Log: {{log_file}} · Next test: Saturday 06:00 +
EOF @@ -421,7 +286,16 @@ track_step() { local end_time=$(date +%s) local duration=$((end_time - start_time)) - TEST_STEPS+=("{\"name\":\"$name\",\"passed\":$passed,\"status\":\"$status\",\"duration\":$duration}") + local step_json + step_json=$(jq -n \ + --arg name "$name" \ + --arg status "$status" \ + --arg duration "$duration" \ + --arg passed "$passed" \ + '{name:$name, status:$status, duration:($duration|tonumber), passed:($passed == "true")}' + ) + + TEST_STEPS+=("$step_json") if [ "$passed" = "false" ]; then ERRORS+=("$name: $status") @@ -433,6 +307,14 @@ run_dr_test() { local test_result="FAILED" local severity="error" local is_success=false + local restore_duration=0 + local tables_restored=0 + local db_status="UNKNOWN" + local nfs_status="Not checked" + local vm_status_label="Not started" + local cleanup_freed=0 + local backup_count=0 + local restore_log="Not collected" log "==========================================" log "Oracle DR Weekly Test - Starting" @@ -443,7 +325,7 @@ run_dr_test() { log "STEP 1: Pre-flight checks" # Check backups exist - local backup_count=$(ls "$BACKUP_PATH"/*.BKP 2>/dev/null | wc -l || echo "0") + backup_count=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null | wc -l) if [ "$backup_count" -lt 2 ]; then track_step "Pre-flight checks" false "Insufficient backups (found: $backup_count)" "$step_start" @@ -456,6 +338,7 @@ run_dr_test() { log "STEP 2: Starting DR VM" if qm start "$DR_VM_ID" 2>/dev/null; then + vm_status_label="Running" sleep 180 # Wait for boot track_step "VM Startup" true "VM $DR_VM_ID started" "$step_start" @@ -463,7 +346,7 @@ run_dr_test() { step_start=$(date +%s) log "STEP 3: Verifying NFS mount" - local nfs_status="Not Mounted" + nfs_status="Not Mounted" if ssh -p "$DR_VM_PORT" -o ConnectTimeout=10 "$DR_VM_USER@$DR_VM_IP" \ "powershell -Command 'Test-Path F:\\ROA\\autobackup'" 2>/dev/null; then nfs_status="Mounted" @@ -482,7 +365,7 @@ run_dr_test() { "D:\\oracle\\scripts\\rman_restore_from_zero.cmd" 2>&1 | tee -a "$LOG_FILE"; then local restore_end=$(date +%s) - local restore_duration=$(( (restore_end - restore_start) / 60 )) + restore_duration=$(( (restore_end - restore_start) / 60 )) track_step "Database Restore" true "Restored in $restore_duration minutes" "$step_start" @@ -490,11 +373,13 @@ run_dr_test() { step_start=$(date +%s) log "STEP 5: Verifying database" - local db_status=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ + db_status=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ "cmd /c 'echo SELECT STATUS FROM V\$INSTANCE; | sqlplus -s / as sysdba' | findstr OPEN" || echo "") - local tables_restored=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ + tables_restored=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ "cmd /c 'echo SELECT COUNT(*) FROM DBA_TABLES WHERE OWNER NOT IN (''SYS'',''SYSTEM''); | sqlplus -s / as sysdba' | grep -o '[0-9]*' | tail -1" || echo "0") + tables_restored=$(echo "$tables_restored" | tr -cd '0-9') + [ -z "$tables_restored" ] && tables_restored=0 if [[ "$db_status" =~ "OPEN" ]]; then track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start" @@ -505,6 +390,11 @@ run_dr_test() { track_step "Database Verification" false "Database not OPEN" "$step_start" fi + # Collect restore log from VM + log "Collecting restore log from DR VM..." + restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ + "type D:\\oracle\\logs\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available") + # Step 6: Cleanup step_start=$(date +%s) log "STEP 6: Running cleanup" @@ -512,7 +402,8 @@ run_dr_test() { ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ "D:\\oracle\\scripts\\cleanup_database.cmd" 2>/dev/null - track_step "Cleanup" true "Database cleaned, ~8GB freed" "$step_start" + cleanup_freed=8 + track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start" else track_step "Database Restore" false "Restore failed" "$step_start" @@ -527,9 +418,11 @@ run_dr_test() { qm stop "$DR_VM_ID" 2>/dev/null track_step "VM Shutdown" true "VM stopped" "$step_start" + vm_status_label="Stopped" else track_step "VM Startup" false "Failed to start VM $DR_VM_ID" "$step_start" + vm_status_label="Failed to start" fi fi @@ -538,9 +431,41 @@ run_dr_test() { local total_duration=$(( (test_end_time - TEST_START_TIME) / 60 )) # Prepare notification data - local steps_json=$(printf '%s,' "${TEST_STEPS[@]}" | sed 's/,$//') - local errors_json=$(printf '"%s",' "${ERRORS[@]}" | sed 's/,$//') - local warnings_json=$(printf '"%s",' "${WARNINGS[@]}" | sed 's/,$//') + local steps_json + if [ ${#TEST_STEPS[@]} -eq 0 ]; then + steps_json='[]' + else + steps_json=$(printf '%s\n' "${TEST_STEPS[@]}" | jq -s '.') + fi + + local errors_json + if [ ${#ERRORS[@]} -eq 0 ]; then + errors_json='[]' + else + errors_json=$(printf '%s\n' "${ERRORS[@]}" | jq -R . | jq -s .) + fi + + local warnings_json + if [ ${#WARNINGS[@]} -eq 0 ]; then + warnings_json='[]' + else + warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -s .) + fi + + local has_errors=false + local has_warnings=false + [ ${#ERRORS[@]} -gt 0 ] && has_errors=true + [ ${#WARNINGS[@]} -gt 0 ] && has_warnings=true + + if [ "$is_success" = true ] && [ "$has_warnings" = true ]; then + severity="warning" + fi + + local db_status_clean=$(echo "$db_status" | tr -d '\r' | sed 's/^ *//;s/ *$//') + + # Escape restore log for JSON + local restore_log_json + restore_log_json=$(echo "$restore_log" | jq -Rs .) local json_data=$(cat <