Oracle DR: Fix RMAN crosscheck sequence and improve error handling
- Fix CROSSCHECK BACKUP command to execute after database is mounted - Correct CATALOG command to use recovery_area instead of F:\ path - Add robust backup file validation with detailed error reporting - Improve file-by-file backup copying with individual error tracking - Enhance restore log collection for both success and failure scenarios - Fix database verification to check OPEN_MODE instead of STATUS - Add comprehensive directory and permissions error handling Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
@@ -45,9 +45,15 @@ if (-not (Test-Path "F:\ROA\autobackup")) {
|
|||||||
Write-Host "[OK] F:\ROA\autobackup is accessible"
|
Write-Host "[OK] F:\ROA\autobackup is accessible"
|
||||||
Write-Host ""
|
Write-Host ""
|
||||||
|
|
||||||
# Create directories
|
# Create directories with proper permissions
|
||||||
New-Item -ItemType Directory -Path "D:\oracle\temp" -Force | Out-Null
|
try {
|
||||||
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null
|
New-Item -ItemType Directory -Path "D:\oracle\temp" -Force -ErrorAction Stop | Out-Null
|
||||||
|
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force -ErrorAction Stop | Out-Null
|
||||||
|
Write-Host "[OK] Created required directories"
|
||||||
|
} catch {
|
||||||
|
Write-Host "ERROR: Failed to create directories: $_" -ForegroundColor Red
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
Write-Host "============================================"
|
Write-Host "============================================"
|
||||||
Write-Host "STEP 1: CLEANUP - Delete existing database"
|
Write-Host "STEP 1: CLEANUP - Delete existing database"
|
||||||
@@ -115,12 +121,70 @@ $logFile = "D:\oracle\logs\restore_from_zero.log"
|
|||||||
New-Item -ItemType Directory -Path "C:\Users\oracle\recovery_area\ROA\autobackup" -Force | Out-Null
|
New-Item -ItemType Directory -Path "C:\Users\oracle\recovery_area\ROA\autobackup" -Force | Out-Null
|
||||||
Write-Host "[INFO] Copying all backups from F:\ROA\autobackup to recovery area..."
|
Write-Host "[INFO] Copying all backups from F:\ROA\autobackup to recovery area..."
|
||||||
Write-Host " This may take 1-2 minutes for ~10 GB of backups..."
|
Write-Host " This may take 1-2 minutes for ~10 GB of backups..."
|
||||||
Copy-Item "F:\ROA\autobackup\*.BKP" "C:\Users\oracle\recovery_area\ROA\autobackup\" -Force -ErrorAction Stop
|
|
||||||
if ($LASTEXITCODE -ne 0) {
|
# Check backup files exist on F: drive before copying
|
||||||
Write-Host "ERROR: Failed to copy backups from F:\" -ForegroundColor Red
|
try {
|
||||||
|
$backupFiles = Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Continue
|
||||||
|
} catch {
|
||||||
|
Write-Host "WARNING: Cannot enumerate backup files on F: drive - $_" -ForegroundColor Yellow
|
||||||
|
$backupFiles = @()
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($backupFiles.Count -lt 2) {
|
||||||
|
Write-Host "ERROR: Insufficient backup files found on F: drive (found: $($backupFiles.Count))" -ForegroundColor Red
|
||||||
|
Write-Host " At least 2 backup files required for successful restore"
|
||||||
|
Write-Host " Checking F:\ROA\autobackup directory..."
|
||||||
|
try {
|
||||||
|
$dirCheck = Get-ChildItem "F:\ROA\autobackup" -ErrorAction Continue
|
||||||
|
Write-Host " Directory contents: $($dirCheck.Count) files"
|
||||||
|
foreach ($file in $dirCheck) {
|
||||||
|
Write-Host " $($file.Name) - $($file.Length / 1GB) GB" -ForegroundColor Gray
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
Write-Host " Cannot access directory: $_" -ForegroundColor Red
|
||||||
|
}
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
Write-Host "[OK] All backups copied to recovery area"
|
|
||||||
|
Write-Host "[INFO] Found $($backupFiles.Count) backup files, total size: $([math]::Round(($backupFiles | Measure-Object -Property Length -Sum).Sum / 1GB, 2)) GB"
|
||||||
|
|
||||||
|
# Copy backups with better error handling
|
||||||
|
Write-Host "[INFO] Starting backup copy operation..."
|
||||||
|
$copyErrors = @()
|
||||||
|
foreach ($backupFile in $backupFiles) {
|
||||||
|
try {
|
||||||
|
Write-Host "[INFO] Copying $($backupFile.Name)..."
|
||||||
|
Copy-Item $backupFile.FullName "C:\Users\oracle\recovery_area\ROA\autobackup\" -Force -ErrorAction Stop
|
||||||
|
Write-Host "[OK] Copied $($backupFile.Name)" -ForegroundColor Green
|
||||||
|
} catch {
|
||||||
|
Write-Host "ERROR: Failed to copy $($backupFile.Name) - $_" -ForegroundColor Red
|
||||||
|
$copyErrors += "$($backupFile.Name): $_"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($copyErrors.Count -gt 0) {
|
||||||
|
Write-Host "ERROR: Backup copy failed for $($copyErrors.Count) files" -ForegroundColor Red
|
||||||
|
foreach ($error in $copyErrors) {
|
||||||
|
Write-Host " $error" -ForegroundColor Red
|
||||||
|
}
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Verify copied backups
|
||||||
|
try {
|
||||||
|
$copiedFiles = Get-ChildItem "C:\Users\oracle\recovery_area\ROA\autobackup\*.BKP" -ErrorAction Continue
|
||||||
|
} catch {
|
||||||
|
Write-Host "ERROR: Cannot verify copied backups - $_" -ForegroundColor Red
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($copiedFiles.Count -ne $backupFiles.Count) {
|
||||||
|
Write-Host "ERROR: Backup copy verification failed - file count mismatch" -ForegroundColor Red
|
||||||
|
Write-Host " Expected: $($backupFiles.Count), Copied: $($copiedFiles.Count)"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
Write-Host "[OK] All $($copiedFiles.Count) backups copied and verified to recovery area"
|
||||||
|
|
||||||
# Create RMAN script
|
# Create RMAN script
|
||||||
$rmanContent = @"
|
$rmanContent = @"
|
||||||
@@ -134,7 +198,10 @@ RUN {
|
|||||||
|
|
||||||
ALTER DATABASE MOUNT;
|
ALTER DATABASE MOUNT;
|
||||||
|
|
||||||
CATALOG START WITH 'F:/ROA/autobackup' NOPROMPT;
|
CATALOG START WITH 'C:/USERS/ORACLE/RECOVERY_AREA/ROA/AUTOBACKUP' NOPROMPT;
|
||||||
|
|
||||||
|
CROSSCHECK BACKUP;
|
||||||
|
DELETE NOPROMPT EXPIRED BACKUP;
|
||||||
|
|
||||||
RUN {
|
RUN {
|
||||||
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
|
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
|
||||||
@@ -152,6 +219,8 @@ RUN {
|
|||||||
|
|
||||||
ALTER DATABASE OPEN RESETLOGS;
|
ALTER DATABASE OPEN RESETLOGS;
|
||||||
|
|
||||||
|
DELETE NOPROMPT OBSOLETE;
|
||||||
|
|
||||||
EXIT;
|
EXIT;
|
||||||
"@
|
"@
|
||||||
|
|
||||||
|
|||||||
@@ -375,7 +375,7 @@ run_dr_test() {
|
|||||||
|
|
||||||
# Use PowerShell to query database status
|
# Use PowerShell to query database status
|
||||||
db_status=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
db_status=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
"powershell -Command \"'SELECT STATUS FROM V\`\$INSTANCE;' | sqlplus -s / as sysdba | Select-String 'OPEN'\"" || echo "")
|
"powershell -Command \"'SELECT OPEN_MODE FROM V\\\$DATABASE;' | sqlplus -s / as sysdba | findstr 'READ WRITE'\"" || echo "")
|
||||||
|
|
||||||
# Use PowerShell to count tables
|
# Use PowerShell to count tables
|
||||||
tables_restored=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
tables_restored=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
@@ -383,7 +383,7 @@ run_dr_test() {
|
|||||||
tables_restored=$(echo "$tables_restored" | tr -cd '0-9')
|
tables_restored=$(echo "$tables_restored" | tr -cd '0-9')
|
||||||
[ -z "$tables_restored" ] && tables_restored=0
|
[ -z "$tables_restored" ] && tables_restored=0
|
||||||
|
|
||||||
if [[ "$db_status" =~ "OPEN" ]]; then
|
if [[ "$db_status" =~ "READ WRITE" ]]; then
|
||||||
track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
|
track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
|
||||||
test_result="PASSED"
|
test_result="PASSED"
|
||||||
severity="info"
|
severity="info"
|
||||||
@@ -392,11 +392,27 @@ run_dr_test() {
|
|||||||
track_step "Database Verification" false "Database not OPEN" "$step_start"
|
track_step "Database Verification" false "Database not OPEN" "$step_start"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Collect restore log from VM
|
# Collect restore log from VM (always attempt collection)
|
||||||
log "Collecting restore log from DR VM..."
|
log "Collecting restore log from DR VM..."
|
||||||
restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
"type D:\\oracle\\logs\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available")
|
"type D:\\oracle\\logs\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available")
|
||||||
|
|
||||||
|
# If not found, try alternate locations
|
||||||
|
if [[ "$restore_log" == *"Log not available"* ]]; then
|
||||||
|
restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
|
"type D:\\oracle\\temp\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Still not found, check if any logs exist
|
||||||
|
if [[ "$restore_log" == *"Log not available"* ]]; then
|
||||||
|
log "Checking for any restore logs in DR VM..."
|
||||||
|
log_check=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
|
"dir D:\\oracle\\logs\\*.log 2>nul || dir D:\\oracle\\temp\\*.log 2>nul || echo 'No logs found'" 2>/dev/null || echo "Connection error")
|
||||||
|
if [[ "$log_check" != *"No logs found"* ]]; then
|
||||||
|
restore_log="Log files found but could not be read. Available files: $log_check"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Step 6: Cleanup
|
# Step 6: Cleanup
|
||||||
step_start=$(date +%s)
|
step_start=$(date +%s)
|
||||||
log "STEP 6: Running cleanup"
|
log "STEP 6: Running cleanup"
|
||||||
@@ -408,6 +424,25 @@ run_dr_test() {
|
|||||||
track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
|
track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
|
||||||
|
|
||||||
else
|
else
|
||||||
|
# Collect restore log even when restore fails
|
||||||
|
log "Collecting restore log after failure..."
|
||||||
|
restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
|
"type D:\\oracle\\logs\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available")
|
||||||
|
|
||||||
|
if [[ "$restore_log" == *"Log not available"* ]]; then
|
||||||
|
restore_log=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
|
"type D:\\oracle\\temp\\restore_from_zero.log 2>nul" | head -200 || echo "Log not available")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Always try to get some error output
|
||||||
|
if [[ "$restore_log" == *"Log not available"* ]]; then
|
||||||
|
last_error=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
|
||||||
|
"powershell -Command 'Get-Content D:\\oracle\\temp\\*.rman -Tail 20 || echo \"No RMAN script found\"'" 2>/dev/null || echo "Cannot access RMAN script")
|
||||||
|
if [[ "$last_error" != *"No RMAN script found"* ]]; then
|
||||||
|
restore_log="RMAN script content (last 20 lines):$last_error"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
track_step "Database Restore" false "Restore failed" "$step_start"
|
track_step "Database Restore" false "Restore failed" "$step_start"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user