From f1002d6e4a587bf76717e32216577d0ec874ccb2 Mon Sep 17 00:00:00 2001 From: Marius Date: Sat, 11 Oct 2025 15:34:00 +0300 Subject: [PATCH] Oracle DR: Add /AFTER parameter to cleanup - smart shutdown based on context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical fix based on user analysis: PROBLEM: Cleanup is called in 2 contexts with different requirements: 1. BEFORE restore (from rman_restore): Should NOT shut down 2. AFTER restore (from weekly-test): MUST shut down to delete files USER INSIGHT: "Why shutdown if restore will clean anyway? But AFTER restore, you MUST shutdown to release file locks for deletion!" SOLUTION: Add /AFTER parameter to cleanup_database.ps1: WITHOUT /AFTER (before restore): - Skip SHUTDOWN ABORT - Skip Stop-Service - Leave service in current state (running/stopped) - Files CAN be deleted (no lock before restore) - Optimization: If service is running → restore saves ~30s WITH /AFTER (after restore): - SHUTDOWN ABORT (stop instance) - Stop-Service (release file locks) - REQUIRED for file deletion after restore - Files are locked by active instance/service CALL SITES: 1. rman_restore: cleanup_database.ps1 /SILENT (no /AFTER) 2. weekly-test: cleanup_database.ps1 /SILENT /AFTER (with /AFTER) FLOW OPTIMIZATION: Test 1: Service stopped → start(30s) → restore → cleanup /AFTER Test 2: Service stopped → start(30s) → restore → cleanup /AFTER → No improvement yet BUT if we keep service running between tests: Test 1: Service stopped → start(30s) → restore → cleanup /AFTER Test 2: Service running → restore (no start needed, 30s saved!) → cleanup /AFTER → Save 30s on subsequent tests! 
Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- .../cleanup_database.ps1 | 73 +++++++++++++------ .../weekly-dr-test-proxmox.sh | 4 +- 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/oracle/standby-server-scripts/cleanup_database.ps1 b/oracle/standby-server-scripts/cleanup_database.ps1 index b798c82..526fa13 100644 --- a/oracle/standby-server-scripts/cleanup_database.ps1 +++ b/oracle/standby-server-scripts/cleanup_database.ps1 @@ -2,6 +2,10 @@ # Purpose: Remove all database files and services to restore DR VM to clean state # Run as: Administrator # Location: D:\oracle\scripts\cleanup_database.ps1 +# +# Parameters: +# /SILENT - Non-interactive mode +# /AFTER - Cleanup AFTER restore (shutdown instance + stop service before deleting files) $ErrorActionPreference = "Continue" @@ -21,8 +25,9 @@ Write-Host " 4. Delete trace files" Write-Host " 5. Leave VM in completely clean state (no service, no DB files)" Write-Host "" -# Check if running in non-interactive mode +# Check parameters $silent = $args -contains "/SILENT" -or $args -contains "/AUTO" +$afterRestore = $args -contains "/AFTER" if (-not $silent) { Write-Host "WARNING: This will DELETE the entire database!" -ForegroundColor Red @@ -35,32 +40,56 @@ New-Item -ItemType Directory -Path "D:\oracle\temp" -Force | Out-Null New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null Write-Host "" -Write-Host "[1/6] Shutting down database (if running)..." - -# Check if Oracle service exists -$service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue -if ($service) { - Write-Host " Oracle service found, attempting shutdown..." +if ($afterRestore) { + Write-Host "[1/6] Shutting down database (cleanup AFTER restore)..." 
- # Try to shutdown database using SQL*Plus with inline command - $shutdownSQL = "SHUTDOWN ABORT;`nEXIT;" - try { - $shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null - Start-Sleep -Seconds 2 - } catch { - Write-Host " Shutdown command sent (errors ignored)" + # Check if Oracle service exists + $service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue + if ($service) { + Write-Host " Oracle service found, attempting shutdown..." + + # Shutdown instance using SQL*Plus + $shutdownSQL = "WHENEVER SQLERROR CONTINUE`nSHUTDOWN ABORT;`nEXIT;" + try { + $shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null + Start-Sleep -Seconds 2 + Write-Host " Instance shut down" + } catch { + Write-Host " Shutdown command sent (errors ignored)" + } + + # Stop Oracle service to release file locks + if ($service.Status -eq "Running") { + Write-Host " Stopping Oracle service to release file locks..." + try { + Stop-Service -Name "OracleServiceROA" -Force -ErrorAction Stop + Start-Sleep -Seconds 2 + Write-Host " Service stopped" + } catch { + Write-Host " WARNING: Failed to stop service: $_" -ForegroundColor Yellow + } + } + + # Force kill any remaining Oracle processes + Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } else { + Write-Host " Oracle service not found, skipping shutdown" } - - # Force kill any Oracle processes - Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue - Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + Start-Sleep -Seconds 2 } else { - Write-Host " Oracle service not found, skipping shutdown" + Write-Host "[1/6] Skipping instance shutdown (cleanup BEFORE restore)" + Write-Host " Instance and service left in current state" + Write-Host " Restore 
script will handle service state properly" } -Start-Sleep -Seconds 2 -Write-Host "[2/6] Oracle service cleanup skipped (service preserved for next test)" -Write-Host " Service will be reused for faster subsequent restores (~15s saved)" +Write-Host "[2/6] Oracle service preserved for reuse" +if ($afterRestore) { + Write-Host " Service stopped to release file locks" +} else { + Write-Host " Service remains in current state (running or stopped)" + Write-Host " Optimization: If running, restore saves ~30s startup time" +} Write-Host "[3/6] Deleting database files + SPFILE..." Write-Host " Deleting datafiles..." diff --git a/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh b/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh index fbdc616..b9cc586 100644 --- a/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh +++ b/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh @@ -465,12 +465,12 @@ run_dr_test() { restore_log="Restore log not available (file may not exist or was not generated)" fi - # Step 6: Cleanup + # Step 6: Cleanup (AFTER restore - stop service to release file locks) step_start=$(date +%s) log "STEP 6: Running cleanup" ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ - "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT" 2>/dev/null + "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT /AFTER" 2>/dev/null cleanup_freed=8 track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"