Oracle DR: Add /AFTER parameter to cleanup - smart shutdown based on context

Critical fix based on user analysis:

PROBLEM:
Cleanup is called in two contexts with different requirements:
1. BEFORE restore (from rman_restore): should NOT shut down the instance
2. AFTER restore (from weekly-test): MUST shut down to release file locks so the files can be deleted

USER INSIGHT:
"Why shutdown if restore will clean anyway? But AFTER restore,
you MUST shutdown to release file locks for deletion!"

SOLUTION:
Add /AFTER parameter to cleanup_database.ps1:

WITHOUT /AFTER (before restore):
- Skip SHUTDOWN ABORT
- Skip Stop-Service
- Leave service in current state (running/stopped)
- Files CAN be deleted (nothing holds a lock on them before the restore)
- Optimization: if the service is already running, the restore saves ~30s of service startup time

WITH /AFTER (after restore):
- SHUTDOWN ABORT (stop instance)
- Stop-Service (release file locks)
- REQUIRED for file deletion after restore
- Files are locked by active instance/service

CALL SITES:
1. rman_restore: cleanup_database.ps1 /SILENT (no /AFTER)
2. weekly-test: cleanup_database.ps1 /SILENT /AFTER (with /AFTER)

FLOW OPTIMIZATION:
Test 1: Service stopped → start(30s) → restore → cleanup /AFTER
Test 2: Service stopped → start(30s) → restore → cleanup /AFTER
→ No improvement yet

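BUT if we keep the service running between tests:
Test 1: Service stopped → start(30s) → restore → cleanup /AFTER
Test 2: Service running → restore(0s saved!) → cleanup /AFTER
→ Save 30s on subsequent tests!
NOTE: as implemented in this commit, cleanup /AFTER stops the service, so this
saving only applies if the service is started again (or left running) before
the next test begins.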

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
Marius
2025-10-11 15:34:00 +03:00
parent 5af33fc217
commit f1002d6e4a
2 changed files with 53 additions and 24 deletions

View File

@@ -2,6 +2,10 @@
# Purpose: Remove all database files and services to restore DR VM to clean state # Purpose: Remove all database files and services to restore DR VM to clean state
# Run as: Administrator # Run as: Administrator
# Location: D:\oracle\scripts\cleanup_database.ps1 # Location: D:\oracle\scripts\cleanup_database.ps1
#
# Parameters:
# /SILENT - Non-interactive mode
# /AFTER - Cleanup AFTER restore (shutdown instance + stop service before deleting files)
$ErrorActionPreference = "Continue" $ErrorActionPreference = "Continue"
@@ -21,8 +25,9 @@ Write-Host " 4. Delete trace files"
Write-Host " 5. Leave VM in completely clean state (no service, no DB files)" Write-Host " 5. Leave VM in completely clean state (no service, no DB files)"
Write-Host "" Write-Host ""
# Check if running in non-interactive mode # Check parameters
$silent = $args -contains "/SILENT" -or $args -contains "/AUTO" $silent = $args -contains "/SILENT" -or $args -contains "/AUTO"
$afterRestore = $args -contains "/AFTER"
if (-not $silent) { if (-not $silent) {
Write-Host "WARNING: This will DELETE the entire database!" -ForegroundColor Red Write-Host "WARNING: This will DELETE the entire database!" -ForegroundColor Red
@@ -35,32 +40,56 @@ New-Item -ItemType Directory -Path "D:\oracle\temp" -Force | Out-Null
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null
Write-Host "" Write-Host ""
Write-Host "[1/6] Shutting down database (if running)..." if ($afterRestore) {
Write-Host "[1/6] Shutting down database (cleanup AFTER restore)..."
# Check if Oracle service exists # Check if Oracle service exists
$service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue $service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue
if ($service) { if ($service) {
Write-Host " Oracle service found, attempting shutdown..." Write-Host " Oracle service found, attempting shutdown..."
# Try to shutdown database using SQL*Plus with inline command # Shutdown instance using SQL*Plus
$shutdownSQL = "SHUTDOWN ABORT;`nEXIT;" $shutdownSQL = "WHENEVER SQLERROR CONTINUE`nSHUTDOWN ABORT;`nEXIT;"
try { try {
$shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null $shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
Start-Sleep -Seconds 2 Start-Sleep -Seconds 2
} catch { Write-Host " Instance shut down"
Write-Host " Shutdown command sent (errors ignored)" } catch {
Write-Host " Shutdown command sent (errors ignored)"
}
# Stop Oracle service to release file locks
if ($service.Status -eq "Running") {
Write-Host " Stopping Oracle service to release file locks..."
try {
Stop-Service -Name "OracleServiceROA" -Force -ErrorAction Stop
Start-Sleep -Seconds 2
Write-Host " Service stopped"
} catch {
Write-Host " WARNING: Failed to stop service: $_" -ForegroundColor Yellow
}
}
# Force kill any remaining Oracle processes
Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
} else {
Write-Host " Oracle service not found, skipping shutdown"
} }
Start-Sleep -Seconds 2
# Force kill any Oracle processes
Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
} else { } else {
Write-Host " Oracle service not found, skipping shutdown" Write-Host "[1/6] Skipping instance shutdown (cleanup BEFORE restore)"
Write-Host " Instance and service left in current state"
Write-Host " Restore script will handle service state properly"
} }
Start-Sleep -Seconds 2
Write-Host "[2/6] Oracle service cleanup skipped (service preserved for next test)" Write-Host "[2/6] Oracle service preserved for reuse"
Write-Host " Service will be reused for faster subsequent restores (~15s saved)" if ($afterRestore) {
Write-Host " Service stopped to release file locks"
} else {
Write-Host " Service remains in current state (running or stopped)"
Write-Host " Optimization: If running, restore saves ~30s startup time"
}
Write-Host "[3/6] Deleting database files + SPFILE..." Write-Host "[3/6] Deleting database files + SPFILE..."
Write-Host " Deleting datafiles..." Write-Host " Deleting datafiles..."

View File

@@ -465,12 +465,12 @@ run_dr_test() {
restore_log="Restore log not available (file may not exist or was not generated)" restore_log="Restore log not available (file may not exist or was not generated)"
fi fi
# Step 6: Cleanup # Step 6: Cleanup (AFTER restore - stop service to release file locks)
step_start=$(date +%s) step_start=$(date +%s)
log "STEP 6: Running cleanup" log "STEP 6: Running cleanup"
ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \ ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
"powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT" 2>/dev/null "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT /AFTER" 2>/dev/null
cleanup_freed=8 cleanup_freed=8
track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start" track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"