Files
ROMFASTSQL/oracle/standby-server-scripts/cleanup_database.ps1
Marius b4c2a24281 Fix Oracle DR test ORA-00600 error by forcing service shutdown in cleanup
Problem: DR weekly test failed with ORA-00600 [kcbzib_kcrsds_1] when executed
via cron, but succeeded when run manually. Error occurred during "ALTER DATABASE
OPEN RESETLOGS" step after successful restore and recovery.

Root Cause Analysis:
- Manual test (12:09): Undo initialization = 0ms, no errors
- Cron test (10:45): Undo initialization = 2735ms, ORA-00600 crash
- Alert log showed: "Undo initialization recovery: err:600"
- Oracle instance was left in an inconsistent state by the previous run

The cleanup_database.ps1 script had an "optimization" that preserved the
running Oracle service to "save ~30s startup time". This left the service
in an inconsistent state between test runs, causing Oracle to crash when
attempting to open the database with RESETLOGS.

Solution:
Modified cleanup_database.ps1 to ALWAYS stop Oracle service completely:
1. SHUTDOWN ABORT the instance (always, not only when the /AFTER flag is passed)
2. Stop-Service OracleServiceROA (force clean state)
3. Kill remaining oracle processes
4. Service starts fresh during restore (clean Undo initialization)

Changes:
- Removed if/else branch that skipped shutdown before restore
- Always perform full shutdown regardless of /AFTER parameter
- Updated messages to reflect clean state approach
- Added explanation: "This ensures no state inconsistencies (prevents ORA-00600)"

Testing: Manual test confirmed clean 0ms Undo initialization after fix.

Related: Works in conjunction with weekly-dr-test-proxmox.sh PATH fix (commit 34f91ba)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-06 12:25:38 +02:00

134 lines
5.7 KiB
PowerShell

# Oracle Database Complete Cleanup Script (PowerShell)
# Purpose: Shut down the ROA instance, stop its Windows service and delete all
#          database files so the DR VM is back in a clean state for a restore.
#          The service is only stopped, never deleted.
# Run as: Administrator
# Location: D:\oracle\scripts\cleanup_database.ps1
#
# Parameters:
#   /SILENT, /AUTO - Non-interactive mode (skips the warning and the 3 second delay)
#   /AFTER         - Accepted for backward compatibility only. The instance shutdown
#                    and service stop are performed on every run, so the flag no
#                    longer changes anything.

# Non-terminating errors must not abort the run: each cleanup step is best-effort.
$ErrorActionPreference = "Continue"

# Oracle environment: ORACLE_HOME\bin is prepended to PATH so that `sqlplus`
# resolves to this installation.
$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
$env:ORACLE_SID = "ROA"
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"

Write-Host "============================================"
Write-Host "Oracle Database Cleanup Script"
Write-Host "============================================"
Write-Host ""
Write-Host "This script will:"
# Items 1 and 5 describe what the script really does: the service is stopped, not removed.
Write-Host " 1. Shut down the instance and stop the Oracle service"
Write-Host " 2. Delete all database files (datafiles, control files, redo logs)"
Write-Host " 3. Delete local FRA (backups are on F:\, safe to delete)"
Write-Host " 4. Delete trace files"
Write-Host " 5. Leave VM in completely clean state (service stopped, no DB files)"
Write-Host ""

# Check parameters
# /AFTER is intentionally not read: nothing in the script branches on it any more.
$silent = ($args -contains "/SILENT") -or ($args -contains "/AUTO")

if (-not $silent) {
    # Interactive run: give the operator a short window to abort with Ctrl+C.
    Write-Host "WARNING: This will DELETE the entire database!" -ForegroundColor Red
    Write-Host "Starting cleanup in 3 seconds... (Press Ctrl+C to cancel)"
    Start-Sleep -Seconds 3
}

# Create directories (-Force makes this a no-op when they already exist)
New-Item -ItemType Directory -Path "D:\oracle\temp" -Force | Out-Null
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null
Write-Host ""
Write-Host "[1/6] Shutting down database and stopping service..."

# Look up the Windows service of the ROA instance ($null when it is not installed).
$service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue

# Result reported by the [2/6] message. A missing service counts as clean because
# there is nothing left running.
$serviceIsClean = $true

if ($service) {
    Write-Host " Oracle service found, ensuring clean shutdown..."

    # Shutdown instance using SQL*Plus (always, not just /AFTER)
    $shutdownSQL = "WHENEVER SQLERROR CONTINUE`nSHUTDOWN ABORT;`nEXIT;"
    try {
        # -S: silent output. -L: try the logon once only, so a failed connection
        # ends sqlplus instead of reprompting and reading the piped script as
        # a user name / password.
        $shutdownSQL | & sqlplus -S -L / as sysdba 2>&1 | Out-Null
        Start-Sleep -Seconds 2
        Write-Host " Instance shut down (ABORT for fast cleanup)"
    } catch {
        # Reached only on a terminating error (for example sqlplus not found);
        # the service stop and process kill below still run.
        Write-Host " Shutdown command sent (errors ignored)"
    }

    # ALWAYS stop Oracle service to ensure clean state.
    # Re-read the state first, then stop the service in ANY state other than
    # Stopped (Running, pending, paused), not only when it is exactly Running.
    $service.Refresh()
    if ($service.Status -ne "Stopped") {
        Write-Host " Stopping Oracle service to ensure clean state..."
        try {
            Stop-Service -Name "OracleServiceROA" -Force -ErrorAction Stop
            Start-Sleep -Seconds 3
            Write-Host " Service stopped successfully"
        } catch {
            Write-Host " WARNING: Failed to stop service: $_" -ForegroundColor Yellow
        }
    } else {
        Write-Host " Service already stopped"
    }

    # Force kill any remaining Oracle processes to ensure clean state
    Write-Host " Cleaning up any remaining Oracle processes..."
    Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
    Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
    Start-Sleep -Seconds 2
    Write-Host " All Oracle processes terminated"

    # Verify the final state instead of assuming the stop succeeded.
    $service.Refresh()
    $serviceIsClean = ($service.Status -eq "Stopped")
} else {
    Write-Host " Oracle service not found, will be created during restore"
}

if ($serviceIsClean) {
    Write-Host "[2/6] Oracle service stopped (clean state for restore)"
    Write-Host " Service will be started fresh during restore"
    Write-Host " This ensures no state inconsistencies (prevents ORA-00600)"
} else {
    # Do not claim a clean state that was not reached.
    Write-Host "[2/6] WARNING: Oracle service is still in state '$($service.Status)'" -ForegroundColor Yellow
    Write-Host " Restore may start from an inconsistent state (risk of ORA-00600)" -ForegroundColor Yellow
}
# Deletes every file matching -Pattern (best effort) and checks the result.
#   -Pattern : path with wildcards, passed to Remove-Item as-is
#   -Label   : human-readable name used in the warning
# Returns $true when nothing matching the pattern is left, $false otherwise.
# A leftover only produces a yellow warning; the script keeps running.
function Remove-OracleFileSet {
    param(
        [Parameter(Mandatory = $true)][string]$Pattern,
        [Parameter(Mandatory = $true)][string]$Label
    )

    Remove-Item $Pattern -Force -ErrorAction SilentlyContinue

    # Test-Path with a wildcard is $true as soon as one match still exists.
    $leftOver = Test-Path -Path $Pattern -ErrorAction SilentlyContinue
    if ($leftOver) {
        Write-Host " WARNING: $Label still present after delete: $Pattern" -ForegroundColor Yellow
    }
    return (-not $leftOver)
}

Write-Host "[3/6] Deleting database files + SPFILE..."
Write-Host " Deleting datafiles..."
$null = Remove-OracleFileSet -Pattern "C:\Users\oracle\oradata\ROA\*.dbf" -Label "Datafiles"
Write-Host " Deleting control files..."
$null = Remove-OracleFileSet -Pattern "C:\Users\oracle\oradata\ROA\*.ctl" -Label "Control files"
Write-Host " Deleting redo logs..."
$null = Remove-OracleFileSet -Pattern "C:\Users\oracle\oradata\ROA\*.log" -Label "Redo logs"
Write-Host " Deleting SPFILE (ensures PFILE-based startup)..."
$null = Remove-OracleFileSet -Pattern "$env:ORACLE_HOME\database\SPFILE*.ORA" -Label "SPFILE"

Write-Host "[4/6] Deleting local FRA (backups are on F:\)..."
$fraPath = "C:\Users\oracle\recovery_area\ROA"
$fraExisted = Test-Path $fraPath
if ($fraExisted) {
    Remove-Item $fraPath -Recurse -Force -ErrorAction SilentlyContinue
}
# The directory is (re)created in both cases so that it always exists afterwards.
New-Item -ItemType Directory -Path $fraPath -Force | Out-Null
if (-not $fraExisted) {
    Write-Host " FRA directory created"
} elseif (Get-ChildItem -Path $fraPath -Force -ErrorAction SilentlyContinue | Select-Object -First 1) {
    # The recursive delete was partial: something inside the FRA survived.
    Write-Host " WARNING: FRA could not be fully cleared: $fraPath" -ForegroundColor Yellow
} else {
    Write-Host " FRA cleared"
}

Write-Host "[5/6] Deleting trace files (to save space)..."
$trcGone = Remove-OracleFileSet -Pattern "C:\Users\oracle\diag\rdbms\roa\ROA\trace\*.trc" -Label "Trace files (.trc)"
$trmGone = Remove-OracleFileSet -Pattern "C:\Users\oracle\diag\rdbms\roa\ROA\trace\*.trm" -Label "Trace files (.trm)"
if ($trcGone -and $trmGone) {
    Write-Host " Trace files deleted"
}
# Closing report. The lines are fixed text: nothing here re-checks the service,
# the PFILE or the file system before printing.
$summaryLines = @(
    ''
    '============================================'
    'Database Cleanup Complete!'
    '============================================'
    ''
    'Current state:'
    ' [YES] Oracle software installed'
    ' [YES] PFILE exists (C:\Users\oracle\admin\ROA\pfile\initROA.ora)'
    ' [YES] Oracle service (STOPPED for clean restore)'
    ' [NO] SPFILE (deleted to ensure PFILE startup)'
    ' [NO] Database files (will be restored from backups)'
    ' [NO] Control files (will be restored from backups)'
    ' [NO] Datafiles (will be restored from backups)'
    ''
    'VM is now in CLEAN STATE (service stopped, ready for fresh start)!'
    ''
    'Next step: Run D:\oracle\scripts\rman_restore_from_zero.ps1'
    ' (It will start the service fresh and restore the database)'
    ''
)

# One Write-Host per entry, in order; an empty entry prints a blank line.
foreach ($summaryLine in $summaryLines) {
    Write-Host $summaryLine
}

# The script always reports success to its caller.
exit 0