docs(dr): document failback DR -> PRIMARY procedure + restore script

Adds end-to-end procedure for moving production back from DR (10.0.20.37)
to a repaired/reinstalled PRIMARY (10.0.20.36): final RMAN backup on DR
in restricted/read-only mode, RMAN restore on PRIMARY, app connection
switch, scheduled-task reactivation, VM 109 stop. Companion PowerShell
script handles the restore with sanity checks (IP, NFS, backup freshness)
and aborts if Oracle major version != 19, since failback to 21c would
need an extra dictionary upgrade step (~30-60 min) that adds untested
risk during the critical window — recommended path is 19c failback then
upgrade later in a planned window.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-25 20:18:09 +00:00
parent 2ee51c7318
commit 8846c9c855
3 changed files with 830 additions and 1 deletions

View File

@@ -0,0 +1,452 @@
# RMAN Restore to PRIMARY (failback procedure)
# Restores ROA database on a freshly-installed/reinstalled PRIMARY server (10.0.20.36)
# from backups on F:\ (NFS mount from Proxmox host).
#
# This is the FAILBACK companion to rman_restore_from_zero.ps1:
# - rman_restore_from_zero.ps1: PRIMARY -> DR (during disaster activation)
# - rman_restore_to_primary.ps1 (this): DR -> PRIMARY (after PRIMARY repaired)
#
# See docs/FAILBACK_PROCEDURE.md for the full procedure.
#
# Run as: Administrator on PRIMARY (10.0.20.36)
# Location: D:\oracle\scripts\rman_restore_to_primary.ps1
#
# !!! IMPORTANT — ORACLE VERSION !!!
# This script REQUIRES Oracle Database 19c on PRIMARY and aborts otherwise.
# Backups are 19.3 with compatible=19.0.0. Restoring onto 19c is the path tested
# weekly on DR — same flow, no upgrade step.
#
# Technically 21c can RMAN-restore 19c backups, BUT the database then needs
# STARTUP UPGRADE + dbupgrade (catctl.pl) before OPEN — adds 30-60 min and an
# untested upgrade-during-failback risk. For a calm planned migration to 21c,
# do failback to 19c first, then upgrade in a separate window.
# Use installer WINDOWS.X64_193000_db_home.zip.
#
# Prerequisites:
# 1. Final RMAN backup taken on DR (10.0.20.37) and visible on F:\ROA\autobackup
# 2. Oracle 19c installed on PRIMARY at the same path as on DR (NOT 21c!)
# 3. ORACLE_SID=ROA, no demo DB present (if any, run cleanup_database.ps1 first)
# 4. NFS mount: mount -o anon,nolock,mtype=hard,timeout=60 10.0.20.202:/mnt/pve/oracle-backups F:
#
# Parameters:
# -SkipCleanup: Skip the destructive cleanup step (use only if PRIMARY is already clean)
# -DryRun: Print actions but do not execute (validate config before real run)
param(
[switch]$SkipCleanup,
[switch]$DryRun
)
$ErrorActionPreference = "Continue"
$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
$env:ORACLE_SID = "ROA"
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"
$DBID = "1363569330"
$LISTENER_IP = "10.0.20.36"
$LISTENER_PORT = "1521"
$SERVICE_NAME = "ROA"
Write-Host "============================================"
Write-Host "RMAN Restore TO PRIMARY (Failback)"
Write-Host "============================================"
Write-Host ""
Write-Host "Target: PRIMARY $LISTENER_IP"
Write-Host "Database: $SERVICE_NAME"
Write-Host "DBID: $DBID"
Write-Host "Backups: F:\ROA\autobackup (NFS from Proxmox)"
Write-Host "DryRun: $DryRun"
Write-Host ""
# Sanity: confirm we're running on PRIMARY, not on DR
$myIP = (Get-NetIPAddress -AddressFamily IPv4 | Where-Object { $_.IPAddress -like "10.0.20.*" }).IPAddress
if ($myIP -eq "10.0.20.37") {
Write-Host "ERROR: This script is for PRIMARY (10.0.20.36), but I'm running on DR (10.0.20.37)" -ForegroundColor Red
Write-Host "Did you mean to run rman_restore_from_zero.ps1 instead?"
exit 1
}
if ($myIP -ne $LISTENER_IP) {
Write-Host "WARNING: My IP is $myIP, expected $LISTENER_IP" -ForegroundColor Yellow
Write-Host "Continue anyway? (Ctrl+C to abort, Enter to continue)"
if (-not $DryRun) { Read-Host }
}
# Verify Oracle version is 19c — RMAN backups are 19.3 / compatible=19.0.0 and
# CANNOT be restored on a 21c+ instance (datafile header version mismatch).
Write-Host "[CHECK] Verifying Oracle version on PRIMARY..."
$sqlplusBin = Join-Path $env:ORACLE_HOME "bin\sqlplus.exe"
if (-not (Test-Path $sqlplusBin)) {
Write-Host "ERROR: sqlplus.exe not found at $sqlplusBin" -ForegroundColor Red
Write-Host " Is Oracle Database 19c installed at ORACLE_HOME=$env:ORACLE_HOME ?"
exit 1
}
$versionOutput = & $sqlplusBin -V 2>&1 | Out-String
# Expected: "SQL*Plus: Release 19.0.0.0.0 ..."
if ($versionOutput -match "Release\s+(\d+)\.(\d+)") {
$majorVersion = [int]$Matches[1]
Write-Host "[CHECK] Detected Oracle major version: $majorVersion"
if ($majorVersion -ne 19) {
Write-Host ""
Write-Host "ERROR: Oracle major version is $majorVersion, this script supports only 19c." -ForegroundColor Red
Write-Host " Backups are 19.3 (compatible=19.0.0)." -ForegroundColor Red
Write-Host ""
Write-Host " 21c CAN technically restore 19c backups, but requires an extra" -ForegroundColor Yellow
Write-Host " STARTUP UPGRADE + dbupgrade step (~30-60 min) which this script" -ForegroundColor Yellow
Write-Host " does NOT perform. Doing a cross-version upgrade during a failback" -ForegroundColor Yellow
Write-Host " is risky — recommended path is: install 19c, failback, then upgrade" -ForegroundColor Yellow
Write-Host " to 21c later in a planned window (DBUA or Data Pump)." -ForegroundColor Yellow
Write-Host ""
Write-Host " Install Oracle Database 19c (WINDOWS.X64_193000_db_home.zip) and re-run." -ForegroundColor Yellow
exit 1
}
Write-Host "[OK] Oracle 19c confirmed"
} else {
Write-Host "WARNING: Could not parse Oracle version from sqlplus output:" -ForegroundColor Yellow
Write-Host $versionOutput
Write-Host "Continue anyway? (yes/no)"
$confirm = Read-Host
if ($confirm -ne "yes") { exit 1 }
}
# Verify NFS mount
if (-not (Test-Path "F:\ROA\autobackup")) {
Write-Host "ERROR: F:\ROA\autobackup not accessible!" -ForegroundColor Red
Write-Host ""
Write-Host "Mount NFS first:"
Write-Host " mount -o anon,nolock,mtype=hard,timeout=60 10.0.20.202:/mnt/pve/oracle-backups F:"
exit 1
}
Write-Host "[OK] F:\ROA\autobackup is accessible"
# Verify backup freshness — warn if newest backup is > 2 hours old
$newest = Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction SilentlyContinue |
Sort-Object LastWriteTime -Descending | Select-Object -First 1
if ($newest) {
$ageHours = ((Get-Date) - $newest.LastWriteTime).TotalHours
Write-Host "[INFO] Newest backup: $($newest.Name) ($([math]::Round($ageHours,1))h old)"
if ($ageHours -gt 2) {
Write-Host "WARNING: Newest backup is more than 2 hours old." -ForegroundColor Yellow
Write-Host " Did you take a final backup on DR before starting failback?"
Write-Host " (See FAILBACK_PROCEDURE.md Pas 3-4)"
if (-not $DryRun) {
$confirm = Read-Host "Continue anyway? (yes/no)"
if ($confirm -ne "yes") { exit 1 }
}
}
} else {
Write-Host "ERROR: No .BKP files found in F:\ROA\autobackup" -ForegroundColor Red
exit 1
}
# Create local directories
New-Item -ItemType Directory -Path "D:\oracle\temp" -Force -ErrorAction SilentlyContinue | Out-Null
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force -ErrorAction SilentlyContinue | Out-Null
New-Item -ItemType Directory -Path "C:\Users\oracle\oradata\ROA" -Force -ErrorAction SilentlyContinue | Out-Null
New-Item -ItemType Directory -Path "C:\Users\oracle\recovery_area\ROA" -Force -ErrorAction SilentlyContinue | Out-Null
New-Item -ItemType Directory -Path "C:\Users\oracle\admin\ROA\adump" -Force -ErrorAction SilentlyContinue | Out-Null
New-Item -ItemType Directory -Path "C:\Users\oracle\admin\ROA\pfile" -Force -ErrorAction SilentlyContinue | Out-Null
if ($DryRun) {
Write-Host ""
Write-Host "[DRYRUN] Would proceed to STEP 1 (cleanup), STEP 2 (restore), STEP 3 (configure listener)."
Write-Host "[DRYRUN] No changes made. Re-run without -DryRun to execute."
exit 0
}
# ============================================
# STEP 1: CLEANUP — delete any pre-existing DB
# ============================================
if (-not $SkipCleanup) {
Write-Host ""
Write-Host "============================================"
Write-Host "STEP 1: CLEANUP — remove any existing DB"
Write-Host "============================================"
if (Test-Path "D:\oracle\scripts\cleanup_database.ps1") {
& "D:\oracle\scripts\cleanup_database.ps1" /SILENT
if ($LASTEXITCODE -ne 0) {
Write-Host "ERROR: Cleanup failed!" -ForegroundColor Red
exit 1
}
} else {
Write-Host "WARNING: cleanup_database.ps1 not found, doing minimal cleanup..." -ForegroundColor Yellow
$svc = Get-Service -Name "OracleService$SERVICE_NAME" -ErrorAction SilentlyContinue
if ($svc) {
Stop-Service -Name "OracleService$SERVICE_NAME" -Force -ErrorAction SilentlyContinue
& oradim -delete -sid $SERVICE_NAME 2>&1 | Out-Null
}
Get-ChildItem "C:\Users\oracle\oradata\ROA" -ErrorAction SilentlyContinue | Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
Get-ChildItem "C:\Users\oracle\recovery_area\ROA" -ErrorAction SilentlyContinue | Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
}
Write-Host "[OK] Cleanup complete"
} else {
Write-Host "[SKIP] Cleanup skipped (-SkipCleanup)"
}
# ============================================
# STEP 2: RESTORE
# ============================================
Write-Host ""
Write-Host "============================================"
Write-Host "STEP 2: RESTORE from F:\ backups"
Write-Host "============================================"
# Step 2.1: Create PFILE
$pfilePath = "C:\Users\oracle\admin\ROA\pfile\initROA.ora"
$pfileContent = @"
# Initialization parameters for ROA PRIMARY (failback)
# Auto-generated by rman_restore_to_primary.ps1 $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')
db_name=ROA
db_unique_name=ROA
memory_target=2048M
memory_max_target=2048M
control_files=('C:\Users\oracle\oradata\ROA\control01.ctl', 'C:\Users\oracle\recovery_area\ROA\control02.ctl')
db_recovery_file_dest='C:\Users\oracle\recovery_area'
db_recovery_file_dest_size=100G
audit_file_dest='C:\Users\oracle\admin\ROA\adump'
log_archive_format=%t_%s_%r.dbf
compatible=19.0.0
nls_language=AMERICAN
nls_territory=AMERICA
processes=300
sessions=472
diagnostic_dest='C:\Users\oracle'
"@
$pfileContent | Out-File -FilePath $pfilePath -Encoding ASCII
Write-Host "[OK] PFILE created at $pfilePath"
# Step 2.2: Create Oracle service
Write-Host "[2.2] Creating Oracle service $SERVICE_NAME..."
$svc = Get-Service -Name "OracleService$SERVICE_NAME" -ErrorAction SilentlyContinue
if (-not $svc) {
& oradim -new -sid $SERVICE_NAME -startmode auto -pfile $pfilePath 2>&1 | Out-Null
if ($LASTEXITCODE -ne 0) {
Write-Host "ERROR: oradim -new failed" -ForegroundColor Red
exit 1
}
Write-Host "[OK] Service OracleService$SERVICE_NAME created"
Start-Sleep -Seconds 3
} else {
Write-Host "[OK] Service OracleService$SERVICE_NAME already exists"
}
# Step 2.3: Startup NOMOUNT
Write-Host "[2.3] Starting database NOMOUNT..."
@"
WHENEVER SQLERROR CONTINUE
SHUTDOWN ABORT;
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
@"
STARTUP NOMOUNT PFILE='$pfilePath';
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
if ($LASTEXITCODE -ne 0) {
Write-Host "ERROR: STARTUP NOMOUNT failed" -ForegroundColor Red
exit 1
}
Write-Host "[OK] Database in NOMOUNT mode"
# Step 2.4: Copy ALL backups from F:\ to local recovery area (failback uses ALL, not test subset)
Write-Host "[2.4] Copying ALL backups from F:\ROA\autobackup to local recovery area..."
$backupFiles = Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Continue
if ($backupFiles.Count -lt 2) {
Write-Host "ERROR: Insufficient backups (found $($backupFiles.Count))" -ForegroundColor Red
exit 1
}
Write-Host "[INFO] Copying $($backupFiles.Count) files (~$([math]::Round(($backupFiles | Measure-Object -Property Length -Sum).Sum / 1GB, 2)) GB)"
$copyErrors = @()
foreach ($f in $backupFiles) {
try {
Copy-Item $f.FullName "C:\Users\oracle\recovery_area\ROA\autobackup\" -Force -ErrorAction Stop
} catch {
$copyErrors += "$($f.Name): $_"
}
}
if ($copyErrors.Count -gt 0) {
Write-Host "ERROR: $($copyErrors.Count) copy failures:" -ForegroundColor Red
$copyErrors | ForEach-Object { Write-Host " $_" -ForegroundColor Red }
exit 1
}
Write-Host "[OK] All backups copied"
# Step 2.5: RMAN restore
$rmanScript = "D:\oracle\temp\restore_to_primary.rman"
$logFile = "D:\oracle\logs\restore_to_primary.log"
# Note: RECOVER DATABASE (without NOREDO) — apply all archivelogs through final SCN.
# RECOVER DATABASE NOREDO is for the DR test path where archivelogs may be stale.
# For failback we want every committed transaction up to the final backup.
$rmanContent = @"
SET DBID $DBID;
RUN {
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
RESTORE CONTROLFILE FROM AUTOBACKUP;
RELEASE CHANNEL ch1;
}
ALTER DATABASE MOUNT;
CATALOG START WITH 'C:/USERS/ORACLE/RECOVERY_AREA/ROA/AUTOBACKUP' NOPROMPT;
CROSSCHECK BACKUP;
CROSSCHECK ARCHIVELOG ALL;
DELETE NOPROMPT EXPIRED BACKUP;
RUN {
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
ALLOCATE CHANNEL ch2 DEVICE TYPE DISK;
RESTORE DATABASE;
RECOVER DATABASE;
RELEASE CHANNEL ch1;
RELEASE CHANNEL ch2;
}
ALTER DATABASE OPEN RESETLOGS;
LIST INCARNATION;
DELETE NOPROMPT OBSOLETE;
EXIT;
"@
$rmanContent | Out-File -FilePath $rmanScript -Encoding ASCII
Write-Host "[2.5] Running RMAN restore (10-25 min)..."
Write-Host " Log: $logFile"
& rman target / cmdfile=$rmanScript log=$logFile
if ($LASTEXITCODE -ne 0) {
Write-Host "ERROR: RMAN restore failed. Last 40 lines of log:" -ForegroundColor Red
Get-Content $logFile -Tail 40
exit 1
}
Write-Host "[OK] RMAN restore complete"
# ============================================
# STEP 3: CONFIGURE — SPFILE, listener, register
# ============================================
Write-Host ""
Write-Host "============================================"
Write-Host "STEP 3: CONFIGURE listener and SPFILE"
Write-Host "============================================"
# Step 3.1: SPFILE
Write-Host "[3.1] Creating SPFILE..."
@"
CREATE SPFILE FROM PFILE='$pfilePath';
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
# Step 3.2: Recreate service to use SPFILE
Write-Host "[3.2] Reconfiguring service to use SPFILE..."
@"
SHUTDOWN IMMEDIATE;
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
Start-Sleep -Seconds 3
& oradim -delete -sid $SERVICE_NAME 2>&1 | Out-Null
Start-Sleep -Seconds 2
& oradim -new -sid $SERVICE_NAME -startmode auto -spfile 2>&1 | Out-Null
# Step 3.3: Startup with SPFILE
@"
STARTUP;
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
Start-Sleep -Seconds 3
Write-Host "[OK] Database started with SPFILE"
# Step 3.4: Configure listener.ora to bind on PRIMARY IP
Write-Host "[3.4] Configuring listener for $LISTENER_IP`:$LISTENER_PORT..."
$listenerOra = "$env:ORACLE_HOME\network\admin\listener.ora"
$listenerContent = @"
LISTENER =
(DESCRIPTION_LIST =
(DESCRIPTION =
(ADDRESS = (PROTOCOL = TCP)(HOST = $LISTENER_IP)(PORT = $LISTENER_PORT))
(ADDRESS = (PROTOCOL = IPC)(KEY = EXTPROC1521))
)
)
SID_LIST_LISTENER =
(SID_LIST =
(SID_DESC =
(GLOBAL_DBNAME = $SERVICE_NAME)
(ORACLE_HOME = $env:ORACLE_HOME)
(SID_NAME = $SERVICE_NAME)
)
)
"@
$listenerContent | Out-File -FilePath $listenerOra -Encoding ASCII
Set-Service -Name "OracleOraDB19Home1TNSListener" -StartupType Automatic -ErrorAction SilentlyContinue
Restart-Service -Name "OracleOraDB19Home1TNSListener" -Force -ErrorAction SilentlyContinue
Start-Sleep -Seconds 3
# Step 3.5: Register DB with listener
@"
ALTER SYSTEM SET LOCAL_LISTENER='(ADDRESS=(PROTOCOL=TCP)(HOST=$LISTENER_IP)(PORT=$LISTENER_PORT))' SCOPE=BOTH;
ALTER SYSTEM REGISTER;
EXIT;
"@ | & sqlplus -S / as sysdba 2>&1 | Out-Null
Write-Host "[OK] Database registered with listener"
# ============================================
# STEP 4: VERIFY
# ============================================
Write-Host ""
Write-Host "============================================"
Write-Host "STEP 4: VERIFY"
Write-Host "============================================"
@"
SET PAGESIZE 100 LINESIZE 200
COLUMN info FORMAT A100
SELECT 'DB: ' || NAME || ' | OPEN_MODE: ' || OPEN_MODE || ' | DBID: ' || DBID || ' | SCN: ' || CURRENT_SCN AS info FROM V`$DATABASE;
SELECT 'INSTANCE: ' || INSTANCE_NAME || ' | STATUS: ' || STATUS AS info FROM V`$INSTANCE;
SELECT 'LISTENER_HOST: ' || NETWORK_NAME || ' | SERVICE: ' || NAME AS info FROM V`$ACTIVE_SERVICES WHERE NAME = '$SERVICE_NAME';
SELECT 'TABLESPACES: ' || COUNT(*) AS info FROM DBA_TABLESPACES;
SELECT 'DATAFILES: ' || COUNT(*) AS info FROM DBA_DATA_FILES;
SELECT 'USER_TABLES: ' || COUNT(*) AS info FROM DBA_TABLES
WHERE OWNER NOT IN ('SYS','SYSTEM','XDB','GSMADMIN_INTERNAL','APPQOSSYS','OUTLN','DBSNMP','WMSYS','OLAPSYS','MDSYS','CTXSYS','EXFSYS','ORDSYS','LBACSYS');
EXIT;
"@ | & sqlplus -S / as sysdba
# Listener status
Write-Host ""
Write-Host "[4.1] Listener status:"
& lsnrctl status
Write-Host ""
Write-Host "============================================"
Write-Host "Failback restore COMPLETE"
Write-Host "============================================"
Write-Host ""
Write-Host "Restore log: $logFile"
Write-Host ""
Write-Host "Verify SCN above is >= the SCN you noted on DR after final archive log switch."
Write-Host ""
Write-Host "Next steps (manual):"
Write-Host " 1. Test app connectivity: sqlplus user/pass@$LISTENER_IP`:$LISTENER_PORT/$SERVICE_NAME"
Write-Host " 2. Update app connection strings: 10.0.20.37 -> $LISTENER_IP"
Write-Host " 3. Re-enable scheduled tasks: rman_backup.bat, rman_backup_incremental.bat, transfer_backups.ps1"
Write-Host " 4. Stop DR VM: ssh root@10.0.20.203 'qm stop 109'"
Write-Host " 5. RMAN catalog: open 'rman target /' and run RESET DATABASE TO INCARNATION <new>"
Write-Host ""
exit 0