Reorganize oracle/ and chatbot/ into proxmox/ per LXC/VM structure
- Move oracle/migration-scripts/ to proxmox/lxc108-oracle/migration/ - Move oracle/roa/ and oracle/roa-romconstruct/ to proxmox/lxc108-oracle/sql/ - Move oracle/standby-server-scripts/ to proxmox/vm109-windows-dr/ - Move chatbot/ to proxmox/lxc104-flowise/ - Update proxmox/README.md with new structure and navigation - Update all documentation with correct directory references - Remove unused input/claude-agent-sdk/ files Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
565
proxmox/vm109-windows-dr/README.md
Normal file
565
proxmox/vm109-windows-dr/README.md
Normal file
@@ -0,0 +1,565 @@
|
||||
# VM 109 - Oracle DR System (Windows Standby)
|
||||
|
||||
**Director Proxmox:** `proxmox/vm109-windows-dr/`
|
||||
**VMID:** 109
|
||||
**Rol:** Disaster Recovery pentru Oracle Database (backup RMAN de pe server Windows extern)
|
||||
|
||||
---
|
||||
|
||||
# 🛡️ Oracle DR System - Complete Architecture
|
||||
|
||||
## 📊 System Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PRODUCTION ENVIRONMENT │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ PRIMARY SERVER (10.0.20.36) │
|
||||
│ Windows Server + Oracle 19c │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ Database: ROA │ │
|
||||
│ │ Size: ~80 GB │ │
|
||||
│ │ Tables: 42,625 │ │
|
||||
│ └──────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ Backups (Daily) │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ 02:30 - FULL backup (6-7 GB) │ │
|
||||
│ │ 13:00 - CUMULATIVE (200 MB) │ │
|
||||
│ │ 18:00 - CUMULATIVE (300 MB) │ │
|
||||
│ └──────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
│ SSH Transfer (Port 22)
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ DR ENVIRONMENT │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ PROXMOX HOST (10.0.20.202 - pveelite) │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ Backup Storage (NFS Server) │◄─────── Monitoring Scripts │
|
||||
│ │ /mnt/pve/oracle-backups/ │ /opt/scripts/ │
|
||||
│ │ └── ROA/autobackup/ │ │
|
||||
│ └──────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ │ NFS Mount (F:\) │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ DR VM 109 (10.0.20.37) │ │
|
||||
│ │ Windows Server + Oracle 19c │ │
|
||||
│ │ Status: OFF (normally) │ │
|
||||
│ │ Starts for: Tests or Disaster │ │
|
||||
│ └──────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## 🎯 Quick Actions
|
||||
|
||||
### ⚡ Emergency DR Activation (Production Down!)
|
||||
|
||||
```bash
|
||||
# 1. Start DR VM
|
||||
ssh root@10.0.20.202 "qm start 109"
|
||||
|
||||
# 2. Connect to VM (wait 3 min for boot)
|
||||
ssh -p 22122 romfast@10.0.20.37
|
||||
|
||||
# 3. Run restore (takes ~10-15 minutes)
|
||||
D:\oracle\scripts\rman_restore_from_zero.cmd
|
||||
|
||||
# 4. Database is now RUNNING - Update app connections to 10.0.20.37
|
||||
```
|
||||
|
||||
### 🧪 Weekly Test (Every Saturday)
|
||||
|
||||
```bash
|
||||
# Automatic at 06:00 via cron, or manual:
|
||||
ssh root@10.0.20.202 "/opt/scripts/weekly-dr-test-proxmox.sh"
|
||||
|
||||
# What it does:
|
||||
# ✓ Starts VM → Restores DB → Tests → Cleanup → Shutdown
|
||||
# ✓ Sends email report with results
|
||||
```
|
||||
|
||||
### 📊 Check Backup Health
|
||||
|
||||
```bash
|
||||
# Manual check (runs daily at 09:00 automatically)
|
||||
ssh root@10.0.20.202 "/opt/scripts/oracle-backup-monitor-proxmox.sh"
|
||||
|
||||
# Output:
|
||||
# Status: OK
|
||||
# FULL backup age: 11 hours ✓
|
||||
# CUMULATIVE backup age: 2 hours ✓
|
||||
# Disk usage: 45% ✓
|
||||
```
|
||||
|
||||
## 🗂️ Component Locations
|
||||
|
||||
### 📁 PRIMARY Server (10.0.20.36)
|
||||
```
|
||||
D:\rman_backup\
|
||||
├── rman_backup_full.txt # RMAN script for FULL backup
|
||||
├── rman_backup_incremental.txt # RMAN script for CUMULATIVE
|
||||
└── transfer_backups.ps1 # UNIFIED: Transfer ALL backups to Proxmox
|
||||
|
||||
Scheduled Tasks:
|
||||
├── 02:30 - Oracle RMAN Full Backup
|
||||
├── 03:00 - Transfer backups to DR (transfer_backups.ps1)
|
||||
├── 13:00 - Oracle RMAN Cumulative Backup
|
||||
├── 14:45 - Transfer backups to DR (transfer_backups.ps1)
|
||||
└── 18:00 - Oracle RMAN Cumulative Backup
|
||||
```
|
||||
|
||||
### 📁 PROXMOX Host (10.0.20.202)
|
||||
```
|
||||
/opt/scripts/
|
||||
├── oracle-backup-monitor-proxmox.sh # Daily backup monitoring
|
||||
├── weekly-dr-test-proxmox.sh # Weekly DR test
|
||||
└── PROXMOX_NOTIFICATIONS_README.md # Documentation
|
||||
|
||||
/mnt/pve/oracle-backups/ROA/autobackup/
|
||||
├── FULL_20251010_023001.BKP # Latest FULL backup
|
||||
├── INCR_20251010_130001.BKP # CUMULATIVE 13:00
|
||||
└── INCR_20251010_180001.BKP # CUMULATIVE 18:00
|
||||
|
||||
Cron Jobs:
|
||||
0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh
|
||||
0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
|
||||
```
|
||||
|
||||
### 📁 DR VM 109 (10.0.20.37) - When Running
|
||||
```
|
||||
D:\oracle\scripts\
|
||||
├── rman_restore_from_zero.cmd # Main restore script ⭐
|
||||
├── cleanup_database.cmd # Cleanup after test
|
||||
└── mount-nfs.bat # Mount F:\ at startup
|
||||
|
||||
F:\ (NFS mount from Proxmox)
|
||||
└── ROA\autobackup\ # All backup files
|
||||
```
|
||||
|
||||
## 🔄 How It Works
|
||||
|
||||
### Backup Flow (Daily)
|
||||
```
|
||||
PRIMARY PROXMOX
|
||||
│ │
|
||||
├─02:30─FULL─Backup─────────────►
|
||||
│ (6-7 GB) │
|
||||
├─03:00─Transfer ALL────────────► Skip duplicates
|
||||
│ (transfer_backups.ps1) │
|
||||
│ │
|
||||
├─13:00─CUMULATIVE──────────────►
|
||||
│ (200 MB) │
|
||||
├─14:45─Transfer ALL────────────► Skip duplicates
|
||||
│ (transfer_backups.ps1) │ (only new files)
|
||||
│ │
|
||||
└─18:00─CUMULATIVE──────────────►
|
||||
(300 MB) Storage
|
||||
│
|
||||
┌──────────┐
|
||||
│ Monitor │ 09:00 Daily
|
||||
│ Check Age│ Alert if old
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
### Restore Process
|
||||
```
|
||||
Start VM → Mount F:\ → Copy Backups → RMAN Restore → Database OPEN
|
||||
2min Auto 2min 8min Ready!
|
||||
|
||||
Total Time: ~15 minutes
|
||||
```
|
||||
|
||||
## 🔧 Manual Operations
|
||||
|
||||
### Test Individual Components
|
||||
|
||||
```bash
|
||||
# 1. Test backup transfer (on PRIMARY)
|
||||
powershell -ExecutionPolicy Bypass -File "D:\rman_backup\transfer_backups.ps1"
|
||||
|
||||
# 2. Test NFS mount (on VM 109)
|
||||
mount -o rw,nolock,mtype=hard,timeout=60 10.0.20.202:/mnt/pve/oracle-backups F:
|
||||
dir F:\ROA\autobackup
|
||||
|
||||
# 3. Test notification system
|
||||
ssh root@10.0.20.202 "touch -d '2 days ago' /mnt/pve/oracle-backups/ROA/autobackup/*FULL*.BKP"
|
||||
ssh root@10.0.20.202 "/opt/scripts/oracle-backup-monitor-proxmox.sh"
|
||||
# Should send WARNING notification
|
||||
|
||||
# 4. Test database restore (on VM 109)
|
||||
D:\oracle\scripts\rman_restore_from_zero.cmd
|
||||
```
|
||||
|
||||
### Force Actions
|
||||
|
||||
```bash
|
||||
# Force backup now (on PRIMARY)
|
||||
rman cmdfile=D:\rman_backup\rman_backup_incremental.txt
|
||||
|
||||
# Force cleanup VM (on VM 109)
|
||||
D:\oracle\scripts\cleanup_database.cmd
|
||||
|
||||
# Force VM shutdown
|
||||
ssh root@10.0.20.202 "qm stop 109"
|
||||
```
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### 🔍 Debugging Restore Tests
|
||||
|
||||
#### Check Backup Files on Proxmox (10.0.20.202)
|
||||
|
||||
```bash
|
||||
# 1. List all backup files with size and date
|
||||
ssh root@10.0.20.202 "ls -lht /mnt/pve/oracle-backups/ROA/autobackup/*.BKP"
|
||||
|
||||
# 2. Count backup files
|
||||
ssh root@10.0.20.202 "ls /mnt/pve/oracle-backups/ROA/autobackup/*.BKP | wc -l"
|
||||
|
||||
# 3. Check latest backups (last 24 hours)
|
||||
ssh root@10.0.20.202 "find /mnt/pve/oracle-backups/ROA/autobackup -name '*.BKP' -mtime -1 -ls"
|
||||
|
||||
# 4. Show backup files grouped by type (with new naming convention)
|
||||
ssh root@10.0.20.202 "ls -lh /mnt/pve/oracle-backups/ROA/autobackup/ | grep -E '(L0_|L1_|ARC_|SPFILE_|CF_|O1_MF)'"
|
||||
|
||||
# 5. Check disk space usage
|
||||
ssh root@10.0.20.202 "df -h /mnt/pve/oracle-backups"
|
||||
ssh root@10.0.20.202 "du -sh /mnt/pve/oracle-backups/ROA/autobackup/"
|
||||
|
||||
# 6. Verify newest backup timestamp
|
||||
ssh root@10.0.20.202 "stat /mnt/pve/oracle-backups/ROA/autobackup/L0_*.BKP 2>/dev/null | grep Modify || echo 'No L0 backups with new naming'"
|
||||
```
|
||||
|
||||
#### Verify Backup Files on DR VM (when running)
|
||||
|
||||
```powershell
|
||||
# 1. Check NFS mount is accessible
|
||||
Test-Path F:\ROA\autobackup
|
||||
|
||||
# 2. List all backup files
|
||||
Get-ChildItem F:\ROA\autobackup\*.BKP | Format-Table Name, Length, LastWriteTime
|
||||
|
||||
# 3. Count backup files
|
||||
(Get-ChildItem F:\ROA\autobackup\*.BKP).Count
|
||||
|
||||
# 4. Show total backup size
|
||||
"{0:N2} GB" -f ((Get-ChildItem F:\ROA\autobackup\*.BKP | Measure-Object -Property Length -Sum).Sum / 1GB)
|
||||
|
||||
# 5. Check latest Level 0 backup
|
||||
Get-ChildItem F:\ROA\autobackup\L0_*.BKP -ErrorAction SilentlyContinue | Sort-Object LastWriteTime -Descending | Select-Object -First 1
|
||||
|
||||
# 6. Check what was copied during last restore
|
||||
Get-Content D:\oracle\logs\restore_from_zero.log | Select-String "Copying|Copied"
|
||||
```
|
||||
|
||||
#### Check DR Test Results
|
||||
|
||||
```bash
|
||||
# 1. View latest DR test log
|
||||
ssh root@10.0.20.202 "ls -lt /var/log/oracle-dr/dr_test_*.log | head -1 | awk '{print \$9}' | xargs cat | tail -100"
|
||||
|
||||
# 2. Check test status (passed/failed)
|
||||
ssh root@10.0.20.202 "grep -E 'PASSED|FAILED|Database Verification' /var/log/oracle-dr/dr_test_*.log | tail -5"
|
||||
|
||||
# 3. See backup selection logic output
|
||||
ssh root@10.0.20.202 "grep -A5 'TEST MODE: Selecting' /var/log/oracle-dr/dr_test_*.log | tail -20"
|
||||
|
||||
# 4. Check how many files were selected
|
||||
ssh root@10.0.20.202 "grep 'Total files selected' /var/log/oracle-dr/dr_test_*.log | tail -1"
|
||||
|
||||
# 5. View RMAN errors (if any)
|
||||
ssh root@10.0.20.202 "grep -i 'RMAN-\|ORA-' /var/log/oracle-dr/dr_test_*.log | tail -20"
|
||||
```
|
||||
|
||||
#### Simulate Test Locally (on DR VM)
|
||||
|
||||
```powershell
|
||||
# 1. Start Oracle service manually
|
||||
Start-Service OracleServiceROA
|
||||
|
||||
# 2. Run cleanup to prepare for restore
|
||||
D:\oracle\scripts\cleanup_database.ps1 /SILENT
|
||||
|
||||
# 3. Run restore in test mode
|
||||
D:\oracle\scripts\rman_restore_from_zero.ps1 -TestMode
|
||||
|
||||
# 4. Verify database opened correctly
|
||||
sqlplus / as sysdba @D:\oracle\scripts\verify_db.sql
|
||||
|
||||
# 5. Check what backups were used
|
||||
Get-Content D:\oracle\logs\restore_from_zero.log | Select-String "backup piece"
|
||||
|
||||
# 6. View database verification output
|
||||
Get-Content D:\oracle\logs\restore_from_zero.log | Select-String -Pattern "DB_NAME|OPEN_MODE|TABLES" -Context 0,1
|
||||
```
|
||||
|
||||
#### Common Restore Test Issues
|
||||
|
||||
| Issue | Check | Fix |
|
||||
|-------|-------|-----|
|
||||
| Test reports FAILED but DB is open | Check log for "OPEN_MODE: READ WRITE" | Already fixed in latest version |
|
||||
| Missing datafiles in restore | Count backup files: should be 15-40+ | Wait for next full backup or copy all files |
|
||||
| "No backups found" error | Verify NFS mount: `Test-Path F:\` | Remount NFS or check Proxmox NFS service |
|
||||
| Restore takes > 30 min | Check backup size: should be ~5-8 GB | Normal for first restore after format change |
|
||||
| RMAN-06023 errors | Check for L0_*.BKP files on F:\ | Old format: need new backup with naming convention |
|
||||
|
||||
#### Verify Naming Convention is Active
|
||||
|
||||
```bash
|
||||
# Check if new naming convention is being used (after Oct 11, 2025)
|
||||
ssh root@10.0.20.202 "ls /mnt/pve/oracle-backups/ROA/autobackup/ | grep -E '^(L0_|L1_|ARC_|SPFILE_|CF_)' | wc -l"
|
||||
# Should return > 0 if active
|
||||
|
||||
# If 0, backups are still using old format (O1_MF_ANNNN_*)
|
||||
# Wait for next scheduled backup (02:30 daily) or run manual backup
|
||||
```
|
||||
|
||||
#### Manual Test Run with Verbose Output
|
||||
|
||||
```bash
|
||||
# Run test with full output visible
|
||||
ssh root@10.0.20.202
|
||||
cd /opt/scripts
|
||||
./weekly-dr-test-proxmox.sh 2>&1 | tee /tmp/dr_test_manual.log
|
||||
|
||||
# Watch in real-time what's happening
|
||||
# Look for these key stages:
|
||||
# - "TEST MODE: Selecting latest backup set"
|
||||
# - "Total files selected: XX"
|
||||
# - "RMAN restore completed successfully"
|
||||
# - "OPEN_MODE: READ WRITE"
|
||||
```
|
||||
|
||||
### ❌ Backup Monitor Not Sending Alerts
|
||||
|
||||
```bash
|
||||
# 1. Check templates exist
|
||||
ssh root@10.0.20.202 "ls /usr/share/pve-manager/templates/default/oracle-*"
|
||||
|
||||
# 2. Reinstall templates
|
||||
ssh root@10.0.20.202 "/opt/scripts/oracle-backup-monitor-proxmox.sh --install"
|
||||
|
||||
# 3. Check Proxmox notifications work
|
||||
ssh root@10.0.20.202 "pvesh create /nodes/\$(hostname)/apt/update"
|
||||
# Should receive update notification
|
||||
```
|
||||
|
||||
### ❌ F:\ Drive Not Accessible in VM
|
||||
|
||||
```bash
|
||||
# On VM 109:
|
||||
# 1. Check NFS Client service
|
||||
Get-Service | Where {$_.Name -like "*NFS*"}
|
||||
|
||||
# 2. Manual mount
|
||||
mount -o rw,nolock,mtype=hard,timeout=60 10.0.20.202:/mnt/pve/oracle-backups F:
|
||||
|
||||
# 3. Check Proxmox NFS server
|
||||
ssh root@10.0.20.202 "showmount -e localhost"
|
||||
# Should show: /mnt/pve/oracle-backups 10.0.20.37
|
||||
```
|
||||
|
||||
### ❌ Restore Fails
|
||||
|
||||
```bash
|
||||
# 1. Check backup files exist
|
||||
dir F:\ROA\autobackup\*.BKP
|
||||
|
||||
# 2. Check Oracle service
|
||||
sc query OracleServiceROA
|
||||
|
||||
# 3. Check PFILE exists
|
||||
dir C:\Users\oracle\admin\ROA\pfile\initROA.ora
|
||||
|
||||
# 4. View restore log
|
||||
type D:\oracle\logs\restore_from_zero.log
|
||||
```
|
||||
|
||||
### ❌ VM Won't Start
|
||||
|
||||
```bash
|
||||
# Check VM status
|
||||
ssh root@10.0.20.202 "qm status 109"
|
||||
|
||||
# Check VM config
|
||||
ssh root@10.0.20.202 "qm config 109 | grep -E 'memory|cores|bootdisk'"
|
||||
|
||||
# Force unlock if locked
|
||||
ssh root@10.0.20.202 "qm unlock 109"
|
||||
|
||||
# Start with console
|
||||
ssh root@10.0.20.202 "qm start 109 && qm terminal 109"
|
||||
```
|
||||
|
||||
## 📈 Monitoring & Metrics
|
||||
|
||||
### Key Metrics
|
||||
| Metric | Target | Alert Threshold |
|
||||
|--------|--------|-----------------|
|
||||
| FULL Backup Age | < 24h | > 25h |
|
||||
| CUMULATIVE Age | < 6h | > 7h |
|
||||
| Backup Size | ~7 GB/day | > 10 GB |
|
||||
| Restore Time | < 15 min | > 30 min |
|
||||
| Disk Usage | < 80% | > 80% |
|
||||
|
||||
### Check Logs
|
||||
|
||||
```bash
|
||||
# Backup logs (on PRIMARY)
|
||||
Get-Content D:\rman_backup\logs\backup_*.log -Tail 50
|
||||
|
||||
# Transfer logs (on PRIMARY) - UNIFIED script
|
||||
Get-Content D:\rman_backup\logs\transfer_*.log -Tail 50
|
||||
|
||||
# Monitoring logs (on Proxmox)
|
||||
tail -50 /var/log/oracle-dr/*.log
|
||||
|
||||
# Restore logs (on VM 109)
|
||||
type D:\oracle\logs\restore_from_zero.log
|
||||
```
|
||||
|
||||
## 🔐 Security & Access
|
||||
|
||||
### SSH Keys Setup
|
||||
```
|
||||
PRIMARY (10.0.20.36) ──────► PROXMOX (10.0.20.202)
|
||||
SSH Key
|
||||
Port 22
|
||||
|
||||
LINUX WORKSTATION ─────────► PROXMOX (10.0.20.202)
|
||||
SSH Key
|
||||
Port 22
|
||||
|
||||
LINUX WORKSTATION ─────────► VM 109 (10.0.20.37)
|
||||
SSH Key
|
||||
Port 22122
|
||||
```
|
||||
|
||||
### Required Credentials
|
||||
- **PRIMARY**: Administrator (for scheduled tasks)
|
||||
- **PROXMOX**: root (for scripts and VM control)
|
||||
- **VM 109**: romfast (user), SYSTEM (Oracle service)
|
||||
|
||||
## 📅 Maintenance Schedule
|
||||
|
||||
| Day | Time | Action | Duration | Impact |
|
||||
|-----|------|--------|----------|--------|
|
||||
| Daily | 02:30 | FULL Backup | 30 min | None |
|
||||
| Daily | 09:00 | Monitor Backups | 1 min | None |
|
||||
| Daily | 13:00 | CUMULATIVE Backup | 5 min | None |
|
||||
| Daily | 18:00 | CUMULATIVE Backup | 5 min | None |
|
||||
| Saturday | 06:00 | DR Test | 30 min | None |
|
||||
|
||||
## 🚨 Disaster Recovery Procedure
|
||||
|
||||
### When PRIMARY is DOWN:
|
||||
|
||||
1. **Confirm PRIMARY is unreachable**
|
||||
```bash
|
||||
ping 10.0.20.36 # Should fail
|
||||
```
|
||||
|
||||
2. **Start DR VM**
|
||||
```bash
|
||||
ssh root@10.0.20.202 "qm start 109"
|
||||
```
|
||||
|
||||
3. **Wait for boot (3 minutes)**
|
||||
|
||||
4. **Connect to DR VM**
|
||||
```bash
|
||||
ssh -p 22122 romfast@10.0.20.37
|
||||
```
|
||||
|
||||
5. **Run restore**
|
||||
```cmd
|
||||
D:\oracle\scripts\rman_restore_from_zero.cmd
|
||||
```
|
||||
|
||||
6. **Verify database**
|
||||
```sql
|
||||
sqlplus / as sysdba
|
||||
SELECT name, open_mode FROM v$database;
|
||||
-- Should show: ROA, READ WRITE
|
||||
```
|
||||
|
||||
7. **Update application connections**
|
||||
- Change from: 10.0.20.36:1521/ROA
|
||||
- Change to: 10.0.20.37:1521/ROA
|
||||
|
||||
8. **Monitor DR system**
|
||||
- Database is now production
|
||||
- Do NOT run cleanup!
|
||||
- Keep VM running
|
||||
|
||||
## 📝 Quick Reference Card
|
||||
|
||||
```
|
||||
╔══════════════════════════════════════════════════════════════╗
|
||||
║ DR QUICK REFERENCE ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ PRIMARY DOWN? ║
|
||||
║ ssh root@10.0.20.202 ║
|
||||
║ qm start 109 ║
|
||||
║ # Wait 3 min ║
|
||||
║ ssh -p 22122 romfast@10.0.20.37 ║
|
||||
║ D:\oracle\scripts\rman_restore_from_zero.cmd ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ TEST DR? ║
|
||||
║ ssh root@10.0.20.202 "/opt/scripts/weekly-dr-test-proxmox.sh"║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ CHECK BACKUPS? ║
|
||||
║ ssh root@10.0.20.202 "/opt/scripts/oracle-backup-monitor-proxmox.sh"║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ SUPPORT: ║
|
||||
║ Logs: /var/log/oracle-dr/ ║
|
||||
║ Docs: proxmox/vm109-windows-dr/docs/ ║
|
||||
╚══════════════════════════════════════════════════════════════╝
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📂 Structură Director
|
||||
|
||||
```
|
||||
vm109-windows-dr/
|
||||
├── README.md # Acest fișier
|
||||
├── docs/
|
||||
│ ├── PLAN_TESTARE_MONITORIZARE.md # Plan testare și monitorizare DR
|
||||
│ ├── PROXMOX_NOTIFICATIONS_README.md # Configurare notificări Proxmox
|
||||
│ └── archive/ # Planuri și statusuri anterioare
|
||||
│ ├── DR_UPGRADE_TO_CUMULATIVE_PLAN.md
|
||||
│ ├── DR_VM_MIGRATION_GUIDE.md
|
||||
│ ├── DR_WINDOWS_VM_IMPLEMENTATION_PLAN.md
|
||||
│ └── DR_WINDOWS_VM_STATUS_2025-10-09.md
|
||||
└── scripts/
|
||||
├── oracle-backup-monitor-proxmox.sh # Monitorizare zilnică (Proxmox)
|
||||
├── weekly-dr-test-proxmox.sh # Test săptămânal DR (Proxmox)
|
||||
├── rman_backup.bat # RMAN full backup (Windows)
|
||||
├── rman_backup_incremental.bat # RMAN incremental (Windows)
|
||||
├── transfer_backups.ps1 # Transfer backup-uri (Windows)
|
||||
├── rman_restore_from_zero.ps1 # Restore complet (Windows DR)
|
||||
├── cleanup_database.ps1 # Cleanup după test (Windows DR)
|
||||
└── *.ps1 # Alte scripturi configurare
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**Last Updated:** 2026-01-27
|
||||
**Version:** 2.2 - Unified transfer script (transfer_backups.ps1)
|
||||
**Status:** ✅ Production Ready
|
||||
|
||||
## 📋 Changelog
|
||||
|
||||
### v2.2 (Oct 31, 2025)
|
||||
- ✨ **Unified transfer script**: Replaced `transfer_to_dr.ps1` and `transfer_incremental.ps1` with single `transfer_backups.ps1`
|
||||
- 🎯 **Smart duplicate detection**: Automatically skips files that exist on DR
|
||||
- ⚡ **Flexible scheduling**: Can run after any backup type or manually
|
||||
- 🔧 **Simplified maintenance**: One script to maintain instead of two
|
||||
|
||||
### v2.1 (Oct 11, 2025)
|
||||
- Added restore test debugging guide
|
||||
- Implemented new backup naming convention
|
||||
119
proxmox/vm109-windows-dr/docs/PLAN_TESTARE_MONITORIZARE.md
Normal file
119
proxmox/vm109-windows-dr/docs/PLAN_TESTARE_MONITORIZARE.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# Plan de Testare pentru Scripturile de Monitorizare Oracle DR
|
||||
|
||||
**Director:** `proxmox/vm109-windows-dr/docs/`
|
||||
**Scripturi:** `proxmox/vm109-windows-dr/scripts/`
|
||||
|
||||
## Obiective
|
||||
1. Testarea funcționalității de notificări pentru scripturile de monitorizare
|
||||
2. Verificarea funcționării corecte fără erori
|
||||
3. Asigurarea că scriptul de DR test trimite notificare prin email indiferent de rezultat
|
||||
4. Salvarea planului pentru session hand-off
|
||||
|
||||
## Componente de Testat
|
||||
|
||||
### 1. Script Monitorizare Backup-uri (`oracle-backup-monitor-proxmox.sh`)
|
||||
- ✅ Testare funcționare normală (fără erori)
|
||||
- ✅ Verificare detectare probleme backup-uri
|
||||
- ✅ Testare trimitere notificări prin PVE::Notify
|
||||
- ✅ Verificare creare automată template-uri
|
||||
|
||||
### 2. Script Test DR Săptămânal (`weekly-dr-test-proxmox.sh`)
|
||||
- ✅ Testare flux complet de restaurare
|
||||
- ✅ Verificare trimitere notificare SUCCESS/FAIL
|
||||
- ✅ Configurare pentru notificare garantată (indiferent de rezultat)
|
||||
- ✅ Testare integrare cu sistemul de notificări Proxmox
|
||||
|
||||
### 3. Script Restaurare Bază de Date (`rman_restore_from_zero.cmd`)
|
||||
- ✅ Testare verificare acces NFS mount
|
||||
- ✅ Verificare proces de restaurare complet
|
||||
- ✅ Validare integrare cu scriptul DR test
|
||||
|
||||
## Etape de Testare
|
||||
|
||||
### Faza 1: Pregătirea Mediului
|
||||
1. Verificare dependențe instalate (jq, PVE::Notify Perl modules)
|
||||
2. Verificare configurare notificări Proxmox
|
||||
3. Creare backup-uri de test în directorul `/mnt/pve/oracle-backups/ROA/autobackup`
|
||||
4. Verificare conectivitate SSH către VM DR (10.0.20.37)
|
||||
|
||||
### Faza 2: Testare Script Monitorizare
|
||||
1. Rulare `oracle-backup-monitor-proxmox.sh --install` pentru creare template-uri
|
||||
2. Verificare template-uri create în `/usr/share/pve-manager/templates/default/`
|
||||
3. Testare în condiții normale (toate backup-urile OK)
|
||||
4. Simulare problemă: backup expirat, spațiu disk insuficient
|
||||
5. Verificare recepționare notificări
|
||||
|
||||
### Faza 3: Testare Script DR Test
|
||||
1. Rulare `weekly-dr-test-proxmox.sh --install`
|
||||
2. Testare în mod dry-run (fără pornire VM reală)
|
||||
3. Verificare flux complet de restaurare
|
||||
4. Validare trimitere notificare atât pentru succes cât și pentru eșec
|
||||
5. Testare cleanup automat după test
|
||||
|
||||
### Faza 4: Validare Integrare
|
||||
1. Testare ambele scripturi împreună
|
||||
2. Verificare performanță și timp de răspuns
|
||||
3. Validare log-uri și rapoarte generate
|
||||
4. Configurare cron pentru execuție automată
|
||||
|
||||
### Faza 5: Validare Format Notificări
|
||||
1. Reinstalare template-uri compacte: `scripts/oracle-backup-monitor-proxmox.sh --install`
|
||||
2. Generare notificări reale din scripturi (backup monitor + DR test) și analiză în clienți email
|
||||
3. Verificare afișare în client email (text + HTML) și în GUI Proxmox
|
||||
4. Rulare `weekly-dr-test-proxmox.sh` în mediu controlat și validare sumar compact în email (inclusiv componente, pași, timeline)
|
||||
5. Capturare feedback utilizatori finali (Gmail + Outlook) pentru lizibilitate
|
||||
|
||||
### Faza 6: Testare Erori și Edge Cases
|
||||
1. Testare fără conectivitate la VM DR
|
||||
2. Testare director backup-uri gol
|
||||
3. Testare eșec restaurare database
|
||||
4. Testare timeout operațiuni
|
||||
5. Verificare comportament în aceste scenarii
|
||||
|
||||
## Modificări Necesare pentru Script DR Test
|
||||
|
||||
### Configurare Notificare Forțată
|
||||
Se va modifica `weekly-dr-test-proxmox.sh` pentru a trimite **întotdeauna** notificare:
|
||||
- ✅ Trackează toate testele (chiar și cele care eșuează la început)
|
||||
- ✅ Trimite raport detaliat indiferent de rezultat
|
||||
- ✅ Include timeline complet al pașilor executați
|
||||
- ✅ Generează notificare cu severity corespunzător
|
||||
|
||||
## Teste Specifice
|
||||
|
||||
### Test 1: Funcționare Normală
|
||||
- Scenariu: Toate componentele funcționează corect
|
||||
- Rezultat așteptat: Notificări succes, raport complet
|
||||
|
||||
### Test 2: Eșec Conectivitate VM
|
||||
- Scenariu: VM DR nu pornește sau nu răspunde la SSH
|
||||
- Rezultat așteptat: Notificare eșec cu detalii despre punctul de blocaj
|
||||
|
||||
### Test 3: Backup-uri Lipsă
|
||||
- Scenariu: Director backup-uri gol sau fișiere corupte
|
||||
- Rezultat așteptat: Notificare eroare + raport detaliat
|
||||
|
||||
### Test 4: Eșec Restaurare Database
|
||||
- Scenariu: RMAN restore eșuează la un pas specific
|
||||
- Rezultat așteptat: Notificare cu pasul exact unde a eșuat + log-uri
|
||||
|
||||
## Criterii de Succes
|
||||
- ✅ Ambele scripturi rulează fără erori sintactice
|
||||
- ✅ Template-urile de notificare se creează automat
|
||||
- ✅ Notificările se trimit prin sistemul Proxmox
|
||||
- ✅ Email-uri raport sunt formatate corect (text + HTML)
|
||||
- ✅ Log-ul DR test conține timeline detaliat
|
||||
- ✅ Configurare cron funcționează corect
|
||||
|
||||
## Schedule Testare
|
||||
1. **Ziua 1**: Testare individuală scripturi
|
||||
2. **Ziua 2**: Testare integrată și scenarii de erori
|
||||
3. **Ziua 3**: Testare performanță și configurare producție
|
||||
4. **Ziua 4**: Monitorizare continuă și validare finală
|
||||
|
||||
## Salvare Plan
|
||||
Planul este salvat pentru hand-off între sesiuni.
|
||||
|
||||
---
|
||||
*Creat: 2025-10-10*
|
||||
*Status: Ready for implementation*
|
||||
300
proxmox/vm109-windows-dr/docs/PROXMOX_NOTIFICATIONS_README.md
Normal file
300
proxmox/vm109-windows-dr/docs/PROXMOX_NOTIFICATIONS_README.md
Normal file
@@ -0,0 +1,300 @@
|
||||
# Oracle DR Monitoring cu Notificări Proxmox Native
|
||||
|
||||
**Director:** `proxmox/vm109-windows-dr/docs/`
|
||||
**Scripturi:** `proxmox/vm109-windows-dr/scripts/`
|
||||
|
||||
## 🎯 Overview
|
||||
|
||||
Sistem de monitorizare și alertare pentru Oracle DR care folosește **sistemul nativ de notificări Proxmox** (PVE::Notify) - același sistem folosit pentru alertele HA, backup-uri, etc.
|
||||
|
||||
**Avantaje majore:**
|
||||
- ✅ **Zero configurare email** - folosește setup-ul existent Proxmox
|
||||
- ✅ **Scripturi autosuficiente** - creează automat template-urile necesare
|
||||
- ✅ **Notificări profesionale** - HTML formatat, culori, grafice
|
||||
- ✅ **Integrare completă** - apare în Datacenter > Notifications
|
||||
- ✅ **Flexibilitate maximă** - schimbi destinația din GUI, nu din cod
|
||||
|
||||
## 📦 Componente
|
||||
|
||||
### 1. **oracle-backup-monitor-proxmox.sh**
|
||||
Monitorizează backup-urile Oracle și trimite alerte când:
|
||||
- Backup FULL > 25 ore vechime
|
||||
- Backup CUMULATIVE > 7 ore vechime
|
||||
- Spațiu disk > 80% plin
|
||||
- Lipsesc backup-uri
|
||||
|
||||
### 2. **weekly-dr-test-proxmox.sh**
|
||||
Rulează test DR complet automat:
|
||||
- Pornește VM-ul DR
|
||||
- Verifică mount NFS
|
||||
- Restaurează database
|
||||
- Validează datele
|
||||
- Cleanup și shutdown
|
||||
- Raport detaliat cu timeline
|
||||
|
||||
## 🚀 Instalare Rapidă (3 minute)
|
||||
|
||||
### Pe Proxmox Host:
|
||||
|
||||
```bash
|
||||
# 1. Copiază scripturile
|
||||
mkdir -p /opt/scripts
|
||||
cd /opt/scripts
|
||||
wget https://your-repo/oracle-backup-monitor-proxmox.sh
|
||||
wget https://your-repo/weekly-dr-test-proxmox.sh
|
||||
chmod +x *.sh
|
||||
|
||||
# 2. Instalează dependențe (dacă nu există)
|
||||
apt-get update
|
||||
apt-get install -y jq dos2unix
|
||||
|
||||
# 3. Corectează line endings (dacă vin din Windows)
|
||||
dos2unix /opt/scripts/*.sh
|
||||
|
||||
# 4. Instalează template-urile (AUTOMAT!)
|
||||
/opt/scripts/oracle-backup-monitor-proxmox.sh --install
|
||||
/opt/scripts/weekly-dr-test-proxmox.sh --install
|
||||
|
||||
# 5. Testează manual
|
||||
/opt/scripts/oracle-backup-monitor-proxmox.sh
|
||||
/opt/scripts/weekly-dr-test-proxmox.sh
|
||||
|
||||
# 6. Adaugă în cron
|
||||
crontab -e
|
||||
# Adaugă:
|
||||
0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh
|
||||
0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
|
||||
```
|
||||
|
||||
**ATÂT! Nu mai trebuie să faci nimic!**
|
||||
|
||||
## 📧 Cum Funcționează Notificările
|
||||
|
||||
### Fluxul de notificare:
|
||||
|
||||
```
|
||||
Script detectează problemă
|
||||
↓
|
||||
Creează JSON cu datele
|
||||
↓
|
||||
Apelează PVE::Notify
|
||||
↓
|
||||
Proxmox procesează template-ul Handlebars
|
||||
↓
|
||||
Trimite notificare conform config din GUI
|
||||
↓
|
||||
Primești email/webhook/etc
|
||||
```
|
||||
|
||||
### Ce primești:
|
||||
|
||||
#### Email pentru Backup Monitor:
|
||||
```
|
||||
Subject: Oracle Backup WARNING - pveelite
|
||||
|
||||
Oracle Backup Monitoring Alert
|
||||
==============================
|
||||
Severity: WARNING
|
||||
Date: 2025-10-10 21:00:00
|
||||
Status: WARNING
|
||||
|
||||
WARNINGS:
|
||||
- FULL backup is 26 hours old (threshold: 25)
|
||||
|
||||
Backup Details:
|
||||
- Total Backups: 15
|
||||
- Total Size: 8.3 GB
|
||||
- FULL Backup Age: 26 hours ⚠️
|
||||
- CUMULATIVE Backup Age: 3 hours ✓
|
||||
- Disk Usage: 45%
|
||||
```
|
||||
|
||||
#### Email pentru DR Test (HTML):
|
||||

|
||||
|
||||
Conține:
|
||||
- Timeline vizual cu toate etapele
|
||||
- Metrici în card-uri colorate
|
||||
- Tabel cu detalii sistem
|
||||
- Evidențiere erori/warning-uri
|
||||
|
||||
## 🎨 Template-uri Handlebars
|
||||
|
||||
Scripturile creează **automat** 6 template-uri:
|
||||
|
||||
### Pentru Backup Monitor:
|
||||
- `oracle-backup-subject.txt.hbs` - Subiect email
|
||||
- `oracle-backup-body.txt.hbs` - Corp text
|
||||
- `oracle-backup-body.html.hbs` - Corp HTML formatat
|
||||
|
||||
### Pentru DR Test:
|
||||
- `oracle-dr-test-subject.txt.hbs` - Subiect email
|
||||
- `oracle-dr-test-body.txt.hbs` - Corp text
|
||||
- `oracle-dr-test-body.html.hbs` - Corp HTML cu timeline
|
||||
|
||||
**Locație:** `/usr/share/pve-manager/templates/default/`
|
||||
|
||||
## 🔧 Configurare Avansată (Opțional)
|
||||
|
||||
### Matching Rules în Proxmox GUI
|
||||
|
||||
Poți crea reguli pentru a ruta notificările diferit:
|
||||
|
||||
1. **Datacenter > Notifications > Add > Matcher**
|
||||
|
||||
2. **Exemplu 1:** Trimite erorile către echipa on-call
|
||||
```
|
||||
Name: oracle-critical
|
||||
Match field: severity equals error
|
||||
Match field: type equals oracle-backup
|
||||
Target: oncall-email
|
||||
```
|
||||
|
||||
3. **Exemplu 2:** Warning-uri doar în Slack
|
||||
```
|
||||
Name: oracle-warnings
|
||||
Match field: severity equals warning
|
||||
Match field: type contains oracle
|
||||
Target: slack-webhook
|
||||
```
|
||||
|
||||
### Modificare Template-uri
|
||||
|
||||
Dacă vrei să personalizezi template-urile:
|
||||
|
||||
```bash
|
||||
# Editează template-ul
|
||||
nano /usr/share/pve-manager/templates/default/oracle-backup-body.html.hbs
|
||||
|
||||
# Adaugă câmpuri noi, schimbă culori, etc.
|
||||
# Folosește sintaxa Handlebars: {{variable}}, {{#if condition}}, {{#each array}}
|
||||
```
|
||||
|
||||
## 📊 Monitorizare și Debugging
|
||||
|
||||
### Verifică template-urile:
|
||||
```bash
|
||||
ls -la /usr/share/pve-manager/templates/default/oracle-*
|
||||
```
|
||||
|
||||
### Vezi log-uri notificări:
|
||||
```bash
|
||||
# Log-uri Proxmox
|
||||
journalctl -u pveproxy -f | grep notify
|
||||
|
||||
# Log-uri scripturi
|
||||
tail -f /var/log/oracle-dr/*.log
|
||||
```
|
||||
|
||||
### Testează notificări manual:
|
||||
```bash
|
||||
# Forțează o alertă de test
|
||||
echo "test" > /mnt/pve/oracle-backups/ROA/autobackup/test.BKP
|
||||
./oracle-backup-monitor-proxmox.sh
|
||||
rm /mnt/pve/oracle-backups/ROA/autobackup/test.BKP
|
||||
```
|
||||
|
||||
## 🆚 Comparație cu Metode Clasice
|
||||
|
||||
| Aspect | Email Manual | Webhook | **PVE::Notify** |
|
||||
|--------|--------------|---------|-----------------|
|
||||
| Configurare | Complex (SMTP) | Medium | **Zero** ✅ |
|
||||
| Template-uri | În script | În script | **Handlebars** ✅ |
|
||||
| Flexibilitate | Hardcodat | Hardcodat | **GUI Proxmox** ✅ |
|
||||
| Formatare | Basic | JSON | **HTML Rich** ✅ |
|
||||
| Maintenance | Per script | Per script | **Centralizat** ✅ |
|
||||
| Integrare | Separată | Separată | **Nativă** ✅ |
|
||||
|
||||
## 🔐 Securitate
|
||||
|
||||
- Scripturile rulează local pe Proxmox (no remote execution)
|
||||
- Folosesc SSH keys pentru conectare la VM-uri
|
||||
- Template-urile sunt read-only pentru non-root
|
||||
- Notificările urmează security policy-ul Proxmox
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### Problemă: Nu primesc notificări
|
||||
|
||||
1. Verifică dacă Proxmox trimite alte notificări:
|
||||
```bash
|
||||
# Test notificare Proxmox
|
||||
pvesh create /nodes/$(hostname)/apt/update
|
||||
# Ar trebui să primești notificare despre update
|
||||
```
|
||||
|
||||
2. Verifică template-urile:
|
||||
```bash
|
||||
ls /usr/share/pve-manager/templates/default/oracle-*
|
||||
# Trebuie să existe 6 fișiere
|
||||
```
|
||||
|
||||
3. Verifică configurația notificări:
|
||||
```bash
|
||||
cat /etc/pve/notifications.cfg
|
||||
```
|
||||
|
||||
### Problemă: Template-uri nu se creează
|
||||
|
||||
```bash
|
||||
# Rulează cu debug
|
||||
bash -x ./oracle-backup-monitor-proxmox.sh --install
|
||||
|
||||
# Verifică permisiuni
|
||||
ls -ld /usr/share/pve-manager/templates/default/
|
||||
```
|
||||
|
||||
### Problemă: Eroare PVE::Notify
|
||||
|
||||
```bash
|
||||
# Verifică că perl modules sunt instalate
|
||||
perl -e 'use PVE::Notify; print "OK\n"'
|
||||
|
||||
# Reinstalează dacă lipsesc
|
||||
apt-get install --reinstall libpve-common-perl
|
||||
```
|
||||
|
||||
## 📈 Metrici și KPIs
|
||||
|
||||
Scripturile raportează automat:
|
||||
|
||||
### Backup Monitor:
|
||||
- Vârsta backup-urilor (ore)
|
||||
- Număr total backup-uri
|
||||
- Dimensiune totală (GB)
|
||||
- Utilizare disk (%)
|
||||
|
||||
### DR Test:
|
||||
- Durata totală test (minute)
|
||||
- Timp restaurare (minute)
|
||||
- Număr tabele restaurate
|
||||
- Status fiecare etapă
|
||||
- Spațiu eliberat (GB)
|
||||
|
||||
## 🎉 Beneficii pentru Echipă
|
||||
|
||||
1. **Zero Training** - folosește sistemul cunoscut Proxmox
|
||||
2. **Zero Maintenance** - nu trebuie actualizate credențiale email
|
||||
3. **Consistență** - toate alertele vin în același format
|
||||
4. **Vizibilitate** - apare în dashboard Proxmox
|
||||
5. **Flexibilitate** - schimbi destinatari din GUI instant
|
||||
|
||||
## 📝 Note Finale
|
||||
|
||||
- Scripturile sunt **idempotente** - pot fi rulate oricând
|
||||
- Template-urile se creează **doar dacă lipsesc**
|
||||
- Notificările se trimit **doar când sunt probleme** (sau success pentru DR test)
|
||||
- Log-urile se păstrează **local pentru audit**
|
||||
|
||||
## 🤝 Suport
|
||||
|
||||
Pentru probleme sau întrebări:
|
||||
1. Verifică această documentație
|
||||
2. Verifică log-urile: `/var/log/oracle-dr/`
|
||||
3. Rulează cu `--help` pentru opțiuni
|
||||
|
||||
---
|
||||
|
||||
*Dezvoltat pentru sistemul Oracle DR pe Proxmox*
|
||||
*Bazat pe pattern-ul ha-monitor.sh din Proxmox VE*
|
||||
*Versiune: 1.0 - Octombrie 2025*
|
||||
File diff suppressed because it is too large
Load Diff
356
proxmox/vm109-windows-dr/docs/archive/DR_VM_MIGRATION_GUIDE.md
Normal file
356
proxmox/vm109-windows-dr/docs/archive/DR_VM_MIGRATION_GUIDE.md
Normal file
@@ -0,0 +1,356 @@
|
||||
# Oracle DR VM - Migration Between Proxmox Nodes
|
||||
|
||||
**Purpose:** How to migrate VM 109 between Proxmox nodes while maintaining backup access
|
||||
**Scenario:** Move VM from pveelite (10.0.20.202) to pvemini (10.0.20.201) or vice versa
|
||||
|
||||
---
|
||||
|
||||
## 📋 OVERVIEW
|
||||
|
||||
**Current Setup:**
|
||||
- VM 109 runs on pveelite (10.0.20.202)
|
||||
- Backups stored on pveelite: `/mnt/pve/oracle-backups`
|
||||
- VM has mount point: `qm set 109 -mp0 /mnt/pve/oracle-backups`
|
||||
- Mount appears in Windows as **F:\** (E:\ already used)
|
||||
|
||||
**Challenge:**
|
||||
- Mount points are **node-local** - path `/mnt/pve/oracle-backups` exists only on pveelite
|
||||
- If you migrate VM to pvemini, mount point breaks
|
||||
|
||||
**Solution:**
|
||||
- Create same directory structure on destination node
|
||||
- Sync backups between nodes
|
||||
- Mount point works identically on new node
|
||||
|
||||
---
|
||||
|
||||
## 🔄 MIGRATION PROCEDURE
|
||||
|
||||
### PRE-MIGRATION CHECKLIST
|
||||
|
||||
- [ ] VM 109 is powered OFF (or you have planned for online migration - see Step 3, Option A)
|
||||
- [ ] You have root SSH access to both Proxmox nodes
|
||||
- [ ] You know which node you're migrating TO
|
||||
- [ ] Backups are current (check timestamp)
|
||||
|
||||
---
|
||||
|
||||
### STEP 1: Prepare Destination Node (pvemini)
|
||||
|
||||
**On pvemini (10.0.20.201):**
|
||||
|
||||
```bash
|
||||
ssh root@10.0.20.201
|
||||
|
||||
# Create identical directory structure
|
||||
mkdir -p /mnt/pve/oracle-backups/ROA/autobackup
|
||||
chmod 755 /mnt/pve/oracle-backups
|
||||
chmod 755 /mnt/pve/oracle-backups/ROA
|
||||
chmod 755 /mnt/pve/oracle-backups/ROA/autobackup
|
||||
|
||||
# Verify structure
|
||||
ls -la /mnt/pve/oracle-backups/ROA/autobackup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### STEP 2: Sync Backups from Source to Destination
|
||||
|
||||
**Option A: Full Sync (first time migration)**
|
||||
|
||||
```bash
|
||||
# On pvemini, sync all backups from pveelite
|
||||
rsync -avz --progress \
|
||||
root@10.0.20.202:/mnt/pve/oracle-backups/ \
|
||||
/mnt/pve/oracle-backups/
|
||||
|
||||
# This copies all backup files (~15 GB, takes 2-3 minutes on 1Gbps network)
|
||||
```
|
||||
|
||||
**Option B: Incremental Sync (if you already synced before)**
|
||||
|
||||
```bash
|
||||
# On pvemini, sync only new/changed files
|
||||
rsync -avz --progress --update \
|
||||
root@10.0.20.202:/mnt/pve/oracle-backups/ \
|
||||
/mnt/pve/oracle-backups/
|
||||
|
||||
# Much faster - only copies new backups
|
||||
```
|
||||
|
||||
**Verify sync:**
|
||||
```bash
|
||||
# Check file count matches
|
||||
ssh root@10.0.20.202 "ls /mnt/pve/oracle-backups/ROA/autobackup/*.bkp | wc -l"
|
||||
ls /mnt/pve/oracle-backups/ROA/autobackup/*.bkp | wc -l
|
||||
|
||||
# Should be same number
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### STEP 3: Migrate VM via Proxmox
|
||||
|
||||
**Option A: Online Migration (VM stays running)**
|
||||
|
||||
```bash
|
||||
# From Proxmox CLI on source node (pveelite):
|
||||
qm migrate 109 pvemini --online
|
||||
|
||||
# This uses live migration - VM doesn't stop
|
||||
# Takes 5-10 minutes depending on RAM/disk
|
||||
```
|
||||
|
||||
**Option B: Offline Migration (VM must be stopped)**
|
||||
|
||||
```bash
|
||||
# Stop VM first
|
||||
qm stop 109
|
||||
|
||||
# Migrate
|
||||
qm migrate 109 pvemini
|
||||
|
||||
# Faster than online, but requires downtime
|
||||
```
|
||||
|
||||
**Option C: Via Proxmox Web UI**
|
||||
|
||||
```
|
||||
1. Select VM 109 on pveelite
|
||||
2. Click "Migrate"
|
||||
3. Select target node: pvemini
|
||||
4. Choose migration type: online or offline
|
||||
5. Click "Migrate"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### STEP 4: Verify Mount Point After Migration
|
||||
|
||||
**After migration completes:**
|
||||
|
||||
```bash
|
||||
# On pvemini, check VM config includes mount point
|
||||
qm config 109 | grep mp0
|
||||
|
||||
# Expected output:
|
||||
# mp0: /mnt/pve/oracle-backups,mp=/mnt/oracle-backups
|
||||
|
||||
# If missing, add it:
|
||||
qm set 109 -mp0 /mnt/pve/oracle-backups,mp=/mnt/oracle-backups
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### STEP 5: Start VM and Verify Access
|
||||
|
||||
```bash
|
||||
# Start VM on new node
|
||||
qm start 109
|
||||
|
||||
# Wait for boot
|
||||
sleep 180
|
||||
|
||||
# Check mount in Windows
|
||||
ssh -p 22122 romfast@10.0.20.37 "Get-PSDrive F"
|
||||
|
||||
# Should show F:\ with Used/Free space
|
||||
|
||||
# Verify backup files accessible
|
||||
ssh -p 22122 romfast@10.0.20.37 "Get-ChildItem F:\ROA\autobackup\*.bkp | Measure-Object"
|
||||
|
||||
# Should show backup file count
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### STEP 6: Update PRIMARY Transfer Scripts
|
||||
|
||||
**On PRIMARY (10.0.20.36):**
|
||||
|
||||
Backup transfer scripts need to know which node to send to.
|
||||
|
||||
**Option A: Update scripts to point to new node**
|
||||
|
||||
```powershell
|
||||
# Edit transfer scripts
|
||||
cd D:\rman_backup
|
||||
|
||||
# Find and replace in transfer scripts:
|
||||
# BEFORE:
|
||||
$DRHost = "10.0.20.202" # pveelite
|
||||
|
||||
# AFTER:
|
||||
$DRHost = "10.0.20.201" # pvemini
|
||||
```
|
||||
|
||||
**Option B: Use DNS/hostname (RECOMMENDED)**
|
||||
|
||||
```powershell
|
||||
# In transfer scripts, use hostname instead of IP:
|
||||
$DRHost = "pvedr" # DNS name
|
||||
|
||||
# Then update DNS to point to active node:
|
||||
# pvedr → 10.0.20.201 (currently pvemini)
|
||||
# When you migrate back, just update DNS
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔄 ONGOING SYNC STRATEGY
|
||||
|
||||
### If VM Stays on New Node Long-Term
|
||||
|
||||
**Setup automated sync from PRIMARY → new node:**
|
||||
|
||||
Just update transfer scripts as in Step 6 above. Backups will now go directly to pvemini.
|
||||
|
||||
**Old backups on pveelite:**
|
||||
- Can be deleted after verification
|
||||
- Or kept as additional backup copy (recommended)
|
||||
|
||||
```bash
|
||||
# On pveelite, cleanup old backups after 7 days
|
||||
find /mnt/pve/oracle-backups/ROA/autobackup -iname "*.bkp" -mtime +7 -delete
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### If You Migrate VM Back and Forth
|
||||
|
||||
**Scenario:** VM moves between nodes frequently
|
||||
|
||||
**Solution 1: Sync in both directions**
|
||||
|
||||
```bash
|
||||
# Cronjob on pveelite (every 6 hours)
|
||||
0 */6 * * * rsync -az root@10.0.20.201:/mnt/pve/oracle-backups/ /mnt/pve/oracle-backups/
|
||||
|
||||
# Cronjob on pvemini (every 6 hours)
|
||||
0 */6 * * * rsync -az root@10.0.20.202:/mnt/pve/oracle-backups/ /mnt/pve/oracle-backups/
|
||||
```
|
||||
|
||||
**Solution 2: Shared Storage (NFS/CIFS)**
|
||||
|
||||
Use Proxmox shared storage instead of local paths:
|
||||
- Setup NFS server on one node
|
||||
- Both nodes mount same NFS share
|
||||
- `/mnt/pve/oracle-backups` points to shared storage
|
||||
- VM migration doesn't require backup sync
|
||||
|
||||
---
|
||||
|
||||
## 📊 MIGRATION CHECKLIST
|
||||
|
||||
### Before Migration:
|
||||
- [ ] VM 109 is stopped (or prepared for online migration)
|
||||
- [ ] Destination node has directory: `/mnt/pve/oracle-backups/ROA/autobackup`
|
||||
- [ ] Backups synced to destination node (rsync completed)
|
||||
- [ ] You have tested restore recently (weekly test passed)
|
||||
|
||||
### During Migration:
|
||||
- [ ] VM migration initiated (online or offline)
|
||||
- [ ] Migration progress monitored (no errors)
|
||||
- [ ] Migration completed successfully
|
||||
|
||||
### After Migration:
|
||||
- [ ] VM 109 shows as running on new node
|
||||
- [ ] Mount point configured: `qm config 109 | grep mp0`
|
||||
- [ ] VM started successfully
|
||||
- [ ] F:\ drive accessible in Windows: `Get-PSDrive F`
|
||||
- [ ] Backup files visible: `Get-ChildItem F:\ROA\autobackup\*.bkp`
|
||||
- [ ] PRIMARY transfer scripts updated (point to new node IP)
|
||||
- [ ] Test restore completed successfully
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ TROUBLESHOOTING
|
||||
|
||||
### Mount Point Not Visible in VM After Migration
|
||||
|
||||
**Symptom:** F:\ drive missing in Windows after migration
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# On new node, verify mount point config
|
||||
qm config 109 | grep mp0
|
||||
|
||||
# If missing, add it
|
||||
qm set 109 -mp0 /mnt/pve/oracle-backups,mp=/mnt/oracle-backups
|
||||
|
||||
# Restart VM
|
||||
qm stop 109
|
||||
qm start 109
|
||||
```
|
||||
|
||||
### Backup Files Not Accessible
|
||||
|
||||
**Symptom:** F:\ exists but shows as empty
|
||||
|
||||
**Cause:** Backups not synced to new node
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Re-sync backups from old node
|
||||
rsync -avz root@10.0.20.202:/mnt/pve/oracle-backups/ /mnt/pve/oracle-backups/
|
||||
|
||||
# Verify files exist
|
||||
ls -lh /mnt/pve/oracle-backups/ROA/autobackup/*.bkp
|
||||
```
|
||||
|
||||
### PRIMARY Still Sending to Old Node
|
||||
|
||||
**Symptom:** New backups not appearing on new node
|
||||
|
||||
**Cause:** Transfer scripts still point to old node IP
|
||||
|
||||
**Solution:**
|
||||
Update `$DRHost` in transfer scripts on PRIMARY (see Step 6)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 MIGRATION TIMELINE
|
||||
|
||||
| Task | Duration | Downtime |
|
||||
|------|----------|----------|
|
||||
| Prepare destination node | 5 min | None |
|
||||
| Sync backups (full, ~15GB) | 3 min | None |
|
||||
| Migrate VM (offline) | 5 min | **5 min** |
|
||||
| Verify and start VM | 3 min | **3 min** |
|
||||
| Update PRIMARY scripts | 2 min | None |
|
||||
| **Total** | **18 min** | **8 min** |
|
||||
|
||||
**With online migration:** 0 minutes downtime (VM keeps running during migration)
|
||||
|
||||
---
|
||||
|
||||
## 📞 QUICK REFERENCE
|
||||
|
||||
**Current Setup:**
|
||||
- Source node: pveelite (10.0.20.202)
|
||||
- Destination node: pvemini (10.0.20.201)
|
||||
- VM: 109 (oracle-dr-windows)
|
||||
- Backup path: `/mnt/pve/oracle-backups`
|
||||
- Windows mount: F:\ (not E:\ - already used)
|
||||
|
||||
**Key Commands:**
|
||||
```bash
|
||||
# Sync backups
|
||||
rsync -avz root@SOURCE:/mnt/pve/oracle-backups/ /mnt/pve/oracle-backups/
|
||||
|
||||
# Migrate VM
|
||||
qm migrate 109 DESTINATION --online
|
||||
|
||||
# Check mount
|
||||
qm config 109 | grep mp0
|
||||
|
||||
# Add mount if missing
|
||||
qm set 109 -mp0 /mnt/pve/oracle-backups,mp=/mnt/oracle-backups
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**Generated:** 2025-10-09
|
||||
**Version:** 1.0
|
||||
**Status:** Ready for use
|
||||
**See Also:** DR_UPGRADE_TO_CUMULATIVE_PLAN.md
|
||||
@@ -0,0 +1,963 @@
|
||||
# Oracle DR - Windows VM Implementation Plan
|
||||
**Generated:** 2025-10-08
|
||||
**Objective:** Replace Linux LXC DR with Windows VM for same-platform RMAN restore
|
||||
**Target:** Windows VM in Proxmox, IP 10.0.20.37, Oracle 19c SE2
|
||||
|
||||
---
|
||||
|
||||
## 📋 PRE-IMPLEMENTATION CHECKLIST
|
||||
|
||||
### Current Infrastructure (IMPLEMENTED ✅)
|
||||
- ✅ PRIMARY: Windows Server, Oracle 19c SE2, IP: 10.0.20.36, SSH port 22122
|
||||
- ✅ Database: ROA, DBID: 1363569330
|
||||
- ✅ RMAN backups: FULL daily (02:30 AM)
|
||||
- ✅ DIFFERENTIAL INCREMENTAL (14:00) - NOT USED (causes UNDO corruption on restore)
|
||||
- ✅ Transfer scripts: PowerShell scripts transferring to VM 109 (Windows)
|
||||
- ✅ Backup size: ~6-7GB compressed (from 23GB), retention 2 days
|
||||
- ✅ DR target: Windows VM 109 (10.0.20.37) on pveelite - **OPERATIONAL**
|
||||
|
||||
### Planned Upgrade (see DR_UPGRADE_TO_CUMULATIVE_PLAN.md)
|
||||
- 🔄 Convert DIFFERENTIAL → **CUMULATIVE** incremental backups
|
||||
- 🔄 Add second daily incremental (13:00 + 18:00 vs current 14:00 only)
|
||||
- 🔄 Store backups on Proxmox host (pveelite), mounted in VM when needed
|
||||
- 🔄 Target RPO: **3-4 hours** (vs current 24 hours)
|
||||
|
||||
### What We'll Build
|
||||
- 🎯 Windows VM in Proxmox (replaces LXC 109)
|
||||
- 🎯 IP: 10.0.20.37 (same as current LXC)
|
||||
- 🎯 Oracle 19c SE2 installed (empty database template)
|
||||
- 🎯 OpenSSH Server for passwordless transfer
|
||||
- 🎯 RMAN restore scripts (automated DR recovery)
|
||||
- 🎯 Zero daily resource consumption (VM powered off when not needed)
|
||||
|
||||
### Resource Requirements
|
||||
- RAM: 4-6 GB (allocated, but VM runs only during DR events)
|
||||
- Disk: 100 GB (OS + Oracle + backup storage)
|
||||
- CPU: 2-4 vCPU
|
||||
- Network: Access to 10.0.20.0/24
|
||||
|
||||
---
|
||||
|
||||
## 🚀 PHASE 1: CREATE WINDOWS VM IN PROXMOX (30 minutes)
|
||||
|
||||
### Step 1.1: Download Windows 11 ISO
|
||||
```bash
|
||||
# On Proxmox host or download station
|
||||
cd /var/lib/vz/template/iso
|
||||
|
||||
# Option A: Download Windows 11 from Microsoft
|
||||
wget -O Win11_EnglishInternational_x64v1.iso \
|
||||
"https://software-download.microsoft.com/download/pr/..."
|
||||
|
||||
# Option B: Upload existing ISO via Proxmox web UI
|
||||
# Datacenter → Storage → ISO Images → Upload
|
||||
```
|
||||
|
||||
### Step 1.2: Create VM in Proxmox Web UI
|
||||
```
|
||||
Proxmox Web UI → Create VM
|
||||
|
||||
General:
|
||||
- VM ID: 109 (same as LXC number for consistency)
|
||||
- Name: oracle-dr-windows
|
||||
- Start at boot: NO (VM stays off until DR event)
|
||||
|
||||
OS:
|
||||
- ISO: Win11_EnglishInternational_x64v1.iso
|
||||
- Type: Microsoft Windows
|
||||
- Version: 11/2022
|
||||
|
||||
System:
|
||||
- Machine: q35
|
||||
- BIOS: OVMF (UEFI)
|
||||
- Add TPM: YES (for Windows 11)
|
||||
- SCSI Controller: VirtIO SCSI
|
||||
|
||||
Disks:
|
||||
- Bus/Device: SCSI 0
|
||||
- Storage: local-lvm (or your storage)
|
||||
- Size: 100 GB
|
||||
- Cache: Write back
|
||||
- Discard: YES
|
||||
- IO thread: YES
|
||||
|
||||
CPU:
|
||||
- Cores: 4
|
||||
- Type: host
|
||||
|
||||
Memory:
|
||||
- RAM: 6144 MB (6 GB)
|
||||
- Ballooning: NO
|
||||
|
||||
Network:
|
||||
- Bridge: vmbr0
|
||||
- Model: VirtIO
|
||||
- Firewall: NO
|
||||
```
|
||||
|
||||
### Step 1.3: Install Windows 11
|
||||
```
|
||||
1. Start VM → Open Console (noVNC)
|
||||
2. Boot from ISO
|
||||
3. Windows Setup:
|
||||
- Language: English
|
||||
- Install Now
|
||||
- Windows 11 Pro (or your edition)
|
||||
- Custom Install
|
||||
- Load driver: Browse → virtio-win-0.1.x (if needed for disk detection)
|
||||
- Select disk → Format → Next
|
||||
|
||||
4. Initial Setup:
|
||||
- Computer name: ORACLE-DR
|
||||
- Local account: Administrator / <strong-password>
|
||||
- Disable all telemetry/tracking options
|
||||
|
||||
5. First boot:
|
||||
- Disable Windows Defender real-time protection (for Oracle performance)
|
||||
- Disable Windows Update automatic restart
|
||||
- Install VirtIO drivers (guest tools)
|
||||
```
|
||||
|
||||
### Step 1.4: Configure Network (Static IP)
|
||||
```powershell
|
||||
# In Windows VM, run PowerShell as Administrator
|
||||
|
||||
# Set static IP 10.0.20.37
|
||||
New-NetIPAddress -InterfaceAlias "Ethernet" -IPAddress 10.0.20.37 -PrefixLength 24 -DefaultGateway 10.0.20.1
|
||||
|
||||
# Set DNS
|
||||
Set-DnsClientServerAddress -InterfaceAlias "Ethernet" -ServerAddresses ("10.0.20.1","8.8.8.8")
|
||||
|
||||
# Verify
|
||||
Get-NetIPAddress | Where-Object {$_.IPAddress -eq "10.0.20.37"}
|
||||
Test-Connection 10.0.20.36 -Count 2
|
||||
```
|
||||
|
||||
### Step 1.5: Windows Initial Configuration
|
||||
```powershell
|
||||
# Run as Administrator
|
||||
|
||||
# Enable Remote Desktop (optional, for management)
|
||||
Set-ItemProperty -Path 'HKLM:\System\CurrentControlSet\Control\Terminal Server' -Name "fDenyTSConnections" -Value 0
|
||||
Enable-NetFirewallRule -DisplayGroup "Remote Desktop"
|
||||
|
||||
# Disable Windows Firewall for ALL profiles (Domain, Public, Private) - or, preferably, keep it enabled and add rules for SSH (22) and the Oracle listener (1521)
|
||||
Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False
|
||||
|
||||
# Set timezone
|
||||
Set-TimeZone -Id "GTB Standard Time" # Romania timezone
|
||||
|
||||
# Disable hibernation (saves disk space)
|
||||
powercfg /hibernate off
|
||||
|
||||
# Create directories for Oracle
|
||||
New-Item -ItemType Directory -Path "D:\oracle" -Force
|
||||
New-Item -ItemType Directory -Path "D:\oracle\backups" -Force
|
||||
New-Item -ItemType Directory -Path "D:\oracle\oradata" -Force
|
||||
New-Item -ItemType Directory -Path "D:\oracle\fra" -Force
|
||||
```
|
||||
|
||||
**✅ PHASE 1 COMPLETE:** Windows VM created, network configured, ready for Oracle installation
|
||||
|
||||
---
|
||||
|
||||
## 🗄️ PHASE 2: INSTALL ORACLE 19c (60-90 minutes)
|
||||
|
||||
### Step 2.1: Download Oracle 19c
|
||||
```
|
||||
On developer machine or PRIMARY:
|
||||
|
||||
1. Go to: https://www.oracle.com/database/technologies/oracle19c-windows-downloads.html
|
||||
2. Download: WINDOWS.X64_193000_db_home.zip (3.0 GB)
|
||||
3. Transfer to VM:
|
||||
- Option A: Shared folder via Proxmox
|
||||
- Option B: HTTP file server
|
||||
- Option C: Direct download in VM
|
||||
```
|
||||
|
||||
### Step 2.2: Prepare Installation (in Windows VM)
|
||||
```powershell
|
||||
# Run as Administrator
|
||||
|
||||
# Extract Oracle installation
|
||||
Expand-Archive -Path "C:\Temp\WINDOWS.X64_193000_db_home.zip" -DestinationPath "D:\oracle\product\19c\dbhome_1"
|
||||
|
||||
# Create response file for silent install
|
||||
$responseFile = @"
|
||||
oracle.install.option=INSTALL_DB_SWONLY
|
||||
UNIX_GROUP_NAME=
|
||||
INVENTORY_LOCATION=D:\oracle\oraInventory
|
||||
ORACLE_HOME=D:\oracle\product\19c\dbhome_1
|
||||
ORACLE_BASE=D:\oracle
|
||||
oracle.install.db.InstallEdition=SE2
|
||||
oracle.install.db.OSDBA_GROUP=ORA_DBA
|
||||
oracle.install.db.OSOPER_GROUP=ORA_OPER
|
||||
oracle.install.db.OSBACKUPDBA_GROUP=ORA_BACKUPDBA
|
||||
oracle.install.db.OSDGDBA_GROUP=ORA_DG
|
||||
oracle.install.db.OSKMDBA_GROUP=ORA_KM
|
||||
oracle.install.db.OSRACDBA_GROUP=ORA_RAC
|
||||
DECLINE_SECURITY_UPDATES=true
|
||||
"@
|
||||
|
||||
$responseFile | Out-File -FilePath "D:\oracle\db_install.rsp" -Encoding ASCII
|
||||
```
|
||||
|
||||
### Step 2.3: Silent Installation
|
||||
```powershell
|
||||
# Run as Administrator
|
||||
|
||||
cd D:\oracle\product\19c\dbhome_1
|
||||
|
||||
# Silent install (takes 30-60 minutes)
|
||||
.\setup.exe -silent -responseFile D:\oracle\db_install.rsp -ignorePrereqFailure
|
||||
|
||||
# Wait for completion, check log:
|
||||
# D:\oracle\oraInventory\logs\installActions<timestamp>.log
|
||||
|
||||
# Run root scripts (as Administrator)
|
||||
D:\oracle\product\19c\dbhome_1\root.bat
|
||||
```
|
||||
|
||||
### Step 2.4: Create Listener
|
||||
```powershell
|
||||
# Set environment
|
||||
$env:ORACLE_HOME = "D:\oracle\product\19c\dbhome_1"
|
||||
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"
|
||||
|
||||
# Create listener.ora
|
||||
$listenerOra = @"
|
||||
LISTENER =
|
||||
(DESCRIPTION_LIST =
|
||||
(DESCRIPTION =
|
||||
(ADDRESS = (PROTOCOL = TCP)(HOST = 10.0.20.37)(PORT = 1521))
|
||||
)
|
||||
)
|
||||
"@
|
||||
|
||||
$listenerOra | Out-File -FilePath "D:\oracle\product\19c\dbhome_1\network\admin\listener.ora" -Encoding ASCII
|
||||
|
||||
# Start listener
|
||||
lsnrctl start
|
||||
|
||||
# Configure listener as Windows service (optional)
|
||||
```
|
||||
|
||||
### Step 2.5: Create Empty Database Template (for faster DR restore)
|
||||
```powershell
|
||||
# Create init parameter file
|
||||
$initROA = @"
|
||||
DB_NAME=ROA
|
||||
DB_BLOCK_SIZE=8192
|
||||
COMPATIBLE=19.0.0
|
||||
MEMORY_TARGET=2G
|
||||
PROCESSES=300
|
||||
OPEN_CURSORS=300
|
||||
DB_RECOVERY_FILE_DEST=D:\oracle\fra
|
||||
DB_RECOVERY_FILE_DEST_SIZE=20G
|
||||
CONTROL_FILES=('D:\oracle\oradata\ROA\control01.ctl','D:\oracle\oradata\ROA\control02.ctl')
|
||||
"@
|
||||
|
||||
$initROA | Out-File -FilePath "D:\oracle\product\19c\dbhome_1\database\initROA.ora" -Encoding ASCII
|
||||
|
||||
# Create directory structure
|
||||
New-Item -ItemType Directory -Path "D:\oracle\oradata\ROA" -Force
|
||||
New-Item -ItemType Directory -Path "D:\oracle\fra" -Force
|
||||
|
||||
# Note: We will NOT create the database now
|
||||
# Database will be created via RMAN RESTORE during DR event
|
||||
```
|
||||
|
||||
**✅ PHASE 2 COMPLETE:** Oracle 19c installed, listener configured, ready for SSH setup
|
||||
|
||||
---
|
||||
|
||||
## 🔐 PHASE 3: CONFIGURE SSH FOR AUTOMATED TRANSFERS (20 minutes)
|
||||
|
||||
### Step 3.1: Install OpenSSH Server
|
||||
```powershell
|
||||
# Run as Administrator
|
||||
|
||||
# Install OpenSSH Server
|
||||
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
|
||||
|
||||
# Start and enable service
|
||||
Start-Service sshd
|
||||
Set-Service -Name sshd -StartupType 'Automatic'
|
||||
|
||||
# Confirm firewall rule
|
||||
Get-NetFirewallRule -Name *ssh*
|
||||
|
||||
# Test SSH from developer machine
|
||||
# ssh Administrator@10.0.20.37
|
||||
```
|
||||
|
||||
### Step 3.2: Configure Passwordless SSH (Key-based Authentication)
|
||||
```powershell
|
||||
# On Windows VM, as Administrator
|
||||
|
||||
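# Ensure the OpenSSH server config directory (%ProgramData%\ssh) exists - administrators_authorized_keys lives here, not in a per-user .ssh folder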
|
||||
$sshDir = "$env:ProgramData\ssh"
|
||||
New-Item -ItemType Directory -Path $sshDir -Force
|
||||
|
||||
# Get public key from PRIMARY server
|
||||
# Option A: Copy manually from PRIMARY C:\Users\Administrator\.ssh\id_rsa.pub
|
||||
# Option B: Download via SCP from developer machine
|
||||
|
||||
# For this example, manually copy the content:
|
||||
# From PRIMARY run: Get-Content C:\Users\Administrator\.ssh\id_rsa.pub
|
||||
|
||||
# On DR Windows VM:
|
||||
$publicKey = "<paste-public-key-here>"
|
||||
$publicKey | Out-File -FilePath "$sshDir\administrators_authorized_keys" -Encoding ASCII
|
||||
|
||||
# Set permissions (CRITICAL for SSH to work)
|
||||
icacls "$sshDir\administrators_authorized_keys" /inheritance:r
|
||||
icacls "$sshDir\administrators_authorized_keys" /grant "SYSTEM:(F)"
|
||||
icacls "$sshDir\administrators_authorized_keys" /grant "BUILTIN\Administrators:(F)"
|
||||
|
||||
# Restart SSH service
|
||||
Restart-Service sshd
|
||||
```
|
||||
|
||||
### Step 3.3: Configure SSH for SYSTEM Account (for scheduled tasks)
|
||||
```powershell
|
||||
# Windows scheduled tasks run as SYSTEM, so we need SYSTEM's SSH key
|
||||
|
||||
# Create SYSTEM's .ssh directory
|
||||
$systemSSHDir = "C:\Windows\System32\config\systemprofile\.ssh"
|
||||
New-Item -ItemType Directory -Path $systemSSHDir -Force
|
||||
|
||||
# Copy the same authorized_keys
|
||||
Copy-Item "$env:ProgramData\ssh\administrators_authorized_keys" `
|
||||
-Destination "$systemSSHDir\authorized_keys" -Force
|
||||
|
||||
# Set permissions
|
||||
icacls "$systemSSHDir\authorized_keys" /inheritance:r
|
||||
icacls "$systemSSHDir\authorized_keys" /grant "SYSTEM:(F)"
|
||||
```
|
||||
|
||||
### Step 3.4: Test SSH Connection from PRIMARY
|
||||
```powershell
|
||||
# On PRIMARY (10.0.20.36), test SSH to DR VM
|
||||
|
||||
# Test 1: Manual connection
|
||||
ssh -i C:\Users\Administrator\.ssh\id_rsa Administrator@10.0.20.37 "echo SSH_OK"
|
||||
|
||||
# Test 2: File transfer
|
||||
echo "test content" > C:\Temp\test.txt
|
||||
scp -i C:\Users\Administrator\.ssh\id_rsa C:\Temp\test.txt Administrator@10.0.20.37:D:\oracle\backups\
|
||||
|
||||
# If successful, you should see the file on DR VM
|
||||
```
|
||||
|
||||
**✅ PHASE 3 COMPLETE:** OpenSSH configured, passwordless authentication working
|
||||
|
||||
---
|
||||
|
||||
## 📝 PHASE 4: UPDATE TRANSFER SCRIPTS (15 minutes)
|
||||
|
||||
### Step 4.1: Modify 02_transfer_to_dr.ps1 for Windows Target
|
||||
```powershell
|
||||
# File: D:\rman_backup\02_transfer_to_dr_windows.ps1
|
||||
# Changes needed:
|
||||
|
||||
# OLD (Linux target):
|
||||
# $DRPath = "/opt/oracle/backups/primary"
|
||||
|
||||
# NEW (Windows target):
|
||||
$DRHost = "10.0.20.37"
|
||||
$DRUser = "Administrator" # Changed from "root"
|
||||
$DRPath = "D:/oracle/backups/primary" # Windows path with forward slashes for SCP
|
||||
$SSHKeyPath = "C:\Users\Administrator\.ssh\id_rsa"
|
||||
|
||||
# Update SSH commands to use Windows paths
|
||||
# Example: Directory creation
|
||||
$null = & ssh -n -i $SSHKeyPath "${DRUser}@${DRHost}" `
|
||||
"New-Item -ItemType Directory -Path '$DRPath' -Force" 2>&1
|
||||
|
||||
# Update cleanup command for Windows
|
||||
function Cleanup-OldBackupsOnDR {
|
||||
Write-Log "Cleaning up old backups on DR (keeping last 2 days)..."
|
||||
|
||||
try {
|
||||
$cleanupCmd = @"
|
||||
Get-ChildItem -Path '$DRPath' -Filter '*.BKP' |
|
||||
Where-Object { `$_.LastWriteTime -lt (Get-Date).AddDays(-2) } |
|
||||
Remove-Item -Force
|
||||
"@
|
||||
$result = & ssh -n -i $SSHKeyPath "${DRUser}@${DRHost}" "powershell -Command `"$cleanupCmd`"" 2>&1
|
||||
|
||||
Write-Log "Cleanup completed on DR"
|
||||
} catch {
|
||||
Write-Log "Cleanup warning: $_" "WARNING"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Step 4.2: Create Updated Transfer Scripts
|
||||
```powershell
|
||||
# Save updated versions:
|
||||
# - 02_transfer_to_dr_windows.ps1 (FULL backup transfer)
|
||||
# - 02b_transfer_incremental_to_dr_windows.ps1 (INCREMENTAL transfer)
|
||||
|
||||
# Key changes for Windows:
|
||||
# 1. DRUser = "Administrator" instead of "root"
|
||||
# 2. DRPath = "D:/oracle/backups/primary" (Windows path)
|
||||
# 3. SSH commands use PowerShell instead of Linux commands
|
||||
# 4. File/directory existence check: Test-Path instead of "test -f" / "test -d"
|
||||
# 5. Cleanup: Get-ChildItem instead of find
|
||||
```
|
||||
|
||||
### Step 4.3: Test Transfer Script
|
||||
```powershell
|
||||
# On PRIMARY, test the new script
|
||||
|
||||
# Manual test
|
||||
D:\rman_backup\02_transfer_to_dr_windows.ps1
|
||||
|
||||
# Check log output
|
||||
Get-Content "D:\rman_backup\logs\transfer_$(Get-Date -Format 'yyyyMMdd').log" -Tail 50
|
||||
|
||||
# Verify on DR VM
|
||||
ssh Administrator@10.0.20.37 "Get-ChildItem D:\oracle\backups\primary"
|
||||
```
|
||||
|
||||
**✅ PHASE 4 COMPLETE:** Transfer scripts updated and tested for Windows target
|
||||
|
||||
---
|
||||
|
||||
## 🔄 PHASE 5: CREATE RMAN RESTORE SCRIPT ON DR VM (30 minutes)
|
||||
|
||||
### Step 5.1: Create RMAN Restore Script
|
||||
```powershell
|
||||
# File: D:\oracle\scripts\rman_restore_from_primary.ps1
# Run on DR Windows VM
#
# Rebuilds the ROA database on the DR VM from the RMAN backup pieces in $BackupPath:
#   1) wipe old datafiles/FRA  2) STARTUP NOMOUNT  3) restore controlfile
#   4) MOUNT  5) catalog + restore + recover  6) OPEN RESETLOGS, then a status query.
# Exit code: 0 on success, 1 on any failure (details are written to $LogFile).

param(
    [string]$BackupPath = "D:\oracle\backups\primary",
    [string]$OracleHome = "D:\oracle\product\19c\dbhome_1",
    # Not referenced below; kept so existing callers that pass it keep working.
    [string]$OracleBase = "D:\oracle",
    # Datafile destination - its contents are DELETED in step 1.
    [string]$DataDir = "D:\oracle\oradata\ROA",
    # Recovery area - its contents are DELETED in step 1.
    [string]$FRADir = "D:\oracle\fra",
    # [long] instead of [int]: a DBID is an unsigned 32-bit value and can exceed Int32.
    [long]$DBID = 1363569330,
    [string]$LogFile = "D:\oracle\logs\restore_$(Get-Date -Format 'yyyyMMdd_HHmmss').log"
)

$ErrorActionPreference = "Stop"

# Add-Content creates the log file but not its parent folder, so create the folder first.
$logDir = Split-Path -Path $LogFile -Parent
if ($logDir -and -not (Test-Path $logDir)) {
    New-Item -ItemType Directory -Path $logDir -Force | Out-Null
}

# Writes one timestamped line to the console and appends it to $LogFile.
#   $Message - text to log
#   $Level   - severity tag shown in brackets (default "INFO")
function Write-Log {
    param([string]$Message, [string]$Level = "INFO")
    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    $logLine = "[$timestamp] [$Level] $Message"
    Write-Host $logLine
    Add-Content -Path $LogFile -Value $logLine -Encoding UTF8
}

# Pipes $Sql to "sqlplus / as sysdba" and throws when SQL*Plus exits non-zero.
#   $Sql  - one statement per line (SQL*Plus does not split "stmt1; stmt2;" on a single line)
#   $Step - step name used in the error message
# NOTE(review): WHENEVER SQLERROR covers SQL statements; whether it also traps a failed
# STARTUP (a SQL*Plus command) should be confirmed - a failed NOMOUNT is still caught
# by the RMAN exit-code check in step 3.
function Invoke-SqlPlus {
    param([string]$Sql, [string]$Step)
    $sqlScript = @"
WHENEVER OSERROR EXIT FAILURE
WHENEVER SQLERROR EXIT FAILURE
$Sql
EXIT;
"@
    $sqlScript | sqlplus / as sysdba
    if ($LASTEXITCODE -ne 0) {
        throw "$Step failed (sqlplus exit code $LASTEXITCODE)"
    }
}

try {
    Write-Log "======================================================================"
    Write-Log "Oracle DR Restore - Starting"
    Write-Log "======================================================================"
    Write-Log "Backup Path: $BackupPath"
    Write-Log "Oracle Home: $OracleHome"
    Write-Log "DBID: $DBID"

    # Set environment
    $env:ORACLE_HOME = $OracleHome
    $env:ORACLE_SID = "ROA"
    $env:PATH = "$OracleHome\bin;$env:PATH"

    # Step 1: Cleanup old database files (best effort: locked files are skipped silently)
    Write-Log "[1/6] Cleaning old database files..."
    if (Test-Path $DataDir) {
        Remove-Item "$DataDir\*" -Recurse -Force -ErrorAction SilentlyContinue
    }
    if (Test-Path $FRADir) {
        Remove-Item "$FRADir\*" -Recurse -Force -ErrorAction SilentlyContinue
    }
    New-Item -ItemType Directory -Path $DataDir -Force | Out-Null
    New-Item -ItemType Directory -Path $FRADir -Force | Out-Null

    # Step 2: Startup NOMOUNT
    Write-Log "[2/6] Starting instance in NOMOUNT mode..."
    Invoke-SqlPlus -Step "STARTUP NOMOUNT" `
        -Sql "STARTUP NOMOUNT PFILE='$OracleHome\database\initROA.ora';"

    # Step 3: RMAN Restore
    Write-Log "[3/6] Running RMAN RESTORE CONTROLFILE..."

    $rmanScript = @"
SET DBID $DBID;

RUN {
    ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;

    # Restore controlfile from autobackup
    SET CONTROLFILE AUTOBACKUP FORMAT FOR DEVICE TYPE DISK TO '$BackupPath/%F';
    RESTORE CONTROLFILE FROM AUTOBACKUP;
}

EXIT;
"@

    $rmanScript | rman TARGET /

    if ($LASTEXITCODE -ne 0) {
        throw "RMAN RESTORE CONTROLFILE failed"
    }

    # Step 4: Mount database
    Write-Log "[4/6] Mounting database..."
    Invoke-SqlPlus -Step "ALTER DATABASE MOUNT" -Sql "ALTER DATABASE MOUNT;"

    # Step 5: Catalog and restore database
    Write-Log "[5/6] Cataloging backups and restoring database..."

    $rmanRestore = @"
CATALOG START WITH '$BackupPath/' NOPROMPT;

RUN {
    SET NEWNAME FOR DATABASE TO '$DataDir\%b';
    RESTORE DATABASE;
    SWITCH DATAFILE ALL;
    RECOVER DATABASE;
}

EXIT;
"@

    $rmanRestore | rman TARGET /

    if ($LASTEXITCODE -ne 0) {
        throw "RMAN RESTORE DATABASE failed"
    }

    # Step 6: Open database RESETLOGS
    Write-Log "[6/6] Opening database with RESETLOGS..."
    Invoke-SqlPlus -Step "ALTER DATABASE OPEN RESETLOGS" -Sql "ALTER DATABASE OPEN RESETLOGS;"

    Write-Log "======================================================================"
    Write-Log "DR RESTORE COMPLETED SUCCESSFULLY!"
    Write-Log "======================================================================"
    Write-Log "Database ROA is now OPEN and ready"

    # Verify (informational only: a failing query here does not change the exit code)
    Write-Log "Verification:"
    $verifySQL = @"
SELECT name, open_mode, database_role FROM v`$database;
EXIT;
"@
    $verifySQL | sqlplus -s / as sysdba

    exit 0

} catch {
    Write-Log "CRITICAL ERROR: $($_.Exception.Message)" "ERROR"
    Write-Log "Stack trace: $($_.ScriptStackTrace)" "ERROR"
    exit 1
}
|
||||
```
|
||||
|
||||
### Step 5.2: Create Quick Test Script
|
||||
```powershell
|
||||
# File: D:\oracle\scripts\test_restore_latest.ps1
# Quick pre-check before a restore test: shows the newest backup piece on the DR VM

$BackupPath = "D:\oracle\backups\primary"

# Newest *.BKP by modification time; stays $null when the folder is missing or empty
$LatestBackup = Get-ChildItem "$BackupPath\*.BKP" -ErrorAction SilentlyContinue |
    Sort-Object LastWriteTime -Descending |
    Select-Object -First 1

# Without this guard the script would print blank values and still claim to be ready
if (-not $LatestBackup) {
    Write-Host "No *.BKP files found in $BackupPath - transfer backups from PRIMARY first"
    return
}

Write-Host "Latest backup: $($LatestBackup.Name)"
Write-Host "Size: $([math]::Round($LatestBackup.Length / 1GB, 2)) GB"
Write-Host "Date: $($LatestBackup.LastWriteTime)"
Write-Host ""
Write-Host "Ready to test restore? Run:"
Write-Host "D:\oracle\scripts\rman_restore_from_primary.ps1"
|
||||
```
|
||||
|
||||
**✅ PHASE 5 COMPLETE:** RMAN restore script created and ready to test
|
||||
|
||||
---
|
||||
|
||||
## 🧪 PHASE 6: TEST DR RESTORE (30 minutes)
|
||||
|
||||
### Step 6.1: Verify Backups Transferred
|
||||
```powershell
|
||||
# On DR Windows VM
|
||||
|
||||
# Check backup files
|
||||
Get-ChildItem D:\oracle\backups\primary\*.BKP |
|
||||
Sort-Object LastWriteTime -Descending |
|
||||
Select-Object Name, @{N='SizeMB';E={[math]::Round($_.Length/1MB,2)}}, LastWriteTime
|
||||
|
||||
# Expected output: 15-20 files (FULL + INCREMENTAL + CONTROLFILE + SPFILE + ARCHIVELOGS)
|
||||
```
|
||||
|
||||
### Step 6.2: Run Test Restore
|
||||
```powershell
|
||||
# IMPORTANT: This will create a live database on DR VM
|
||||
# Make sure PRIMARY is still running (don't confuse them!)
|
||||
|
||||
# Run restore
|
||||
D:\oracle\scripts\rman_restore_from_primary.ps1
|
||||
|
||||
# Monitor progress in log
|
||||
Get-Content "D:\oracle\logs\restore_*.log" -Wait
|
||||
|
||||
# Expected duration: 10-15 minutes
|
||||
```
|
||||
|
||||
### Step 6.3: Verify Database
|
||||
```powershell
|
||||
# Connect to restored database
|
||||
sqlplus sys/romfastsoft@10.0.20.37:1521/ROA as sysdba
|
||||
|
||||
SQL> SELECT name, open_mode FROM v$database;
|
||||
# Expected: ROA, READ WRITE
|
||||
|
||||
SQL> SELECT tablespace_name, status FROM dba_tablespaces;
|
||||
# Expected: SYSTEM, SYSAUX, UNDOTBS, TS_ROA, USERS - all ONLINE
|
||||
|
||||
SQL> SELECT COUNT(*) FROM dba_tables WHERE owner='<your-app-schema>';
|
||||
# Verify application tables restored
|
||||
|
||||
SQL> EXIT;
|
||||
```
|
||||
|
||||
### Step 6.4: Shutdown DR Database (conserve resources)
|
||||
```powershell
|
||||
# After successful test, shutdown database
|
||||
sqlplus / as sysdba
|
||||
|
||||
SQL> SHUTDOWN IMMEDIATE;
|
||||
SQL> EXIT;
|
||||
|
||||
# Stop listener
|
||||
lsnrctl stop
|
||||
|
||||
# Optional: Shutdown Windows VM to conserve resources
|
||||
# (VM will be started only during actual DR events)
|
||||
```
|
||||
|
||||
**✅ PHASE 6 COMPLETE:** DR restore tested and verified working
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ PHASE 7: UPDATE TASK SCHEDULER ON PRIMARY (10 minutes)
|
||||
|
||||
### Step 7.1: Update Scheduled Tasks to Use New Scripts
|
||||
```powershell
|
||||
# On PRIMARY (10.0.20.36)

# Task 1: FULL Backup + Transfer (already exists, just update transfer script)
# Name: "Oracle RMAN Daily Backup + DR Transfer"
# Trigger: Daily 02:30 AM
# Action 1: Run RMAN backup (unchanged)
# Action 2: UPDATE to new script

$taskName = "Oracle RMAN Daily Backup + DR Transfer"

# New Action 2: run the Windows-target transfer script
$transferAction = New-ScheduledTaskAction -Execute "PowerShell.exe" `
    -Argument "-NoProfile -ExecutionPolicy Bypass -File D:\rman_backup\02_transfer_to_dr_windows.ps1"

# Set-ScheduledTask -Action replaces the task's ENTIRE action list, so the existing
# Action 1 (RMAN backup) must be passed back in together with the new Action 2.
# Assumes the RMAN backup is the first action of the task, as listed above - verify with:
#   (Get-ScheduledTask -TaskName $taskName).Actions
$backupAction = (Get-ScheduledTask -TaskName $taskName).Actions | Select-Object -First 1
Set-ScheduledTask -TaskName $taskName -Action @($backupAction, $transferAction)

# Task 2: INCREMENTAL Backup + Transfer
# Similar update for incremental task
|
||||
```
|
||||
|
||||
### Step 7.2: Test Scheduled Task Manually
|
||||
```powershell
|
||||
# On PRIMARY
|
||||
|
||||
# Run FULL backup + transfer task manually
|
||||
Start-ScheduledTask -TaskName "Oracle RMAN Daily Backup + DR Transfer"
|
||||
|
||||
# Monitor task status
|
||||
Get-ScheduledTask -TaskName "Oracle RMAN Daily Backup + DR Transfer" |
|
||||
Get-ScheduledTaskInfo
|
||||
|
||||
# Check transfer log
|
||||
Get-Content "D:\rman_backup\logs\transfer_$(Get-Date -Format 'yyyyMMdd').log" -Tail 50
|
||||
|
||||
# Verify on DR
|
||||
ssh Administrator@10.0.20.37 "Get-ChildItem D:\oracle\backups\primary -Filter *.BKP | Measure-Object"
|
||||
```
|
||||
|
||||
**✅ PHASE 7 COMPLETE:** Automated backup and transfer configured
|
||||
|
||||
---
|
||||
|
||||
## 📚 PHASE 8: CREATE DR RUNBOOK (15 minutes)
|
||||
|
||||
### Step 8.1: DR Emergency Procedure
|
||||
```markdown
|
||||
# DISASTER RECOVERY PROCEDURE
|
||||
## When PRIMARY Server (10.0.20.36) Fails
|
||||
|
||||
### PRE-REQUISITES
|
||||
- Proxmox access available
|
||||
- DR Windows VM exists (ID 109)
|
||||
- Latest backups transferred (<24h old)
|
||||
|
||||
### DR ACTIVATION STEPS (RTO: 15-20 minutes)
|
||||
|
||||
1. **Start DR Windows VM (2 minutes)**
|
||||
```
|
||||
Proxmox Web UI → VM 109 (oracle-dr-windows) → Start
|
||||
Wait for Windows to boot
|
||||
Verify network: ping 10.0.20.37
|
||||
```
|
||||
|
||||
2. **Verify Backups Present (1 minute)**
|
||||
```powershell
|
||||
# RDP or Console to 10.0.20.37
|
||||
Get-ChildItem D:\oracle\backups\primary\*.BKP |
|
||||
Sort-Object LastWriteTime -Descending |
|
||||
Select-Object -First 10
|
||||
|
||||
# Verify you see today's or yesterday's backups
|
||||
```
|
||||
|
||||
3. **Run RMAN Restore (12-15 minutes)**
|
||||
```powershell
|
||||
# Run restore script
|
||||
D:\oracle\scripts\rman_restore_from_primary.ps1
|
||||
|
||||
# Monitor log in real-time
|
||||
Get-Content D:\oracle\logs\restore_*.log -Wait
|
||||
```
|
||||
|
||||
4. **Verify Database (2 minutes)**
|
||||
```powershell
|
||||
# Connect to database
|
||||
sqlplus sys/romfastsoft@localhost:1521/ROA as sysdba
|
||||
|
||||
SQL> SELECT name, open_mode FROM v$database;
|
||||
SQL> SELECT tablespace_name, status FROM dba_tablespaces;
|
||||
SQL> -- Verify critical application tables
|
||||
SQL> EXIT;
|
||||
```
|
||||
|
||||
5. **Update Network/DNS (5 minutes)**
|
||||
```
|
||||
- Update DNS: roa-db.example.com → 10.0.20.37
|
||||
- OR: Update application connection strings to 10.0.20.37
|
||||
- Test application connectivity
|
||||
```
|
||||
|
||||
6. **Monitor & Notify**
|
||||
```
|
||||
- Monitor database alert log: D:\oracle\diag\rdbms\roa\ROA\trace\alert_ROA.log
|
||||
- Notify team that DR is active
|
||||
- Document incident timeline
|
||||
```
|
||||
|
||||
### RECOVERY BACK TO PRIMARY (When repaired)
|
||||
|
||||
1. Create fresh RMAN backup from DR (now contains latest data)
|
||||
2. Transfer backup to repaired PRIMARY
|
||||
3. Restore on PRIMARY
|
||||
4. Switch DNS/connections back to PRIMARY
|
||||
5. Shutdown DR VM
|
||||
|
||||
### TESTING SCHEDULE
|
||||
- Monthly DR test: Last Sunday of month
|
||||
- Test duration: 30 minutes
|
||||
- Document test results
|
||||
```
|
||||
|
||||
**✅ PHASE 8 COMPLETE:** DR runbook documented
|
||||
|
||||
---
|
||||
|
||||
## 📊 FINAL ARCHITECTURE
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ PRODUCTION ENVIRONMENT │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ PRIMARY (10.0.20.36) - Windows Physical Server │
|
||||
│ ├─ Oracle 19c SE2 │
|
||||
│ ├─ Database: ROA │
|
||||
│ ├─ RMAN Backups: │
|
||||
│ │ ├─ FULL: Daily 02:30 AM (~7GB compressed) │
|
||||
│ │ └─ INCREMENTAL: Daily 14:00 (~50MB) │
|
||||
│ └─ Automatic Transfer to DR via SSH/SCP │
|
||||
│ │
|
||||
│ ↓ SSH Transfer │
|
||||
│ ↓ (950 Mbps) │
|
||||
│ ↓ │
|
||||
│ DR (10.0.20.37) - Windows VM in Proxmox (ID 109) │
|
||||
│ ├─ Oracle 19c SE2 (installed, ready) │
|
||||
│ ├─ VM State: POWERED OFF (0 RAM consumption) │
|
||||
│ ├─ Backups: D:\oracle\backups\primary │
|
||||
│ ├─ Storage: 500 GB (OS + Oracle + backups) │
|
||||
│ └─ Restore Script: D:\oracle\scripts\rman_restore... │
|
||||
│ │
|
||||
│ DR ACTIVATION (when needed): │
|
||||
│ ├─ 1. Power ON VM (2 min) │
|
||||
│ ├─ 2. Run restore script (12 min) │
|
||||
│ ├─ 3. Database OPEN (1 min) │
|
||||
│ └─ TOTAL RTO: ~15 minutes │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
|
||||
METRICS (Current Implementation):
|
||||
- RPO: 24 hours (only FULL backup used; incremental causes UNDO corruption)
|
||||
- RTO: 15 minutes
|
||||
- Storage: 500 GB VM + backups on host
|
||||
- Daily resources: ZERO (VM powered off)
|
||||
- DR test: Weekly (planned)
|
||||
|
||||
METRICS (After Upgrade to CUMULATIVE):
|
||||
- RPO: 3-4 hours (FULL + latest CUMULATIVE)
|
||||
- RTO: 15 minutes (unchanged)
|
||||
- Storage: 500 GB VM + ~15 GB on Proxmox host
|
||||
- Daily resources: ZERO (VM powered off)
|
||||
- DR test: Weekly (automated)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ✅ POST-IMPLEMENTATION CHECKLIST
|
||||
|
||||
### Phase 1-8 (Initial Setup) - ✅ COMPLETED 2025-10-09
|
||||
|
||||
- [x] Windows VM created in Proxmox (VM ID 109, IP 10.0.20.37)
|
||||
- [x] Oracle 19c SE2 installed and working
|
||||
- [x] OpenSSH Server configured with passwordless authentication
|
||||
- [x] Transfer scripts updated and tested (FULL backup)
|
||||
- [x] RMAN restore script created on DR VM
|
||||
- [x] DR restore tested successfully (database opens and is usable)
|
||||
- [x] Scheduled tasks on PRIMARY verified
|
||||
- [x] DR procedures documented
|
||||
- [x] VM shutdown after testing (to conserve resources)
|
||||
|
||||
### Phase 9 (Upgrade to CUMULATIVE) - 📋 PLANNED
|
||||
|
||||
**See:** `DR_UPGRADE_TO_CUMULATIVE_PLAN.md` for detailed implementation steps
|
||||
|
||||
- [ ] Proxmox host storage configured (`/mnt/pve/oracle-backups`)
|
||||
- [ ] VM 109 mount point configured (E:\ from host)
|
||||
- [ ] RMAN script updated to CUMULATIVE incremental
|
||||
- [ ] Transfer scripts updated to send to Proxmox host
|
||||
- [ ] SSH key for Proxmox host access configured
|
||||
- [ ] Scheduled task created for 13:00 CUMULATIVE backup
|
||||
- [ ] Scheduled task created for 18:00 CUMULATIVE backup
|
||||
- [ ] Existing 14:00 task removed
|
||||
- [ ] 02:30 FULL task updated to use new transfer script
|
||||
- [ ] DR restore script updated for cumulative backups
|
||||
- [ ] End-to-end restore test with CUMULATIVE successful
|
||||
- [ ] Weekly test script created and scheduled
|
||||
- [ ] Team trained on new backup strategy
|
||||
|
||||
---
|
||||
|
||||
## 🔧 TROUBLESHOOTING GUIDE
|
||||
|
||||
### Issue: SSH Connection Fails
|
||||
```powershell
|
||||
# Check 1: SSH service running?
|
||||
Get-Service sshd
|
||||
|
||||
# Check 2: Firewall blocking?
|
||||
Get-NetFirewallRule -Name *ssh*
|
||||
|
||||
# Check 3: Authorized keys permissions?
|
||||
icacls "C:\ProgramData\ssh\administrators_authorized_keys"
|
||||
|
||||
# Check 4: Test from PRIMARY
|
||||
ssh -v Administrator@10.0.20.37
|
||||
```
|
||||
|
||||
### Issue: RMAN Restore Fails "CONTROLFILE not found"
|
||||
```
|
||||
# This usually means a cross-platform restore was attempted (e.g. Windows backup on a Linux target)
|
||||
# Solution: Ensure you're using Windows→Windows (same platform)
|
||||
# Check Oracle version matches: 19c on both sides
|
||||
```
|
||||
|
||||
### Issue: Database Won't Start
|
||||
```powershell
|
||||
# Check alert log
|
||||
Get-Content D:\oracle\diag\rdbms\roa\ROA\trace\alert_ROA.log -Tail 100
|
||||
|
||||
# Check parameter file
|
||||
Get-Content D:\oracle\product\19c\dbhome_1\database\initROA.ora
|
||||
|
||||
# Verify directories exist
|
||||
Test-Path D:\oracle\oradata\ROA
|
||||
Test-Path D:\oracle\fra
|
||||
```
|
||||
|
||||
### Issue: VM Uses Too Much Disk
|
||||
```powershell
|
||||
# Check backup retention
|
||||
Get-ChildItem D:\oracle\backups\primary\*.BKP |
|
||||
Where-Object { $_.LastWriteTime -lt (Get-Date).AddDays(-3) } |
|
||||
Remove-Item -Force
|
||||
|
||||
# Check FRA usage (run the next statement in SQL*Plus as SYSDBA, not in PowerShell)
|
||||
SELECT * FROM V$RECOVERY_FILE_DEST;
|
||||
|
||||
# Cleanup old archives
|
||||
RMAN> DELETE NOPROMPT ARCHIVELOG ALL COMPLETED BEFORE 'SYSDATE-2';
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📞 SUPPORT & REFERENCES
|
||||
|
||||
### Oracle Documentation
|
||||
- RMAN Backup and Recovery: https://docs.oracle.com/en/database/oracle/oracle-database/19/bradv/
|
||||
- Cross-Platform Migration: https://docs.oracle.com/en/database/oracle/oracle-database/19/spmds/
|
||||
- Windows Installation: https://docs.oracle.com/en/database/oracle/oracle-database/19/ntqrf/
|
||||
|
||||
### Internal Scripts
|
||||
- PRIMARY RMAN backup: D:\rman_backup\rman_backup.txt
|
||||
- Transfer script (FULL): D:\rman_backup\02_transfer_to_dr_windows.ps1
|
||||
- Transfer script (INCREMENTAL): D:\rman_backup\02b_transfer_incremental_to_dr_windows.ps1
|
||||
- DR restore script: D:\oracle\scripts\rman_restore_from_primary.ps1 (on DR VM)
|
||||
|
||||
### Logs Location
|
||||
- PRIMARY transfer logs: D:\rman_backup\logs\
|
||||
- DR restore logs: D:\oracle\logs\
|
||||
- Oracle alert log: D:\oracle\diag\rdbms\roa\ROA\trace\alert_ROA.log
|
||||
|
||||
---
|
||||
|
||||
## 🎯 IMPLEMENTATION TIMELINE
|
||||
|
||||
| Phase | Task | Duration | Responsible |
|
||||
|-------|------|----------|-------------|
|
||||
| 1 | Create Windows VM in Proxmox | 30 min | Infrastructure Admin |
|
||||
| 2 | Install Oracle 19c | 90 min | DBA |
|
||||
| 3 | Configure SSH | 20 min | Infrastructure Admin |
|
||||
| 4 | Update Transfer Scripts | 15 min | DBA |
|
||||
| 5 | Create Restore Script | 30 min | DBA |
|
||||
| 6 | Test DR Restore | 30 min | DBA |
|
||||
| 7 | Update Scheduled Tasks | 10 min | DBA |
|
||||
| 8 | Document DR Runbook | 15 min | DBA |
|
||||
| **TOTAL** | | **~4 hours** | |
|
||||
|
||||
**Note:** This is a one-time setup. After completion, daily operations are fully automated with ZERO maintenance overhead.
|
||||
|
||||
---
|
||||
|
||||
**Generated:** 2025-10-08
|
||||
**Last Updated:** 2025-10-09
|
||||
**Version:** 2.0
|
||||
**Status:** ✅ Phase 1-8 COMPLETED | 📋 Phase 9 (CUMULATIVE upgrade) PLANNED
|
||||
**Implementation Status:**
|
||||
- Initial setup (Phases 1-8): ✅ COMPLETED 2025-10-09
|
||||
- RMAN restore tested: ✅ SUCCESSFUL (12-15 minutes RTO)
|
||||
- Current RPO: 24 hours (FULL backup only)
|
||||
- Next: Upgrade to CUMULATIVE incremental for 3-4 hour RPO
|
||||
|
||||
**Next Session:** Implement CUMULATIVE backup strategy
|
||||
**See:** `DR_UPGRADE_TO_CUMULATIVE_PLAN.md` for upgrade plan
|
||||
|
||||
@@ -0,0 +1,789 @@
|
||||
# Oracle DR Windows VM - Implementation Status
|
||||
**Date:** 2025-10-09 04:00 AM
|
||||
**VM:** 109 (oracle-dr-windows)
|
||||
**Location:** Proxmox pveelite (10.0.20.202)
|
||||
**IP:** 10.0.20.37
|
||||
**Purpose:** Replace Linux LXC DR with Windows VM for same-platform RMAN restore
|
||||
|
||||
---
|
||||
|
||||
## ✅ COMPLETED TASKS
|
||||
|
||||
### 1. VM Creation and Network ✅
|
||||
- **VM ID:** 109 on pveelite (10.0.20.202)
|
||||
- **Template source:** Win11-Template (ID 300) from pvemini (10.0.20.201)
|
||||
- **Cloned and migrated:** Successfully migrated from pvemini to pveelite
|
||||
- **Resources configured:**
|
||||
- RAM: 6GB
|
||||
- CPU: 4 cores
|
||||
- Disk: 500GB (local-zfs)
|
||||
- Boot on startup: NO (VM stays off until DR event)
|
||||
- **Network:**
|
||||
- Static IP: 10.0.20.37
|
||||
- Gateway: 10.0.20.1
|
||||
- DNS: 10.0.20.1, 8.8.8.8
|
||||
- Windows Firewall: Disabled
|
||||
- Connectivity: ✅ Verified (ping successful)
|
||||
|
||||
### 2. Windows Configuration ✅
|
||||
- **Computer name:** ORACLE-DR
|
||||
- **Timezone:** GTB Standard Time (Romania)
|
||||
- **Hibernation:** Disabled
|
||||
- **Administrator profile:** Fixed (C:\Users\Administrator)
|
||||
- **Auto-login:** Disabled
|
||||
|
||||
### 3. Users Created ✅
|
||||
| User | Password | Admin | Hidden from Login | Purpose |
|
||||
|------|----------|-------|-------------------|---------|
|
||||
| romfast | Romfast2025! | Yes | Yes | SSH access, backup transfers |
|
||||
| silvia | Silvia2025! | No | Yes | SSH tunnels (2 ports) |
|
||||
| eli | Eli2025! | No | Yes | SSH tunnels (4 ports) |
|
||||
|
||||
### 4. OpenSSH Server Configuration ✅
|
||||
- **Port:** 22122
|
||||
- **Service:** Running, Automatic startup
|
||||
- **Authentication:** ✅ **SSH Key Authentication WORKING**
|
||||
- User key: `mmarius28@gmail.com` (for manual SSH from Linux)
|
||||
- SYSTEM key: `administrator@ROA-CARAPETRU2` (for automated backup transfers from PRIMARY)
|
||||
|
||||
**SSH Config:** `C:\ProgramData\ssh\sshd_config`
|
||||
```
|
||||
Port 22122
|
||||
ListenAddress 0.0.0.0
|
||||
PubkeyAuthentication yes
|
||||
PasswordAuthentication yes
|
||||
AuthorizedKeysFile .ssh/authorized_keys
|
||||
AllowTcpForwarding yes
|
||||
GatewayPorts yes
|
||||
|
||||
Match User romfast
|
||||
PermitOpen localhost:80 localhost:1521 localhost:3000 localhost:3001 localhost:3389 localhost:8006 localhost:8080 localhost:81 localhost:9443 localhost:22
|
||||
|
||||
Match User silvia
|
||||
PermitOpen localhost:80 localhost:1521
|
||||
|
||||
Match User eli
|
||||
PermitOpen localhost:80 localhost:1521 localhost:3000
|
||||
|
||||
Match Group administrators
|
||||
AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys
|
||||
```
|
||||
|
||||
**SSH Keys Configured:**
|
||||
- File: `C:\ProgramData\ssh\administrators_authorized_keys`
|
||||
- Contains 2 keys:
|
||||
1. `ssh-rsa ...mmarius28@gmail.com` (your Linux workstation)
|
||||
2. `ssh-rsa ...administrator@ROA-CARAPETRU2` (PRIMARY SYSTEM user for automated transfers)
|
||||
- Permissions: SYSTEM (Full Control), Administrators (Read)
|
||||
- Status: ✅ Both keys working
|
||||
|
||||
**Fix Script:** `D:\oracle\scripts\fix_ssh_via_service.ps1`
|
||||
- Stops SSH service
|
||||
- Recreates authorized_keys with both keys
|
||||
- Sets correct permissions using `icacls`
|
||||
- Restarts SSH service
|
||||
|
||||
### 5. Oracle 19c Installation ✅
|
||||
- **Status:** ✅ Installed (interactive GUI installation)
|
||||
- **ORACLE_HOME:** `C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home`
|
||||
- **ORACLE_BASE:** `C:\Users\oracle`
|
||||
- **Edition:** Standard Edition 2 (SE2)
|
||||
- **Version:** 19.3.0.0.0
|
||||
- **Installation Type:** Software Only (no database created yet)
|
||||
- **Oracle User:** `oracle` (password: Oracle2025!)
|
||||
|
||||
**Verification:**
|
||||
```powershell
|
||||
$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
|
||||
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"
|
||||
sqlplus -v # Returns: SQL*Plus: Release 19.0.0.0.0 - Production
|
||||
```
|
||||
|
||||
### 6. Oracle Listener Configuration ✅
|
||||
- **Script:** `D:\oracle\scripts\configure_listener_dr.ps1`
|
||||
- **Status:** ✅ Configured and Running
|
||||
- **Port:** 1521
|
||||
- **Service:** OracleOraDB19Home1TNSListener
|
||||
|
||||
**Configuration Files Created:**
|
||||
- `C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home\network\admin\listener.ora`
|
||||
- `C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home\network\admin\tnsnames.ora`
|
||||
- `C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home\network\admin\sqlnet.ora`
|
||||
|
||||
**Listener Status:**
|
||||
```
|
||||
LSNRCTL for 64-bit Windows: Version 19.0.0.0.0 - Production
|
||||
STATUS of the LISTENER
|
||||
Alias LISTENER
|
||||
Version TNSLSNR for 64-bit Windows: Version 19.0.0.0.0 - Production
|
||||
Start Date 09-OCT-2025 03:18:34
|
||||
Listening Endpoints Summary...
|
||||
(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=10.0.20.37)(PORT=1521)))
|
||||
(DESCRIPTION=(ADDRESS=(PROTOCOL=ipc)(PIPENAME=\\.\pipe\EXTPROC1521ipc)))
|
||||
Services Summary...
|
||||
Service "ROA" has 1 instance(s).
|
||||
Instance "ROA", status UNKNOWN, has 1 handler(s) for this service...
|
||||
```
|
||||
|
||||
### 7. Directory Structure Created ✅
|
||||
```
|
||||
C:\Users\oracle\
|
||||
├── oradata\ROA\ (will be created by RMAN restore)
|
||||
├── recovery_area\ROA\ (FRA - Fast Recovery Area)
|
||||
├── admin\ROA\
|
||||
│ ├── adump\ (audit files)
|
||||
│ ├── dpdump\ (data pump)
|
||||
│ └── pfile\ (initialization files)
|
||||
└── oraInventory\ (Oracle inventory)
|
||||
|
||||
D:\oracle\
|
||||
├── backups\primary\ ✅ (6.32 GB backup files transferred)
|
||||
├── scripts\ ✅ (DR automation scripts)
|
||||
└── logs\ ✅ (restore logs)
|
||||
```
|
||||
|
||||
### 8. Backup Transfer Scripts Updated ✅
|
||||
**Location on PRIMARY:** `D:\rman_backup\`
|
||||
|
||||
**Scripts Updated:**
|
||||
1. **transfer_to_dr.ps1** - Transfer FULL backups
|
||||
2. **transfer_incremental.ps1** - Transfer INCREMENTAL backups
|
||||
|
||||
**Changes Made:**
|
||||
- ✅ DRHost: `10.0.20.37`
|
||||
- ✅ DRPort: `22122` (added)
|
||||
- ✅ DRUser: `romfast` (changed from `root`)
|
||||
- ✅ DRPath: `D:/oracle/backups/primary` (changed from `/opt/oracle/backups/primary`)
|
||||
- ✅ All SSH commands updated with `-p 22122`
|
||||
- ✅ Linux commands replaced with Windows PowerShell equivalents:
|
||||
- `test -f` → `powershell -Command "Test-Path ..."`
|
||||
- `mkdir -p` → `powershell -Command "New-Item -ItemType Directory ..."`
|
||||
- `find ... -delete` → `powershell -Command "Get-ChildItem ... | Remove-Item ..."`
|
||||
|
||||
**Backup Files Transferred:** ✅ **6 files, 6.32 GB total**
|
||||
```
|
||||
D:\oracle\backups\primary\
|
||||
├── O1_MF_NNND0_DAILY_FULL_COMPRESSE_NGFVB4B8_.BKP (4.81 GB) # FULL backup
|
||||
├── O1_MF_ANNNN_DAILY_FULL_COMPRESSE_NGFV7RGN_.BKP (1.51 GB) # Archived logs (part of FULL backup)
|
||||
├── O1_MF_NCNNF_TAG20251009T020551_NGFVLJTG_.BKP (1.14 MB) # Control file
|
||||
├── O1_MF_S_1214013953_NGFVLL29_.BKP (1.14 MB) # Control file + SPFILE autobackup
|
||||
├── O1_MF_NNSNF_TAG20251009T020550_NGFVLGOR_.BKP (112 KB)
|
||||
└── O1_MF_ANNNN_DAILY_FULL_COMPRESSE_NGFVLFKN_.BKP (861 KB)
|
||||
```
|
||||
|
||||
**Transfer Log:** `D:\rman_backup\logs\transfer_20251009.log`
|
||||
```
|
||||
[2025-10-09 03:52:13] [SUCCESS] SSH connection successful
|
||||
[2025-10-09 03:52:14] [INFO] Found 6 files, total size: 6.32 GB
|
||||
[2025-10-09 03:57:27] [INFO] Files transferred: 6/6
|
||||
```
|
||||
|
||||
### 9. DR Scripts Created ✅
|
||||
All scripts located in: `/mnt/e/proiecte/ROMFASTSQL/proxmox/vm109-windows-dr/` (formerly `oracle/standby-server-scripts/`)
|
||||
|
||||
**Installation Scripts:**
|
||||
1. ✅ `install_oracle19c_dr.ps1` - Oracle 19c installation (software only)
|
||||
2. ✅ `configure_listener_dr.ps1` - Oracle Listener configuration
|
||||
|
||||
**SSH Configuration Scripts:**
|
||||
3. ✅ `fix_ssh_key_auth.ps1` - Initial SSH key setup attempt
|
||||
4. ✅ `fix_ssh_key_auth_simple.cmd` - Simple command-line version
|
||||
5. ✅ `fix_ssh_via_service.ps1` - **WORKING** - Fixes SSH keys by stopping service
|
||||
|
||||
**Backup Transfer Scripts (on PRIMARY):**
|
||||
6. ✅ `transfer_to_dr.ps1` - Full backup transfer (updated for Windows)
|
||||
7. ✅ `transfer_incremental.ps1` - Incremental backup transfer (updated for Windows)
|
||||
8. ✅ `transfer_to_dr_windows.ps1` - Reference implementation
|
||||
|
||||
**Restore Script:**
|
||||
9. ✅ `rman_restore_from_primary.ps1` - RMAN restore script (ready to test)
|
||||
|
||||
**Helper Scripts:**
|
||||
10. ✅ `copy_system_ssh_key.ps1` - Extract SYSTEM user SSH key from PRIMARY
|
||||
11. ✅ `add_system_key_dr.ps1` - Add SYSTEM key to DR VM
|
||||
|
||||
---
|
||||
|
||||
## ✅ RMAN RESTORE COMPLETED - 2025-10-09 17:40
|
||||
|
||||
### 10. RMAN Restore End-to-End Test ✅ **COMPLETED**
|
||||
|
||||
**Final Status:** ✅ **DATABASE SUCCESSFULLY RESTORED AND OPEN**
|
||||
- Database: ROA
|
||||
- Mode: READ WRITE
|
||||
- Instance: OPEN
|
||||
- Tablespaces: 6 (all ONLINE)
|
||||
- Datafiles: 5
|
||||
- Application Owners: 69
|
||||
- Total Application Tables: 45,000+
|
||||
|
||||
**Session Duration:** ~5 hours (including troubleshooting)
|
||||
**Actual Restore Time:** ~15-20 minutes (datafiles + recovery)
|
||||
**Total Data Restored:** 6.32 GB compressed → ~15 GB uncompressed
|
||||
|
||||
---
|
||||
|
||||
## 🔧 CRITICAL ISSUES ENCOUNTERED & RESOLUTIONS
|
||||
|
||||
### Issue 1: Incremental Backup Corruption ⚠️ → ✅ RESOLVED
|
||||
**Problem:** Applying DIFFERENTIAL incremental backup (MIDDAY_INCREMENTAL from 14:00) caused UNDO tablespace corruption
|
||||
- Error: ORA-30012: undo tablespace 'UNDOTBS01' does not exist or of wrong type
|
||||
- Error: ORA-00603: ORACLE server session terminated by fatal error
|
||||
- Database crashed immediately after OPEN RESETLOGS attempt
|
||||
|
||||
**Root Cause:** DIFFERENTIAL incremental backup applied on top of FULL backup created inconsistent UNDO state
|
||||
|
||||
**Initial Workaround:** Restore only FULL backup without applying incremental
|
||||
|
||||
**Permanent Solution:** ✅ **Upgrade to CUMULATIVE incremental backups**
|
||||
- Each CUMULATIVE backup depends only on the Level 0 backup, not on earlier incrementals (no incremental dependency chain)
|
||||
- Each CUMULATIVE contains ALL changes since last Level 0
|
||||
- Eliminates UNDO/SCN mismatch issues
|
||||
- **See:** `DR_UPGRADE_TO_CUMULATIVE_PLAN.md` for implementation plan
|
||||
|
||||
### Issue 2: Control File SCN Mismatch 🔴
|
||||
**Problem:** ORA-01190: control file or data file 1 is from before the last RESETLOGS
|
||||
- Control file autobackup (`O1_MF_S_1214013953_NGFVLL29_.BKP`) created AFTER datafiles backup
|
||||
- SCN in control file was higher than SCN in datafiles
|
||||
- Error: ORA-01152: file 1 was not restored from a sufficiently old backup
|
||||
|
||||
**Root Cause:** Used SPFILE/Controlfile AUTOBACKUP instead of control file from same backup piece as datafiles
|
||||
|
||||
**Resolution:**
|
||||
1. Restore control file from SAME backup as datafiles: `O1_MF_NCNNF_TAG20251009T020551_NGFVLJTG_.BKP`
|
||||
2. This control file has matching SCN with datafiles (both from 02:05:51 backup)
|
||||
|
||||
### Issue 3: ORA-16433 Recovery Loop 🔄
|
||||
**Problem:** ORA-16433: The database or pluggable database must be opened in read/write mode
|
||||
- Occurred during RECOVER DATABASE attempts
|
||||
- Error appeared in both SQL*Plus and RMAN
|
||||
- Recovery session canceled due to errors
|
||||
|
||||
**Root Cause:**
|
||||
- Bug 14744052: Flag set in control file during incomplete RESETLOGS
|
||||
- Using `SET UNTIL SCN 999999999999` in RMAN caused invalid recovery state
|
||||
- Standard Edition limitations with recovery operations
|
||||
|
||||
**Resolution:**
|
||||
1. Remove `SET UNTIL SCN` from RMAN script
|
||||
2. Use `SET UNTIL TIME` with specific backup completion time
|
||||
3. Let RMAN auto-detect and apply only available archive logs
|
||||
4. Incomplete recovery flag properly set by stopping at missing archive log
|
||||
|
||||
### Issue 4: Memory Configuration ⚠️
|
||||
**Problem:** ORA-27104: system-defined limits for shared memory was misconfigured
|
||||
- Initial PFILE had `memory_target=1536M`
|
||||
- VM has 6GB RAM but Windows reserved ~2GB
|
||||
- Database startup failed in NOMOUNT
|
||||
|
||||
**Resolution:**
|
||||
Reduced memory settings in PFILE:
|
||||
```
|
||||
memory_target=1024M
|
||||
memory_max_target=1024M
|
||||
```
|
||||
|
||||
### Issue 5: Backup Location Issues 📁
|
||||
**Initial Setup:** Backups in `D:\oracle\backups\primary` (custom path)
|
||||
- RMAN couldn't auto-detect backups
|
||||
- Had to specify explicit paths for all operations
|
||||
- Control file autobackup search failed
|
||||
|
||||
**Final Solution:**
|
||||
1. Moved all backups to FRA: `C:\Users\oracle\recovery_area\ROA\autobackup`
|
||||
2. Updated PRIMARY transfer scripts to use FRA path
|
||||
3. RMAN now auto-detects all backups via CATALOG command
|
||||
4. Simplified restore procedure significantly
|
||||
|
||||
---
|
||||
|
||||
## 📋 WORKING RMAN RESTORE PROCEDURE
|
||||
|
||||
### Prerequisites ✅ ALL COMPLETE
|
||||
- ✅ Oracle 19c installed on DR VM
|
||||
- ✅ Listener configured and running
|
||||
- ✅ FULL backup transferred from PRIMARY to FRA location
|
||||
- ✅ OracleServiceROA Windows service created
|
||||
- ✅ Backups moved to: `C:\Users\oracle\recovery_area\ROA\autobackup`
|
||||
|
||||
### Step-by-Step Manual Procedure (Tested and Verified)
|
||||
|
||||
**1. Prepare PFILE (Modified for DR)**
|
||||
Location: `C:\Users\oracle\admin\ROA\pfile\initROA.ora`
|
||||
```ini
|
||||
db_name=ROA
|
||||
memory_target=1024M
|
||||
memory_max_target=1024M
|
||||
processes=150
|
||||
undo_management=MANUAL
|
||||
compatible=19.0.0
|
||||
control_files=('C:\Users\oracle\oradata\ROA\control01.ctl', 'C:\Users\oracle\recovery_area\ROA\control02.ctl')
|
||||
db_block_size=8192
|
||||
db_recovery_file_dest=C:\Users\Oracle\recovery_area
|
||||
db_recovery_file_dest_size=10G
|
||||
diagnostic_dest=C:\Users\oracle
|
||||
```
|
||||
|
||||
**2. Shutdown Database (if running)**
|
||||
```cmd
|
||||
set ORACLE_HOME=C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home
|
||||
set ORACLE_SID=ROA
|
||||
set PATH=%ORACLE_HOME%\bin;%PATH%
|
||||
|
||||
sqlplus / as sysdba
|
||||
SHUTDOWN ABORT;
|
||||
EXIT;
|
||||
```
|
||||
|
||||
**3. Startup NOMOUNT**
|
||||
```sql
|
||||
STARTUP NOMOUNT PFILE='C:\Users\oracle\admin\ROA\pfile\initROA.ora';
|
||||
EXIT;
|
||||
```
|
||||
|
||||
**4. Connect to RMAN and Restore Control File**
|
||||
```cmd
|
||||
rman target /
|
||||
|
||||
SET DBID 1363569330;
|
||||
|
||||
RUN {
|
||||
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
|
||||
RESTORE CONTROLFILE FROM 'C:/Users/oracle/recovery_area/ROA/autobackup/O1_MF_NCNNF_TAG20251009T020551_NGFVLJTG_.BKP';
|
||||
RELEASE CHANNEL ch1;
|
||||
}
|
||||
|
||||
ALTER DATABASE MOUNT;
|
||||
```
|
||||
|
||||
**5. Catalog Backups in FRA**
|
||||
```rman
|
||||
CATALOG START WITH 'C:/Users/oracle/recovery_area/ROA/autobackup' NOPROMPT;
|
||||
```
|
||||
|
||||
**6. Restore and Recover Database**
|
||||
```rman
|
||||
RUN {
|
||||
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
|
||||
ALLOCATE CHANNEL ch2 DEVICE TYPE DISK;
|
||||
SET UNTIL TIME "TO_DATE('09-OCT-2025 02:05:51','DD-MON-YYYY HH24:MI:SS')";
|
||||
RESTORE DATABASE;
|
||||
RECOVER DATABASE;
|
||||
RELEASE CHANNEL ch1;
|
||||
RELEASE CHANNEL ch2;
|
||||
}
|
||||
```
|
||||
|
||||
**7. Open Database with RESETLOGS**
|
||||
```rman
|
||||
ALTER DATABASE OPEN RESETLOGS;
|
||||
EXIT;
|
||||
```
|
||||
|
||||
**8. Create TEMP Tablespace**
|
||||
```sql
|
||||
sqlplus / as sysdba
|
||||
|
||||
ALTER TABLESPACE TEMP ADD TEMPFILE 'C:\Users\oracle\oradata\ROA\temp01.dbf'
|
||||
SIZE 567M REUSE AUTOEXTEND ON NEXT 640K MAXSIZE 32767M;
|
||||
|
||||
EXIT;
|
||||
```
|
||||
|
||||
**9. Verify Database Status**
|
||||
```sql
|
||||
sqlplus / as sysdba
|
||||
|
||||
SELECT NAME, OPEN_MODE, LOG_MODE FROM V$DATABASE;
|
||||
SELECT INSTANCE_NAME, STATUS FROM V$INSTANCE;
|
||||
SELECT TABLESPACE_NAME, STATUS FROM DBA_TABLESPACES ORDER BY TABLESPACE_NAME;
|
||||
SELECT COUNT(*) AS DATAFILE_COUNT FROM DBA_DATA_FILES;
|
||||
|
||||
SELECT OWNER, COUNT(*) AS TABLE_COUNT
|
||||
FROM DBA_TABLES
|
||||
WHERE OWNER NOT IN ('SYS','SYSTEM','OUTLN','MDSYS','CTXSYS','XDB','WMSYS','OLAPSYS',
|
||||
'ORDDATA','ORDSYS','EXFSYS','LBACSYS','DBSNMP','APPQOSSYS','GSMADMIN_INTERNAL')
|
||||
GROUP BY OWNER
|
||||
ORDER BY OWNER;
|
||||
|
||||
EXIT;
|
||||
```
|
||||
|
||||
### Expected Results ✅ VERIFIED
|
||||
|
||||
**Database Status:**
|
||||
```
|
||||
NAME: ROA
|
||||
OPEN_MODE: READ WRITE
|
||||
LOG_MODE: ARCHIVELOG
|
||||
INSTANCE_NAME: ROA
|
||||
STATUS: OPEN
|
||||
```
|
||||
|
||||
**Tablespaces:**
|
||||
```
|
||||
SYSAUX ONLINE
|
||||
SYSTEM ONLINE
|
||||
TEMP ONLINE
|
||||
TS_ROA ONLINE
|
||||
UNDOTBS01 ONLINE
|
||||
USERS ONLINE
|
||||
```
|
||||
|
||||
**Data Verification:**
|
||||
- Datafiles: 5 (excluding TEMP)
|
||||
- Application Owners: 69
|
||||
- Application Tables: 45,000+
|
||||
|
||||
**Performance Metrics:**
|
||||
- NOMOUNT to MOUNT: ~30 seconds
|
||||
- Control file restore: ~10 seconds
|
||||
- Catalog backups: ~20 seconds
|
||||
- Database restore: ~8-10 minutes
|
||||
- Database recovery: ~2-3 minutes
|
||||
- OPEN RESETLOGS: ~1 minute
|
||||
- **Total Time: ~12-15 minutes**
|
||||
|
||||
### Automated Script Version
|
||||
|
||||
**Script:** `rman_restore_final.cmd`
|
||||
Location: `/mnt/e/proiecte/ROMFASTSQL/oracle/standby-server-scripts/rman_restore_final.cmd`
|
||||
|
||||
This CMD script automates all the above steps. Run on DR VM as Administrator:
|
||||
```cmd
|
||||
D:\oracle\scripts\rman_restore_final.cmd
|
||||
```
|
||||
|
||||
The script will:
|
||||
1. Shutdown database if running
|
||||
2. Startup NOMOUNT with correct PFILE
|
||||
3. Restore control file from correct backup piece (not autobackup)
|
||||
4. Mount database
|
||||
5. Catalog all backups in FRA
|
||||
6. Restore database with 2 parallel channels
|
||||
7. Recover database with NOREDO (no incremental)
|
||||
8. Open with RESETLOGS
|
||||
9. Create TEMP tablespace
|
||||
10. Verify database status
|
||||
|
||||
Log file: `D:\oracle\logs\rman_restore_final.log`
|
||||
|
||||
### 11. Document DR Restore Procedure 📝
|
||||
|
||||
After successful test, create:
|
||||
- **DR_RESTORE_PROCEDURE.md** - Step-by-step restore instructions
|
||||
- **DR_RUNBOOK.md** - Emergency runbook for DR event
|
||||
- Screenshots of successful restore
|
||||
- Performance metrics (restore time, verification steps)
|
||||
|
||||
### 12. Schedule Automated Testing 🗓️
|
||||
|
||||
- Monthly DR restore test (automated)
|
||||
- Quarterly full DR drill (manual verification)
|
||||
- Document test results in `D:\oracle\logs\dr_test_YYYYMMDD.log`
|
||||
|
||||
---
|
||||
|
||||
## 📋 PRIMARY SERVER CONFIGURATION (Reference)
|
||||
|
||||
**Server:** 10.0.20.36 (Windows Server)
|
||||
**Oracle Version:** 19c SE2 (19.3.0.0.0)
|
||||
**Database:** ROA, DBID: 1363569330, **non-CDB** (traditional architecture)
|
||||
|
||||
**Paths:**
|
||||
- ORACLE_HOME: `C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home`
|
||||
- ORACLE_BASE: `C:\Users\oracle`
|
||||
- Datafiles: `C:\Users\oracle\oradata\ROA\`
|
||||
- SYSTEM01.DBF
|
||||
- SYSAUX01.DBF
|
||||
- UNDOTBS01.DBF
|
||||
- TS_ROA.DBF (application tablespace)
|
||||
- USERS01.DBF
|
||||
- TEMP01.DBF (567 MB)
|
||||
- Control Files:
|
||||
- `C:\Users\oracle\oradata\ROA\control01.ctl`
|
||||
- `C:\Users\oracle\recovery_area\ROA\control02.ctl`
|
||||
- Redo Logs:
|
||||
- GROUP 1: `C:\Users\oracle\oradata\ROA\REDO01.LOG` (200 MB)
|
||||
- GROUP 2: `C:\Users\oracle\oradata\ROA\REDO02.LOG` (200 MB)
|
||||
- GROUP 3: `C:\Users\oracle\oradata\ROA\REDO03.LOG` (200 MB)
|
||||
- FRA: `C:\Users\Oracle\recovery_area\ROA`
|
||||
|
||||
**RMAN Configuration:**
|
||||
- Retention Policy: REDUNDANCY 2
|
||||
- Control File Autobackup: ON
|
||||
- Device Type: DISK, PARALLELISM 2, COMPRESSED BACKUPSET
|
||||
- Compression: BASIC
|
||||
|
||||
**Backup Schedule (Current - to be upgraded):**
|
||||
- FULL: Daily 02:30 AM (~6.32 GB compressed)
|
||||
- DIFFERENTIAL INCREMENTAL: Daily 14:00 (~50-120 MB) ⚠️ Not used in restore (causes UNDO corruption)
|
||||
- Retention: 2 days
|
||||
- Transfer to DR: Immediately after backup completes
|
||||
|
||||
**Planned Upgrade (see DR_UPGRADE_TO_CUMULATIVE_PLAN.md):**
|
||||
- FULL: Daily 02:30 AM (~6.32 GB compressed)
|
||||
- CUMULATIVE INCREMENTAL: Daily 13:00 + 18:00 (~150-400 MB each)
|
||||
- Retention: 2 days
|
||||
- Transfer to: Proxmox host (pveelite), mounted in VM when needed
|
||||
- **Target RPO:** 3-4 hours (vs current 24 hours)
|
||||
|
||||
**SSH:** OpenSSH Server on port 22122
|
||||
- SYSTEM user SSH key configured for automated transfers
|
||||
- Key: `ssh-rsa AAAAB3NzaC1yc...administrator@ROA-CARAPETRU2`
|
||||
|
||||
**Scheduled Tasks:**
|
||||
- Run as: `NT AUTHORITY\SYSTEM`
|
||||
- RMAN Full Backup + Transfer: Daily 02:30 AM
|
||||
- RMAN Incremental Backup + Transfer: Daily 14:00
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ KNOWN ISSUES & RESOLUTIONS
|
||||
|
||||
### 1. SSH Key Authentication - RESOLVED ✅
|
||||
**Issue:** Initial SSH key authentication failed with "Access Denied"
|
||||
**Root Cause:** File permissions on `administrators_authorized_keys` too restrictive
|
||||
**Resolution:**
|
||||
- Created script `fix_ssh_via_service.ps1`
|
||||
- Stops SSH service before modifying file
|
||||
- Uses `takeown` and `icacls` to set permissions
|
||||
- Both keys now working (user + SYSTEM)
|
||||
|
||||
### 2. Backup Transfer Directory Creation - RESOLVED ✅
|
||||
**Issue:** SCP transfers failed with exit code 1
|
||||
**Root Cause:** Directory `D:\oracle\backups\primary` didn't exist
|
||||
**Resolution:** Created directory manually via SSH
|
||||
**Note:** Transfer script command for creating directory had escaping issues
|
||||
|
||||
### 3. Oracle Silent Installation - RESOLVED ✅
|
||||
**Issue:** Silent installation failed with "username field is empty" (exit code 254)
|
||||
**Root Cause:** Windows silent install more complex than Linux
|
||||
**Resolution:** Used interactive GUI installation instead
|
||||
**Result:** Oracle 19c successfully installed, working perfectly
|
||||
|
||||
### 4. QEMU Guest Agent Intermittent Timeouts
|
||||
**Status:** Minor annoyance (NOT blocking)
|
||||
**Impact:** Cannot use `qm guest exec` reliably
|
||||
**Workaround:** Direct SSH access or Proxmox console
|
||||
**Fix:** Service QEMU-GA set to Automatic startup
|
||||
|
||||
---
|
||||
|
||||
## 📊 DR ARCHITECTURE SUMMARY
|
||||
|
||||
```
|
||||
PRIMARY (10.0.20.36) - Windows Server DR (10.0.20.37) - Windows 11 VM
|
||||
├─ Oracle 19c SE2 (19.3.0.0.0) ├─ Oracle 19c SE2 (19.3.0.0.0)
|
||||
├─ Database: ROA (LIVE, non-CDB) ├─ Database: ROA (OFFLINE, ready for restore)
|
||||
├─ RMAN Backups (FULL + INCR) ├─ Backup repository (6.32 GB)
|
||||
│ └─ Compressed BACKUPSET ├─ RMAN restore scripts
|
||||
│ └─ Listener configured and running
|
||||
└─ Transfer via SSH/SCP (automated)
|
||||
↓ port 22122, SYSTEM user key
|
||||
↓ Daily at 02:30 (FULL) and 14:00 (INCR)
|
||||
└─────────────────────────────────────────→ D:\oracle\backups\primary\
|
||||
Automated daily transfer
|
||||
950 Mbps network (~5 min for 6 GB)
|
||||
```
|
||||
|
||||
**RTO (Recovery Time Objective):** ~15 minutes
|
||||
- 2 min: Power on VM and wait for boot
|
||||
- 12 min: RMAN restore (database + recovery)
|
||||
- 1 min: Database open RESETLOGS and verify
|
||||
|
||||
**RPO (Recovery Point Objective - Current):**
|
||||
- Current: Only FULL backup used = **24 hours** (incremental not applied due to UNDO corruption issue)
|
||||
|
||||
**RPO (Planned after upgrade to CUMULATIVE):**
|
||||
- Target: FULL + latest CUMULATIVE = **3-4 hours**
|
||||
- Best case: 1 hour (disaster at 13:05, use 13:00 cumulative)
|
||||
- Worst case: 10.5 hours (disaster just before the 13:00 cumulative is taken, so only the 02:30 FULL is usable)
|
||||
|
||||
**Storage Requirements:**
|
||||
- VM disk: 500 GB total
|
||||
- Oracle installation: ~10 GB
|
||||
- Database (restored): ~15 GB
|
||||
- Backup repository: ~14 GB (2 days retention)
|
||||
- Free space: ~460 GB
|
||||
- Daily backup transfer: 6-7 GB (FULL) + 50-120 MB (INCR)
|
||||
|
||||
**Daily Resource Usage:**
|
||||
- VM powered OFF when not needed: **0 GB RAM, 0 CPU**
|
||||
- VM powered ON during DR event: **6 GB RAM, 4 CPU cores**
|
||||
- Network transfer: ~5-10 minutes/day at 950 Mbps
|
||||
|
||||
**Backup Retention:**
|
||||
- PRIMARY: 2 days in FRA
|
||||
- DR: 2 days in `D:\oracle\backups\primary`
|
||||
- Cleanup: Automated via transfer scripts
|
||||
|
||||
---
|
||||
|
||||
## 🎯 NEXT STEPS
|
||||
|
||||
### ✅ COMPLETED (Current Session):
|
||||
1. ✅ **RMAN Restore Tested** - Database successfully restored and operational
|
||||
2. ✅ **Database Verified** - All tablespaces, tables, data verified
|
||||
3. ✅ **Documented Results** - Restore time ~12-15 minutes
|
||||
4. ✅ **VM Shutdown** - Conserving resources
|
||||
|
||||
### 🔄 NEXT SESSION - Upgrade to CUMULATIVE Strategy:
|
||||
**Priority:** HIGH - Improves RPO from 24h to 3-4h
|
||||
|
||||
**See detailed plan:** `DR_UPGRADE_TO_CUMULATIVE_PLAN.md`
|
||||
|
||||
**Summary of changes:**
|
||||
1. 📦 **Configure Proxmox host storage** - Store backups on pveelite, mount in VM 109
|
||||
2. 🔄 **Convert DIFFERENTIAL → CUMULATIVE** - Add keyword to RMAN script
|
||||
3. ⏰ **Add second incremental** - Run at 13:00 + 18:00 (vs current 14:00 only)
|
||||
4. 📝 **Update transfer scripts** - Send to Proxmox host instead of VM
|
||||
5. 🗓️ **Update scheduled tasks** - Create 13:00 and 18:00 tasks
|
||||
6. 🧪 **Update restore script** - Read from mount point (F:\ — C:, D: and E: are already in use on VM 109), handle cumulative backups
|
||||
7. ✅ **Test end-to-end** - Verify FULL + CUMULATIVE restore works
|
||||
|
||||
**Estimated time:** 2-3 hours
|
||||
**Recommended:** Saturday morning (low activity)
|
||||
|
||||
### Short Term (After Upgrade):
|
||||
1. 📄 **Update DR Runbook** - Include cumulative backup procedures
|
||||
2. 🧪 **Schedule Weekly Tests** - Automated Saturday morning DR tests
|
||||
3. 📊 **Create Monitoring** - Alert if backups fail to transfer
|
||||
4. 🔐 **Backup VM State** - Snapshot of configured DR VM
|
||||
|
||||
### Long Term:
|
||||
1. 🔄 **Automate Weekly Tests** - Script to test restore automatically
|
||||
2. 📈 **Performance Tuning** - Optimize restore speed if needed
|
||||
3. 🌐 **Network Failover** - DNS/routing changes for DR activation
|
||||
4. 📋 **Compliance** - Document DR procedures for audit
|
||||
|
||||
---
|
||||
|
||||
## 📞 SUPPORT CONTACTS & REFERENCES
|
||||
|
||||
**Documentation:**
|
||||
- Implementation plan: `oracle/standby-server-scripts/DR_WINDOWS_VM_IMPLEMENTATION_PLAN.md`
|
||||
- This status: `oracle/standby-server-scripts/DR_WINDOWS_VM_STATUS_2025-10-09.md`
|
||||
- Project directory: `/mnt/e/proiecte/ROMFASTSQL/proxmox/vm109-windows-dr/` (formerly `oracle/standby-server-scripts/`)
|
||||
|
||||
**Proxmox:**
|
||||
- Cluster: romfast
|
||||
- Nodes: pve1 (10.0.20.200), pvemini (10.0.20.201), pveelite (10.0.20.202)
|
||||
- VM 109 Commands:
|
||||
```bash
|
||||
qm status 109 # Check VM status
|
||||
qm start 109 # Power on VM
|
||||
qm stop 109 # Force stop (immediate, like pulling the power)
|
||||
qm shutdown 109 # Graceful shutdown (ACPI / guest agent)
|
||||
qm console 109 # Open console (if needed)
|
||||
```
|
||||
|
||||
**Access Methods:**
|
||||
- **SSH (Preferred):** `ssh -p 22122 romfast@10.0.20.37`
|
||||
- Key authentication: ✅ Working
|
||||
- Password: Romfast2025! (if key fails)
|
||||
- **Proxmox Console:** Web UI → pveelite → VM 109 → Console
|
||||
- **RDP:** Not configured (SSH preferred for security)
|
||||
|
||||
**Oracle Quick Reference:**
|
||||
```powershell
|
||||
# On DR VM - Set environment
|
||||
$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
|
||||
$env:ORACLE_SID = "ROA"
|
||||
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"
|
||||
|
||||
# Connect to database
|
||||
sqlplus / as sysdba
|
||||
|
||||
# Check listener
|
||||
lsnrctl status
|
||||
|
||||
# Test TNS
|
||||
tnsping ROA
|
||||
```
|
||||
|
||||
**RMAN Quick Reference:**
|
||||
```bash
|
||||
# Connect to RMAN
|
||||
rman target /
|
||||
|
||||
# List backups
|
||||
LIST BACKUP SUMMARY;
|
||||
|
||||
# Validate backups
|
||||
VALIDATE BACKUPSET;
|
||||
|
||||
# Check database
|
||||
SELECT NAME, OPEN_MODE, LOG_MODE FROM V$DATABASE;
|
||||
```
|
||||
|
||||
**Useful Scripts Location:**
|
||||
- DR VM: `D:\oracle\scripts\`
|
||||
- PRIMARY: `D:\rman_backup\`
|
||||
- Project: `/mnt/e/proiecte/ROMFASTSQL/proxmox/vm109-windows-dr/scripts/`
|
||||
|
||||
**Oracle Documentation:**
|
||||
- RMAN Backup/Recovery: https://docs.oracle.com/en/database/oracle/oracle-database/19/bradv/
|
||||
- Windows Installation: https://docs.oracle.com/en/database/oracle/oracle-database/19/ntqrf/
|
||||
- Database Administrator's Guide: https://docs.oracle.com/en/database/oracle/oracle-database/19/admin/
|
||||
|
||||
---
|
||||
|
||||
## 📈 PROGRESS TRACKING
|
||||
|
||||
**Overall Status:** 100% Complete (RMAN restore test passed)
|
||||
**Estimated time to completion:** None remaining for the initial DR setup
|
||||
**Blockers:** None
|
||||
|
||||
**Completed:** 10/10 major tasks
|
||||
**Remaining:** 0/10 (RMAN restore test completed this session)
|
||||
|
||||
**Session Summary (2025-10-09):**
|
||||
- ✅ Fixed SSH key authentication (2 keys configured)
|
||||
- ✅ Installed Oracle 19c (interactive installation)
|
||||
- ✅ Configured Oracle Listener (running on port 1521)
|
||||
- ✅ Updated backup transfer scripts for Windows target
|
||||
- ✅ Added PRIMARY SYSTEM SSH key to DR VM
|
||||
- ✅ Successfully transferred 6.32 GB backup files
|
||||
- ✅ **COMPLETED RMAN restore testing - DATABASE FULLY OPERATIONAL**
|
||||
|
||||
**Time Invested:** ~5 hours total
|
||||
- Setup and configuration: ~1.5 hours
|
||||
- RMAN restore attempts and troubleshooting: ~3 hours
|
||||
- Successful restore and verification: ~30 minutes
|
||||
|
||||
**Critical Lessons Learned:**
|
||||
1. **Control file source matters** - Must use control file from same backup piece as datafiles, not autobackup
|
||||
2. **Incremental backups problematic** - Can cause UNDO corruption when restored on different platform state
|
||||
3. **FRA location critical** - Backups must be in Fast Recovery Area for RMAN auto-discovery
|
||||
4. **Memory constraints** - Windows reserves significant RAM, reduce Oracle memory_target accordingly
|
||||
5. **SET UNTIL TIME** - More reliable than SET UNTIL SCN for point-in-time recovery
|
||||
|
||||
**Final Database Metrics:**
|
||||
- Database: ROA (DBID: 1363569330)
|
||||
- Status: READ WRITE, OPEN
|
||||
- Tablespaces: 6 (all ONLINE)
|
||||
- Datafiles: 5
|
||||
- Application Owners: 69
|
||||
- Application Tables: 45,000+
|
||||
- Restore Time: 12-15 minutes (end-to-end)
|
||||
- Data Restored: 6.32 GB compressed → ~15 GB uncompressed
|
||||
|
||||
---
|
||||
|
||||
**Last Updated:** 2025-10-09 17:45 (Session completed)
|
||||
**Updated By:** Claude Code (Sonnet 4.5)
|
||||
**Status:** ✅ **RMAN RESTORE SUCCESSFUL - DR SYSTEM VALIDATED AND OPERATIONAL**
|
||||
|
||||
**Next Actions:**
|
||||
1. Shutdown database: `SHUTDOWN IMMEDIATE;`
|
||||
2. Power off VM to conserve resources: `qm shutdown 109` (graceful; use `qm stop 109` only if the guest does not respond)
|
||||
3. Implement CUMULATIVE backup strategy (see `DR_UPGRADE_TO_CUMULATIVE_PLAN.md`)
|
||||
4. Schedule weekly DR restore tests
|
||||
5. Create DR runbook for emergency procedures
|
||||
6. Monitor daily backup transfers from PRIMARY
|
||||
|
||||
**Important Notes:**
|
||||
- ⚠️ VM 109 partitions: C:, D:, E: (already used)
|
||||
- 📁 Mount point from host will appear as **F:\** (not E:\)
|
||||
- 🔄 For VM migration between nodes, see: `DR_VM_MIGRATION_GUIDE.md`
|
||||
36
proxmox/vm109-windows-dr/scripts/add_system_key_dr.ps1
Normal file
36
proxmox/vm109-windows-dr/scripts/add_system_key_dr.ps1
Normal file
@@ -0,0 +1,36 @@
|
||||
# Add PRIMARY SYSTEM user SSH key to DR VM
# Run this on DR VM (10.0.20.37) as Administrator
#
# What the script does:
#   1. Appends the public key below to the administrators_authorized_keys file,
#      unless the same key material is already present.
#   2. Prints the comment field of every key found in that file.
#   3. Restarts the sshd service and exits with code 1 if that fails.
#
# NOTE(review): the script does not modify the ACL of the authorized-keys file.
# If the file is created here for the first time, confirm that its permissions
# are accepted by sshd.

$systemKey = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQD3EdHswdNDuDC9kJdUli2zGGPVlEWJjmqtb4eABYWwjQnWqjGp8oAFbQ+r2TxR544WtEyhDL9BU6oO3EFH957DBGQJHJvfRgx2VnkNZEzN/XX/7HK6Cp5rlINGGp26PjHulKkZjARmjC3YK0aUFEkiyNyUBqhtQpmcYP4+wjUfiiO2xUkF9mzGplbWGK3ZmEdkWNd5BNddqxmxyLvd2KHAo8F7Vux9SyPWqZ8bwiDyidAMDU7kCXS/RabUMl2LGajzFbRnR87YA7cIaVFl/IWExO/fsYlgkwmmmIAMdjINp0IWDdydnmG1XNNhM8h/BKY/eK3uile8CvyEbmbuf0+ePm3Ex9vTjn4jYN2vFE148FgQGGTibibJ+sXFoQ87VFNGEuog/V0aajVk/xcOihszsEvzD2IV/tSKJFdI6klnYLuieuMZf7Dvs/6sC1l3dnsBtcpvjnU48altRRZvQzaJf3gIkG1lRGBgyW1n+WHe/7StIveYTVNFtx+fcnqB8gm9fZQxBp2WRbLNFpY/Qj+6BF66b1A2ZxH/3F9Z/6VT91EActOf+AMxjsI+09d7IRYIvzr8OxMPYOHU2bglp3o86xZEMUXfcjB8Sw/8KMsCjBp3ABEN9/bwv1496aw9IC67ZBQ2cDDfgdBej5DAkT4NS2XIx7wbM7sBtLYjcXMi7w== administrator@ROA-CARAPETRU2"

$authKeysFile = "C:\ProgramData\ssh\administrators_authorized_keys"

Write-Host "Adding PRIMARY SYSTEM user SSH key to DR VM..." -ForegroundColor Cyan

# Compare on the base64 key material (second field) rather than on the trailing
# comment: a different key may carry the same comment, and a comment is free text.
$keyBlob = ($systemKey -split '\s+')[1]

# Check if key already exists
$keyPresent = $false
if (Test-Path -LiteralPath $authKeysFile) {
    $keyPresent = [bool](Select-String -LiteralPath $authKeysFile -SimpleMatch -Pattern $keyBlob -Quiet)
}

if ($keyPresent) {
    Write-Host "Key already exists in authorized_keys" -ForegroundColor Yellow
} else {
    # If the existing file does not end with a newline, Add-Content would glue
    # the new key onto the last existing line and corrupt both entries.
    $separator = ""
    if (Test-Path -LiteralPath $authKeysFile) {
        $existingText = [System.IO.File]::ReadAllText($authKeysFile)
        if ($existingText.Length -gt 0 -and -not $existingText.EndsWith("`n")) {
            $separator = "`r`n"
        }
    }

    # Add the key
    try {
        Add-Content -Path $authKeysFile -Value ($separator + $systemKey) -ErrorAction Stop
        Write-Host "Key added successfully" -ForegroundColor Green
    } catch {
        Write-Host "ERROR: Failed to add key to ${authKeysFile}: $_" -ForegroundColor Red
        exit 1
    }
}

# Show current keys (comment field of each entry, any key type)
Write-Host ""
Write-Host "Current authorized keys:" -ForegroundColor Cyan
Get-Content $authKeysFile -ErrorAction SilentlyContinue | ForEach-Object {
    if ($_ -match '^(?:ssh-|ecdsa-|sk-)\S+\s+\S+\s+(.+)$') {
        Write-Host " - $($matches[1])" -ForegroundColor White
    }
}

# Restart SSH service; only report success when the restart really happened
Write-Host ""
Write-Host "Restarting SSH service..." -ForegroundColor Yellow
try {
    Restart-Service sshd -ErrorAction Stop
    Write-Host "SSH service restarted" -ForegroundColor Green
} catch {
    Write-Host "ERROR: Failed to restart sshd: $_" -ForegroundColor Red
    exit 1
}

Write-Host ""
Write-Host "Done! SYSTEM user from PRIMARY can now connect via SSH." -ForegroundColor Green
|
||||
133
proxmox/vm109-windows-dr/scripts/cleanup_database.ps1
Normal file
133
proxmox/vm109-windows-dr/scripts/cleanup_database.ps1
Normal file
@@ -0,0 +1,133 @@
|
||||
# Oracle Database Complete Cleanup Script (PowerShell)
# Purpose: Remove all database files to restore DR VM to clean state.
#          The OracleServiceROA Windows service is stopped, NOT deleted.
# Run as: Administrator
# Location: D:\oracle\scripts\cleanup_database.ps1
#
# Parameters:
#   /SILENT - Non-interactive mode (skips the warning and the 3-second grace period)
#   /AUTO   - Same as /SILENT
#   /AFTER  - Accepted for backward compatibility; it has no effect because the
#             instance shutdown and service stop are always performed.
#
# Exit code: always 0 (cleanup is best-effort); problems are printed as warnings.

$ErrorActionPreference = "Continue"

$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
$env:ORACLE_SID = "ROA"
$env:PATH = "$env:ORACLE_HOME\bin;$env:PATH"

$serviceName = "OracleServiceROA"
$dataDir = "C:\Users\oracle\oradata\ROA"
$fraDir = "C:\Users\oracle\recovery_area\ROA"
$traceDir = "C:\Users\oracle\diag\rdbms\roa\ROA\trace"

# Number of steps that could not be completed cleanly; reported at the end.
$warnings = 0

Write-Host "============================================"
Write-Host "Oracle Database Cleanup Script"
Write-Host "============================================"
Write-Host ""
Write-Host "This script will:"
Write-Host " 1. Shut down the instance and stop the Oracle service (service is kept)"
Write-Host " 2. Delete all database files (datafiles, control files, redo logs)"
Write-Host " 3. Delete local FRA (backups are on F:\, safe to delete)"
Write-Host " 4. Delete trace files"
Write-Host " 5. Leave VM in clean state (service stopped, no DB files)"
Write-Host ""

# Check parameters (-contains is case-insensitive, so /silent works too)
$silent = ($args -contains "/SILENT") -or ($args -contains "/AUTO")

if (-not $silent) {
    Write-Host "WARNING: This will DELETE the entire database!" -ForegroundColor Red
    Write-Host "Starting cleanup in 3 seconds... (Press Ctrl+C to cancel)"
    Start-Sleep -Seconds 3
}

# Create directories
New-Item -ItemType Directory -Path "D:\oracle\temp" -Force | Out-Null
New-Item -ItemType Directory -Path "D:\oracle\logs" -Force | Out-Null

Write-Host ""
Write-Host "[1/5] Shutting down database and stopping service..."

# Check if Oracle service exists
$service = Get-Service -Name $serviceName -ErrorAction SilentlyContinue
if ($service) {
    Write-Host " Oracle service found, ensuring clean shutdown..."

    # Shutdown instance using SQL*Plus (always, not just /AFTER)
    $shutdownSQL = "WHENEVER SQLERROR CONTINUE`nSHUTDOWN ABORT;`nEXIT;"
    try {
        $shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
        Start-Sleep -Seconds 2
        if ($LASTEXITCODE -eq 0) {
            Write-Host " Instance shut down (ABORT for fast cleanup)"
        } else {
            # Best effort: the service is stopped and processes are killed below anyway.
            Write-Host " Shutdown command sent (sqlplus exit code $LASTEXITCODE, ignored)"
        }
    } catch {
        Write-Host " Shutdown command sent (errors ignored)"
    }

    # ALWAYS stop Oracle service to ensure clean state.
    # Refresh first: the Status property is a snapshot taken by Get-Service.
    $service.Refresh()
    if ($service.Status -ne "Stopped") {
        Write-Host " Stopping Oracle service to ensure clean state..."
        try {
            Stop-Service -Name $serviceName -Force -ErrorAction Stop
            Start-Sleep -Seconds 3
            Write-Host " Service stopped successfully"
        } catch {
            Write-Host " WARNING: Failed to stop service: $_" -ForegroundColor Yellow
        }
    } else {
        Write-Host " Service already stopped"
    }

    # Force kill any remaining Oracle processes to ensure clean state
    Write-Host " Cleaning up any remaining Oracle processes..."
    Get-Process -Name "sqlplus" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
    Get-Process -Name "oracle" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
    Start-Sleep -Seconds 2
    Write-Host " All Oracle processes terminated"
} else {
    Write-Host " Oracle service not found, will be created during restore"
}

# Report the state actually observed instead of assuming the stop succeeded.
$service = Get-Service -Name $serviceName -ErrorAction SilentlyContinue
if (-not $service) {
    $serviceSummary = " [NO] Oracle service (not present, will be created during restore)"
    Write-Host "[2/5] Oracle service not present (nothing to stop)"
} elseif ($service.Status -eq "Stopped") {
    $serviceSummary = " [YES] Oracle service (STOPPED for clean restore)"
    Write-Host "[2/5] Oracle service stopped (clean state for restore)"
    Write-Host " Service will be started fresh during restore"
    Write-Host " This ensures no state inconsistencies (prevents ORA-00600)"
} else {
    $warnings++
    $serviceSummary = " [YES] Oracle service (WARNING: state is $($service.Status))"
    Write-Host "[2/5] WARNING: Oracle service is still $($service.Status); locked files may not be deleted" -ForegroundColor Yellow
}

Write-Host "[3/5] Deleting database files + SPFILE..."
Write-Host " Deleting datafiles..."
Remove-Item "$dataDir\*.dbf" -Force -ErrorAction SilentlyContinue
Write-Host " Deleting control files..."
Remove-Item "$dataDir\*.ctl" -Force -ErrorAction SilentlyContinue
Write-Host " Deleting redo logs..."
Remove-Item "$dataDir\*.log" -Force -ErrorAction SilentlyContinue
Write-Host " Deleting SPFILE (ensures PFILE-based startup)..."
Remove-Item "$env:ORACLE_HOME\database\SPFILE*.ORA" -Force -ErrorAction SilentlyContinue

# Deletion errors are suppressed above, so check what is actually left behind.
$leftoverDbFiles = @(Get-ChildItem -Path "$dataDir\*" -Include *.dbf, *.ctl, *.log -ErrorAction SilentlyContinue)
if ($leftoverDbFiles.Count -gt 0) {
    $warnings++
    Write-Host " WARNING: $($leftoverDbFiles.Count) database file(s) could not be deleted from $dataDir" -ForegroundColor Yellow
}

Write-Host "[4/5] Deleting local FRA (backups are on F:\)..."
if (Test-Path $fraDir) {
    Remove-Item $fraDir -Recurse -Force -ErrorAction SilentlyContinue
    New-Item -ItemType Directory -Path $fraDir -Force | Out-Null
    $leftoverFra = @(Get-ChildItem -Path $fraDir -Force -ErrorAction SilentlyContinue)
    if ($leftoverFra.Count -gt 0) {
        $warnings++
        Write-Host " WARNING: FRA not fully cleared ($($leftoverFra.Count) item(s) remain in $fraDir)" -ForegroundColor Yellow
    } else {
        Write-Host " FRA cleared"
    }
} else {
    New-Item -ItemType Directory -Path $fraDir -Force | Out-Null
    Write-Host " FRA directory created"
}

Write-Host "[5/5] Deleting trace files (to save space)..."
Remove-Item "$traceDir\*.trc" -Force -ErrorAction SilentlyContinue
Remove-Item "$traceDir\*.trm" -Force -ErrorAction SilentlyContinue
Write-Host " Trace files deleted"

Write-Host ""
Write-Host "============================================"
Write-Host "Database Cleanup Complete!"
Write-Host "============================================"
Write-Host ""
Write-Host "Current state:"
Write-Host " [YES] Oracle software installed"
Write-Host " [YES] PFILE exists (C:\Users\oracle\admin\ROA\pfile\initROA.ora)"
Write-Host $serviceSummary
Write-Host " [NO] SPFILE (deleted to ensure PFILE startup)"
Write-Host " [NO] Database files (will be restored from backups)"
Write-Host " [NO] Control files (will be restored from backups)"
Write-Host " [NO] Datafiles (will be restored from backups)"
Write-Host ""
if ($warnings -eq 0) {
    Write-Host "VM is now in CLEAN STATE (service stopped, ready for fresh start)!"
} else {
    Write-Host "Cleanup finished with $warnings warning(s) - review the messages above before restoring." -ForegroundColor Yellow
}
Write-Host ""
Write-Host "Next step: Run D:\oracle\scripts\rman_restore_from_zero.ps1"
Write-Host " (It will start the service fresh and restore the database)"
Write-Host ""

# Best-effort script: keep the historical exit code so existing callers are unaffected.
exit 0
|
||||
158
proxmox/vm109-windows-dr/scripts/configure_listener_dr.ps1
Normal file
158
proxmox/vm109-windows-dr/scripts/configure_listener_dr.ps1
Normal file
@@ -0,0 +1,158 @@
|
||||
# Configure Oracle Listener on DR VM
# Run this script AFTER Oracle installation
# Run AS ADMINISTRATOR on DR VM (10.0.20.37)
#
# Steps:
#   1. Ensure %ORACLE_HOME%\network\admin exists
#   2. Write listener.ora (TCP listener + static registration of SID ROA)
#   3. Write tnsnames.ora (aliases ROA and ROA_LOCAL)
#   4. Write sqlnet.ora
#   5. Restart the listener and print its status
#
# Exit code: 1 when lsnrctl is missing or the start command throws; 0 otherwise.

$ErrorActionPreference = "Stop"

$ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
$ORACLE_BASE = "C:\Users\oracle"
$DR_IP = "10.0.20.37"
$LISTENER_PORT = 1521

# Runs "lsnrctl <Command>" and returns its exit code and combined output.
# $ErrorActionPreference is relaxed inside the function only (the assignment is
# function-scoped): with "Stop", Windows PowerShell turns any stderr text from
# a native command redirected with 2>&1 into a terminating error.
function Invoke-Lsnrctl {
    param([string]$Command)

    $ErrorActionPreference = "Continue"
    $text = & lsnrctl $Command 2>&1 | Out-String
    return [pscustomobject]@{
        ExitCode = $LASTEXITCODE
        Output   = $text
    }
}

Write-Host "=== Configure Oracle Listener on DR VM ===" -ForegroundColor Cyan
Write-Host ""

# Set environment
$env:ORACLE_HOME = $ORACLE_HOME
$env:ORACLE_BASE = $ORACLE_BASE
$env:PATH = "$ORACLE_HOME\bin;$env:PATH"

# Step 1: Create network admin directory
Write-Host "[1/5] Creating network admin directory..." -ForegroundColor Yellow
$netAdminDir = "$ORACLE_HOME\network\admin"
if (!(Test-Path $netAdminDir)) {
    New-Item -ItemType Directory -Path $netAdminDir -Force | Out-Null
}
Write-Host " Directory: $netAdminDir" -ForegroundColor Green

# Step 2: Create listener.ora
# Continuation lines are indented on purpose: in Oracle Net configuration files
# a line starting in the first column begins a new parameter.
Write-Host "[2/5] Creating listener.ora..." -ForegroundColor Yellow
$listenerOra = @"
# Listener Configuration for DR VM
# Generated: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")

LISTENER =
  (DESCRIPTION_LIST =
    (DESCRIPTION =
      (ADDRESS = (PROTOCOL = TCP)(HOST = $DR_IP)(PORT = $LISTENER_PORT))
      (ADDRESS = (PROTOCOL = IPC)(KEY = EXTPROC1521))
    )
  )

SID_LIST_LISTENER =
  (SID_LIST =
    (SID_DESC =
      (GLOBAL_DBNAME = ROA)
      (ORACLE_HOME = $($ORACLE_HOME -replace '\\', '/'))
      (SID_NAME = ROA)
    )
  )

# Listener control parameters
INBOUND_CONNECT_TIMEOUT_LISTENER = 120
SUBSCRIBE_FOR_NODE_DOWN_EVENT_LISTENER = OFF
VALID_NODE_CHECKING_REGISTRATION_LISTENER = OFF

# Logging
LOG_DIRECTORY_LISTENER = $($ORACLE_BASE -replace '\\', '/')/diag/tnslsnr/ORACLE-DR/listener/alert
TRACE_DIRECTORY_LISTENER = $($ORACLE_BASE -replace '\\', '/')/diag/tnslsnr/ORACLE-DR/listener/trace
TRACE_LEVEL_LISTENER = OFF
"@

$listenerOra | Out-File -FilePath "$netAdminDir\listener.ora" -Encoding ASCII -Force
Write-Host " Created: $netAdminDir\listener.ora" -ForegroundColor Green

# Step 3: Create tnsnames.ora
Write-Host "[3/5] Creating tnsnames.ora..." -ForegroundColor Yellow
$tnsnamesOra = @"
# TNS Names Configuration for DR VM
# Generated: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")

ROA =
  (DESCRIPTION =
    (ADDRESS = (PROTOCOL = TCP)(HOST = $DR_IP)(PORT = $LISTENER_PORT))
    (CONNECT_DATA =
      (SERVER = DEDICATED)
      (SERVICE_NAME = ROA)
    )
  )

# Localhost connection
ROA_LOCAL =
  (DESCRIPTION =
    (ADDRESS = (PROTOCOL = TCP)(HOST = localhost)(PORT = $LISTENER_PORT))
    (CONNECT_DATA =
      (SERVER = DEDICATED)
      (SERVICE_NAME = ROA)
    )
  )
"@

$tnsnamesOra | Out-File -FilePath "$netAdminDir\tnsnames.ora" -Encoding ASCII -Force
Write-Host " Created: $netAdminDir\tnsnames.ora" -ForegroundColor Green

# Step 4: Create sqlnet.ora
Write-Host "[4/5] Creating sqlnet.ora..." -ForegroundColor Yellow
$sqlnetOra = @"
# SQL*Net Configuration for DR VM
# Generated: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")

NAMES.DIRECTORY_PATH = (TNSNAMES, EZCONNECT, HOSTNAME)

# Security settings
SQLNET.AUTHENTICATION_SERVICES = (NTS)
SQLNET.EXPIRE_TIME = 10

# Encryption (optional, enable if needed)
# SQLNET.ENCRYPTION_SERVER = REQUIRED
# SQLNET.CRYPTO_CHECKSUM_SERVER = REQUIRED
"@

$sqlnetOra | Out-File -FilePath "$netAdminDir\sqlnet.ora" -Encoding ASCII -Force
Write-Host " Created: $netAdminDir\sqlnet.ora" -ForegroundColor Green

# Step 5: Start listener
Write-Host "[5/5] Starting Oracle Listener..." -ForegroundColor Yellow

if (-not (Get-Command lsnrctl -ErrorAction SilentlyContinue)) {
    Write-Host " ERROR: Failed to start listener: lsnrctl not found in $ORACLE_HOME\bin" -ForegroundColor Red
    exit 1
}

# Stop listener if already running; a failure here just means it was not running
try {
    $null = Invoke-Lsnrctl "stop"
    Start-Sleep -Seconds 2
} catch {
    # Listener not running, continue
}

# Start listener
try {
    $startResult = Invoke-Lsnrctl "start"
    # Prefer the exit code; the text match is kept as a fallback for builds
    # whose exit code is unreliable.
    if ($startResult.ExitCode -eq 0 -or $startResult.Output -match "successfully") {
        Write-Host " Listener started successfully" -ForegroundColor Green
    } else {
        Write-Host " WARNING: Check listener status manually" -ForegroundColor Yellow
        Write-Host $startResult.Output -ForegroundColor Gray
    }
} catch {
    Write-Host " ERROR: Failed to start listener: $_" -ForegroundColor Red
    exit 1
}

Write-Host ""
Write-Host "=== Listener Configuration Complete ===" -ForegroundColor Green
Write-Host ""

# Verify listener status
Write-Host "Listener Status:" -ForegroundColor Cyan
& lsnrctl status
if ($LASTEXITCODE -ne 0) {
    Write-Host " WARNING: 'lsnrctl status' returned exit code $LASTEXITCODE - the listener may not be running" -ForegroundColor Yellow
}

Write-Host ""
Write-Host "Configuration files created:" -ForegroundColor Yellow
Write-Host " $netAdminDir\listener.ora" -ForegroundColor White
Write-Host " $netAdminDir\tnsnames.ora" -ForegroundColor White
Write-Host " $netAdminDir\sqlnet.ora" -ForegroundColor White
Write-Host ""
Write-Host "Test connectivity:" -ForegroundColor Yellow
Write-Host " tnsping ROA" -ForegroundColor White
Write-Host " sqlplus sys/password@ROA as sysdba" -ForegroundColor White
Write-Host ""
Write-Host "Next step: Create RMAN restore script" -ForegroundColor Cyan
Write-Host ""
|
||||
@@ -0,0 +1,78 @@
|
||||
# Copy Existing SSH Key to Proxmox
# Run this script on PRIMARY as Administrator.
#
# Copies the existing SSH public key from the SYSTEM profile to the Proxmox host,
# appends it to the remote user's authorized_keys and then checks that
# key-based (password-less) login works with the SYSTEM profile's private key.

param(
    # Proxmox host that receives the public key
    [string]$ProxmoxHost = "10.0.20.202",
    # Remote account whose authorized_keys file is updated
    [string]$ProxmoxUser = "root"
)

Write-Host "=========================================" -ForegroundColor Cyan
Write-Host "Copiere Cheie SSH Existentă → Proxmox DR" -ForegroundColor Cyan
Write-Host "=========================================" -ForegroundColor Cyan
Write-Host ""

$SystemSSHDir = "C:\Windows\System32\config\systemprofile\.ssh"
$PublicKeyPath = "$SystemSSHDir\id_rsa.pub"
$PrivateKeyPath = "$SystemSSHDir\id_rsa"
$RemoteTarget = "${ProxmoxUser}@${ProxmoxHost}"

# Verify that the public key exists
if (-not (Test-Path $PublicKeyPath)) {
    Write-Host "✗ Cheia publică nu există: $PublicKeyPath" -ForegroundColor Red
    Write-Host " Scriptul trebuie rulat ca Administrator!" -ForegroundColor Yellow
    exit 1
}

Write-Host "✓ Cheie publică găsită: $PublicKeyPath" -ForegroundColor Green
Write-Host ""
Write-Host "→ Copiez cheia publică pe Proxmox ($ProxmoxHost)..." -ForegroundColor Yellow
Write-Host " IMPORTANT: Vei fi întrebat de parola root pentru Proxmox!" -ForegroundColor Cyan
Write-Host ""

# Simple method: SCP a temporary copy of the key to the host, then append it remotely
$tempFile = "C:\Windows\Temp\temp_pubkey.pub"
Copy-Item $PublicKeyPath $tempFile -Force

# Copy the file to Proxmox; the local temporary copy is not needed afterwards
& scp $tempFile "${RemoteTarget}:/tmp/temp_pubkey.pub"
$scpExit = $LASTEXITCODE
Remove-Item $tempFile -Force -ErrorAction SilentlyContinue

if ($scpExit -ne 0) {
    Write-Host "✗ Eroare la copierea fișierului cu SCP" -ForegroundColor Red
    exit 1
}

# Append the key to authorized_keys.
# The steps are chained with '&&' and the remote shell exits with the status of
# that chain, so a failed mkdir/cat/chmod is reported through $LASTEXITCODE
# (a trailing 'echo' would otherwise always report success). The uploaded file
# is removed in every case. '~' resolves to the home directory of $ProxmoxUser.
# The string is single-quoted so PowerShell does not expand '$?' / '$rc'.
$remoteCommand = 'mkdir -p ~/.ssh && chmod 700 ~/.ssh && cat /tmp/temp_pubkey.pub >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys; rc=$?; rm -f /tmp/temp_pubkey.pub; [ $rc -eq 0 ] && echo SSH key added; exit $rc'
& ssh $RemoteTarget $remoteCommand
$addKeyExit = $LASTEXITCODE

if ($addKeyExit -eq 0) {
    Write-Host "✓ Cheie publică copiată pe Proxmox!" -ForegroundColor Green
} else {
    Write-Host "✗ Eroare la adăugarea cheii în authorized_keys" -ForegroundColor Red
    exit 1
}

Write-Host ""
Write-Host "→ Testez conexiunea SSH fără parolă..." -ForegroundColor Yellow

# Test the connection with the key from the SYSTEM profile.
# BatchMode=yes disables password prompts, so the test only succeeds when
# key-based authentication really works.
$testResult = & ssh -o BatchMode=yes -o StrictHostKeyChecking=no -i $PrivateKeyPath $RemoteTarget "echo 'SSH connection OK'" 2>&1

if ($LASTEXITCODE -eq 0) {
    Write-Host "✓ Conexiune SSH funcționează fără parolă!" -ForegroundColor Green
} else {
    Write-Host "✗ Conexiunea SSH nu funcționează direct din acest cont" -ForegroundColor Yellow
    Write-Host " Dar cheia a fost adăugată - scheduled tasks (SYSTEM) ar trebui să funcționeze" -ForegroundColor Yellow
    # Show what ssh reported to make the failure diagnosable
    if ($testResult) {
        Write-Host ($testResult | Out-String) -ForegroundColor Gray
    }
}

Write-Host ""
Write-Host "=========================================" -ForegroundColor Green
Write-Host "✓ Setup complet!" -ForegroundColor Green
Write-Host "=========================================" -ForegroundColor Green
Write-Host ""
Write-Host "Cheia din profilul SYSTEM: $PrivateKeyPath" -ForegroundColor Cyan
Write-Host "Scheduled tasks vor folosi această cheie automat." -ForegroundColor Yellow
Write-Host ""
|
||||
80
proxmox/vm109-windows-dr/scripts/fix_ssh_via_service.ps1
Normal file
80
proxmox/vm109-windows-dr/scripts/fix_ssh_via_service.ps1
Normal file
@@ -0,0 +1,80 @@
|
||||
# Fix SSH Keys by recreating through SSH service
# Run as Administrator on DR VM (10.0.20.37)

$ErrorActionPreference = "Stop"

# Safety net: with ErrorActionPreference=Stop any failing cmdlet terminates the
# script. A trap applies to the whole script scope, so if that happens while
# sshd is stopped it is started again before the script ends; otherwise the VM
# would be left without SSH access. 'break' re-raises the original error.
trap {
    Write-Host "ERROR: $_" -ForegroundColor Red
    Write-Host "Attempting to start sshd again..." -ForegroundColor Yellow
    Start-Service sshd -ErrorAction SilentlyContinue
    break
}

Write-Host "=== Fix SSH Keys via Service Method ===" -ForegroundColor Cyan
Write-Host ""

# Step 1: Stop SSH service
Write-Host "[1/4] Stopping SSH service..." -ForegroundColor Yellow
Stop-Service sshd
Start-Sleep -Seconds 2
Write-Host " SSH service stopped" -ForegroundColor Green
|
||||
|
||||
# Step 2: Delete the problematic file while service is stopped
|
||||
Write-Host "[2/4] Deleting old authorized_keys file..." -ForegroundColor Yellow
|
||||
$authKeysFile = "C:\ProgramData\ssh\administrators_authorized_keys"
|
||||
|
||||
if (Test-Path $authKeysFile) {
|
||||
# Try to take ownership first
|
||||
takeown /F $authKeysFile /A
|
||||
icacls $authKeysFile /grant Administrators:F
|
||||
Remove-Item $authKeysFile -Force
|
||||
Write-Host " Old file deleted" -ForegroundColor Green
|
||||
} else {
|
||||
Write-Host " File doesn't exist" -ForegroundColor Gray
|
||||
}
|
||||
|
||||
# Step 3: Create new file with both keys
|
||||
Write-Host "[3/4] Creating new authorized_keys file..." -ForegroundColor Yellow
|
||||
|
||||
$bothKeys = @"
|
||||
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC88mX/oQnAoU54kazp6iYmmg91IR8pbnYV3aw5aJfSsiSBUjqo+XbvrWRvq//lli48k2kuNfq8olKrPvqKHcIccbcbgFrES5k2ErSyXjvbUlxuyHFRIfBoXvAhMMX6LZR+4Qc0i3VThQ1PgY0tYDbf2XQBAyrog5EU9H/q2NzJEulTs7kSR0FIt1goWXqKJYLA9Pn7Ardt7doPzR8EH/spB8hXctO0BaAorX3p3rd4bvOZoOcht4pTmyJBRzoZRRlscCZRCOxjQDk+y4v9eOPzwMc0dRlVxIbqt8Sua5khGTlmeQTmDqxCmdtgrTNWT4hwPVG1L4Jfw2bgX3IqCGKB4juDUF+Eh6hrQeuTIF7xbCIGGy9N/lKIKO3vr4sTf51gVM9CWJ0bE/CTKbiRPfWbUXIUA4yZ96gJf0QAqcIcutnntomdtkdV8G1RYVKSQEE4oxF3mCRxR+1d5Fn/UXGlms9Q2u/QAq7n5BYLPczUFSkdBdfITOqiCIzlX8WpPD7v/vt8Wsbyf24B/FSYvp+X0AcX5qQbNeljChAxqRy6VNhmh5ucUkMFxfUSTWij+AVqmCPvxVVFKPw32G6jN59BmwirmIxd0i6wTRj3rrUuyO/6+kjErjthkYKFIDBAgdCnV0rrkrPRNKmbS0DtgRcID3ILq2UqR3AYmDf2azf8hQ== mmarius28@gmail.com
|
||||
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQD3EdHswdNDuDC9kJdUli2zGGPVlEWJjmqtb4eABYWwjQnWqjGp8oAFbQ+r2TxR544WtEyhDL9BU6oO3EFH957DBGQJHJvfRgx2VnkNZEzN/XX/7HK6Cp5rlINGGp26PjHulKkZjARmjC3YK0aUFEkiyNyUBqhtQpmcYP4+wjUfiiO2xUkF9mzGplbWGK3ZmEdkWNd5BNddqxmxyLvd2KHAo8F7Vux9SyPWqZ8bwiDyidAMDU7kCXS/RabUMl2LGajzFbRnR87YA7cIaVFl/IWExO/fsYlgkwmmmIAMdjINp0IWDdydnmG1XNNhM8h/BKY/eK3uile8CvyEbmbuf0+ePm3Ex9vTjn4jYN2vFE148FgQGGTibibJ+sXFoQ87VFNGEuog/V0aajVk/xcOihszsEvzD2IV/tSKJFdI6klnYLuieuMZf7Dvs/6sC1l3dnsBtcpvjnU48altRRZvQzaJf3gIkG1lRGBgyW1n+WHe/7StIveYTVNFtx+fcnqB8gm9fZQxBp2WRbLNFpY/Qj+6BF66b1A2ZxH/3F9Z/6VT91EActOf+AMxjsI+09d7IRYIvzr8OxMPYOHU2bglp3o86xZEMUXfcjB8Sw/8KMsCjBp3ABEN9/bwv1496aw9IC67ZBQ2cDDfgdBej5DAkT4NS2XIx7wbM7sBtLYjcXMi7w== administrator@ROA-CARAPETRU2
|
||||
"@
|
||||
|
||||
# Create the file
|
||||
$bothKeys | Out-File -FilePath $authKeysFile -Encoding ASCII -NoNewline -Force
|
||||
|
||||
# Set permissions using icacls (more reliable than PowerShell ACL)
|
||||
icacls $authKeysFile /inheritance:r
|
||||
icacls $authKeysFile /grant "NT AUTHORITY\SYSTEM:(F)"
|
||||
icacls $authKeysFile /grant "BUILTIN\Administrators:(R)"
|
||||
|
||||
Write-Host " New file created with correct permissions" -ForegroundColor Green
|
||||
|
||||
# Step 4: Start SSH service
|
||||
Write-Host "[4/4] Starting SSH service..." -ForegroundColor Yellow
|
||||
Start-Service sshd
|
||||
Start-Sleep -Seconds 2
|
||||
Write-Host " SSH service started" -ForegroundColor Green
|
||||
|
||||
# Verification
|
||||
Write-Host ""
|
||||
Write-Host "=== Verification ===" -ForegroundColor Cyan
|
||||
Write-Host ""
|
||||
|
||||
Write-Host "File permissions:" -ForegroundColor Yellow
|
||||
icacls $authKeysFile
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "File content (number of lines):" -ForegroundColor Yellow
|
||||
$lines = Get-Content $authKeysFile
|
||||
Write-Host " Total keys: $($lines.Count)" -ForegroundColor White
|
||||
|
||||
foreach ($line in $lines) {
|
||||
if ($line -match "ssh-rsa .+ (.+)$") {
|
||||
Write-Host " ✓ $($matches[1])" -ForegroundColor Green
|
||||
}
|
||||
}
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "SSH service status:" -ForegroundColor Yellow
|
||||
Get-Service sshd | Format-Table Name, Status, StartType -AutoSize
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "=== Setup Complete ===" -ForegroundColor Green
|
||||
Write-Host ""
|
||||
Write-Host "Next: Test SSH connection from PRIMARY server" -ForegroundColor Cyan
|
||||
Write-Host ""
|
||||
34
proxmox/vm109-windows-dr/scripts/initROA.ora
Normal file
34
proxmox/vm109-windows-dr/scripts/initROA.ora
Normal file
@@ -0,0 +1,34 @@
|
||||
# Initialization Parameters for ROA Database - DR VM
|
||||
# Generated: 2025-10-09 04:07:45
|
||||
|
||||
# Database Identification
|
||||
db_name=ROA
|
||||
db_unique_name=ROA
|
||||
|
||||
# Memory Configuration
|
||||
memory_target=1024M
|
||||
memory_max_target=1024M
|
||||
|
||||
# File Locations
|
||||
control_files=('C:\Users\oracle\oradata\ROA\control01.ctl', 'C:\Users\oracle\recovery_area\ROA\control02.ctl')
|
||||
db_recovery_file_dest='C:\Users\oracle\recovery_area'
|
||||
db_recovery_file_dest_size=20G
|
||||
audit_file_dest='C:\Users\oracle\admin\ROA\adump'
|
||||
|
||||
# Redo and Archive Log
|
||||
log_archive_format=%t_%s_%r.dbf
|
||||
|
||||
# Compatibility
|
||||
compatible=19.0.0
|
||||
|
||||
# NLS language and territory (locale defaults; these do not set the database character set)
|
||||
nls_language=AMERICAN
|
||||
nls_territory=AMERICA
|
||||
|
||||
# Processes and Sessions
|
||||
processes=300
|
||||
sessions=472
|
||||
|
||||
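# Miscellaneous
# WARNING: _allow_resetlogs_corruption is a hidden (underscore) Oracle parameter that lets
# OPEN RESETLOGS proceed without the usual datafile consistency checks. It is meant as a
# last-resort recovery aid; review whether it should stay in this pfile after a restore.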
|
||||
diagnostic_dest='C:\Users\oracle'
|
||||
_allow_resetlogs_corruption=TRUE
|
||||
@@ -0,0 +1,512 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Oracle Backup Monitor for Proxmox with PVE::Notify
|
||||
# Monitors Oracle backups and sends notifications via Proxmox notification system
|
||||
#
|
||||
# Location: /opt/scripts/oracle-backup-monitor-proxmox.sh (on Proxmox host)
|
||||
# Schedule: Add to cron for daily execution
|
||||
#
|
||||
# This script is SELF-SUFFICIENT:
|
||||
# - Automatically creates notification templates if they don't exist
|
||||
# - Uses Proxmox native notification system (same as HA alerts)
|
||||
# - No email configuration needed - uses existing Proxmox setup
|
||||
#
|
||||
# Installation:
|
||||
# cp oracle-backup-monitor-proxmox.sh /opt/scripts/
|
||||
# chmod +x /opt/scripts/oracle-backup-monitor-proxmox.sh
|
||||
# /opt/scripts/oracle-backup-monitor-proxmox.sh --install # Creates templates
|
||||
# crontab -e # Add: 0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh
|
||||
#
|
||||
# Author: Claude (based on ha-monitor.sh pattern)
|
||||
# Version: 1.0
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Configuration
|
||||
PRIMARY_HOST="10.0.20.36"
|
||||
PRIMARY_PORT="22122"
|
||||
PRIMARY_USER="Administrator"
|
||||
BACKUP_PATH="/mnt/pve/oracle-backups/ROA/autobackup"
|
||||
MAX_FULL_AGE_HOURS=25
|
||||
MAX_CUMULATIVE_AGE_HOURS=7
|
||||
TEMPLATE_DIR="/usr/share/pve-manager/templates/default"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Write the Oracle backup notification templates (subject, plain-text body and
# HTML body) as Handlebars files into $TEMPLATE_DIR. The directory is created
# when missing; existing template files are overwritten.
create_templates() {
  local prefix="$TEMPLATE_DIR/oracle-backup"

  printf '%b\n' "${GREEN}Creating Oracle backup notification templates...${NC}"

  # Make sure the target directory exists
  mkdir -p "$TEMPLATE_DIR"

  # Subject line
  cat <<'EOF' > "${prefix}-subject.txt.hbs"
Oracle Backup {{status}} | {{node}}
EOF

  # Plain-text body
  cat <<'EOF' > "${prefix}-body.txt.hbs"
Oracle Backup {{status}} | {{node}}
Date: {{date}}

SUMMARY
- Full backup: {{full_backup_age}}h (limit {{full_backup_limit}}h) -> {{#if full_backup_ok}}OK{{else}}CHECK{{/if}}
- Incremental: {{cumulative_backup_age}}h (limit {{cumulative_backup_limit}}h) -> {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}}
- Backups: {{total_backups}} files ({{total_size_label}})
- Disk usage: {{disk_usage}}%

{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}

{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}

FULL BACKUPS ({{full_backup_count}} files)
{{#if has_full_backups}}
{{#each full_backup_list}}
- {{this}}
{{/each}}
{{else}}
- none detected
{{/if}}

INCREMENTAL BACKUPS ({{incr_backup_count}} files)
{{#if has_incr_backups}}
{{#each incr_backup_list}}
- {{this}}
{{/each}}
{{else}}
- none detected
{{/if}}

Next check: +24h via Proxmox Monitor
EOF

  # HTML body (lightweight, Gmail-friendly markup)
  cat <<'EOF' > "${prefix}-body.html.hbs"
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Oracle Backup {{status}} | {{node}}</title>
</head>
<body style="margin:0;padding:16px;font-family:Arial,Helvetica,sans-serif;background:#ffffff;color:#2c3e50;">
<table style="width:100%;max-width:640px;margin:0 auto;border-collapse:collapse;">
<tr>
<td style="padding:0 0 12px 0;font-size:18px;font-weight:600;">
Oracle Backup {{status}} | {{node}}
</td>
</tr>
<tr>
<td style="padding:0 0 16px 0;font-size:13px;color:#6c757d;">
{{date}}
</td>
</tr>
<tr>
<td style="padding:12px;border:1px solid #e1e4e8;border-radius:4px;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr>
<td style="padding:4px 0;">Full backup</td>
<td style="padding:4px 0;text-align:right;">
{{full_backup_age}}h / {{full_backup_limit}}h · {{#if full_backup_ok}}OK{{else}}CHECK{{/if}}
</td>
</tr>
<tr>
<td style="padding:4px 0;">Incremental</td>
<td style="padding:4px 0;text-align:right;">
{{cumulative_backup_age}}h / {{cumulative_backup_limit}}h · {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}}
</td>
</tr>
<tr>
<td style="padding:4px 0;">Backups</td>
<td style="padding:4px 0;text-align:right;">{{total_backups}} files ({{total_size_label}})</td>
</tr>
<tr>
<td style="padding:4px 0;">Disk usage</td>
<td style="padding:4px 0;text-align:right;">{{disk_usage}}%</td>
</tr>
</table>
</td>
</tr>

{{#if has_errors}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff5f5;border:1px solid #f1b0b7;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#c82333;">Issues</td></tr>
{{#each errors}}
<tr><td style="padding:6px 12px;border-top:1px solid #f8d7da;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

{{#if has_warnings}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff8e5;border:1px solid #ffe8a1;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#856404;">Warnings</td></tr>
{{#each warnings}}
<tr><td style="padding:6px 12px;border-top:1px solid #ffe8a1;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:13px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">FULL Backups ({{full_backup_count}} files)</td></tr>
{{#if has_full_backups}}
{{#each full_backup_list}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• {{this}}</td></tr>
{{/each}}
{{else}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• none detected</td></tr>
{{/if}}
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:13px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">INCREMENTAL Backups ({{incr_backup_count}} files)</td></tr>
{{#if has_incr_backups}}
{{#each incr_backup_list}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• {{this}}</td></tr>
{{/each}}
{{else}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• none detected</td></tr>
{{/if}}
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;font-size:12px;color:#6c757d;">
Next automated check: +24h via Proxmox Monitor
</td>
</tr>
</table>
</body>
</html>
EOF

  printf '%b\n' "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}"
}
|
||||
|
||||
# Send a notification through the Proxmox notification system (PVE::Notify).
#
# Arguments:
#   $1 - severity; accepted for interface compatibility, the Perl helper reads
#        the severity from the "severity" key of the JSON payload
#   $2 - status label; accepted for interface compatibility (unused here)
#   $3 - JSON document used as template data; "node" is also exposed as the
#        "hostname" matching field
# Outputs: the helper's success or error message on stdout
# Returns: 0 when the notification was handed to PVE::Notify, otherwise the
#          helper's non-zero exit status
send_pve_notification() {
  local data="$3"
  local helper
  local rc=0

  # Unpredictable temp file instead of a fixed name in /tmp
  helper=$(mktemp "${TMPDIR:-/tmp}/oracle-notify.XXXXXX") || return 1

  # Perl helper: reads the JSON payload from stdin and calls PVE::Notify
  cat > "$helper" <<'PERL_SCRIPT'
#!/usr/bin/perl
use strict;
use warnings;
use PVE::Notify;
use JSON;

my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);

my $severity = $data->{severity} // 'info';
my $template_name = 'oracle-backup';

# Add fields for matching rules
my $fields = {
    type => 'oracle-backup',
    severity => $severity,
    hostname => $data->{node} // 'unknown',
};

# Send notification
eval {
    PVE::Notify::notify(
        $severity,
        $template_name,
        $data,
        $fields
    );
};

if ($@) {
    print "Error sending notification: $@\n";
    exit 1;
}

print "Notification sent successfully\n";
PERL_SCRIPT

  # Capture the status instead of letting 'set -e' abort here, so the helper
  # file is removed even when sending fails.
  printf '%s\n' "$data" | perl "$helper" || rc=$?

  rm -f -- "$helper"
  return "$rc"
}
|
||||
|
||||
# Print the paths of the backup pieces of one kind found directly in
# $BACKUP_PATH, newest first, one per line.
#   $1 - "full" (*FULL*.BKP, L0_*.BKP) or
#        "incr" (*INCR*.BKP, *INCREMENTAL*.BKP, *CUMULATIVE*.BKP, L1_*.BKP)
_oracle_list_backups() {
  local -a name_tests
  case "$1" in
    full)
      name_tests=(-name '*FULL*.BKP' -o -name 'L0_*.BKP')
      ;;
    incr)
      name_tests=(-name '*INCR*.BKP' -o -name '*INCREMENTAL*.BKP'
        -o -name '*CUMULATIVE*.BKP' -o -name 'L1_*.BKP')
      ;;
    *)
      return 2
      ;;
  esac
  find "$BACKUP_PATH" -maxdepth 1 -type f \( "${name_tests[@]}" \) \
    -printf '%T@ %p\n' | sort -nr | cut -d' ' -f2-
}

# Print "YYYY-MM-DD HH:MM | <file name> | <size>" for the backup file $1.
_oracle_describe_backup() {
  local file=$1
  local when size
  when=$(date -r "$file" '+%Y-%m-%d %H:%M') || when="N/A"
  size=$(du -sh -- "$file" 2>/dev/null | cut -f1) || true
  printf '%s | %s | %s\n' "$when" "${file##*/}" "${size:-N/A}"
}

# Print the arguments as a JSON array of strings ("[]" when there are none).
_oracle_json_array() {
  if (( $# == 0 )); then
    printf '[]\n'
  else
    printf '%s\n' "$@" | jq -R . | jq -s .
  fi
}

# Check the Oracle backups in $BACKUP_PATH and alert when something is wrong.
#
# Checks: the directory exists and contains *.BKP files; the newest FULL
# backup is not older than $MAX_FULL_AGE_HOURS; the newest incremental backup
# (if any) is not older than $MAX_CUMULATIVE_AGE_HOURS; disk usage of the
# backup filesystem (warning above 80%, error above 90%).
#
# Globals:  BACKUP_PATH, MAX_FULL_AGE_HOURS, MAX_CUMULATIVE_AGE_HOURS,
#           GREEN, YELLOW, NC (read)
# Outputs:  a short summary on stdout
# Effects:  calls send_pve_notification with a JSON payload when the overall
#           status is not OK
check_backups() {
  local status="OK"
  local -a errors=() warnings=()
  local -a full_backups=() incr_backups=()
  # Declared up front: they are read after the checks even when the backup
  # directory is missing or empty, which must not trip 'set -u'.
  local -a full_backup_entries=() incr_backup_entries=()
  local total_backups=0
  local total_size_label="0G"
  local full_age_hours="N/A"
  local cumulative_age_hours="N/A"
  local full_backup_ok=false
  local cumulative_backup_ok=false
  local disk_usage=0
  local backup_file total_size disk_usage_raw now mtime

  echo "Checking Oracle backups..."

  if [[ ! -d "$BACKUP_PATH" ]]; then
    status="ERROR"
    errors+=("Backup path $BACKUP_PATH not accessible")
  else
    # The pattern is quoted so compgen (not the calling shell) expands it
    if compgen -G "$BACKUP_PATH/*.BKP" > /dev/null; then
      total_backups=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' | wc -l) || true
      total_backups=${total_backups//[[:space:]]/}
      [[ -n "$total_backups" ]] || total_backups=0

      # '|| true': keep whatever du managed to report even if it hit an error
      total_size=$(du -shc "$BACKUP_PATH"/*.BKP 2>/dev/null | tail -1 | awk '{print $1}') || true
      total_size_label=${total_size:-0G}

      # FULL backups (old naming *FULL*.BKP and new naming L0_*.BKP), newest first
      mapfile -t full_backups < <(_oracle_list_backups full)
      if (( ${#full_backups[@]} > 0 )); then
        now=$(date +%s)
        mtime=$(stat -c %Y -- "${full_backups[0]}")
        full_age_hours=$(( (now - mtime) / 3600 ))
        if (( full_age_hours > MAX_FULL_AGE_HOURS )); then
          status="WARNING"
          warnings+=("FULL backup is $full_age_hours hours old (threshold: $MAX_FULL_AGE_HOURS)")
        else
          full_backup_ok=true
        fi
      else
        status="ERROR"
        errors+=("No FULL backup found")
      fi

      # INCREMENTAL backups (old naming *INCR*/*INCREMENTAL*/*CUMULATIVE*.BKP
      # and new naming L1_*.BKP), newest first. A missing incremental is not
      # reported; it only leaves cumulative_backup_ok=false.
      mapfile -t incr_backups < <(_oracle_list_backups incr)
      if (( ${#incr_backups[@]} > 0 )); then
        now=$(date +%s)
        mtime=$(stat -c %Y -- "${incr_backups[0]}")
        cumulative_age_hours=$(( (now - mtime) / 3600 ))
        if (( cumulative_age_hours > MAX_CUMULATIVE_AGE_HOURS )); then
          [[ "$status" == "ERROR" ]] || status="WARNING"
          warnings+=("CUMULATIVE backup is $cumulative_age_hours hours old (threshold: $MAX_CUMULATIVE_AGE_HOURS)")
        else
          cumulative_backup_ok=true
        fi
      fi

      # Human-readable listing of every FULL and INCREMENTAL backup
      for backup_file in "${full_backups[@]}"; do
        [[ -n "$backup_file" ]] || continue
        full_backup_entries+=("$(_oracle_describe_backup "$backup_file")")
      done
      for backup_file in "${incr_backups[@]}"; do
        [[ -n "$backup_file" ]] || continue
        incr_backup_entries+=("$(_oracle_describe_backup "$backup_file")")
      done
    else
      status="ERROR"
      errors+=("No backup files found in $BACKUP_PATH")
    fi

    disk_usage_raw=$(df "$BACKUP_PATH" 2>/dev/null | tail -1 | awk '{print int($5)}') || true
    if [[ -n "$disk_usage_raw" ]]; then
      disk_usage="$disk_usage_raw"
    else
      [[ "$status" != "OK" ]] || status="WARNING"
      warnings+=("Unable to determine disk usage for $BACKUP_PATH")
    fi
  fi

  if (( disk_usage > 90 )); then
    status="ERROR"
    errors+=("Disk usage critical: ${disk_usage}%")
  elif (( disk_usage > 80 )); then
    [[ "$status" == "ERROR" ]] || status="WARNING"
    warnings+=("Disk usage high: ${disk_usage}%")
  fi

  local severity
  case "$status" in
    ERROR) severity="error" ;;
    WARNING) severity="warning" ;;
    *) severity="info" ;;
  esac

  local errors_json warnings_json full_backup_list_json incr_backup_list_json
  errors_json=$(_oracle_json_array "${errors[@]}")
  warnings_json=$(_oracle_json_array "${warnings[@]}")
  full_backup_list_json=$(_oracle_json_array "${full_backup_entries[@]}")
  incr_backup_list_json=$(_oracle_json_array "${incr_backup_entries[@]}")

  # Build the payload with jq so every string is escaped correctly.
  local json_data
  json_data=$(jq -n \
    --arg severity "$severity" \
    --arg node "$(hostname)" \
    --arg date "$(date +'%Y-%m-%d %H:%M:%S')" \
    --arg status "$status" \
    --argjson errors "$errors_json" \
    --argjson warnings "$warnings_json" \
    --argjson total_backups "$total_backups" \
    --arg total_size_gb "${total_size_label%G}" \
    --arg total_size_label "$total_size_label" \
    --arg full_backup_age "$full_age_hours" \
    --arg cumulative_backup_age "$cumulative_age_hours" \
    --arg disk_usage "$disk_usage" \
    --argjson full_backup_ok "$full_backup_ok" \
    --argjson cumulative_backup_ok "$cumulative_backup_ok" \
    --argjson full_backup_list "$full_backup_list_json" \
    --argjson incr_backup_list "$incr_backup_list_json" \
    --arg full_backup_limit "$MAX_FULL_AGE_HOURS" \
    --arg cumulative_backup_limit "$MAX_CUMULATIVE_AGE_HOURS" \
    '{
      severity: $severity,
      node: $node,
      date: $date,
      status: $status,
      errors: $errors,
      warnings: $warnings,
      has_errors: ($errors | length > 0),
      has_warnings: ($warnings | length > 0),
      total_backups: $total_backups,
      total_size_gb: $total_size_gb,
      total_size_label: $total_size_label,
      full_backup_age: $full_backup_age,
      cumulative_backup_age: $cumulative_backup_age,
      disk_usage: $disk_usage,
      full_backup_ok: $full_backup_ok,
      cumulative_backup_ok: $cumulative_backup_ok,
      is_error: ($status == "ERROR"),
      is_warning: ($status == "WARNING"),
      full_backup_list: $full_backup_list,
      incr_backup_list: $incr_backup_list,
      has_full_backups: ($full_backup_list | length > 0),
      has_incr_backups: ($incr_backup_list | length > 0),
      full_backup_count: ($full_backup_list | length),
      incr_backup_count: ($incr_backup_list | length),
      full_backup_limit: $full_backup_limit,
      cumulative_backup_limit: $cumulative_backup_limit
    }')

  if [[ "$status" != "OK" ]]; then
    echo -e "${YELLOW}Issues detected, sending notification...${NC}"
    send_pve_notification "$severity" "$status" "$json_data"
  else
    echo -e "${GREEN}All backups are healthy${NC}"
    # Optionally send success notification (uncomment if desired)
    # send_pve_notification "info" "$status" "$json_data"
  fi

  echo "Status: $status"
  echo "Total backups: $total_backups"
  echo "Total size: $total_size_label"
  echo "FULL backup age: $full_age_hours hours"
  echo "CUMULATIVE backup age: $cumulative_age_hours hours"
  echo "Disk usage: ${disk_usage}%"
}
|
||||
|
||||
# Entry point; dispatches on the first argument.
#   --install  write the notification templates and print the next steps
#   --help     print usage
#   (other)    create the templates when the subject template is missing,
#              then run the backup check
main() {
  local mode="${1:-}"

  if [[ "$mode" == "--install" ]]; then
    create_templates
    echo ""
    echo -e "${GREEN}Installation complete!${NC}"
    echo "Next steps:"
    echo "1. Test the monitor: /opt/scripts/oracle-backup-monitor-proxmox.sh"
    echo "2. Add to cron: crontab -e"
    echo " Add line: 0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh"
    echo "3. Configure notifications in Proxmox GUI if needed:"
    echo " Datacenter > Notifications > Add matching rules for 'oracle-backup'"
  elif [[ "$mode" == "--help" ]]; then
    echo "Oracle Backup Monitor for Proxmox"
    echo "Usage:"
    echo " $0 - Check backups and send alerts if issues found"
    echo " $0 --install - Create notification templates"
    echo " $0 --help - Show this help"
  else
    # Self-heal: recreate the templates when the subject template is missing
    if [[ ! -f "$TEMPLATE_DIR/oracle-backup-subject.txt.hbs" ]]; then
      echo -e "${YELLOW}Templates not found, creating...${NC}"
      create_templates
      echo ""
    fi

    # Run the backup check
    check_backups
  fi
}
|
||||
|
||||
# Abort early when jq is not installed, then hand over to main.
if ! command -v jq > /dev/null 2>&1; then
  printf '%b\n' "${RED}Error: jq is not installed${NC}"
  printf '%s\n' "Install with: apt-get install jq"
  exit 1
fi

main "$@"
|
||||
43
proxmox/vm109-windows-dr/scripts/rman_backup.bat
Normal file
43
proxmox/vm109-windows-dr/scripts/rman_backup.bat
Normal file
@@ -0,0 +1,43 @@
|
||||
@echo off
setlocal enabledelayedexpansion

REM ===================================================================
REM Oracle RMAN FULL Backup with Logging
REM Runs at: 02:30 AM (scheduled task)
REM Duration: ~30 minutes
REM
REM Runs rman_backup.txt through RMAN, mirrors the output to the console
REM and to a timestamped log file, and exits with RMAN's exit code.
REM ===================================================================

REM Get script directory (where this .bat file is located)
set SCRIPTDIR=%~dp0
set LOGDIR=%SCRIPTDIR%logs
REM NOTE(review): the substring offsets assume a %date% layout ending in
REM DD?MM?YYYY (e.g. 09.10.2025) - confirm against the server's regional settings.
set TIMESTAMP=%date:~-4,4%%date:~-7,2%%date:~-10,2%_%time:~0,2%%time:~3,2%%time:~6,2%
REM Hours before 10:00 are space-padded; turn the space into a zero
set TIMESTAMP=%TIMESTAMP: =0%
set LOGFILE=%LOGDIR%\backup_full_%TIMESTAMP%.log

REM Create log directory if not exists
if not exist "%LOGDIR%" mkdir "%LOGDIR%"

echo ========================================= >> "%LOGFILE%" 2>&1
echo Oracle RMAN FULL Backup Started >> "%LOGFILE%" 2>&1
echo Date: %date% %time% >> "%LOGFILE%" 2>&1
echo Script Directory: %SCRIPTDIR% >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1
echo. >> "%LOGFILE%" 2>&1

REM Run RMAN backup - show in console AND save to log (using PowerShell tee).
REM "exit $LASTEXITCODE" makes PowerShell return RMAN's exit code; without it
REM the pipeline ends in Tee-Object and ERRORLEVEL is 0 even when RMAN failed.
REM NOTE(review): the SYS password is embedded in the connect string below and
REM is therefore stored in the repository - consider OS authentication or a wallet.
powershell -Command "& cmd.exe /c 'rman target sys/romfastsoft@roa cmdfile=\"%SCRIPTDIR%rman_backup.txt\"' 2>&1 | Tee-Object -FilePath '%LOGFILE%' -Append; exit $LASTEXITCODE"

set EXITCODE=%ERRORLEVEL%

echo. >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1
echo Oracle RMAN FULL Backup Completed >> "%LOGFILE%" 2>&1
echo Date: %date% %time% >> "%LOGFILE%" 2>&1
echo Exit Code: %EXITCODE% >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1

REM Print summary to console
echo [%date% %time%] RMAN FULL backup completed with exit code: %EXITCODE%
echo [%date% %time%] Log file: %LOGFILE%

exit /b %EXITCODE%
|
||||
30
proxmox/vm109-windows-dr/scripts/rman_backup.txt
Normal file
30
proxmox/vm109-windows-dr/scripts/rman_backup.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
RUN {
|
||||
CONFIGURE RETENTION POLICY TO REDUNDANCY 2;
|
||||
CONFIGURE CONTROLFILE AUTOBACKUP ON;
|
||||
CONFIGURE COMPRESSION ALGORITHM 'BASIC';
|
||||
|
||||
# Compressed full (level 0) backup plus archived logs (archived logs are deleted after they are backed up)
|
||||
# FORMAT: L0_<dbname>_<YYYYMMDD>_<set#>_<piece#>
|
||||
# Files will be stored in recovery area for easier transfer to DR
|
||||
BACKUP AS COMPRESSED BACKUPSET
|
||||
INCREMENTAL LEVEL 0
|
||||
TAG 'DAILY_FULL_COMPRESSED'
|
||||
FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\L0_%d_%T_%s_%p.BKP'
|
||||
DATABASE
|
||||
PLUS ARCHIVELOG DELETE INPUT
|
||||
FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\ARC_%d_%T_%s_%p.BKP';
|
||||
|
||||
# Back up the SPFILE and the control file
|
||||
BACKUP AS COMPRESSED BACKUPSET
|
||||
TAG 'SPFILE_BACKUP'
|
||||
FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\SPFILE_%d_%T_%s_%p.BKP'
|
||||
SPFILE;
|
||||
|
||||
BACKUP
|
||||
TAG 'CONTROLFILE_BACKUP'
|
||||
FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\CF_%d_%T_%s_%p.BKP'
|
||||
CURRENT CONTROLFILE;
|
||||
|
||||
# Clean up obsolete backups (keeps the last 2, per REDUNDANCY 2)
|
||||
DELETE NOPROMPT OBSOLETE;
|
||||
}
|
||||
43
proxmox/vm109-windows-dr/scripts/rman_backup_incremental.bat
Normal file
43
proxmox/vm109-windows-dr/scripts/rman_backup_incremental.bat
Normal file
@@ -0,0 +1,43 @@
|
||||
@echo off
setlocal enabledelayedexpansion

REM ===================================================================
REM Oracle RMAN INCREMENTAL/CUMULATIVE Backup with Logging
REM Runs at: 13:00 and 18:00 (scheduled tasks)
REM Duration: ~5-10 minutes
REM
REM Runs rman_backup_incremental.txt (located next to this script), shows
REM the RMAN output on the console, appends it to a timestamped log file
REM under logs\, and exits with the exit code of the RMAN run.
REM ===================================================================

REM Get script directory (where this .bat file is located)
set SCRIPTDIR=%~dp0
set LOGDIR=%SCRIPTDIR%logs
REM NOTE(review): the substring offsets below depend on the regional %date%
REM format of the account running the task - confirm they yield YYYYMMDD.
set TIMESTAMP=%date:~-4,4%%date:~-7,2%%date:~-10,2%_%time:~0,2%%time:~3,2%%time:~6,2%
REM %time% pads hours before 10 with a space; turn that into a leading zero.
set TIMESTAMP=%TIMESTAMP: =0%
set LOGFILE=%LOGDIR%\backup_incremental_%TIMESTAMP%.log

REM Create log directory if not exists
if not exist "%LOGDIR%" mkdir "%LOGDIR%"

echo ========================================= >> "%LOGFILE%" 2>&1
echo Oracle RMAN INCREMENTAL Backup Started >> "%LOGFILE%" 2>&1
echo Date: %date% %time% >> "%LOGFILE%" 2>&1
echo Script Directory: %SCRIPTDIR% >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1
echo. >> "%LOGFILE%" 2>&1

REM Run RMAN backup - show in console AND save to log (using PowerShell tee).
REM "exit $LASTEXITCODE" is required: without it powershell.exe reports only
REM whether the last pipeline element (Tee-Object) succeeded, so an RMAN
REM failure would not be reflected in the exit code recorded below.
REM NOTE(review): the SYS password is hardcoded on this command line; consider
REM OS authentication ("rman target /") or another secret source.
REM NOTE(review): Tee-Object and the cmd "echo" redirections may write the log
REM in different encodings on Windows PowerShell 5.1 - confirm it is readable.
powershell -Command "& cmd.exe /c 'rman target sys/romfastsoft@roa cmdfile=\"%SCRIPTDIR%rman_backup_incremental.txt\"' 2>&1 | Tee-Object -FilePath '%LOGFILE%' -Append; exit $LASTEXITCODE"

set EXITCODE=%ERRORLEVEL%

echo. >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1
echo Oracle RMAN INCREMENTAL Backup Completed >> "%LOGFILE%" 2>&1
echo Date: %date% %time% >> "%LOGFILE%" 2>&1
echo Exit Code: %EXITCODE% >> "%LOGFILE%" 2>&1
echo ========================================= >> "%LOGFILE%" 2>&1

REM Print summary to console
echo [%date% %time%] RMAN INCREMENTAL backup completed with exit code: %EXITCODE%
echo [%date% %time%] Log file: %LOGFILE%

exit /b %EXITCODE%
|
||||
26
proxmox/vm109-windows-dr/scripts/rman_backup_incremental.txt
Normal file
26
proxmox/vm109-windows-dr/scripts/rman_backup_incremental.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
RUN {
    # Incremental Level 1 CUMULATIVE backup: backs up only the changes made
    # since the last Level 0 (the nightly full backup), plus archived logs,
    # which are deleted from disk after being backed up (DELETE INPUT).
    # Piece name format: L1_<dbname>_<YYYYMMDD>_<set#>_<piece#>
    # Pieces are written to the recovery area for easier transfer to DR.
    BACKUP AS COMPRESSED BACKUPSET
        INCREMENTAL LEVEL 1 CUMULATIVE
        TAG 'MIDDAY_INCREMENTAL'
        FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\L1_%d_%T_%s_%p.BKP'
        DATABASE
        PLUS ARCHIVELOG DELETE INPUT
        FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\ARC_%d_%T_%s_%p.BKP';

    # SPFILE and control file backups as well (for safety).
    BACKUP AS COMPRESSED BACKUPSET
        TAG 'SPFILE_BACKUP'
        FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\SPFILE_%d_%T_%s_%p.BKP'
        SPFILE;

    BACKUP
        TAG 'CONTROLFILE_BACKUP'
        FORMAT 'C:\Users\oracle\recovery_area\ROA\autobackup\CF_%d_%T_%s_%p.BKP'
        CURRENT CONTROLFILE;

    # Obsolete backups are NOT deleted here - that is done by the full backup.
}
|
||||
550
proxmox/vm109-windows-dr/scripts/rman_restore_from_zero.ps1
Normal file
550
proxmox/vm109-windows-dr/scripts/rman_restore_from_zero.ps1
Normal file
@@ -0,0 +1,550 @@
|
||||
# RMAN Restore Database FROM ZERO - Clean State (PowerShell)
# Backups are on F:\ (NFS mount from Proxmox host)
# Run as: Administrator
# Location: D:\oracle\scripts\rman_restore_from_zero.ps1
#
# Parameters:
#   -TestMode: Skip service reconfiguration and Listener startup (for weekly DR tests)

param(
    [switch]$TestMode
)

# Non-terminating cmdlet errors are reported but do not stop the script.
$ErrorActionPreference = "Continue"

# Oracle environment for the sqlplus / rman / oradim / lsnrctl calls.
$env:ORACLE_HOME = "C:\Users\Administrator\Downloads\WINDOWS.X64_193000_db_home"
$env:ORACLE_SID  = "ROA"
$env:PATH        = "$env:ORACLE_HOME\bin;$env:PATH"

# Startup banner: target database, backup source and the three phases.
$bannerLines = @(
    "============================================",
    "RMAN Database Restore FROM ZERO",
    "============================================",
    "",
    "Database: ROA",
    "DBID: 1363569330",
    "Backups: F:\ROA\autobackup (NFS mount from Proxmox)",
    "",
    "This script will:",
    " 1. CLEANUP: Delete any existing database files",
    " 2. RESTORE: Restore from F:\ backups",
    " 3. VERIFY: Check database is working",
    ""
)
foreach ($bannerLine in $bannerLines) {
    Write-Host $bannerLine
}
|
||||
|
||||
# Verify F:\ mount is accessible; stop with troubleshooting hints when the
# backup directory cannot be reached.
if (Test-Path "F:\ROA\autobackup") {
    Write-Host "[OK] F:\ROA\autobackup is accessible"
    Write-Host ""
} else {
    Write-Host "ERROR: F:\ROA\autobackup not accessible!" -ForegroundColor Red
    Write-Host ""
    Write-Host "Please verify:"
    Write-Host " 1. F:\ drive is mounted: dir F:\"
    Write-Host " 2. NFS mount command: mount -o rw,nolock,mtype=hard,timeout=60 10.0.20.202:/mnt/pve/oracle-backups F:"
    Write-Host " 3. Proxmox host is reachable: ping 10.0.20.202"
    Write-Host ""
    exit 1
}
|
||||
|
||||
# Create the working directories (temp for the generated RMAN script, logs
# for the restore log). -Force makes this a no-op when they already exist.
try {
    foreach ($requiredDir in "D:\oracle\temp", "D:\oracle\logs") {
        New-Item -ItemType Directory -Path $requiredDir -Force -ErrorAction Stop | Out-Null
    }
    Write-Host "[OK] Created required directories"
} catch {
    Write-Host "ERROR: Failed to create directories: $_" -ForegroundColor Red
    exit 1
}
|
||||
|
||||
foreach ($headerLine in @(
        "============================================",
        "STEP 1: CLEANUP - Delete existing database",
        "============================================",
        "",
        "Calling cleanup_database.ps1...",
        "")) {
    Write-Host $headerLine
}

# Run the cleanup script with the /SILENT flag; it signals failure through
# its exit code.
$cleanupScript = "D:\oracle\scripts\cleanup_database.ps1"
& $cleanupScript /SILENT
if ($LASTEXITCODE -ne 0) {
    Write-Host ""
    Write-Host "ERROR: Cleanup failed!" -ForegroundColor Red
    exit 1
}

Write-Host ""
Write-Host "[OK] Cleanup complete - VM is in clean state"
Write-Host ""
Write-Host "Starting restore in 2 seconds..."
Start-Sleep -Seconds 2
|
||||
|
||||
Write-Host "============================================"
Write-Host "STEP 2: RESTORE - Restore from F:\ backups"
Write-Host "============================================"
Write-Host ""

# Step 2.1: Check Oracle service (create only if missing)
Write-Host "[2.1] Checking Oracle service..."
$service = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue

if ($service) {
    Write-Host "[OK] Oracle service already exists, skipping creation (15s saved!)" -ForegroundColor Green
    Write-Host " Service will be reused for this restore"

    # Ensure service is running (required for SQL*Plus connection)
    if ($service.Status -ne "Running") {
        Write-Host " Service is stopped, starting it (may take 30-60 seconds)..."

        # Start the service in a background job so a hanging Start-Service
        # cannot block the script; progress is observed by polling below.
        $startJob = Start-Job -ScriptBlock {
            Start-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue
        }

        # Poll for service status with timeout
        $maxWait = 60
        $elapsed = 0
        $pollInterval = 3
        $serviceStarted = $false

        while ($elapsed -lt $maxWait) {
            Start-Sleep -Seconds $pollInterval
            $elapsed += $pollInterval

            # Refresh service status
            $currentService = Get-Service -Name "OracleServiceROA" -ErrorAction SilentlyContinue
            if ($currentService -and $currentService.Status -eq "Running") {
                $serviceStarted = $true
                Write-Host " [OK] Service started successfully after $elapsed seconds"
                break
            }

            if ($elapsed % 15 -eq 0) {
                Write-Host " Still waiting for service to start... ($elapsed/$maxWait seconds)"
            }
        }

        # Cleanup background job
        Stop-Job -Job $startJob -ErrorAction SilentlyContinue
        Remove-Job -Job $startJob -ErrorAction SilentlyContinue

        if (-not $serviceStarted) {
            # Deliberately best-effort: warn and keep going.
            Write-Host " WARNING: Service did not start within $maxWait seconds" -ForegroundColor Yellow
            Write-Host " This may cause SQL*Plus connection issues (ORA-12560)"
            Write-Host " Attempting to continue anyway..."
        }
    } else {
        Write-Host " Service is already running"
    }
} else {
    Write-Host " Oracle service not found, creating from PFILE..."

    # Check if PFILE exists, create if missing
    $pfilePath = "C:\Users\oracle\admin\ROA\pfile\initROA.ora"
    if (-not (Test-Path $pfilePath)) {
        Write-Host " PFILE not found, creating default initROA.ora..." -ForegroundColor Yellow

        # Create directory if needed
        $pfileDir = Split-Path $pfilePath -Parent
        if (-not (Test-Path $pfileDir)) {
            New-Item -ItemType Directory -Path $pfileDir -Force | Out-Null
        }

        # Default PFILE content. The $(Get-Date ...) expression is expanded
        # when the here-string is evaluated.
        # NOTE(review): _allow_resetlogs_corruption is a hidden parameter that
        # lets OPEN RESETLOGS proceed on inconsistent datafiles - confirm it is
        # intended, especially for a real (non-test) DR restore.
        $pfileContent = @"
# Initialization Parameters for ROA Database - DR VM
# Auto-generated by rman_restore_from_zero.ps1 - $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')

# Database Identification
db_name=ROA
db_unique_name=ROA

# Memory Configuration
memory_target=1024M
memory_max_target=1024M

# File Locations
control_files=('C:\Users\oracle\oradata\ROA\control01.ctl', 'C:\Users\oracle\recovery_area\ROA\control02.ctl')
db_recovery_file_dest='C:\Users\oracle\recovery_area'
db_recovery_file_dest_size=50G
audit_file_dest='C:\Users\oracle\admin\ROA\adump'

# Redo and Archive Log
log_archive_format=%t_%s_%r.dbf

# Compatibility
compatible=19.0.0

# Character Set
nls_language=AMERICAN
nls_territory=AMERICA

# Processes and Sessions
processes=300
sessions=472

# Miscellaneous
diagnostic_dest='C:\Users\oracle'
_allow_resetlogs_corruption=TRUE
"@

        try {
            $pfileContent | Out-File -FilePath $pfilePath -Encoding ASCII -ErrorAction Stop
            Write-Host " [OK] Created PFILE: $pfilePath" -ForegroundColor Green
        } catch {
            Write-Host "ERROR: Failed to create PFILE: $_" -ForegroundColor Red
            exit 1
        }
    }

    # Create the Windows service. The oradim output is kept (instead of being
    # discarded) so the reason can be shown when creation fails.
    $oradimOutput = & oradim -new -sid ROA -startmode auto -pfile $pfilePath 2>&1
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: Failed to create Oracle service" -ForegroundColor Red
        foreach ($oradimLine in $oradimOutput) {
            Write-Host "       $oradimLine" -ForegroundColor Red
        }
        exit 1
    }
    Write-Host "[OK] Oracle service created successfully (AUTOMATIC startup)"
    Start-Sleep -Seconds 2
}
|
||||
|
||||
# Step 2.2: Startup NOMOUNT
Write-Host "[2.2] Starting database in NOMOUNT mode..."

# First, ensure any partially started instance is shut down
# (Service auto-start may have started instance in error state without control files)
Write-Host " Ensuring clean state - shutting down any existing instance..."
$cleanupSQL = @"
WHENEVER SQLERROR CONTINUE
SHUTDOWN ABORT;
EXIT;
"@
$cleanupSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null

# Now start cleanly in NOMOUNT.
# A plain EXIT makes sqlplus return 0 even when STARTUP failed, so the
# instance status is queried afterwards under WHENEVER SQLERROR EXIT FAILURE:
# if the instance is not up, that query fails and sqlplus exits non-zero.
Write-Host " Starting fresh instance in NOMOUNT mode..."
$nomountSQL = @"
STARTUP NOMOUNT PFILE='C:\Users\oracle\admin\ROA\pfile\initROA.ora';
WHENEVER SQLERROR EXIT FAILURE
SELECT STATUS FROM V`$INSTANCE;
EXIT;
"@

$nomountSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
if ($LASTEXITCODE -ne 0) {
    Write-Host "ERROR: Failed to startup NOMOUNT" -ForegroundColor Red
    exit 1
}
Write-Host "[OK] Database started in NOMOUNT mode"
Start-Sleep -Seconds 3
|
||||
|
||||
# Step 2.3: Copy backups and create RMAN script
Write-Host "[2.3] Preparing RMAN restore..."

# Paths of the generated RMAN command file and of the RMAN log.
$rmanScript = "D:\oracle\temp\restore_from_zero.rman"
$logFile = "D:\oracle\logs\restore_from_zero.log"

# Make sure the local copy target exists (no-op when it already does).
New-Item -ItemType Directory -Path "C:\Users\oracle\recovery_area\ROA\autobackup" -Force | Out-Null

# Announce which copy strategy will be used for this run.
if (-not $TestMode) {
    Write-Host "[INFO] Copying all backups from F:\ROA\autobackup to recovery area..."
    Write-Host " STANDALONE MODE: All backups for maximum DR safety"
} else {
    Write-Host "[INFO] Copying selected backups from F:\ROA\autobackup to recovery area..."
    Write-Host " TEST MODE: Only latest backup set (faster for weekly tests)"
}
|
||||
|
||||
# Select backup files based on mode (TestMode vs Standalone).
# Result: $backupFiles (always an array).
# Get-ChildItem runs with -ErrorAction Stop so that an unreadable F:\ share
# reaches the catch block; with "Continue" the catch could never run.
try {
    if ($TestMode) {
        # TEST MODE: Copy latest L0 backup set + all incrementals/archives
        Write-Host "[INFO] TEST MODE: Selecting latest backup set using naming convention..." -ForegroundColor Cyan

        # Check if new naming convention is in use (L0_*, L1_*, etc.)
        $l0Backups = @(Get-ChildItem "F:\ROA\autobackup\L0_*.BKP" -ErrorAction Stop |
            Sort-Object LastWriteTime -Descending)

        if ($l0Backups.Count -gt 0) {
            # New naming convention detected - use smart selection
            Write-Host "[INFO] Using naming convention for optimized backup selection" -ForegroundColor Cyan

            $latestL0 = $l0Backups[0]
            # Extract date from filename: L0_ROA_20251011_123_1.BKP -> 20251011
            if ($latestL0.Name -match 'L0_\w+_(\d{8})_') {
                $backupDate = $Matches[1]
                Write-Host "[INFO] Latest Level 0 backup date: $backupDate" -ForegroundColor Cyan
                Write-Host " Base file: $($latestL0.Name)" -ForegroundColor Cyan

                # Select the files of this backup set:
                # - L0_*_<date>_*      all pieces of the latest Level 0
                # - L1_* and ARC_*     ALL of them, regardless of date
                # - SPFILE_*/CF_*      only those from the Level 0 date
                # - O1_MF_S_*          control file autobackups
                $backupFiles = @(Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Stop |
                    Where-Object {
                        $_.Name -match "^L0_\w+_${backupDate}_" -or      # Level 0 pieces
                        $_.Name -match "^L1_\w+_\d{8}_" -or              # All Level 1 incrementals
                        $_.Name -match "^ARC_\w+_\d{8}_" -or             # All archive logs
                        $_.Name -match "^SPFILE_\w+_${backupDate}_" -or  # SPFILE from same day
                        $_.Name -match "^CF_\w+_${backupDate}_" -or      # Controlfile from same day
                        $_.Name -match "^O1_MF_S_"                       # Autobackup control files (always needed)
                    })

                Write-Host "[INFO] Selected $($backupFiles.Count) files for restore:" -ForegroundColor Cyan
                Write-Host " - Level 0 pieces for date $backupDate" -ForegroundColor Cyan
                Write-Host " - All Level 1 incrementals" -ForegroundColor Cyan
                Write-Host " - All archive logs" -ForegroundColor Cyan
                Write-Host " - SPFILE and Control file backups" -ForegroundColor Cyan

            } else {
                Write-Host "WARNING: Cannot parse date from L0 filename, using ALL L0/L1/ARC files" -ForegroundColor Yellow
                $backupFiles = @(Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Stop |
                    Where-Object { $_.Name -match "^(L0_|L1_|ARC_|SPFILE_|CF_)" })
            }

        } else {
            # Old naming convention (autobackup format) - fallback to copying all
            Write-Host "[INFO] Old naming convention detected - copying ALL backups for safety" -ForegroundColor Yellow
            Write-Host " (New naming convention will be used after next backup runs)" -ForegroundColor Yellow
            $backupFiles = @(Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Stop)
        }

        Write-Host "[INFO] Total files selected: $($backupFiles.Count)" -ForegroundColor Cyan

    } else {
        # STANDALONE MODE: Copy ALL backups (disaster recovery - maximum safety with fallback)
        Write-Host "[INFO] STANDALONE MODE: Copying ALL backups for maximum DR safety..." -ForegroundColor Yellow
        $backupFiles = @(Get-ChildItem "F:\ROA\autobackup\*.BKP" -ErrorAction Stop)
        Write-Host "[INFO] Full DR restore - will copy all available backups (includes redundancy)" -ForegroundColor Yellow
    }
} catch {
    Write-Host "WARNING: Cannot enumerate backup files on F: drive - $_" -ForegroundColor Yellow
    $backupFiles = @()
}
|
||||
|
||||
# Validate backup count: fewer than $minRequired selected files aborts the
# restore after listing what is actually present on the share.
$minRequired = 2
if ($backupFiles.Count -lt $minRequired) {
    Write-Host "ERROR: Insufficient backup files found on F: drive (found: $($backupFiles.Count))" -ForegroundColor Red
    Write-Host " At least $minRequired backup files required for successful restore"
    Write-Host " Checking F:\ROA\autobackup directory..."
    try {
        # -ErrorAction Stop so an inaccessible directory reaches the catch
        # block (with "Continue" the catch could never run).
        $dirCheck = @(Get-ChildItem "F:\ROA\autobackup" -ErrorAction Stop)
        Write-Host " Directory contents: $($dirCheck.Count) files"
        foreach ($file in $dirCheck) {
            Write-Host " $($file.Name) - $([math]::Round($file.Length / 1GB, 2)) GB" -ForegroundColor Gray
        }
    } catch {
        Write-Host " Cannot access directory: $_" -ForegroundColor Red
    }
    exit 1
}

Write-Host "[INFO] Found $($backupFiles.Count) backup files, total size: $([math]::Round(($backupFiles | Measure-Object -Property Length -Sum).Sum / 1GB, 2)) GB"
|
||||
|
||||
# Copy the selected backups into the local recovery area. Failures are
# collected per file so all of them are reported before the script aborts.
Write-Host "[INFO] Starting backup copy operation..."
$copyErrors = @()
foreach ($backupFile in $backupFiles) {
    try {
        Write-Host "[INFO] Copying $($backupFile.Name)..."
        Copy-Item $backupFile.FullName "C:\Users\oracle\recovery_area\ROA\autobackup\" -Force -ErrorAction Stop
        Write-Host "[OK] Copied $($backupFile.Name)" -ForegroundColor Green
    } catch {
        Write-Host "ERROR: Failed to copy $($backupFile.Name) - $_" -ForegroundColor Red
        $copyErrors += "$($backupFile.Name): $_"
    }
}

if ($copyErrors.Count -gt 0) {
    Write-Host "ERROR: Backup copy failed for $($copyErrors.Count) files" -ForegroundColor Red
    # The loop variable is deliberately not named $error: that would shadow
    # PowerShell's automatic $Error variable.
    foreach ($copyError in $copyErrors) {
        Write-Host " $copyError" -ForegroundColor Red
    }
    exit 1
}
|
||||
|
||||
# Verify copied backups: every selected file must exist in the recovery area
# with the same size as its source. Checking each file (instead of comparing
# the total number of *.BKP files) is not confused by other files already in
# the target directory and also detects truncated copies.
$recoveryAreaDir = "C:\Users\oracle\recovery_area\ROA\autobackup"
$copiedFiles = @()
$verifyErrors = @()
foreach ($backupFile in $backupFiles) {
    $copiedFile = Get-Item -LiteralPath (Join-Path $recoveryAreaDir $backupFile.Name) -ErrorAction SilentlyContinue
    if (-not $copiedFile) {
        $verifyErrors += "$($backupFile.Name): missing in recovery area"
    } elseif ($copiedFile.Length -ne $backupFile.Length) {
        $verifyErrors += "$($backupFile.Name): size mismatch (source $($backupFile.Length) bytes, copy $($copiedFile.Length) bytes)"
    } else {
        $copiedFiles += $copiedFile
    }
}

if ($verifyErrors.Count -gt 0) {
    Write-Host "ERROR: Backup copy verification failed for $($verifyErrors.Count) files" -ForegroundColor Red
    Write-Host " Expected: $($backupFiles.Count), Copied: $($copiedFiles.Count)"
    foreach ($verifyError in $verifyErrors) {
        Write-Host " $verifyError" -ForegroundColor Red
    }
    exit 1
}

Write-Host "[OK] All $($copiedFiles.Count) backups copied and verified to recovery area"
|
||||
|
||||
# Create RMAN script: restore the control file from autobackup, mount,
# catalog the copied backup pieces, restore and recover the database, open
# it with RESETLOGS and delete obsolete backups.
# NOTE(review): RECOVER DATABASE NOREDO applies incremental backups only and
# no archived redo - confirm this is intended for backups taken while the
# database was open.
$rmanContent = @"
SET DBID 1363569330;

RUN {
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
RESTORE CONTROLFILE FROM AUTOBACKUP;
RELEASE CHANNEL ch1;
}

ALTER DATABASE MOUNT;

CATALOG START WITH 'C:/USERS/ORACLE/RECOVERY_AREA/ROA/AUTOBACKUP' NOPROMPT;

CROSSCHECK BACKUP;
DELETE NOPROMPT EXPIRED BACKUP;

RUN {
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
ALLOCATE CHANNEL ch2 DEVICE TYPE DISK;
RESTORE DATABASE;
RELEASE CHANNEL ch1;
RELEASE CHANNEL ch2;
}

RUN {
ALLOCATE CHANNEL ch1 DEVICE TYPE DISK;
RECOVER DATABASE NOREDO;
RELEASE CHANNEL ch1;
}

ALTER DATABASE OPEN RESETLOGS;

DELETE NOPROMPT OBSOLETE;

EXIT;
"@

# Write the command file; abort when it cannot be written so RMAN is never
# started with a missing or stale script.
try {
    $rmanContent | Out-File -FilePath $rmanScript -Encoding ASCII -ErrorAction Stop
    Write-Host "[OK] RMAN script created: $rmanScript"
} catch {
    Write-Host "ERROR: Failed to create RMAN script: $_" -ForegroundColor Red
    exit 1
}
|
||||
|
||||
# Step 2.4: Run RMAN restore
Write-Host "[2.4] Running RMAN restore (this will take 10-20 minutes)..."
Write-Host " Log file: $logFile"
Write-Host ""

# Run the generated command file against the local instance; RMAN writes its
# output to $logFile.
$rmanArgs = @("target", "/", "cmdfile=$rmanScript", "log=$logFile")
& rman @rmanArgs

$rmanSucceeded = ($LASTEXITCODE -eq 0)
if (-not $rmanSucceeded) {
    Write-Host ""
    Write-Host "ERROR: RMAN restore failed!" -ForegroundColor Red
    Write-Host "Check log: $logFile"
    exit 1
}

Write-Host ""
Write-Host "[OK] RMAN restore completed successfully!"
Write-Host ""
|
||||
|
||||
foreach ($headerLine in @(
        "============================================",
        "STEP 3: VERIFY - Check database status",
        "============================================",
        "",
        "[3.1] Verifying database...")) {
    Write-Host $headerLine
}

# Summary queries: database name/open mode, instance status and object
# counts. A single-quoted here-string is used so the $ in V$DATABASE and
# V$INSTANCE needs no escaping; the SQL text sent to sqlplus is unchanged.
$verifySQL = @'
SET PAGESIZE 100 LINESIZE 200
COLUMN info FORMAT A80
SELECT 'DB_NAME: ' || NAME || ', OPEN_MODE: ' || OPEN_MODE AS info FROM V$DATABASE;
SELECT 'INSTANCE: ' || INSTANCE_NAME || ', STATUS: ' || STATUS AS info FROM V$INSTANCE;
SELECT 'TABLESPACES: ' || COUNT(*) AS info FROM DBA_TABLESPACES;
SELECT 'DATAFILES: ' || COUNT(*) AS info FROM DBA_DATA_FILES;
SELECT 'TABLES: ' || COUNT(*) AS info FROM DBA_TABLES WHERE OWNER NOT IN ('SYS','SYSTEM');
EXIT;
'@

# Output is shown on the console (not suppressed) for the operator.
$verifySQL | & sqlplus -S / as sysdba
|
||||
|
||||
Write-Host ""
Write-Host "[3.2] Creating SPFILE for database persistence..."
# WHENEVER SQLERROR EXIT FAILURE makes sqlplus return a non-zero exit code
# when CREATE SPFILE fails; with a plain EXIT it returns 0 and the failure
# branch below could not detect the error.
$spfileSQL = @"
WHENEVER SQLERROR EXIT FAILURE
CREATE SPFILE FROM PFILE='C:\Users\oracle\admin\ROA\pfile\initROA.ora';
EXIT;
"@

$spfileSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
if ($LASTEXITCODE -ne 0) {
    Write-Host "WARNING: Failed to create SPFILE - database may not persist after connections close" -ForegroundColor Yellow
} else {
    Write-Host "[OK] SPFILE created successfully"

    # Check if running in TestMode (weekly DR test)
    if ($TestMode) {
        Write-Host "[3.3] Running in TEST MODE - skipping service reconfiguration"
        Write-Host " Database is OPEN and ready for verification"
        Write-Host " Service will remain configured with PFILE (OK for testing)"
    } else {
        # Full configuration for standalone/production use
        Write-Host "[3.3] Reconfiguring Oracle service to use SPFILE..."

        # Shutdown database cleanly
        Write-Host " Shutting down database temporarily..."
        $shutdownSQL = @"
SHUTDOWN IMMEDIATE;
EXIT;
"@
        $shutdownSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
        Start-Sleep -Seconds 3

        # Delete and recreate service with SPFILE
        Write-Host " Recreating service with SPFILE..."
        & oradim -delete -sid ROA 2>&1 | Out-Null
        Start-Sleep -Seconds 2
        & oradim -new -sid ROA -startmode auto -spfile 2>&1 | Out-Null

        if ($LASTEXITCODE -ne 0) {
            Write-Host " WARNING: Failed to recreate service with SPFILE" -ForegroundColor Yellow
        } else {
            Write-Host " [OK] Service now configured with SPFILE and AUTOMATIC startup"
        }

        # Restart database and confirm it really opened: the dictionary view
        # query only succeeds on an open database, and WHENEVER SQLERROR turns
        # a failure into a non-zero sqlplus exit code.
        Write-Host " Starting database with SPFILE..."
        $startupSQL = @"
STARTUP;
WHENEVER SQLERROR EXIT FAILURE
SELECT COUNT(*) FROM DBA_TABLESPACES;
EXIT;
"@
        $startupSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
        $databaseRestarted = ($LASTEXITCODE -eq 0)
        Start-Sleep -Seconds 3
        if ($databaseRestarted) {
            Write-Host "[OK] Database restarted with SPFILE configuration"
        } else {
            Write-Host "WARNING: Database did not open after restart with SPFILE - check the Oracle alert log" -ForegroundColor Yellow
        }

        # Start Oracle Listener
        Write-Host "[3.4] Starting Oracle Listener..."

        # Set Listener service to AUTOMATIC and start it
        Set-Service -Name "OracleOraDB19Home1TNSListener" -StartupType Automatic -ErrorAction SilentlyContinue
        Start-Service -Name "OracleOraDB19Home1TNSListener" -ErrorAction SilentlyContinue

        if ((Get-Service -Name "OracleOraDB19Home1TNSListener" -ErrorAction SilentlyContinue).Status -eq "Running") {
            Write-Host "[OK] Listener started successfully"
        } else {
            # Fallback: start the listener through lsnrctl.
            Write-Host "WARNING: Failed to start Listener automatically, trying lsnrctl..." -ForegroundColor Yellow
            & lsnrctl start 2>&1 | Out-Null
        }

        Start-Sleep -Seconds 2

        # Register database with listener
        $registerSQL = @"
ALTER SYSTEM REGISTER;
EXIT;
"@
        $registerSQL | & sqlplus -S / as sysdba 2>&1 | Out-Null
        Write-Host "[OK] Database registered with Listener"
    }
}
|
||||
|
||||
# Final summary: where the restore log is and what to do next.
Write-Host ""
foreach ($summaryLine in @(
        "============================================",
        "Database Restore FROM ZERO Complete!",
        "============================================",
        "",
        "Restore log: $logFile",
        "")) {
    Write-Host $summaryLine
}

Write-Host "Database is OPEN and ready for testing!" -ForegroundColor Green

foreach ($nextStepLine in @(
        "",
        "Next steps:",
        " 1. Test application connectivity",
        " 2. Verify data integrity",
        " 3. Run cleanup_database.ps1 to remove database after test",
        " 4. Shutdown DR VM to conserve resources",
        "")) {
    Write-Host $nextStepLine
}

exit 0
|
||||
231
proxmox/vm109-windows-dr/scripts/transfer_backups.ps1
Normal file
231
proxmox/vm109-windows-dr/scripts/transfer_backups.ps1
Normal file
@@ -0,0 +1,231 @@
|
||||
# Transfer Oracle Backups (Full + Incremental) towards DR Server
# UNIFIED script - can be run after any type of backup.
# Transfers ALL backup files and automatically skips files already on DR.
#
# Can be invoked by:
#   - Task Scheduler after the full backup (03:00 AM)
#   - Task Scheduler after the incremental backup (14:30)
#   - Manually at any time for recovery

param(
    # Local recovery area that is searched for backup pieces.
    [string]$SourceFRA = "C:\Users\Oracle\recovery_area\ROA",

    # SSH endpoint and account of the DR host.
    [string]$DRHost = "10.0.20.202",
    [int]$DRPort = 22,
    [string]$DRUser = "root",

    # Target directory on the DR host.
    [string]$DRPath = "/mnt/pve/oracle-backups/ROA/autobackup",

    # Private key used for ssh/scp.
    [string]$SSHKeyPath = "$env:USERPROFILE\.ssh\id_rsa",

    # Number of days of backups kept on DR by the cleanup step.
    [int]$RetentionDays = 2,

    # Log file; by default one file per run, named after the start time.
    [string]$LogFile = ("D:\rman_backup\logs\transfer_{0}.log" -f (Get-Date -Format 'yyyyMMdd_HHmm'))
)

# Non-terminating cmdlet errors are reported but do not stop the script.
$ErrorActionPreference = "Continue"
|
||||
|
||||
# Writes one timestamped line to the console and appends it to $LogFile.
#   Message - text to log
#   Level   - severity label shown in brackets (default "INFO")
# Failures while writing to the log file are ignored (SilentlyContinue).
function Write-Log {
    param([string]$Message, [string]$Level = "INFO")

    $logLine = "[{0}] [{1}] {2}" -f (Get-Date -Format "yyyy-MM-dd HH:mm:ss"), $Level, $Message

    Write-Host $logLine
    Add-Content -Path $LogFile -Value $logLine -Encoding UTF8 -ErrorAction SilentlyContinue
}
|
||||
|
||||
# Checks that the DR host accepts an SSH login with the configured key by
# running a no-op remote command.
# Returns $true when ssh exits with 0, otherwise $false (the reason is logged).
# NOTE(review): StrictHostKeyChecking=no disables host key verification -
# confirm this is acceptable for this network.
function Test-SSHConnection {
    Write-Log "Testing SSH connection to $DRHost`:$DRPort..."

    try {
        # -n keeps ssh from reading stdin (fix for hanging).
        $sshArgs = @(
            "-n",
            "-p", $DRPort,
            "-i", $SSHKeyPath,
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            "${DRUser}@${DRHost}",
            "exit 0"
        )
        $null = & ssh @sshArgs 2>&1

        if ($LASTEXITCODE -ne 0) {
            Write-Log "SSH connection failed with exit code: $LASTEXITCODE" "ERROR"
            return $false
        }

        Write-Log "SSH connection successful" "SUCCESS"
        return $true
    } catch {
        Write-Log "SSH connection error: $_" "ERROR"
        return $false
    }
}
|
||||
|
||||
# Collects the backup pieces found under the BACKUPSET and AUTOBACKUP
# directories of $SourceFRA.
# Returns the matching FileInfo objects (per directory, newest first), or
# nothing when no file matches. Files whose name contains "__TAG_" are
# excluded (old uncompressed backups).
function Get-AllBackupFiles {
    Write-Log "Searching for all backup files in FRA..."

    $searchPaths = @(
        "$SourceFRA\BACKUPSET",
        "$SourceFRA\AUTOBACKUP"
    )

    # Collect through @( ... ) instead of "+=": appending the empty result of
    # a directory without matches could add a $null element, which would
    # defeat the "no files" check below and hand a null file to the caller.
    $backupFiles = @(
        foreach ($path in $searchPaths) {
            if (Test-Path $path) {
                # Get ALL backup files (duplicates will be skipped during transfer).
                # -match is case-insensitive, so .BKP and .bkp both match.
                Get-ChildItem -Path $path -Recurse -File -ErrorAction SilentlyContinue |
                    Where-Object {
                        $_.Name -match '\.(BKP|bkp)$' -and
                        $_.Name -notlike "*__TAG_*"   # Exclude old uncompressed backups
                    } |
                    Sort-Object LastWriteTime -Descending
            }
        }
    )

    if ($backupFiles.Count -eq 0) {
        Write-Log "No backup files found!" "WARNING"
        return @()
    }

    $totalSizeGB = ($backupFiles | Measure-Object -Property Length -Sum).Sum / 1GB
    Write-Log "Found $($backupFiles.Count) backup files, total size: $([math]::Round($totalSizeGB, 2)) GB"

    return $backupFiles
}
|
||||
|
||||
# Copies one backup file to the DR host with scp unless a complete copy is
# already there.
#   File     - local backup file
#   DestPath - target directory on the DR host
# Returns $true when the file is on DR afterwards (transferred or skipped),
# $false when the transfer failed.
function Transfer-FileToDR {
    param([System.IO.FileInfo]$File, [string]$DestPath)

    $fileName = $File.Name
    $fileSizeMB = [math]::Round($File.Length / 1MB, 2)

    try {
        # Ask the DR host (Linux) for the size of an existing copy. Comparing
        # sizes instead of only testing existence means a truncated file left
        # by an interrupted transfer is sent again rather than skipped forever.
        $sizeCmd = "stat -c %s '$DestPath/$fileName' 2>/dev/null || echo MISSING"
        $sizeResult = & ssh -n -p $DRPort -i $SSHKeyPath "${DRUser}@${DRHost}" $sizeCmd 2>&1
        $remoteSize = $sizeResult |
            ForEach-Object { "$_".Trim() } |
            Where-Object { $_ -match '^\d+$' } |
            Select-Object -First 1

        if ($remoteSize) {
            if ([int64]$remoteSize -eq $File.Length) {
                Write-Log "Skipping (already on DR): $fileName" "INFO"
                return $true
            }
            Write-Log "Incomplete copy on DR ($remoteSize of $($File.Length) bytes), re-transferring: $fileName" "WARNING"
        }

        Write-Log "Transferring: $fileName ($fileSizeMB MB)"

        # SCP transfer - NO compression (files already compressed by RMAN).
        # Prefer aes128-gcm for better performance. The option is "Ciphers":
        # "Cipher" was the SSH protocol 1 option, which current OpenSSH no
        # longer supports, so the preference never took effect.
        $null = & scp -P $DRPort -i $SSHKeyPath -o StrictHostKeyChecking=no -o Compression=no -o "Ciphers=aes128-gcm@openssh.com,aes128-ctr" $File.FullName "${DRUser}@${DRHost}:${DestPath}/" 2>&1

        if ($LASTEXITCODE -eq 0) {
            Write-Log "Transferred: $fileName" "SUCCESS"
            return $true
        } else {
            Write-Log "Failed to transfer: $fileName (exit code: $LASTEXITCODE)" "ERROR"
            return $false
        }
    } catch {
        Write-Log "Transfer error for $fileName : $_" "ERROR"
        return $false
    }
}
|
||||
|
||||
function Cleanup-OldBackupsOnDR {
    <#
    .SYNOPSIS
        Deletes backup files older than $RetentionDays days from $DRPath on the
        DR server and logs how many files were removed.
    .NOTES
        Any failure is logged as a WARNING and never propagated to the caller.
    #>
    Write-Log "Cleaning up old backups on DR (keeping last $RetentionDays days)..."

    try {
        # Match the extension case-insensitively (-iname): the transfer step
        # selects files with a case-insensitive -match on .BKP/.bkp, so a
        # case-sensitive '*.BKP' would leave lower-case files on DR forever.
        $countCmd = "find '$DRPath' -iname '*.bkp' -type f | wc -l"

        # Count files before cleanup
        $countBefore = & ssh -n -p $DRPort -i $SSHKeyPath "${DRUser}@${DRHost}" $countCmd 2>&1
        Write-Log "Backups before cleanup: $countBefore"

        # Cleanup: delete files older than $RetentionDays days - Linux find command.
        # -mtime +N means "older than N days", so to keep RetentionDays days
        # the script uses +($RetentionDays - 1).
        $mtimeDays = $RetentionDays - 1
        $cleanupCmd = "find '$DRPath' -iname '*.bkp' -type f -mtime +$mtimeDays -delete 2>&1"
        $result = & ssh -n -p $DRPort -i $SSHKeyPath "${DRUser}@${DRHost}" $cleanupCmd 2>&1

        if ($LASTEXITCODE -ne 0) {
            Write-Log "Cleanup warning: $result" "WARNING"
        }

        # Count files after cleanup
        $countAfter = & ssh -n -p $DRPort -i $SSHKeyPath "${DRUser}@${DRHost}" $countCmd 2>&1

        # The [int] casts throw when ssh returned an error message instead of a
        # number; that is handled by the catch block below.
        $deleted = [int]$countBefore - [int]$countAfter

        Write-Log "Cleanup completed: Deleted $deleted old backup files, $countAfter remaining"
    } catch {
        Write-Log "Cleanup error: $_" "WARNING"
    }
}
|
||||
|
||||
# ==================== MAIN ====================
|
||||
|
||||
# Entry point: validates prerequisites, transfers every backup file to the DR
# server, purges old backups on DR and logs a summary. Exits 1 only when a
# prerequisite check throws; individual transfer failures are counted and
# reported but do not change the exit code.
try {
    Write-Log "========================================="
    Write-Log "Oracle Backup Transfer Started (UNIFIED)"
    Write-Log "========================================="
    Write-Log "Source FRA: $SourceFRA"
    Write-Log "DR Server: $DRHost"
    Write-Log "DR Path: $DRPath"

    # Prerequisite checks
    if (-not (Test-Path $SourceFRA)) {
        throw "Source FRA path not found: $SourceFRA"
    }

    if (-not (Test-Path $SSHKeyPath)) {
        throw "SSH key not found: $SSHKeyPath"
    }

    # Test SSH connection
    if (-not (Test-SSHConnection)) {
        throw "Cannot connect to DR server via SSH"
    }

    # Create the directory on DR - Linux mkdir command
    Write-Log "Ensuring DR directory exists..."
    $null = & ssh -n -p $DRPort -i $SSHKeyPath "${DRUser}@${DRHost}" "mkdir -p '$DRPath'" 2>&1

    # Find ALL backups. @() keeps the result an array even when the function
    # returns nothing or a single file, so .Count is always meaningful.
    $backupFiles = @(Get-AllBackupFiles)

    if ($backupFiles.Count -eq 0) {
        Write-Log "No backup files to transfer (this might be normal for first run)" "WARNING"
        exit 0
    }

    # Transfer files
    Write-Log "Starting file transfer..."
    $successCount = 0
    $failCount = 0

    foreach ($file in $backupFiles) {
        # Transfer-FileToDR already checks for an existing copy on DR and
        # returns $true for both "skipped" and "transferred", so no second SSH
        # round trip per file is needed here.
        if (Transfer-FileToDR -File $file -DestPath $DRPath) {
            $successCount++
        } else {
            $failCount++
        }
    }

    Write-Log "Transfer summary: $successCount succeeded, $failCount failed"

    if ($failCount -gt 0) {
        Write-Log "Some transfers failed!" "WARNING"
    }

    # Cleanup old backups on DR
    Cleanup-OldBackupsOnDR

    Write-Log "========================================="
    if ($failCount -gt 0) {
        # Do not claim success when at least one file did not reach DR.
        Write-Log "Backup Transfer Completed With Errors" "WARNING"
    } else {
        Write-Log "Backup Transfer Completed Successfully"
    }
    Write-Log "========================================="
    Write-Log "Files processed: $($backupFiles.Count)"
    Write-Log "Successful: $successCount"
    Write-Log "Failed: $failCount"
    Write-Log "DR Server: ${DRHost}:${DRPath}"

    exit 0

} catch {
    Write-Log "CRITICAL ERROR: $($_.Exception.Message)" "ERROR"
    Write-Log "Stack trace: $($_.ScriptStackTrace)" "ERROR"
    exit 1
}
|
||||
649
proxmox/vm109-windows-dr/scripts/weekly-dr-test-proxmox.sh
Normal file
649
proxmox/vm109-windows-dr/scripts/weekly-dr-test-proxmox.sh
Normal file
@@ -0,0 +1,649 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Oracle DR Weekly Test with Proxmox PVE::Notify
|
||||
# Automated DR test with notifications via Proxmox notification system
|
||||
#
|
||||
# Location: /opt/scripts/weekly-dr-test-proxmox.sh (on Proxmox host)
|
||||
# Schedule: Add to cron for weekly execution (Saturdays)
|
||||
#
|
||||
# This script is SELF-SUFFICIENT:
|
||||
# - Automatically creates notification templates if they don't exist
|
||||
# - Uses Proxmox native notification system
|
||||
# - No email configuration needed - uses existing Proxmox setup
|
||||
#
|
||||
# Installation:
|
||||
# cp weekly-dr-test-proxmox.sh /opt/scripts/
|
||||
# chmod +x /opt/scripts/weekly-dr-test-proxmox.sh
|
||||
# /opt/scripts/weekly-dr-test-proxmox.sh --install # Creates templates
|
||||
# crontab -e # Add: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
|
||||
#
|
||||
# Author: Claude (based on ha-monitor.sh pattern)
|
||||
# Version: 1.0
|
||||
|
||||
# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail

# Set proper PATH for cron execution
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Configuration
# Proxmox VM id passed to the qm commands
DR_VM_ID="109"
# Address, SSH port and SSH user used to reach the DR VM
DR_VM_IP="10.0.20.37"
DR_VM_PORT="22122"
DR_VM_USER="romfast"
# Host directory scanned for *.BKP files during the pre-flight check
BACKUP_PATH="/mnt/pve/oracle-backups/ROA/autobackup"
# NOTE(review): presumably the acceptable restore time in minutes - confirm where it is enforced
MAX_RESTORE_TIME_MIN=30
# Directory where the notification templates are written
TEMPLATE_DIR="/usr/share/pve-manager/templates/default"
# One timestamped log file is created per run inside LOG_DIR
LOG_DIR="/var/log/oracle-dr"
LOG_FILE="$LOG_DIR/dr_test_$(date +%Y%m%d_%H%M%S).log"

# Colors (ANSI escape sequences, expanded by echo -e)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Create log directory
mkdir -p "$LOG_DIR"
|
||||
|
||||
# Function to create notification templates
|
||||
create_templates() {
    # Writes the three Handlebars templates (subject, plain-text body, HTML
    # body) used for the "oracle-dr-test" notification into $TEMPLATE_DIR,
    # overwriting any existing copies.
    local subject_tpl="$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs"
    local text_tpl="$TEMPLATE_DIR/oracle-dr-test-body.txt.hbs"
    local html_tpl="$TEMPLATE_DIR/oracle-dr-test-body.html.hbs"

    printf '%b\n' "${GREEN}Creating Oracle DR test notification templates...${NC}"

    # Make sure the target directory exists before writing into it
    mkdir -p "$TEMPLATE_DIR"

    # Subject line
    cat > "$subject_tpl" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
EOF

    # Plain-text body
    cat > "$text_tpl" <<'EOF'
Oracle DR Test {{test_result}} | {{date}}
Severity: {{severity}}

SUMMARY
- Outcome: {{test_result}}
- Duration: {{total_duration}} min (restore {{restore_duration}} min)
- Backups used: {{backup_count}}
- Tables restored: {{tables_restored}}

COMPONENTS
- VM {{vm_id}} ({{vm_ip}}): {{vm_status}}
- NFS: {{nfs_status}}
- Database: {{database_status}}
- Cleanup: {{disk_freed}} GB freed

STEPS
{{#each test_steps}}
- {{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} - {{this.status}}{{/if}}
{{/each}}

{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}

{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}

RMAN RESTORE LOG (complete)
---
{{restore_log}}
---

BASH SCRIPT LOG (last 100 lines)
---
{{bash_log}}
---

Full log: {{log_file}}
Next test: Saturday 06:00
EOF

    # HTML body (compact Gmail-friendly layout)
    cat > "$html_tpl" <<'EOF'
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Oracle DR Test {{test_result}} | {{date}}</title>
</head>
<body style="margin:0;padding:16px;font-family:Arial,Helvetica,sans-serif;background:#ffffff;color:#2c3e50;">
<table style="width:100%;max-width:640px;margin:0 auto;border-collapse:collapse;">
<tr>
<td style="padding:0 0 12px 0;font-size:18px;font-weight:600;">
Oracle DR Test {{test_result}}
</td>
</tr>
<tr>
<td style="padding:0 0 8px 0;font-size:13px;color:#6c757d;">{{date}} · Severity: {{severity}}</td>
</tr>
<tr>
<td style="padding:12px;border:1px solid #e1e4e8;border-radius:4px;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:4px 0;">Outcome</td><td style="padding:4px 0;text-align:right;">{{test_result}}</td></tr>
<tr><td style="padding:4px 0;">Duration</td><td style="padding:4px 0;text-align:right;">{{total_duration}} min (restore {{restore_duration}} min)</td></tr>
<tr><td style="padding:4px 0;">Backups used</td><td style="padding:4px 0;text-align:right;">{{backup_count}}</td></tr>
<tr><td style="padding:4px 0;">Tables restored</td><td style="padding:4px 0;text-align:right;">{{tables_restored}}</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">Components</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">VM {{vm_id}} ({{vm_ip}}): {{vm_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">NFS: {{nfs_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Database: {{database_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Cleanup: {{disk_freed}} GB freed</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:0 0 6px 0;font-weight:600;">Steps</td></tr>
{{#each test_steps}}
<tr>
<td style="padding:4px 0;border-bottom:1px solid #f1f1f1;">{{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} – {{this.status}}{{/if}}</td>
</tr>
{{/each}}
</table>
</td>
</tr>

{{#if has_errors}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff5f5;border:1px solid #f1b0b7;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#c82333;">Issues</td></tr>
{{#each errors}}
<tr><td style="padding:6px 12px;border-top:1px solid #f8d7da;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

{{#if has_warnings}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff8e5;border:1px solid #ffe8a1;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#856404;">Warnings</td></tr>
{{#each warnings}}
<tr><td style="padding:6px 12px;border-top:1px solid #ffe8a1;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:12px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;font-size:13px;">RMAN Restore Log (complete)</td></tr>
<tr><td style="padding:8px 12px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word;border-top:1px solid #e1e4e8;">{{restore_log}}</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:12px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;font-size:13px;">Bash Script Log (last 100 lines)</td></tr>
<tr><td style="padding:8px 12px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word;border-top:1px solid #e1e4e8;">{{bash_log}}</td></tr>
</table>
</td>
</tr>

<tr>
<td style="padding:16px 0 0 0;font-size:12px;color:#6c757d;">
Full log: {{log_file}} · Next test: Saturday 06:00
</td>
</tr>
</table>
</body>
</html>
EOF

    printf '%b\n' "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}"
}
|
||||
|
||||
# Function to send notification via PVE::Notify
|
||||
# Sends a notification through Proxmox's PVE::Notify using the
# "oracle-dr-test" template.
# Arguments: $1 - severity; used only when the JSON has no "severity" key
#            $2 - JSON document with the template data (piped to perl on stdin)
# Returns:   exit status of the perl helper (non-zero when sending failed)
send_pve_notification() {
    local severity="$1"
    local data="$2"
    local notify_script
    local rc=0

    # Use an unpredictable temp file instead of a fixed name in /tmp: this
    # script runs as root, so a fixed path could be pre-created or symlinked
    # by another local user.
    notify_script=$(mktemp "${TMPDIR:-/tmp}/oracle-dr-notify.XXXXXX") || return 1

    # Create Perl script to call PVE::Notify
    cat > "$notify_script" <<'PERL_SCRIPT'
#!/usr/bin/perl
use strict;
use warnings;
use PVE::Notify;
use JSON;

my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);

my $severity = $data->{severity} // $ARGV[0] // 'info';
my $template_name = 'oracle-dr-test';

# Add fields for matching rules
my $fields = {
    type => 'oracle-dr-test',
    severity => $severity,
    test_result => $data->{test_result},
};

# Send notification
eval {
    PVE::Notify::notify(
        $severity,
        $template_name,
        $data,
        $fields
    );
};

if ($@) {
    print "Error sending notification: $@\n";
    exit 1;
}

print "Notification sent successfully\n";
PERL_SCRIPT

    # Capture the status instead of letting `set -e` abort here, so the temp
    # file is removed on failure as well.
    printf '%s\n' "$data" | perl "$notify_script" "$severity" || rc=$?

    rm -f -- "$notify_script"
    return "$rc"
}
|
||||
|
||||
# Logging functions
|
||||
log() {
    # Prints "[YYYY-MM-DD HH:MM:SS] <message>" to stdout and appends the same
    # line to $LOG_FILE.
    local message="$1"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$message" | tee -a "$LOG_FILE"
}
|
||||
|
||||
log_error() {
    # Prints the message with a red "[ERROR]" tag to stdout and appends it to
    # $LOG_FILE. Backslash escapes are expanded, as with echo -e.
    local message="$1"
    printf '%b\n' "${RED}[ERROR]${NC} ${message}" | tee -a "$LOG_FILE"
}
|
||||
|
||||
log_warning() {
    # Prints the message with a yellow "[WARNING]" tag to stdout and appends
    # it to $LOG_FILE. Backslash escapes are expanded, as with echo -e.
    local message="$1"
    printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}" | tee -a "$LOG_FILE"
}
|
||||
|
||||
log_success() {
    # Prints the message with a green "[SUCCESS]" tag to stdout and appends
    # it to $LOG_FILE. Backslash escapes are expanded, as with echo -e.
    local message="$1"
    printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# Test tracking
|
||||
# JSON objects, one per recorded step (appended by track_step)
TEST_STEPS=()
# "<step name>: <status>" strings for steps recorded as failed
ERRORS=()
# Free-form warning messages collected during the run
WARNINGS=()
# Epoch seconds at script start; used to compute the total test duration
TEST_START_TIME=$(date +%s)
|
||||
|
||||
# Function to track test steps
|
||||
track_step() {
    # Records the outcome of one test step.
    # Arguments: $1 - step name
    #            $2 - "true" or "false" (whether the step passed)
    #            $3 - human-readable status text
    #            $4 - step start time in epoch seconds
    # Globals:   appends a JSON object to TEST_STEPS; when $2 is "false",
    #            also appends "<name>: <status>" to ERRORS.
    local step_name="$1"
    local step_passed="$2"
    local step_status="$3"
    local started_at="$4"
    local finished_at elapsed entry

    finished_at=$(date +%s)
    elapsed=$(( finished_at - started_at ))

    # jq does the JSON escaping; duration becomes a number, passed a boolean
    entry=$(jq -n \
        --arg name "$step_name" \
        --arg status "$step_status" \
        --arg duration "$elapsed" \
        --arg passed "$step_passed" \
        '{name:$name, status:$status, duration:($duration|tonumber), passed:($passed == "true")}'
    )

    TEST_STEPS+=("$entry")

    if [ "$step_passed" = "false" ]; then
        ERRORS+=("${step_name}: ${step_status}")
    fi
}
|
||||
|
||||
# Main test workflow
|
||||
# Private helper: prints the content of a file on the DR VM, or nothing when
# the file cannot be read. $1 = Windows path, $2 = optional extra Get-Content
# arguments (e.g. "-Tail 20"). Never fails.
_dr_fetch_remote_file() {
    local remote_path="$1"
    local extra_args="${2:-}"

    ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
        "powershell -Command \"Get-Content '${remote_path}' ${extra_args:+$extra_args }-ErrorAction SilentlyContinue\"" 2>/dev/null || true
}

# Private helper: prints the restore log from the first known location on the
# DR VM that yields content; prints nothing when neither location has it.
_dr_collect_restore_log() {
    local candidate content

    for candidate in 'D:\oracle\logs\restore_from_zero.log' 'D:\oracle\temp\restore_from_zero.log'; do
        content=$(_dr_fetch_remote_file "$candidate")
        if [ -n "$content" ]; then
            printf '%s\n' "$content"
            return 0
        fi
    done
    return 0
}

# Main test workflow: pre-flight check, VM start, NFS check, restore,
# verification, cleanup, VM shutdown, then one notification with the results.
# Globals: reads the DR_* / BACKUP_PATH / LOG_FILE / MAX_RESTORE_TIME_MIN
#          configuration; appends to TEST_STEPS, ERRORS and WARNINGS.
# The script runs under `set -euo pipefail`, so every command whose failure is
# an expected test outcome is checked explicitly. Otherwise a failure would
# abort the run before the VM is shut down and before the notification is sent.
run_dr_test() {
    local test_result="FAILED"
    local severity="error"
    local is_success=false
    local restore_duration=0
    local tables_restored=0
    local db_status="UNKNOWN"
    local nfs_status="Not checked"
    local vm_status_label="Not started"
    local cleanup_freed=0
    local backup_count=0
    local restore_log="Not collected"
    local step_start restore_start restore_end

    log "=========================================="
    log "Oracle DR Weekly Test - Starting"
    log "=========================================="

    # Step 1: Pre-flight checks
    step_start=$(date +%s)
    log "STEP 1: Pre-flight checks"

    # Check backups exist. `|| true` keeps a failing find (e.g. backup storage
    # not mounted) from killing the script through pipefail; it then simply
    # counts as zero backups and is reported below.
    backup_count=$({ find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null || true; } | wc -l)

    if [ "$backup_count" -lt 2 ]; then
        track_step "Pre-flight checks" false "Insufficient backups (found: $backup_count)" "$step_start"
        test_result="FAILED - No backups"
    else
        track_step "Pre-flight checks" true "Found $backup_count backups" "$step_start"

        # Step 2: Start VM
        step_start=$(date +%s)
        log "STEP 2: Starting DR VM"

        if qm start "$DR_VM_ID" 2>/dev/null; then
            vm_status_label="Running"

            # Intelligent VM boot wait with polling (max 180s)
            local MAX_BOOT_WAIT=180
            local POLL_INTERVAL=5
            local boot_elapsed=0
            local vm_ready=false
            local vm_qm_status

            log "Waiting for VM to become ready (SSH + PowerShell, max ${MAX_BOOT_WAIT}s)..."

            while [ "$boot_elapsed" -lt "$MAX_BOOT_WAIT" ]; do
                # Check 1: VM running status in Proxmox
                vm_qm_status=$(qm status "$DR_VM_ID" 2>/dev/null | grep -o "running" || echo "")

                if [ "$vm_qm_status" = "running" ]; then
                    # Check 2: SSH connectivity and PowerShell availability (what we actually need)
                    if ssh -p "$DR_VM_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o BatchMode=yes "$DR_VM_USER@$DR_VM_IP" \
                        "powershell -Command 'Write-Output ready'" >/dev/null 2>&1; then
                        log "VM ready after ${boot_elapsed}s (SSH and PowerShell responding)"
                        vm_ready=true
                        break
                    fi
                fi

                sleep "$POLL_INTERVAL"
                boot_elapsed=$((boot_elapsed + POLL_INTERVAL))

                # Progress logging every 30 seconds
                if [ $((boot_elapsed % 30)) -eq 0 ] && [ "$boot_elapsed" -lt "$MAX_BOOT_WAIT" ]; then
                    log "Still waiting for VM... (${boot_elapsed}s/${MAX_BOOT_WAIT}s elapsed)"
                fi
            done

            if [ "$vm_ready" = true ]; then
                track_step "VM Startup" true "VM $DR_VM_ID started and ready (${boot_elapsed}s)" "$step_start"
            else
                log_warning "VM did not respond within ${MAX_BOOT_WAIT}s, continuing anyway (may cause subsequent failures)"
                # The VM did start, so the step still passes, but the status
                # must not claim readiness that was never observed.
                track_step "VM Startup" true "VM $DR_VM_ID started but not confirmed ready after ${boot_elapsed}s" "$step_start"
            fi

            # Step 3: Verify NFS mount
            step_start=$(date +%s)
            log "STEP 3: Verifying NFS mount"

            nfs_status="Not Mounted"
            # Test-Path only prints True/False and always exits 0, so the
            # result is mapped to an exit code explicitly; otherwise this
            # check would pass whenever SSH itself works.
            if ssh -p "$DR_VM_PORT" -o ConnectTimeout=10 "$DR_VM_USER@$DR_VM_IP" \
                "powershell -Command \"if (Test-Path 'F:\\ROA\\autobackup') { exit 0 } else { exit 1 }\"" >/dev/null 2>&1; then
                nfs_status="Mounted"
                track_step "NFS Mount Check" true "F:\\ drive accessible" "$step_start"
            else
                track_step "NFS Mount Check" false "F:\\ drive not accessible" "$step_start"
                WARNINGS+=("NFS mount may need manual intervention")
            fi

            # Step 4: Run restore
            step_start=$(date +%s)
            restore_start=$step_start
            log "STEP 4: Running database restore"

            if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
                "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\rman_restore_from_zero.ps1 -TestMode" 2>&1 | tee -a "$LOG_FILE"; then

                restore_end=$(date +%s)
                restore_duration=$(( (restore_end - restore_start) / 60 ))

                track_step "Database Restore" true "Restored in $restore_duration minutes" "$step_start"

                # Flag restores that exceed the configured time budget
                if [ "$restore_duration" -gt "${MAX_RESTORE_TIME_MIN:-30}" ]; then
                    WARNINGS+=("Restore took $restore_duration minutes (limit: ${MAX_RESTORE_TIME_MIN:-30} minutes)")
                fi

                # Step 5: Verify database
                step_start=$(date +%s)
                log "STEP 5: Verifying database"

                # Parse database status from LOG_FILE (rman_restore_from_zero.ps1 already verified it)
                # Look for "OPEN_MODE: READ WRITE" in the captured output
                if grep -q "OPEN_MODE: READ WRITE" "$LOG_FILE" 2>/dev/null; then
                    db_status="READ WRITE"
                else
                    db_status=""
                fi

                # Parse table count from LOG_FILE (captured from the STEP 4 restore output)
                # Look for "TABLES: <number>" in the output
                tables_restored=$(grep -oP "TABLES:\s*\K\d+" "$LOG_FILE" 2>/dev/null | tail -1 || echo "0")
                tables_restored=$(echo "$tables_restored" | tr -cd '0-9')
                [ -z "$tables_restored" ] && tables_restored=0

                if [[ "$db_status" == "READ WRITE" ]] && [ "$tables_restored" -gt 0 ]; then
                    track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
                    test_result="PASSED"
                    severity="info"
                    is_success=true
                else
                    track_step "Database Verification" false "Database not OPEN" "$step_start"
                fi

                # Collect restore log from VM (always attempt collection - FULL log)
                log "Collecting restore log from DR VM..."
                restore_log=$(_dr_collect_restore_log)

                # Still not found, use fallback message
                if [ -z "$restore_log" ]; then
                    restore_log="Restore log not available (file may not exist or was not generated)"
                fi

                # Step 6: Cleanup (AFTER restore - stop service to release file locks)
                step_start=$(date +%s)
                log "STEP 6: Running cleanup"

                if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
                    "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT /AFTER" 2>/dev/null; then
                    # Fixed estimate; the freed space is not measured
                    cleanup_freed=8
                    track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
                else
                    # Report the failure and carry on so the VM is still shut down
                    track_step "Cleanup" false "Cleanup script failed" "$step_start"
                    WARNINGS+=("Database cleanup failed; disk space on the DR VM may not have been reclaimed")
                fi

            else
                # Collect restore log even when restore fails (FULL log)
                log "Collecting restore log after failure..."
                restore_log=$(_dr_collect_restore_log)

                # Always try to get some error output from RMAN script
                if [ -z "$restore_log" ]; then
                    local last_error
                    last_error=$(_dr_fetch_remote_file 'D:\oracle\temp\*.rman' "-Tail 20")
                    if [ -n "$last_error" ]; then
                        # $'\n' is a real newline; "\n" inside double quotes is not
                        restore_log="RMAN script content (last 20 lines):"$'\n'"$last_error"
                    else
                        restore_log="No restore logs or RMAN scripts found"
                    fi
                fi

                track_step "Database Restore" false "Restore failed" "$step_start"
            fi

            # Step 7: Shutdown VM
            step_start=$(date +%s)
            log "STEP 7: Shutting down VM"

            if ! ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" "shutdown /s /t 30" 2>/dev/null; then
                log_warning "Guest shutdown request failed; VM will be stopped via qm"
            fi
            sleep 60

            # Accept either a successful stop or a VM that is already stopped
            if qm stop "$DR_VM_ID" 2>/dev/null \
                || [[ "$(qm status "$DR_VM_ID" 2>/dev/null || true)" == *stopped* ]]; then
                track_step "VM Shutdown" true "VM stopped" "$step_start"
                vm_status_label="Stopped"
            else
                track_step "VM Shutdown" false "Failed to stop VM $DR_VM_ID" "$step_start"
                WARNINGS+=("VM $DR_VM_ID may still be running")
                vm_status_label="Stop failed"
            fi

        else
            track_step "VM Startup" false "Failed to start VM $DR_VM_ID" "$step_start"
            vm_status_label="Failed to start"
        fi
    fi

    # Calculate total duration
    local test_end_time total_duration
    test_end_time=$(date +%s)
    total_duration=$(( (test_end_time - TEST_START_TIME) / 60 ))

    # Prepare notification data
    local steps_json
    if [ ${#TEST_STEPS[@]} -eq 0 ]; then
        steps_json='[]'
    else
        steps_json=$(printf '%s\n' "${TEST_STEPS[@]}" | jq -s '.')
    fi

    local errors_json
    if [ ${#ERRORS[@]} -eq 0 ]; then
        errors_json='[]'
    else
        errors_json=$(printf '%s\n' "${ERRORS[@]}" | jq -R . | jq -s .)
    fi

    local warnings_json
    if [ ${#WARNINGS[@]} -eq 0 ]; then
        warnings_json='[]'
    else
        warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -s .)
    fi

    local has_errors=false
    local has_warnings=false
    if [ ${#ERRORS[@]} -gt 0 ]; then
        has_errors=true
    fi
    if [ ${#WARNINGS[@]} -gt 0 ]; then
        has_warnings=true
    fi

    # A passing test with warnings is reported one level up from "info"
    if [ "$is_success" = true ] && [ "$has_warnings" = true ]; then
        severity="warning"
    fi

    local db_status_clean
    db_status_clean=$(echo "$db_status" | tr -d '\r' | sed 's/^ *//;s/ *$//')

    # Escape restore log for JSON (sent over stdin, so its size is not
    # limited by the kernel's per-argument length limit)
    local restore_log_json
    restore_log_json=$(echo "$restore_log" | jq -Rs .)

    # Collect last 100 lines of bash script log
    local bash_log
    bash_log=$(tail -100 "$LOG_FILE" 2>/dev/null || echo "Bash log not available")
    local bash_log_json
    bash_log_json=$(echo "$bash_log" | jq -Rs .)

    local json_data
    json_data=$(cat <<JSON
{
    "severity": "$severity",
    "test_result": "$test_result",
    "date": "$(date '+%Y-%m-%d %H:%M:%S')",
    "total_duration": $total_duration,
    "is_success": $is_success,
    "has_errors": $has_errors,
    "has_warnings": $has_warnings,
    "test_steps": $steps_json,
    "errors": $errors_json,
    "warnings": $warnings_json,
    "backup_count": $backup_count,
    "restore_duration": $restore_duration,
    "tables_restored": ${tables_restored:-0},
    "database_status": "${db_status_clean:-UNKNOWN}",
    "disk_freed": $cleanup_freed,
    "vm_id": "$DR_VM_ID",
    "vm_ip": "$DR_VM_IP",
    "vm_status": "$vm_status_label",
    "nfs_status": "${nfs_status:-Unknown}",
    "log_file": "$LOG_FILE",
    "restore_log": $restore_log_json,
    "bash_log": $bash_log_json
}
JSON
)

    # Send notification
    log "Sending notification..."
    send_pve_notification "$severity" "$json_data"

    # Final summary
    log "=========================================="
    log "Oracle DR Test Complete: $test_result"
    log "Duration: $total_duration minutes"
    log "Log: $LOG_FILE"
    log "=========================================="
}
|
||||
|
||||
# Main execution
|
||||
main() {
    # Dispatches on the first argument:
    #   --install  create the notification templates and print next steps
    #   --help     print usage
    #   otherwise  create the templates if the subject template is missing,
    #              then run the DR test
    local mode="${1:-}"

    if [[ "$mode" == "--install" ]]; then
        create_templates
        printf '\n'
        printf '%b\n' "${GREEN}Installation complete!${NC}"
        printf '%s\n' \
            "Next steps:" \
            "1. Test the script: /opt/scripts/weekly-dr-test-proxmox.sh" \
            "2. Add to cron: crontab -e" \
            " Add line: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh" \
            "3. Configure notifications in Proxmox GUI if needed:" \
            " Datacenter > Notifications > Add matching rules for 'oracle-dr-test'"
    elif [[ "$mode" == "--help" ]]; then
        printf '%s\n' \
            "Oracle DR Weekly Test for Proxmox" \
            "Usage:" \
            " $0 - Run DR test" \
            " $0 --install - Create notification templates" \
            " $0 --help - Show this help"
    else
        # Only the subject template is probed; all three are (re)created together
        if [ ! -f "$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs" ]; then
            printf '%b\n' "${YELLOW}Templates not found, creating...${NC}"
            create_templates
            printf '\n'
        fi

        # Run DR test
        run_dr_test
    fi
}
|
||||
|
||||
# Check dependencies
|
||||
# jq is required to build the notification JSON; stop early if it is missing
command -v jq >/dev/null 2>&1 || {
    printf '%b\n' "${RED}Error: jq is not installed${NC}"
    echo "Install with: apt-get install jq"
    exit 1
}

# Hand all command-line arguments to main
main "$@"
|
||||
Reference in New Issue
Block a user