diff --git a/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh b/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh index 946dc44..fbdc616 100644 --- a/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh +++ b/oracle/standby-server-scripts/weekly-dr-test-proxmox.sh @@ -353,8 +353,44 @@ run_dr_test() { if qm start "$DR_VM_ID" 2>/dev/null; then vm_status_label="Running" - sleep 180 # Wait for boot - track_step "VM Startup" true "VM $DR_VM_ID started" "$step_start" + + # Intelligent VM boot wait with polling (max 180s) + local MAX_BOOT_WAIT=180 + local POLL_INTERVAL=5 + local boot_elapsed=0 + local vm_ready=false + + log "Waiting for VM to become ready (SSH + PowerShell, max ${MAX_BOOT_WAIT}s)..." + + while [ $boot_elapsed -lt $MAX_BOOT_WAIT ]; do + # Check 1: VM running status in Proxmox + local vm_qm_status + vm_qm_status=$(qm status "$DR_VM_ID" 2>/dev/null | grep -o "running" || echo "") + + if [ "$vm_qm_status" = "running" ]; then + # Check 2: SSH connectivity and PowerShell availability (what we actually need) + if ssh -p "$DR_VM_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o BatchMode=yes "$DR_VM_USER@$DR_VM_IP" \ + "powershell -Command 'Write-Output ready'" >/dev/null 2>&1; then + log "VM ready after ${boot_elapsed}s (SSH and PowerShell responding)" + vm_ready=true + break + fi + fi + + sleep $POLL_INTERVAL + boot_elapsed=$((boot_elapsed + POLL_INTERVAL)) + + # Progress logging every 30 seconds + if [ $((boot_elapsed % 30)) -eq 0 ] && [ $boot_elapsed -lt $MAX_BOOT_WAIT ]; then + log "Still waiting for VM... (${boot_elapsed}s/${MAX_BOOT_WAIT}s elapsed)" + fi + done + + if [ "$vm_ready" = false ]; then + log_warning "VM did not respond within ${MAX_BOOT_WAIT}s, continuing anyway (may cause subsequent failures)" + fi + + track_step "VM Startup" true "VM $DR_VM_ID started and ready (${boot_elapsed}s)" "$step_start" # Step 3: Verify NFS mount step_start=$(date +%s)