"""End-to-end scripted walkthrough — simulează exact ce face un user pe Discord: 1. /l → click Planifică pe game-library (proiect cu UI scope) 2. Modal se deschide; user tastează descriere; submit 3. start_planning_session creează entry → status='planning' 4. Agent răspunde la primul turn (office-hours) 5. User răspunde un mesaj normal → router rutează la orchestrator (NU la chat normal) 6. User apasă "Continuă faza" → advance la /plan-ceo-review (fresh subprocess) 7. Repeat pentru /plan-eng-review și /plan-design-review (UI scope detectat) 8. La sfârșitul ultimului phase, advance scrie final-plan.md stub 9. User apasă "Dau drumul tonight" → planning_approve 10. Status='approved', final_plan_path setat în approved-tasks.json 11. Re-citim approved-tasks.json și verificăm că night-execute ar avea toate câmpurile necesare (slug, description, status, final_plan_path) Subprocess `claude -p` e mock-uit — nu consumăm credite. Acoperă totul între `start_planning_session` și `planning_approve` ca un single test. """ from __future__ import annotations import json from unittest.mock import patch import pytest from src import planning_orchestrator, planning_session, ralph_flow, router @pytest.fixture def tmp_e2e(tmp_path, monkeypatch): """Redirect every state file + workspace into a tmp dir.""" sessions = tmp_path / "sessions" sessions.mkdir() monkeypatch.setattr(planning_session, "SESSIONS_DIR", sessions) monkeypatch.setattr( planning_session, "PLANNING_STATE_FILE", sessions / "planning.json" ) # Ralph flow state isolation monkeypatch.setattr(ralph_flow, "_STATE_FILE", sessions / "ralph_flow.json") monkeypatch.setattr(ralph_flow, "SESSIONS_DIR", sessions) approved = tmp_path / "approved-tasks.json" approved.write_text(json.dumps({"projects": [], "last_updated": None})) monkeypatch.setattr(router, "APPROVED_TASKS_FILE", approved) workspace = tmp_path / "workspace" workspace.mkdir() (workspace / "game-library").mkdir() monkeypatch.setattr(planning_session, "WORKSPACE_ROOT", workspace) monkeypatch.setattr(planning_orchestrator, "WORKSPACE_ROOT", workspace) yield {"sessions": sessions, "approved": approved, "workspace": workspace} def _fake_run_claude_factory(): """Return a side-effect function that mocks each subprocess call. Tracks calls so the test can verify subprocess was invoked once per phase. Returns realistic-shaped JSON results. """ state = {"calls": 0, "session_ids": []} def fake(*args, **kwargs): state["calls"] += 1 sid = f"s-{state['calls']}" state["session_ids"].append(sid) # Odd turns ask a question; even turns emit PHASE_READY_MARKER. text = ( f"Acesta e turn-ul {state['calls']}. Ce vrei să facem mai concret?" if state["calls"] % 2 == 1 else f"Confirm. PHASE_STATUS: ready_to_advance — turn {state['calls']}." ) return { "result": text, "session_id": sid, "usage": {"input_tokens": 100, "output_tokens": 80}, "total_cost_usd": 0.5, "subtype": "success", "is_error": False, "duration_ms": 12000, } return fake, state def _approved_for(slug, approved_path): data = json.loads(approved_path.read_text()) for p in data["projects"]: if p["name"] == slug: return p return None # --------------------------------------------------------------------------- # The walkthrough # --------------------------------------------------------------------------- def test_full_planning_walkthrough_with_ui_scope(tmp_e2e): slug = "game-library" description = "Adaug un filtru de genuri pe pagina principală a game-library" channel = "discord-channel-1" adapter = "discord" fake, calls = _fake_run_claude_factory() with patch.object(planning_session, "_run_claude", fake): # Step 1+2+3: user clicks Planifică and types description (modal submit) → # Discord/Telegram callback invokes start_planning_session. first_text = router.start_planning_session(slug, description, channel, adapter) assert "turn" in first_text.lower() # Status moved to "planning" entry = _approved_for(slug, tmp_e2e["approved"]) assert entry is not None assert entry["status"] == "planning" assert entry["planning_session_id"] is not None # 4 phases planned because description has UI scope state = planning_session.get_planning_state(adapter, channel) assert state is not None assert state["phases_planned"] == [ "/office-hours", "/plan-ceo-review", "/plan-eng-review", "/plan-design-review", ] assert state["phase"] == "/office-hours" # Step 5: user replies with a plain message → route_message detects # planning state and routes to orchestrator (not chat fallback). Plain # planning messages return is_cmd=False (still a "Claude response"-style) # but they MUST hit the orchestrator subprocess, not the main chat path. prior_calls = calls["calls"] response, _is_cmd = router.route_message( channel, "user-1", "Vreau filtru pe pagina principală cu RPG/FPS/MMO checkboxes", adapter_name=adapter, ) assert calls["calls"] == prior_calls + 1, "respond should spawn 1 subprocess" assert response # non-empty response from agent # Step 6+7: walk through the 3 remaining phases via advance(). # Each advance kicks off a fresh subprocess. prev_calls = calls["calls"] for expected_phase in ("/plan-ceo-review", "/plan-eng-review", "/plan-design-review"): session, text, completed = planning_orchestrator.PlanningOrchestrator.advance( adapter, channel, ) assert completed is False, f"phase {expected_phase} marked complete prematurely" state = planning_session.get_planning_state(adapter, channel) assert state["phase"] == expected_phase assert calls["calls"] == prev_calls + 1, "advance should spawn 1 fresh subprocess" prev_calls = calls["calls"] # Step 8: one more advance — pipeline complete; orchestrator writes final-plan.md stub session, summary, completed = planning_orchestrator.PlanningOrchestrator.advance( adapter, channel, ) assert completed is True final_plan = tmp_e2e["workspace"] / slug / "scripts" / "ralph" / "final-plan.md" assert final_plan.exists(), "final-plan.md stub trebuie scris la pipeline complet" body = final_plan.read_text(encoding="utf-8") assert slug in body # stub mentions project # All 4 phases recorded as completed state = planning_session.get_planning_state(adapter, channel) assert set(state["phases_completed"]) == { "/office-hours", "/plan-ceo-review", "/plan-eng-review", "/plan-design-review", } assert state["final_plan_path"] == str(final_plan) # Step 9+10: user clicks "Dau drumul tonight" → planning_approve. approval_msg = router._approve_from_planning(channel, adapter) assert "aprobat" in approval_msg.lower() or "tonight" in approval_msg.lower() # Step 11: approved-tasks.json has all the fields night-execute needs. entry = _approved_for(slug, tmp_e2e["approved"]) assert entry["status"] == "approved" assert entry["approved_at"] is not None assert entry["final_plan_path"] == str(final_plan) assert entry["description"] == description # planning_session_id is cleared once approved (no longer needed) assert entry.get("planning_session_id") in (None, "") def test_full_walkthrough_no_ui_scope_skips_design_phase(tmp_e2e): """Description without UI keywords should plan only 3 phases.""" slug = "game-library" description = "Refactor utility helpers — split string parsing into a separate module" channel = "discord-channel-2" adapter = "discord" fake, calls = _fake_run_claude_factory() with patch.object(planning_session, "_run_claude", fake): router.start_planning_session(slug, description, channel, adapter) state = planning_session.get_planning_state(adapter, channel) assert state["phases_planned"] == [ "/office-hours", "/plan-ceo-review", "/plan-eng-review", ] assert "/plan-design-review" not in state["phases_planned"] def test_walkthrough_cancel_mid_planning_reverts_to_pending(tmp_e2e): """User abandons planning via /cancel → status reverts to pending, state cleared.""" slug = "game-library" description = "Adaug pagina de profile cu avatar editing" channel = "discord-channel-3" adapter = "discord" fake, _calls = _fake_run_claude_factory() with patch.object(planning_session, "_run_claude", fake): router.start_planning_session(slug, description, channel, adapter) # Verify planning is active state = planning_session.get_planning_state(adapter, channel) assert state is not None entry = _approved_for(slug, tmp_e2e["approved"]) assert entry["status"] == "planning" # User types /cancel (router routes to cancel handler) response, is_cmd = router.route_message( channel, "user-1", "/cancel", adapter_name=adapter, ) assert is_cmd is True # State cleared assert planning_session.get_planning_state(adapter, channel) is None # Status reverted to pending entry = _approved_for(slug, tmp_e2e["approved"]) assert entry["status"] == "pending" assert entry.get("planning_session_id") in (None, "") def test_walkthrough_no_planning_state_falls_through_to_normal_chat(tmp_e2e): """Plain message without active planning should NOT touch orchestrator.""" fake, calls = _fake_run_claude_factory() with patch.object(planning_session, "_run_claude", fake), \ patch("src.router.send_message") as mock_send: mock_send.return_value = "(claude main session response)" # No prior start_planning_session — plain message goes to normal Claude response, is_cmd = router.route_message( "channel-no-plan", "user-1", "salut, ce mai faci?", adapter_name="discord", ) assert is_cmd is False # normal chat, not a command # Orchestrator subprocess NOT invoked assert calls["calls"] == 0 # Normal send_message WAS invoked mock_send.assert_called_once()