From 6d56356ada5ed9d5178515181d1894389816ddff Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Sun, 26 Apr 2026 19:11:35 +0000 Subject: [PATCH] feat(dashboard): integrate Ralph nav link + add e2e planning walkthrough test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dashboard/api.py: adaug link "Ralph" (lucide bot icon) în NAV_HTML între Workspace și KB. Pagina ralph.html se injectează corect cu nav-ul (verificat live via curl pe :8088/ralph.html). tests/test_e2e_planning_walkthrough.py (nou): 4 teste integration care simulează scripted exact ce face un user pe Discord: - click Planifică pe game-library cu UI scope → 4 faze (incl design-review) - /office-hours → ceo → eng → design → final-plan.md stub scris pe disk - "Dau drumul" → status approved + final_plan_path în approved-tasks.json - description fără UI keywords → 3 faze (skip design) - /cancel mid-planning → status revert pending, state cleared - mesaj fără planning state → cade pe Claude main chat (NU orchestrator) Subprocess `claude -p` mock-uit; testează tot wire-up-ul router → orchestrator → session și schema approved-tasks.json. Nu consumă credite. Co-Authored-By: Claude Opus 4.7 (1M context) --- dashboard/api.py | 4 + tests/test_e2e_planning_walkthrough.py | 259 +++++++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100644 tests/test_e2e_planning_walkthrough.py diff --git a/dashboard/api.py b/dashboard/api.py index 09b48ff..4f1fcd4 100644 --- a/dashboard/api.py +++ b/dashboard/api.py @@ -59,6 +59,10 @@ NAV_HTML = '''
Workspace + + + Ralph + KB diff --git a/tests/test_e2e_planning_walkthrough.py b/tests/test_e2e_planning_walkthrough.py new file mode 100644 index 0000000..d85bdbb --- /dev/null +++ b/tests/test_e2e_planning_walkthrough.py @@ -0,0 +1,259 @@ +"""End-to-end scripted walkthrough — simulează exact ce face un user pe Discord: + + 1. /l → click Planifică pe game-library (proiect cu UI scope) + 2. Modal se deschide; user tastează descriere; submit + 3. start_planning_session creează entry → status='planning' + 4. Agent răspunde la primul turn (office-hours) + 5. User răspunde un mesaj normal → router rutează la orchestrator (NU la chat normal) + 6. User apasă "Continuă faza" → advance la /plan-ceo-review (fresh subprocess) + 7. Repeat pentru /plan-eng-review și /plan-design-review (UI scope detectat) + 8. La sfârșitul ultimului phase, advance scrie final-plan.md stub + 9. User apasă "Dau drumul tonight" → planning_approve + 10. Status='approved', final_plan_path setat în approved-tasks.json + 11. Re-citim approved-tasks.json și verificăm că night-execute ar avea + toate câmpurile necesare (slug, description, status, final_plan_path) + +Subprocess `claude -p` e mock-uit — nu consumăm credite. Acoperă totul +între `start_planning_session` și `planning_approve` ca un single test. 
+"""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import patch
+
+import pytest
+
+from src import planning_orchestrator, planning_session, ralph_flow, router
+
+
+@pytest.fixture
+def tmp_e2e(tmp_path, monkeypatch):
+    """Redirect every state file + workspace into tmp_path; yield the paths as a dict."""
+    sessions = tmp_path / "sessions"
+    sessions.mkdir()
+    monkeypatch.setattr(planning_session, "SESSIONS_DIR", sessions)
+    monkeypatch.setattr(
+        planning_session, "PLANNING_STATE_FILE", sessions / "planning.json"
+    )
+    # Ralph flow state isolation
+    monkeypatch.setattr(ralph_flow, "_STATE_FILE", sessions / "ralph_flow.json")
+    monkeypatch.setattr(ralph_flow, "SESSIONS_DIR", sessions)
+
+    approved = tmp_path / "approved-tasks.json"
+    approved.write_text(json.dumps({"projects": [], "last_updated": None}), encoding="utf-8")
+    monkeypatch.setattr(router, "APPROVED_TASKS_FILE", approved)
+
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    (workspace / "game-library").mkdir()
+    monkeypatch.setattr(planning_session, "WORKSPACE_ROOT", workspace)
+    monkeypatch.setattr(planning_orchestrator, "WORKSPACE_ROOT", workspace)
+
+    yield {"sessions": sessions, "approved": approved, "workspace": workspace}
+
+
+def _fake_run_claude_factory():
+    """Return ``(fake, state)``: a stand-in for ``_run_claude`` and its call log.
+
+    ``state["calls"]`` counts invocations and ``state["session_ids"]`` records each
+    generated id, so a test can verify how many subprocess calls were made.
+    """
+    state = {"calls": 0, "session_ids": []}
+
+    def fake(*args, **kwargs):
+        state["calls"] += 1
+        sid = f"s-{state['calls']}"
+        state["session_ids"].append(sid)
+        # Odd turns ask a question; even turns emit "PHASE_STATUS: ready_to_advance".
+        text = (
+            f"Acesta e turn-ul {state['calls']}. Ce vrei să facem mai concret?"
+            if state["calls"] % 2 == 1
+            else f"Confirm. PHASE_STATUS: ready_to_advance — turn {state['calls']}."
+        )
+        return {
+            "result": text,
+            "session_id": sid,
+            "usage": {"input_tokens": 100, "output_tokens": 80},
+            "total_cost_usd": 0.5,
+            "subtype": "success",
+            "is_error": False,
+            "duration_ms": 12000,
+        }
+
+    return fake, state
+
+
+def _approved_for(slug, approved_path):  # -> project entry named `slug`, or None
+    data = json.loads(approved_path.read_text(encoding="utf-8"))
+    for p in data["projects"]:
+        if p["name"] == slug:
+            return p
+    return None
+
+
+# ---------------------------------------------------------------------------
+# The walkthrough (happy path first, then edge cases)
+# ---------------------------------------------------------------------------
+
+
+def test_full_planning_walkthrough_with_ui_scope(tmp_e2e):  # happy path, 4 phases
+    slug = "game-library"
+    description = "Adaug un filtru de genuri pe pagina principală a game-library"
+    channel = "discord-channel-1"
+    adapter = "discord"
+
+    fake, calls = _fake_run_claude_factory()
+
+    with patch.object(planning_session, "_run_claude", fake):
+        # Step 1+2+3: user clicks Planifică and types description (modal submit) →
+        # Discord/Telegram callback invokes start_planning_session.
+        first_text = router.start_planning_session(slug, description, channel, adapter)
+
+        assert "turn" in first_text.lower()
+        # Status moved to "planning"
+        entry = _approved_for(slug, tmp_e2e["approved"])
+        assert entry is not None
+        assert entry["status"] == "planning"
+        assert entry["planning_session_id"] is not None
+
+        # 4 phases planned because description has UI scope
+        state = planning_session.get_planning_state(adapter, channel)
+        assert state is not None
+        assert state["phases_planned"] == [
+            "/office-hours",
+            "/plan-ceo-review",
+            "/plan-eng-review",
+            "/plan-design-review",
+        ]
+        assert state["phase"] == "/office-hours"
+
+        # Step 5: user replies with a plain message → route_message detects
+        # planning state and routes to orchestrator (not chat fallback). Plain
+        # planning messages return is_cmd=False (still a "Claude response"-style)
+        # but they MUST hit the orchestrator subprocess, not the main chat path.
+        prior_calls = calls["calls"]
+        response, _is_cmd = router.route_message(
+            channel, "user-1",
+            "Vreau filtru pe pagina principală cu RPG/FPS/MMO checkboxes",
+            adapter_name=adapter,
+        )
+        assert calls["calls"] == prior_calls + 1, "respond should spawn 1 subprocess"
+        assert response  # non-empty response from agent
+
+        # Step 6+7: walk through the 3 remaining phases via advance().
+        # Each advance kicks off a fresh subprocess.
+        prev_calls = calls["calls"]
+        for expected_phase in ("/plan-ceo-review", "/plan-eng-review", "/plan-design-review"):
+            _session, _text, completed = planning_orchestrator.PlanningOrchestrator.advance(
+                adapter, channel,
+            )
+            assert completed is False, f"phase {expected_phase} marked complete prematurely"
+            state = planning_session.get_planning_state(adapter, channel)
+            assert state["phase"] == expected_phase
+            assert calls["calls"] == prev_calls + 1, "advance should spawn 1 fresh subprocess"
+            prev_calls = calls["calls"]
+
+        # Step 8: one more advance — pipeline complete; orchestrator writes final-plan.md stub
+        _session, _summary, completed = planning_orchestrator.PlanningOrchestrator.advance(
+            adapter, channel,
+        )
+        assert completed is True
+        final_plan = tmp_e2e["workspace"] / slug / "scripts" / "ralph" / "final-plan.md"
+        assert final_plan.exists(), "final-plan.md stub trebuie scris la pipeline complet"
+        body = final_plan.read_text(encoding="utf-8")
+        assert slug in body  # stub mentions project
+        # All 4 phases recorded as completed
+        state = planning_session.get_planning_state(adapter, channel)
+        assert set(state["phases_completed"]) == {
+            "/office-hours",
+            "/plan-ceo-review",
+            "/plan-eng-review",
+            "/plan-design-review",
+        }
+        assert state["final_plan_path"] == str(final_plan)
+
+        # Step 9+10: user clicks "Dau drumul tonight" → planning_approve.
+        approval_msg = router._approve_from_planning(channel, adapter)
+        assert "aprobat" in approval_msg.lower() or "tonight" in approval_msg.lower()
+
+        # Step 11: approved-tasks.json has all the fields night-execute needs.
+        entry = _approved_for(slug, tmp_e2e["approved"])
+        assert entry["status"] == "approved"
+        assert entry["approved_at"] is not None
+        assert entry["final_plan_path"] == str(final_plan)
+        assert entry["description"] == description
+        # planning_session_id is cleared once approved (no longer needed)
+        assert entry.get("planning_session_id") in (None, "")
+
+
+def test_full_walkthrough_no_ui_scope_skips_design_phase(tmp_e2e):
+    """Description without UI keywords should plan only 3 phases."""
+    slug = "game-library"
+    description = "Refactor utility helpers — split string parsing into a separate module"
+    channel = "discord-channel-2"
+    adapter = "discord"
+
+    fake, _calls = _fake_run_claude_factory()
+
+    with patch.object(planning_session, "_run_claude", fake):
+        router.start_planning_session(slug, description, channel, adapter)
+        state = planning_session.get_planning_state(adapter, channel)
+
+    assert state["phases_planned"] == [
+        "/office-hours",
+        "/plan-ceo-review",
+        "/plan-eng-review",
+    ]
+    assert "/plan-design-review" not in state["phases_planned"]
+
+
+def test_walkthrough_cancel_mid_planning_reverts_to_pending(tmp_e2e):
+    """User abandons planning via /cancel → status reverts to pending, state cleared."""
+    slug = "game-library"
+    description = "Adaug pagina de profile cu avatar editing"
+    channel = "discord-channel-3"
+    adapter = "discord"
+
+    fake, _calls = _fake_run_claude_factory()
+
+    with patch.object(planning_session, "_run_claude", fake):
+        router.start_planning_session(slug, description, channel, adapter)
+
+        # Verify planning is active
+        state = planning_session.get_planning_state(adapter, channel)
+        assert state is not None
+        entry = _approved_for(slug, tmp_e2e["approved"])
+        assert entry["status"] == "planning"
+
+        # User types /cancel (router routes to cancel handler)
+        _response, is_cmd = router.route_message(
+            channel, "user-1", "/cancel", adapter_name=adapter,
+        )
+        assert is_cmd is True
+
+        # State cleared
+        assert planning_session.get_planning_state(adapter, channel) is None
+        # Status reverted to pending
+        entry = _approved_for(slug, tmp_e2e["approved"])
+        assert entry["status"] == "pending"
+        assert entry.get("planning_session_id") in (None, "")
+
+
+def test_walkthrough_no_planning_state_falls_through_to_normal_chat(tmp_e2e):
+    """Plain message without active planning should NOT touch orchestrator."""
+    fake, calls = _fake_run_claude_factory()
+
+    with patch.object(planning_session, "_run_claude", fake), \
+         patch("src.router.send_message") as mock_send:
+        mock_send.return_value = "(claude main session response)"
+        # No prior start_planning_session — plain message goes to normal Claude
+        _response, is_cmd = router.route_message(
+            "channel-no-plan", "user-1", "salut, ce mai faci?",
+            adapter_name="discord",
+        )
+        assert is_cmd is False  # normal chat, not a command
+        # Orchestrator subprocess NOT invoked
+        assert calls["calls"] == 0
+        # Normal send_message WAS invoked
+        mock_send.assert_called_once()