feat(ralph): smart gates + DAG + dashboard live (W3)

Restructurare Ralph QC loop pe smart gate dispatcher tag-driven (în loc de
5 faze fixe), DAG dependsOn cu propagare blocked, retry guard 3-strike, rate
limit detection, plus dashboard live cu polling 5s.

Changes:
- tools/ralph_prd_generator.py: parametru optional final_plan_path; când e
  furnizat, invocă Claude Opus pe final-plan.md pentru extragere user stories
  cu schema extinsă (tags, dependsOn, acceptanceCriteria 3-5). Backward compat
  păstrat — fără final_plan_path, fallback la heuristic-ul vechi.
- tools/ralph/prd-template.json: schema W3 (tags[], dependsOn[], retries,
  failed, blocked, failureReason, requiresDesignReview).
- tools/ralph/prompt.md: 4 faze (impl, base quality, smart gates, commit) +
  dispatcher pe story.tags. Tags vide → run-all-gates fallback (safe default).
- tools/ralph_dag.py (nou): tag validation heuristic anti-silent-regression
  (force ui dacă diff atinge .vue/.tsx/.html/.css/.scss; force db pentru
  migrations sau .sql; force vercel dacă există vercel.json) + topological
  sort cu blocked propagation + atomic prd.json updates.
- tools/ralph/ralph.sh: --max-turns 30, DAG-aware story selection, retry
  counter cu auto-fail la 3, rate limit detection (sleep 30min + 1 retry),
  CLI subcommands prin tools/ralph_dag.py helper.
- dashboard/handlers/ralph.py (nou): /api/ralph/status + /<slug>/log + /prd
  + /stop. Defensive vs corrupt prd.json. Sandbox-ed PID kill.
- dashboard/ralph.html (nou): live cards 3/2/1 col responsive, polling 5s,
  drawer pentru log/PRD viewer, status colors (--status-running/blocked/
  failed/complete declarate inline), Lucide icons cu aria-labels.
- dashboard/api.py: mount /api/ralph/* (GET status/log/prd, POST stop).
- tests/: 72 teste noi (smart gates, DAG, retry, dashboard endpoint).

Note arhitecturale:
- Polling 5s ales peste SSE/WebSocket (suficient pentru iter Ralph 8-15min)
- Tag validation rulează POST-iter pe diff git pentru anti-silent-regression
- Rate limit retry: 1 dată per rulare, apoi mark failed=rate_limited

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-26 18:36:35 +00:00
parent e06a79d98c
commit 655ed3ae09
11 changed files with 2282 additions and 189 deletions

305
dashboard/handlers/ralph.py Normal file
View File

@@ -0,0 +1,305 @@
"""Ralph live dashboard endpoints (W3).

Endpoints:
    GET  /api/ralph/status      — all Ralph projects (data for the cards)
    GET  /api/ralph/<slug>/log  — tail of progress.txt (default 100 lines)
    GET  /api/ralph/<slug>/prd  — full prd.json content
    POST /api/ralph/<slug>/stop — send SIGTERM to the Ralph PID

Polling: every 5s from ralph.html (sufficient for 8-15 min Ralph iterations).
No SSE/WebSocket for the MVP.

Status is read from `~/workspace/<slug>/scripts/ralph/`:
    - prd.json              → stories (passes/failed/blocked/retries)
    - progress.txt          → human-readable log
    - logs/iteration-*.log  → mtime of the latest iteration
    - .ralph.pid            → active PID (checked with os.kill 0)

Path constants are reused from `dashboard/constants.py` (WORKSPACE_DIR).
"""
import json
import os
import signal
from datetime import datetime
from pathlib import Path
from urllib.parse import unquote
import constants
# Path Ralph per proiect (mereu în scripts/ralph/)
def _ralph_dir(project_dir: Path) -> Path:
return project_dir / "scripts" / "ralph"
# Simplistic ETA estimate: average iteration time × remaining stories
DEFAULT_ITER_MINUTES = 12 # midpoint of the 8-15 min range mentioned in the plan
class RalphHandlers:
"""Mixin pentru /api/ralph/* — Ralph live status + control."""
# ── helpers ────────────────────────────────────────────────
def _ralph_validate_slug(self, slug: str):
"""Validează slug-ul + returnează project_dir sau None."""
if not slug or "/" in slug or ".." in slug:
return None
slug = unquote(slug)
project_dir = constants.WORKSPACE_DIR / slug
try:
resolved = project_dir.resolve()
workspace_resolved = constants.WORKSPACE_DIR.resolve()
resolved.relative_to(workspace_resolved)
except (ValueError, OSError):
return None
if not project_dir.exists() or not project_dir.is_dir():
return None
return project_dir
def _ralph_pid_alive(self, ralph_dir: Path):
"""Întoarce (running: bool, pid: int|None)."""
pid_file = ralph_dir / ".ralph.pid"
if not pid_file.exists():
return False, None
try:
pid = int(pid_file.read_text().strip())
os.kill(pid, 0) # signal 0 = check existence
return True, pid
except (ValueError, ProcessLookupError, PermissionError, OSError):
return False, None
def _ralph_eta_minutes(self, stories_remaining: int, last_iter_mtime: float | None) -> int | None:
"""Estimează minute rămase — None dacă nu avem date."""
if stories_remaining <= 0:
return 0
return stories_remaining * DEFAULT_ITER_MINUTES
def _ralph_summarize_project(self, project_dir: Path) -> dict | None:
"""Construiește dict de status per proiect — None dacă nu e Ralph project."""
ralph_dir = _ralph_dir(project_dir)
prd_json = ralph_dir / "prd.json"
if not prd_json.exists():
return None
# Defensive parse — corupt prd.json nu trebuie să dărâme dashboard
try:
prd = json.loads(prd_json.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return {
"slug": project_dir.name,
"status": "error",
"error": "prd.json invalid sau ilizibil",
"running": False,
"pid": None,
"stories": [],
"storiesTotal": 0,
"storiesComplete": 0,
"storiesFailed": 0,
"storiesBlocked": 0,
}
stories = prd.get("userStories", []) or []
total = len(stories)
complete = sum(1 for s in stories if s.get("passes"))
failed = sum(1 for s in stories if s.get("failed"))
blocked = sum(1 for s in stories if s.get("blocked"))
remaining = total - complete - failed - blocked
running, pid = self._ralph_pid_alive(ralph_dir)
# Last iteration mtime (pentru "acum X")
logs_dir = ralph_dir / "logs"
last_iter_mtime = None
last_iter_iso = None
if logs_dir.exists():
iter_logs = sorted(logs_dir.glob("iteration-*.log"), key=lambda f: f.stat().st_mtime, reverse=True)
if iter_logs:
last_iter_mtime = iter_logs[0].stat().st_mtime
last_iter_iso = datetime.fromtimestamp(last_iter_mtime).isoformat()
# Status compus pentru UI cards
if running:
top_status = "running"
elif failed > 0 and remaining == 0:
top_status = "failed"
elif complete == total and total > 0:
top_status = "complete"
elif blocked > 0 and running is False:
top_status = "blocked"
else:
top_status = "idle"
# Current story (DAG-eligible cel mai mic priority)
current_story = None
if running:
eligible = [
s for s in stories
if not s.get("passes") and not s.get("failed") and not s.get("blocked")
]
eligible.sort(key=lambda s: (s.get("priority", 999), s.get("id", "")))
if eligible:
current_story = {
"id": eligible[0].get("id"),
"title": eligible[0].get("title"),
"tags": eligible[0].get("tags", []),
"retries": eligible[0].get("retries", 0),
}
return {
"slug": project_dir.name,
"status": top_status,
"running": running,
"pid": pid,
"branchName": prd.get("branchName", ""),
"storiesTotal": total,
"storiesComplete": complete,
"storiesFailed": failed,
"storiesBlocked": blocked,
"storiesRemaining": remaining,
"currentStory": current_story,
"lastIterAt": last_iter_iso,
"etaMinutes": self._ralph_eta_minutes(remaining, last_iter_mtime),
"stories": [
{
"id": s.get("id"),
"title": s.get("title"),
"passes": bool(s.get("passes")),
"failed": bool(s.get("failed")),
"blocked": bool(s.get("blocked")),
"retries": int(s.get("retries", 0)),
"tags": s.get("tags", []),
"failureReason": s.get("failureReason", ""),
}
for s in stories
],
}
# ── /api/ralph/status (GET) ────────────────────────────────
def handle_ralph_status(self):
"""Întoarce status pentru toate proiectele Ralph din workspace."""
try:
projects = []
if not constants.WORKSPACE_DIR.exists():
self.send_json({"projects": [], "fetchedAt": datetime.now().isoformat()})
return
for entry in sorted(constants.WORKSPACE_DIR.iterdir()):
if not entry.is_dir() or entry.name.startswith("."):
continue
summary = self._ralph_summarize_project(entry)
if summary is not None:
projects.append(summary)
self.send_json({
"projects": projects,
"fetchedAt": datetime.now().isoformat(),
"count": len(projects),
})
except Exception as exc:
self.send_json({"error": str(exc)}, 500)
# ── /api/ralph/<slug>/log (GET) ────────────────────────────
def handle_ralph_log(self, slug: str):
"""Tail progress.txt pentru un slug. Default last 100 lines."""
try:
project_dir = self._ralph_validate_slug(slug)
if not project_dir:
self.send_json({"error": "Invalid project slug"}, 400)
return
from urllib.parse import parse_qs, urlparse
qs = parse_qs(urlparse(self.path).query)
try:
lines_n = min(int(qs.get("lines", ["100"])[0]), 1000)
except ValueError:
lines_n = 100
progress = _ralph_dir(project_dir) / "progress.txt"
if not progress.exists():
self.send_json({"slug": slug, "lines": [], "total": 0})
return
try:
content = progress.read_text(encoding="utf-8", errors="replace")
except OSError as exc:
self.send_json({"error": f"read failed: {exc}"}, 500)
return
all_lines = content.splitlines()
tail = all_lines[-lines_n:] if len(all_lines) > lines_n else all_lines
self.send_json({
"slug": slug,
"lines": tail,
"total": len(all_lines),
})
except Exception as exc:
self.send_json({"error": str(exc)}, 500)
# ── /api/ralph/<slug>/prd (GET) ────────────────────────────
def handle_ralph_prd(self, slug: str):
"""Returnează full prd.json pentru un slug."""
try:
project_dir = self._ralph_validate_slug(slug)
if not project_dir:
self.send_json({"error": "Invalid project slug"}, 400)
return
prd_json = _ralph_dir(project_dir) / "prd.json"
if not prd_json.exists():
self.send_json({"error": "prd.json not found"}, 404)
return
try:
data = json.loads(prd_json.read_text(encoding="utf-8"))
except json.JSONDecodeError as exc:
self.send_json({"error": f"prd.json invalid: {exc}"}, 500)
return
self.send_json(data)
except Exception as exc:
self.send_json({"error": str(exc)}, 500)
# ── /api/ralph/<slug>/stop (POST) ──────────────────────────
def handle_ralph_stop(self, slug: str):
"""Trimite SIGTERM la Ralph PID. Verifică că PID-ul e în WORKSPACE_DIR."""
try:
project_dir = self._ralph_validate_slug(slug)
if not project_dir:
self.send_json({"success": False, "error": "Invalid project slug"}, 400)
return
ralph_dir = _ralph_dir(project_dir)
pid_file = ralph_dir / ".ralph.pid"
if not pid_file.exists():
self.send_json({"success": False, "error": "No PID file"}, 404)
return
try:
pid = int(pid_file.read_text().strip())
except (ValueError, OSError) as exc:
self.send_json({"success": False, "error": f"Invalid PID file: {exc}"}, 500)
return
# Sandbox: verifică că procesul e în workspace (nu omoară random PID)
try:
proc_cwd = Path(f"/proc/{pid}/cwd").resolve()
if not str(proc_cwd).startswith(str(constants.WORKSPACE_DIR)):
self.send_json({"success": False, "error": "PID not in workspace"}, 403)
return
except (FileNotFoundError, PermissionError):
# Procesul nu mai există — best-effort cleanup
self.send_json({"success": True, "message": "Process already stopped"})
return
try:
os.killpg(os.getpgid(pid), signal.SIGTERM)
except ProcessLookupError:
self.send_json({"success": True, "message": "Process already stopped"})
return
except PermissionError:
self.send_json({"success": False, "error": "Permission denied"}, 403)
return
self.send_json({"success": True, "message": f"Ralph stopped (PID {pid})"})
except Exception as exc:
self.send_json({"success": False, "error": str(exc)}, 500)