feat(ralph): smart gates + DAG + dashboard live (W3)

Restructurare Ralph QC loop pe smart gate dispatcher tag-driven (în loc de
5 faze fixe), DAG dependsOn cu propagare blocked, retry guard 3-strike, rate
limit detection, plus dashboard live cu polling 5s.

Changes:
- tools/ralph_prd_generator.py: parametru optional final_plan_path; când e
  furnizat, invocă Claude Opus pe final-plan.md pentru extragere user stories
  cu schema extinsă (tags, dependsOn, acceptanceCriteria 3-5). Backward compat
  păstrat — fără final_plan_path, fallback la heuristic-ul vechi.
- tools/ralph/prd-template.json: schema W3 (tags[], dependsOn[], retries,
  failed, blocked, failureReason, requiresDesignReview).
- tools/ralph/prompt.md: 4 faze (impl, base quality, smart gates, commit) +
  dispatcher pe story.tags. Tags vide → run-all-gates fallback (safe default).
- tools/ralph_dag.py (nou): tag validation heuristic anti-silent-regression
  (force ui dacă diff atinge .vue/.tsx/.html/.css/.scss; force db pentru
  migrations sau .sql; force vercel dacă există vercel.json) + topological
  sort cu blocked propagation + atomic prd.json updates.
- tools/ralph/ralph.sh: --max-turns 30, DAG-aware story selection, retry
  counter cu auto-fail la 3, rate limit detection (sleep 30min + 1 retry),
  CLI subcommands prin tools/ralph_dag.py helper.
- dashboard/handlers/ralph.py (nou): /api/ralph/status + /<slug>/log + /prd
  + /stop. Defensive vs corrupt prd.json. Sandbox-ed PID kill.
- dashboard/ralph.html (nou): live cards 3/2/1 col responsive, polling 5s,
  drawer pentru log/PRD viewer, status colors (--status-running/blocked/
  failed/complete declarate inline), Lucide icons cu aria-labels.
- dashboard/api.py: mount /api/ralph/* (GET status/log/prd, POST stop).
- tests/: 72 teste noi (smart gates, DAG, retry, dashboard endpoint).

Note arhitecturale:
- Polling 5s ales peste SSE/WebSocket (suficient pentru iter Ralph 8-15min)
- Tag validation rulează POST-iter pe diff git pentru anti-silent-regression
- Rate limit retry: 1 dată per rulare, apoi mark failed=rate_limited

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-26 18:36:35 +00:00
parent e06a79d98c
commit 655ed3ae09
11 changed files with 2282 additions and 189 deletions

View File

@@ -1,7 +1,13 @@
#!/bin/bash
# Ralph pentru Claude Code - Loop autonom de agent AI
# Ralph pentru Claude Code - Loop autonom de agent AI (W3: smart gates + DAG + rate limit)
# Adaptat din Ralph original (snarktank/ralph) pentru Claude Code CLI
# Usage: ./ralph.sh [max_iterations] [project_dir]
#
# Env vars (opționale):
# RALPH_MAX_TURNS — --max-turns per iter (default 30)
# RALPH_RATE_LIMIT_SLEEP — sleep după rate limit detection (default 1800 = 30min)
# RALPH_DAG_HELPER — path la tools/ralph_dag.py (auto-detect default)
# RALPH_PYTHON — interpreter Python pentru DAG helper (default python3)
set -e
@@ -15,6 +21,24 @@ SCREENSHOTS_DIR="$SCRIPT_DIR/screenshots"
LAST_BRANCH_FILE="$SCRIPT_DIR/.last-branch"
PROMPT_FILE="$SCRIPT_DIR/prompt.md"
# W3 config: each value can be overridden through the matching RALPH_* env var.
MAX_TURNS=${RALPH_MAX_TURNS:-30}
RATE_LIMIT_SLEEP=${RALPH_RATE_LIMIT_SLEEP:-1800}
RALPH_PYTHON=${RALPH_PYTHON:-python3}

#######################################
# Resolve the path of the DAG helper script (ralph_dag.py).
# Lookup order:
#   1. $RALPH_DAG_HELPER, when set and pointing at an existing file
#   2. /home/moltbot/echo-core/tools/ralph_dag.py
#   3. /home/moltbot/echo-core-qc/tools/ralph_dag.py
#   4. $SCRIPT_DIR/ralph_dag.py
# Globals:   RALPH_DAG_HELPER (read), SCRIPT_DIR (read)
# Outputs:   the resolved path on stdout, or nothing when no candidate exists;
#            a warning on stderr when RALPH_DAG_HELPER is set but is not a file
# Returns:   always 0, so it is safe to call under `set -e`
#######################################
ralph_resolve_dag_helper() {
  local candidate

  if [ -n "${RALPH_DAG_HELPER:-}" ]; then
    if [ -f "$RALPH_DAG_HELPER" ]; then
      printf '%s\n' "$RALPH_DAG_HELPER"
      return 0
    fi
    # An explicit override that does not exist used to be ignored silently;
    # tell the user before falling back to auto-detection.
    printf 'Atenție: RALPH_DAG_HELPER=%s nu există; se folosește auto-detect\n' \
      "$RALPH_DAG_HELPER" >&2
  fi

  for candidate in \
    "/home/moltbot/echo-core/tools/ralph_dag.py" \
    "/home/moltbot/echo-core-qc/tools/ralph_dag.py" \
    "${SCRIPT_DIR:-}/ralph_dag.py"; do
    if [ -f "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  return 0
}

# Empty DAG_HELPER means "no helper available"; the dag_* wrappers check for that.
DAG_HELPER=$(ralph_resolve_dag_helper)
# Verifică că jq este instalat
if ! command -v jq &> /dev/null; then
echo "Eroare: jq nu este instalat. Rulează: apt install jq"
@@ -132,6 +156,51 @@ check_all_complete() {
[ "$incomplete" -eq 0 ]
}
# W3: "nothing eligible" means every story is already passes, failed or blocked.
# Globals: PRD_FILE (read)
# Returns: 0 when no story is left to work on, non-zero otherwise.
check_no_eligible() {
  local remaining
  # If jq fails (missing/corrupt PRD) fall back to 999 so the check reports
  # "still something eligible" instead of a false "nothing left".
  remaining=$(jq '[.userStories[] | select(.passes != true and .failed != true and .blocked != true)] | length' "$PRD_FILE" 2>/dev/null || echo "999")
  test "$remaining" -eq 0
}
# W3: pick the next eligible story. Prints the story ID, or "" when there is none.
# Globals: DAG_HELPER, RALPH_PYTHON, PRD_FILE (all read)
dag_next_story() {
  if [ -z "$DAG_HELPER" ]; then
    # Simple fallback (no DAG): first story with passes!=true && failed!=true
    # && blocked!=true, ordered by ascending priority.
    jq -r '[.userStories[] | select(.passes != true and .failed != true and .blocked != true)] | sort_by(.priority) | .[0].id // ""' "$PRD_FILE"
    return
  fi

  # Delegate to the helper's `next-story` subcommand; a helper failure is
  # reported as "no story" (empty line) rather than an error.
  "$RALPH_PYTHON" "$DAG_HELPER" next-story "$PRD_FILE" 2>/dev/null || echo ""
}
# Run the DAG helper's `incr-retry` subcommand for one story and print whatever
# the helper prints (the caller compares it numerically against 3).
# Prints "0" when no helper is configured or when the helper fails.
# Globals:   DAG_HELPER, RALPH_PYTHON, PRD_FILE (all read)
# Arguments: $1 - story ID
dag_incr_retry() {
  local story_id=$1

  if [ -z "$DAG_HELPER" ]; then
    echo "0"
    return
  fi

  "$RALPH_PYTHON" "$DAG_HELPER" incr-retry "$PRD_FILE" "$story_id" 2>/dev/null || echo "0"
}
# Run the DAG helper's `mark-failed` subcommand for a story, best effort.
# Does nothing when no helper is configured; helper errors are ignored.
# Globals:   DAG_HELPER, RALPH_PYTHON, PRD_FILE (all read)
# Arguments: $1 - story ID, $2 - failure reason passed through to the helper
# Returns:   always 0
dag_mark_failed() {
  local story_id=$1
  local why=$2

  if [ -z "$DAG_HELPER" ]; then
    return 0
  fi

  "$RALPH_PYTHON" "$DAG_HELPER" mark-failed "$PRD_FILE" "$story_id" "$why" 2>/dev/null || true
}
# Run the DAG helper's `force-tags` subcommand for a story, best effort.
# Does nothing when no helper is configured; helper errors are ignored.
# Globals:   DAG_HELPER, RALPH_PYTHON, PRD_FILE, PROJECT_DIR (all read)
# Arguments: $1 - story ID
# Returns:   always 0
dag_force_tags() {
  local story_id=$1

  if [ -z "$DAG_HELPER" ]; then
    return 0
  fi

  "$RALPH_PYTHON" "$DAG_HELPER" force-tags "$PRD_FILE" "$story_id" "$PROJECT_DIR" 2>/dev/null || true
}
# W3: detect a rate limit in Claude's output. This is a text heuristic, used
# because no dedicated exit code is relied upon.
# Arguments: $1 - captured output of the Claude invocation
# Returns:   0 when the output looks rate-limited, non-zero otherwise
is_rate_limited() {
  local output="$1"
  # "429" must stand alone: not embedded in a longer number or hex id such as
  # 0.14291, 64290 or "3f429ab0". A bare substring match on the JSON output
  # would trigger a false rate-limit sleep.
  # rate_limit_error is matched in addition to rate_limit_exceeded.
  local pattern='rate limit|rate_limit_exceeded|rate_limit_error|(^|[^[:alnum:].])429([^[:alnum:]]|$)|too many requests'
  # Here-string instead of echo: output starting with -n/-e is passed verbatim,
  # and there is no writer to hit SIGPIPE when grep -q exits early.
  grep -qiE -- "$pattern" <<<"$output"
}
# Afișare status inițial
echo ""
echo "======================================================================="
@@ -155,6 +224,9 @@ if check_all_complete; then
exit 0
fi
# Rate-limit retry tracker: at most one sleep-and-retry per run; a second rate limit aborts the run.
RATE_LIMIT_RETRY_USED=0
# Loop principal
for i in $(seq 1 $MAX_ITERATIONS); do
echo ""
@@ -162,17 +234,38 @@ for i in $(seq 1 $MAX_ITERATIONS); do
echo " Ralph Iterația $i din $MAX_ITERATIONS"
echo "==================================================================="
# W3: alege next story via DAG (propagă blocked dacă vreun dep a eșuat)
CURRENT_STORY=$(dag_next_story)
if [ -z "$CURRENT_STORY" ]; then
echo ""
echo "==================================================================="
if check_all_complete; then
echo " TOATE STORY-URILE DIN PRD SUNT COMPLETE!"
exit 0
else
echo " NICIUN STORY ELIGIBIL (toate fie complete, fie failed, fie blocked)"
echo " Stories incomplete:"
jq -r '.userStories[] | select(.passes != true) | " - \(.id): \(.title) [failed=\(.failed // false) blocked=\(.blocked // false) retries=\(.retries // 0)]"' "$PRD_FILE"
exit 0
fi
fi
# Status curent
COMPLETE_NOW=$(jq '[.userStories[] | select(.passes == true)] | length' "$PRD_FILE")
NEXT_STORY=$(jq -r '[.userStories[] | select(.passes != true)] | sort_by(.priority) | .[0] | "\(.id): \(.title)"' "$PRD_FILE")
NEXT_TITLE=$(jq -r --arg id "$CURRENT_STORY" '.userStories[] | select(.id == $id) | "\(.id): \(.title)"' "$PRD_FILE")
STORY_TAGS=$(jq -r --arg id "$CURRENT_STORY" '.userStories[] | select(.id == $id) | (.tags // []) | join(",")' "$PRD_FILE")
STORY_RETRIES=$(jq -r --arg id "$CURRENT_STORY" '.userStories[] | select(.id == $id) | (.retries // 0)' "$PRD_FILE")
echo " Progress: $COMPLETE_NOW / $TOTAL_STORIES stories complete"
echo " Next: $NEXT_STORY"
echo " Next: $NEXT_TITLE [tags: ${STORY_TAGS:-<none>}, retries: $STORY_RETRIES]"
echo ""
# Pregătește prompt-ul cu context
FULL_PROMPT=$(cat <<EOF
# Context pentru această iterație Ralph
## Story țintă (DAG-eligible):
$CURRENT_STORY (tags: ${STORY_TAGS:-<none>})
## PRD (prd.json):
$(cat "$PRD_FILE")
@@ -188,10 +281,34 @@ EOF
LOG_FILE="$SCRIPT_DIR/logs/iteration-$i-$(date +%Y%m%d-%H%M%S).log"
mkdir -p "$SCRIPT_DIR/logs"
# --output-format json avoids streaming mode issues
echo "$FULL_PROMPT" | claude -p --dangerously-skip-permissions --output-format json 2>&1 | tee "$LOG_FILE" || true
# --output-format json + --max-turns pentru control runtime
set +e
echo "$FULL_PROMPT" | claude -p \
--dangerously-skip-permissions \
--output-format json \
--max-turns "$MAX_TURNS" \
2>&1 | tee "$LOG_FILE"
CLAUDE_EXIT=${PIPESTATUS[1]}
set -e
OUTPUT=$(cat "$LOG_FILE")
# W3: rate limit detection (max 1 retry per rulare)
if is_rate_limited "$OUTPUT" || [ "$CLAUDE_EXIT" = "29" ]; then
if [ "$RATE_LIMIT_RETRY_USED" = "0" ]; then
echo ""
echo " ⏸️ Rate limit detectat. Sleep ${RATE_LIMIT_SLEEP}s, apoi retry o dată."
RATE_LIMIT_RETRY_USED=1
echo "## Rate limit la iter $i — sleep $RATE_LIMIT_SLEEP" >> "$PROGRESS_FILE"
sleep "$RATE_LIMIT_SLEEP"
continue # retry aceeași iterație
else
echo " ❌ Rate limit din nou — abort run, mark $CURRENT_STORY rate_limited"
dag_mark_failed "$CURRENT_STORY" "rate_limited"
echo "## Rate limit final la iter $i — abort" >> "$PROGRESS_FILE"
exit 2
fi
fi
# Verifică dacă toate task-urile sunt complete
if echo "$OUTPUT" | grep -q "<promise>COMPLETE</promise>"; then
echo ""
@@ -211,6 +328,23 @@ EOF
exit 0
fi
# W3: tag validation post-iter — chiar dacă Opus a marcat docs, dacă diff atinge .vue/.tsx, force ui
dag_force_tags "$CURRENT_STORY" >/dev/null 2>&1 || true
# W3: dacă story-ul curent ÎNCĂ nu trece (passes==false), incrementăm retries
STILL_INCOMPLETE=$(jq -r --arg id "$CURRENT_STORY" '.userStories[] | select(.id == $id) | (.passes == true)' "$PRD_FILE")
if [ "$STILL_INCOMPLETE" != "true" ]; then
NEW_RETRY=$(dag_incr_retry "$CURRENT_STORY")
echo " Story $CURRENT_STORY încă incomplet. Retries: $NEW_RETRY/3"
if [ "$NEW_RETRY" -ge 3 ] 2>/dev/null; then
echo "$CURRENT_STORY failed: max_retries — sare la următorul"
# mark-failed e deja făcut de incr-retry când >=3, dar idempotent o re-aplicăm
dag_mark_failed "$CURRENT_STORY" "max_retries"
fi
else
echo " ✅ Story $CURRENT_STORY marcat passes=true în iterația asta."
fi
echo " Iterația $i completă. Continuăm..."
sleep 2
done