feat(dashboard): unified workspace hub — cookie auth, 9-state projects, planning chat

Merges workspace.html + ralph.html into a single unified project hub with:
- Cookie-based auth (DASHBOARD_TOKEN, HttpOnly, SameSite=Strict)
- 9-state project badge system (running-ralph/manual, planning, approved,
  pending, blocked, failed, complete, idle) with BUTTONS_FOR_STATE matrix
- SSE realtime + polling fallback, version-based optimistic concurrency (If-Match)
- Planning chat modal (phase stepper, markdown bubbles, 50s+ wait state, auto-resume)
- Propose modal (Variant B: inline Plan-with-Echo checkbox)
- 5-type toast taxonomy (success/info/warning/busy/error, 3px colored left-bar)
- Inter font self-hosted + shared tokens.css design system + DESIGN.md
- src/jsonlock.py (flock helper, sidecar .lock for stable inode)
- src/approved_tasks_cli.py (shell-safe wrapper for cron/ralph.sh)
- 55 new tests (T#1–T#30) + real jsonlock bug fix caught by T#16/T#28
- No emoji anywhere (enforced by test_dashboard_no_emoji.py)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 07:26:19 +00:00
parent e771479d67
commit 5e930ade02
26 changed files with 5700 additions and 1569 deletions

280
src/approved_tasks_cli.py Normal file
View File

@@ -0,0 +1,280 @@
"""CLI wrapper for atomic mutations of `approved-tasks.json`.
Shell scripts (ralph.sh) and cron-job prompts cannot import
`src.jsonlock` directly. This module is the bridge: every subcommand
serialises its mutation through ``write_locked`` so external writers
honour the same flock invariant as the in-process code in
``src/router.py`` / ``src/planning_session.py``.
Run via:
python3 -m src.approved_tasks_cli <subcommand> [args]
Subcommands:
set-status --slug SLUG --status STATUS
set-field --slug SLUG --key KEY --value VALUE [--int|--null|--now|--json]
add-project --slug SLUG --description DESC [--status STATUS]
mark-running --slug SLUG --pid PID
mark-failed --slug SLUG [--error MSG]
show [--slug SLUG] # read-only inspection
All mutators bump ``last_updated`` to the current UTC ISO timestamp.
Exit codes:
0 success
1 bad usage / invalid argument
2 slug not found
3 lock timeout / IO error
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
# Make `from src.jsonlock import ...` work whether invoked as
# `python -m src.approved_tasks_cli` (sys.path already correct) or as
# `python3 src/approved_tasks_cli.py` (need to prepend project root).
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(_PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(_PROJECT_ROOT))
from src.jsonlock import read_locked, write_locked, LockTimeoutError # noqa: E402
APPROVED_TASKS_FILE = _PROJECT_ROOT / "approved-tasks.json"
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _bump_timestamp(data: dict) -> None:
data["last_updated"] = _now_iso()
def _find_project(data: dict, slug: str) -> dict | None:
for p in data.get("projects", []):
if p.get("name", "").lower() == slug.lower():
return p
return None
def _coerce_value(value: str, *, as_int: bool, as_null: bool, as_now: bool, as_json: bool):
if as_null:
return None
if as_now:
return _now_iso()
if as_int:
try:
return int(value)
except ValueError as exc:
raise SystemExit(f"value '{value}' is not a valid int: {exc}") from exc
if as_json:
try:
return json.loads(value)
except json.JSONDecodeError as exc:
raise SystemExit(f"value is not valid JSON: {exc}") from exc
return value
# ---- subcommand implementations -------------------------------------------
def cmd_set_status(args) -> int:
    """Subcommand ``set-status``: overwrite a project's status under the flock.

    Exit codes: 0 on success, 2 when the slug does not exist.
    """

    def _apply(data: dict) -> dict:
        project = _find_project(data, args.slug)
        if project is None:
            raise KeyError(args.slug)
        project["status"] = args.status
        _bump_timestamp(data)
        return data

    try:
        write_locked(str(APPROVED_TASKS_FILE), _apply)
    except KeyError:
        print(f"slug '{args.slug}' not found", file=sys.stderr)
        return 2
    print(f"set status of '{args.slug}' = '{args.status}'")
    return 0
def cmd_set_field(args) -> int:
    """Subcommand ``set-field``: write an arbitrary key on a project entry.

    The value is coerced up-front (so a bad --int/--json aborts before any
    lock is taken). Exit codes: 0 on success, 2 when the slug is unknown.
    """
    new_value = _coerce_value(
        args.value or "",
        as_int=args.int,
        as_null=args.null,
        as_now=args.now,
        as_json=args.json_value,
    )

    def _apply(data: dict) -> dict:
        project = _find_project(data, args.slug)
        if project is None:
            raise KeyError(args.slug)
        project[args.key] = new_value
        _bump_timestamp(data)
        return data

    try:
        write_locked(str(APPROVED_TASKS_FILE), _apply)
    except KeyError:
        print(f"slug '{args.slug}' not found", file=sys.stderr)
        return 2
    print(f"set {args.slug}.{args.key} = {new_value!r}")
    return 0
def cmd_add_project(args) -> int:
    """Subcommand ``add-project``: append a fresh project entry.

    Refuses to overwrite an existing slug (exit code 1); all other fields
    start out None except ``proposed_at`` which is stamped with now-UTC.
    """

    def _apply(data: dict) -> dict:
        data.setdefault("projects", [])
        if _find_project(data, args.slug) is not None:
            raise FileExistsError(args.slug)
        data["projects"].append({
            "name": args.slug,
            "description": args.description,
            "status": args.status,
            "planning_session_id": None,
            "final_plan_path": None,
            "proposed_at": _now_iso(),
            "approved_at": None,
            "started_at": None,
            "pid": None,
        })
        _bump_timestamp(data)
        return data

    try:
        write_locked(str(APPROVED_TASKS_FILE), _apply)
    except FileExistsError:
        print(f"slug '{args.slug}' already exists — refusing to overwrite", file=sys.stderr)
        return 1
    print(f"added project '{args.slug}' (status={args.status})")
    return 0
def cmd_mark_running(args) -> int:
    """Convenience: status=running, started_at=now, pid=<PID> in ONE locked write.

    Exit codes: 0 on success, 2 when the slug does not exist.
    """

    def _apply(data: dict) -> dict:
        project = _find_project(data, args.slug)
        if project is None:
            raise KeyError(args.slug)
        project["status"] = "running"
        project["started_at"] = _now_iso()
        project["pid"] = args.pid
        _bump_timestamp(data)
        return data

    try:
        write_locked(str(APPROVED_TASKS_FILE), _apply)
    except KeyError:
        print(f"slug '{args.slug}' not found", file=sys.stderr)
        return 2
    print(f"marked '{args.slug}' running (pid={args.pid})")
    return 0
def cmd_mark_failed(args) -> int:
    """Subcommand ``mark-failed``: set status=failed, optionally recording an error.

    Exit codes: 0 on success, 2 when the slug does not exist.
    """

    def _apply(data: dict) -> dict:
        project = _find_project(data, args.slug)
        if project is None:
            raise KeyError(args.slug)
        project["status"] = "failed"
        if args.error:
            project["error"] = args.error
        _bump_timestamp(data)
        return data

    try:
        write_locked(str(APPROVED_TASKS_FILE), _apply)
    except KeyError:
        print(f"slug '{args.slug}' not found", file=sys.stderr)
        return 2
    print(f"marked '{args.slug}' failed")
    return 0
def cmd_show(args) -> int:
    """Subcommand ``show``: dump the whole file, or one project, as pretty JSON.

    Read-only (shared flock). Exit codes: 0 on success, 2 for an unknown
    slug, 3 when the data file does not exist yet.
    """
    try:
        data = read_locked(str(APPROVED_TASKS_FILE))
    except FileNotFoundError:
        print(f"file not found: {APPROVED_TASKS_FILE}", file=sys.stderr)
        return 3
    if not args.slug:
        print(json.dumps(data, indent=2, ensure_ascii=False))
        return 0
    proj = _find_project(data, args.slug)
    if proj is None:
        print(f"slug '{args.slug}' not found", file=sys.stderr)
        return 2
    print(json.dumps(proj, indent=2, ensure_ascii=False))
    return 0
# ---- argparse setup -------------------------------------------------------
def _build_parser() -> argparse.ArgumentParser:
    """Build the argparse tree; each subcommand binds its handler via set_defaults."""
    parser = argparse.ArgumentParser(
        prog="approved-tasks",
        description="Atomic CLI for approved-tasks.json (uses src.jsonlock.write_locked).",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    set_status = commands.add_parser("set-status", help="Set the status field of a project.")
    set_status.add_argument("--slug", required=True)
    set_status.add_argument("--status", required=True)
    set_status.set_defaults(func=cmd_set_status)

    set_field = commands.add_parser("set-field", help="Set an arbitrary field on a project.")
    set_field.add_argument("--slug", required=True)
    set_field.add_argument("--key", required=True)
    set_field.add_argument("--value", default="", help="String value (or use --null/--now).")
    coercions = set_field.add_mutually_exclusive_group()
    coercions.add_argument("--int", action="store_true", help="Coerce --value to int.")
    coercions.add_argument("--null", action="store_true", help="Set value to JSON null.")
    coercions.add_argument("--now", action="store_true",
                           help="Set value to current UTC ISO timestamp.")
    coercions.add_argument("--json-value", action="store_true", dest="json_value",
                           help="Parse --value as JSON.")
    set_field.set_defaults(func=cmd_set_field)

    add_project = commands.add_parser("add-project", help="Append a new project entry.")
    add_project.add_argument("--slug", required=True)
    add_project.add_argument("--description", required=True)
    add_project.add_argument("--status", default="pending")
    add_project.set_defaults(func=cmd_add_project)

    mark_running = commands.add_parser(
        "mark-running", help="Atomic: status=running, started_at=now, pid=<PID>.")
    mark_running.add_argument("--slug", required=True)
    mark_running.add_argument("--pid", type=int, required=True)
    mark_running.set_defaults(func=cmd_mark_running)

    mark_failed = commands.add_parser(
        "mark-failed", help="Set status=failed (and optionally an error message).")
    mark_failed.add_argument("--slug", required=True)
    mark_failed.add_argument("--error", default=None)
    mark_failed.set_defaults(func=cmd_mark_failed)

    show = commands.add_parser("show", help="Print approved-tasks.json (or one project) to stdout.")
    show.add_argument("--slug", default=None)
    show.set_defaults(func=cmd_show)

    return parser
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: dispatch to the chosen subcommand.

    Maps lock timeouts and I/O failures to exit code 3, per the module
    contract; the subcommand handlers supply codes 0/1/2 themselves.
    """
    args = _build_parser().parse_args(argv)
    try:
        return args.func(args)
    except LockTimeoutError as exc:
        print(f"lock timeout: {exc}", file=sys.stderr)
        return 3
    except OSError as exc:
        print(f"io error: {exc}", file=sys.stderr)
        return 3


if __name__ == "__main__":
    sys.exit(main())

147
src/jsonlock.py Normal file
View File

@@ -0,0 +1,147 @@
"""Shared flock-based JSON locking helper.
Lock ordering invariant: always acquire locks in alphabetical order by filename
to avoid deadlock when a caller holds multiple locks simultaneously.
Implementation note (2026-04 — Lane C2 fix):
We lock on a sidecar `<path>.lock` file rather than the data file itself.
That's because `write_locked` uses `os.replace(tmp, target)` for atomic
publish — but `replace` swaps the inode behind `target`, which means a flock
held on the *old* fd no longer guards the new file. Concurrent writers on a
sidecar lockfile (whose inode is stable) get correct serialisation across
threads and processes.
"""
import errno
import fcntl
import json
import logging
import os
import threading
import time
from typing import Callable
_TIMEOUT_SEC = 5.0
_POLL_INTERVAL = 0.05
_log = logging.getLogger(__name__)
class LockTimeoutError(Exception):
    """Raised when the flock on a sidecar lockfile cannot be acquired in time."""
    pass
_local = threading.local()
def _held_locks() -> dict:
    """Per-thread map: abspath -> (lockfd, refcount). Used for re-entrancy."""
    locks = getattr(_local, 'locks', None)
    if locks is None:
        locks = {}
        _local.locks = locks
    return locks
def _try_lock(fd: int, lock_type: int, timeout: float) -> bool:
    """Poll for *lock_type* on *fd*; return True on acquisition, False on timeout.

    Non-blocking flock attempts every _POLL_INTERVAL seconds until the
    monotonic deadline passes.
    """
    deadline = time.monotonic() + timeout
    acquired = False
    while not acquired:
        try:
            fcntl.flock(fd, lock_type | fcntl.LOCK_NB)
            acquired = True
        except BlockingIOError:
            if time.monotonic() >= deadline:
                break
            time.sleep(_POLL_INTERVAL)
    return acquired
def _acquire(fd: int, lock_type: int) -> None:
    """Block until *lock_type* is held on *fd*, or raise LockTimeoutError.

    Two consecutive _TIMEOUT_SEC polling windows are attempted (i.e. one
    full retry) before giving up.
    """
    for _attempt in range(2):
        if _try_lock(fd, lock_type, _TIMEOUT_SEC):
            return
    raise LockTimeoutError(
        f"could not acquire flock within {2 * _TIMEOUT_SEC}s (after retry)"
    )
def _open_lockfile(abspath: str) -> int:
"""Open (creating if needed) the sidecar `<abspath>.lock` file."""
lock_path = abspath + ".lock"
# Ensure the parent dir exists — write_locked auto-creates the data file
# too, so we should be tolerant of the parent dir not having ever been
# touched.
parent = os.path.dirname(lock_path)
if parent:
try:
os.makedirs(parent, exist_ok=True)
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
return os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
def read_locked(path: str) -> dict:
    """Read and parse JSON from *path* under a shared flock on its sidecar lockfile.

    Re-entrant: if this thread already holds the lock for *path* (tracked in
    the thread-local held map), the file is read directly without flocking
    again. Raises FileNotFoundError if the data file is missing,
    json.JSONDecodeError on malformed contents, and LockTimeoutError if the
    shared lock cannot be acquired within the retry budget.
    """
    abspath = os.path.abspath(path)
    held = _held_locks()
    if abspath in held:
        _log.debug("re-entrant read on %s; skipping flock", abspath)
        with open(abspath, 'r', encoding='utf-8') as f:
            return json.load(f)
    lock_fd = _open_lockfile(abspath)
    try:
        _acquire(lock_fd, fcntl.LOCK_SH)
        # Register as held so nested read/write calls from this thread
        # skip re-locking instead of deadlocking on themselves.
        held[abspath] = (lock_fd, 1)
        try:
            with open(abspath, 'r', encoding='utf-8') as f:
                return json.load(f)
        finally:
            # Deregister BEFORE unlocking so no window exists where the map
            # claims a lock this thread no longer holds.
            held.pop(abspath, None)
            try:
                fcntl.flock(lock_fd, fcntl.LOCK_UN)
            except OSError:
                pass
    finally:
        os.close(lock_fd)
def write_locked(path: str, mutator: Callable[[dict], dict]) -> dict:
    """Read-modify-write JSON at *path* atomically under an exclusive flock.

    ``mutator`` receives the current parsed contents (``{}`` if the file is
    missing or empty) and returns the new contents, which are published via
    a sibling tmp file + ``os.replace`` so readers never observe a partial
    write. Re-entrant: a thread already holding the lock for *path* skips
    flocking. Exceptions raised by ``mutator`` propagate unchanged and
    leave the data file untouched.

    Returns the new data dict. Raises LockTimeoutError if the exclusive
    lock cannot be acquired within the retry budget.
    """
    abspath = os.path.abspath(path)
    held = _held_locks()
    reentrant = abspath in held
    lock_fd = -1 if reentrant else _open_lockfile(abspath)
    try:
        if not reentrant:
            _acquire(lock_fd, fcntl.LOCK_EX)
            held[abspath] = (lock_fd, 1)
        else:
            _log.debug("re-entrant write on %s; skipping flock", abspath)
        try:
            # Read current data (file may not exist yet — treat as {}).
            try:
                with open(abspath, 'r', encoding='utf-8') as f:
                    text = f.read()
                data = json.loads(text) if text.strip() else {}
            except FileNotFoundError:
                data = {}
            new_data = mutator(data)
            # Atomic-rename invariant: tmp file MUST be on the same filesystem
            # as the target (sibling path guarantees this).
            tmp_path = abspath + ".tmp"
            try:
                with open(tmp_path, 'w', encoding='utf-8') as tmp:
                    json.dump(new_data, tmp, indent=2)
                    tmp.flush()
                    os.fsync(tmp.fileno())
                os.replace(tmp_path, abspath)
            except BaseException:
                # Bug fix: do not leak a stale sibling .tmp file when
                # serialisation, fsync, or the rename itself fails.
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
                raise
            return new_data
        finally:
            if not reentrant:
                # Deregister before unlocking (mirror of read_locked).
                held.pop(abspath, None)
                try:
                    fcntl.flock(lock_fd, fcntl.LOCK_UN)
                except OSError:
                    pass
    finally:
        if not reentrant and lock_fd >= 0:
            os.close(lock_fd)

View File

@@ -37,7 +37,6 @@ import logging
import os
import shutil
import subprocess
import tempfile
import threading
import time
import uuid
@@ -52,6 +51,7 @@ from src.claude_session import (
_run_claude,
_safe_env,
)
from src.jsonlock import read_locked, write_locked
logger = logging.getLogger(__name__)
_invoke_log = logging.getLogger("echo-core.invoke")
@@ -106,33 +106,17 @@ def _channel_key(adapter: str, channel_id: str) -> str:
def _load_planning_state() -> dict:
"""Load planning sessions from disk. Returns {} if missing or empty."""
"""Load planning sessions from disk under a shared flock. Returns {} if missing."""
try:
text = PLANNING_STATE_FILE.read_text(encoding="utf-8")
if not text.strip():
return {}
return json.loads(text)
return read_locked(str(PLANNING_STATE_FILE))
except (FileNotFoundError, json.JSONDecodeError):
return {}
def _save_planning_state(data: dict) -> None:
"""Atomically write planning sessions via tempfile + os.replace."""
"""Persist planning sessions under an exclusive flock + atomic replace."""
SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
dir=SESSIONS_DIR, prefix=".planning_", suffix=".json"
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
f.write("\n")
os.replace(tmp_path, PLANNING_STATE_FILE)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
write_locked(str(PLANNING_STATE_FILE), lambda _existing: data)
# ---------------------------------------------------------------------------

View File

@@ -18,6 +18,7 @@ from src.claude_session import (
set_session_model,
VALID_MODELS,
)
from src.jsonlock import read_locked, write_locked
from src.planning_orchestrator import PlanningOrchestrator
from src.planning_session import (
clear_planning_state,
@@ -210,15 +211,20 @@ def _model_command(channel_id: str, text: str) -> str:
def _load_approved_tasks() -> dict:
"""Load approved-tasks.json, return empty structure if missing."""
if APPROVED_TASKS_FILE.exists():
return json.loads(APPROVED_TASKS_FILE.read_text())
return {"projects": [], "last_updated": None}
"""Load approved-tasks.json under a shared flock; empty structure if missing."""
try:
data = read_locked(str(APPROVED_TASKS_FILE))
except FileNotFoundError:
return {"projects": [], "last_updated": None}
if not data:
return {"projects": [], "last_updated": None}
return data
def _save_approved_tasks(data: dict) -> None:
"""Persist approved-tasks.json under an exclusive flock + atomic replace."""
data["last_updated"] = datetime.now(timezone.utc).isoformat()
APPROVED_TASKS_FILE.write_text(json.dumps(data, indent=2, ensure_ascii=False))
write_locked(str(APPROVED_TASKS_FILE), lambda _existing: data)
RALPH_CMDS = {