feat(run): screenshot attach, Telegram ok:false fix, post-FIRE catchup guard
Three bundled fixes on the dispatch + FSM + notifier triangle:
1. Telegram silent-success bug: parse JSON body after 200 OK, raise on
ok:false so FanoutNotifier retries + DLQs + stats surface the failure.
Previously Discord succeeded while Telegram silently dropped.
2. Per-kind screenshot attach: new AlertsCfg dataclass with per-kind toggle
(late_start, catchup, arm, prime, trigger). _save_annotated_frame helper
extracted from inline FIRE block, threaded via Snapshot closure into
_handle_tick. Failures audit-logged, never silent.
3. Post-FIRE catchup regression (d7305fb): residual dark_green/dark_red dots
after a FIRE cycle look like startup-catchup from (color, state) alone.
New fsm.fired_in_session(direction) gate suppresses synth-arm after a
cycle already fired in that direction. Opposite direction unaffected.
Also: queue-overflow on_drop audit callback, periodic + shutdown heartbeat
stats per-backend, config back-compat (bool or dict for attach_screenshots).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -156,19 +156,43 @@ def test_stop_drains(tmp_path: Path) -> None:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _MockResponse:
|
||||
def __init__(self, status_code: int, text: str = "") -> None:
|
||||
def __init__(
|
||||
self,
|
||||
status_code: int,
|
||||
text: str = "",
|
||||
json_body: dict | None = None,
|
||||
raise_on_json: bool = False,
|
||||
) -> None:
|
||||
self.status_code = status_code
|
||||
self.text = text
|
||||
self._json_body = json_body if json_body is not None else {"ok": True, "result": {}}
|
||||
self._raise_on_json = raise_on_json
|
||||
|
||||
def json(self):
|
||||
if self._raise_on_json:
|
||||
raise ValueError("no JSON body")
|
||||
return self._json_body
|
||||
|
||||
|
||||
class _MockSession:
    """Fake HTTP session that records every ``post`` and returns a canned reply.

    Attributes:
        status_code: Status code given to every response.
        calls: One dict per ``post`` call, holding ``"url"`` plus the keyword
            arguments the caller passed.
    """

    def __init__(
        self,
        status_code: int = 204,
        json_body: dict | None = None,
        raise_on_json: bool = False,
    ) -> None:
        """Configure the reply every ``post`` call will produce.

        Args:
            status_code: Status code for each ``_MockResponse``.
            json_body: Forwarded to ``_MockResponse`` unchanged.
            raise_on_json: Forwarded to ``_MockResponse`` unchanged.
        """
        self.status_code = status_code
        self._json_body = json_body
        self._raise_on_json = raise_on_json
        self.calls: list[dict] = []

    def post(self, url: str, **kwargs):
        """Record the request and return the configured ``_MockResponse``."""
        self.calls.append({"url": url, **kwargs})
        # The JSON settings must reach the response: an earlier bare
        # `return _MockResponse(self.status_code)` here would make every
        # reply fall back to the default body.
        return _MockResponse(
            self.status_code,
            json_body=self._json_body,
            raise_on_json=self._raise_on_json,
        )
|
||||
|
||||
|
||||
def test_discord_send_ok() -> None:
|
||||
@@ -219,3 +243,118 @@ def test_telegram_5xx_raises() -> None:
|
||||
n = TelegramNotifier("token", "chat123", session=_MockSession(500))
|
||||
with pytest.raises(RuntimeError, match="500"):
|
||||
n.send(_alert("x"))
|
||||
|
||||
|
||||
# Telegram returns 200 OK with {"ok": false, ...} for logical failures (bot
|
||||
# blocked, invalid chat_id, parse_mode errors). Previously silent — now raises
|
||||
# so FanoutNotifier retries + DLQs + stats count the failure.
|
||||
|
||||
def test_telegram_ok_true_passes() -> None:
    """HTTP 200 with an ``ok: true`` body counts as a successful send."""
    from atm.notifier.telegram import TelegramNotifier

    ok_body = {"ok": True, "result": {"message_id": 42}}
    http = _MockSession(200, json_body=ok_body)
    notifier = TelegramNotifier("token", "chat123", session=http)

    # send() has to return normally; any exception fails the test.
    notifier.send(_alert("ok body"))

    # Exactly one POST was recorded by the fake session.
    assert len(http.calls) == 1
|
||||
|
||||
|
||||
def test_telegram_ok_false_raises() -> None:
    """HTTP 200 with an ``ok: false`` body must raise ``RuntimeError``.

    The message has to match ``logical failure``, then the error code, then
    part of the description, in that order.
    """
    from atm.notifier.telegram import TelegramNotifier

    blocked_body = {
        "ok": False,
        "error_code": 403,
        "description": "Forbidden: bot was blocked by the user",
    }
    http = _MockSession(200, json_body=blocked_body)
    notifier = TelegramNotifier("token", "chat123", session=http)

    with pytest.raises(RuntimeError, match="logical failure.*403.*blocked"):
        notifier.send(_alert("x"))
|
||||
|
||||
|
||||
def test_telegram_malformed_json_treated_as_success() -> None:
    """HTTP 200 whose body cannot be parsed as JSON is accepted without raising.

    The fake response raises ``ValueError`` from ``json()`` to simulate the
    unparseable body.
    """
    from atm.notifier.telegram import TelegramNotifier

    http = _MockSession(200, raise_on_json=True)
    notifier = TelegramNotifier("token", "chat123", session=http)

    # Passing means send() returned normally despite json() failing.
    notifier.send(_alert("x"))
|
||||
|
||||
|
||||
def test_telegram_ok_false_goes_to_dlq(tmp_path: Path) -> None:
    """Integration: a persistent ``ok: false`` reply ends in one dead-letter entry.

    Checks the number of HTTP attempts, the per-backend stats, and the
    contents of the dead-letter file.
    """
    from atm.notifier.telegram import TelegramNotifier

    failing_body = {"ok": False, "error_code": 400, "description": "chat not found"}
    http = _MockSession(200, json_body=failing_body)
    telegram = TelegramNotifier("token", "chat123", session=http)

    dead_letter = tmp_path / "dead.jsonl"
    fanout = FanoutNotifier([telegram], dead_letter, max_retries=3, backoff_base=0.01)
    fanout.send(_alert("will-fail"))
    fanout.stop(timeout=5.0)

    # One initial attempt plus three retries.
    assert len(http.calls) == 4

    telegram_stats = fanout.stats()["telegram"]
    assert telegram_stats["failed"] == 1
    assert telegram_stats["retries"] == 3
    assert telegram_stats["sent"] == 0

    assert dead_letter.exists()
    records = list(map(json.loads, dead_letter.read_text().splitlines()))
    assert len(records) == 1

    (record,) = records
    assert record["backend"] == "telegram"
    assert record["alert_title"] == "will-fail"
    # Both the description and the error code must appear in the stored error.
    assert "chat not found" in record["error_str"]
    assert "400" in record["error_str"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# on_drop callback — queue overflow audit trail
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_fanout_on_drop_callback_invoked(tmp_path: Path) -> None:
    """Overflowing a backend's queue reports each dropped alert via ``on_drop``."""
    recorded: list[tuple[str, Alert]] = []

    def on_drop(name: str, alert: Alert) -> None:
        recorded.append((name, alert))

    dead_letter = tmp_path / "dead.jsonl"
    slow_backend = FakeBackend("slow", sleep_s=0.2)
    fanout = FanoutNotifier(
        [slow_backend],
        dead_letter,
        queue_size=2,
        backoff_base=0.01,
        on_drop=on_drop,
    )

    # Ten sends against a queue of two and a 0.2 s backend: some must overflow.
    for index in range(10):
        fanout.send(_alert(f"a{index}"))
    fanout.stop(timeout=10.0)

    assert len(recorded) > 0
    assert all(backend_name == "slow" for backend_name, _ in recorded)

    # The oldest alerts are the ones expected to be dropped.
    dropped_titles = {alert.title for _, alert in recorded}
    assert dropped_titles & {"a0", "a1"}
|
||||
|
||||
|
||||
def test_fanout_on_drop_exception_swallowed(tmp_path: Path) -> None:
    """A raising ``on_drop`` callback must not break dispatch.

    A failure in the audit callback must not silence alerts: the sends below
    must not raise, and the backend must still deliver at least one alert.
    """
    def bad_on_drop(_name: str, _alert: Alert) -> None:
        raise RuntimeError("audit broken")

    dl = tmp_path / "dead.jsonl"
    slow = FakeBackend("slow", sleep_s=0.2)
    fan = FanoutNotifier(
        [slow], dl, queue_size=2, backoff_base=0.01, on_drop=bad_on_drop,
    )
    # Must not raise despite every drop invoking bad_on_drop.
    for i in range(10):
        fan.send(_alert(f"a{i}"))
    fan.stop(timeout=10.0)

    s = fan.stats()
    # Some alerts still went through. The previous form
    # (`sent > 0 or dropped > 0`) also passed when nothing was delivered,
    # so it could not detect dispatch dying after a callback failure.
    assert s["slow"]["sent"] > 0
|
||||
|
||||
Reference in New Issue
Block a user