feat(run): extract detection loop helpers + unconditional cmd drain

Refactor _detection_loop by moving _run_tick, _handle_fsm_result, _dispatch_command, and _drain_cmd_queue to module scope, passing dependencies via a RunContext dataclass. This unblocks direct unit testing of the drain path. CRITICAL bug fix: the previous loop issued `continue` when the tick returned res=None (canary paused or similar), which skipped the drain block. Commands piled up in cmd_queue while detection was paused — the hang observed on 2026-04-17 after canary drift-pause. The refactored loop now runs _drain_cmd_queue UNCONDITIONALLY on every iteration, after _handle_fsm_result, so pause-state never starves the command channel. Tests: test_drain_works_when_canary_paused, test_drain_works_when_out_of_window, test_drain_isolates_dispatch_exceptions (exception isolation + audit/warn wiring). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-18 11:52:28 +03:00
parent 153196f762
commit c5024ce600
2 changed files with 314 additions and 119 deletions
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -401,3 +401,139 @@ async def test_lifecycle_idle_armed_primed_autopoll_fire_stop(monkeypatch, tmp_p
    start_idx = scheduler_events.index("start:180")
    stop_idx = scheduler_events.index("stop")
    assert start_idx < stop_idx, "scheduler started after it stopped"
+
+
+# ---------------------------------------------------------------------------
+# Commit 1 regression tests: _drain_cmd_queue MUST run unconditionally,
+# even when canary is paused or when detection is otherwise skipped.
+# Prior bug: `continue` past the drain loop caused commands to pile up.
+# ---------------------------------------------------------------------------
+
+def _make_ctx_for_drain(cmd_queue, dispatched: list):
+    """Build a minimal RunContext where _dispatch_command just records calls."""
+    import atm.main as _main
+
+    class _FakeAudit:
+        def __init__(self): self.events = []
+        def log(self, e): self.events.append(e)
+
+    class _FakeNotifier:
+        def __init__(self): self.alerts = []
+        def send(self, a): self.alerts.append(a)
+
+    class _FakeCanary:
+        def __init__(self, paused=True):
+            self.is_paused = paused
+
+    class _FakeScheduler:
+        is_running = False
+        interval_s = None
+        def start(self, s): pass
+        def stop(self): pass
+
+    state = _main._LoopState(start=0.0)
+    ctx = _main.RunContext(
+        cfg=MagicMock(),
+        capture=lambda: None,
+        canary=_FakeCanary(paused=True),
+        detector=MagicMock(),
+        fsm=MagicMock(),
+        notifier=_FakeNotifier(),
+        audit=_FakeAudit(),
+        detection_log=_FakeAudit(),
+        scheduler=_FakeScheduler(),
+        samples_dir=Path("."),
+        fires_dir=Path("."),
+        cmd_queue=cmd_queue,
+        state=state,
+        levels_extractor_factory=lambda *a, **kw: None,
+    )
+    return ctx
+
+
+@pytest.mark.asyncio
+async def test_drain_works_when_canary_paused(monkeypatch):
+    """Regression: when canary.is_paused, _drain_cmd_queue still dispatches.
+
+    Prior bug: detection loop `continue`'d past the drain block whenever the
+    tick returned res=None (canary paused). Commands accumulated forever.
+    """
+    import atm.main as _main
+    from atm.commands import Command
+
+    q: asyncio.Queue = asyncio.Queue()
+    await q.put(Command(action="status"))
+    await q.put(Command(action="ss"))
+
+    dispatched: list = []
+
+    async def _fake_dispatch(ctx, cmd):
+        dispatched.append(cmd.action)
+
+    monkeypatch.setattr(_main, "_dispatch_command", _fake_dispatch)
+
+    ctx = _make_ctx_for_drain(q, dispatched)
+
+    await _main._drain_cmd_queue(ctx)
+
+    assert dispatched == ["status", "ss"]
+    assert q.empty()
+
+
+@pytest.mark.asyncio
+async def test_drain_works_when_out_of_window(monkeypatch):
+    """Drain must still fire when the tick skipped (e.g. out of operating hours).
+
+    The refactored loop runs _drain_cmd_queue unconditionally after every tick,
+    regardless of `_TickSyncResult` content.
+    """
+    import atm.main as _main
+    from atm.commands import Command
+
+    q: asyncio.Queue = asyncio.Queue()
+    await q.put(Command(action="stop"))
+
+    dispatched: list = []
+
+    async def _fake_dispatch(ctx, cmd):
+        dispatched.append(cmd.action)
+
+    monkeypatch.setattr(_main, "_dispatch_command", _fake_dispatch)
+
+    ctx = _make_ctx_for_drain(q, dispatched)
+    # Simulate out-of-window tick (empty _TickSyncResult, no res)
+    await _main._handle_fsm_result(ctx, _main._TickSyncResult())
+    await _main._drain_cmd_queue(ctx)
+
+    assert dispatched == ["stop"]
+
+
+@pytest.mark.asyncio
+async def test_drain_isolates_dispatch_exceptions(monkeypatch):
+    """If one command raises, remaining commands still drain + warn alert sent."""
+    import atm.main as _main
+    from atm.commands import Command
+
+    q: asyncio.Queue = asyncio.Queue()
+    await q.put(Command(action="status"))
+    await q.put(Command(action="ss"))
+
+    attempts: list = []
+
+    async def _fake_dispatch(ctx, cmd):
+        attempts.append(cmd.action)
+        if cmd.action == "status":
+            raise RuntimeError("boom")
+
+    monkeypatch.setattr(_main, "_dispatch_command", _fake_dispatch)
+
+    ctx = _make_ctx_for_drain(q, attempts)
+    await _main._drain_cmd_queue(ctx)
+
+    assert attempts == ["status", "ss"]
+    # warn alert for the failed command
+    warn_titles = [a.title for a in ctx.notifier.alerts if a.kind == "warn"]
+    assert any("status" in t for t in warn_titles)
+    # command_error audit event
+    errs = [e for e in ctx.audit.events if e.get("event") == "command_error"]
+    assert len(errs) == 1 and errs[0]["action"] == "status"