feat(run): extract detection loop helpers + unconditional cmd drain
Refactor _detection_loop by moving _run_tick, _handle_fsm_result, _dispatch_command, and _drain_cmd_queue to module scope, passing dependencies via a RunContext dataclass. This unblocks direct unit testing of the drain path. CRITICAL bug fix: the previous loop issued `continue` when the tick returned res=None (canary paused or similar), which skipped the drain block. Commands piled up in cmd_queue while detection was paused — the hang observed on 2026-04-17 after canary drift-pause. The refactored loop now runs _drain_cmd_queue UNCONDITIONALLY on every iteration, after _handle_fsm_result, so pause-state never starves the command channel. Tests: test_drain_works_when_canary_paused, test_drain_works_when_out_of_window, test_drain_isolates_dispatch_exceptions (exception isolation + audit/warn wiring). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -401,3 +401,139 @@ async def test_lifecycle_idle_armed_primed_autopoll_fire_stop(monkeypatch, tmp_p
|
||||
start_idx = scheduler_events.index("start:180")
|
||||
stop_idx = scheduler_events.index("stop")
|
||||
assert start_idx < stop_idx, "scheduler started after it stopped"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Commit 1 regression tests: _drain_cmd_queue MUST run unconditionally,
# even when the canary is paused or when detection is otherwise skipped.
# Prior bug: a `continue` skipped past the drain loop, so commands piled up.
# ---------------------------------------------------------------------------
def _make_ctx_for_drain(cmd_queue, dispatched=None):
    """Build a minimal RunContext whose collaborators record calls in memory.

    Args:
        cmd_queue: asyncio.Queue the drain loop consumes commands from.
        dispatched: unused by this helper; accepted (now optionally) only for
            signature compatibility with callers that pass their recording
            list alongside the queue. New callers may omit it.

    Returns:
        An ``atm.main.RunContext`` wired with in-memory fakes (audit,
        notifier, canary, scheduler) so ``_drain_cmd_queue`` can run without
        real I/O. The canary is constructed paused to reproduce the
        regression scenario.
    """
    import atm.main as _main

    class _FakeAudit:
        """Records every audit event in ``.events`` instead of writing a log."""

        def __init__(self):
            self.events = []

        def log(self, e):
            self.events.append(e)

    class _FakeNotifier:
        """Captures alerts in ``.alerts`` instead of sending them."""

        def __init__(self):
            self.alerts = []

        def send(self, a):
            self.alerts.append(a)

    class _FakeCanary:
        """Canary stand-in; ``paused=True`` models the drift-pause state."""

        def __init__(self, paused=True):
            self.is_paused = paused

    class _FakeScheduler:
        """No-op scheduler so RunContext construction needs no loop wiring."""

        is_running = False
        interval_s = None

        def start(self, s):
            pass

        def stop(self):
            pass

    state = _main._LoopState(start=0.0)
    return _main.RunContext(
        cfg=MagicMock(),
        capture=lambda: None,
        canary=_FakeCanary(paused=True),
        detector=MagicMock(),
        fsm=MagicMock(),
        notifier=_FakeNotifier(),
        audit=_FakeAudit(),
        detection_log=_FakeAudit(),
        scheduler=_FakeScheduler(),
        samples_dir=Path("."),
        fires_dir=Path("."),
        cmd_queue=cmd_queue,
        state=state,
        levels_extractor_factory=lambda *a, **kw: None,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_drain_works_when_canary_paused(monkeypatch):
    """Regression: when canary.is_paused, _drain_cmd_queue still dispatches.

    Prior bug: detection loop `continue`'d past the drain block whenever the
    tick returned res=None (canary paused). Commands accumulated forever.
    """
    import atm.main as _main
    from atm.commands import Command

    # Pre-load two commands so the drain has real work to do.
    q: asyncio.Queue = asyncio.Queue()
    for action in ("status", "ss"):
        await q.put(Command(action=action))

    dispatched: list = []

    async def _record(ctx, cmd):
        dispatched.append(cmd.action)

    monkeypatch.setattr(_main, "_dispatch_command", _record)

    ctx = _make_ctx_for_drain(q, dispatched)
    await _main._drain_cmd_queue(ctx)

    # Both commands were dispatched in FIFO order and the queue fully emptied.
    assert dispatched == ["status", "ss"]
    assert q.empty()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_drain_works_when_out_of_window(monkeypatch):
    """Drain must still fire when the tick skipped (e.g. out of operating hours).

    The refactored loop runs _drain_cmd_queue unconditionally after every tick,
    regardless of `_TickSyncResult` content.
    """
    import atm.main as _main
    from atm.commands import Command

    q: asyncio.Queue = asyncio.Queue()
    await q.put(Command(action="stop"))

    dispatched: list = []

    async def _record(ctx, cmd):
        dispatched.append(cmd.action)

    monkeypatch.setattr(_main, "_dispatch_command", _record)

    ctx = _make_ctx_for_drain(q, dispatched)

    # An empty _TickSyncResult models an out-of-window tick: no detection ran,
    # yet the subsequent drain must still consume the queued command.
    await _main._handle_fsm_result(ctx, _main._TickSyncResult())
    await _main._drain_cmd_queue(ctx)

    assert dispatched == ["stop"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_drain_isolates_dispatch_exceptions(monkeypatch):
    """If one command raises, remaining commands still drain + warn alert sent."""
    import atm.main as _main
    from atm.commands import Command

    q: asyncio.Queue = asyncio.Queue()
    for action in ("status", "ss"):
        await q.put(Command(action=action))

    attempts: list = []

    async def _boom_on_status(ctx, cmd):
        attempts.append(cmd.action)
        if cmd.action == "status":
            raise RuntimeError("boom")

    monkeypatch.setattr(_main, "_dispatch_command", _boom_on_status)

    ctx = _make_ctx_for_drain(q, attempts)
    await _main._drain_cmd_queue(ctx)

    # The failure on "status" must not prevent "ss" from being attempted.
    assert attempts == ["status", "ss"]
    # warn alert for the failed command
    warn_titles = [a.title for a in ctx.notifier.alerts if a.kind == "warn"]
    assert any("status" in t for t in warn_titles)
    # command_error audit event
    errs = [e for e in ctx.audit.events if e.get("event") == "command_error"]
    assert len(errs) == 1
    assert errs[0]["action"] == "status"
|
||||
|
||||
Reference in New Issue
Block a user