feat: complete Faza 1 implementation (105 tests green)

All 12 modules built per reviewed plan:
- detector, state_machine (5-state phased FSM), canary, levels Phase B
- notifier fanout (Discord + Telegram, bounded queue, retry, dead-letter)
- audit (JSONL daily rotation), journal, report (weekly R-multiple PnL)
- calibrate + labeler (Tk, lazy-imported), dryrun with acceptance gate
- unified CLI: atm calibrate|label|dryrun|run|journal|report

README + Phase 2 prop-firm TOS audit checklist included.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-15 22:17:41 +00:00
parent 9207197a56
commit bf70ca3ac7
22 changed files with 2634 additions and 0 deletions

68
tests/test_calibrate.py Normal file
View File

@@ -0,0 +1,68 @@
"""Tests for atm.calibrate."""
from __future__ import annotations
from pathlib import Path
import pytest
def _minimal_config_data() -> dict:
return {
"window_title": "Test Chart",
"dot_roi": {"x": 0, "y": 0, "w": 100, "h": 100},
"chart_roi": {"x": 0, "y": 0, "w": 800, "h": 600},
"colors": {
"turquoise": {"rgb": [0, 200, 200], "tolerance": 30.0},
"yellow": {"rgb": [255, 255, 0], "tolerance": 30.0},
"dark_green": {"rgb": [0, 128, 0], "tolerance": 30.0},
"dark_red": {"rgb": [139, 0, 0], "tolerance": 30.0},
"light_green": {"rgb": [144, 238, 144], "tolerance": 30.0},
"light_red": {"rgb": [255, 182, 193], "tolerance": 30.0},
"gray": {"rgb": [128, 128, 128], "tolerance": 30.0},
},
"y_axis": {"p1_y": 100, "p1_price": 10000.0, "p2_y": 200, "p2_price": 9000.0},
"canary": {
"roi": {"x": 0, "y": 0, "w": 50, "h": 50},
"baseline_phash": "abc123",
"drift_threshold": 8,
},
"discord": {"webhook_url": "http://example.com/hook"},
"telegram": {"bot_token": "123:abc", "chat_id": "456"},
}
def test_write_config_and_marker(tmp_path: Path) -> None:
    """write_config emits a loadable TOML file plus a current.txt marker."""
    from atm.calibrate import write_config
    from atm.config import Config

    written = write_config(_minimal_config_data(), tmp_path)
    assert written.exists()
    assert written.suffix == ".toml"

    # Round-trip through Config.load.
    loaded = Config.load(written)
    assert loaded.window_title == "Test Chart"
    assert loaded.y_axis.p1_price == pytest.approx(10000.0)

    # The marker file stores just the basename of the active config.
    marker_file = tmp_path / "current.txt"
    assert marker_file.exists()
    assert marker_file.read_text(encoding="utf-8").strip() == written.name

    # load_current resolves the marker back to the same config.
    reloaded = Config.load_current(tmp_path)
    assert reloaded.window_title == loaded.window_title
def test_import_safe() -> None:
    """Importing atm.calibrate must succeed in a headless environment (no tkinter at top-level)."""
    # `import importlib.util` also binds the top-level `importlib` name, so the
    # separate `import importlib  # noqa: F401` line was redundant and is removed.
    import importlib.util

    spec = importlib.util.find_spec("atm.calibrate")
    assert spec is not None, "atm.calibrate module not found"
    # Actually importing must not raise (tkinter is only used inside run_calibration).
    mod = importlib.import_module("atm.calibrate")
    assert hasattr(mod, "write_config")
    assert hasattr(mod, "run_calibration")

152
tests/test_canary.py Normal file
View File

@@ -0,0 +1,152 @@
"""Tests for src/atm/canary.py."""
from __future__ import annotations
import dataclasses
from pathlib import Path
import numpy as np
import pytest
from atm.canary import Canary, CanaryResult
from atm.config import (
CanaryRegion,
ColorSpec,
Config,
DiscordCfg,
ROI,
TelegramCfg,
YAxisCalib,
)
from atm.vision import crop_roi, phash
# ---------------------------------------------------------------------------
# Fixtures / helpers
# ---------------------------------------------------------------------------
# Canary ROI shared by every test in this module.
CANARY_ROI = ROI(x=0, y=0, w=50, h=50)


def _make_base_cfg() -> Config:
    """Config skeleton with a placeholder baseline_phash.

    Tests substitute a real canary region via dataclasses.replace.
    """
    palette = {
        name: ColorSpec(rgb=rgb, tolerance=30.0)
        for name, rgb in [
            ("turquoise", (0, 255, 255)),
            ("yellow", (255, 255, 0)),
            ("dark_green", (0, 100, 0)),
            ("dark_red", (100, 0, 0)),
            ("light_green", (0, 255, 0)),
            ("light_red", (255, 0, 0)),
            ("gray", (128, 128, 128)),
        ]
    }
    return Config(
        window_title="test",
        dot_roi=ROI(x=0, y=0, w=100, h=50),
        chart_roi=ROI(x=0, y=0, w=600, h=400),
        colors=palette,
        y_axis=YAxisCalib(p1_y=0, p1_price=100.0, p2_y=400, p2_price=80.0),
        canary=CanaryRegion(roi=CANARY_ROI, baseline_phash="0" * 64, drift_threshold=8),
        discord=DiscordCfg(webhook_url="http://example.com/hook"),
        telegram=TelegramCfg(bot_token="tok", chat_id="123"),
    )
def _cfg_with_baseline(baseline_frame: np.ndarray) -> Config:
    """Build a Config whose baseline_phash matches the frame's canary ROI."""
    baseline_hash = phash(crop_roi(baseline_frame, CANARY_ROI))
    return dataclasses.replace(
        _make_base_cfg(),
        canary=CanaryRegion(roi=CANARY_ROI, baseline_phash=baseline_hash, drift_threshold=8),
    )
def _checkerboard(h: int, w: int, block: int = 8) -> np.ndarray:
"""Return a checkerboard BGR image (high-frequency, distinct phash)."""
img = np.zeros((h, w, 3), dtype=np.uint8)
for y in range(0, h, block):
for x in range(0, w, block):
if (y // block + x // block) % 2 == 0:
img[y : y + block, x : x + block] = 255
return img
# A purely black 100×100 frame as baseline
BASELINE_FRAME = np.zeros((100, 100, 3), dtype=np.uint8)
# A frame where the canary ROI is a checkerboard (visually very different)
DRIFTED_FRAME = BASELINE_FRAME.copy()
DRIFTED_FRAME[:50, :50] = _checkerboard(50, 50)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_no_drift() -> None:
    """Feeding the baseline frame back in → no drift, no pause."""
    can = Canary(_cfg_with_baseline(BASELINE_FRAME))
    res = can.check(BASELINE_FRAME)
    assert res.drifted is False
    assert res.paused is False
    assert can.is_paused is False
def test_drift_triggers_pause() -> None:
    """A drastically different canary ROI → drifted and paused."""
    can = Canary(_cfg_with_baseline(BASELINE_FRAME))
    res = can.check(DRIFTED_FRAME)
    assert res.drifted is True
    assert res.paused is True
    assert can.is_paused is True
def test_persists_paused() -> None:
    """Once drifted, a subsequent clean frame keeps the pause latched."""
    can = Canary(_cfg_with_baseline(BASELINE_FRAME))
    can.check(DRIFTED_FRAME)  # latch the pause
    clean_result = can.check(BASELINE_FRAME)
    assert clean_result.paused is True
    assert can.is_paused is True
def test_resume_clears() -> None:
    """resume() clears the pause; a following clean frame stays unpaused."""
    can = Canary(_cfg_with_baseline(BASELINE_FRAME))
    can.check(DRIFTED_FRAME)  # latch the pause
    can.resume()
    assert can.is_paused is False
    after_resume = can.check(BASELINE_FRAME)
    assert after_resume.paused is False
def test_pause_file_written(tmp_path: Path) -> None:
    """With pause_flag_path set, the flag file is created on drift."""
    flag_file = tmp_path / "paused.flag"
    can = Canary(_cfg_with_baseline(BASELINE_FRAME), pause_flag_path=flag_file)
    assert not flag_file.exists()
    can.check(DRIFTED_FRAME)
    assert flag_file.exists()
def test_resume_deletes_pause_file(tmp_path: Path) -> None:
    """resume() removes the pause flag file it created."""
    flag_file = tmp_path / "paused.flag"
    can = Canary(_cfg_with_baseline(BASELINE_FRAME), pause_flag_path=flag_file)
    can.check(DRIFTED_FRAME)
    assert flag_file.exists()
    can.resume()
    assert not flag_file.exists()

198
tests/test_detector.py Normal file
View File

@@ -0,0 +1,198 @@
"""Tests for src/atm/detector.py."""
from __future__ import annotations
import numpy as np
import pytest
from atm.config import (
CanaryRegion,
ColorSpec,
Config,
DiscordCfg,
ROI,
TelegramCfg,
YAxisCalib,
)
from atm.detector import DetectionResult, Detector
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# ROI inside the synthetic 100x300 frame where signal dots get painted.
DOT_ROI = ROI(x=10, y=10, w=280, h=80)
BG_VAL = 18  # background pixel value (18, 18, 18)
# BGR values (OpenCV convention: B, G, R)
# turquoise RGB=(0,255,255) → BGR=(255,255,0)
# yellow RGB=(255,255,0) → BGR=(0,255,255)
TURQUOISE_BGR = (255, 255, 0)
YELLOW_BGR = (0, 255, 255)
# A purple-ish colour far from every palette entry (RGB=(100,150,50))
UNKNOWN_BGR = (50, 150, 100)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_frame(*dot_specs: tuple[tuple[int, int, int], int, int]) -> np.ndarray:
    """Create a (100, 300, 3) uint8 BGR frame filled with background.

    Each spec is (bgr_color, roi_x_start, roi_x_end) and paints a
    full-height stripe inside DOT_ROI. roi_x_end=280 reaches the right
    boundary so pixel_rgb sampling stays within the dot.
    """
    frame = np.full((100, 300, 3), BG_VAL, dtype=np.uint8)
    y_top = DOT_ROI.y
    y_bottom = DOT_ROI.y + DOT_ROI.h
    for bgr, start, stop in dot_specs:
        frame[y_top:y_bottom, DOT_ROI.x + start : DOT_ROI.x + stop] = bgr
    return frame
def _make_cfg(debounce_depth: int = 1) -> Config:
    """Detector-test Config; only debounce_depth varies between tests."""
    palette = {
        name: ColorSpec(rgb=rgb, tolerance=30.0)
        for name, rgb in [
            ("turquoise", (0, 255, 255)),
            ("yellow", (255, 255, 0)),
            ("dark_green", (0, 100, 0)),
            ("dark_red", (100, 0, 0)),
            ("light_green", (0, 255, 0)),
            ("light_red", (255, 0, 0)),
            ("gray", (128, 128, 128)),
        ]
    }
    return Config(
        window_title="test",
        dot_roi=DOT_ROI,
        chart_roi=ROI(x=0, y=0, w=600, h=400),
        colors=palette,
        y_axis=YAxisCalib(p1_y=0, p1_price=100.0, p2_y=400, p2_price=80.0),
        canary=CanaryRegion(
            roi=ROI(x=0, y=0, w=50, h=50),
            baseline_phash="0" * 64,
            drift_threshold=8,
        ),
        discord=DiscordCfg(webhook_url="http://example.com/hook"),
        telegram=TelegramCfg(bot_token="tok", chat_id="123"),
        debounce_depth=debounce_depth,
    )
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_empty_roi_no_dot() -> None:
    """An all-background frame → window seen but no dot detected."""
    blank = np.full((100, 300, 3), BG_VAL, dtype=np.uint8)
    det = Detector(_make_cfg(), capture=lambda: blank)
    res = det.step(0.0)
    assert res.window_found is True
    assert res.dot_found is False
    assert res.rgb is None
    assert res.match is None
    assert res.accepted is False
def test_rightmost_cluster() -> None:
    """Two dots at different x positions → detector reports the rightmost colour."""
    # Turquoise stripe on the left; yellow stripe touching the right ROI edge.
    frame = _make_frame(
        (TURQUOISE_BGR, 50, 100),  # roi_x [50, 100)
        (YELLOW_BGR, 200, 280),  # roi_x [200, 280) → right edge
    )
    det = Detector(_make_cfg(), capture=lambda: frame)
    res = det.step(0.0)
    assert res.dot_found is True
    assert res.match is not None
    assert res.match.name == "yellow"
def test_debounce_depth_1() -> None:
    """depth=1: a single valid frame is accepted immediately."""
    frame = _make_frame((YELLOW_BGR, 200, 280))
    det = Detector(_make_cfg(debounce_depth=1), capture=lambda: frame)
    res = det.step(0.0)
    assert res.accepted is True
    assert res.color == "yellow"
def test_debounce_depth_2() -> None:
    """depth=2: first frame rejected, second identical frame accepted."""
    frame = _make_frame((YELLOW_BGR, 200, 280))
    det = Detector(_make_cfg(debounce_depth=2), capture=lambda: frame)
    first = det.step(0.0)
    second = det.step(1.0)
    assert first.accepted is False
    assert second.accepted is True
    assert second.color == "yellow"
def test_debounce_reset_on_change() -> None:
    """depth=2: colour A then colour B → the counter resets, neither accepted."""
    feed = iter([
        _make_frame((TURQUOISE_BGR, 200, 280)),
        _make_frame((YELLOW_BGR, 200, 280)),
    ])
    det = Detector(_make_cfg(debounce_depth=2), capture=lambda: next(feed))
    first = det.step(0.0)
    second = det.step(1.0)
    assert first.accepted is False
    assert second.accepted is False
def test_unknown_not_accepted() -> None:
    """A colour outside every palette tolerance → UNKNOWN and never accepted."""
    frame = _make_frame((UNKNOWN_BGR, 200, 280))
    det = Detector(_make_cfg(debounce_depth=1), capture=lambda: frame)
    res = det.step(0.0)
    assert res.dot_found is True
    assert res.match is not None
    assert res.match.name == "UNKNOWN"
    assert res.accepted is False
    assert res.color is None
def test_window_lost() -> None:
    """capture() returning None → window_found=False with safe defaults."""
    det = Detector(_make_cfg(), capture=lambda: None)
    res = det.step(0.0)
    assert res.window_found is False
    assert res.dot_found is False
    assert res.rgb is None
    assert res.match is None
    assert res.accepted is False
    assert res.color is None
def test_rolling_window() -> None:
    """Rolling window is capped at 20 entries at every step.

    The original only checked the length after the loop (and then asserted
    both `<= 20` and `== 20`, making the first assert redundant). Checking
    the cap inside the loop actually verifies "never exceeds".
    """
    frame = _make_frame((YELLOW_BGR, 200, 280))
    det = Detector(_make_cfg(), capture=lambda: frame)
    for i in range(25):
        det.step(float(i))
        # The cap must hold after every step, not just at the end.
        assert len(det.rolling) <= 20
    # After 25 steps the window is exactly full.
    assert len(det.rolling) == 20

224
tests/test_dryrun.py Normal file
View File

@@ -0,0 +1,224 @@
"""Tests for atm.dryrun."""
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
import pytest
from atm.config import CanaryRegion, ColorSpec, Config, DiscordCfg, ROI, TelegramCfg, YAxisCalib
from atm.dryrun import ConfusionMatrix, DryrunResult, dryrun
# ---------------------------------------------------------------------------
# Config fixture
#
# The 6x6 dot at x=250..255, y=50..55 in a 100x300 frame is sampled by
# pixel_rgb(box=3) over a 7x7 patch: 24 dot pixels + 25 background (0,0,0).
# Sampled RGB = int(true_RGB * 24/49). Config colors match the sampled values
# so classify_pixel returns the correct label.
# ---------------------------------------------------------------------------
# 7x7 sampling box: 24 of 49 pixels land on the 6x6 dot, the rest on black
# background, so the averaged colour is diluted by this factor.
_SCALE = 24 / 49  # fraction of dot pixels in the 7x7 sample box
# True BGR paint values → sampled RGB ≈ int(true_RGB * _SCALE)
_SAMPLED_RGB: dict[str, tuple[int, int, int]] = {
    "turquoise": (0, 97, 97),  # true (0, 200, 200)
    "yellow": (124, 124, 0),  # true (255, 255, 0)
    "dark_green": (0, 48, 0),  # true (0, 100, 0)
    "dark_red": (68, 0, 0),  # true (139, 0, 0)
    "light_green": (70, 116, 70),  # true (144, 238, 144)
    "light_red": (124, 89, 94),  # true (255, 182, 193)
    "gray": (62, 62, 62),  # true (128, 128, 128)
}
# True RGB values used when painting frames (before sampling dilution)
_TRUE_RGB: dict[str, tuple[int, int, int]] = {
    "turquoise": (0, 200, 200),
    "yellow": (255, 255, 0),
    "dark_green": (0, 100, 0),
    "dark_red": (139, 0, 0),
    "light_green": (144, 238, 144),
    "light_red": (255, 182, 193),
    "gray": (128, 128, 128),
}
def _make_config() -> Config:
    """Config whose palette matches the SAMPLED (diluted) dot colours."""
    palette = {
        name: ColorSpec(rgb=rgb, tolerance=5) for name, rgb in _SAMPLED_RGB.items()
    }
    palette["background"] = ColorSpec(rgb=(0, 0, 0), tolerance=5)
    return Config(
        window_title="test",
        dot_roi=ROI(x=0, y=0, w=300, h=100),
        chart_roi=ROI(x=0, y=0, w=300, h=100),
        colors=palette,
        y_axis=YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0),
        canary=CanaryRegion(roi=ROI(x=0, y=0, w=10, h=10), baseline_phash="0" * 64),
        discord=DiscordCfg(webhook_url="http://localhost/fake"),
        telegram=TelegramCfg(bot_token="fake_token", chat_id="123"),
        debounce_depth=1,
    )
def _make_dot_frame(rgb: tuple[int, int, int]) -> np.ndarray:
"""100x300 BGR frame with a 6x6 dot at x=250,y=50."""
frame = np.zeros((100, 300, 3), dtype=np.uint8)
frame[50:56, 250:256] = (rgb[2], rgb[1], rgb[0]) # BGR
return frame
# ---------------------------------------------------------------------------
# 1. Confusion matrix unit test — pure math, no cv2/detector
# ---------------------------------------------------------------------------
def test_confusion_matrix_math() -> None:
    """Pure-math precision/recall/F1 check — no cv2 or detector required."""
    cm = ConfusionMatrix()
    for truth, predicted in [("A", "A"), ("A", "A"), ("A", "B"), ("B", "B")]:
        cm.add(truth, predicted)
    metrics = cm.per_label()
    # A: support=3, TP=2, FP=0 (B never predicted as A), FN=1.
    assert metrics["A"]["support"] == 3.0
    assert metrics["A"]["precision"] == pytest.approx(1.0)  # 2/2
    assert metrics["A"]["recall"] == pytest.approx(2 / 3)
    assert metrics["A"]["f1"] == pytest.approx(2 * 1.0 * (2 / 3) / (1.0 + 2 / 3))
    # B: support=1, TP=1, FP=1 (one A predicted as B), FN=0.
    assert metrics["B"]["support"] == 1.0
    assert metrics["B"]["precision"] == pytest.approx(0.5)  # 1/2
    assert metrics["B"]["recall"] == pytest.approx(1.0)
    # 3 of the 4 samples were classified correctly.
    assert cm.overall_accuracy() == pytest.approx(3 / 4)
# ---------------------------------------------------------------------------
# 2-5: integration tests that require atm.detector
# ---------------------------------------------------------------------------
def test_dryrun_perfect_match(tmp_path: Path) -> None:
    """All six signal colours correctly labelled → perfect metrics, gate passes."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()
    signal_colors = ["turquoise", "yellow", "dark_green", "dark_red", "light_green", "light_red"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(signal_colors):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name
    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.n_samples == 6
    assert result.n_labeled == 6
    assert result.precision_overall == pytest.approx(1.0)
    assert result.recall_overall == pytest.approx(1.0)
    assert result.acceptance_pass is True
    # Diagonal-only confusion matrix: every label predicts itself exactly once.
    per = result.confusion.per_label()
    for name in signal_colors:
        assert result.confusion.counts[name] == {name: 1}, (
            f"Expected diagonal for {name}, got {result.confusion.counts[name]}"
        )
    assert all(m["precision"] == pytest.approx(1.0) for m in per.values())
    assert all(m["recall"] == pytest.approx(1.0) for m in per.values())
def test_dryrun_with_unlabeled_sample(tmp_path: Path) -> None:
    """A PNG with no label entry counts as a sample but not toward metrics."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()
    labeled_names = ["turquoise", "yellow", "dark_green"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(labeled_names):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name
    # Frame "3" exists on disk but is absent from labels.json.
    cv2.imwrite(str(tmp_path / "3.png"), _make_dot_frame(_TRUE_RGB["dark_red"]))
    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.n_samples == 4  # all PNGs on disk
    assert result.n_labeled == 3  # only the labelled subset
    assert "3" not in result.confusion.counts
    assert set(result.confusion.counts.keys()) == set(labeled_names)
def test_dryrun_misclassification_fails_gate(tmp_path: Path) -> None:
    """One swapped label is enough to fail the acceptance gate."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()
    signal_colors = ["turquoise", "yellow", "dark_green", "dark_red", "light_green", "light_red"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(signal_colors):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name
    labels["0"] = "yellow"  # mislabel the turquoise dot as yellow
    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.acceptance_pass is False
    # One yellow-labelled frame was predicted turquoise → yellow recall drops.
    assert result.confusion.per_label()["yellow"]["recall"] < 1.0
def test_fire_event_captured(tmp_path: Path) -> None:
    """A BUY colour sequence is replayed and recorded as one fire event."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()
    # Sequence that triggers a BUY fire: turquoise → gray → dark_green → light_green.
    sequence = ["turquoise", "gray", "dark_green", "light_green"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(sequence):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name
    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert len(result.fire_events) == 1
    event = result.fire_events[0]
    assert event["direction"] == "BUY"
    assert event["ts"] == pytest.approx(15.0)  # sample index 3 at 5.0 s per frame
    assert event["sample"] == "3"

95
tests/test_journal.py Normal file
View File

@@ -0,0 +1,95 @@
"""Tests for atm.journal."""
from __future__ import annotations
import json
from pathlib import Path
from atm.journal import Journal, TradeEntry, prompt_entry
def _sample() -> TradeEntry:
    """A single still-open BUY trade used by the write-path tests."""
    fields = dict(
        ts="2026-04-14T10:00:00",
        direction="BUY",
        symbol="US30",
        entry=40000.0,
        sl=39950.0,
        tp1=40100.0,
        tp2=None,
        exit=None,
        outcome="open",
        detected_ts=None,
        notes="",
    )
    return TradeEntry(**fields)
def test_add_and_read_roundtrip(tmp_path: Path) -> None:
    """Entries written via add() come back verbatim and in order from all()."""
    journal = Journal(tmp_path / "trades.jsonl")
    winner = TradeEntry(
        ts="2026-04-14T10:00:00", direction="BUY", symbol="US30",
        entry=40000.0, sl=39950.0, tp1=40100.0, tp2=None,
        exit=40100.0, outcome="tp1", detected_ts=None, notes="",
    )
    loser = TradeEntry(
        ts="2026-04-14T11:00:00", direction="SELL", symbol="NQ",
        entry=20000.0, sl=20050.0, tp1=None, tp2=None,
        exit=20050.0, outcome="sl", detected_ts=None, notes="stop hit",
    )
    for trade in (winner, loser):
        journal.add(trade)
    assert journal.all() == [winner, loser]
def test_prompt_entry_with_defaults() -> None:
    """prompt_entry parses typed answers and carries detected_ts through."""
    answers = [
        "2026-04-15T10:30:00",  # ts
        "BUY",                  # direction
        "US30",                 # symbol
        "40000",                # entry
        "39950",                # sl
        "40100",                # tp1
        "",                     # tp2 (blank → None)
        "",                     # exit (blank → None)
        "open",                 # outcome
        "",                     # notes
    ]
    feed = iter(answers)
    detected = {
        "direction": "BUY",
        "symbol": "US30",
        "detected_ts": "2026-04-15T10:29:45",
    }
    entry = prompt_entry(input_fn=lambda _prompt: next(feed), detected=detected)
    assert entry.direction == "BUY"
    assert entry.symbol == "US30"
    assert entry.entry == 40000.0
    assert entry.sl == 39950.0
    assert entry.tp1 == 40100.0
    assert entry.tp2 is None
    assert entry.exit is None
    assert entry.outcome == "open"
    assert entry.detected_ts == "2026-04-15T10:29:45"
    assert entry.notes == ""
def test_file_line_buffered(tmp_path: Path) -> None:
    """Each add() appends exactly one immediately-readable JSONL line."""
    path = tmp_path / "trades.jsonl"
    journal = Journal(path)
    for expected_count in (1, 2):
        journal.add(_sample())
        lines = path.read_text(encoding="utf-8").splitlines()
        assert len(lines) == expected_count
        json.loads(lines[-1])  # the newest line must already be valid JSON

54
tests/test_labeler.py Normal file
View File

@@ -0,0 +1,54 @@
"""Tests for atm.labeler."""
from __future__ import annotations
from pathlib import Path
import pytest
from atm.labeler import LabelStore, accuracy
def test_label_store_roundtrip(tmp_path: Path) -> None:
    """Labels saved to disk are readable by a fresh LabelStore."""
    path = tmp_path / "labels.json"
    store = LabelStore(path)
    expected = {"img1.png": "turquoise", "img2.png": "yellow"}
    for filename, label in expected.items():
        store[filename] = label
    store.save()
    reloaded = LabelStore(path)
    for filename, label in expected.items():
        assert reloaded[filename] == label
    assert reloaded.as_dict() == expected
def test_accuracy_perfect() -> None:
    """Identical truth/prediction maps → every metric is 1.0."""
    truth = {"a.png": "turquoise", "b.png": "yellow", "c.png": "gray"}
    result = accuracy(truth, dict(truth))
    assert result["accuracy"] == pytest.approx(1.0)
    assert result["turquoise_precision"] == pytest.approx(1.0)
    assert result["turquoise_recall"] == pytest.approx(1.0)
    assert result["yellow_f1"] == pytest.approx(1.0)
    assert result["gray_f1"] == pytest.approx(1.0)
def test_accuracy_partial() -> None:
    """One of three frames mispredicted → 2/3 accuracy with matching P/R/F1."""
    # b is truly turquoise but predicted yellow: FN for turquoise, FP for yellow.
    truth = {"a.png": "turquoise", "b.png": "turquoise", "c.png": "yellow"}
    guesses = {"a.png": "turquoise", "b.png": "yellow", "c.png": "yellow"}
    result = accuracy(truth, guesses)
    assert result["accuracy"] == pytest.approx(2 / 3)
    # turquoise: tp=1, fp=0, fn=1 → precision=1.0, recall=0.5, f1=2/3.
    assert result["turquoise_precision"] == pytest.approx(1.0)
    assert result["turquoise_recall"] == pytest.approx(0.5)
    assert result["turquoise_f1"] == pytest.approx(2 / 3)
    # yellow: tp=1, fp=1, fn=0 → precision=0.5, recall=1.0, f1=2/3.
    assert result["yellow_precision"] == pytest.approx(0.5)
    assert result["yellow_recall"] == pytest.approx(1.0)
    assert result["yellow_f1"] == pytest.approx(2 / 3)

172
tests/test_levels.py Normal file
View File

@@ -0,0 +1,172 @@
"""Tests for src/atm/levels.py."""
from __future__ import annotations
import numpy as np
import pytest
from atm.config import (
CanaryRegion,
ColorSpec,
Config,
DiscordCfg,
ROI,
TelegramCfg,
YAxisCalib,
)
from atm.levels import Levels, LevelsExtractor, LevelsResult
from atm.vision import pixel_y_to_price
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# chart_roi starts at (0,0) so test frames can be exactly (H, W)
# chart_roi starts at (0,0) so test frames can be exactly (H, W)
CHART_ROI = ROI(x=0, y=0, w=600, h=400)
# Linear y→price calibration: y=0 maps to 100.0 and y=400 to 80.0.
CALIB = YAxisCalib(p1_y=0, p1_price=100.0, p2_y=400, p2_price=80.0)
# light_red RGB=(255,0,0) → BGR=(0,0,255)
# light_green RGB=(0,255,0) → BGR=(0,255,0)
RED_BGR: tuple[int, int, int] = (0, 0, 255)
GREEN_BGR: tuple[int, int, int] = (0, 255, 0)
# Colour-matching tolerance shared by every palette entry below.
TOLERANCE = 30.0
def _make_cfg(phaseb_timeout_s: int = 600) -> Config:
    """Levels-test Config; only phaseb_timeout_s varies between tests."""
    palette = {
        name: ColorSpec(rgb=rgb, tolerance=TOLERANCE)
        for name, rgb in [
            ("turquoise", (0, 255, 255)),
            ("yellow", (255, 255, 0)),
            ("dark_green", (0, 100, 0)),
            ("dark_red", (100, 0, 0)),
            ("light_green", (0, 255, 0)),
            ("light_red", (255, 0, 0)),
            ("gray", (128, 128, 128)),
        ]
    }
    return Config(
        window_title="test",
        dot_roi=ROI(x=0, y=0, w=100, h=50),
        chart_roi=CHART_ROI,
        colors=palette,
        y_axis=CALIB,
        canary=CanaryRegion(
            roi=ROI(x=0, y=0, w=50, h=50),
            baseline_phash="0" * 64,
            drift_threshold=8,
        ),
        discord=DiscordCfg(webhook_url="http://example.com/hook"),
        telegram=TelegramCfg(bot_token="tok", chat_id="123"),
        phaseb_timeout_s=phaseb_timeout_s,
    )
# ---------------------------------------------------------------------------
# Frame helpers
# ---------------------------------------------------------------------------
def _make_chart(*line_specs: tuple[tuple[int, int, int], int]) -> np.ndarray:
"""Return a 400×600 black BGR frame with horizontal lines.
Each spec is (bgr_color, y_position). Lines are painted 3px thick
and span the full width so Hough detection is reliable.
"""
frame = np.zeros((400, 600, 3), dtype=np.uint8)
for bgr, y in line_specs:
y0 = max(0, y - 1)
y1 = min(400, y + 2)
frame[y0:y1, :] = bgr
return frame
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_three_lines_buy_complete() -> None:
    """BUY with 3 stable lines → complete on the second call, prices correct."""
    # Red SL at y=300 (lowest price for a BUY); green TPs at y=200 and y=100.
    frame = _make_chart((RED_BGR, 300), (GREEN_BGR, 200), (GREEN_BGR, 100))
    ext = LevelsExtractor(_make_cfg(), direction="BUY", start_ts=0.0)
    first = ext.step(frame, ts=1.0)
    assert first.status == "partial"  # first observation: not yet stable
    res = ext.step(frame, ts=2.0)
    assert res.status == "complete"
    assert res.levels is not None
    assert res.levels.partial is False
    # Calibration maps y=100→95.0, y=200→90.0, y=300→85.0.
    for attr, y_pos in (("tp2", 100), ("tp1", 200), ("sl", 300)):
        expected = pixel_y_to_price(y_pos, CALIB)
        assert getattr(res.levels, attr) == pytest.approx(expected, abs=1.0)
def test_two_lines_partial() -> None:
    """Only 2 visible lines → result stays partial."""
    frame = _make_chart((RED_BGR, 300), (GREEN_BGR, 100))
    ext = LevelsExtractor(_make_cfg(), direction="BUY", start_ts=0.0)
    res = ext.step(frame, ts=1.0)
    assert res.status == "partial"
    assert res.levels is not None
    assert res.levels.partial is True
def test_zero_lines_waiting() -> None:
    """An empty chart → waiting status and no levels."""
    blank = np.zeros((400, 600, 3), dtype=np.uint8)
    ext = LevelsExtractor(_make_cfg(), direction="BUY", start_ts=0.0)
    res = ext.step(blank, ts=1.0)
    assert res.status == "waiting"
    assert res.levels is None
def test_timeout() -> None:
    """Elapsed time beyond phaseb_timeout_s → timeout regardless of lines."""
    blank = np.zeros((400, 600, 3), dtype=np.uint8)
    ext = LevelsExtractor(_make_cfg(phaseb_timeout_s=600), direction="BUY", start_ts=0.0)
    res = ext.step(blank, ts=700.0)
    assert res.status == "timeout"
    assert res.levels is None
    assert res.elapsed_s == pytest.approx(700.0)
def test_sell_direction_assignment() -> None:
    """SELL: topmost line (highest price) becomes SL, bottom becomes TP2."""
    frame = _make_chart((RED_BGR, 300), (GREEN_BGR, 200), (GREEN_BGR, 100))
    ext = LevelsExtractor(_make_cfg(), direction="SELL", start_ts=0.0)
    ext.step(frame, ts=1.0)  # warm-up: stability needs two matching reads
    res = ext.step(frame, ts=2.0)
    assert res.status == "complete"
    assert res.levels is not None
    # SELL assignment: y=100 → SL (95.0), y=200 → TP1 (90.0), y=300 → TP2 (85.0).
    for attr, y_pos in (("sl", 100), ("tp1", 200), ("tp2", 300)):
        expected = pixel_y_to_price(y_pos, CALIB)
        assert getattr(res.levels, attr) == pytest.approx(expected, abs=1.0)

137
tests/test_main.py Normal file
View File

@@ -0,0 +1,137 @@
"""Tests for atm.main unified CLI."""
from __future__ import annotations
import os
import subprocess
import sys
import types
from dataclasses import dataclass
from pathlib import Path
from unittest.mock import MagicMock
import pytest
SUBCOMMANDS = ["calibrate", "label", "dryrun", "run", "journal", "report"]
# Ensure subprocess invocations find the atm package even without pip install
_SRC = str(Path(__file__).resolve().parent.parent / "src")
_SUBPROCESS_ENV = {**os.environ, "PYTHONPATH": _SRC}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_config_class(cfg=None):
"""Return a Config-like class whose load_current() returns *cfg*."""
if cfg is None:
cfg = MagicMock()
mock_cls = MagicMock()
mock_cls.load_current.return_value = cfg
return mock_cls
# ---------------------------------------------------------------------------
# test_help_works
# ---------------------------------------------------------------------------
def test_help_works():
    """`python -m atm --help` must exit with status 0."""
    proc = subprocess.run(
        [sys.executable, "-m", "atm", "--help"],
        capture_output=True,
        text=True,
        env=_SUBPROCESS_ENV,
    )
    assert proc.returncode == 0, proc.stderr
# ---------------------------------------------------------------------------
# test_subcommands_listed
# ---------------------------------------------------------------------------
def test_subcommands_listed():
    """The --help output must mention every subcommand."""
    proc = subprocess.run(
        [sys.executable, "-m", "atm", "--help"],
        capture_output=True,
        text=True,
        env=_SUBPROCESS_ENV,
    )
    for cmd in SUBCOMMANDS:
        assert cmd in proc.stdout, f"Expected subcommand '{cmd}' in --help output"
# ---------------------------------------------------------------------------
# test_dryrun_wiring
# ---------------------------------------------------------------------------
@dataclass
class _DryrunResult:
acceptance_pass: bool
def _make_dryrun_module(acceptance_pass: bool):
mod = types.ModuleType("atm.dryrun")
mod.dryrun = lambda *a, **kw: _DryrunResult(acceptance_pass=acceptance_pass)
mod.print_report = lambda r: None
return mod
def test_dryrun_wiring_pass(monkeypatch, tmp_path):
    """A passing acceptance gate makes the CLI exit with code 0."""
    import atm.main as _main

    monkeypatch.setattr("atm.main.dryrun", _make_dryrun_module(acceptance_pass=True))
    monkeypatch.setattr("atm.main.Config", _mock_config_class())
    with pytest.raises(SystemExit) as excinfo:
        _main.main(["dryrun", str(tmp_path)])
    assert excinfo.value.code == 0
def test_dryrun_wiring_fail(monkeypatch, tmp_path):
    """A failing acceptance gate makes the CLI exit with code 1."""
    import atm.main as _main

    monkeypatch.setattr("atm.main.dryrun", _make_dryrun_module(acceptance_pass=False))
    monkeypatch.setattr("atm.main.Config", _mock_config_class())
    with pytest.raises(SystemExit) as excinfo:
        _main.main(["dryrun", str(tmp_path)])
    assert excinfo.value.code == 1
# ---------------------------------------------------------------------------
# test_report_current_week_default
# ---------------------------------------------------------------------------
def test_report_current_week_default(monkeypatch, tmp_path):
    """With an empty journal, `atm report` prints a zero-trade week and returns."""
    import atm.main as _main

    monkeypatch.setattr("atm.journal.Journal.all", lambda self: [])
    # Must complete without raising and without calling sys.exit.
    _main.main(["report", "--file", str(tmp_path / "trades.jsonl")])
# ---------------------------------------------------------------------------
# test_run_live_dry
# ---------------------------------------------------------------------------
def test_run_live_dry(monkeypatch):
    """`atm run --duration 0` forwards the parsed duration to run_live."""
    import atm.main as _main

    recorded: list[dict] = []

    def _fake_run_live(cfg, duration_s=None, capture_stub=False):
        recorded.append({"cfg": cfg, "duration_s": duration_s, "capture_stub": capture_stub})

    monkeypatch.setattr("atm.main.run_live", _fake_run_live)
    monkeypatch.setattr("atm.main.Config", _mock_config_class())
    _main.main(["run", "--duration", "0"])
    assert len(recorded) == 1
    assert recorded[0]["duration_s"] == pytest.approx(0.0)

76
tests/test_report.py Normal file
View File

@@ -0,0 +1,76 @@
"""Tests for atm.report."""
from __future__ import annotations
import pytest
from atm.journal import TradeEntry
from atm.report import iso_week, weekly_report
# ISO year-week under test, and a timestamp that falls inside that week.
WEEK = "2026-16"
BASE_TS = "2026-04-14T10:00:00"
def _trade(
    outcome: str,
    direction: str = "BUY",
    entry: float = 100.0,
    sl: float = 90.0,
    exit_: float | None = None,
    detected_ts: str | None = None,
    ts: str = BASE_TS,
) -> TradeEntry:
    """Build a US30 TradeEntry with convenient defaults for the report tests."""
    return TradeEntry(
        ts=ts,
        direction=direction,
        symbol="US30",
        entry=entry,
        sl=sl,
        tp1=None,
        tp2=None,
        exit=exit_,
        outcome=outcome,
        detected_ts=detected_ts,
        notes="",
    )
def test_win_rate_and_pnl() -> None:
    """5 synthetic trades: +2R, +3R, -1.5R, +2R, plus one open (excluded)."""
    tp1_win = _trade("tp1", exit_=120.0, detected_ts="2026-04-14T09:59:55")  # (120-100)/10 = +2R
    tp2_win = _trade("tp2", exit_=130.0)  # (130-100)/10 = +3R
    stopped = _trade("sl", exit_=85.0, detected_ts="2026-04-14T09:59:50")  # (85-100)/10 = -1.5R
    manual_win = _trade("manual", direction="SELL", sl=110.0, exit_=80.0)  # (80-100)/(100-110) = +2R
    still_open = _trade("open")  # excluded from all counts
    report = weekly_report([tp1_win, tp2_win, stopped, manual_win, still_open], WEEK)
    assert report.week == WEEK
    assert report.n_trades == 4
    assert report.n_wins == 2
    assert report.n_losses == 2
    assert report.win_rate == pytest.approx(0.5)
    assert report.pnl_r == pytest.approx(5.5)
    # Slippage samples: 5 s and 10 s → average 7.5 s.
    assert report.avg_slippage == pytest.approx(7.5)
def test_iso_week() -> None:
    """iso_week formats the ISO year-week with zero padding."""
    expectations = {
        "2026-04-14T10:00:00": "2026-16",
        "2026-01-01T00:00:00": "2026-01",
    }
    for ts, expected in expectations.items():
        assert iso_week(ts) == expected
def test_empty_week() -> None:
    """No trades → all counters zero and slippage undefined."""
    report = weekly_report([], WEEK)
    assert report.n_trades == 0
    assert report.n_wins == 0
    assert report.n_losses == 0
    assert report.win_rate == 0.0
    assert report.pnl_r == 0.0
    assert report.avg_slippage is None