feat(cli): atm validate-calibration — offline color classification gate

Adds an `atm validate-calibration LABEL_FILE` subcommand that runs the Detector on a set of labeled PNG frames and reports per-sample PASS/FAIL with top-3 candidate colors and RGB-distance suggestions for failures. Exits 0 on 100% PASS, 1 on any FAIL, 2 on missing/malformed label file.

- New module src/atm/validate.py with ValidationReport + SampleRecord dataclasses; reuses Detector.step(frame), does not reimplement color classification.
- main.py: new `validate-calibration` subparser and _cmd_validate_calibration handler wired into the dispatch map.
- samples/calibration_labels.json seeded with 3 entries from the 2026-04-17 incident, plus a README describing the schema.
- tests/test_validate.py covers the 3 planned cases: PASS, FAIL w/ top-3 + suggestion, missing file (graceful error, no traceback).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-18 11:54:48 +03:00
parent 23865776e3
commit 8bae507bbd
5 changed files with 535 additions and 0 deletions
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -0,0 +1,214 @@
+"""Tests for atm.validate — offline calibration validation.
+
+Covers the 3 tests from plan section D':
+  17. test_validate_calibration_pass
+  18. test_validate_calibration_fail_reports_top_candidates
+  19. test_validate_calibration_file_not_found
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from atm.config import (
+    CanaryRegion,
+    ColorSpec,
+    Config,
+    DiscordCfg,
+    ROI,
+    TelegramCfg,
+    YAxisCalib,
+)
+from atm.detector import DetectionResult
+from atm.vision import ColorMatch
+
+
+def _make_config() -> Config:
+    """Build the smallest Config these tests need.
+
+    The palette holds eight named colors so that a failing sample can still
+    be offered three candidate colors.
+    """
+    # (name, reference RGB, tolerance); dict insertion order follows this table.
+    swatches = (
+        ("turquoise", (0, 200, 200), 30),
+        ("yellow", (255, 255, 0), 30),
+        ("dark_green", (0, 100, 0), 30),
+        ("dark_red", (165, 42, 42), 30),
+        ("light_green", (144, 238, 144), 30),
+        ("light_red", (255, 182, 193), 30),
+        ("gray", (128, 128, 128), 30),
+        ("background", (18, 18, 18), 15),
+    )
+    palette = {
+        name: ColorSpec(rgb=rgb, tolerance=tol) for name, rgb, tol in swatches
+    }
+
+    # Two separate ROI instances on purpose: the config fields do not share one.
+    dot_area = ROI(x=0, y=0, w=100, h=100)
+    chart_area = ROI(x=0, y=0, w=100, h=100)
+    axis = YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0)
+    canary_region = CanaryRegion(
+        roi=ROI(x=0, y=0, w=10, h=10),
+        baseline_phash="0" * 64,
+    )
+    discord_cfg = DiscordCfg(webhook_url="http://localhost/fake")
+    telegram_cfg = TelegramCfg(bot_token="fake_token", chat_id="123")
+
+    return Config(
+        window_title="test",
+        dot_roi=dot_area,
+        chart_roi=chart_area,
+        colors=palette,
+        y_axis=axis,
+        canary=canary_region,
+        discord=discord_cfg,
+        telegram=telegram_cfg,
+        debounce_depth=1,
+    )
+
+
+def _write_labels(tmp_path: Path, entries: list[dict]) -> Path:
+    """Dump *entries* as JSON into ``labels.json`` under *tmp_path*.
+
+    Returns the path of the written file.
+    """
+    target = tmp_path.joinpath("labels.json")
+    payload = json.dumps(entries)
+    target.write_text(payload, encoding="utf-8")
+    return target
+
+
+def _write_blank_png(tmp_path: Path, name: str) -> Path:
+    """Write a trivially-valid 10x10 BGR image so cv2.imread returns non-None.
+
+    Args:
+        tmp_path: Directory the image is written into.
+        name: File name (including extension) of the image to create.
+
+    Returns:
+        Path of the written image.
+
+    Raises:
+        RuntimeError: If ``cv2.imwrite`` reports that the file was not saved.
+    """
+    # Imported lazily, as in the original helper, so that merely importing
+    # this test module does not require OpenCV.
+    import cv2
+
+    p = tmp_path / name
+    arr = np.zeros((10, 10, 3), dtype=np.uint8)
+    # cv2.imwrite signals failure through its boolean return value; fail here
+    # rather than letting a missing fixture show up later as a confusing
+    # validation error.
+    if not cv2.imwrite(str(p), arr):
+        raise RuntimeError(f"cv2.imwrite failed to write fixture image: {p}")
+    return p
+
+
+# ---------------------------------------------------------------------------
+# Test 17: PASS path — mocked Detector.step returns expected color
+# ---------------------------------------------------------------------------
+
+def test_validate_calibration_pass(monkeypatch, tmp_path):
+    """A sample whose stubbed detection equals its label yields a clean PASS.
+
+    Checks the library-level report first, then that the CLI handler exits
+    with status 0.
+    """
+    from atm import validate as validate_mod
+
+    sample_png = _write_blank_png(tmp_path, "yellow_sample.png")
+    entry = {"path": str(sample_png), "expected": "yellow", "note": "test"}
+    labels_path = _write_labels(tmp_path, [entry])
+
+    def _stub_step(self, ts, frame=None):
+        # Stand-in for Detector.step: always reports an accepted "yellow" dot.
+        hit = ColorMatch(name="yellow", distance=6.0, confidence=0.94)
+        return DetectionResult(
+            ts=ts,
+            window_found=True,
+            dot_found=True,
+            rgb=(250, 250, 5),
+            match=hit,
+            accepted=True,
+            color="yellow",
+        )
+
+    monkeypatch.setattr("atm.detector.Detector.step", _stub_step)
+
+    cfg = _make_config()
+    report = validate_mod.validate_calibration(labels_path, cfg)
+
+    assert (report.total, report.passed, report.failed) == (1, 1, 0)
+    assert report.all_pass is True
+    first = report.records[0]
+    assert first.passed is True
+    assert first.detected == "yellow"
+    assert first.expected == "yellow"
+    assert "[PASS]" in report.render()
+
+    # CLI wiring: the handler must terminate with exit status 0.
+    import atm.main as _main
+
+    class _Args:
+        label_file = labels_path
+
+    def _load_current(cls, d):
+        return _make_config()
+
+    monkeypatch.setattr("atm.config.Config.load_current", classmethod(_load_current))
+    with pytest.raises(SystemExit) as exit_info:
+        _main._cmd_validate_calibration(_Args())
+    assert exit_info.value.code == 0
+
+
+# ---------------------------------------------------------------------------
+# Test 18: FAIL path — Detector returns wrong color; report lists top 3
+#                      candidates and a SUGGESTIONS line with RGB distance.
+# ---------------------------------------------------------------------------
+
+def test_validate_calibration_fail_reports_top_candidates(monkeypatch, tmp_path):
+    """A label/detection mismatch must produce a FAIL report.
+
+    Detector.step is replaced with a stub that reports "gray" for a sample
+    labelled "dark_red". The test then checks the record fields, the rendered
+    report text, and that the CLI handler exits with status 1.
+    """
+    from atm import validate as validate_mod
+
+    img_path = _write_blank_png(tmp_path, "dark_red_sample.png")
+    labels = _write_labels(
+        tmp_path,
+        [{"path": str(img_path), "expected": "dark_red", "note": "missed dark_red"}],
+    )
+
+    # The stub claims the detector picked gray (modelled on the 2026-04-17 miss).
+    # NOTE: against the palette in _make_config(), the stubbed RGB (135, 62, 67)
+    # is Euclidean-nearest to dark_red (~43.9) and only second-nearest to gray
+    # (~90.1), so the stub's match values are illustrative rather than derived.
+    def fake_step(self, ts, frame=None):
+        return DetectionResult(
+            ts=ts,
+            window_found=True,
+            dot_found=True,
+            rgb=(135, 62, 67),
+            match=ColorMatch(name="gray", distance=45.0, confidence=0.12),
+            accepted=True,
+            color="gray",
+        )
+
+    monkeypatch.setattr("atm.detector.Detector.step", fake_step)
+
+    report = validate_mod.validate_calibration(labels, _make_config())
+
+    assert report.total == 1
+    assert report.failed == 1
+    assert report.all_pass is False
+
+    rec = report.records[0]
+    assert rec.passed is False
+    assert rec.detected == "gray"
+    assert rec.expected == "dark_red"
+    # Top 3 candidates populated as (name, score) pairs. The sort order
+    # (presumably ascending RGB distance -- confirm in atm.validate) is not
+    # asserted here; only the count and the presence of dark_red are.
+    assert len(rec.top3) == 3
+    names = [n for n, _ in rec.top3]
+    # dark_red should appear in top candidates since observed RGB(135,62,67)
+    # is reasonably close to dark_red(165,42,42).
+    assert "dark_red" in names
+
+    rendered = report.render()
+    assert "[FAIL]" in rendered
+    assert "Top 3 candidates:" in rendered
+    assert "SUGGESTIONS:" in rendered
+    # The rendered report must mention the expected color and its configured RGB.
+    # NOTE(review): the measured distance itself is not asserted -- its output
+    # format is not visible from this test.
+    assert "dark_red" in rendered
+    assert "(165, 42, 42)" in rendered
+
+    # CLI wiring: exit 1
+    import atm.main as _main
+
+    class _Args:
+        label_file = labels
+
+    monkeypatch.setattr("atm.config.Config.load_current", classmethod(lambda cls, d: _make_config()))
+    with pytest.raises(SystemExit) as exc_info:
+        _main._cmd_validate_calibration(_Args())
+    assert exc_info.value.code == 1
+
+
+# ---------------------------------------------------------------------------
+# Test 19: missing label file — clean error, non-zero exit, no stack trace
+# ---------------------------------------------------------------------------
+
+def test_validate_calibration_file_not_found(monkeypatch, tmp_path, capsys):
+    """A nonexistent label file is reported cleanly at both API and CLI level."""
+    from atm import validate as validate_mod
+
+    absent = tmp_path.joinpath("nope.json")
+
+    # Library level: the failure surfaces as ValidationError rather than a
+    # bare FileNotFoundError.
+    with pytest.raises(validate_mod.ValidationError) as lib_err:
+        validate_mod.validate_calibration(absent, _make_config())
+    assert "not found" in str(lib_err.value).lower()
+
+    # CLI level: the handler exits via sys.exit with a non-zero status and
+    # puts the message on stderr.
+    import atm.main as _main
+
+    class _Args:
+        label_file = absent
+
+    def _load_current(cls, d):
+        return _make_config()
+
+    monkeypatch.setattr("atm.config.Config.load_current", classmethod(_load_current))
+    with pytest.raises(SystemExit) as cli_exit:
+        _main._cmd_validate_calibration(_Args())
+    assert cli_exit.value.code != 0
+
+    stderr_text = capsys.readouterr().err
+    assert "not found" in stderr_text.lower()
+    # No Python traceback may leak through to the user.
+    assert "Traceback" not in stderr_text