Files
atm/tests/test_validate.py
Marius Mutu 3cb647e084 feat(cli): atm validate-calibration — offline color classification gate
Adds `atm validate-calibration LABEL_FILE` subcommand that runs the Detector
on a set of labeled PNG frames and reports per-sample PASS/FAIL with top-3
candidate colors and RGB-distance suggestions for failures. Exits 0 on 100%
PASS, 1 on any FAIL, 2 on missing/malformed label file.

- New module src/atm/validate.py with ValidationReport + SampleRecord
  dataclasses; reuses Detector.step(frame), does not reimplement color
  classification.
- main.py: new `validate-calibration` subparser and _cmd_validate_calibration
  handler wired into the dispatch map.
- samples/calibration_labels.json seeded with 3 entries from the 2026-04-17
  incident, plus a README describing the schema.
- tests/test_validate.py covers the 3 planned cases: PASS, FAIL w/ top-3
  + suggestion, missing file (graceful error, no traceback).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-18 11:54:48 +03:00

215 lines
7.2 KiB
Python

"""Tests for atm.validate — offline calibration validation.
Covers the 3 tests from plan section D':
17. test_validate_calibration_pass
18. test_validate_calibration_fail_reports_top_candidates
19. test_validate_calibration_file_not_found
"""
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
import pytest
from atm.config import (
CanaryRegion,
ColorSpec,
Config,
DiscordCfg,
ROI,
TelegramCfg,
YAxisCalib,
)
from atm.detector import DetectionResult
from atm.vision import ColorMatch
def _make_config() -> Config:
    """Build a small in-memory Config whose palette is big enough for a top-3 ranking."""
    # One row per palette entry: (name, reference RGB, tolerance).
    # The dict below keeps the rows in the order they are declared here.
    palette_rows = (
        ("turquoise", (0, 200, 200), 30),
        ("yellow", (255, 255, 0), 30),
        ("dark_green", (0, 100, 0), 30),
        ("dark_red", (165, 42, 42), 30),
        ("light_green", (144, 238, 144), 30),
        ("light_red", (255, 182, 193), 30),
        ("gray", (128, 128, 128), 30),
        ("background", (18, 18, 18), 15),
    )
    palette = {
        color_name: ColorSpec(rgb=reference_rgb, tolerance=tol)
        for color_name, reference_rgb, tol in palette_rows
    }

    # Two distinct ROI instances with identical geometry (not one shared object).
    dot_region = ROI(x=0, y=0, w=100, h=100)
    chart_region = ROI(x=0, y=0, w=100, h=100)

    axis_calibration = YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0)
    canary_region = CanaryRegion(
        roi=ROI(x=0, y=0, w=10, h=10),
        baseline_phash="0" * 64,
    )

    # Placeholder notifier settings.
    discord_cfg = DiscordCfg(webhook_url="http://localhost/fake")
    telegram_cfg = TelegramCfg(bot_token="fake_token", chat_id="123")

    return Config(
        window_title="test",
        dot_roi=dot_region,
        chart_roi=chart_region,
        colors=palette,
        y_axis=axis_calibration,
        canary=canary_region,
        discord=discord_cfg,
        telegram=telegram_cfg,
        debounce_depth=1,
    )
def _write_labels(tmp_path: Path, entries: list[dict]) -> Path:
    """Dump *entries* as JSON to ``labels.json`` inside *tmp_path* and return that file's path."""
    serialized = json.dumps(entries)
    label_file = tmp_path / "labels.json"
    label_file.write_text(serialized, encoding="utf-8")
    return label_file
def _write_blank_png(tmp_path: Path, name: str) -> Path:
    """Write an all-zero 10x10, 3-channel uint8 image to ``tmp_path / name``.

    The image only has to be readable, so that ``cv2.imread`` returns
    non-None for the path.

    Args:
        tmp_path: Directory the image is written into.
        name: File name of the image inside *tmp_path*.

    Returns:
        The path of the written image.

    Raises:
        RuntimeError: If ``cv2.imwrite`` reports that the file was not written.
    """
    # cv2 is imported here rather than at module top level.
    import cv2

    p = tmp_path / name
    arr = np.zeros((10, 10, 3), dtype=np.uint8)
    # cv2.imwrite signals some failures through its boolean return value
    # instead of raising; surface that here so a test does not continue
    # with a path to a file that was never created.
    if not cv2.imwrite(str(p), arr):
        raise RuntimeError(f"cv2.imwrite failed to write {p}")
    return p
# ---------------------------------------------------------------------------
# Test 17: PASS path — mocked Detector.step returns expected color
# ---------------------------------------------------------------------------
def test_validate_calibration_pass(monkeypatch, tmp_path):
    """PASS path: the stubbed detector reports the labeled color; the CLI handler exits 0."""
    from atm import validate as validate_mod

    sample_png = _write_blank_png(tmp_path, "yellow_sample.png")
    label_entry = {"path": str(sample_png), "expected": "yellow", "note": "test"}
    labels = _write_labels(tmp_path, [label_entry])

    def fake_step(self, ts, frame=None):
        # Always report an accepted "yellow" detection, whatever the frame is.
        yellow_match = ColorMatch(name="yellow", distance=6.0, confidence=0.94)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(250, 250, 5),
            match=yellow_match,
            accepted=True,
            color="yellow",
        )

    monkeypatch.setattr("atm.detector.Detector.step", fake_step)

    # Library level: one sample, one pass.
    report = validate_mod.validate_calibration(labels, _make_config())
    assert report.total == 1
    assert report.passed == 1
    assert report.failed == 0
    assert report.all_pass is True

    record = report.records[0]
    assert record.passed is True
    assert record.detected == "yellow"
    assert record.expected == "yellow"
    assert "[PASS]" in report.render()

    # CLI wiring: the handler must exit with code 0.
    import atm.main as _main

    class _CliArgs:
        label_file = labels

    def _stub_load_current(cls, d):
        return _make_config()

    monkeypatch.setattr(
        "atm.config.Config.load_current", classmethod(_stub_load_current)
    )
    with pytest.raises(SystemExit) as exit_info:
        _main._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 0
# ---------------------------------------------------------------------------
# Test 18: FAIL path — Detector returns wrong color; report lists top 3
# candidates and a SUGGESTIONS line with RGB distance.
# ---------------------------------------------------------------------------
def test_validate_calibration_fail_reports_top_candidates(monkeypatch, tmp_path):
    """FAIL path: the stubbed detector reports "gray" for a "dark_red" label; the CLI handler exits 1."""
    from atm import validate as validate_mod

    sample_png = _write_blank_png(tmp_path, "dark_red_sample.png")
    label_entry = {
        "path": str(sample_png),
        "expected": "dark_red",
        "note": "missed dark_red",
    }
    labels = _write_labels(tmp_path, [label_entry])

    def fake_step(self, ts, frame=None):
        # Stubbed observation: RGB (135, 62, 67), matched as "gray" and accepted.
        gray_match = ColorMatch(name="gray", distance=45.0, confidence=0.12)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(135, 62, 67),
            match=gray_match,
            accepted=True,
            color="gray",
        )

    monkeypatch.setattr("atm.detector.Detector.step", fake_step)

    # Library level: one sample, one failure.
    report = validate_mod.validate_calibration(labels, _make_config())
    assert report.total == 1
    assert report.failed == 1
    assert report.all_pass is False

    record = report.records[0]
    assert record.passed is False
    assert record.detected == "gray"
    assert record.expected == "dark_red"

    # Exactly three (name, score) candidates, and the expected color is
    # among them for the stubbed RGB (135, 62, 67).
    assert len(record.top3) == 3
    candidate_names = [candidate for candidate, _ in record.top3]
    assert "dark_red" in candidate_names

    # The rendered report carries the failure marker, the candidate list and
    # a suggestions section naming the expected color and its palette RGB.
    text = report.render()
    assert "[FAIL]" in text
    assert "Top 3 candidates:" in text
    assert "SUGGESTIONS:" in text
    assert "dark_red" in text
    assert "(165, 42, 42)" in text

    # CLI wiring: the handler must exit with code 1.
    import atm.main as _main

    class _CliArgs:
        label_file = labels

    def _stub_load_current(cls, d):
        return _make_config()

    monkeypatch.setattr(
        "atm.config.Config.load_current", classmethod(_stub_load_current)
    )
    with pytest.raises(SystemExit) as exit_info:
        _main._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 1
# ---------------------------------------------------------------------------
# Test 19: missing label file — clean error, non-zero exit, no stack trace
# ---------------------------------------------------------------------------
def test_validate_calibration_file_not_found(monkeypatch, tmp_path, capsys):
    """Missing label file: ValidationError from the library, non-zero exit and clean stderr from the CLI."""
    from atm import validate as validate_mod

    missing = tmp_path / "nope.json"

    # Library level: the failure surfaces as ValidationError with "not found"
    # in its message.
    with pytest.raises(validate_mod.ValidationError) as library_error:
        validate_mod.validate_calibration(missing, _make_config())
    assert "not found" in str(library_error.value).lower()

    # CLI level: SystemExit with a non-zero code and the message on stderr.
    import atm.main as _main

    class _CliArgs:
        label_file = missing

    def _stub_load_current(cls, d):
        return _make_config()

    monkeypatch.setattr(
        "atm.config.Config.load_current", classmethod(_stub_load_current)
    )
    with pytest.raises(SystemExit) as cli_exit:
        _main._cmd_validate_calibration(_CliArgs())
    assert cli_exit.value.code != 0

    stderr_text = capsys.readouterr().err
    assert "not found" in stderr_text.lower()
    # No Python traceback may appear in the captured stderr.
    assert "Traceback" not in stderr_text