feat(cli): atm validate-calibration — offline color classification gate
Adds an `atm validate-calibration LABEL_FILE` subcommand that runs the Detector on a set of labeled PNG frames and reports per-sample PASS/FAIL with top-3 candidate colors and RGB-distance suggestions for failures. Exits 0 on 100% PASS, 1 on any FAIL, 2 on missing/malformed label file.

- New module src/atm/validate.py with ValidationReport + SampleRecord dataclasses; reuses Detector.step(frame), does not reimplement color classification.
- main.py: new `validate-calibration` subparser and _cmd_validate_calibration handler wired into the dispatch map.
- samples/calibration_labels.json seeded with 3 entries from the 2026-04-17 incident, plus a README describing the schema.
- tests/test_validate.py covers the 3 planned cases: PASS, FAIL w/ top-3 + suggestion, missing file (graceful error, no traceback).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
214
tests/test_validate.py
Normal file
214
tests/test_validate.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""Tests for atm.validate — offline calibration validation.
|
||||
|
||||
Covers the 3 tests from plan section D':
|
||||
17. test_validate_calibration_pass
|
||||
18. test_validate_calibration_fail_reports_top_candidates
|
||||
19. test_validate_calibration_file_not_found
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from atm.config import (
|
||||
CanaryRegion,
|
||||
ColorSpec,
|
||||
Config,
|
||||
DiscordCfg,
|
||||
ROI,
|
||||
TelegramCfg,
|
||||
YAxisCalib,
|
||||
)
|
||||
from atm.detector import DetectionResult
|
||||
from atm.vision import ColorMatch
|
||||
|
||||
|
||||
def _make_config() -> Config:
    """Build a throwaway Config whose palette is big enough for a top-3 ranking."""
    # Seven swatches share tolerance 30; "background" is added last with a
    # tighter tolerance so the mapping keeps its original insertion order.
    swatches = {
        "turquoise": (0, 200, 200),
        "yellow": (255, 255, 0),
        "dark_green": (0, 100, 0),
        "dark_red": (165, 42, 42),
        "light_green": (144, 238, 144),
        "light_red": (255, 182, 193),
        "gray": (128, 128, 128),
    }
    palette = {
        label: ColorSpec(rgb=triple, tolerance=30)
        for label, triple in swatches.items()
    }
    palette["background"] = ColorSpec(rgb=(18, 18, 18), tolerance=15)

    # Separate ROI instances for the dot and chart regions, as in the original.
    dot_region = ROI(x=0, y=0, w=100, h=100)
    chart_region = ROI(x=0, y=0, w=100, h=100)
    axis = YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0)
    canary_region = CanaryRegion(
        roi=ROI(x=0, y=0, w=10, h=10),
        baseline_phash="0" * 64,
    )
    discord_cfg = DiscordCfg(webhook_url="http://localhost/fake")
    telegram_cfg = TelegramCfg(bot_token="fake_token", chat_id="123")

    return Config(
        window_title="test",
        dot_roi=dot_region,
        chart_roi=chart_region,
        colors=palette,
        y_axis=axis,
        canary=canary_region,
        discord=discord_cfg,
        telegram=telegram_cfg,
        debounce_depth=1,
    )
|
||||
|
||||
|
||||
def _write_labels(tmp_path: Path, entries: list[dict]) -> Path:
    """Dump *entries* as JSON into ``labels.json`` under *tmp_path*.

    Returns the path of the file that was written (UTF-8 encoded).
    """
    payload = json.dumps(entries)
    target = tmp_path.joinpath("labels.json")
    target.write_text(payload, encoding="utf-8")
    return target
|
||||
|
||||
|
||||
def _write_blank_png(tmp_path: Path, name: str) -> Path:
    """Write an all-zero 10x10, 3-channel uint8 image to ``tmp_path / name``.

    The file only has to be a valid image so that a later ``cv2.imread`` on it
    returns a non-None array; the pixel content is irrelevant to the tests.

    Args:
        tmp_path: Directory to write into.
        name: File name, including the extension that selects the encoder.

    Returns:
        The path of the written image.

    Raises:
        RuntimeError: If ``cv2.imwrite`` reports that nothing was written.
    """
    import cv2  # imported locally, as the original helper did

    p = tmp_path / name
    arr = np.zeros((10, 10, 3), dtype=np.uint8)
    # cv2.imwrite signals failure by returning False; fail here rather than
    # letting a missing fixture surface later as an unrelated error.
    if not cv2.imwrite(str(p), arr):
        raise RuntimeError(f"cv2.imwrite failed to write {p}")
    return p
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 17: PASS path — mocked Detector.step returns expected color
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_validate_calibration_pass(monkeypatch, tmp_path):
    """A sample whose detected color equals its label passes; the CLI exits 0."""
    from atm import validate as validation

    sample = _write_blank_png(tmp_path, "yellow_sample.png")
    entry = {"path": str(sample), "expected": "yellow", "note": "test"}
    labels = _write_labels(tmp_path, [entry])

    def fake_step(self, ts, frame=None):
        # Canned detection: always an accepted "yellow" match.
        matched = ColorMatch(name="yellow", distance=6.0, confidence=0.94)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(250, 250, 5),
            match=matched,
            accepted=True,
            color="yellow",
        )

    monkeypatch.setattr("atm.detector.Detector.step", fake_step)

    report = validation.validate_calibration(labels, _make_config())

    # Library-level result: one sample, one pass, nothing failed.
    assert (report.total, report.passed, report.failed) == (1, 1, 0)
    assert report.all_pass is True
    record = report.records[0]
    assert record.passed is True
    assert (record.detected, record.expected) == ("yellow", "yellow")
    assert "[PASS]" in report.render()

    # CLI wiring: exit 0
    import atm.main as cli

    class _CliArgs:
        label_file = labels

    monkeypatch.setattr(
        "atm.config.Config.load_current",
        classmethod(lambda cls, d: _make_config()),
    )
    with pytest.raises(SystemExit) as exit_info:
        cli._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 18: FAIL path — Detector returns wrong color; report lists top 3
|
||||
# candidates and a SUGGESTIONS line with RGB distance.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_validate_calibration_fail_reports_top_candidates(monkeypatch, tmp_path):
    """A wrong detection fails, reports three candidates, and the CLI exits 1."""
    from atm import validate as validation

    sample = _write_blank_png(tmp_path, "dark_red_sample.png")
    entry = {"path": str(sample), "expected": "dark_red", "note": "missed dark_red"}
    labels = _write_labels(tmp_path, [entry])

    # Stub detector: reports "gray" for a sample labeled dark_red
    # (modelled on the real 2026-04-17 miss).
    def fake_step(self, ts, frame=None):
        wrong_match = ColorMatch(name="gray", distance=45.0, confidence=0.12)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(135, 62, 67),
            match=wrong_match,
            accepted=True,
            color="gray",
        )

    monkeypatch.setattr("atm.detector.Detector.step", fake_step)

    report = validation.validate_calibration(labels, _make_config())

    assert (report.total, report.failed) == (1, 1)
    assert report.all_pass is False

    record = report.records[0]
    assert record.passed is False
    assert (record.detected, record.expected) == ("gray", "dark_red")
    # Exactly three (name, score) candidates are expected.
    assert len(record.top3) == 3
    candidate_names = [candidate for candidate, _score in record.top3]
    # The observed RGB (135, 62, 67) is reasonably close to
    # dark_red (165, 42, 42), so dark_red must be among the candidates.
    assert "dark_red" in candidate_names

    rendered = report.render()
    # The report must carry the FAIL marker, the candidate list, a suggestion
    # section, and the expected color together with its configured RGB.
    for fragment in (
        "[FAIL]",
        "Top 3 candidates:",
        "SUGGESTIONS:",
        "dark_red",
        "(165, 42, 42)",
    ):
        assert fragment in rendered

    # CLI wiring: exit 1
    import atm.main as cli

    class _CliArgs:
        label_file = labels

    monkeypatch.setattr(
        "atm.config.Config.load_current",
        classmethod(lambda cls, d: _make_config()),
    )
    with pytest.raises(SystemExit) as exit_info:
        cli._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 19: missing label file — clean error, non-zero exit, no stack trace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_validate_calibration_file_not_found(monkeypatch, tmp_path, capsys):
    """A missing label file gives a clean error: ValidationError, then exit 2.

    Library level: ``validate_calibration`` raises ``ValidationError`` whose
    message contains "not found". CLI level: the handler exits with status 2
    (the code the commit message documents for a missing/malformed label file;
    1 is reserved for sample failures), prints "not found" on stderr, and
    leaks no traceback.
    """
    from atm import validate as validate_mod

    missing = tmp_path / "nope.json"

    # Library-level: raises ValidationError (not bare FileNotFoundError).
    with pytest.raises(validate_mod.ValidationError) as lib_exc:
        validate_mod.validate_calibration(missing, _make_config())
    assert "not found" in str(lib_exc.value).lower()

    # CLI-level: graceful sys.exit with the missing-file code, message on stderr.
    import atm.main as _main

    class _Args:
        label_file = missing

    monkeypatch.setattr(
        "atm.config.Config.load_current",
        classmethod(lambda cls, d: _make_config()),
    )
    with pytest.raises(SystemExit) as cli_exc:
        _main._cmd_validate_calibration(_Args())
    # `!= 0` would also accept exit 1 (sample FAIL) or a string exit payload;
    # pin the exact status so the two failure modes stay distinguishable.
    assert cli_exc.value.code == 2

    err = capsys.readouterr().err
    assert "not found" in err.lower()
    # Ensure no python traceback leaked through.
    assert "Traceback" not in err
|
||||
Reference in New Issue
Block a user