Adds `atm validate-calibration LABEL_FILE` subcommand that runs the Detector on a set of labeled PNG frames and reports per-sample PASS/FAIL, with the top-3 candidate colors and RGB-distance suggestions for failures. Exits 0 on 100% PASS, 1 on any FAIL, 2 on a missing/malformed label file.

- New module src/atm/validate.py with ValidationReport + SampleRecord dataclasses; reuses Detector.step(frame), does not reimplement color classification.
- main.py: new `validate-calibration` subparser and _cmd_validate_calibration handler wired into the dispatch map.
- samples/calibration_labels.json seeded with 3 entries from the 2026-04-17 incident, plus a README describing the schema.
- tests/test_validate.py covers the 3 planned cases: PASS, FAIL w/ top-3 + suggestion, missing file (graceful error, no traceback).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
215 lines
7.2 KiB
Python
215 lines
7.2 KiB
Python
"""Tests for atm.validate — offline calibration validation.
|
|
|
|
Covers the 3 tests from plan section D':
|
|
17. test_validate_calibration_pass
|
|
18. test_validate_calibration_fail_reports_top_candidates
|
|
19. test_validate_calibration_file_not_found
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from atm.config import (
|
|
CanaryRegion,
|
|
ColorSpec,
|
|
Config,
|
|
DiscordCfg,
|
|
ROI,
|
|
TelegramCfg,
|
|
YAxisCalib,
|
|
)
|
|
from atm.detector import DetectionResult
|
|
from atm.vision import ColorMatch
|
|
|
|
|
|
def _make_config() -> Config:
    """Build a small in-memory Config for the validation tests.

    The palette holds eight named colors, which is enough for a top-3
    candidate ranking to have entries to choose from.
    """
    shared_tolerance = 30
    palette_rgb = {
        "turquoise": (0, 200, 200),
        "yellow": (255, 255, 0),
        "dark_green": (0, 100, 0),
        "dark_red": (165, 42, 42),
        "light_green": (144, 238, 144),
        "light_red": (255, 182, 193),
        "gray": (128, 128, 128),
    }
    colors = {
        name: ColorSpec(rgb=rgb, tolerance=shared_tolerance)
        for name, rgb in palette_rgb.items()
    }
    # The background entry is the only one with a tighter tolerance; it is
    # added last so the palette keeps its original insertion order.
    colors["background"] = ColorSpec(rgb=(18, 18, 18), tolerance=15)

    # Dot and chart regions both span the same 100x100 area, but each gets
    # its own ROI instance.
    region_100 = {"x": 0, "y": 0, "w": 100, "h": 100}
    canary_region = CanaryRegion(
        roi=ROI(x=0, y=0, w=10, h=10),
        baseline_phash="0" * 64,
    )
    price_axis = YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0)

    return Config(
        window_title="test",
        dot_roi=ROI(**region_100),
        chart_roi=ROI(**region_100),
        colors=colors,
        y_axis=price_axis,
        canary=canary_region,
        discord=DiscordCfg(webhook_url="http://localhost/fake"),
        telegram=TelegramCfg(bot_token="fake_token", chat_id="123"),
        debounce_depth=1,
    )
|
|
|
|
|
|
def _write_labels(tmp_path: Path, entries: list[dict]) -> Path:
    """Serialize *entries* as JSON into ``labels.json`` under *tmp_path*.

    Returns the path of the file that was written.
    """
    label_file = tmp_path.joinpath("labels.json")
    payload = json.dumps(entries)
    label_file.write_text(payload, encoding="utf-8")
    return label_file
|
|
|
|
|
|
def _write_blank_png(tmp_path: Path, name: str) -> Path:
    """Write a trivially-valid 10x10 BGR image so cv2.imread returns non-None.

    Args:
        tmp_path: Directory the image is written into.
        name: File name of the image, including its extension.

    Returns:
        The path of the written image.

    Raises:
        RuntimeError: If OpenCV reports that the image was not written.
    """
    # Function-scope import keeps OpenCV out of this module's import time.
    import cv2

    target = tmp_path / name
    black_frame = np.zeros((10, 10, 3), dtype=np.uint8)
    # cv2.imwrite reports failure through its boolean return value rather
    # than by raising; without this check a failed write would hand the
    # caller a path to a file that does not exist.
    if not cv2.imwrite(str(target), black_frame):
        raise RuntimeError(f"cv2.imwrite failed to write {target}")
    return target
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 17: PASS path — mocked Detector.step returns expected color
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_validate_calibration_pass(monkeypatch, tmp_path):
    """Matching detection yields an all-PASS report and a CLI exit code of 0.

    ``Detector.step`` is stubbed to report "yellow" for a sample labelled
    "yellow". The library-level report is checked first, then the
    ``_cmd_validate_calibration`` handler is run against the same label file.
    """
    from atm import validate as validate_mod

    sample_png = _write_blank_png(tmp_path, "yellow_sample.png")
    entry = {"path": str(sample_png), "expected": "yellow", "note": "test"}
    labels_path = _write_labels(tmp_path, [entry])

    def stub_step(self, ts, frame=None):
        yellow_hit = ColorMatch(name="yellow", distance=6.0, confidence=0.94)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(250, 250, 5),
            match=yellow_hit,
            accepted=True,
            color="yellow",
        )

    monkeypatch.setattr("atm.detector.Detector.step", stub_step)

    report = validate_mod.validate_calibration(labels_path, _make_config())

    # Aggregate counters: one sample, one pass, no failures.
    assert (report.total, report.passed, report.failed) == (1, 1, 0)
    assert report.all_pass is True

    # Per-sample record.
    only_record = report.records[0]
    assert only_record.passed is True
    assert only_record.detected == "yellow"
    assert only_record.expected == "yellow"
    assert "[PASS]" in report.render()

    # CLI wiring: exit 0
    import atm.main as cli_main

    class _CliArgs:
        # The attribute name must differ from the enclosing local: a name
        # assigned in a class body is not looked up in the function scope.
        label_file = labels_path

    def _load_stub(cls, d):
        return _make_config()

    monkeypatch.setattr("atm.config.Config.load_current", classmethod(_load_stub))
    with pytest.raises(SystemExit) as exit_info:
        cli_main._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 18: FAIL path — Detector returns wrong color; report lists top 3
|
|
# candidates and a SUGGESTIONS line with RGB distance.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_validate_calibration_fail_reports_top_candidates(monkeypatch, tmp_path):
    """A mis-detected sample yields a FAIL record with candidates and a suggestion.

    ``Detector.step`` is stubbed to report "gray" for a sample labelled
    "dark_red". The report must mark the sample as failed, carry three
    candidate entries including "dark_red", and render the FAIL marker, the
    candidate list, and a SUGGESTIONS section. The CLI handler must then
    exit with code 1.
    """
    from atm import validate as validate_mod

    sample_png = _write_blank_png(tmp_path, "dark_red_sample.png")
    entry = {
        "path": str(sample_png),
        "expected": "dark_red",
        "note": "missed dark_red",
    }
    labels_path = _write_labels(tmp_path, [entry])

    # The stub forces a "gray" detection for the dark_red-labelled frame
    # (scenario modelled on the 2026-04-17 miss). The stubbed RGB itself,
    # (135, 62, 67), is nearer to dark_red (165, 42, 42) than to gray
    # (128, 128, 128) in plain Euclidean RGB.
    def stub_step(self, ts, frame=None):
        gray_hit = ColorMatch(name="gray", distance=45.0, confidence=0.12)
        return DetectionResult(
            ts=ts,
            window_found=True,
            dot_found=True,
            rgb=(135, 62, 67),
            match=gray_hit,
            accepted=True,
            color="gray",
        )

    monkeypatch.setattr("atm.detector.Detector.step", stub_step)

    report = validate_mod.validate_calibration(labels_path, _make_config())

    assert (report.total, report.failed) == (1, 1)
    assert report.all_pass is False

    only_record = report.records[0]
    assert only_record.passed is False
    assert only_record.detected == "gray"
    assert only_record.expected == "dark_red"
    # Exactly three (name, score) candidates are expected on the record.
    assert len(only_record.top3) == 3
    candidate_names = [cand for cand, _score in only_record.top3]
    # The expected color must be among the candidates offered.
    assert "dark_red" in candidate_names

    text = report.render()
    for marker in ("[FAIL]", "Top 3 candidates:", "SUGGESTIONS:"):
        assert marker in text
    # The rendered report must name the expected color and show its
    # configured RGB triple.
    assert "dark_red" in text
    assert "(165, 42, 42)" in text

    # CLI wiring: exit 1
    import atm.main as cli_main

    class _CliArgs:
        # The attribute name must differ from the enclosing local: a name
        # assigned in a class body is not looked up in the function scope.
        label_file = labels_path

    def _load_stub(cls, d):
        return _make_config()

    monkeypatch.setattr("atm.config.Config.load_current", classmethod(_load_stub))
    with pytest.raises(SystemExit) as exit_info:
        cli_main._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 19: missing label file — clean error, non-zero exit, no stack trace
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_validate_calibration_file_not_found(monkeypatch, tmp_path, capsys):
    """A missing label file gives a clean error at library and CLI level.

    The library call must raise ``ValidationError`` with "not found" in its
    message. The CLI handler must exit non-zero, print "not found" on
    stderr, and emit no Python traceback.
    """
    from atm import validate as validate_mod

    absent_path = tmp_path / "nope.json"

    # Library-level: raises ValidationError (not bare FileNotFoundError).
    with pytest.raises(validate_mod.ValidationError) as raised:
        validate_mod.validate_calibration(absent_path, _make_config())
    message = str(raised.value)
    assert "not found" in message.lower()

    # CLI-level: graceful sys.exit with non-zero code, message on stderr.
    import atm.main as cli_main

    class _CliArgs:
        label_file = absent_path

    def _load_stub(cls, d):
        return _make_config()

    monkeypatch.setattr("atm.config.Config.load_current", classmethod(_load_stub))
    with pytest.raises(SystemExit) as exit_info:
        cli_main._cmd_validate_calibration(_CliArgs())
    assert exit_info.value.code != 0

    captured = capsys.readouterr()
    stderr_text = captured.err
    assert "not found" in stderr_text.lower()
    # Ensure no python traceback leaked through.
    assert "Traceback" not in stderr_text
|