feat: complete Faza 1 implementation (105 tests green)
All 12 modules built per reviewed plan: - detector, state_machine (5-state phased FSM), canary, levels Phase B - notifier fanout (Discord + Telegram, bounded queue, retry, dead-letter) - audit (JSONL daily rotation), journal, report (weekly R-multiple PnL) - calibrate + labeler (Tk, lazy-imported), dryrun with acceptance gate - unified CLI: atm calibrate|label|dryrun|run|journal|report README + Phase 2 prop-firm TOS audit checklist included. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
224
tests/test_dryrun.py
Normal file
224
tests/test_dryrun.py
Normal file
@@ -0,0 +1,224 @@
|
||||
"""Tests for atm.dryrun."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from atm.config import CanaryRegion, ColorSpec, Config, DiscordCfg, ROI, TelegramCfg, YAxisCalib
|
||||
from atm.dryrun import ConfusionMatrix, DryrunResult, dryrun
|
||||
|
||||
# ---------------------------------------------------------------------------
# Config fixture
#
# The 6x6 dot at x=250..255, y=50..55 in a 100x300 frame is sampled by
# pixel_rgb(box=3) over a 7x7 patch: 24 dot pixels + 25 background (0,0,0).
# Sampled RGB = int(true_RGB * 24/49). Config colors match the sampled values
# so classify_pixel returns the correct label.
# ---------------------------------------------------------------------------

_SCALE = 24 / 49  # fraction of dot pixels in the 7x7 sample box

# Diluted values the detector actually reads back: per channel,
# sampled ≈ int(true_RGB * _SCALE). The config tolerances below are
# anchored to these, not to the painted colors.
_SAMPLED_RGB: dict[str, tuple[int, int, int]] = {
    "turquoise": (0, 97, 97),      # true (0, 200, 200)
    "yellow": (124, 124, 0),       # true (255, 255, 0)
    "dark_green": (0, 48, 0),      # true (0, 100, 0)
    "dark_red": (68, 0, 0),        # true (139, 0, 0)
    "light_green": (70, 116, 70),  # true (144, 238, 144)
    "light_red": (124, 89, 94),    # true (255, 182, 193)
    "gray": (62, 62, 62),          # true (128, 128, 128)
}

# True RGB values used when painting frames (before sampling dilution);
# _make_dot_frame reverses each triple to BGR when writing pixels.
_TRUE_RGB: dict[str, tuple[int, int, int]] = {
    "turquoise": (0, 200, 200),
    "yellow": (255, 255, 0),
    "dark_green": (0, 100, 0),
    "dark_red": (139, 0, 0),
    "light_green": (144, 238, 144),
    "light_red": (255, 182, 193),
    "gray": (128, 128, 128),
}
|
||||
|
||||
|
||||
def _make_config() -> Config:
    """Build a minimal Config whose color palette matches the diluted sample values."""
    palette = {
        name: ColorSpec(rgb=rgb, tolerance=5) for name, rgb in _SAMPLED_RGB.items()
    }
    palette["background"] = ColorSpec(rgb=(0, 0, 0), tolerance=5)

    canary = CanaryRegion(
        roi=ROI(x=0, y=0, w=10, h=10),
        baseline_phash="0" * 64,
    )

    return Config(
        window_title="test",
        dot_roi=ROI(x=0, y=0, w=300, h=100),
        chart_roi=ROI(x=0, y=0, w=300, h=100),
        colors=palette,
        y_axis=YAxisCalib(p1_y=0, p1_price=100.0, p2_y=100, p2_price=0.0),
        canary=canary,
        discord=DiscordCfg(webhook_url="http://localhost/fake"),
        telegram=TelegramCfg(bot_token="fake_token", chat_id="123"),
        debounce_depth=1,
    )
|
||||
|
||||
|
||||
def _make_dot_frame(rgb: tuple[int, int, int]) -> np.ndarray:
|
||||
"""100x300 BGR frame with a 6x6 dot at x=250,y=50."""
|
||||
frame = np.zeros((100, 300, 3), dtype=np.uint8)
|
||||
frame[50:56, 250:256] = (rgb[2], rgb[1], rgb[0]) # BGR
|
||||
return frame
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Confusion matrix unit test — pure math, no cv2/detector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_confusion_matrix_math() -> None:
    """Precision/recall/F1 and accuracy against a tiny hand-computed matrix."""
    cm = ConfusionMatrix()
    # (truth, prediction) pairs; the third entry is an FN for A and an FP for B.
    for truth, predicted in [("A", "A"), ("A", "A"), ("A", "B"), ("B", "B")]:
        cm.add(truth, predicted)

    per = cm.per_label()

    # A: support=3, TP=2, FP=0 (B never predicted as A), FN=1
    assert per["A"]["support"] == 3.0
    assert per["A"]["precision"] == pytest.approx(1.0)  # TP/(TP+FP) = 2/2
    assert per["A"]["recall"] == pytest.approx(2 / 3)
    assert per["A"]["f1"] == pytest.approx(2 * 1.0 * (2 / 3) / (1.0 + 2 / 3))

    # B: support=1, TP=1, FP=1 (one A was predicted as B), FN=0
    assert per["B"]["support"] == 1.0
    assert per["B"]["precision"] == pytest.approx(0.5)  # TP/(TP+FP) = 1/2
    assert per["B"]["recall"] == pytest.approx(1.0)

    # 3 of the 4 predictions were correct.
    assert cm.overall_accuracy() == pytest.approx(3 / 4)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2-5: integration tests that require atm.detector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dryrun_perfect_match(tmp_path: Path) -> None:
    """All six signal colors classify correctly → perfect P/R and a passing gate."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()
    colors_6 = ["turquoise", "yellow", "dark_green", "dark_red", "light_green", "light_red"]

    labels: dict[str, str] = {}
    for idx, name in enumerate(colors_6):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name

    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.n_samples == 6
    assert result.n_labeled == 6
    assert result.precision_overall == pytest.approx(1.0)
    assert result.recall_overall == pytest.approx(1.0)
    assert result.acceptance_pass is True

    # Diagonal-only confusion matrix: each label predicts only itself.
    per = result.confusion.per_label()
    for name in colors_6:
        row = result.confusion.counts[name]
        assert row == {name: 1}, f"Expected diagonal for {name}, got {row}"
    assert all(m["precision"] == pytest.approx(1.0) for m in per.values())
    assert all(m["recall"] == pytest.approx(1.0) for m in per.values())
|
||||
|
||||
|
||||
def test_dryrun_with_unlabeled_sample(tmp_path: Path) -> None:
    """A PNG without a label entry counts as a sample but is excluded from scoring."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()

    # Three labeled frames...
    labels: dict[str, str] = {}
    for idx, name in enumerate(["turquoise", "yellow", "dark_green"]):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name

    # ...plus frame "3", which exists on disk but never appears in labels.json.
    cv2.imwrite(str(tmp_path / "3.png"), _make_dot_frame(_TRUE_RGB["dark_red"]))

    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.n_samples == 4  # every PNG on disk is counted
    assert result.n_labeled == 3  # only labeled frames are scored
    # The unlabeled frame must not leak into the confusion matrix.
    assert "3" not in result.confusion.counts
    assert set(result.confusion.counts.keys()) == {"turquoise", "yellow", "dark_green"}
|
||||
|
||||
|
||||
def test_dryrun_misclassification_fails_gate(tmp_path: Path) -> None:
    """A single wrong label is enough to fail the acceptance gate."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()

    colors_6 = ["turquoise", "yellow", "dark_green", "dark_red", "light_green", "light_red"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(colors_6):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name

    # Mislabel frame 0: the painted dot is turquoise, but the label says yellow.
    labels["0"] = "yellow"

    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert result.acceptance_pass is False
    # One yellow-labeled frame was predicted as turquoise, so yellow recall < 1.
    per = result.confusion.per_label()
    assert per["yellow"]["recall"] < 1.0
|
||||
|
||||
|
||||
def test_fire_event_captured(tmp_path: Path) -> None:
    """The turquoise → gray → dark_green → light_green sequence emits one BUY fire."""
    pytest.importorskip("atm.detector")
    import cv2

    cfg = _make_config()

    sequence = ["turquoise", "gray", "dark_green", "light_green"]
    labels: dict[str, str] = {}
    for idx, name in enumerate(sequence):
        cv2.imwrite(str(tmp_path / f"{idx}.png"), _make_dot_frame(_TRUE_RGB[name]))
        labels[str(idx)] = name

    labels_path = tmp_path / "labels.json"
    labels_path.write_text(json.dumps(labels))

    result = dryrun(tmp_path, labels_path, cfg)

    assert len(result.fire_events) == 1
    ev = result.fire_events[0]
    assert ev["direction"] == "BUY"
    assert ev["ts"] == pytest.approx(15.0)  # fires at sample i=3 → ts = 3 * 5.0
    assert ev["sample"] == "3"
|
||||
Reference in New Issue
Block a user