All 12 modules built per reviewed plan:
- detector, state_machine (5-state phased FSM), canary, levels Phase B
- notifier fanout (Discord + Telegram, bounded queue, retry, dead-letter)
- audit (JSONL daily rotation), journal, report (weekly R-multiple PnL)
- calibrate + labeler (Tk, lazy-imported), dryrun with acceptance gate
- unified CLI: atm calibrate|label|dryrun|run|journal|report

README + Phase 2 prop-firm TOS audit checklist included.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
55 lines · 1.9 KiB · Python
"""Tests for atm.labeler."""
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from atm.labeler import LabelStore, accuracy
|
|
|
|
|
|
def test_label_store_roundtrip(tmp_path: Path) -> None:
    path = tmp_path / "labels.json"
    store = LabelStore(path)
    store["img1.png"] = "turquoise"
    store["img2.png"] = "yellow"
    store.save()

    store2 = LabelStore(path)
    assert store2["img1.png"] == "turquoise"
    assert store2["img2.png"] == "yellow"
    assert store2.as_dict() == {"img1.png": "turquoise", "img2.png": "yellow"}
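

# The roundtrip test above assumes that atm.labeler.LabelStore supports
# dict-style __getitem__/__setitem__, an explicit save(), and as_dict(),
# persisting labels to a JSON file. A minimal sketch under those
# assumptions (_LabelStoreSketch is hypothetical, not the real class):
import json


class _LabelStoreSketch:
    """Dict-backed image-label store persisted as a JSON file."""

    def __init__(self, path: Path) -> None:
        self._path = path
        # Load any previously saved labels so a second store sees them.
        self._data: dict[str, str] = (
            json.loads(path.read_text()) if path.exists() else {}
        )

    def __getitem__(self, key: str) -> str:
        return self._data[key]

    def __setitem__(self, key: str, value: str) -> None:
        self._data[key] = value

    def save(self) -> None:
        # Write the full mapping back to disk in one shot.
        self._path.write_text(json.dumps(self._data, indent=2))

    def as_dict(self) -> dict[str, str]:
        return dict(self._data)

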
def test_accuracy_perfect() -> None:
    labels = {"a.png": "turquoise", "b.png": "yellow", "c.png": "gray"}
    predicted = {"a.png": "turquoise", "b.png": "yellow", "c.png": "gray"}
    result = accuracy(labels, predicted)

    assert result["accuracy"] == pytest.approx(1.0)
    assert result["turquoise_precision"] == pytest.approx(1.0)
    assert result["turquoise_recall"] == pytest.approx(1.0)
    assert result["yellow_f1"] == pytest.approx(1.0)
    assert result["gray_f1"] == pytest.approx(1.0)


def test_accuracy_partial() -> None:
    # a=turquoise correct, b=turquoise predicted yellow (FN turquoise / FP yellow), c=yellow correct
    labels = {"a.png": "turquoise", "b.png": "turquoise", "c.png": "yellow"}
    predicted = {"a.png": "turquoise", "b.png": "yellow", "c.png": "yellow"}

    result = accuracy(labels, predicted)

    # 2 out of 3 correct
    assert result["accuracy"] == pytest.approx(2 / 3)

    # turquoise: tp=1, fp=0, fn=1 → precision=1.0, recall=0.5, f1=2/3
    assert result["turquoise_precision"] == pytest.approx(1.0)
    assert result["turquoise_recall"] == pytest.approx(0.5)
    assert result["turquoise_f1"] == pytest.approx(2 / 3)

    # yellow: tp=1, fp=1, fn=0 → precision=0.5, recall=1.0, f1=2/3
    assert result["yellow_precision"] == pytest.approx(0.5)
    assert result["yellow_recall"] == pytest.approx(1.0)
    assert result["yellow_f1"] == pytest.approx(2 / 3)
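

# For reference, a sketch of the accuracy() contract these tests encode:
# an overall "accuracy" key plus per-class "<cls>_precision", "<cls>_recall",
# and "<cls>_f1" keys. _accuracy_sketch is hypothetical; the real
# atm.labeler.accuracy may be implemented differently.
def _accuracy_sketch(
    labels: dict[str, str], predicted: dict[str, str]
) -> dict[str, float]:
    keys = labels.keys() & predicted.keys()
    correct = sum(1 for k in keys if labels[k] == predicted[k])
    result = {"accuracy": correct / len(keys) if keys else 0.0}
    for cls in set(labels.values()) | set(predicted.values()):
        # Per-class confusion counts over the shared keys.
        tp = sum(1 for k in keys if labels[k] == cls and predicted[k] == cls)
        fp = sum(1 for k in keys if labels[k] != cls and predicted[k] == cls)
        fn = sum(1 for k in keys if labels[k] == cls and predicted[k] != cls)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        denom = precision + recall
        f1 = 2 * precision * recall / denom if denom else 0.0
        result[f"{cls}_precision"] = precision
        result[f"{cls}_recall"] = recall
        result[f"{cls}_f1"] = f1
    return result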