Files
atm/tests/test_labeler.py
Claude Agent bf70ca3ac7 feat: complete Phase 1 implementation (105 tests green)
All 12 modules built per reviewed plan:
- detector, state_machine (5-state phased FSM), canary, levels Phase B
- notifier fanout (Discord + Telegram, bounded queue, retry, dead-letter)
- audit (JSONL daily rotation), journal, report (weekly R-multiple PnL)
- calibrate + labeler (Tk, lazy-imported), dryrun with acceptance gate
- unified CLI: atm calibrate|label|dryrun|run|journal|report

README + Phase 2 prop-firm TOS audit checklist included.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 22:17:41 +00:00

55 lines
1.9 KiB
Python

"""Tests for atm.labeler."""
from __future__ import annotations
from pathlib import Path
import pytest
from atm.labeler import LabelStore, accuracy
def test_label_store_roundtrip(tmp_path: Path) -> None:
    """Labels written via __setitem__ and save() survive a reload from disk."""
    store_path = tmp_path / "labels.json"
    original = LabelStore(store_path)
    for image, color in (("img1.png", "turquoise"), ("img2.png", "yellow")):
        original[image] = color
    original.save()
    # A fresh store backed by the same file must expose the persisted labels.
    reloaded = LabelStore(store_path)
    assert reloaded.as_dict() == {"img1.png": "turquoise", "img2.png": "yellow"}
    assert reloaded["img1.png"] == "turquoise"
    assert reloaded["img2.png"] == "yellow"
def test_accuracy_perfect() -> None:
    """When predictions match labels exactly, every reported metric is 1.0."""
    truth = {"a.png": "turquoise", "b.png": "yellow", "c.png": "gray"}
    # Predicting the ground truth verbatim must yield perfect scores.
    metrics = accuracy(truth, dict(truth))
    assert metrics["accuracy"] == pytest.approx(1.0)
    for key in ("turquoise_precision", "turquoise_recall", "yellow_f1", "gray_f1"):
        assert metrics[key] == pytest.approx(1.0)
def test_accuracy_partial() -> None:
    """One misclassification out of three skews the per-class metrics as expected."""
    # a: turquoise, correct; b: turquoise but predicted yellow (FN for
    # turquoise, FP for yellow); c: yellow, correct.
    truth = {"a.png": "turquoise", "b.png": "turquoise", "c.png": "yellow"}
    guesses = {"a.png": "turquoise", "b.png": "yellow", "c.png": "yellow"}
    metrics = accuracy(truth, guesses)
    assert metrics["accuracy"] == pytest.approx(2 / 3)  # 2 of 3 correct
    # turquoise: tp=1, fp=0, fn=1 -> precision 1.0, recall 0.5, f1 2/3
    assert metrics["turquoise_precision"] == pytest.approx(1.0)
    assert metrics["turquoise_recall"] == pytest.approx(0.5)
    assert metrics["turquoise_f1"] == pytest.approx(2 / 3)
    # yellow: tp=1, fp=1, fn=0 -> precision 0.5, recall 1.0, f1 2/3
    assert metrics["yellow_precision"] == pytest.approx(0.5)
    assert metrics["yellow_recall"] == pytest.approx(1.0)
    assert metrics["yellow_f1"] == pytest.approx(2 / 3)