commands: m2d-log + backtest + batch + stats slash commands (124 tests pass)

2026-05-13 12:48:26 +03:00
parent 26d084dc4b
commit 34af5b631e
7 changed files with 1111 additions and 730 deletions
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -1,4 +1,5 @@
-"""Tests for scripts/stats.py."""
+"""CSV-fixture tests for scripts.stats — compute_stats, render_stats,
+compute_calibration, render_calibration, main()."""

 from __future__ import annotations

@@ -12,24 +13,17 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

 from scripts.append_row import CSV_COLUMNS  # noqa: E402
 from scripts.stats import (  # noqa: E402
-    BACKTEST_SOURCES,
    CORE_CALIBRATION_FIELDS,
-    bootstrap_ci,
-    calibration_mismatch,
-    compute_group_stats,
-    expectancy,
-    format_calibration_report,
-    format_report,
-    group_by,
-    load_trades,
+    compute_calibration,
+    compute_stats,
    main,
-    win_rate,
-    wilson_ci,
+    render_calibration,
+    render_stats,
 )


 # ---------------------------------------------------------------------------
-# Synthetic CSV fixture: 30 trades
+# Fixture row builder
 # ---------------------------------------------------------------------------


@@ -78,55 +72,61 @@ def _write_csv(path: Path, rows: list[dict[str, str]]) -> None:
            w.writerow({k: r.get(k, "") for k in CSV_COLUMNS})


-def _synthetic_30(tmp_path: Path) -> Path:
-    """30 vision-source trades engineered for known stats.
+# Outcome templates (P/L values) — match scripts.pl_calc tables.
+_SL = {"outcome_path": "SL", "max_reached": "SL_first", "be_moved": "False",
+       "pl_marius": "-1.0000", "pl_theoretical": "-1.0000"}
+_TP0_SL_BE = {"outcome_path": "TP0→SL", "max_reached": "TP0", "be_moved": "True",
+              "pl_marius": "0.2000", "pl_theoretical": "0.1330"}
+_TP0_TP1 = {"outcome_path": "TP0→TP1", "max_reached": "TP1", "be_moved": "True",
+            "pl_marius": "0.5000", "pl_theoretical": "0.3330"}
+_TP0_TP2 = {"outcome_path": "TP0→TP2", "max_reached": "TP2", "be_moved": "True",
+            "pl_marius": "0.5000", "pl_theoretical": "0.6670"}
+_PENDING = {"outcome_path": "pending", "max_reached": "TP0", "be_moved": "False",
+            "pl_marius": "", "pl_theoretical": "0.1330"}

-    Layout (by Set):
-      - A1: 10 trades — 6 wins TP0->TP1 (+0.5), 4 losses SL (-1.0) → WR 60%
-      - A2: 10 trades — 7 wins TP0->TP2 (+0.5), 3 losses SL (-1.0) → WR 70%
-      - A3: 10 trades — 4 wins TP0->TP1 (+0.5), 6 losses SL (-1.0) → WR 40%

-    Overall: 17 wins / 30, WR ≈ 56.67%.
+def _synthetic_csv(tmp_path: Path) -> Path:
+    """30-trade backtest fixture.
+
+    Set distribution:
+      A1: 8 rows (all closed; 3 SL, 2 TP0→SL, 2 TP0→TP1, 1 TP0→TP2)
+      A2: 10 rows (all closed; 4 SL, 3 TP0→SL, 2 TP0→TP1, 1 TP0→TP2)
+      B : 7 rows (2 pending, 5 closed; 2 SL, 2 TP0→TP1, 1 TP0→TP2)
+      D : 5 rows (3 pending, 2 closed; 1 SL, 1 TP0→TP1)
+
+    Totals: n_total=30, n_pending=5, n_closed=25.
+
+    Wins by pl_marius (>0): all TP0→SL (BE moved) + TP0→TP1 + TP0→TP2
+      A1: 2 + 2 + 1 = 5 wins / 8
+      A2: 3 + 2 + 1 = 6 wins / 10
+      B : 0 + 2 + 1 = 3 wins / 5
+      D : 0 + 1 + 0 = 1 win  / 2
+      Total wins = 15 / 25 = 60.0%.
+
+    Calitate distribution: half "Clară", half "Slabă" (alternating).
+    Directie distribution: 2/3 Buy, 1/3 Sell.
    """
    rows: list[dict[str, str]] = []
    rid = 0

-    def add(set_label: str, n_win: int, n_loss: int, calitate: str = "Clară") -> None:
+    def add(set_label: str, outcomes: list[dict[str, str]]) -> None:
        nonlocal rid
-        for _ in range(n_win):
+        for i, outcome in enumerate(outcomes):
            rid += 1
-            rows.append(
-                _base_row(
-                    id=rid,
-                    screenshot_file=f"win-{rid}.png",
-                    set=set_label,
-                    calitate=calitate,
-                    outcome_path="TP0→TP1",
-                    max_reached="TP1",
-                    be_moved="True",
-                    pl_marius="0.5000",
-                    pl_theoretical="0.3330",
-                )
-            )
-        for _ in range(n_loss):
-            rid += 1
-            rows.append(
-                _base_row(
-                    id=rid,
-                    screenshot_file=f"loss-{rid}.png",
-                    set=set_label,
-                    calitate=calitate,
-                    outcome_path="SL",
-                    max_reached="SL_first",
-                    be_moved="False",
-                    pl_marius="-1.0000",
-                    pl_theoretical="-1.0000",
-                )
+            row = _base_row(
+                id=rid,
+                screenshot_file=f"{set_label.lower()}-{rid}.png",
+                set=set_label,
+                calitate="Clară" if rid % 2 == 0 else "Slabă",
+                directie="Buy" if rid % 3 != 0 else "Sell",
            )
+            row.update({k: str(v) for k, v in outcome.items()})
+            rows.append(row)

-    add("A1", 6, 4)
-    add("A2", 7, 3)
-    add("A3", 4, 6)
+    add("A1", [_SL] * 3 + [_TP0_SL_BE] * 2 + [_TP0_TP1] * 2 + [_TP0_TP2] * 1)
+    add("A2", [_SL] * 4 + [_TP0_SL_BE] * 3 + [_TP0_TP1] * 2 + [_TP0_TP2] * 1)
+    add("B", [_PENDING] * 2 + [_SL] * 2 + [_TP0_TP1] * 2 + [_TP0_TP2] * 1)
+    add("D", [_PENDING] * 3 + [_SL] * 1 + [_TP0_TP1] * 1)

    path = tmp_path / "jurnal.csv"
    _write_csv(path, rows)
@@ -134,336 +134,314 @@ def _synthetic_30(tmp_path: Path) -> Path:


 # ---------------------------------------------------------------------------
-# Wilson CI — reference values
+# compute_stats — core
 # ---------------------------------------------------------------------------


-class TestWilsonCI:
-    def test_n_zero(self) -> None:
-        assert wilson_ci(0, 0) == (0.0, 0.0)
+class TestComputeStats:
+    def test_compute_stats_n_pending(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        assert s["n_total"] == 30
+        assert s["n_pending"] == 5
+        assert s["n_closed"] == 25

-    def test_50pct_at_n40(self) -> None:
-        lo, hi = wilson_ci(20, 40)
-        assert lo == pytest.approx(0.3519927879709976, abs=1e-9)
-        assert hi == pytest.approx(0.6480072120290024, abs=1e-9)
+    def test_compute_stats_wr_correct(self, tmp_path: Path) -> None:
+        """Manual win count: 15 / 25 = 60.0%."""
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        assert s["wr"] == pytest.approx(15 / 25)
+        lo, hi = s["wr_ci_95"]
+        assert 0.0 <= lo <= s["wr"] <= hi <= 1.0

-    def test_55pct_at_n40(self) -> None:
-        lo, hi = wilson_ci(22, 40)
-        assert lo == pytest.approx(0.3982882988844078, abs=1e-9)
-        assert hi == pytest.approx(0.6929492471905531, abs=1e-9)
+    def test_compute_stats_per_set(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        a2 = s["per_set"]["A2"]
+        assert a2["n"] == 10  # 10 closed A2 trades
+        # A2 wins (pl_marius > 0): 3 BE + 2 TP1 + 1 TP2 = 6 / 10
+        assert a2["wr"] == pytest.approx(0.60)

-    def test_55pct_at_n100(self) -> None:
-        # Larger N tightens the CI; lower bound rises above 45%.
-        lo, hi = wilson_ci(55, 100)
-        assert lo == pytest.approx(0.4524442703164345, abs=1e-9)
-        assert hi == pytest.approx(0.6438562489359655, abs=1e-9)
-        assert lo > 0.45  # STOPPING_RULE GO-LIVE gate
+    def test_per_set_b_pending_excluded(self, tmp_path: Path) -> None:
+        """Set B has 7 total rows (2 pending + 5 closed). n must be 5."""
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        assert s["per_set"]["B"]["n"] == 5
+        # B wins: 0 BE + 2 TP1 + 1 TP2 = 3 / 5
+        assert s["per_set"]["B"]["wr"] == pytest.approx(0.60)

-    def test_zero_wins(self) -> None:
-        lo, hi = wilson_ci(0, 10)
-        assert lo == pytest.approx(0.0, abs=1e-12)
-        assert hi == pytest.approx(0.2775401687666165, abs=1e-9)
+    def test_per_directie_no_ci_keys(self, tmp_path: Path) -> None:
+        """per_directie omits CI fields per spec (only n / wr / expectancy)."""
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        for k, d in s["per_directie"].items():
+            assert set(d.keys()) == {"n", "wr", "expectancy"}, k

-    def test_all_wins(self) -> None:
-        lo, hi = wilson_ci(10, 10)
-        assert lo == pytest.approx(0.7224598312333834, abs=1e-9)
-        assert hi == pytest.approx(1.0, abs=1e-12)
+    def test_overlay_theoretical_vs_marius(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
+        s_m = compute_stats(path, overlay="pl_marius")
+        s_t = compute_stats(path, overlay="pl_theoretical")
+        # Same N, but different expectancy.
+        assert s_m["n_closed"] == s_t["n_closed"]
+        assert s_m["expectancy"] != s_t["expectancy"]

-    def test_wins_out_of_range(self) -> None:
+    def test_unknown_overlay_raises(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
        with pytest.raises(ValueError):
-            wilson_ci(11, 10)
-        with pytest.raises(ValueError):
-            wilson_ci(-1, 10)
+            compute_stats(path, overlay="pl_imaginary")

+    def test_empty_csv_no_crash(self, tmp_path: Path) -> None:
+        path = tmp_path / "empty.csv"
+        _write_csv(path, [])
+        s = compute_stats(path)
+        assert s["n_total"] == 0
+        assert s["n_closed"] == 0
+        assert s["per_set"] == {}
+        assert s["wr"] == 0.0
+        assert s["wr_ci_95"] == (0.0, 0.0)

-# ---------------------------------------------------------------------------
-# Bootstrap CI — determinism + sanity
-# ---------------------------------------------------------------------------
+    def test_missing_csv_no_crash(self, tmp_path: Path) -> None:
+        # Nonexistent path: treat as empty, do not raise.
+        s = compute_stats(tmp_path / "ghost.csv")
+        assert s["n_total"] == 0

-
-class TestBootstrapCI:
-    def test_deterministic_with_seed(self) -> None:
-        vals = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5]
-        lo1, hi1 = bootstrap_ci(vals, iterations=500, seed=42)
-        lo2, hi2 = bootstrap_ci(vals, iterations=500, seed=42)
-        assert (lo1, hi1) == (lo2, hi2)
-
-    def test_different_seed_different_result(self) -> None:
-        vals = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5]
-        r1 = bootstrap_ci(vals, iterations=500, seed=1)
-        r2 = bootstrap_ci(vals, iterations=500, seed=2)
-        assert r1 != r2
-
-    def test_brackets_the_mean(self) -> None:
-        vals = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5] * 5
-        mean = sum(vals) / len(vals)
-        lo, hi = bootstrap_ci(vals, iterations=1000, seed=7)
-        assert lo <= mean <= hi
-
-    def test_empty_input(self) -> None:
-        assert bootstrap_ci([], iterations=100, seed=0) == (0.0, 0.0)
-
-    def test_single_value(self) -> None:
-        lo, hi = bootstrap_ci([0.5], iterations=100, seed=0)
-        # No variance with n=1: short-circuited to (mean, mean).
-        assert lo == pytest.approx(0.5)
-        assert hi == pytest.approx(0.5)
-
-
-# ---------------------------------------------------------------------------
-# Loading + group stats on the 30-trade fixture
-# ---------------------------------------------------------------------------
-
-
-class TestSyntheticFixture:
-    def test_load_30(self, tmp_path: Path) -> None:
-        path = _synthetic_30(tmp_path)
-        trades = load_trades(path)
-        assert len(trades) == 30
-        assert all(t.source == "vision" for t in trades)
-
-    def test_overall_wr(self, tmp_path: Path) -> None:
-        trades = load_trades(_synthetic_30(tmp_path))
-        wins, n, wr = win_rate(trades)
-        assert wins == 17
-        assert n == 30
-        assert wr == pytest.approx(17 / 30)
-
-    def test_overall_expectancy(self, tmp_path: Path) -> None:
-        trades = load_trades(_synthetic_30(tmp_path))
-        # 17 wins * 0.5 + 13 losses * -1.0 = 8.5 - 13.0 = -4.5 → mean = -0.15
-        assert expectancy(trades) == pytest.approx(-0.15, abs=1e-9)
-
-    def test_per_set_wr(self, tmp_path: Path) -> None:
-        trades = load_trades(_synthetic_30(tmp_path))
-        by_set = group_by(trades, "set")
-        wr_a1 = win_rate(by_set["A1"])[2]
-        wr_a2 = win_rate(by_set["A2"])[2]
-        wr_a3 = win_rate(by_set["A3"])[2]
-        assert wr_a1 == pytest.approx(0.60)
-        assert wr_a2 == pytest.approx(0.70)
-        assert wr_a3 == pytest.approx(0.40)
-
-    def test_group_stats_a2(self, tmp_path: Path) -> None:
-        trades = load_trades(_synthetic_30(tmp_path))
-        a2 = [t for t in trades if t.set == "A2"]
-        s = compute_group_stats(
-            a2, label="A2", bootstrap_iterations=500, seed=11
-        )
-        assert s.n_total == 10
-        assert s.n_resolved == 10
-        assert s.wins == 7
-        assert s.wr == pytest.approx(0.70)
-        # Wilson 7/10
-        assert s.wr_ci_lo == pytest.approx(0.3967732199795652, abs=1e-9)
-        assert s.wr_ci_hi == pytest.approx(0.892210712513788, abs=1e-9)
-        # Expectancy A2 = 7*0.5 + 3*(-1.0) = 0.5 → mean = 0.05
-        assert s.exp_marius == pytest.approx(0.05, abs=1e-9)
-        assert s.exp_marius_ci_lo <= s.exp_marius <= s.exp_marius_ci_hi
-
-
-# ---------------------------------------------------------------------------
-# Pending-trade handling
-# ---------------------------------------------------------------------------
-
-
-class TestPendingHandling:
-    def test_pending_excluded_from_wr(self, tmp_path: Path) -> None:
+    def test_calibration_rows_excluded(self, tmp_path: Path) -> None:
        rows = [
-            _base_row(
-                id=1, screenshot_file="a.png",
-                outcome_path="TP0→TP1", max_reached="TP1",
-                be_moved="True", pl_marius="0.5000", pl_theoretical="0.3330",
-            ),
-            _base_row(
-                id=2, screenshot_file="b.png",
-                outcome_path="pending", max_reached="TP0",
-                be_moved="False", pl_marius="", pl_theoretical="0.1330",
-            ),
-            _base_row(
-                id=3, screenshot_file="c.png",
-                outcome_path="SL", max_reached="SL_first",
-                be_moved="False", pl_marius="-1.0000", pl_theoretical="-1.0000",
-            ),
+            _base_row(id=1, source="vision", screenshot_file="v.png"),
+            _base_row(id=2, source="manual_calibration", screenshot_file="c.png"),
+            _base_row(id=3, source="vision_calibration", screenshot_file="c.png"),
        ]
-        p = tmp_path / "j.csv"
-        _write_csv(p, rows)
-        trades = load_trades(p)
-
-        wins, n, wr = win_rate(trades)
-        assert wins == 1
-        assert n == 2  # pending excluded
-        assert wr == pytest.approx(0.5)
-        # Expectancy on pl_marius averages only resolved rows: (0.5 + -1.0) / 2 = -0.25
-        assert expectancy(trades, "pl_marius") == pytest.approx(-0.25)
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        s = compute_stats(path)
+        assert s["n_total"] == 1  # calibration rows filtered out


 # ---------------------------------------------------------------------------
-# Source filtering: calibration rows excluded from main report
+# render_stats
 # ---------------------------------------------------------------------------


-class TestSourceFiltering:
-    def test_calibration_rows_excluded_from_backtest_stats(
-        self, tmp_path: Path
-    ) -> None:
-        rows = [
-            _base_row(id=1, source="vision", screenshot_file="v.png",
-                      pl_marius="0.5000"),
-            _base_row(id=2, source="manual", screenshot_file="m.png",
-                      pl_marius="0.5000"),
-            _base_row(id=3, source="manual_calibration", screenshot_file="c.png",
-                      pl_marius="-1.0000"),
-            _base_row(id=4, source="vision_calibration", screenshot_file="c.png",
-                      pl_marius="-1.0000"),
-        ]
-        p = tmp_path / "j.csv"
-        _write_csv(p, rows)
-        trades = load_trades(p)
-        backtest = [t for t in trades if t.source in BACKTEST_SOURCES]
-        assert len(backtest) == 2
-        wins, n, wr = win_rate(backtest)
-        assert (wins, n) == (2, 2)
-        assert wr == pytest.approx(1.0)
+class TestRenderStats:
+    def test_render_stats_no_crash(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
+        s = compute_stats(path)
+        out = render_stats(s, "pl_marius")
+        assert isinstance(out, str)
+        assert out  # non-empty
+        assert "STOPPING RULE" in out
+
+    def test_render_stats_contains_sections(self, tmp_path: Path) -> None:
+        path = _synthetic_csv(tmp_path)
+        out = render_stats(compute_stats(path), "pl_marius")
+        for marker in (
+            "Stats jurnal.csv",
+            "Trade-uri totale",
+            "GLOBAL",
+            "PER SET:",
+            "PER CALITATE",
+            "PER DIRECȚIE",
+            "DESCRIPTOR ONLY",
+        ):
+            assert marker in out, f"missing section: {marker!r}"
+
+    def test_render_stats_flags_under_threshold(self, tmp_path: Path) -> None:
+        """All Sets in synthetic fixture have N<40 → all should be flagged."""
+        path = _synthetic_csv(tmp_path)
+        out = render_stats(compute_stats(path), "pl_marius")
+        for k in ("A1", "A2", "B", "D"):
+            assert f"{k}: N=" in out
+            assert "NEEDS MORE DATA" in out
+
+    def test_render_stats_empty(self, tmp_path: Path) -> None:
+        path = tmp_path / "empty.csv"
+        _write_csv(path, [])
+        out = render_stats(compute_stats(path), "pl_marius")
+        assert "Trade-uri totale: 0" in out
+        # No crash, no per-Set table for an empty dataset.
+        assert "NEEDS MORE DATA" not in out


 # ---------------------------------------------------------------------------
-# Calibration mode: pairing + mismatch
+# compute_calibration
 # ---------------------------------------------------------------------------


-class TestCalibration:
-    def test_pairs_and_zero_mismatch(self, tmp_path: Path) -> None:
-        m = _base_row(
-            id=1, source="manual_calibration", screenshot_file="cal-1.png"
-        )
-        v = _base_row(
-            id=2, source="vision_calibration", screenshot_file="cal-1.png"
-        )
-        p = tmp_path / "j.csv"
-        _write_csv(p, [m, v])
-        trades = load_trades(p)
-        rep = calibration_mismatch(trades)
-        assert rep.pairs == 1
-        assert sum(rep.field_mismatches.values()) == 0
-        assert rep.overall_mismatch_rate == 0.0
-
-    def test_one_field_mismatch(self, tmp_path: Path) -> None:
-        m = _base_row(
-            id=1, source="manual_calibration", screenshot_file="cal-1.png",
-            entry="400.0",
-        )
-        v = _base_row(
-            id=2, source="vision_calibration", screenshot_file="cal-1.png",
-            entry="400.10",  # different entry
-        )
-        p = tmp_path / "j.csv"
-        _write_csv(p, [m, v])
-        trades = load_trades(p)
-        rep = calibration_mismatch(trades)
-        assert rep.pairs == 1
-        assert rep.field_mismatches["entry"] == 1
-        # all other core fields match
+class TestComputeCalibration:
+    def test_compute_calibration_pairs(self, tmp_path: Path) -> None:
+        rows: list[dict[str, str]] = []
+        for i in range(5):
+            f = f"cal-{i}.png"
+            rows.append(_base_row(
+                id=i * 2 + 1, source="manual_calibration", screenshot_file=f
+            ))
+            rows.append(_base_row(
+                id=i * 2 + 2, source="vision_calibration", screenshot_file=f
+            ))
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        cal = compute_calibration(path)
+        assert cal["n_pairs"] == 5
        for fld in CORE_CALIBRATION_FIELDS:
-            if fld == "entry":
-                continue
-            assert rep.field_mismatches[fld] == 0
-        # 1 mismatch / (1 pair * 8 fields) = 12.5%
-        assert rep.overall_mismatch_rate == pytest.approx(1.0 / len(CORE_CALIBRATION_FIELDS))
+            assert fld in cal["fields"]
+            # All identical → 5 matches, 0 mismatches per field.
+            assert cal["fields"][fld]["match"] == 5
+            assert cal["fields"][fld]["mismatch"] == 0
+            assert cal["fields"][fld]["match_rate"] == pytest.approx(1.0)

-    def test_unpaired_rows_ignored(self, tmp_path: Path) -> None:
-        # Only a manual leg — no pair → 0 pairs.
-        m = _base_row(
-            id=1, source="manual_calibration", screenshot_file="lonely.png"
-        )
-        p = tmp_path / "j.csv"
-        _write_csv(p, [m])
-        trades = load_trades(p)
-        rep = calibration_mismatch(trades)
-        assert rep.pairs == 0
-        assert rep.total_comparisons == 0
-        assert rep.overall_mismatch_rate == 0.0
+    def test_compute_calibration_mismatch_examples(self, tmp_path: Path) -> None:
+        """Modify entry on 2 pairs → mismatch_examples contains both."""
+        rows: list[dict[str, str]] = []
+        for i in range(5):
+            f = f"cal-{i}.png"
+            manual_entry = "400.0"
+            # First two pairs differ on entry; the rest match exactly.
+            vision_entry = "401.5" if i < 2 else "400.0"
+            rows.append(_base_row(
+                id=i * 2 + 1, source="manual_calibration",
+                screenshot_file=f, entry=manual_entry,
+            ))
+            rows.append(_base_row(
+                id=i * 2 + 2, source="vision_calibration",
+                screenshot_file=f, entry=vision_entry,
+            ))
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        cal = compute_calibration(path)
+        assert cal["n_pairs"] == 5
+        entry = cal["fields"]["entry"]
+        assert entry["match"] == 3
+        assert entry["mismatch"] == 2
+        assert entry["match_rate"] == pytest.approx(3 / 5)
+        assert len(entry["mismatch_examples"]) == 2
+        for ex in entry["mismatch_examples"]:
+            assert "manual=" in ex and "vision=" in ex

-    def test_numeric_equivalence_tolerated(self, tmp_path: Path) -> None:
-        """'400' and '400.0000' should NOT count as a mismatch on entry."""
-        m = _base_row(
-            id=1, source="manual_calibration", screenshot_file="cal-1.png",
-            entry="400",
-        )
-        v = _base_row(
-            id=2, source="vision_calibration", screenshot_file="cal-1.png",
-            entry="400.0000",
-        )
-        p = tmp_path / "j.csv"
-        _write_csv(p, [m, v])
-        rep = calibration_mismatch(load_trades(p))
-        assert rep.field_mismatches["entry"] == 0
+    def test_calibration_examples_capped_at_3(self, tmp_path: Path) -> None:
+        """5 mismatches but mismatch_examples is capped at 3."""
+        rows: list[dict[str, str]] = []
+        for i in range(5):
+            f = f"cal-{i}.png"
+            rows.append(_base_row(
+                id=i * 2 + 1, source="manual_calibration",
+                screenshot_file=f, entry="400.0",
+            ))
+            rows.append(_base_row(
+                id=i * 2 + 2, source="vision_calibration",
+                screenshot_file=f, entry="500.0",
+            ))
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        cal = compute_calibration(path)
+        assert cal["fields"]["entry"]["mismatch"] == 5
+        assert len(cal["fields"]["entry"]["mismatch_examples"]) == 3

-
-# ---------------------------------------------------------------------------
-# Report formatting + CLI
-# ---------------------------------------------------------------------------
-
-
-class TestReporting:
-    def test_format_report_contains_sections(self, tmp_path: Path) -> None:
-        out = format_report(
-            load_trades(_synthetic_30(tmp_path)),
-            bootstrap_iterations=200,
-            seed=0,
-        )
-        assert "M2D Backtest Stats" in out
-        assert "Overall" in out
-        assert "By Set" in out
-        assert "A1" in out and "A2" in out and "A3" in out
-        # calitate warning present
-        assert "descriptor only" in out.lower() or "biased" in out.lower()
-
-    def test_format_calibration_report(self, tmp_path: Path) -> None:
+    def test_calibration_numeric_tolerance(self, tmp_path: Path) -> None:
+        """Floats within 0.01 must NOT count as a mismatch."""
        rows = [
            _base_row(
-                id=1, source="manual_calibration", screenshot_file="cal-1.png"
+                id=1, source="manual_calibration",
+                screenshot_file="cal-1.png", entry="400.005",
            ),
            _base_row(
-                id=2, source="vision_calibration", screenshot_file="cal-1.png",
-                directie="Sell",  # mismatch on directie
-                entry="400.0", sl="401.0", tp0="399.5", tp1="399.0", tp2="398.0",
+                id=2, source="vision_calibration",
+                screenshot_file="cal-1.png", entry="400.010",
            ),
        ]
-        p = tmp_path / "j.csv"
-        _write_csv(p, rows)
-        out = format_calibration_report(load_trades(p))
-        assert "Paired screenshots" in out
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        cal = compute_calibration(path)
+        assert cal["fields"]["entry"]["match"] == 1
+        assert cal["fields"]["entry"]["mismatch"] == 0
+
+    def test_calibration_outside_tolerance(self, tmp_path: Path) -> None:
+        """Floats > 0.01 apart DO count as a mismatch."""
+        rows = [
+            _base_row(
+                id=1, source="manual_calibration",
+                screenshot_file="cal-1.png", entry="400.00",
+            ),
+            _base_row(
+                id=2, source="vision_calibration",
+                screenshot_file="cal-1.png", entry="400.05",
+            ),
+        ]
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        cal = compute_calibration(path)
+        assert cal["fields"]["entry"]["mismatch"] == 1
+
+    def test_calibration_no_pairs(self, tmp_path: Path) -> None:
+        """No paired screenshot → n_pairs=0, zero match/mismatch counts per field."""
+        path = tmp_path / "j.csv"
+        _write_csv(path, [
+            _base_row(id=1, source="manual_calibration", screenshot_file="lonely.png"),
+        ])
+        cal = compute_calibration(path)
+        assert cal["n_pairs"] == 0
+        for fld in CORE_CALIBRATION_FIELDS:
+            assert cal["fields"][fld]["match"] == 0
+            assert cal["fields"][fld]["mismatch"] == 0
+
+    def test_render_calibration_no_crash(self, tmp_path: Path) -> None:
+        rows = [
+            _base_row(id=1, source="manual_calibration",
+                      screenshot_file="cal-1.png", directie="Buy"),
+            _base_row(id=2, source="vision_calibration",
+                      screenshot_file="cal-1.png", directie="Sell",
+                      entry="400.0", sl="401.0", tp0="399.5",
+                      tp1="399.0", tp2="398.0"),
+        ]
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        out = render_calibration(compute_calibration(path))
+        assert "Calibration P4" in out
        assert "directie" in out
-        # 1 mismatch (directie) of 8 fields = 12.5% → FAIL P4 gate
-        assert "FAIL" in out

-    def test_empty_csv_report(self, tmp_path: Path) -> None:
-        p = tmp_path / "empty.csv"
-        _write_csv(p, [])
-        out = format_report(load_trades(p))
-        assert "no backtest trades" in out.lower()
+    def test_render_calibration_empty(self, tmp_path: Path) -> None:
+        path = tmp_path / "empty.csv"
+        _write_csv(path, [])
+        out = render_calibration(compute_calibration(path))
+        assert "0" in out
+        assert "FAIL" not in out
+        assert "PASS" not in out

-    def test_main_cli_runs(
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+class TestCLI:
+    def test_main_stats(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
-        path = _synthetic_30(tmp_path)
-        rc = main(["--csv", str(path), "--seed", "0", "--bootstrap-iterations", "100"])
+        path = _synthetic_csv(tmp_path)
+        rc = main(["--csv", str(path)])
        assert rc == 0
-        captured = capsys.readouterr()
-        assert "M2D Backtest Stats" in captured.out
+        assert "Stats jurnal.csv" in capsys.readouterr().out

-    def test_main_cli_calibration(
+    def test_main_overlay(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture
+    ) -> None:
+        path = _synthetic_csv(tmp_path)
+        rc = main(["--csv", str(path), "--overlay", "pl_theoretical"])
+        assert rc == 0
+        assert "pl_theoretical" in capsys.readouterr().out
+
+    def test_main_calibration(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
        rows = [
-            _base_row(id=1, source="manual_calibration", screenshot_file="cal-1.png"),
-            _base_row(id=2, source="vision_calibration", screenshot_file="cal-1.png"),
+            _base_row(id=1, source="manual_calibration",
+                      screenshot_file="cal-1.png"),
+            _base_row(id=2, source="vision_calibration",
+                      screenshot_file="cal-1.png"),
        ]
-        p = tmp_path / "j.csv"
-        _write_csv(p, rows)
-        rc = main(["--csv", str(p), "--calibration"])
+        path = tmp_path / "j.csv"
+        _write_csv(path, rows)
+        rc = main(["--csv", str(path), "--calibration"])
        assert rc == 0
        out = capsys.readouterr().out
-        assert "Calibration P4 gate" in out
-        assert "PASS" in out  # all fields match → PASS
+        assert "Calibration P4" in out
+        assert "PASS" in out