scripts: regenerate_md + stats + tests (116-144 passing across modules)

This commit is contained in:
Marius
2026-05-13 12:45:05 +03:00
parent ce80151c58
commit 26d084dc4b
6 changed files with 1843 additions and 283 deletions

469
tests/test_stats.py Normal file
View File

@@ -0,0 +1,469 @@
"""Tests for scripts/stats.py."""
from __future__ import annotations
import csv
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.append_row import CSV_COLUMNS # noqa: E402
from scripts.stats import ( # noqa: E402
BACKTEST_SOURCES,
CORE_CALIBRATION_FIELDS,
bootstrap_ci,
calibration_mismatch,
compute_group_stats,
expectancy,
format_calibration_report,
format_report,
group_by,
load_trades,
main,
win_rate,
wilson_ci,
)
# ---------------------------------------------------------------------------
# Synthetic CSV fixture: 30 trades
# ---------------------------------------------------------------------------
def _base_row(**overrides) -> dict[str, str]:
base = {
"id": "0",
"screenshot_file": "",
"source": "vision",
"data": "2026-05-13",
"zi": "Mi",
"ora_ro": "17:30",
"ora_utc": "14:30",
"instrument": "DIA",
"directie": "Buy",
"tf_mare": "5min",
"tf_mic": "1min",
"calitate": "Clară",
"entry": "400.0",
"sl": "399.0",
"tp0": "400.5",
"tp1": "401.0",
"tp2": "402.0",
"risc_pct": "0.25",
"outcome_path": "TP0→TP1",
"max_reached": "TP1",
"be_moved": "True",
"pl_marius": "0.5000",
"pl_theoretical": "0.3330",
"set": "A2",
"indicator_version": "v-2026-05",
"pl_overlay_version": "marius-v1",
"csv_schema_version": "1",
"extracted_at": "2026-05-13T10:00:00Z",
"note": "",
}
base.update({k: str(v) for k, v in overrides.items()})
return base
def _write_csv(path: Path, rows: list[dict[str, str]]) -> None:
    """Write *rows* to *path* using the canonical CSV_COLUMNS order.

    Missing keys are emitted as empty strings; parent dirs are created.
    """
    columns = list(CSV_COLUMNS)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=columns)
        writer.writeheader()
        writer.writerows(
            {column: row.get(column, "") for column in columns} for row in rows
        )
def _synthetic_30(tmp_path: Path) -> Path:
    """Write 30 vision-source trades engineered for known stats.

    Layout (by Set):
      - A1: 10 trades — 6 wins TP0->TP1 (+0.5), 4 losses SL (-1.0) → WR 60%
      - A2: 10 trades — 7 wins TP0->TP1 (+0.5), 3 losses SL (-1.0) → WR 70%
      - A3: 10 trades — 4 wins TP0->TP1 (+0.5), 6 losses SL (-1.0) → WR 40%
    Overall: 17 wins / 30, WR ≈ 56.67%.
    """
    rows: list[dict[str, str]] = []
    next_id = 0

    def emit(set_label: str, n_win: int, n_loss: int, calitate: str = "Clară") -> None:
        # Wins first, then losses, so ids stay deterministic per set.
        nonlocal next_id
        for is_win in [True] * n_win + [False] * n_loss:
            next_id += 1
            prefix = "win" if is_win else "loss"
            rows.append(
                _base_row(
                    id=next_id,
                    screenshot_file=f"{prefix}-{next_id}.png",
                    set=set_label,
                    calitate=calitate,
                    outcome_path="TP0→TP1" if is_win else "SL",
                    max_reached="TP1" if is_win else "SL_first",
                    be_moved="True" if is_win else "False",
                    pl_marius="0.5000" if is_win else "-1.0000",
                    pl_theoretical="0.3330" if is_win else "-1.0000",
                )
            )

    emit("A1", 6, 4)
    emit("A2", 7, 3)
    emit("A3", 4, 6)
    csv_path = tmp_path / "jurnal.csv"
    _write_csv(csv_path, rows)
    return csv_path
# ---------------------------------------------------------------------------
# Wilson CI — reference values
# ---------------------------------------------------------------------------
class TestWilsonCI:
    """Pin wilson_ci against externally precomputed reference bounds."""

    def test_n_zero(self) -> None:
        assert wilson_ci(0, 0) == (0.0, 0.0)

    def test_50pct_at_n40(self) -> None:
        bounds = wilson_ci(20, 40)
        assert bounds == pytest.approx(
            (0.3519927879709976, 0.6480072120290024), abs=1e-9
        )

    def test_55pct_at_n40(self) -> None:
        bounds = wilson_ci(22, 40)
        assert bounds == pytest.approx(
            (0.3982882988844078, 0.6929492471905531), abs=1e-9
        )

    def test_55pct_at_n100(self) -> None:
        # A larger sample tightens the interval; the lower bound clears 45%.
        lo, hi = wilson_ci(55, 100)
        assert lo == pytest.approx(0.4524442703164345, abs=1e-9)
        assert hi == pytest.approx(0.6438562489359655, abs=1e-9)
        assert lo > 0.45  # STOPPING_RULE GO-LIVE gate

    def test_zero_wins(self) -> None:
        lo, hi = wilson_ci(0, 10)
        assert lo == pytest.approx(0.0, abs=1e-12)
        assert hi == pytest.approx(0.2775401687666165, abs=1e-9)

    def test_all_wins(self) -> None:
        lo, hi = wilson_ci(10, 10)
        assert lo == pytest.approx(0.7224598312333834, abs=1e-9)
        assert hi == pytest.approx(1.0, abs=1e-12)

    def test_wins_out_of_range(self) -> None:
        # Both over- and under-range win counts must be rejected.
        for bad_wins in (11, -1):
            with pytest.raises(ValueError):
                wilson_ci(bad_wins, 10)
# ---------------------------------------------------------------------------
# Bootstrap CI — determinism + sanity
# ---------------------------------------------------------------------------
class TestBootstrapCI:
    """Determinism and sanity properties of the bootstrap expectancy CI."""

    def test_deterministic_with_seed(self) -> None:
        sample = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5]
        first = bootstrap_ci(sample, iterations=500, seed=42)
        second = bootstrap_ci(sample, iterations=500, seed=42)
        assert first == second

    def test_different_seed_different_result(self) -> None:
        sample = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5]
        assert bootstrap_ci(sample, iterations=500, seed=1) != bootstrap_ci(
            sample, iterations=500, seed=2
        )

    def test_brackets_the_mean(self) -> None:
        sample = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5] * 5
        sample_mean = sum(sample) / len(sample)
        lo, hi = bootstrap_ci(sample, iterations=1000, seed=7)
        assert lo <= sample_mean <= hi

    def test_empty_input(self) -> None:
        assert bootstrap_ci([], iterations=100, seed=0) == (0.0, 0.0)

    def test_single_value(self) -> None:
        # With n=1 there is no resampling variance: CI degenerates to (mean, mean).
        lo, hi = bootstrap_ci([0.5], iterations=100, seed=0)
        assert lo == pytest.approx(0.5)
        assert hi == pytest.approx(0.5)
# ---------------------------------------------------------------------------
# Loading + group stats on the 30-trade fixture
# ---------------------------------------------------------------------------
class TestSyntheticFixture:
    """Loading plus aggregate stats on the engineered 30-trade CSV."""

    def test_load_30(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        assert len(trades) == 30
        assert all(t.source == "vision" for t in trades)

    def test_overall_wr(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        wins, resolved, wr = win_rate(trades)
        assert (wins, resolved) == (17, 30)
        assert wr == pytest.approx(17 / 30)

    def test_overall_expectancy(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        # 17 wins * 0.5 + 13 losses * -1.0 = 8.5 - 13.0 = -4.5 → mean = -0.15
        assert expectancy(trades) == pytest.approx(-0.15, abs=1e-9)

    def test_per_set_wr(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        groups = group_by(trades, "set")
        # Win rates engineered per set in the fixture.
        for label, expected_wr in {"A1": 0.60, "A2": 0.70, "A3": 0.40}.items():
            assert win_rate(groups[label])[2] == pytest.approx(expected_wr)

    def test_group_stats_a2(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        a2_trades = [t for t in trades if t.set == "A2"]
        stats = compute_group_stats(
            a2_trades, label="A2", bootstrap_iterations=500, seed=11
        )
        assert (stats.n_total, stats.n_resolved, stats.wins) == (10, 10, 7)
        assert stats.wr == pytest.approx(0.70)
        # Wilson bounds for 7 wins out of 10.
        assert stats.wr_ci_lo == pytest.approx(0.3967732199795652, abs=1e-9)
        assert stats.wr_ci_hi == pytest.approx(0.892210712513788, abs=1e-9)
        # A2 expectancy = (7*0.5 + 3*(-1.0)) / 10 = 0.05.
        assert stats.exp_marius == pytest.approx(0.05, abs=1e-9)
        assert stats.exp_marius_ci_lo <= stats.exp_marius <= stats.exp_marius_ci_hi
# ---------------------------------------------------------------------------
# Pending-trade handling
# ---------------------------------------------------------------------------
class TestPendingHandling:
    """A 'pending' outcome must not count toward win rate or expectancy."""

    def test_pending_excluded_from_wr(self, tmp_path: Path) -> None:
        win = _base_row(
            id=1, screenshot_file="a.png",
            outcome_path="TP0→TP1", max_reached="TP1",
            be_moved="True", pl_marius="0.5000", pl_theoretical="0.3330",
        )
        pending = _base_row(
            id=2, screenshot_file="b.png",
            outcome_path="pending", max_reached="TP0",
            be_moved="False", pl_marius="", pl_theoretical="0.1330",
        )
        loss = _base_row(
            id=3, screenshot_file="c.png",
            outcome_path="SL", max_reached="SL_first",
            be_moved="False", pl_marius="-1.0000", pl_theoretical="-1.0000",
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [win, pending, loss])
        trades = load_trades(csv_path)
        wins, resolved, wr = win_rate(trades)
        assert (wins, resolved) == (1, 2)  # pending row excluded from the denominator
        assert wr == pytest.approx(0.5)
        # Expectancy on pl_marius averages only resolved rows: (0.5 - 1.0) / 2.
        assert expectancy(trades, "pl_marius") == pytest.approx(-0.25)
# ---------------------------------------------------------------------------
# Source filtering: calibration rows excluded from main report
# ---------------------------------------------------------------------------
class TestSourceFiltering:
    """Calibration-sourced rows must stay out of the backtest stats."""

    def test_calibration_rows_excluded_from_backtest_stats(
        self, tmp_path: Path
    ) -> None:
        specs = [
            (1, "vision", "v.png", "0.5000"),
            (2, "manual", "m.png", "0.5000"),
            (3, "manual_calibration", "c.png", "-1.0000"),
            (4, "vision_calibration", "c.png", "-1.0000"),
        ]
        rows = [
            _base_row(id=rid, source=src, screenshot_file=shot, pl_marius=pl)
            for rid, src, shot, pl in specs
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, rows)
        # Only the two non-calibration sources survive the filter.
        backtest = [
            t for t in load_trades(csv_path) if t.source in BACKTEST_SOURCES
        ]
        assert len(backtest) == 2
        wins, resolved, wr = win_rate(backtest)
        assert (wins, resolved) == (2, 2)
        assert wr == pytest.approx(1.0)
# ---------------------------------------------------------------------------
# Calibration mode: pairing + mismatch
# ---------------------------------------------------------------------------
class TestCalibration:
    """Manual/vision pairing by screenshot and per-field mismatch counting."""

    def test_pairs_and_zero_mismatch(self, tmp_path: Path) -> None:
        rows = [
            _base_row(
                id=1, source="manual_calibration", screenshot_file="cal-1.png"
            ),
            _base_row(
                id=2, source="vision_calibration", screenshot_file="cal-1.png"
            ),
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, rows)
        rep = calibration_mismatch(load_trades(csv_path))
        assert rep.pairs == 1
        assert sum(rep.field_mismatches.values()) == 0
        assert rep.overall_mismatch_rate == 0.0

    def test_one_field_mismatch(self, tmp_path: Path) -> None:
        manual = _base_row(
            id=1, source="manual_calibration", screenshot_file="cal-1.png",
            entry="400.0",
        )
        vision = _base_row(
            id=2, source="vision_calibration", screenshot_file="cal-1.png",
            entry="400.10",  # deliberately different entry
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [manual, vision])
        rep = calibration_mismatch(load_trades(csv_path))
        assert rep.pairs == 1
        assert rep.field_mismatches["entry"] == 1
        # Every other core field still matches.
        others = [f for f in CORE_CALIBRATION_FIELDS if f != "entry"]
        assert all(rep.field_mismatches[f] == 0 for f in others)
        # One mismatch over one pair's worth of core-field comparisons.
        assert rep.overall_mismatch_rate == pytest.approx(
            1.0 / len(CORE_CALIBRATION_FIELDS)
        )

    def test_unpaired_rows_ignored(self, tmp_path: Path) -> None:
        # A manual leg with no matching vision leg contributes no pair.
        lonely = _base_row(
            id=1, source="manual_calibration", screenshot_file="lonely.png"
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [lonely])
        rep = calibration_mismatch(load_trades(csv_path))
        assert (rep.pairs, rep.total_comparisons) == (0, 0)
        assert rep.overall_mismatch_rate == 0.0

    def test_numeric_equivalence_tolerated(self, tmp_path: Path) -> None:
        """'400' and '400.0000' should NOT count as a mismatch on entry."""
        rows = [
            _base_row(
                id=1, source="manual_calibration", screenshot_file="cal-1.png",
                entry="400",
            ),
            _base_row(
                id=2, source="vision_calibration", screenshot_file="cal-1.png",
                entry="400.0000",
            ),
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, rows)
        rep = calibration_mismatch(load_trades(csv_path))
        assert rep.field_mismatches["entry"] == 0
# ---------------------------------------------------------------------------
# Report formatting + CLI
# ---------------------------------------------------------------------------
class TestReporting:
    """Human-readable report formatting and the CLI entry point."""

    def test_format_report_contains_sections(self, tmp_path: Path) -> None:
        report = format_report(
            load_trades(_synthetic_30(tmp_path)),
            bootstrap_iterations=200,
            seed=0,
        )
        for needle in ("M2D Backtest Stats", "Overall", "By Set", "A1", "A2", "A3"):
            assert needle in report
        # The calitate caveat must appear in some wording.
        lowered = report.lower()
        assert "descriptor only" in lowered or "biased" in lowered

    def test_format_calibration_report(self, tmp_path: Path) -> None:
        manual = _base_row(
            id=1, source="manual_calibration", screenshot_file="cal-1.png"
        )
        vision = _base_row(
            id=2, source="vision_calibration", screenshot_file="cal-1.png",
            directie="Sell",  # mismatch on directie
            entry="400.0", sl="401.0", tp0="399.5", tp1="399.0", tp2="398.0",
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [manual, vision])
        report = format_calibration_report(load_trades(csv_path))
        assert "Paired screenshots" in report
        assert "directie" in report
        # The directie flip (with mirrored sl/tp levels) must breach the P4 gate.
        assert "FAIL" in report

    def test_empty_csv_report(self, tmp_path: Path) -> None:
        empty_path = tmp_path / "empty.csv"
        _write_csv(empty_path, [])
        report = format_report(load_trades(empty_path))
        assert "no backtest trades" in report.lower()

    def test_main_cli_runs(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
        csv_path = _synthetic_30(tmp_path)
        argv = ["--csv", str(csv_path), "--seed", "0", "--bootstrap-iterations", "100"]
        assert main(argv) == 0
        assert "M2D Backtest Stats" in capsys.readouterr().out

    def test_main_cli_calibration(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
        rows = [
            _base_row(id=1, source="manual_calibration", screenshot_file="cal-1.png"),
            _base_row(id=2, source="vision_calibration", screenshot_file="cal-1.png"),
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, rows)
        assert main(["--csv", str(csv_path), "--calibration"]) == 0
        out = capsys.readouterr().out
        assert "Calibration P4 gate" in out
        assert "PASS" in out  # identical legs → gate passes