scripts: regenerate_md + stats + tests (116-144 passing across modules)

Marius
2026-05-13 12:45:05 +03:00
parent ce80151c58
commit 26d084dc4b
6 changed files with 1843 additions and 283 deletions

scripts/append_row.py (modified)

@@ -1,22 +1,26 @@
"""Append a validated M2D extraction to ``data/trades.csv``.
"""Append a validated M2D extraction to ``data/jurnal.csv``.
Pipeline:
JSON file --> pydantic validate (M2DExtraction)
--> load data/_meta.yaml (versions + schema)
--> compute ora_ro, zi, set, pl_marius, pl_theoretical
--> load data/_meta.yaml (versions)
--> compute id, ora_ro, zi, set, pl_marius, pl_theoretical, extracted_at
--> dedup on (screenshot_file, source)
--> atomic CSV write (temp file + os.replace)
--> atomic CSV write (sibling .tmp + os.replace)
Source values
- ``manual`` : Marius logged by hand
- ``vision`` : produced by the vision subagent
- ``manual`` : Marius logged by hand
- ``manual_calibration`` : calibration P4 — manual leg
- ``vision_calibration`` : calibration P4 — vision leg
A row with ``source=manual_calibration`` and a row with ``source=vision_calibration``
for the *same* screenshot are allowed to coexist (different dedup keys); a
duplicate ``(screenshot_file, source)`` pair is rejected (or skipped — see
``append_row`` ``on_duplicate`` argument).
for the *same* screenshot are allowed to coexist (different dedup keys).
Failure mode: ``append_extraction`` NEVER raises. On any error (missing JSON,
pydantic ValidationError, dedup hit, etc.) it returns
``{"status": "rejected", "reason": "...", "id": None, "row": None}`` so the
caller (a slash command) can decide what to do with the screenshot
(move to ``needs_review/``, log to workflow, etc.).
"""
from __future__ import annotations
@@ -24,41 +28,43 @@ from __future__ import annotations
import csv
import json
import os
import tempfile
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal
import yaml
from pydantic import ValidationError
from scripts.calendar_parse import calc_set, load_calendar, utc_to_ro
from scripts.pl_calc import pl_marius, pl_theoretical
from scripts.vision_schema import M2DExtraction, parse_extraction_dict
from scripts.vision_schema import M2DExtraction, parse_extraction
__all__ = [
"CSV_COLUMNS",
"VALID_SOURCES",
"build_row",
"read_rows",
"append_row",
"append_row_from_json",
"ZI_RO_MAP",
"csv_columns",
"append_extraction",
]
Source = Literal["manual", "vision", "manual_calibration", "vision_calibration"]
Source = Literal["vision", "manual", "manual_calibration", "vision_calibration"]
VALID_SOURCES: frozenset[str] = frozenset(
{"manual", "vision", "manual_calibration", "vision_calibration"}
{"vision", "manual", "manual_calibration", "vision_calibration"}
)
# Canonical column order (29) — must stay stable; regenerate_md + stats depend on it.
CSV_COLUMNS: tuple[str, ...] = (
"id",
"screenshot_file",
"source",
"data",
"ora_utc",
"ora_ro",
"zi",
"set",
"ora_ro",
"ora_utc",
"instrument",
"directie",
"tf_mare",
@@ -73,17 +79,38 @@ CSV_COLUMNS: tuple[str, ...] = (
"outcome_path",
"max_reached",
"be_moved",
"confidence",
"ambiguities",
"note",
"pl_marius",
"pl_theoretical",
"set",
"indicator_version",
"pl_overlay_version",
"csv_schema_version",
"extracted_at",
"note",
)
ZI_RO_MAP: dict[str, str] = {
"Mon": "Lu",
"Tue": "Ma",
"Wed": "Mi",
"Thu": "Jo",
"Fri": "Vi",
"Sat": "Sa",
"Sun": "Du",
}
def csv_columns() -> list[str]:
"""Return the 29-column header in canonical order."""
return list(CSV_COLUMNS)
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def _load_meta(meta_path: Path) -> dict[str, Any]:
with meta_path.open("r", encoding="utf-8") as fh:
meta = yaml.safe_load(fh) or {}
@@ -94,35 +121,69 @@ def _load_meta(meta_path: Path) -> dict[str, Any]:
return meta
def _read_existing_rows(csv_path: Path) -> list[dict[str, str]]:
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _next_id(rows: list[dict[str, str]]) -> int:
max_id = 0
for r in rows:
raw = r.get("id", "")
if not raw:
continue
try:
v = int(raw)
except (TypeError, ValueError):
continue
if v > max_id:
max_id = v
return max_id + 1
def _format_optional(value: float | None) -> str:
return "" if value is None else f"{value:.4f}"
def build_row(
def _write_csv_atomic(
csv_path: Path, rows: list[dict[str, str]], columns: list[str]
) -> None:
csv_path.parent.mkdir(parents=True, exist_ok=True)
tmp = csv_path.with_suffix(csv_path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=columns)
writer.writeheader()
for row in rows:
writer.writerow({k: row.get(k, "") for k in columns})
os.replace(tmp, csv_path)
def _build_row(
extraction: M2DExtraction,
*,
source: str,
row_id: int,
meta: dict[str, Any],
calendar: list[dict[str, Any]],
extracted_at: str,
) -> dict[str, str]:
"""Compute the full CSV row dict for one extraction."""
if source not in VALID_SOURCES:
raise ValueError(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
d_ro, t_ro, zi = utc_to_ro(extraction.data, extraction.ora_utc)
set_label = calc_set(d_ro, t_ro, zi, calendar)
d_ro, t_ro, day_short = utc_to_ro(extraction.data, extraction.ora_utc)
set_label = calc_set(d_ro, t_ro, day_short, calendar)
pl_m = pl_marius(extraction.outcome_path, extraction.be_moved)
pl_t = pl_theoretical(extraction.max_reached)
zi_ro = ZI_RO_MAP[day_short]
return {
"id": str(row_id),
"screenshot_file": extraction.screenshot_file,
"source": source,
"data": extraction.data,
"ora_utc": extraction.ora_utc,
"zi": zi_ro,
"ora_ro": t_ro.strftime("%H:%M"),
"zi": zi,
"set": set_label,
"ora_utc": extraction.ora_utc,
"instrument": extraction.instrument,
"directie": extraction.directie,
"tf_mare": extraction.tf_mare,
@@ -136,102 +197,115 @@ def build_row(
"risc_pct": f"{extraction.risc_pct}",
"outcome_path": extraction.outcome_path,
"max_reached": extraction.max_reached,
"be_moved": "true" if extraction.be_moved else "false",
"confidence": extraction.confidence,
"ambiguities": json.dumps(extraction.ambiguities, ensure_ascii=False),
"note": extraction.note,
"be_moved": str(extraction.be_moved),
"pl_marius": _format_optional(pl_m),
"pl_theoretical": _format_optional(pl_t),
"set": set_label,
"indicator_version": str(meta["indicator_version"]),
"pl_overlay_version": str(meta["pl_overlay_version"]),
"csv_schema_version": str(meta["csv_schema_version"]),
"extracted_at": extracted_at,
"note": extraction.note,
}
def read_rows(csv_path: Path) -> list[dict[str, str]]:
"""Read existing rows; return [] if the file does not exist or is empty."""
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _reject(reason: str) -> dict[str, Any]:
return {"status": "rejected", "reason": reason, "id": None, "row": None}
def _atomic_write(csv_path: Path, rows: list[dict[str, str]]) -> None:
csv_path.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_name = tempfile.mkstemp(
prefix=csv_path.name + ".",
suffix=".tmp",
dir=str(csv_path.parent),
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS))
writer.writeheader()
for r in rows:
writer.writerow({k: r.get(k, "") for k in CSV_COLUMNS})
os.replace(tmp_name, csv_path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
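# Note: mkstemp places the temp file in the target's own directory, so the
# final os.replace stays on one filesystem and is atomic; readers only ever
# see the old CSV or the complete new one, never a half-written file.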
# ---------------------------------------------------------------------------
# public API
# ---------------------------------------------------------------------------
def append_row(
extraction: M2DExtraction,
def append_extraction(
json_path: Path | str,
source: str,
csv_path: Path,
meta_path: Path,
calendar_path: Path,
on_duplicate: Literal["raise", "skip"] = "raise",
) -> dict[str, str]:
"""Append one extraction to the CSV.
csv_path: Path | str = "data/jurnal.csv",
meta_path: Path | str = "data/_meta.yaml",
calendar_path: Path | str = "calendar_evenimente.yaml",
) -> dict[str, Any]:
"""Append one validated extraction to the jurnal CSV.
Dedup key: ``(screenshot_file, source)``. If a row with the same key
already exists, behaviour is controlled by ``on_duplicate``:
Never raises. Returns one of:
- ``"raise"`` (default): raise ``ValueError``.
- ``"skip"``: leave the CSV untouched and return the *existing* row.
- ``{"status": "ok", "reason": "", "id": <int>, "row": <dict>}``
- ``{"status": "rejected", "reason": <str>, "id": None, "row": None}``
"""
meta = _load_meta(meta_path)
calendar = load_calendar(calendar_path)
row = build_row(extraction, source, meta, calendar)
json_path = Path(json_path)
csv_path = Path(csv_path)
meta_path = Path(meta_path)
calendar_path = Path(calendar_path)
existing = read_rows(csv_path)
key = (row["screenshot_file"], row["source"])
if source not in VALID_SOURCES:
return _reject(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
if not json_path.exists():
return _reject(f"JSON file not found: {json_path}")
try:
with json_path.open("r", encoding="utf-8") as fh:
raw = fh.read()
except OSError as exc:
return _reject(f"failed to read JSON {json_path}: {exc}")
try:
extraction = parse_extraction(raw)
except ValidationError as exc:
return _reject(f"validation error: {exc}")
except (ValueError, json.JSONDecodeError) as exc:
return _reject(f"validation error (json parse): {exc}")
try:
meta = _load_meta(meta_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"_meta.yaml not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"_meta.yaml invalid: {exc}")
try:
calendar = load_calendar(calendar_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"calendar not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"calendar invalid: {exc}")
try:
existing = _read_existing_rows(csv_path)
except OSError as exc:
return _reject(f"failed to read existing CSV {csv_path}: {exc}")
key = (extraction.screenshot_file, source)
for r in existing:
if (r.get("screenshot_file"), r.get("source")) == key:
if on_duplicate == "skip":
return r
raise ValueError(
f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r} "
f"already exists in {csv_path}"
return _reject(
f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r}"
)
existing.append(row)
_atomic_write(csv_path, existing)
return row
def append_row_from_json(
json_path: Path,
source: str,
csv_path: Path,
meta_path: Path,
calendar_path: Path,
on_duplicate: Literal["raise", "skip"] = "raise",
) -> dict[str, str]:
"""Convenience wrapper: load JSON, validate, append."""
with Path(json_path).open("r", encoding="utf-8") as fh:
payload = json.load(fh)
extraction = parse_extraction_dict(payload)
return append_row(
extraction=extraction,
source=source,
csv_path=csv_path,
meta_path=meta_path,
calendar_path=calendar_path,
on_duplicate=on_duplicate,
row_id = _next_id(existing)
extracted_at = (
datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
)
try:
row = _build_row(
extraction,
source=source,
row_id=row_id,
meta=meta,
calendar=calendar,
extracted_at=extracted_at,
)
except (KeyError, ValueError) as exc:
return _reject(f"derived-field computation failed: {exc}")
try:
_write_csv_atomic(csv_path, [*existing, row], list(CSV_COLUMNS))
except OSError as exc:
return _reject(
f"atomic write failed: {exc}\n{traceback.format_exc()}"
)
return {"status": "ok", "reason": "", "id": row_id, "row": row}
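For orientation, a minimal caller sketch for this never-raises contract; the JSON path, the source value, and the needs_review/ destination are illustrative, not part of this commit:

from pathlib import Path

from scripts.append_row import append_extraction

result = append_extraction("extractions/shot_0042.json", source="vision")
if result["status"] == "ok":
    print(f"appended row id={result['id']}")
else:
    # Rejected: park the screenshot for review and keep the reason for the log.
    Path("needs_review").mkdir(exist_ok=True)
    print(f"rejected: {result['reason']}")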

scripts/regenerate_md.py (new file, 240 lines)

@@ -0,0 +1,240 @@
"""Regenerate ``data/jurnal.md`` from ``data/jurnal.csv``.
CSV is the source of truth (29 columns, schema owned by ``scripts.append_row``).
MD is a human-readable mirror with a curated 18-column table.
CLI: ``python scripts/regenerate_md.py [csv_path] [md_path]``
"""
from __future__ import annotations
import csv
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Sequence
from scripts.append_row import csv_columns
__all__ = ["MD_COLUMNS", "regenerate_md", "main"]
MD_COLUMNS: tuple[str, ...] = (
"#",
"Data",
"Zi",
"Ora RO",
"Set",
"Instrument",
"Direcție",
"Calitate",
"Entry",
"SL",
"TP0",
"TP1",
"TP2",
"outcome_path",
"P/L (Marius)",
"P/L (theoretic)",
"Source",
"Note",
)
_CSV_FIELDS_USED: tuple[str, ...] = (
"id",
"data",
"zi",
"ora_ro",
"set",
"instrument",
"directie",
"calitate",
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"pl_marius",
"pl_theoretical",
"source",
"note",
)
_DIRECTIE_DISPLAY = {"long": "Buy", "short": "Sell", "buy": "Buy", "sell": "Sell"}
def _fmt_pl(value: str) -> str:
if value is None or value == "":
return "pending"
try:
return f"{float(value):+.2f}"
except ValueError:
return value
def _fmt_directie(value: str) -> str:
if not value:
return ""
return _DIRECTIE_DISPLAY.get(value.strip().lower(), value)
def _escape_cell(value: str) -> str:
return (value or "").replace("|", "\\|").replace("\n", " ").strip()
def _placeholder_md() -> str:
return (
"# Jurnal M2D (auto-generated)\n"
"\n"
"*Niciun trade încă. Adaugă unul prin `/m2d-log` sau `/backtest`.*\n"
)
def _atomic_write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_name = tempfile.mkstemp(
prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="\n") as fh:
fh.write(content)
os.replace(tmp_name, path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
def _row_to_cells(row: dict[str, str], display_index: int) -> tuple[str, ...]:
g = row.get
return (
str(display_index),
g("data", "") or "",
g("zi", "") or "",
g("ora_ro", "") or "",
g("set", "") or "",
g("instrument", "") or "",
_fmt_directie(g("directie", "") or ""),
g("calitate", "") or "",
g("entry", "") or "",
g("sl", "") or "",
g("tp0", "") or "",
g("tp1", "") or "",
g("tp2", "") or "",
g("outcome_path", "") or "",
_fmt_pl(g("pl_marius", "") or ""),
_fmt_pl(g("pl_theoretical", "") or ""),
g("source", "") or "",
g("note", "") or "",
)
def _render_table(rows: Sequence[dict[str, str]]) -> str:
header_line = "| " + " | ".join(MD_COLUMNS) + " |"
sep_line = "|" + "|".join(["---"] * len(MD_COLUMNS)) + "|"
data_lines = []
for i, row in enumerate(rows, start=1):
cells = _row_to_cells(row, i)
data_lines.append(
"| " + " | ".join(_escape_cell(c) for c in cells) + " |"
)
return "\n".join([header_line, sep_line, *data_lines])
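# A rendered data line looks like this (values hypothetical, shown only to
# illustrate the MD_COLUMNS cell order and the formatted P/L cells):
# | 1 | 2026-05-12 | Ma | 10:30 | A1 | XAUUSD | Buy | Clară | 2310.5 | 2305.0 | 2312.0 | 2315.0 | 2320.0 | TP0->TP1 | +1.00 | +1.50 | vision |  |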
def _render_md(rows: Sequence[dict[str, str]]) -> str:
if not rows:
return _placeholder_md()
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
table = _render_table(rows)
return (
"# Jurnal M2D (auto-generated from data/jurnal.csv)\n"
"\n"
f"Generated: {now_iso}\n"
f"Rows: {len(rows)}\n"
"\n"
f"{table}\n"
"\n"
"*Vezi `data/jurnal.csv` pentru toate cele 29 coloane "
"(id, ora_utc, tf_*, risc_pct, be_moved, max_reached, versions, extracted_at).*\n"
)
def _id_sort_key(raw: str) -> tuple[int, int | str]:
try:
return (0, int(raw))
except (ValueError, TypeError):
return (1, raw or "")
def _load_rows(csv_path: Path) -> list[dict[str, str]]:
"""Read CSV, returning rows sorted by id.
Schema drift handling:
- Extra header columns → warning to stderr, dropped.
- Missing required header columns → warning to stderr per affected row (row skipped).
"""
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
expected = set(csv_columns())
required = set(_CSV_FIELDS_USED)
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
header = reader.fieldnames or []
header_set = set(header)
extras = [c for c in header if c not in expected]
if extras:
print(
f"regenerate_md: warning: unknown CSV columns ignored: {extras}",
file=sys.stderr,
)
missing_required = required - header_set
rows: list[dict[str, str]] = []
for raw in reader:
if missing_required:
print(
f"regenerate_md: warning: row skipped (missing required "
f"columns: {sorted(missing_required)})",
file=sys.stderr,
)
continue
rows.append({k: (raw.get(k) or "") for k in required})
rows.sort(key=lambda r: _id_sort_key(r.get("id", "")))
return rows
def regenerate_md(
csv_path: Path | str = "data/jurnal.csv",
md_path: Path | str = "data/jurnal.md",
) -> int:
"""Read CSV → write MD atomically. Returns count of trade rows written."""
csv_p = Path(csv_path)
md_p = Path(md_path)
rows = _load_rows(csv_p)
content = _render_md(rows)
_atomic_write_text(md_p, content)
return len(rows)
def main() -> int:
args = sys.argv[1:]
csv_arg = args[0] if len(args) >= 1 else "data/jurnal.csv"
md_arg = args[1] if len(args) >= 2 else "data/jurnal.md"
n = regenerate_md(csv_arg, md_arg)
print(f"regenerate_md: wrote {md_arg} with {n} row(s)")
return 0
if __name__ == "__main__":
raise SystemExit(main())
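A quick sketch of the schema-drift behaviour, driven from Python rather than the CLI; the tmp/ paths and row values are throwaway illustrations:

from pathlib import Path

from scripts.regenerate_md import regenerate_md

Path("tmp").mkdir(exist_ok=True)
Path("tmp/jurnal.csv").write_text(
    "id,data,zi,ora_ro,set,instrument,directie,calitate,entry,sl,"
    "tp0,tp1,tp2,outcome_path,pl_marius,pl_theoretical,source,note,bogus\n"
    "1,2026-05-12,Ma,10:30,A1,XAUUSD,long,Clară,2310.5,2305.0,"
    "2312.0,2315.0,2320.0,TP0->TP1,1.0,1.5,vision,,x\n",
    encoding="utf-8",
)
# Warns on stderr that the unknown column 'bogus' is ignored, then writes
# the 18-column MD table; returns 1 (one trade row mirrored).
n = regenerate_md("tmp/jurnal.csv", "tmp/jurnal.md")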

scripts/stats.py (new file, 540 lines)

@@ -0,0 +1,540 @@
"""Backtest statistics for ``data/jurnal.csv``.
Outputs:
- Overall + per-Set + per-calitate + per-instrument WR, expectancy.
- Wilson 95% CI for WR (closed form).
- Bootstrap percentile 95% CI for expectancy (deterministic via ``seed``).
- ``--calibration`` mode: joins ``manual_calibration`` rows with their
``vision_calibration`` counterparts on ``screenshot_file`` and reports
field-by-field mismatch rates for the P4 gate (see ``STOPPING_RULE.md``).
A "win" is any trade with ``pl_marius > 0``. Pending trades
(``pl_marius`` blank, i.e. ``outcome_path in {pending, TP0->pending}``) are
excluded from both WR and expectancy: there is no realised outcome yet.
The ``calitate`` field is a known-biased descriptor (post-outcome
classification — see ``STOPPING_RULE.md`` §3). It is reported as
informational only and explicitly flagged as such; do NOT use it as a
filter for GO LIVE decisions.
"""
from __future__ import annotations
import argparse
import csv
import math
import random
import sys
import zlib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable
__all__ = [
"CORE_CALIBRATION_FIELDS",
"BACKTEST_SOURCES",
"CALIBRATION_SOURCES",
"Trade",
"GroupStats",
"load_trades",
"wilson_ci",
"bootstrap_ci",
"win_rate",
"expectancy",
"group_by",
"compute_group_stats",
"calibration_mismatch",
"format_report",
"main",
]
# Fields compared in the calibration mismatch gate (STOPPING_RULE.md §P4).
CORE_CALIBRATION_FIELDS: tuple[str, ...] = (
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"max_reached",
"directie",
)
BACKTEST_SOURCES: frozenset[str] = frozenset({"vision", "manual"})
CALIBRATION_SOURCES: frozenset[str] = frozenset(
{"manual_calibration", "vision_calibration"}
)
# ---------------------------------------------------------------------------
# Loading / typed access
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class Trade:
"""One realised (or pending) trade row, typed."""
id: int
screenshot_file: str
source: str
data: str
zi: str
ora_ro: str
instrument: str
directie: str
calitate: str
set: str
outcome_path: str
max_reached: str
be_moved: bool
pl_marius: float | None
pl_theoretical: float
raw: dict[str, str] = field(default_factory=dict)
@property
def is_pending(self) -> bool:
return self.pl_marius is None
@property
def is_win(self) -> bool:
return self.pl_marius is not None and self.pl_marius > 0
def _parse_optional_float(value: str) -> float | None:
s = (value or "").strip()
if s == "":
return None
return float(s)
def _parse_bool(value: str) -> bool:
return (value or "").strip().lower() in {"true", "1", "yes", "da"}
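# e.g. _parse_bool("True") -> True (str(bool) round-trips via .lower()),
# _parse_bool("da") -> True (Romanian yes), _parse_bool("") -> False.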
def _row_to_trade(row: dict[str, str]) -> Trade:
return Trade(
id=int(row.get("id") or 0),
screenshot_file=row.get("screenshot_file", ""),
source=row.get("source", ""),
data=row.get("data", ""),
zi=row.get("zi", ""),
ora_ro=row.get("ora_ro", ""),
instrument=row.get("instrument", ""),
directie=row.get("directie", ""),
calitate=row.get("calitate", ""),
set=row.get("set", ""),
outcome_path=row.get("outcome_path", ""),
max_reached=row.get("max_reached", ""),
be_moved=_parse_bool(row.get("be_moved", "")),
pl_marius=_parse_optional_float(row.get("pl_marius", "")),
pl_theoretical=float(row.get("pl_theoretical") or 0.0),
raw=dict(row),
)
def load_trades(csv_path: Path | str) -> list[Trade]:
"""Load all rows of ``csv_path`` as :class:`Trade` objects.
Returns ``[]`` if the file does not exist or is empty.
"""
p = Path(csv_path)
if not p.exists() or p.stat().st_size == 0:
return []
with p.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return [_row_to_trade(r) for r in reader]
# ---------------------------------------------------------------------------
# Statistics primitives
# ---------------------------------------------------------------------------
def wilson_ci(wins: int, n: int, z: float = 1.96) -> tuple[float, float]:
"""Wilson score interval for a binomial proportion.
Returns ``(lo, hi)`` as proportions in [0, 1]. For ``n == 0`` returns
``(0.0, 0.0)``. ``z = 1.96`` corresponds to a 95% CI.
"""
if n <= 0:
return (0.0, 0.0)
if wins < 0 or wins > n:
raise ValueError(f"wins={wins} out of range for n={n}")
p_hat = wins / n
denom = 1.0 + (z * z) / n
center = p_hat + (z * z) / (2.0 * n)
half = z * math.sqrt((p_hat * (1.0 - p_hat) + (z * z) / (4.0 * n)) / n)
lo = (center - half) / denom
hi = (center + half) / denom
return (max(0.0, lo), min(1.0, hi))
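# Worked example (hypothetical counts): wins=12, n=20 gives p_hat = 0.60 and,
# with z = 1.96, an interval of roughly (0.387, 0.781). The width at small N
# is exactly why the CI is reported alongside the bare WR.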
def bootstrap_ci(
values: list[float],
*,
iterations: int = 2000,
alpha: float = 0.05,
seed: int | None = None,
) -> tuple[float, float]:
"""Percentile-method bootstrap CI for the mean of ``values``.
Deterministic when ``seed`` is provided. Returns ``(lo, hi)``. For
``len(values) < 2`` returns ``(mean, mean)``.
"""
if not values:
return (0.0, 0.0)
n = len(values)
mean = sum(values) / n
if n < 2 or iterations <= 0:
return (mean, mean)
rng = random.Random(seed)
means: list[float] = []
for _ in range(iterations):
s = 0.0
for _ in range(n):
s += values[rng.randrange(n)]
means.append(s / n)
means.sort()
lo_idx = int(math.floor((alpha / 2.0) * iterations))
hi_idx = int(math.ceil((1.0 - alpha / 2.0) * iterations)) - 1
lo_idx = max(0, min(iterations - 1, lo_idx))
hi_idx = max(0, min(iterations - 1, hi_idx))
return (means[lo_idx], means[hi_idx])
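# Determinism note: random.Random(seed) is a self-contained generator, so
# e.g. bootstrap_ci([1.0, -1.0, 2.0, 0.5], seed=42) (hypothetical values)
# returns the identical (lo, hi) pair on every run and never touches the
# global random state.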
def win_rate(trades: Iterable[Trade]) -> tuple[int, int, float]:
"""Return ``(wins, n_resolved, wr)`` ignoring pending trades."""
resolved = [t for t in trades if not t.is_pending]
wins = sum(1 for t in resolved if t.is_win)
n = len(resolved)
wr = (wins / n) if n else 0.0
return wins, n, wr
def expectancy(trades: Iterable[Trade], overlay: str = "pl_marius") -> float:
"""Mean P/L (in R) over non-pending trades, on the given overlay."""
if overlay not in {"pl_marius", "pl_theoretical"}:
raise ValueError(f"unknown overlay {overlay!r}")
if overlay == "pl_marius":
vals = [t.pl_marius for t in trades if t.pl_marius is not None]
else:
vals = [t.pl_theoretical for t in trades if not t.is_pending]
if not vals:
return 0.0
return sum(vals) / len(vals)
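# e.g. resolved trades at +1.0R, -1.0R, +2.0R plus one pending row give
# expectancy ~= +0.667R on pl_marius; the pending trade is excluded from
# the mean rather than counted as 0.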
# ---------------------------------------------------------------------------
# Group stats
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class GroupStats:
key: str
n_total: int
n_resolved: int
wins: int
wr: float
wr_ci_lo: float
wr_ci_hi: float
exp_marius: float
exp_marius_ci_lo: float
exp_marius_ci_hi: float
exp_theoretical: float
exp_theoretical_ci_lo: float
exp_theoretical_ci_hi: float
def group_by(trades: Iterable[Trade], field_name: str) -> dict[str, list[Trade]]:
out: dict[str, list[Trade]] = {}
for t in trades:
key = getattr(t, field_name, "") or "(blank)"
out.setdefault(key, []).append(t)
return out
def compute_group_stats(
trades: list[Trade],
*,
label: str,
bootstrap_iterations: int = 2000,
seed: int | None = None,
) -> GroupStats:
wins, n_resolved, wr = win_rate(trades)
wr_lo, wr_hi = wilson_ci(wins, n_resolved)
pl_m_vals = [t.pl_marius for t in trades if t.pl_marius is not None]
exp_m = (sum(pl_m_vals) / len(pl_m_vals)) if pl_m_vals else 0.0
exp_m_lo, exp_m_hi = bootstrap_ci(
pl_m_vals, iterations=bootstrap_iterations, seed=seed
)
pl_t_vals = [t.pl_theoretical for t in trades if not t.is_pending]
exp_t = (sum(pl_t_vals) / len(pl_t_vals)) if pl_t_vals else 0.0
exp_t_lo, exp_t_hi = bootstrap_ci(
pl_t_vals,
iterations=bootstrap_iterations,
seed=None if seed is None else seed + 1,
)
return GroupStats(
key=label,
n_total=len(trades),
n_resolved=n_resolved,
wins=wins,
wr=wr,
wr_ci_lo=wr_lo,
wr_ci_hi=wr_hi,
exp_marius=exp_m,
exp_marius_ci_lo=exp_m_lo,
exp_marius_ci_hi=exp_m_hi,
exp_theoretical=exp_t,
exp_theoretical_ci_lo=exp_t_lo,
exp_theoretical_ci_hi=exp_t_hi,
)
# ---------------------------------------------------------------------------
# Calibration mode
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class CalibrationReport:
pairs: int
field_mismatches: dict[str, int]
total_comparisons: int
@property
def overall_mismatch_rate(self) -> float:
if self.total_comparisons == 0:
return 0.0
total = sum(self.field_mismatches.values())
return total / self.total_comparisons
def _normalise_for_compare(field_name: str, value: str) -> str:
s = (value or "").strip()
if field_name in {"entry", "sl", "tp0", "tp1", "tp2"}:
try:
return f"{float(s):.4f}"
except ValueError:
return s
return s
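# e.g. "1.2" and "1.2000" both normalise to "1.2000" for the price fields,
# so a pure formatting difference between the manual and vision legs is not
# counted as a mismatch; non-numeric text falls through unchanged.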
def calibration_mismatch(
trades: Iterable[Trade],
*,
fields: tuple[str, ...] = CORE_CALIBRATION_FIELDS,
) -> CalibrationReport:
"""Pair ``manual_calibration`` and ``vision_calibration`` rows by
``screenshot_file``, then count mismatches per ``fields``.
Returns a :class:`CalibrationReport`. Unpaired calibration rows are
silently ignored — they cannot contribute to a comparison.
"""
manual: dict[str, Trade] = {}
vision: dict[str, Trade] = {}
for t in trades:
if t.source == "manual_calibration":
manual[t.screenshot_file] = t
elif t.source == "vision_calibration":
vision[t.screenshot_file] = t
paired_files = sorted(set(manual) & set(vision))
field_mismatches: dict[str, int] = {f: 0 for f in fields}
for f in paired_files:
m = manual[f]
v = vision[f]
for fld in fields:
mv = _normalise_for_compare(fld, m.raw.get(fld, ""))
vv = _normalise_for_compare(fld, v.raw.get(fld, ""))
if mv != vv:
field_mismatches[fld] += 1
total_comparisons = len(paired_files) * len(fields)
return CalibrationReport(
pairs=len(paired_files),
field_mismatches=field_mismatches,
total_comparisons=total_comparisons,
)
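# Pairing sketch (hypothetical files): manual_calibration rows for
# {a.png, b.png} and vision_calibration rows for {b.png, c.png} compare
# only b.png; a.png and c.png stay unpaired and are ignored, and with the
# 8 CORE_CALIBRATION_FIELDS this gives total_comparisons == 1 * 8.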
# ---------------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------------
def _fmt_pct(p: float) -> str:
return f"{100.0 * p:5.1f}%"
def _fmt_r(x: float) -> str:
return f"{x:+.3f}R"
def _fmt_stats_row(s: GroupStats) -> str:
return (
f"{s.key:<14} N={s.n_total:>3} (resolved {s.n_resolved:>3}) "
f"WR={_fmt_pct(s.wr)} [{_fmt_pct(s.wr_ci_lo)}, {_fmt_pct(s.wr_ci_hi)}] "
f"E_marius={_fmt_r(s.exp_marius)} "
f"[{_fmt_r(s.exp_marius_ci_lo)}, {_fmt_r(s.exp_marius_ci_hi)}] "
f"E_theor={_fmt_r(s.exp_theoretical)}"
)
def format_report(
trades: list[Trade],
*,
bootstrap_iterations: int = 2000,
seed: int | None = None,
) -> str:
"""Render the main stats report.
Only ``source in {vision, manual}`` rows are included in the WR /
expectancy computations; calibration rows are reported separately via
``--calibration``.
"""
backtest = [t for t in trades if t.source in BACKTEST_SOURCES]
lines: list[str] = []
lines.append("=== M2D Backtest Stats ===")
lines.append(f"Backtest rows: {len(backtest)} (calibration excluded)")
lines.append("")
if not backtest:
lines.append("(no backtest trades yet)")
return "\n".join(lines)
overall = compute_group_stats(
backtest,
label="OVERALL",
bootstrap_iterations=bootstrap_iterations,
seed=seed,
)
lines.append("-- Overall --")
lines.append(_fmt_stats_row(overall))
lines.append("")
def _emit_group(title: str, field_name: str, key_order: list[str] | None = None) -> None:
lines.append(f"-- By {title} --")
groups = group_by(backtest, field_name)
keys = key_order if key_order is not None else sorted(groups)
for k in keys:
if k not in groups:
continue
# Built-in hash() is salted per process for str, which would break the
# documented seed determinism; zlib.crc32 is stable across runs.
sub_seed = None if seed is None else seed + zlib.crc32(k.encode()) % 10_000
s = compute_group_stats(
groups[k],
label=k,
bootstrap_iterations=bootstrap_iterations,
seed=sub_seed,
)
lines.append(_fmt_stats_row(s))
lines.append("")
_emit_group(
"Set",
"set",
key_order=["A1", "A2", "A3", "B", "C", "D", "Other"],
)
_emit_group("Instrument", "instrument")
lines.append(
"[!] By calitate — descriptor only (post-outcome, biased; do not use "
"as a GO LIVE filter — see STOPPING_RULE.md §3)."
)
_emit_group(
"calitate",
"calitate",
key_order=["Clară", "Mai mare ca impuls", "Slabă", "n/a"],
)
return "\n".join(lines).rstrip() + "\n"
def format_calibration_report(trades: list[Trade]) -> str:
cal = calibration_mismatch(trades)
lines: list[str] = []
lines.append("=== Calibration P4 gate ===")
lines.append(f"Paired screenshots (manual ∩ vision): {cal.pairs}")
if cal.pairs == 0:
lines.append("(no calibration pairs yet)")
return "\n".join(lines) + "\n"
lines.append("")
lines.append(f"{'field':<14} mismatches / pairs rate")
for fld in CORE_CALIBRATION_FIELDS:
m = cal.field_mismatches.get(fld, 0)
rate = (m / cal.pairs) if cal.pairs else 0.0
lines.append(f"{fld:<14} {m:>3} / {cal.pairs:<3} {_fmt_pct(rate)}")
lines.append("")
lines.append(
f"Overall mismatch rate: {_fmt_pct(cal.overall_mismatch_rate)} "
f"({sum(cal.field_mismatches.values())} of {cal.total_comparisons} comparisons)"
)
threshold = 0.10
verdict = "PASS" if cal.overall_mismatch_rate <= threshold else "FAIL"
lines.append(f"P4 gate (<= 10%): {verdict}")
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
prog="stats",
description="Backtest statistics for data/jurnal.csv",
)
parser.add_argument(
"--csv",
type=Path,
default=Path("data/jurnal.csv"),
help="Path to the jurnal CSV (default: data/jurnal.csv).",
)
parser.add_argument(
"--calibration",
action="store_true",
help="Show P4 calibration mismatch report instead of backtest stats.",
)
parser.add_argument(
"--bootstrap-iterations",
type=int,
default=2000,
help="Bootstrap iterations for expectancy CI (default: 2000).",
)
parser.add_argument(
"--seed",
type=int,
default=None,
help="Seed for the bootstrap RNG (set for deterministic output).",
)
args = parser.parse_args(argv)
trades = load_trades(args.csv)
if args.calibration:
out = format_calibration_report(trades)
else:
out = format_report(
trades,
bootstrap_iterations=args.bootstrap_iterations,
seed=args.seed,
)
# Force UTF-8 on stdout: the report contains diacritics ("Clară", "Slabă")
# and a console codepage like cp1252 would crash on those.
try:
sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
except (AttributeError, OSError):
pass
sys.stdout.write(out)
return 0
if __name__ == "__main__":
raise SystemExit(main())
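Since ``main`` takes an explicit ``argv``, the CLI is scriptable from Python too; a short sketch (the path and seed are illustrative):

from scripts.stats import main

# Deterministic backtest report: a fixed seed pins the bootstrap CIs.
main(["--csv", "data/jurnal.csv", "--seed", "42"])

# P4 calibration gate report instead of the backtest stats.
main(["--csv", "data/jurnal.csv", "--calibration"])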