scripts: regenerate_md + stats + tests (116-144 passing across modules)

Marius
2026-05-13 12:45:05 +03:00
parent ce80151c58
commit 26d084dc4b
6 changed files with 1843 additions and 283 deletions

scripts/append_row.py (modified)

@@ -1,22 +1,26 @@
"""Append a validated M2D extraction to ``data/trades.csv``.
"""Append a validated M2D extraction to ``data/jurnal.csv``.
Pipeline:
JSON file --> pydantic validate (M2DExtraction)
--> load data/_meta.yaml (versions + schema)
--> compute ora_ro, zi, set, pl_marius, pl_theoretical
--> load data/_meta.yaml (versions)
--> compute id, ora_ro, zi, set, pl_marius, pl_theoretical, extracted_at
--> dedup on (screenshot_file, source)
--> atomic CSV write (temp file + os.replace)
--> atomic CSV write (sibling .tmp + os.replace)
Source values
- ``manual`` : Marius logged by hand
- ``vision`` : produced by the vision subagent
- ``manual`` : Marius logged by hand
- ``manual_calibration`` : calibration P4 — manual leg
- ``vision_calibration`` : calibration P4 — vision leg
A row with ``source=manual_calibration`` and a row with ``source=vision_calibration``
for the *same* screenshot are allowed to coexist (different dedup keys); a
duplicate ``(screenshot_file, source)`` pair is rejected (or skipped — see
``append_row`` ``on_duplicate`` argument).
for the *same* screenshot are allowed to coexist (different dedup keys).
Failure mode: ``append_extraction`` NEVER raises. On any error (missing JSON,
pydantic ValidationError, dedup hit, etc.) it returns
``{"status": "rejected", "reason": "...", "id": None, "row": None}`` so the
caller (a slash command) can decide what to do with the screenshot
(move to ``needs_review/``, log to workflow, etc.).
"""
from __future__ import annotations
@@ -24,41 +28,43 @@ from __future__ import annotations
import csv
import json
import os
import tempfile
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal
import yaml
from pydantic import ValidationError
from scripts.calendar_parse import calc_set, load_calendar, utc_to_ro
from scripts.pl_calc import pl_marius, pl_theoretical
from scripts.vision_schema import M2DExtraction, parse_extraction_dict
from scripts.vision_schema import M2DExtraction, parse_extraction
__all__ = [
"CSV_COLUMNS",
"VALID_SOURCES",
"build_row",
"read_rows",
"append_row",
"append_row_from_json",
"ZI_RO_MAP",
"csv_columns",
"append_extraction",
]
Source = Literal["manual", "vision", "manual_calibration", "vision_calibration"]
Source = Literal["vision", "manual", "manual_calibration", "vision_calibration"]
VALID_SOURCES: frozenset[str] = frozenset(
{"manual", "vision", "manual_calibration", "vision_calibration"}
{"vision", "manual", "manual_calibration", "vision_calibration"}
)
# Canonical column order (29) — must stay stable; regenerate_md + stats depend on it.
CSV_COLUMNS: tuple[str, ...] = (
"id",
"screenshot_file",
"source",
"data",
"ora_utc",
"ora_ro",
"zi",
"set",
"ora_ro",
"ora_utc",
"instrument",
"directie",
"tf_mare",
@@ -73,17 +79,38 @@ CSV_COLUMNS: tuple[str, ...] = (
"outcome_path",
"max_reached",
"be_moved",
"confidence",
"ambiguities",
"note",
"pl_marius",
"pl_theoretical",
"set",
"indicator_version",
"pl_overlay_version",
"csv_schema_version",
"extracted_at",
"note",
)
ZI_RO_MAP: dict[str, str] = {
"Mon": "Lu",
"Tue": "Ma",
"Wed": "Mi",
"Thu": "Jo",
"Fri": "Vi",
"Sat": "Sa",
"Sun": "Du",
}
def csv_columns() -> list[str]:
"""Return the 29-column header in canonical order."""
return list(CSV_COLUMNS)
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def _load_meta(meta_path: Path) -> dict[str, Any]:
with meta_path.open("r", encoding="utf-8") as fh:
meta = yaml.safe_load(fh) or {}
@@ -94,35 +121,69 @@ def _load_meta(meta_path: Path) -> dict[str, Any]:
return meta
def _read_existing_rows(csv_path: Path) -> list[dict[str, str]]:
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _next_id(rows: list[dict[str, str]]) -> int:
max_id = 0
for r in rows:
raw = r.get("id", "")
if not raw:
continue
try:
v = int(raw)
except (TypeError, ValueError):
continue
if v > max_id:
max_id = v
return max_id + 1
def _format_optional(value: float | None) -> str:
return "" if value is None else f"{value:.4f}"
def build_row(
def _write_csv_atomic(
csv_path: Path, rows: list[dict[str, str]], columns: list[str]
) -> None:
csv_path.parent.mkdir(parents=True, exist_ok=True)
tmp = csv_path.with_suffix(csv_path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=columns)
writer.writeheader()
for row in rows:
writer.writerow({k: row.get(k, "") for k in columns})
os.replace(tmp, csv_path)
def _build_row(
extraction: M2DExtraction,
*,
source: str,
row_id: int,
meta: dict[str, Any],
calendar: list[dict[str, Any]],
extracted_at: str,
) -> dict[str, str]:
"""Compute the full CSV row dict for one extraction."""
if source not in VALID_SOURCES:
raise ValueError(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
d_ro, t_ro, zi = utc_to_ro(extraction.data, extraction.ora_utc)
set_label = calc_set(d_ro, t_ro, zi, calendar)
d_ro, t_ro, day_short = utc_to_ro(extraction.data, extraction.ora_utc)
set_label = calc_set(d_ro, t_ro, day_short, calendar)
pl_m = pl_marius(extraction.outcome_path, extraction.be_moved)
pl_t = pl_theoretical(extraction.max_reached)
zi_ro = ZI_RO_MAP[day_short]
return {
"id": str(row_id),
"screenshot_file": extraction.screenshot_file,
"source": source,
"data": extraction.data,
"ora_utc": extraction.ora_utc,
"zi": zi_ro,
"ora_ro": t_ro.strftime("%H:%M"),
"zi": zi,
"set": set_label,
"ora_utc": extraction.ora_utc,
"instrument": extraction.instrument,
"directie": extraction.directie,
"tf_mare": extraction.tf_mare,
@@ -136,102 +197,115 @@ def build_row(
"risc_pct": f"{extraction.risc_pct}",
"outcome_path": extraction.outcome_path,
"max_reached": extraction.max_reached,
"be_moved": "true" if extraction.be_moved else "false",
"confidence": extraction.confidence,
"ambiguities": json.dumps(extraction.ambiguities, ensure_ascii=False),
"note": extraction.note,
"be_moved": str(extraction.be_moved),
"pl_marius": _format_optional(pl_m),
"pl_theoretical": _format_optional(pl_t),
"set": set_label,
"indicator_version": str(meta["indicator_version"]),
"pl_overlay_version": str(meta["pl_overlay_version"]),
"csv_schema_version": str(meta["csv_schema_version"]),
"extracted_at": extracted_at,
"note": extraction.note,
}
def read_rows(csv_path: Path) -> list[dict[str, str]]:
"""Read existing rows; return [] if the file does not exist or is empty."""
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _reject(reason: str) -> dict[str, Any]:
return {"status": "rejected", "reason": reason, "id": None, "row": None}
def _atomic_write(csv_path: Path, rows: list[dict[str, str]]) -> None:
csv_path.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_name = tempfile.mkstemp(
prefix=csv_path.name + ".",
suffix=".tmp",
dir=str(csv_path.parent),
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS))
writer.writeheader()
for r in rows:
writer.writerow({k: r.get(k, "") for k in CSV_COLUMNS})
os.replace(tmp_name, csv_path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
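# Note: mkstemp places the temp file in the target's own directory, so the
# final os.replace stays on one filesystem and is atomic; readers only ever
# see the old CSV or the complete new one, never a half-written file.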
# ---------------------------------------------------------------------------
# public API
# ---------------------------------------------------------------------------
def append_row(
extraction: M2DExtraction,
def append_extraction(
json_path: Path | str,
source: str,
csv_path: Path,
meta_path: Path,
calendar_path: Path,
on_duplicate: Literal["raise", "skip"] = "raise",
) -> dict[str, str]:
"""Append one extraction to the CSV.
csv_path: Path | str = "data/jurnal.csv",
meta_path: Path | str = "data/_meta.yaml",
calendar_path: Path | str = "calendar_evenimente.yaml",
) -> dict[str, Any]:
"""Append one validated extraction to the jurnal CSV.
Dedup key: ``(screenshot_file, source)``. If a row with the same key
already exists, behaviour is controlled by ``on_duplicate``:
Never raises. Returns one of:
- ``"raise"`` (default): raise ``ValueError``.
- ``"skip"``: leave the CSV untouched and return the *existing* row.
- ``{"status": "ok", "reason": "", "id": <int>, "row": <dict>}``
- ``{"status": "rejected", "reason": <str>, "id": None, "row": None}``
"""
meta = _load_meta(meta_path)
calendar = load_calendar(calendar_path)
row = build_row(extraction, source, meta, calendar)
json_path = Path(json_path)
csv_path = Path(csv_path)
meta_path = Path(meta_path)
calendar_path = Path(calendar_path)
existing = read_rows(csv_path)
key = (row["screenshot_file"], row["source"])
if source not in VALID_SOURCES:
return _reject(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
if not json_path.exists():
return _reject(f"JSON file not found: {json_path}")
try:
with json_path.open("r", encoding="utf-8") as fh:
raw = fh.read()
except OSError as exc:
return _reject(f"failed to read JSON {json_path}: {exc}")
try:
extraction = parse_extraction(raw)
except ValidationError as exc:
return _reject(f"validation error: {exc}")
except (ValueError, json.JSONDecodeError) as exc:
return _reject(f"validation error (json parse): {exc}")
try:
meta = _load_meta(meta_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"_meta.yaml not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"_meta.yaml invalid: {exc}")
try:
calendar = load_calendar(calendar_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"calendar not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"calendar invalid: {exc}")
try:
existing = _read_existing_rows(csv_path)
except OSError as exc:
return _reject(f"failed to read existing CSV {csv_path}: {exc}")
key = (extraction.screenshot_file, source)
for r in existing:
if (r.get("screenshot_file"), r.get("source")) == key:
if on_duplicate == "skip":
return r
raise ValueError(
f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r} "
f"already exists in {csv_path}"
return _reject(
f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r}"
)
existing.append(row)
_atomic_write(csv_path, existing)
return row
def append_row_from_json(
json_path: Path,
source: str,
csv_path: Path,
meta_path: Path,
calendar_path: Path,
on_duplicate: Literal["raise", "skip"] = "raise",
) -> dict[str, str]:
"""Convenience wrapper: load JSON, validate, append."""
with Path(json_path).open("r", encoding="utf-8") as fh:
payload = json.load(fh)
extraction = parse_extraction_dict(payload)
return append_row(
extraction=extraction,
source=source,
csv_path=csv_path,
meta_path=meta_path,
calendar_path=calendar_path,
on_duplicate=on_duplicate,
row_id = _next_id(existing)
extracted_at = (
datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
)
try:
row = _build_row(
extraction,
source=source,
row_id=row_id,
meta=meta,
calendar=calendar,
extracted_at=extracted_at,
)
except (KeyError, ValueError) as exc:
return _reject(f"derived-field computation failed: {exc}")
try:
_write_csv_atomic(csv_path, [*existing, row], list(CSV_COLUMNS))
except OSError as exc:
return _reject(
f"atomic write failed: {exc}\n{traceback.format_exc()}"
)
return {"status": "ok", "reason": "", "id": row_id, "row": row}
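For orientation, a minimal caller sketch for this never-raises contract; the JSON path, the source value, and the needs_review/ destination are illustrative, not part of this commit:

from pathlib import Path

from scripts.append_row import append_extraction

result = append_extraction("extractions/shot_0042.json", source="vision")
if result["status"] == "ok":
    print(f"appended row id={result['id']}")
else:
    # Rejected: park the screenshot for review and keep the reason for the log.
    Path("needs_review").mkdir(exist_ok=True)
    print(f"rejected: {result['reason']}")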

scripts/regenerate_md.py (new file, 240 lines)

@@ -0,0 +1,240 @@
"""Regenerate ``data/jurnal.md`` from ``data/jurnal.csv``.
CSV is the source of truth (29 columns, schema owned by ``scripts.append_row``).
MD is a human-readable mirror with a curated 18-column table.
CLI: ``python scripts/regenerate_md.py [csv_path] [md_path]``
"""
from __future__ import annotations
import csv
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Sequence
from scripts.append_row import csv_columns
__all__ = ["MD_COLUMNS", "regenerate_md", "main"]
MD_COLUMNS: tuple[str, ...] = (
"#",
"Data",
"Zi",
"Ora RO",
"Set",
"Instrument",
"Direcție",
"Calitate",
"Entry",
"SL",
"TP0",
"TP1",
"TP2",
"outcome_path",
"P/L (Marius)",
"P/L (theoretic)",
"Source",
"Note",
)
_CSV_FIELDS_USED: tuple[str, ...] = (
"id",
"data",
"zi",
"ora_ro",
"set",
"instrument",
"directie",
"calitate",
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"pl_marius",
"pl_theoretical",
"source",
"note",
)
_DIRECTIE_DISPLAY = {"long": "Buy", "short": "Sell", "buy": "Buy", "sell": "Sell"}
def _fmt_pl(value: str) -> str:
if value is None or value == "":
return "pending"
try:
return f"{float(value):+.2f}"
except ValueError:
return value
def _fmt_directie(value: str) -> str:
if not value:
return ""
return _DIRECTIE_DISPLAY.get(value.strip().lower(), value)
def _escape_cell(value: str) -> str:
return (value or "").replace("|", "\\|").replace("\n", " ").strip()
def _placeholder_md() -> str:
return (
"# Jurnal M2D (auto-generated)\n"
"\n"
"*Niciun trade încă. Adaugă unul prin `/m2d-log` sau `/backtest`.*\n"
)
def _atomic_write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_name = tempfile.mkstemp(
prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="\n") as fh:
fh.write(content)
os.replace(tmp_name, path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
def _row_to_cells(row: dict[str, str], display_index: int) -> tuple[str, ...]:
g = row.get
return (
str(display_index),
g("data", "") or "",
g("zi", "") or "",
g("ora_ro", "") or "",
g("set", "") or "",
g("instrument", "") or "",
_fmt_directie(g("directie", "") or ""),
g("calitate", "") or "",
g("entry", "") or "",
g("sl", "") or "",
g("tp0", "") or "",
g("tp1", "") or "",
g("tp2", "") or "",
g("outcome_path", "") or "",
_fmt_pl(g("pl_marius", "") or ""),
_fmt_pl(g("pl_theoretical", "") or ""),
g("source", "") or "",
g("note", "") or "",
)
def _render_table(rows: Sequence[dict[str, str]]) -> str:
header_line = "| " + " | ".join(MD_COLUMNS) + " |"
sep_line = "|" + "|".join(["---"] * len(MD_COLUMNS)) + "|"
data_lines = []
for i, row in enumerate(rows, start=1):
cells = _row_to_cells(row, i)
data_lines.append(
"| " + " | ".join(_escape_cell(c) for c in cells) + " |"
)
return "\n".join([header_line, sep_line, *data_lines])
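# A rendered data line looks like this (values hypothetical, shown only to
# illustrate the MD_COLUMNS cell order and the formatted P/L cells):
# | 1 | 2026-05-12 | Ma | 10:30 | A1 | XAUUSD | Buy | Clară | 2310.5 | 2305.0 | 2312.0 | 2315.0 | 2320.0 | TP0->TP1 | +1.00 | +1.50 | vision |  |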
def _render_md(rows: Sequence[dict[str, str]]) -> str:
if not rows:
return _placeholder_md()
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
table = _render_table(rows)
return (
"# Jurnal M2D (auto-generated from data/jurnal.csv)\n"
"\n"
f"Generated: {now_iso}\n"
f"Rows: {len(rows)}\n"
"\n"
f"{table}\n"
"\n"
"*Vezi `data/jurnal.csv` pentru toate cele 29 coloane "
"(id, ora_utc, tf_*, risc_pct, be_moved, max_reached, versions, extracted_at).*\n"
)
def _id_sort_key(raw: str) -> tuple[int, int | str]:
try:
return (0, int(raw))
except (ValueError, TypeError):
return (1, raw or "")
def _load_rows(csv_path: Path) -> list[dict[str, str]]:
"""Read CSV, returning rows sorted by id.
Schema drift handling:
- Extra header columns → warning to stderr, dropped.
- Missing required header columns → warning to stderr per affected row (row skipped).
"""
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
expected = set(csv_columns())
required = set(_CSV_FIELDS_USED)
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
header = reader.fieldnames or []
header_set = set(header)
extras = [c for c in header if c not in expected]
if extras:
print(
f"regenerate_md: warning: unknown CSV columns ignored: {extras}",
file=sys.stderr,
)
missing_required = required - header_set
rows: list[dict[str, str]] = []
for raw in reader:
if missing_required:
print(
f"regenerate_md: warning: row skipped (missing required "
f"columns: {sorted(missing_required)})",
file=sys.stderr,
)
continue
rows.append({k: (raw.get(k) or "") for k in required})
rows.sort(key=lambda r: _id_sort_key(r.get("id", "")))
return rows
def regenerate_md(
csv_path: Path | str = "data/jurnal.csv",
md_path: Path | str = "data/jurnal.md",
) -> int:
"""Read CSV → write MD atomically. Returns count of trade rows written."""
csv_p = Path(csv_path)
md_p = Path(md_path)
rows = _load_rows(csv_p)
content = _render_md(rows)
_atomic_write_text(md_p, content)
return len(rows)
def main() -> int:
args = sys.argv[1:]
csv_arg = args[0] if len(args) >= 1 else "data/jurnal.csv"
md_arg = args[1] if len(args) >= 2 else "data/jurnal.md"
n = regenerate_md(csv_arg, md_arg)
print(f"regenerate_md: wrote {md_arg} with {n} row(s)")
return 0
if __name__ == "__main__":
raise SystemExit(main())
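A quick sketch of the schema-drift behaviour, driven from Python rather than the CLI; the tmp/ paths and row values are throwaway illustrations:

from pathlib import Path

from scripts.regenerate_md import regenerate_md

Path("tmp").mkdir(exist_ok=True)
Path("tmp/jurnal.csv").write_text(
    "id,data,zi,ora_ro,set,instrument,directie,calitate,entry,sl,"
    "tp0,tp1,tp2,outcome_path,pl_marius,pl_theoretical,source,note,bogus\n"
    "1,2026-05-12,Ma,10:30,A1,XAUUSD,long,Clară,2310.5,2305.0,"
    "2312.0,2315.0,2320.0,TP0->TP1,1.0,1.5,vision,,x\n",
    encoding="utf-8",
)
# Warns on stderr that the unknown column 'bogus' is ignored, then writes
# the 18-column MD table; returns 1 (one trade row mirrored).
n = regenerate_md("tmp/jurnal.csv", "tmp/jurnal.md")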

scripts/stats.py (new file, 540 lines)

@@ -0,0 +1,540 @@
"""Backtest statistics for ``data/jurnal.csv``.
Outputs:
- Overall + per-Set + per-calitate + per-instrument WR, expectancy.
- Wilson 95% CI for WR (closed form).
- Bootstrap percentile 95% CI for expectancy (deterministic via ``seed``).
- ``--calibration`` mode: joins ``manual_calibration`` rows with their
``vision_calibration`` counterparts on ``screenshot_file`` and reports
field-by-field mismatch rates for the P4 gate (see ``STOPPING_RULE.md``).
A "win" is any trade with ``pl_marius > 0``. Pending trades
(``pl_marius`` blank, i.e. ``outcome_path in {pending, TP0->pending}``) are
excluded from both WR and expectancy: there is no realised outcome yet.
The ``calitate`` field is a known-biased descriptor (post-outcome
classification — see ``STOPPING_RULE.md`` §3). It is reported as
informational only and explicitly flagged as such; do NOT use it as a
filter for GO LIVE decisions.
"""
from __future__ import annotations
import argparse
import csv
import math
import random
import sys
import zlib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable
__all__ = [
"CORE_CALIBRATION_FIELDS",
"BACKTEST_SOURCES",
"CALIBRATION_SOURCES",
"Trade",
"GroupStats",
"load_trades",
"wilson_ci",
"bootstrap_ci",
"win_rate",
"expectancy",
"group_by",
"compute_group_stats",
"calibration_mismatch",
"format_report",
"main",
]
# Fields compared in the calibration mismatch gate (STOPPING_RULE.md §P4).
CORE_CALIBRATION_FIELDS: tuple[str, ...] = (
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"max_reached",
"directie",
)
BACKTEST_SOURCES: frozenset[str] = frozenset({"vision", "manual"})
CALIBRATION_SOURCES: frozenset[str] = frozenset(
{"manual_calibration", "vision_calibration"}
)
# ---------------------------------------------------------------------------
# Loading / typed access
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class Trade:
"""One realised (or pending) trade row, typed."""
id: int
screenshot_file: str
source: str
data: str
zi: str
ora_ro: str
instrument: str
directie: str
calitate: str
set: str
outcome_path: str
max_reached: str
be_moved: bool
pl_marius: float | None
pl_theoretical: float
raw: dict[str, str] = field(default_factory=dict)
@property
def is_pending(self) -> bool:
return self.pl_marius is None
@property
def is_win(self) -> bool:
return self.pl_marius is not None and self.pl_marius > 0
def _parse_optional_float(value: str) -> float | None:
s = (value or "").strip()
if s == "":
return None
return float(s)
def _parse_bool(value: str) -> bool:
return (value or "").strip().lower() in {"true", "1", "yes", "da"}
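# e.g. _parse_bool("True") -> True (str(bool) round-trips via .lower()),
# _parse_bool("da") -> True (Romanian yes), _parse_bool("") -> False.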
def _row_to_trade(row: dict[str, str]) -> Trade:
return Trade(
id=int(row.get("id") or 0),
screenshot_file=row.get("screenshot_file", ""),
source=row.get("source", ""),
data=row.get("data", ""),
zi=row.get("zi", ""),
ora_ro=row.get("ora_ro", ""),
instrument=row.get("instrument", ""),
directie=row.get("directie", ""),
calitate=row.get("calitate", ""),
set=row.get("set", ""),
outcome_path=row.get("outcome_path", ""),
max_reached=row.get("max_reached", ""),
be_moved=_parse_bool(row.get("be_moved", "")),
pl_marius=_parse_optional_float(row.get("pl_marius", "")),
pl_theoretical=float(row.get("pl_theoretical") or 0.0),
raw=dict(row),
)
def load_trades(csv_path: Path | str) -> list[Trade]:
"""Load all rows of ``csv_path`` as :class:`Trade` objects.
Returns ``[]`` if the file does not exist or is empty.
"""
p = Path(csv_path)
if not p.exists() or p.stat().st_size == 0:
return []
with p.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return [_row_to_trade(r) for r in reader]
# ---------------------------------------------------------------------------
# Statistics primitives
# ---------------------------------------------------------------------------
def wilson_ci(wins: int, n: int, z: float = 1.96) -> tuple[float, float]:
"""Wilson score interval for a binomial proportion.
Returns ``(lo, hi)`` as proportions in [0, 1]. For ``n == 0`` returns
``(0.0, 0.0)``. ``z = 1.96`` corresponds to a 95% CI.
"""
if n <= 0:
return (0.0, 0.0)
if wins < 0 or wins > n:
raise ValueError(f"wins={wins} out of range for n={n}")
p_hat = wins / n
denom = 1.0 + (z * z) / n
center = p_hat + (z * z) / (2.0 * n)
half = z * math.sqrt((p_hat * (1.0 - p_hat) + (z * z) / (4.0 * n)) / n)
lo = (center - half) / denom
hi = (center + half) / denom
return (max(0.0, lo), min(1.0, hi))
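# Worked example (hypothetical counts): wins=12, n=20 gives p_hat = 0.60 and,
# with z = 1.96, an interval of roughly (0.387, 0.781). The width at small N
# is exactly why the CI is reported alongside the bare WR.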
def bootstrap_ci(
values: list[float],
*,
iterations: int = 2000,
alpha: float = 0.05,
seed: int | None = None,
) -> tuple[float, float]:
"""Percentile-method bootstrap CI for the mean of ``values``.
Deterministic when ``seed`` is provided. Returns ``(lo, hi)``. For
``len(values) < 2`` returns ``(mean, mean)``.
"""
if not values:
return (0.0, 0.0)
n = len(values)
mean = sum(values) / n
if n < 2 or iterations <= 0:
return (mean, mean)
rng = random.Random(seed)
means: list[float] = []
for _ in range(iterations):
s = 0.0
for _ in range(n):
s += values[rng.randrange(n)]
means.append(s / n)
means.sort()
lo_idx = int(math.floor((alpha / 2.0) * iterations))
hi_idx = int(math.ceil((1.0 - alpha / 2.0) * iterations)) - 1
lo_idx = max(0, min(iterations - 1, lo_idx))
hi_idx = max(0, min(iterations - 1, hi_idx))
return (means[lo_idx], means[hi_idx])
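# Determinism note: random.Random(seed) is a self-contained generator, so
# e.g. bootstrap_ci([1.0, -1.0, 2.0, 0.5], seed=42) (hypothetical values)
# returns the identical (lo, hi) pair on every run and never touches the
# global random state.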
def win_rate(trades: Iterable[Trade]) -> tuple[int, int, float]:
"""Return ``(wins, n_resolved, wr)`` ignoring pending trades."""
resolved = [t for t in trades if not t.is_pending]
wins = sum(1 for t in resolved if t.is_win)
n = len(resolved)
wr = (wins / n) if n else 0.0
return wins, n, wr
def expectancy(trades: Iterable[Trade], overlay: str = "pl_marius") -> float:
"""Mean P/L (in R) over non-pending trades, on the given overlay."""
if overlay not in {"pl_marius", "pl_theoretical"}:
raise ValueError(f"unknown overlay {overlay!r}")
if overlay == "pl_marius":
vals = [t.pl_marius for t in trades if t.pl_marius is not None]
else:
vals = [t.pl_theoretical for t in trades if not t.is_pending]
if not vals:
return 0.0
return sum(vals) / len(vals)
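# e.g. resolved trades at +1.0R, -1.0R, +2.0R plus one pending row give
# expectancy ~= +0.667R on pl_marius; the pending trade is excluded from
# the mean rather than counted as 0.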
# ---------------------------------------------------------------------------
# Group stats
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class GroupStats:
key: str
n_total: int
n_resolved: int
wins: int
wr: float
wr_ci_lo: float
wr_ci_hi: float
exp_marius: float
exp_marius_ci_lo: float
exp_marius_ci_hi: float
exp_theoretical: float
exp_theoretical_ci_lo: float
exp_theoretical_ci_hi: float
def group_by(trades: Iterable[Trade], field_name: str) -> dict[str, list[Trade]]:
out: dict[str, list[Trade]] = {}
for t in trades:
key = getattr(t, field_name, "") or "(blank)"
out.setdefault(key, []).append(t)
return out
def compute_group_stats(
trades: list[Trade],
*,
label: str,
bootstrap_iterations: int = 2000,
seed: int | None = None,
) -> GroupStats:
wins, n_resolved, wr = win_rate(trades)
wr_lo, wr_hi = wilson_ci(wins, n_resolved)
pl_m_vals = [t.pl_marius for t in trades if t.pl_marius is not None]
exp_m = (sum(pl_m_vals) / len(pl_m_vals)) if pl_m_vals else 0.0
exp_m_lo, exp_m_hi = bootstrap_ci(
pl_m_vals, iterations=bootstrap_iterations, seed=seed
)
pl_t_vals = [t.pl_theoretical for t in trades if not t.is_pending]
exp_t = (sum(pl_t_vals) / len(pl_t_vals)) if pl_t_vals else 0.0
exp_t_lo, exp_t_hi = bootstrap_ci(
pl_t_vals,
iterations=bootstrap_iterations,
seed=None if seed is None else seed + 1,
)
return GroupStats(
key=label,
n_total=len(trades),
n_resolved=n_resolved,
wins=wins,
wr=wr,
wr_ci_lo=wr_lo,
wr_ci_hi=wr_hi,
exp_marius=exp_m,
exp_marius_ci_lo=exp_m_lo,
exp_marius_ci_hi=exp_m_hi,
exp_theoretical=exp_t,
exp_theoretical_ci_lo=exp_t_lo,
exp_theoretical_ci_hi=exp_t_hi,
)
# ---------------------------------------------------------------------------
# Calibration mode
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class CalibrationReport:
pairs: int
field_mismatches: dict[str, int]
total_comparisons: int
@property
def overall_mismatch_rate(self) -> float:
if self.total_comparisons == 0:
return 0.0
total = sum(self.field_mismatches.values())
return total / self.total_comparisons
def _normalise_for_compare(field_name: str, value: str) -> str:
s = (value or "").strip()
if field_name in {"entry", "sl", "tp0", "tp1", "tp2"}:
try:
return f"{float(s):.4f}"
except ValueError:
return s
return s
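# e.g. "1.2" and "1.2000" both normalise to "1.2000" for the price fields,
# so a pure formatting difference between the manual and vision legs is not
# counted as a mismatch; non-numeric text falls through unchanged.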
def calibration_mismatch(
trades: Iterable[Trade],
*,
fields: tuple[str, ...] = CORE_CALIBRATION_FIELDS,
) -> CalibrationReport:
"""Pair ``manual_calibration`` and ``vision_calibration`` rows by
``screenshot_file``, then count mismatches per ``fields``.
Returns a :class:`CalibrationReport`. Unpaired calibration rows are
silently ignored — they cannot contribute to a comparison.
"""
manual: dict[str, Trade] = {}
vision: dict[str, Trade] = {}
for t in trades:
if t.source == "manual_calibration":
manual[t.screenshot_file] = t
elif t.source == "vision_calibration":
vision[t.screenshot_file] = t
paired_files = sorted(set(manual) & set(vision))
field_mismatches: dict[str, int] = {f: 0 for f in fields}
for f in paired_files:
m = manual[f]
v = vision[f]
for fld in fields:
mv = _normalise_for_compare(fld, m.raw.get(fld, ""))
vv = _normalise_for_compare(fld, v.raw.get(fld, ""))
if mv != vv:
field_mismatches[fld] += 1
total_comparisons = len(paired_files) * len(fields)
return CalibrationReport(
pairs=len(paired_files),
field_mismatches=field_mismatches,
total_comparisons=total_comparisons,
)
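# Pairing sketch (hypothetical files): manual_calibration rows for
# {a.png, b.png} and vision_calibration rows for {b.png, c.png} compare
# only b.png; a.png and c.png stay unpaired and are ignored, and with the
# 8 CORE_CALIBRATION_FIELDS this gives total_comparisons == 1 * 8.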
# ---------------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------------
def _fmt_pct(p: float) -> str:
return f"{100.0 * p:5.1f}%"
def _fmt_r(x: float) -> str:
return f"{x:+.3f}R"
def _fmt_stats_row(s: GroupStats) -> str:
return (
f"{s.key:<14} N={s.n_total:>3} (resolved {s.n_resolved:>3}) "
f"WR={_fmt_pct(s.wr)} [{_fmt_pct(s.wr_ci_lo)}, {_fmt_pct(s.wr_ci_hi)}] "
f"E_marius={_fmt_r(s.exp_marius)} "
f"[{_fmt_r(s.exp_marius_ci_lo)}, {_fmt_r(s.exp_marius_ci_hi)}] "
f"E_theor={_fmt_r(s.exp_theoretical)}"
)
def format_report(
trades: list[Trade],
*,
bootstrap_iterations: int = 2000,
seed: int | None = None,
) -> str:
"""Render the main stats report.
Only ``source in {vision, manual}`` rows are included in the WR /
expectancy computations; calibration rows are reported separately via
``--calibration``.
"""
backtest = [t for t in trades if t.source in BACKTEST_SOURCES]
lines: list[str] = []
lines.append("=== M2D Backtest Stats ===")
lines.append(f"Backtest rows: {len(backtest)} (calibration excluded)")
lines.append("")
if not backtest:
lines.append("(no backtest trades yet)")
return "\n".join(lines)
overall = compute_group_stats(
backtest,
label="OVERALL",
bootstrap_iterations=bootstrap_iterations,
seed=seed,
)
lines.append("-- Overall --")
lines.append(_fmt_stats_row(overall))
lines.append("")
def _emit_group(title: str, field_name: str, key_order: list[str] | None = None) -> None:
lines.append(f"-- By {title} --")
groups = group_by(backtest, field_name)
keys = key_order if key_order is not None else sorted(groups)
for k in keys:
if k not in groups:
continue
# Built-in hash() is salted per process for str, which would break the
# documented seed determinism; zlib.crc32 is stable across runs.
sub_seed = None if seed is None else seed + zlib.crc32(k.encode()) % 10_000
s = compute_group_stats(
groups[k],
label=k,
bootstrap_iterations=bootstrap_iterations,
seed=sub_seed,
)
lines.append(_fmt_stats_row(s))
lines.append("")
_emit_group(
"Set",
"set",
key_order=["A1", "A2", "A3", "B", "C", "D", "Other"],
)
_emit_group("Instrument", "instrument")
lines.append(
"[!] By calitate — descriptor only (post-outcome, biased; do not use "
"as a GO LIVE filter — see STOPPING_RULE.md §3)."
)
_emit_group(
"calitate",
"calitate",
key_order=["Clară", "Mai mare ca impuls", "Slabă", "n/a"],
)
return "\n".join(lines).rstrip() + "\n"
def format_calibration_report(trades: list[Trade]) -> str:
cal = calibration_mismatch(trades)
lines: list[str] = []
lines.append("=== Calibration P4 gate ===")
lines.append(f"Paired screenshots (manual ∩ vision): {cal.pairs}")
if cal.pairs == 0:
lines.append("(no calibration pairs yet)")
return "\n".join(lines) + "\n"
lines.append("")
lines.append(f"{'field':<14} mismatches / pairs rate")
for fld in CORE_CALIBRATION_FIELDS:
m = cal.field_mismatches.get(fld, 0)
rate = (m / cal.pairs) if cal.pairs else 0.0
lines.append(f"{fld:<14} {m:>3} / {cal.pairs:<3} {_fmt_pct(rate)}")
lines.append("")
lines.append(
f"Overall mismatch rate: {_fmt_pct(cal.overall_mismatch_rate)} "
f"({sum(cal.field_mismatches.values())} of {cal.total_comparisons} comparisons)"
)
threshold = 0.10
verdict = "PASS" if cal.overall_mismatch_rate <= threshold else "FAIL"
lines.append(f"P4 gate (<= 10%): {verdict}")
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
prog="stats",
description="Backtest statistics for data/jurnal.csv",
)
parser.add_argument(
"--csv",
type=Path,
default=Path("data/jurnal.csv"),
help="Path to the jurnal CSV (default: data/jurnal.csv).",
)
parser.add_argument(
"--calibration",
action="store_true",
help="Show P4 calibration mismatch report instead of backtest stats.",
)
parser.add_argument(
"--bootstrap-iterations",
type=int,
default=2000,
help="Bootstrap iterations for expectancy CI (default: 2000).",
)
parser.add_argument(
"--seed",
type=int,
default=None,
help="Seed for the bootstrap RNG (set for deterministic output).",
)
args = parser.parse_args(argv)
trades = load_trades(args.csv)
if args.calibration:
out = format_calibration_report(trades)
else:
out = format_report(
trades,
bootstrap_iterations=args.bootstrap_iterations,
seed=args.seed,
)
# Force UTF-8 on stdout: the report contains diacritics ("Clară", "Slabă")
# and a console codepage like cp1252 would crash on those.
try:
sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
except (AttributeError, OSError):
pass
sys.stdout.write(out)
return 0
if __name__ == "__main__":
raise SystemExit(main())
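Since ``main`` takes an explicit ``argv``, the CLI is scriptable from Python too; a short sketch (the path and seed are illustrative):

from scripts.stats import main

# Deterministic backtest report: a fixed seed pins the bootstrap CIs.
main(["--csv", "data/jurnal.csv", "--seed", "42"])

# P4 calibration gate report instead of the backtest stats.
main(["--csv", "data/jurnal.csv", "--calibration"])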