scripts: regenerate_md + stats + tests (116-144 passing across modules)

This commit is contained in:
Marius
2026-05-13 12:45:05 +03:00
parent ce80151c58
commit 26d084dc4b
6 changed files with 1843 additions and 283 deletions

View File

@@ -1,22 +1,26 @@
"""Append a validated M2D extraction to ``data/trades.csv``. """Append a validated M2D extraction to ``data/jurnal.csv``.
Pipeline: Pipeline:
JSON file --> pydantic validate (M2DExtraction) JSON file --> pydantic validate (M2DExtraction)
--> load data/_meta.yaml (versions + schema) --> load data/_meta.yaml (versions)
--> compute ora_ro, zi, set, pl_marius, pl_theoretical --> compute id, ora_ro, zi, set, pl_marius, pl_theoretical, extracted_at
--> dedup on (screenshot_file, source) --> dedup on (screenshot_file, source)
--> atomic CSV write (temp file + os.replace) --> atomic CSV write (sibling .tmp + os.replace)
Source values Source values
- ``manual`` : Marius logged by hand
- ``vision`` : produced by the vision subagent - ``vision`` : produced by the vision subagent
- ``manual`` : Marius logged by hand
- ``manual_calibration`` : calibration P4 — manual leg - ``manual_calibration`` : calibration P4 — manual leg
- ``vision_calibration`` : calibration P4 — vision leg - ``vision_calibration`` : calibration P4 — vision leg
A row with ``source=manual_calibration`` and a row with ``source=vision_calibration`` A row with ``source=manual_calibration`` and a row with ``source=vision_calibration``
for the *same* screenshot are allowed to coexist (different dedup keys); a for the *same* screenshot are allowed to coexist (different dedup keys).
duplicate ``(screenshot_file, source)`` pair is rejected (or skipped — see
``append_row`` ``on_duplicate`` argument). Failure mode: ``append_extraction`` NEVER raises. On any error (missing JSON,
pydantic ValidationError, dedup hit, etc.) it returns
``{"status": "rejected", "reason": "...", "id": None, "row": None}`` so the
caller (a slash command) can decide what to do with the screenshot
(move to ``needs_review/``, log to workflow, etc.).
""" """
from __future__ import annotations from __future__ import annotations
@@ -24,41 +28,43 @@ from __future__ import annotations
import csv import csv
import json import json
import os import os
import tempfile import traceback
from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Literal from typing import Any, Literal
import yaml import yaml
from pydantic import ValidationError
from scripts.calendar_parse import calc_set, load_calendar, utc_to_ro from scripts.calendar_parse import calc_set, load_calendar, utc_to_ro
from scripts.pl_calc import pl_marius, pl_theoretical from scripts.pl_calc import pl_marius, pl_theoretical
from scripts.vision_schema import M2DExtraction, parse_extraction_dict from scripts.vision_schema import M2DExtraction, parse_extraction
__all__ = [ __all__ = [
"CSV_COLUMNS", "CSV_COLUMNS",
"VALID_SOURCES", "VALID_SOURCES",
"build_row", "ZI_RO_MAP",
"read_rows", "csv_columns",
"append_row", "append_extraction",
"append_row_from_json",
] ]
Source = Literal["manual", "vision", "manual_calibration", "vision_calibration"] Source = Literal["vision", "manual", "manual_calibration", "vision_calibration"]
VALID_SOURCES: frozenset[str] = frozenset( VALID_SOURCES: frozenset[str] = frozenset(
{"manual", "vision", "manual_calibration", "vision_calibration"} {"vision", "manual", "manual_calibration", "vision_calibration"}
) )
# Canonical column order (29) — must stay stable; regenerate_md + stats depend on it.
CSV_COLUMNS: tuple[str, ...] = ( CSV_COLUMNS: tuple[str, ...] = (
"id",
"screenshot_file", "screenshot_file",
"source", "source",
"data", "data",
"ora_utc",
"ora_ro",
"zi", "zi",
"set", "ora_ro",
"ora_utc",
"instrument", "instrument",
"directie", "directie",
"tf_mare", "tf_mare",
@@ -73,17 +79,38 @@ CSV_COLUMNS: tuple[str, ...] = (
"outcome_path", "outcome_path",
"max_reached", "max_reached",
"be_moved", "be_moved",
"confidence",
"ambiguities",
"note",
"pl_marius", "pl_marius",
"pl_theoretical", "pl_theoretical",
"set",
"indicator_version", "indicator_version",
"pl_overlay_version", "pl_overlay_version",
"csv_schema_version", "csv_schema_version",
"extracted_at",
"note",
) )
# English weekday abbreviation (as produced by utc_to_ro's day_short) ->
# Romanian two-letter journal abbreviation (Lu=luni ... Du=duminică).
ZI_RO_MAP: dict[str, str] = {
    "Mon": "Lu",
    "Tue": "Ma",
    "Wed": "Mi",
    "Thu": "Jo",
    "Fri": "Vi",
    "Sat": "Sa",
    "Sun": "Du",
}
def csv_columns() -> list[str]:
    """Return a fresh mutable copy of the canonical 29-column header."""
    return [*CSV_COLUMNS]
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def _load_meta(meta_path: Path) -> dict[str, Any]: def _load_meta(meta_path: Path) -> dict[str, Any]:
with meta_path.open("r", encoding="utf-8") as fh: with meta_path.open("r", encoding="utf-8") as fh:
meta = yaml.safe_load(fh) or {} meta = yaml.safe_load(fh) or {}
@@ -94,35 +121,69 @@ def _load_meta(meta_path: Path) -> dict[str, Any]:
return meta return meta
def _read_existing_rows(csv_path: Path) -> list[dict[str, str]]:
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _next_id(rows: list[dict[str, str]]) -> int:
max_id = 0
for r in rows:
raw = r.get("id", "")
if not raw:
continue
try:
v = int(raw)
except (TypeError, ValueError):
continue
if v > max_id:
max_id = v
return max_id + 1
def _format_optional(value: float | None) -> str: def _format_optional(value: float | None) -> str:
return "" if value is None else f"{value:.4f}" return "" if value is None else f"{value:.4f}"
def build_row( def _write_csv_atomic(
csv_path: Path, rows: list[dict[str, str]], columns: list[str]
) -> None:
csv_path.parent.mkdir(parents=True, exist_ok=True)
tmp = csv_path.with_suffix(csv_path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=columns)
writer.writeheader()
for row in rows:
writer.writerow({k: row.get(k, "") for k in columns})
os.replace(tmp, csv_path)
def _build_row(
extraction: M2DExtraction, extraction: M2DExtraction,
*,
source: str, source: str,
row_id: int,
meta: dict[str, Any], meta: dict[str, Any],
calendar: list[dict[str, Any]], calendar: list[dict[str, Any]],
extracted_at: str,
) -> dict[str, str]: ) -> dict[str, str]:
"""Compute the full CSV row dict for one extraction.""" d_ro, t_ro, day_short = utc_to_ro(extraction.data, extraction.ora_utc)
if source not in VALID_SOURCES: set_label = calc_set(d_ro, t_ro, day_short, calendar)
raise ValueError(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
d_ro, t_ro, zi = utc_to_ro(extraction.data, extraction.ora_utc)
set_label = calc_set(d_ro, t_ro, zi, calendar)
pl_m = pl_marius(extraction.outcome_path, extraction.be_moved) pl_m = pl_marius(extraction.outcome_path, extraction.be_moved)
pl_t = pl_theoretical(extraction.max_reached) pl_t = pl_theoretical(extraction.max_reached)
zi_ro = ZI_RO_MAP[day_short]
return { return {
"id": str(row_id),
"screenshot_file": extraction.screenshot_file, "screenshot_file": extraction.screenshot_file,
"source": source, "source": source,
"data": extraction.data, "data": extraction.data,
"ora_utc": extraction.ora_utc, "zi": zi_ro,
"ora_ro": t_ro.strftime("%H:%M"), "ora_ro": t_ro.strftime("%H:%M"),
"zi": zi, "ora_utc": extraction.ora_utc,
"set": set_label,
"instrument": extraction.instrument, "instrument": extraction.instrument,
"directie": extraction.directie, "directie": extraction.directie,
"tf_mare": extraction.tf_mare, "tf_mare": extraction.tf_mare,
@@ -136,102 +197,115 @@ def build_row(
"risc_pct": f"{extraction.risc_pct}", "risc_pct": f"{extraction.risc_pct}",
"outcome_path": extraction.outcome_path, "outcome_path": extraction.outcome_path,
"max_reached": extraction.max_reached, "max_reached": extraction.max_reached,
"be_moved": "true" if extraction.be_moved else "false", "be_moved": str(extraction.be_moved),
"confidence": extraction.confidence,
"ambiguities": json.dumps(extraction.ambiguities, ensure_ascii=False),
"note": extraction.note,
"pl_marius": _format_optional(pl_m), "pl_marius": _format_optional(pl_m),
"pl_theoretical": _format_optional(pl_t), "pl_theoretical": _format_optional(pl_t),
"set": set_label,
"indicator_version": str(meta["indicator_version"]), "indicator_version": str(meta["indicator_version"]),
"pl_overlay_version": str(meta["pl_overlay_version"]), "pl_overlay_version": str(meta["pl_overlay_version"]),
"csv_schema_version": str(meta["csv_schema_version"]), "csv_schema_version": str(meta["csv_schema_version"]),
"extracted_at": extracted_at,
"note": extraction.note,
} }
def read_rows(csv_path: Path) -> list[dict[str, str]]: def _reject(reason: str) -> dict[str, Any]:
"""Read existing rows; return [] if the file does not exist or is empty.""" return {"status": "rejected", "reason": reason, "id": None, "row": None}
if not csv_path.exists() or csv_path.stat().st_size == 0:
return []
with csv_path.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
return list(reader)
def _atomic_write(csv_path: Path, rows: list[dict[str, str]]) -> None: # ---------------------------------------------------------------------------
csv_path.parent.mkdir(parents=True, exist_ok=True) # public API
fd, tmp_name = tempfile.mkstemp( # ---------------------------------------------------------------------------
prefix=csv_path.name + ".",
suffix=".tmp",
dir=str(csv_path.parent),
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="") as fh:
writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS))
writer.writeheader()
for r in rows:
writer.writerow({k: r.get(k, "") for k in CSV_COLUMNS})
os.replace(tmp_name, csv_path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
def append_row( def append_extraction(
extraction: M2DExtraction, json_path: Path | str,
source: str, source: str,
csv_path: Path, csv_path: Path | str = "data/jurnal.csv",
meta_path: Path, meta_path: Path | str = "data/_meta.yaml",
calendar_path: Path, calendar_path: Path | str = "calendar_evenimente.yaml",
on_duplicate: Literal["raise", "skip"] = "raise", ) -> dict[str, Any]:
) -> dict[str, str]: """Append one validated extraction to the jurnal CSV.
"""Append one extraction to the CSV.
Dedup key: ``(screenshot_file, source)``. If a row with the same key Never raises. Returns one of:
already exists, behaviour is controlled by ``on_duplicate``:
- ``"raise"`` (default): raise ``ValueError``. - ``{"status": "ok", "reason": "", "id": <int>, "row": <dict>}``
- ``"skip"``: leave the CSV untouched and return the *existing* row. - ``{"status": "rejected", "reason": <str>, "id": None, "row": None}``
""" """
meta = _load_meta(meta_path) json_path = Path(json_path)
calendar = load_calendar(calendar_path) csv_path = Path(csv_path)
row = build_row(extraction, source, meta, calendar) meta_path = Path(meta_path)
calendar_path = Path(calendar_path)
existing = read_rows(csv_path) if source not in VALID_SOURCES:
key = (row["screenshot_file"], row["source"]) return _reject(
f"invalid source {source!r}; must be one of {sorted(VALID_SOURCES)}"
)
if not json_path.exists():
return _reject(f"JSON file not found: {json_path}")
try:
with json_path.open("r", encoding="utf-8") as fh:
raw = fh.read()
except OSError as exc:
return _reject(f"failed to read JSON {json_path}: {exc}")
try:
extraction = parse_extraction(raw)
except ValidationError as exc:
return _reject(f"validation error: {exc}")
except (ValueError, json.JSONDecodeError) as exc:
return _reject(f"validation error (json parse): {exc}")
try:
meta = _load_meta(meta_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"_meta.yaml not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"_meta.yaml invalid: {exc}")
try:
calendar = load_calendar(calendar_path)
except (FileNotFoundError, OSError) as exc:
return _reject(f"calendar not found: {exc}")
except (ValueError, yaml.YAMLError) as exc:
return _reject(f"calendar invalid: {exc}")
try:
existing = _read_existing_rows(csv_path)
except OSError as exc:
return _reject(f"failed to read existing CSV {csv_path}: {exc}")
key = (extraction.screenshot_file, source)
for r in existing: for r in existing:
if (r.get("screenshot_file"), r.get("source")) == key: if (r.get("screenshot_file"), r.get("source")) == key:
if on_duplicate == "skip": return _reject(
return r f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r}"
raise ValueError(
f"duplicate row: screenshot_file={key[0]!r} source={key[1]!r} "
f"already exists in {csv_path}"
) )
existing.append(row) row_id = _next_id(existing)
_atomic_write(csv_path, existing) extracted_at = (
return row datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
def append_row_from_json(
json_path: Path,
source: str,
csv_path: Path,
meta_path: Path,
calendar_path: Path,
on_duplicate: Literal["raise", "skip"] = "raise",
) -> dict[str, str]:
"""Convenience wrapper: load JSON, validate, append."""
with Path(json_path).open("r", encoding="utf-8") as fh:
payload = json.load(fh)
extraction = parse_extraction_dict(payload)
return append_row(
extraction=extraction,
source=source,
csv_path=csv_path,
meta_path=meta_path,
calendar_path=calendar_path,
on_duplicate=on_duplicate,
) )
try:
row = _build_row(
extraction,
source=source,
row_id=row_id,
meta=meta,
calendar=calendar,
extracted_at=extracted_at,
)
except (KeyError, ValueError) as exc:
return _reject(f"derived-field computation failed: {exc}")
try:
_write_csv_atomic(csv_path, [*existing, row], list(CSV_COLUMNS))
except OSError as exc:
return _reject(
f"atomic write failed: {exc}\n{traceback.format_exc()}"
)
return {"status": "ok", "reason": "", "id": row_id, "row": row}

240
scripts/regenerate_md.py Normal file
View File

@@ -0,0 +1,240 @@
"""Regenerate ``data/jurnal.md`` from ``data/jurnal.csv``.
CSV is the source of truth (29 columns, schema owned by ``scripts.append_row``).
MD is a human-readable mirror with a curated 18-column table.
CLI: ``python scripts/regenerate_md.py [csv_path] [md_path]``
"""
from __future__ import annotations
import csv
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Sequence
from scripts.append_row import csv_columns
__all__ = ["MD_COLUMNS", "regenerate_md", "main"]
MD_COLUMNS: tuple[str, ...] = (
"#",
"Data",
"Zi",
"Ora RO",
"Set",
"Instrument",
"Direcție",
"Calitate",
"Entry",
"SL",
"TP0",
"TP1",
"TP2",
"outcome_path",
"P/L (Marius)",
"P/L (theoretic)",
"Source",
"Note",
)
_CSV_FIELDS_USED: tuple[str, ...] = (
"id",
"data",
"zi",
"ora_ro",
"set",
"instrument",
"directie",
"calitate",
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"pl_marius",
"pl_theoretical",
"source",
"note",
)
_DIRECTIE_DISPLAY = {"long": "Buy", "short": "Sell", "buy": "Buy", "sell": "Sell"}
def _fmt_pl(value: str) -> str:
if value is None or value == "":
return "pending"
try:
return f"{float(value):+.2f}"
except ValueError:
return value
def _fmt_directie(value: str) -> str:
    """Map a raw direction value to its display form ('long'/'buy' -> 'Buy', etc.).

    Unrecognised values are returned unchanged; blank stays blank.
    """
    if not value:
        return ""
    normalised = value.strip().lower()
    return _DIRECTIE_DISPLAY.get(normalised, value)
def _escape_cell(value: str) -> str:
return (value or "").replace("|", "\\|").replace("\n", " ").strip()
def _placeholder_md() -> str:
return (
"# Jurnal M2D (auto-generated)\n"
"\n"
"*Niciun trade încă. Adaugă unul prin `/m2d-log` sau `/backtest`.*\n"
)
def _atomic_write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_name = tempfile.mkstemp(
prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)
)
try:
with os.fdopen(fd, "w", encoding="utf-8", newline="\n") as fh:
fh.write(content)
os.replace(tmp_name, path)
except Exception:
try:
os.unlink(tmp_name)
except OSError:
pass
raise
def _row_to_cells(row: dict[str, str], display_index: int) -> tuple[str, ...]:
    """Project one CSV row onto the 18 MD table cells, in MD_COLUMNS order."""
    def col(name: str) -> str:
        return row.get(name, "") or ""

    cells = [
        str(display_index),
        col("data"),
        col("zi"),
        col("ora_ro"),
        col("set"),
        col("instrument"),
        _fmt_directie(col("directie")),
        col("calitate"),
        col("entry"),
        col("sl"),
        col("tp0"),
        col("tp1"),
        col("tp2"),
        col("outcome_path"),
        _fmt_pl(col("pl_marius")),
        _fmt_pl(col("pl_theoretical")),
        col("source"),
        col("note"),
    ]
    return tuple(cells)
def _render_table(rows: Sequence[dict[str, str]]) -> str:
    """Render the curated markdown table (header, separator, one line per row)."""
    out = [
        "| " + " | ".join(MD_COLUMNS) + " |",
        "|" + "|".join(["---"] * len(MD_COLUMNS)) + "|",
    ]
    for index, row in enumerate(rows, start=1):
        cells = _row_to_cells(row, index)
        rendered = " | ".join(_escape_cell(cell) for cell in cells)
        out.append(f"| {rendered} |")
    return "\n".join(out)
def _render_md(rows: Sequence[dict[str, str]]) -> str:
    """Render the full markdown mirror; placeholder doc when there are no rows."""
    if not rows:
        return _placeholder_md()
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    parts = [
        "# Jurnal M2D (auto-generated from data/jurnal.csv)",
        "",
        f"Generated: {stamp}",
        f"Rows: {len(rows)}",
        "",
        _render_table(rows),
        "",
        "*Vezi `data/jurnal.csv` pentru toate cele 29 coloane "
        "(id, ora_utc, tf_*, risc_pct, be_moved, max_reached, versions, extracted_at).*",
        "",
    ]
    return "\n".join(parts)
def _id_sort_key(raw: str) -> tuple[int, int | str]:
try:
return (0, int(raw))
except (ValueError, TypeError):
return (1, raw or "")
def _load_rows(csv_path: Path) -> list[dict[str, str]]:
    """Read the jurnal CSV and return rows (required fields only) sorted by id.

    Schema drift handling:
    - Extra header columns → one warning to stderr, values dropped.
    - Missing required header columns → warning to stderr per affected row
      (every such row is skipped).
    """
    if not csv_path.exists() or csv_path.stat().st_size == 0:
        return []
    known = set(csv_columns())
    needed = set(_CSV_FIELDS_USED)
    out: list[dict[str, str]] = []
    with csv_path.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle)
        header = list(reader.fieldnames or [])
        unknown = [col for col in header if col not in known]
        if unknown:
            print(
                f"regenerate_md: warning: unknown CSV columns ignored: {unknown}",
                file=sys.stderr,
            )
        absent = needed - set(header)
        for record in reader:
            if absent:
                print(
                    f"regenerate_md: warning: row skipped (missing required "
                    f"columns: {sorted(absent)})",
                    file=sys.stderr,
                )
                continue
            out.append({key: (record.get(key) or "") for key in needed})
    out.sort(key=lambda row: _id_sort_key(row.get("id", "")))
    return out
def regenerate_md(
    csv_path: Path | str = "data/jurnal.csv",
    md_path: Path | str = "data/jurnal.md",
) -> int:
    """Read the CSV and rewrite the MD mirror atomically.

    Returns the number of trade rows written to the table.
    """
    source = Path(csv_path)
    target = Path(md_path)
    table_rows = _load_rows(source)
    _atomic_write_text(target, _render_md(table_rows))
    return len(table_rows)
def main() -> int:
    """CLI entry point: ``python scripts/regenerate_md.py [csv_path] [md_path]``."""
    argv = sys.argv[1:]
    csv_arg = argv[0] if argv else "data/jurnal.csv"
    md_arg = argv[1] if len(argv) > 1 else "data/jurnal.md"
    count = regenerate_md(csv_arg, md_arg)
    print(f"regenerate_md: wrote {md_arg} with {count} row(s)")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

540
scripts/stats.py Normal file
View File

@@ -0,0 +1,540 @@
"""Backtest statistics for ``data/jurnal.csv``.
Outputs:
- Overall + per-Set + per-calitate + per-instrument WR, expectancy.
- Wilson 95% CI for WR (closed form).
- Bootstrap percentile 95% CI for expectancy (deterministic via ``seed``).
- ``--calibration`` mode: joins ``manual_calibration`` rows with their
``vision_calibration`` counterparts on ``screenshot_file`` and reports
field-by-field mismatch rates for the P4 gate (see ``STOPPING_RULE.md``).
A "win" is any trade with ``pl_marius > 0``. Pending trades
(``pl_marius`` blank, i.e. ``outcome_path in {pending, TP0->pending}``) are
excluded from both WR and expectancy: there is no realised outcome yet.
The ``calitate`` field is a known-biased descriptor (post-outcome
classification — see ``STOPPING_RULE.md`` §3). It is reported as
informational only and explicitly flagged as such; do NOT use it as a
filter for GO LIVE decisions.
"""
from __future__ import annotations
import argparse
import csv
import math
import random
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable
# Public API. NOTE: format_calibration_report and CalibrationReport were
# previously missing here even though the CLI emits the calibration report
# and calibration_mismatch (exported) returns a CalibrationReport — star
# imports and API docs would silently omit them.
__all__ = [
    "CORE_CALIBRATION_FIELDS",
    "BACKTEST_SOURCES",
    "CALIBRATION_SOURCES",
    "Trade",
    "GroupStats",
    "CalibrationReport",
    "load_trades",
    "wilson_ci",
    "bootstrap_ci",
    "win_rate",
    "expectancy",
    "group_by",
    "compute_group_stats",
    "calibration_mismatch",
    "format_report",
    "format_calibration_report",
    "main",
]
# Fields compared in the calibration mismatch gate (STOPPING_RULE.md §P4).
CORE_CALIBRATION_FIELDS: tuple[str, ...] = (
"entry",
"sl",
"tp0",
"tp1",
"tp2",
"outcome_path",
"max_reached",
"directie",
)
BACKTEST_SOURCES: frozenset[str] = frozenset({"vision", "manual"})
CALIBRATION_SOURCES: frozenset[str] = frozenset(
{"manual_calibration", "vision_calibration"}
)
# ---------------------------------------------------------------------------
# Loading / typed access
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class Trade:
    """One realised (or pending) trade row, typed.

    Fields mirror the jurnal CSV columns; ``raw`` keeps the complete
    original row so calibration comparisons can reach columns that are
    not typed here.
    """
    id: int
    screenshot_file: str
    source: str
    data: str
    zi: str
    ora_ro: str
    instrument: str
    directie: str
    calitate: str
    set: str  # CSV column name; shadows the builtin only inside this class body
    outcome_path: str
    max_reached: str
    be_moved: bool
    pl_marius: float | None  # None while the trade has no realised outcome
    pl_theoretical: float
    raw: dict[str, str] = field(default_factory=dict)  # untyped original CSV row
    @property
    def is_pending(self) -> bool:
        # A blank pl_marius cell parses to None -> no realised outcome yet.
        return self.pl_marius is None
    @property
    def is_win(self) -> bool:
        # A win is a strictly positive realised P/L; pending rows never count.
        return self.pl_marius is not None and self.pl_marius > 0
def _parse_optional_float(value: str) -> float | None:
s = (value or "").strip()
if s == "":
return None
return float(s)
def _parse_bool(value: str) -> bool:
return (value or "").strip().lower() in {"true", "1", "yes", "da"}
def _row_to_trade(row: dict[str, str]) -> Trade:
    """Build one typed Trade from a raw CSV row (missing cells default sanely)."""
    g = row.get
    return Trade(
        id=int(g("id") or 0),
        screenshot_file=g("screenshot_file", ""),
        source=g("source", ""),
        data=g("data", ""),
        zi=g("zi", ""),
        ora_ro=g("ora_ro", ""),
        instrument=g("instrument", ""),
        directie=g("directie", ""),
        calitate=g("calitate", ""),
        set=g("set", ""),
        outcome_path=g("outcome_path", ""),
        max_reached=g("max_reached", ""),
        be_moved=_parse_bool(g("be_moved", "")),
        pl_marius=_parse_optional_float(g("pl_marius", "")),
        pl_theoretical=float(g("pl_theoretical") or 0.0),
        raw=dict(row),
    )
def load_trades(csv_path: Path | str) -> list[Trade]:
    """Load all rows of ``csv_path`` as :class:`Trade` objects.

    Returns ``[]`` if the file does not exist or is empty.
    """
    path = Path(csv_path)
    if not path.exists() or path.stat().st_size == 0:
        return []
    with path.open("r", encoding="utf-8", newline="") as handle:
        return [_row_to_trade(record) for record in csv.DictReader(handle)]
# ---------------------------------------------------------------------------
# Statistics primitives
# ---------------------------------------------------------------------------
def wilson_ci(wins: int, n: int, z: float = 1.96) -> tuple[float, float]:
    """Wilson score interval for a binomial proportion.

    Returns ``(lo, hi)`` as proportions in [0, 1]. For ``n == 0`` returns
    ``(0.0, 0.0)``. ``z = 1.96`` corresponds to a 95% CI.

    Raises ``ValueError`` when ``wins`` is outside ``[0, n]``.
    """
    if n <= 0:
        return (0.0, 0.0)
    if wins < 0 or wins > n:
        raise ValueError(f"wins={wins} out of range for n={n}")
    proportion = wins / n
    z_sq = z * z
    shrink = 1.0 + z_sq / n
    midpoint = proportion + z_sq / (2.0 * n)
    spread = z * math.sqrt((proportion * (1.0 - proportion) + z_sq / (4.0 * n)) / n)
    lower = (midpoint - spread) / shrink
    upper = (midpoint + spread) / shrink
    # Clamp: the raw interval can leak marginally outside [0, 1].
    return (max(0.0, lower), min(1.0, upper))
def bootstrap_ci(
    values: list[float],
    *,
    iterations: int = 2000,
    alpha: float = 0.05,
    seed: int | None = None,
) -> tuple[float, float]:
    """Percentile-method bootstrap CI for the mean of ``values``.

    Deterministic when ``seed`` is provided. Returns ``(lo, hi)``. For
    ``len(values) < 2`` returns ``(mean, mean)``.
    """
    if not values:
        return (0.0, 0.0)
    n = len(values)
    mean = sum(values) / n
    if n < 2 or iterations <= 0:
        # Degenerate cases: nothing meaningful to resample.
        return (mean, mean)
    rng = random.Random(seed)
    # One resample-with-replacement mean per iteration; the fixed randrange
    # call order is what makes a seeded run reproducible.
    resampled = sorted(
        sum(values[rng.randrange(n)] for _ in range(n)) / n
        for _ in range(iterations)
    )
    lo_idx = int(math.floor((alpha / 2.0) * iterations))
    hi_idx = int(math.ceil((1.0 - alpha / 2.0) * iterations)) - 1
    lo_idx = max(0, min(iterations - 1, lo_idx))
    hi_idx = max(0, min(iterations - 1, hi_idx))
    return (resampled[lo_idx], resampled[hi_idx])
def win_rate(trades: Iterable[Trade]) -> tuple[int, int, float]:
    """Return ``(wins, n_resolved, wr)``; pending trades are excluded entirely."""
    wins = 0
    resolved_count = 0
    for trade in trades:
        if trade.is_pending:
            continue
        resolved_count += 1
        if trade.is_win:
            wins += 1
    rate = wins / resolved_count if resolved_count else 0.0
    return wins, resolved_count, rate
def expectancy(trades: Iterable[Trade], overlay: str = "pl_marius") -> float:
    """Mean P/L (in R) over non-pending trades, on the given overlay.

    Raises ``ValueError`` for an unknown overlay name.
    """
    if overlay == "pl_marius":
        realised = [t.pl_marius for t in trades if t.pl_marius is not None]
    elif overlay == "pl_theoretical":
        realised = [t.pl_theoretical for t in trades if not t.is_pending]
    else:
        raise ValueError(f"unknown overlay {overlay!r}")
    return sum(realised) / len(realised) if realised else 0.0
# ---------------------------------------------------------------------------
# Group stats
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class GroupStats:
    """Aggregated win-rate and expectancy statistics for one group of trades."""
    key: str  # group label, e.g. "OVERALL", a Set name, or an instrument
    n_total: int  # all trades in the group, pending included
    n_resolved: int  # trades with a realised pl_marius
    wins: int  # resolved trades counted as wins (pl_marius > 0)
    wr: float  # wins / n_resolved (0.0 when nothing is resolved)
    wr_ci_lo: float  # Wilson 95% CI bounds for wr
    wr_ci_hi: float
    exp_marius: float  # mean pl_marius over resolved trades
    exp_marius_ci_lo: float  # bootstrap percentile 95% CI bounds
    exp_marius_ci_hi: float
    exp_theoretical: float  # mean pl_theoretical over resolved trades
    exp_theoretical_ci_lo: float  # bootstrap percentile 95% CI bounds
    exp_theoretical_ci_hi: float
def group_by(trades: Iterable[Trade], field_name: str) -> dict[str, list[Trade]]:
    """Bucket trades by attribute ``field_name``; falsy keys become '(blank)'."""
    buckets: dict[str, list[Trade]] = {}
    for trade in trades:
        label = getattr(trade, field_name, "") or "(blank)"
        buckets.setdefault(label, []).append(trade)
    return buckets
def compute_group_stats(
    trades: list[Trade],
    *,
    label: str,
    bootstrap_iterations: int = 2000,
    seed: int | None = None,
) -> GroupStats:
    """Aggregate WR (+Wilson CI) and expectancy (+bootstrap CI) for one group."""
    wins, resolved, wr = win_rate(trades)
    wr_ci = wilson_ci(wins, resolved)
    marius_vals = [t.pl_marius for t in trades if t.pl_marius is not None]
    theor_vals = [t.pl_theoretical for t in trades if not t.is_pending]
    mean_marius = sum(marius_vals) / len(marius_vals) if marius_vals else 0.0
    mean_theor = sum(theor_vals) / len(theor_vals) if theor_vals else 0.0
    ci_marius = bootstrap_ci(
        marius_vals, iterations=bootstrap_iterations, seed=seed
    )
    # Offset the theoretical-overlay seed so the two CIs don't share resamples.
    ci_theor = bootstrap_ci(
        theor_vals,
        iterations=bootstrap_iterations,
        seed=None if seed is None else seed + 1,
    )
    return GroupStats(
        key=label,
        n_total=len(trades),
        n_resolved=resolved,
        wins=wins,
        wr=wr,
        wr_ci_lo=wr_ci[0],
        wr_ci_hi=wr_ci[1],
        exp_marius=mean_marius,
        exp_marius_ci_lo=ci_marius[0],
        exp_marius_ci_hi=ci_marius[1],
        exp_theoretical=mean_theor,
        exp_theoretical_ci_lo=ci_theor[0],
        exp_theoretical_ci_hi=ci_theor[1],
    )
# ---------------------------------------------------------------------------
# Calibration mode
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class CalibrationReport:
    """Aggregated manual-vs-vision mismatch counts for paired screenshots."""

    pairs: int  # screenshots present in BOTH calibration legs
    field_mismatches: dict[str, int]  # per-field mismatch count over all pairs
    total_comparisons: int  # pairs * number of compared fields

    @property
    def overall_mismatch_rate(self) -> float:
        """Total mismatches divided by total comparisons (0.0 when none)."""
        if not self.total_comparisons:
            return 0.0
        return sum(self.field_mismatches.values()) / self.total_comparisons
def _normalise_for_compare(field_name: str, value: str) -> str:
s = (value or "").strip()
if field_name in {"entry", "sl", "tp0", "tp1", "tp2"}:
try:
return f"{float(s):.4f}"
except ValueError:
return s
return s
def calibration_mismatch(
    trades: Iterable[Trade],
    *,
    fields: tuple[str, ...] = CORE_CALIBRATION_FIELDS,
) -> CalibrationReport:
    """Pair ``manual_calibration`` / ``vision_calibration`` rows by
    ``screenshot_file`` and count per-field mismatches.

    Unpaired calibration rows are silently ignored — with only one leg
    there is nothing to compare against.
    """
    manual_leg: dict[str, Trade] = {}
    vision_leg: dict[str, Trade] = {}
    for trade in trades:
        if trade.source == "manual_calibration":
            manual_leg[trade.screenshot_file] = trade
        elif trade.source == "vision_calibration":
            vision_leg[trade.screenshot_file] = trade
    shared = sorted(set(manual_leg) & set(vision_leg))
    mismatches: dict[str, int] = dict.fromkeys(fields, 0)
    for shot in shared:
        left = manual_leg[shot]
        right = vision_leg[shot]
        for name in fields:
            a = _normalise_for_compare(name, left.raw.get(name, ""))
            b = _normalise_for_compare(name, right.raw.get(name, ""))
            if a != b:
                mismatches[name] += 1
    return CalibrationReport(
        pairs=len(shared),
        field_mismatches=mismatches,
        total_comparisons=len(shared) * len(fields),
    )
# ---------------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------------
def _fmt_pct(p: float) -> str:
return f"{100.0 * p:5.1f}%"
def _fmt_r(x: float) -> str:
return f"{x:+.3f}R"
def _fmt_stats_row(s: GroupStats) -> str:
    """One aligned report line for a group's WR + expectancy stats."""
    parts = [
        f"{s.key:<14} N={s.n_total:>3} (resolved {s.n_resolved:>3})",
        f"WR={_fmt_pct(s.wr)} [{_fmt_pct(s.wr_ci_lo)}, {_fmt_pct(s.wr_ci_hi)}]",
        f"E_marius={_fmt_r(s.exp_marius)}",
        f"[{_fmt_r(s.exp_marius_ci_lo)}, {_fmt_r(s.exp_marius_ci_hi)}]",
        f"E_theor={_fmt_r(s.exp_theoretical)}",
    ]
    return " ".join(parts)
def format_report(
    trades: list[Trade],
    *,
    bootstrap_iterations: int = 2000,
    seed: int | None = None,
) -> str:
    """Render the main stats report.

    Only ``source in {vision, manual}`` rows are included in the WR /
    expectancy computations; calibration rows are reported separately via
    ``--calibration``.

    When ``seed`` is given the output is fully deterministic across runs:
    per-group bootstrap seeds are derived from a process-independent digest
    of the group key. (Builtin ``hash()`` on str is salted per interpreter
    run via PYTHONHASHSEED and must not be used for this.)
    """
    backtest = [t for t in trades if t.source in BACKTEST_SOURCES]
    lines: list[str] = []
    lines.append("=== M2D Backtest Stats ===")
    lines.append(f"Backtest rows: {len(backtest)} (calibration excluded)")
    lines.append("")
    if not backtest:
        lines.append("(no backtest trades yet)")
        return "\n".join(lines)
    overall = compute_group_stats(
        backtest,
        label="OVERALL",
        bootstrap_iterations=bootstrap_iterations,
        seed=seed,
    )
    lines.append("-- Overall --")
    lines.append(_fmt_stats_row(overall))
    lines.append("")
    def _stable_digest(key: str) -> int:
        # Process-independent replacement for abs(hash(key)) % 10_000: the
        # builtin str hash is randomised per process, which silently broke
        # the documented "deterministic via seed" guarantee.
        return sum(key.encode("utf-8")) % 10_000
    def _emit_group(title: str, field_name: str, key_order: list[str] | None = None) -> None:
        # Append one "-- By <title> --" section: a stats line per group key,
        # in key_order if given (unknown keys skipped) else sorted order.
        lines.append(f"-- By {title} --")
        groups = group_by(backtest, field_name)
        keys = key_order if key_order is not None else sorted(groups)
        for k in keys:
            if k not in groups:
                continue
            sub_seed = None if seed is None else seed + _stable_digest(k)
            s = compute_group_stats(
                groups[k],
                label=k,
                bootstrap_iterations=bootstrap_iterations,
                seed=sub_seed,
            )
            lines.append(_fmt_stats_row(s))
        lines.append("")
    _emit_group(
        "Set",
        "set",
        key_order=["A1", "A2", "A3", "B", "C", "D", "Other"],
    )
    _emit_group("Instrument", "instrument")
    lines.append(
        "[!] By calitate — descriptor only (post-outcome, biased; do not use "
        "as a GO LIVE filter — see STOPPING_RULE.md §3)."
    )
    _emit_group(
        "calitate",
        "calitate",
        key_order=["Clară", "Mai mare ca impuls", "Slabă", "n/a"],
    )
    return "\n".join(lines).rstrip() + "\n"
def format_calibration_report(trades: list[Trade]) -> str:
    """Render the P4 calibration gate report (manual vs vision mismatch rates)."""
    report = calibration_mismatch(trades)
    out: list[str] = ["=== Calibration P4 gate ==="]
    out.append(f"Paired screenshots (manual ∩ vision): {report.pairs}")
    if report.pairs == 0:
        out.append("(no calibration pairs yet)")
        return "\n".join(out) + "\n"
    out.append("")
    out.append(f"{'field':<14} mismatches / pairs rate")
    for name in CORE_CALIBRATION_FIELDS:
        count = report.field_mismatches.get(name, 0)
        share = count / report.pairs if report.pairs else 0.0
        out.append(f"{name:<14} {count:>3} / {report.pairs:<3} {_fmt_pct(share)}")
    out.append("")
    total_mismatches = sum(report.field_mismatches.values())
    out.append(
        f"Overall mismatch rate: {_fmt_pct(report.overall_mismatch_rate)} "
        f"({total_mismatches} of {report.total_comparisons} comparisons)"
    )
    # P4 gate threshold (STOPPING_RULE.md): at most 10% overall mismatch.
    verdict = "PASS" if report.overall_mismatch_rate <= 0.10 else "FAIL"
    out.append(f"P4 gate (<= 10%): {verdict}")
    return "\n".join(out) + "\n"
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """CLI entry point.

    Parses arguments, loads the jurnal CSV and writes either the backtest
    stats report or (with ``--calibration``) the P4 calibration report to
    stdout. Returns the process exit code (always 0 on success).
    """
    parser = argparse.ArgumentParser(
        prog="stats",
        description="Backtest statistics for data/jurnal.csv",
    )
    parser.add_argument(
        "--csv",
        type=Path,
        default=Path("data/jurnal.csv"),
        help="Path to the jurnal CSV (default: data/jurnal.csv).",
    )
    parser.add_argument(
        "--calibration",
        action="store_true",
        help="Show P4 calibration mismatch report instead of backtest stats.",
    )
    parser.add_argument(
        "--bootstrap-iterations",
        type=int,
        default=2000,
        help="Bootstrap iterations for expectancy CI (default: 2000).",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Seed for the bootstrap RNG (set for deterministic output).",
    )
    ns = parser.parse_args(argv)

    trades = load_trades(ns.csv)
    report = (
        format_calibration_report(trades)
        if ns.calibration
        else format_report(
            trades,
            bootstrap_iterations=ns.bootstrap_iterations,
            seed=ns.seed,
        )
    )

    # Force UTF-8 on stdout: the report contains diacritics ("Clară", "Slabă")
    # and a console codepage like cp1252 would crash on those.
    try:
        sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
    except (AttributeError, OSError):
        pass
    sys.stdout.write(report)
    return 0
if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit code.
    raise SystemExit(main())

View File

@@ -1,26 +1,26 @@
"""Tests for scripts/append_row.py.""" """Tests for scripts/append_row.py — append_extraction pipeline."""
from __future__ import annotations from __future__ import annotations
import csv import csv
import json import json
import re
import sys import sys
from datetime import datetime
from pathlib import Path from pathlib import Path
import pytest import pytest
from pydantic import ValidationError import yaml
sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.append_row import ( # noqa: E402 from scripts.append_row import ( # noqa: E402
CSV_COLUMNS, CSV_COLUMNS,
VALID_SOURCES, VALID_SOURCES,
append_row, ZI_RO_MAP,
append_row_from_json, append_extraction,
build_row, csv_columns,
read_rows,
) )
from scripts.vision_schema import parse_extraction_dict # noqa: E402
REPO_ROOT = Path(__file__).resolve().parent.parent REPO_ROOT = Path(__file__).resolve().parent.parent
@@ -29,12 +29,12 @@ META_PATH = REPO_ROOT / "data" / "_meta.yaml"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# fixtures / payload helpers # helpers / fixtures
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _buy_payload(**overrides) -> dict: def _buy_payload(**overrides) -> dict:
# 2026-05-13 14:23 UTC == 17:23 RO (EEST, Wed) → Set A2. # 2026-05-13 14:23 UTC == 17:23 RO (EEST, Wed) → set A2, zi=Mi.
base = { base = {
"screenshot_file": "dia-2026-05-13-1.png", "screenshot_file": "dia-2026-05-13-1.png",
"data": "2026-05-13", "data": "2026-05-13",
@@ -61,198 +61,227 @@ def _buy_payload(**overrides) -> dict:
return base return base
def _write_payload(tmp_path: Path, name: str, **overrides) -> Path:
p = tmp_path / name
p.write_text(json.dumps(_buy_payload(**overrides)), encoding="utf-8")
return p
def _read_rows(csv_path: Path) -> list[dict[str, str]]:
with csv_path.open("r", encoding="utf-8", newline="") as fh:
return list(csv.DictReader(fh))
@pytest.fixture @pytest.fixture
def csv_path(tmp_path: Path) -> Path: def csv_path(tmp_path: Path) -> Path:
return tmp_path / "trades.csv" return tmp_path / "jurnal.csv"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# build_row — computed fields # schema / column layout
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class TestBuildRow: def test_csv_columns_canonical_29() -> None:
def setup_method(self) -> None: cols = csv_columns()
import yaml assert len(cols) == 29
with META_PATH.open("r", encoding="utf-8") as fh: assert cols[0] == "id"
self.meta = yaml.safe_load(fh) assert cols[-1] == "note"
from scripts.calendar_parse import load_calendar assert cols == list(CSV_COLUMNS)
self.calendar = load_calendar(CALENDAR_PATH)
def test_happy_path_computed_fields(self) -> None:
extr = parse_extraction_dict(_buy_payload())
row = build_row(extr, "manual", self.meta, self.calendar)
# 14:23 UTC on 2026-05-13 = 17:23 RO (EEST), Wed → A2
assert row["ora_ro"] == "17:23"
assert row["zi"] == "Wed"
assert row["set"] == "A2"
# pl_marius for TP0->TP1 with be_moved=True is +0.50R
assert float(row["pl_marius"]) == pytest.approx(0.50)
# pl_theoretical for max_reached=TP1 is 0.333
assert float(row["pl_theoretical"]) == pytest.approx(0.333)
# version stamps copied from meta
assert row["indicator_version"] == str(self.meta["indicator_version"])
assert row["pl_overlay_version"] == str(self.meta["pl_overlay_version"])
assert row["csv_schema_version"] == str(self.meta["csv_schema_version"])
def test_pending_overlay_is_blank(self) -> None:
extr = parse_extraction_dict(
_buy_payload(outcome_path="pending", max_reached="TP0")
)
row = build_row(extr, "vision", self.meta, self.calendar)
# pl_marius returns None for pending → empty string in CSV
assert row["pl_marius"] == ""
# pl_theoretical always concrete
assert row["pl_theoretical"] != ""
def test_invalid_source_rejected(self) -> None:
extr = parse_extraction_dict(_buy_payload())
with pytest.raises(ValueError):
build_row(extr, "auto_magic", self.meta, self.calendar)
def test_all_valid_sources_accepted(self) -> None:
extr = parse_extraction_dict(_buy_payload())
for s in VALID_SOURCES:
row = build_row(extr, s, self.meta, self.calendar)
assert row["source"] == s
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# append_row — happy path, dedup, atomic writes # core tests as specified in task #9
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class TestAppendRow: def test_happy_path(tmp_path: Path, csv_path: Path) -> None:
def test_happy_path_writes_header_and_row(self, csv_path: Path) -> None: j = _write_payload(tmp_path, "t.json")
extr = parse_extraction_dict(_buy_payload()) result = append_extraction(
row = append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH) j, "vision", csv_path, META_PATH, CALENDAR_PATH
assert csv_path.exists() )
assert result["status"] == "ok", result
assert result["reason"] == ""
assert result["id"] == 1
with csv_path.open("r", encoding="utf-8", newline="") as fh: rows = _read_rows(csv_path)
reader = csv.DictReader(fh) assert len(rows) == 1
assert reader.fieldnames == list(CSV_COLUMNS) r = rows[0]
rows = list(reader) assert r["id"] == "1"
assert len(rows) == 1 assert r["screenshot_file"] == "dia-2026-05-13-1.png"
assert rows[0]["screenshot_file"] == row["screenshot_file"] assert r["source"] == "vision"
assert rows[0]["set"] == "A2" assert r["data"] == "2026-05-13"
assert rows[0]["source"] == "manual" assert r["zi"] == "Mi"
assert r["ora_ro"] == "17:23"
assert r["ora_utc"] == "14:23"
assert r["set"] == "A2"
assert r["instrument"] == "DIA"
assert r["directie"] == "Buy"
assert r["be_moved"] == "True"
def test_two_distinct_rows(self, csv_path: Path) -> None:
e1 = parse_extraction_dict(_buy_payload(screenshot_file="a.png"))
e2 = parse_extraction_dict(_buy_payload(screenshot_file="b.png"))
append_row(e1, "manual", csv_path, META_PATH, CALENDAR_PATH)
append_row(e2, "manual", csv_path, META_PATH, CALENDAR_PATH)
rows = read_rows(csv_path)
assert len(rows) == 2
assert {r["screenshot_file"] for r in rows} == {"a.png", "b.png"}
def test_dedup_raises(self, csv_path: Path) -> None: def test_pl_calc_overlay(tmp_path: Path, csv_path: Path) -> None:
extr = parse_extraction_dict(_buy_payload()) """outcome_path=TP0->TP1, max_reached=TP1 → pl_marius=0.5, pl_theoretical=0.333."""
append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH) j = _write_payload(tmp_path, "t.json")
with pytest.raises(ValueError, match="duplicate"): result = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH) assert result["status"] == "ok"
# CSV still contains exactly the one row r = _read_rows(csv_path)[0]
assert len(read_rows(csv_path)) == 1 assert float(r["pl_marius"]) == pytest.approx(0.50)
assert float(r["pl_theoretical"]) == pytest.approx(0.333)
def test_dedup_skip(self, csv_path: Path) -> None:
extr = parse_extraction_dict(_buy_payload())
first = append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
# Mutate the extraction; the existing row should be returned untouched.
extr2 = parse_extraction_dict(_buy_payload(note="changed"))
existing = append_row(
extr2, "manual", csv_path, META_PATH, CALENDAR_PATH, on_duplicate="skip"
)
assert existing["note"] == first["note"] == ""
assert len(read_rows(csv_path)) == 1
def test_calibration_coexistence(self, csv_path: Path) -> None: def test_dedup_same_source(tmp_path: Path, csv_path: Path) -> None:
"""manual_calibration + vision_calibration on the SAME screenshot must coexist.""" j = _write_payload(tmp_path, "t.json")
extr = parse_extraction_dict(_buy_payload()) r1 = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
append_row(extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH) r2 = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
# Vision leg may differ slightly — change entry by 0.1, still valid. assert r1["status"] == "ok"
extr_vision = parse_extraction_dict( assert r2["status"] == "rejected"
_buy_payload(entry=400.1, confidence="medium") assert "duplicate" in r2["reason"].lower()
) assert r2["id"] is None
append_row( assert r2["row"] is None
extr_vision, "vision_calibration", csv_path, META_PATH, CALENDAR_PATH assert len(_read_rows(csv_path)) == 1
)
rows = read_rows(csv_path)
assert len(rows) == 2
sources = {r["source"] for r in rows}
assert sources == {"manual_calibration", "vision_calibration"}
# Same screenshot, different source ⇒ no dedup collision.
files = {r["screenshot_file"] for r in rows}
assert files == {extr.screenshot_file}
def test_calibration_duplicate_same_source_rejected( def test_dedup_different_source_ok(tmp_path: Path, csv_path: Path) -> None:
self, csv_path: Path """Same screenshot_file + different source ⇒ both rows accepted."""
) -> None: j = _write_payload(tmp_path, "t.json")
extr = parse_extraction_dict(_buy_payload()) r1 = append_extraction(
append_row(extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH) j, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH
with pytest.raises(ValueError, match="duplicate"): )
append_row( r2 = append_extraction(
extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH j, "vision_calibration", csv_path, META_PATH, CALENDAR_PATH
) )
assert r1["status"] == "ok"
assert r2["status"] == "ok"
rows = _read_rows(csv_path)
assert len(rows) == 2
assert {r["source"] for r in rows} == {"manual_calibration", "vision_calibration"}
# Distinct sequential ids.
assert {r["id"] for r in rows} == {"1", "2"}
def test_invalid_pydantic_rejected(tmp_path: Path, csv_path: Path) -> None:
"""entry == sl is rejected by pydantic; no CSV is written."""
j = _write_payload(tmp_path, "bad.json", entry=399.0, sl=399.0)
result = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
assert result["status"] == "rejected"
assert "validation" in result["reason"].lower()
assert not csv_path.exists()
def test_missing_json_file(tmp_path: Path, csv_path: Path) -> None:
missing = tmp_path / "ghost.json"
result = append_extraction(
missing, "vision", csv_path, META_PATH, CALENDAR_PATH
)
assert result["status"] == "rejected"
assert "not found" in result["reason"].lower()
assert not csv_path.exists()
def test_id_increments(tmp_path: Path, csv_path: Path) -> None:
paths = [
_write_payload(tmp_path, "a.json", screenshot_file="a.png"),
_write_payload(tmp_path, "b.json", screenshot_file="b.png"),
_write_payload(tmp_path, "c.json", screenshot_file="c.png"),
]
ids = []
for p in paths:
r = append_extraction(p, "vision", csv_path, META_PATH, CALENDAR_PATH)
assert r["status"] == "ok"
ids.append(r["id"])
assert ids == [1, 2, 3]
csv_ids = [int(r["id"]) for r in _read_rows(csv_path)]
assert csv_ids == [1, 2, 3]
def test_set_a2(tmp_path: Path, csv_path: Path) -> None:
"""Wed 2026-05-13 14:30 UTC → 17:30 RO → A2 sweet spot."""
j = _write_payload(tmp_path, "t.json", ora_utc="14:30")
r = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
assert r["status"] == "ok"
row = _read_rows(csv_path)[0]
assert row["ora_ro"] == "17:30"
assert row["zi"] == "Mi"
assert row["set"] == "A2"
def test_set_c_fomc(tmp_path: Path, csv_path: Path) -> None:
"""2026-04-29 18:35 UTC == 21:35 RO (FOMC Powell Press window) → Set C."""
j = _write_payload(
tmp_path,
"t.json",
data="2026-04-29",
ora_utc="18:35",
screenshot_file="fomc-apr.png",
)
r = append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
assert r["status"] == "ok"
row = _read_rows(csv_path)[0]
assert row["ora_ro"] == "21:35"
assert row["set"] == "C"
def test_versions_stamped(tmp_path: Path, csv_path: Path) -> None:
j = _write_payload(tmp_path, "t.json")
append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
row = _read_rows(csv_path)[0]
meta = yaml.safe_load(META_PATH.read_text(encoding="utf-8"))
assert row["indicator_version"] == str(meta["indicator_version"])
assert row["pl_overlay_version"] == str(meta["pl_overlay_version"])
assert row["csv_schema_version"] == str(meta["csv_schema_version"])
def test_extracted_at_format(tmp_path: Path, csv_path: Path) -> None:
j = _write_payload(tmp_path, "t.json")
append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
val = _read_rows(csv_path)[0]["extracted_at"]
# ISO 8601 UTC with trailing 'Z': YYYY-MM-DDTHH:MM:SSZ
assert re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", val), val
# Round-trip through datetime.fromisoformat after dropping the Z.
parsed = datetime.fromisoformat(val[:-1])
assert parsed.year >= 2026
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Cross-field invalid input # additional safety nets
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class TestInvalidInput: def test_invalid_source_rejected(tmp_path: Path, csv_path: Path) -> None:
def test_buy_with_inverted_tp_rejected_before_append( j = _write_payload(tmp_path, "t.json")
self, csv_path: Path r = append_extraction(j, "auto_magic", csv_path, META_PATH, CALENDAR_PATH)
) -> None: assert r["status"] == "rejected"
# tp1 < tp0 violates Buy ordering: caught at validation, not by append_row. assert "source" in r["reason"].lower()
with pytest.raises(ValidationError): assert not csv_path.exists()
parse_extraction_dict(
_buy_payload(tp0=401.0, tp1=400.5, tp2=402.0)
)
assert not csv_path.exists() # nothing written
def test_outcome_path_sl_with_tp1_max_rejected(self, csv_path: Path) -> None:
with pytest.raises(ValidationError):
parse_extraction_dict(
_buy_payload(outcome_path="SL", max_reached="TP1")
)
assert not csv_path.exists()
def test_append_row_from_json_invalid_payload(
self, tmp_path: Path, csv_path: Path
) -> None:
bad = tmp_path / "bad.json"
payload = _buy_payload(directie="Long") # invalid Literal
bad.write_text(json.dumps(payload), encoding="utf-8")
with pytest.raises(ValidationError):
append_row_from_json(
bad, "vision", csv_path, META_PATH, CALENDAR_PATH
)
assert not csv_path.exists()
# --------------------------------------------------------------------------- def test_all_valid_sources_accepted(tmp_path: Path, csv_path: Path) -> None:
# Atomic write: no temp file remains on disk for i, src in enumerate(sorted(VALID_SOURCES)):
# --------------------------------------------------------------------------- j = _write_payload(tmp_path, f"t{i}.json", screenshot_file=f"s{i}.png")
r = append_extraction(j, src, csv_path, META_PATH, CALENDAR_PATH)
assert r["status"] == "ok", (src, r)
rows = _read_rows(csv_path)
assert {r["source"] for r in rows} == set(VALID_SOURCES)
class TestAtomicWrite: def test_atomic_write_leaves_no_tmp(tmp_path: Path, csv_path: Path) -> None:
def test_no_temp_file_left_behind(self, csv_path: Path) -> None: j = _write_payload(tmp_path, "t.json")
extr = parse_extraction_dict(_buy_payload()) append_extraction(j, "vision", csv_path, META_PATH, CALENDAR_PATH)
append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH) leftovers = [p for p in csv_path.parent.iterdir() if p.name.endswith(".tmp")]
leftovers = [ assert leftovers == []
p for p in csv_path.parent.iterdir() if p.name.endswith(".tmp")
]
assert leftovers == []
def test_append_row_from_json_roundtrip(
self, tmp_path: Path, csv_path: Path def test_zi_ro_map_covers_all_weekdays() -> None:
) -> None: """Internal sanity: the Romanian-day map covers all 7 short weekday names."""
good = tmp_path / "good.json" assert set(ZI_RO_MAP.keys()) == {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"}
good.write_text(json.dumps(_buy_payload()), encoding="utf-8") assert set(ZI_RO_MAP.values()) == {"Lu", "Ma", "Mi", "Jo", "Vi", "Sa", "Du"}
row = append_row_from_json(
good, "vision", csv_path, META_PATH, CALENDAR_PATH
) def test_malformed_json_rejected(tmp_path: Path, csv_path: Path) -> None:
assert row["source"] == "vision" bad = tmp_path / "broken.json"
assert read_rows(csv_path)[0]["screenshot_file"] == row["screenshot_file"] bad.write_text("{not valid json", encoding="utf-8")
r = append_extraction(bad, "vision", csv_path, META_PATH, CALENDAR_PATH)
assert r["status"] == "rejected"
assert "validation" in r["reason"].lower() or "json" in r["reason"].lower()
assert not csv_path.exists()

208
tests/test_regenerate_md.py Normal file
View File

@@ -0,0 +1,208 @@
"""Tests for scripts/regenerate_md.py."""
from __future__ import annotations
import csv
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.append_row import csv_columns # noqa: E402
from scripts.regenerate_md import MD_COLUMNS, regenerate_md # noqa: E402
def _row(**overrides: str) -> dict[str, str]:
base = {
"id": "1",
"screenshot_file": "2026-05-13_dia_5min.png",
"source": "vision",
"data": "2026-05-13",
"zi": "Mi",
"ora_ro": "17:23",
"ora_utc": "14:23",
"instrument": "DIA",
"directie": "long",
"tf_mare": "5min",
"tf_mic": "1min",
"calitate": "Clară",
"entry": "497.42",
"sl": "496.80",
"tp0": "497.67",
"tp1": "497.79",
"tp2": "498.04",
"risc_pct": "0.50",
"outcome_path": "TP0→TP1",
"max_reached": "TP1",
"be_moved": "true",
"pl_marius": "0.5000",
"pl_theoretical": "0.3330",
"set": "A2",
"indicator_version": "1",
"pl_overlay_version": "1",
"csv_schema_version": "1",
"extracted_at": "2026-05-13T14:30:00Z",
"note": "",
}
base.update(overrides)
return base
def _write_csv(
    path: Path,
    rows: list[dict[str, str]],
    extra_columns: list[str] | None = None,
) -> None:
    """Write *rows* to *path* using the canonical column order.

    *extra_columns* are appended after the canonical header (used to test
    graceful handling of unknown columns). Missing keys become "".
    """
    names = list(csv_columns()) + list(extra_columns or [])
    with path.open("w", encoding="utf-8", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=names)
        writer.writeheader()
        writer.writerows({col: r.get(col, "") for col in names} for r in rows)
def _data_lines(md_text: str) -> list[str]:
    """Return only the markdown table data rows of *md_text*.

    Skips the header row (recognised by its first two MD_COLUMNS labels)
    and the |---| separator row.
    """
    header_prefix = "| " + MD_COLUMNS[0] + " | " + MD_COLUMNS[1]
    selected: list[str] = []
    for line in md_text.splitlines():
        if not line.startswith("|"):
            continue
        if line.startswith(header_prefix) or line.startswith("|---"):
            continue
        selected.append(line)
    return selected
def test_empty_csv_placeholder(tmp_path: Path) -> None:
    """A header-only CSV yields the Romanian 'no trades yet' placeholder."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [])
    assert regenerate_md(src, dst) == 0
    text = dst.read_text(encoding="utf-8")
    assert "# Jurnal M2D (auto-generated)" in text
    assert "Niciun trade încă" in text
    # No table at all in the placeholder output.
    assert "| # |" not in text
def test_missing_csv_placeholder(tmp_path: Path) -> None:
    """A nonexistent CSV still produces a placeholder markdown file."""
    dst = tmp_path / "jurnal.md"
    assert regenerate_md(tmp_path / "does_not_exist.csv", dst) == 0
    assert dst.exists()
    assert "Niciun trade încă" in dst.read_text(encoding="utf-8")
def test_single_row_format(tmp_path: Path) -> None:
    """One CSV row renders as one markdown table row with the expected cells."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [_row()])
    assert regenerate_md(src, dst) == 1

    text = dst.read_text(encoding="utf-8")
    assert "# Jurnal M2D (auto-generated from data/jurnal.csv)" in text
    assert "Rows: 1" in text
    assert ("| " + " | ".join(MD_COLUMNS) + " |") in text

    data = _data_lines(text)
    assert len(data) == 1
    cells = [c.strip() for c in data[0].strip("|").split("|")]
    # Spot-check the cell layout (index → rendered value).
    expected = {
        0: "1",
        1: "2026-05-13",
        2: "Mi",
        3: "17:23",
        4: "A2",
        5: "DIA",
        6: "Buy",
        7: "Clară",
        13: "TP0→TP1",
        14: "+0.50",
        15: "+0.33",
        16: "vision",
    }
    for idx, want in expected.items():
        assert cells[idx] == want, (idx, cells[idx], want)
def test_three_rows(tmp_path: Path) -> None:
    """Three rows written out of order come back in ascending id order."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(
        src,
        [
            _row(id="3", data="2026-05-15", pl_marius="-1.0000"),
            _row(id="1", data="2026-05-13"),
            _row(id="2", data="2026-05-14", pl_marius="0.2000"),
        ],
    )
    assert regenerate_md(src, dst) == 3

    text = dst.read_text(encoding="utf-8")
    assert "Rows: 3" in text
    data = _data_lines(text)
    assert len(data) == 3
    ordered = [("1", "2026-05-13"), ("2", "2026-05-14"), ("3", "2026-05-15")]
    for pos, (rid, day) in enumerate(ordered):
        assert f"| {rid} | {day} |" in data[pos]
def test_pending_pl_displayed(tmp_path: Path) -> None:
    """Blank P/L cells render as the literal string 'pending'."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [_row(pl_marius="", pl_theoretical="")])
    assert regenerate_md(src, dst) == 1
    row_line = _data_lines(dst.read_text(encoding="utf-8"))[0]
    cells = [c.strip() for c in row_line.strip("|").split("|")]
    assert cells[14] == "pending"
    assert cells[15] == "pending"
def test_unknown_column_graceful(
    tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
    """Unknown CSV columns are ignored, with a warning emitted on stderr."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [_row()], extra_columns=["extra_field"])
    assert regenerate_md(src, dst) == 1
    assert "Rows: 1" in dst.read_text(encoding="utf-8")
    err = capsys.readouterr().err
    assert "unknown CSV columns ignored" in err
    assert "extra_field" in err
def test_atomic_write_no_tmp_leftover(tmp_path: Path) -> None:
    """The sibling .tmp file used for the atomic write must not survive."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [_row()])
    regenerate_md(src, dst)
    assert list(tmp_path.glob("*.tmp")) == []
    assert dst.exists()
def test_rows_count_returned(tmp_path: Path) -> None:
    """regenerate_md returns the number of data rows it rendered."""
    src = tmp_path / "jurnal.csv"
    dst = tmp_path / "jurnal.md"
    _write_csv(src, [_row(id=str(i)) for i in range(1, 6)])
    assert regenerate_md(src, dst) == 5

469
tests/test_stats.py Normal file
View File

@@ -0,0 +1,469 @@
"""Tests for scripts/stats.py."""
from __future__ import annotations
import csv
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.append_row import CSV_COLUMNS # noqa: E402
from scripts.stats import ( # noqa: E402
BACKTEST_SOURCES,
CORE_CALIBRATION_FIELDS,
bootstrap_ci,
calibration_mismatch,
compute_group_stats,
expectancy,
format_calibration_report,
format_report,
group_by,
load_trades,
main,
win_rate,
wilson_ci,
)
# ---------------------------------------------------------------------------
# Synthetic CSV fixture: 30 trades
# ---------------------------------------------------------------------------
def _base_row(**overrides) -> dict[str, str]:
base = {
"id": "0",
"screenshot_file": "",
"source": "vision",
"data": "2026-05-13",
"zi": "Mi",
"ora_ro": "17:30",
"ora_utc": "14:30",
"instrument": "DIA",
"directie": "Buy",
"tf_mare": "5min",
"tf_mic": "1min",
"calitate": "Clară",
"entry": "400.0",
"sl": "399.0",
"tp0": "400.5",
"tp1": "401.0",
"tp2": "402.0",
"risc_pct": "0.25",
"outcome_path": "TP0→TP1",
"max_reached": "TP1",
"be_moved": "True",
"pl_marius": "0.5000",
"pl_theoretical": "0.3330",
"set": "A2",
"indicator_version": "v-2026-05",
"pl_overlay_version": "marius-v1",
"csv_schema_version": "1",
"extracted_at": "2026-05-13T10:00:00Z",
"note": "",
}
base.update({k: str(v) for k, v in overrides.items()})
return base
def _write_csv(path: Path, rows: list[dict[str, str]]) -> None:
    """Write *rows* to *path* with the canonical header; missing keys → ""."""
    path.parent.mkdir(parents=True, exist_ok=True)
    cols = list(CSV_COLUMNS)
    with path.open("w", encoding="utf-8", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=cols)
        writer.writeheader()
        writer.writerows({col: row.get(col, "") for col in cols} for row in rows)
def _synthetic_30(tmp_path: Path) -> Path:
    """Write 30 vision-source trades engineered for known stats; return the path.

    Layout (by Set) — every win is TP0→TP1 (+0.5), every loss is SL (-1.0):
      - A1: 10 trades — 6 wins, 4 losses → WR 60%
      - A2: 10 trades — 7 wins, 3 losses → WR 70%
      - A3: 10 trades — 4 wins, 6 losses → WR 40%

    Overall: 17 wins / 30, WR ≈ 56.67%.
    """
    win_fields = dict(
        outcome_path="TP0→TP1",
        max_reached="TP1",
        be_moved="True",
        pl_marius="0.5000",
        pl_theoretical="0.3330",
    )
    loss_fields = dict(
        outcome_path="SL",
        max_reached="SL_first",
        be_moved="False",
        pl_marius="-1.0000",
        pl_theoretical="-1.0000",
    )

    rows: list[dict[str, str]] = []
    rid = 0
    # Per set: wins are emitted first, then losses, with globally sequential ids.
    for set_label, n_win, n_loss in (("A1", 6, 4), ("A2", 7, 3), ("A3", 4, 6)):
        plan = (("win", win_fields, n_win), ("loss", loss_fields, n_loss))
        for prefix, fields, count in plan:
            for _ in range(count):
                rid += 1
                rows.append(
                    _base_row(
                        id=rid,
                        screenshot_file=f"{prefix}-{rid}.png",
                        set=set_label,
                        calitate="Clară",
                        **fields,
                    )
                )

    path = tmp_path / "jurnal.csv"
    _write_csv(path, rows)
    return path
# ---------------------------------------------------------------------------
# Wilson CI — reference values
# ---------------------------------------------------------------------------
class TestWilsonCI:
    """Wilson score interval pinned against precomputed reference values.

    The reference bounds look like the standard 95% (z = 1.96) Wilson
    interval — NOTE(review): confirm the confidence level against the
    wilson_ci implementation in scripts/stats.py.
    """

    def test_n_zero(self) -> None:
        # Degenerate input: zero observations collapses the CI to (0, 0).
        assert wilson_ci(0, 0) == (0.0, 0.0)

    def test_50pct_at_n40(self) -> None:
        # 20/40 = 50% — CI is symmetric around 0.5.
        lo, hi = wilson_ci(20, 40)
        assert lo == pytest.approx(0.3519927879709976, abs=1e-9)
        assert hi == pytest.approx(0.6480072120290024, abs=1e-9)

    def test_55pct_at_n40(self) -> None:
        lo, hi = wilson_ci(22, 40)
        assert lo == pytest.approx(0.3982882988844078, abs=1e-9)
        assert hi == pytest.approx(0.6929492471905531, abs=1e-9)

    def test_55pct_at_n100(self) -> None:
        # Larger N tightens the CI; lower bound rises above 45%.
        lo, hi = wilson_ci(55, 100)
        assert lo == pytest.approx(0.4524442703164345, abs=1e-9)
        assert hi == pytest.approx(0.6438562489359655, abs=1e-9)
        assert lo > 0.45  # STOPPING_RULE GO-LIVE gate

    def test_zero_wins(self) -> None:
        # Extreme: 0 wins pins the lower bound at exactly 0.0.
        lo, hi = wilson_ci(0, 10)
        assert lo == pytest.approx(0.0, abs=1e-12)
        assert hi == pytest.approx(0.2775401687666165, abs=1e-9)

    def test_all_wins(self) -> None:
        # Extreme: 10/10 wins pins the upper bound at exactly 1.0.
        lo, hi = wilson_ci(10, 10)
        assert lo == pytest.approx(0.7224598312333834, abs=1e-9)
        assert hi == pytest.approx(1.0, abs=1e-12)

    def test_wins_out_of_range(self) -> None:
        # wins must satisfy 0 <= wins <= n; both violations raise ValueError.
        with pytest.raises(ValueError):
            wilson_ci(11, 10)
        with pytest.raises(ValueError):
            wilson_ci(-1, 10)
# ---------------------------------------------------------------------------
# Bootstrap CI — determinism + sanity
# ---------------------------------------------------------------------------
class TestBootstrapCI:
    """Bootstrap expectancy CI: seeded determinism plus basic sanity checks."""

    # Shared sample of R-multiples used by the determinism tests; copied per
    # call so each invocation of bootstrap_ci sees a fresh list.
    _VALS = [0.5, -1.0, 0.5, 0.5, -1.0, 0.2, -0.3, 0.5, -1.0, 0.5]

    def test_deterministic_with_seed(self) -> None:
        first = bootstrap_ci(list(self._VALS), iterations=500, seed=42)
        second = bootstrap_ci(list(self._VALS), iterations=500, seed=42)
        assert first == second

    def test_different_seed_different_result(self) -> None:
        one = bootstrap_ci(list(self._VALS), iterations=500, seed=1)
        two = bootstrap_ci(list(self._VALS), iterations=500, seed=2)
        assert one != two

    def test_brackets_the_mean(self) -> None:
        sample = list(self._VALS) * 5
        center = sum(sample) / len(sample)
        low, high = bootstrap_ci(sample, iterations=1000, seed=7)
        assert low <= center <= high

    def test_empty_input(self) -> None:
        # Degenerate case: nothing to resample → (0.0, 0.0).
        assert bootstrap_ci([], iterations=100, seed=0) == (0.0, 0.0)

    def test_single_value(self) -> None:
        low, high = bootstrap_ci([0.5], iterations=100, seed=0)
        # No variance with n=1: short-circuited to (mean, mean).
        assert low == pytest.approx(0.5)
        assert high == pytest.approx(0.5)
# ---------------------------------------------------------------------------
# Loading + group stats on the 30-trade fixture
# ---------------------------------------------------------------------------
class TestSyntheticFixture:
    """Aggregate stats over the engineered 30-trade fixture (_synthetic_30)."""

    def test_load_30(self, tmp_path: Path) -> None:
        # The fixture writes exactly 30 rows, all with source="vision".
        path = _synthetic_30(tmp_path)
        trades = load_trades(path)
        assert len(trades) == 30
        assert all(t.source == "vision" for t in trades)

    def test_overall_wr(self, tmp_path: Path) -> None:
        # Fixture layout: 6 + 7 + 4 = 17 wins across the three sets.
        trades = load_trades(_synthetic_30(tmp_path))
        wins, n, wr = win_rate(trades)
        assert wins == 17
        assert n == 30
        assert wr == pytest.approx(17 / 30)

    def test_overall_expectancy(self, tmp_path: Path) -> None:
        trades = load_trades(_synthetic_30(tmp_path))
        # 17 wins * 0.5 + 13 losses * -1.0 = 8.5 - 13.0 = -4.5 → mean = -0.15
        assert expectancy(trades) == pytest.approx(-0.15, abs=1e-9)

    def test_per_set_wr(self, tmp_path: Path) -> None:
        # Per-set win rates match the engineered 6/10, 7/10, 4/10 split.
        trades = load_trades(_synthetic_30(tmp_path))
        by_set = group_by(trades, "set")
        wr_a1 = win_rate(by_set["A1"])[2]
        wr_a2 = win_rate(by_set["A2"])[2]
        wr_a3 = win_rate(by_set["A3"])[2]
        assert wr_a1 == pytest.approx(0.60)
        assert wr_a2 == pytest.approx(0.70)
        assert wr_a3 == pytest.approx(0.40)

    def test_group_stats_a2(self, tmp_path: Path) -> None:
        # Full GroupStats for set A2 with a fixed bootstrap seed.
        trades = load_trades(_synthetic_30(tmp_path))
        a2 = [t for t in trades if t.set == "A2"]
        s = compute_group_stats(
            a2, label="A2", bootstrap_iterations=500, seed=11
        )
        assert s.n_total == 10
        assert s.n_resolved == 10
        assert s.wins == 7
        assert s.wr == pytest.approx(0.70)
        # Wilson 7/10 — precomputed reference bounds.
        assert s.wr_ci_lo == pytest.approx(0.3967732199795652, abs=1e-9)
        assert s.wr_ci_hi == pytest.approx(0.892210712513788, abs=1e-9)
        # Expectancy A2 = 7*0.5 + 3*(-1.0) = 0.5 → mean = 0.05
        assert s.exp_marius == pytest.approx(0.05, abs=1e-9)
        assert s.exp_marius_ci_lo <= s.exp_marius <= s.exp_marius_ci_hi
# ---------------------------------------------------------------------------
# Pending-trade handling
# ---------------------------------------------------------------------------
class TestPendingHandling:
    """Pending trades must be excluded from win-rate and expectancy."""

    def test_pending_excluded_from_wr(self, tmp_path: Path) -> None:
        csv_file = tmp_path / "j.csv"
        fixture = [
            _base_row(
                id=1, screenshot_file="a.png",
                outcome_path="TP0→TP1", max_reached="TP1",
                be_moved="True", pl_marius="0.5000", pl_theoretical="0.3330",
            ),
            _base_row(
                id=2, screenshot_file="b.png",
                outcome_path="pending", max_reached="TP0",
                be_moved="False", pl_marius="", pl_theoretical="0.1330",
            ),
            _base_row(
                id=3, screenshot_file="c.png",
                outcome_path="SL", max_reached="SL_first",
                be_moved="False", pl_marius="-1.0000", pl_theoretical="-1.0000",
            ),
        ]
        _write_csv(csv_file, fixture)

        trades = load_trades(csv_file)
        wins, n, wr = win_rate(trades)
        # The pending row is excluded from the denominator entirely.
        assert (wins, n) == (1, 2)
        assert wr == pytest.approx(0.5)
        # Expectancy on pl_marius averages only resolved rows: (0.5 + -1.0) / 2 = -0.25
        assert expectancy(trades, "pl_marius") == pytest.approx(-0.25)
# ---------------------------------------------------------------------------
# Source filtering: calibration rows excluded from main report
# ---------------------------------------------------------------------------
class TestSourceFiltering:
    """Calibration-sourced rows stay out of the backtest aggregates."""

    def test_calibration_rows_excluded_from_backtest_stats(
        self, tmp_path: Path
    ) -> None:
        csv_file = tmp_path / "j.csv"
        fixture = [
            _base_row(id=1, source="vision", screenshot_file="v.png",
                      pl_marius="0.5000"),
            _base_row(id=2, source="manual", screenshot_file="m.png",
                      pl_marius="0.5000"),
            _base_row(id=3, source="manual_calibration", screenshot_file="c.png",
                      pl_marius="-1.0000"),
            _base_row(id=4, source="vision_calibration", screenshot_file="c.png",
                      pl_marius="-1.0000"),
        ]
        _write_csv(csv_file, fixture)

        backtest = [
            t for t in load_trades(csv_file) if t.source in BACKTEST_SOURCES
        ]
        # Only the two non-calibration rows survive the filter, both wins.
        assert len(backtest) == 2
        wins, n, wr = win_rate(backtest)
        assert (wins, n) == (2, 2)
        assert wr == pytest.approx(1.0)
# ---------------------------------------------------------------------------
# Calibration mode: pairing + mismatch
# ---------------------------------------------------------------------------
class TestCalibration:
    """Pairing of manual/vision calibration legs and mismatch accounting."""

    def test_pairs_and_zero_mismatch(self, tmp_path: Path) -> None:
        # Identical legs → one pair, zero field mismatches.
        manual_leg = _base_row(
            id=1, source="manual_calibration", screenshot_file="cal-1.png"
        )
        vision_leg = _base_row(
            id=2, source="vision_calibration", screenshot_file="cal-1.png"
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [manual_leg, vision_leg])

        report = calibration_mismatch(load_trades(csv_path))
        assert report.pairs == 1
        assert sum(report.field_mismatches.values()) == 0
        assert report.overall_mismatch_rate == 0.0

    def test_one_field_mismatch(self, tmp_path: Path) -> None:
        manual_leg = _base_row(
            id=1, source="manual_calibration", screenshot_file="cal-1.png",
            entry="400.0",
        )
        vision_leg = _base_row(
            id=2, source="vision_calibration", screenshot_file="cal-1.png",
            entry="400.10",  # different entry
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [manual_leg, vision_leg])

        report = calibration_mismatch(load_trades(csv_path))
        assert report.pairs == 1
        assert report.field_mismatches["entry"] == 1
        # Every other core field still matches.
        for field_name in CORE_CALIBRATION_FIELDS:
            if field_name == "entry":
                continue
            assert report.field_mismatches[field_name] == 0
        # 1 mismatch / (1 pair * 8 fields) = 12.5%
        assert report.overall_mismatch_rate == pytest.approx(1.0 / len(CORE_CALIBRATION_FIELDS))

    def test_unpaired_rows_ignored(self, tmp_path: Path) -> None:
        # A manual leg with no vision counterpart contributes no pairs.
        lone_leg = _base_row(
            id=1, source="manual_calibration", screenshot_file="lonely.png"
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [lone_leg])

        report = calibration_mismatch(load_trades(csv_path))
        assert report.pairs == 0
        assert report.total_comparisons == 0
        assert report.overall_mismatch_rate == 0.0

    def test_numeric_equivalence_tolerated(self, tmp_path: Path) -> None:
        """'400' and '400.0000' should NOT count as a mismatch on entry."""
        manual_leg = _base_row(
            id=1, source="manual_calibration", screenshot_file="cal-1.png",
            entry="400",
        )
        vision_leg = _base_row(
            id=2, source="vision_calibration", screenshot_file="cal-1.png",
            entry="400.0000",
        )
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, [manual_leg, vision_leg])
        report = calibration_mismatch(load_trades(csv_path))
        assert report.field_mismatches["entry"] == 0
# ---------------------------------------------------------------------------
# Report formatting + CLI
# ---------------------------------------------------------------------------
class TestReporting:
    """Formatted report contents and the argv-driven CLI entry point."""

    def test_format_report_contains_sections(self, tmp_path: Path) -> None:
        text = format_report(
            load_trades(_synthetic_30(tmp_path)),
            bootstrap_iterations=200,
            seed=0,
        )
        for needle in ("M2D Backtest Stats", "Overall", "By Set"):
            assert needle in text
        for set_label in ("A1", "A2", "A3"):
            assert set_label in text
        # The calitate/quality warning must appear in some form.
        lowered = text.lower()
        assert "descriptor only" in lowered or "biased" in lowered

    def test_format_calibration_report(self, tmp_path: Path) -> None:
        pair = [
            _base_row(
                id=1, source="manual_calibration", screenshot_file="cal-1.png"
            ),
            _base_row(
                id=2, source="vision_calibration", screenshot_file="cal-1.png",
                directie="Sell",  # mismatch on directie
                entry="400.0", sl="401.0", tp0="399.5", tp1="399.0", tp2="398.0",
            ),
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, pair)

        text = format_calibration_report(load_trades(csv_path))
        assert "Paired screenshots" in text
        assert "directie" in text
        # 1 mismatch (directie) of 8 fields = 12.5% → FAIL P4 gate
        assert "FAIL" in text

    def test_empty_csv_report(self, tmp_path: Path) -> None:
        empty_path = tmp_path / "empty.csv"
        _write_csv(empty_path, [])
        text = format_report(load_trades(empty_path))
        assert "no backtest trades" in text.lower()

    def test_main_cli_runs(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
        csv_path = _synthetic_30(tmp_path)
        exit_code = main(
            ["--csv", str(csv_path), "--seed", "0", "--bootstrap-iterations", "100"]
        )
        assert exit_code == 0
        assert "M2D Backtest Stats" in capsys.readouterr().out

    def test_main_cli_calibration(
        self, tmp_path: Path, capsys: pytest.CaptureFixture
    ) -> None:
        pair = [
            _base_row(id=1, source="manual_calibration", screenshot_file="cal-1.png"),
            _base_row(id=2, source="vision_calibration", screenshot_file="cal-1.png"),
        ]
        csv_path = tmp_path / "j.csv"
        _write_csv(csv_path, pair)

        exit_code = main(["--csv", str(csv_path), "--calibration"])
        assert exit_code == 0
        text = capsys.readouterr().out
        assert "Calibration P4 gate" in text
        assert "PASS" in text  # all fields match → PASS