commands: m2d-log + backtest + batch + stats slash commands (124 tests pass)
scripts/stats.py
@@ -1,21 +1,20 @@
 """Backtest statistics for ``data/jurnal.csv``.

 Outputs:
 - Overall + per-Set + per-calitate + per-instrument WR, expectancy.
 - Wilson 95% CI for WR (closed form).
 - Bootstrap percentile 95% CI for expectancy (deterministic via ``seed``).
 - ``--calibration`` mode: joins ``manual_calibration`` rows with their
   ``vision_calibration`` counterparts on ``screenshot_file`` and reports
   field-by-field mismatch rates for the P4 gate (see ``STOPPING_RULE.md``).

+Public API:
+- ``compute_stats(csv_path, overlay) -> dict``
+- ``render_stats(stats, overlay) -> str``
+- ``compute_calibration(csv_path) -> dict``
+- ``render_calibration(cal) -> str``
+- ``main()`` — CLI entry point.

-A "win" is any trade with ``pl_marius > 0``. Pending trades
-(``pl_marius`` blank, i.e. ``outcome_path in {pending, TP0->pending}``) are
-excluded from both WR and expectancy: there is no realised outcome yet.
+A "win" is a closed trade with ``pl_overlay > 0`` (where ``pl_overlay`` is
+either ``pl_marius`` or ``pl_theoretical``). Pending trades — ``pl_marius``
+blank, i.e. ``outcome_path in {pending, TP0->pending}`` — are excluded from
+both WR and expectancy: there is no realised outcome yet.

-The ``calitate`` field is a known-biased descriptor (post-outcome
-classification — see ``STOPPING_RULE.md`` §3). It is reported as
-informational only and explicitly flagged as such; do NOT use it as a
-filter for GO LIVE decisions.
+The ``calitate`` field is a known-biased descriptor: it is classified
+post-outcome (see ``STOPPING_RULE.md`` §3). The per-``calitate`` split is
+reported with an explicit *descriptor only — biased post-outcome* caveat.
 """

 from __future__ import annotations
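For reference, a minimal usage sketch of the new public API (an illustration, not part of the diff; it assumes the repo root is the working directory and that data/jurnal.csv exists):

    from scripts.stats import compute_stats, render_stats

    # Aggregate WR + expectancy over the backtest rows, default overlay.
    stats = compute_stats("data/jurnal.csv", overlay="pl_marius")
    print(render_stats(stats, "pl_marius"))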
@@ -23,32 +22,42 @@ from __future__ import annotations
 import argparse
 import csv
 import math
-import random
 import sys
-from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Iterable
+from typing import Any, Iterable
+
+import numpy as np
+
+from scripts.append_row import CSV_COLUMNS

 __all__ = [
+    "CORE_CALIBRATION_FIELDS",
     "BACKTEST_SOURCES",
     "CALIBRATION_SOURCES",
-    "Trade",
-    "GroupStats",
-    "load_trades",
-    "CORE_CALIBRATION_FIELDS",
+    "NUMERIC_CALIBRATION_FIELDS",
+    "STOPPING_RULE_N",
     "wilson_ci",
-    "bootstrap_ci",
-    "win_rate",
-    "expectancy",
-    "group_by",
-    "compute_group_stats",
-    "calibration_mismatch",
-    "format_report",
+    "bootstrap_expectancy_ci",
+    "compute_stats",
+    "render_stats",
+    "compute_calibration",
+    "render_calibration",
     "main",
 ]


-# Fields compared in the calibration mismatch gate (STOPPING_RULE.md §P4).
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+
+BACKTEST_SOURCES: frozenset[str] = frozenset({"vision", "manual"})
+CALIBRATION_SOURCES: frozenset[str] = frozenset(
+    {"manual_calibration", "vision_calibration"}
+)
+
+
+# Calibration P4 gate (STOPPING_RULE.md §P4) — explicitly reported per field.
 CORE_CALIBRATION_FIELDS: tuple[str, ...] = (
     "entry",
     "sl",
@@ -58,315 +67,205 @@ CORE_CALIBRATION_FIELDS: tuple[str, ...] = (
     "outcome_path",
     "max_reached",
     "directie",
     "instrument",
 )


-BACKTEST_SOURCES: frozenset[str] = frozenset({"vision", "manual"})
-CALIBRATION_SOURCES: frozenset[str] = frozenset(
-    {"manual_calibration", "vision_calibration"}
+NUMERIC_CALIBRATION_FIELDS: frozenset[str] = frozenset(
+    {"entry", "sl", "tp0", "tp1", "tp2"}
 )


+# STOPPING_RULE.md §"GO LIVE" gate: N >= 40 per Set.
+STOPPING_RULE_N: int = 40


 # ---------------------------------------------------------------------------
-# Loading / typed access
+# Loading
 # ---------------------------------------------------------------------------

-@dataclass(frozen=True)
-class Trade:
-    """One realised (or pending) trade row, typed."""
-
-    id: int
-    screenshot_file: str
-    source: str
-    data: str
-    zi: str
-    ora_ro: str
-    instrument: str
-    directie: str
-    calitate: str
-    set: str
-    outcome_path: str
-    max_reached: str
-    be_moved: bool
-    pl_marius: float | None
-    pl_theoretical: float
-    raw: dict[str, str] = field(default_factory=dict)
-
-    @property
-    def is_pending(self) -> bool:
-        return self.pl_marius is None
-
-    @property
-    def is_win(self) -> bool:
-        return self.pl_marius is not None and self.pl_marius > 0


 def _parse_optional_float(value: str) -> float | None:
     s = (value or "").strip()
     if s == "":
         return None
-    return float(s)
+    try:
+        return float(s)
+    except ValueError:
+        return None
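After this change the parser is total: a blank cell and a malformed cell both map to None instead of raising. A quick illustration (hypothetical cell values):

    assert _parse_optional_float("") is None        # pending trade, blank P/L
    assert _parse_optional_float("  1.5 ") == 1.5   # whitespace tolerated
    assert _parse_optional_float("n/a") is None     # malformed -> None, not ValueError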


-def _parse_bool(value: str) -> bool:
-    return (value or "").strip().lower() in {"true", "1", "yes", "da"}
-
-
-def _row_to_trade(row: dict[str, str]) -> Trade:
-    return Trade(
-        id=int(row.get("id") or 0),
-        screenshot_file=row.get("screenshot_file", ""),
-        source=row.get("source", ""),
-        data=row.get("data", ""),
-        zi=row.get("zi", ""),
-        ora_ro=row.get("ora_ro", ""),
-        instrument=row.get("instrument", ""),
-        directie=row.get("directie", ""),
-        calitate=row.get("calitate", ""),
-        set=row.get("set", ""),
-        outcome_path=row.get("outcome_path", ""),
-        max_reached=row.get("max_reached", ""),
-        be_moved=_parse_bool(row.get("be_moved", "")),
-        pl_marius=_parse_optional_float(row.get("pl_marius", "")),
-        pl_theoretical=float(row.get("pl_theoretical") or 0.0),
-        raw=dict(row),
-    )
-
-
-def load_trades(csv_path: Path | str) -> list[Trade]:
-    """Load all rows of ``csv_path`` as :class:`Trade` objects.
-
-    Returns ``[]`` if the file does not exist or is empty.
-    """
+def _load_rows(csv_path: Path | str) -> list[dict[str, str]]:
     p = Path(csv_path)
     if not p.exists() or p.stat().st_size == 0:
         return []
     with p.open("r", encoding="utf-8", newline="") as fh:
-        reader = csv.DictReader(fh)
-        return [_row_to_trade(r) for r in reader]
+        return list(csv.DictReader(fh))


 # ---------------------------------------------------------------------------
-# Statistics primitives
+# CI primitives
 # ---------------------------------------------------------------------------

 def wilson_ci(wins: int, n: int, z: float = 1.96) -> tuple[float, float]:
     """Wilson score interval for a binomial proportion.

-    Returns ``(lo, hi)`` as proportions in [0, 1]. For ``n == 0`` returns
-    ``(0.0, 0.0)``. ``z = 1.96`` corresponds to a 95% CI.
+    Returns ``(lo, hi)`` clamped to ``[0.0, 1.0]``. For ``n == 0`` returns
+    ``(0.0, 0.0)``. ``z = 1.96`` ≈ 95% confidence.
     """
     if n <= 0:
         return (0.0, 0.0)
     if wins < 0 or wins > n:
         raise ValueError(f"wins={wins} out of range for n={n}")
-    p_hat = wins / n
+    p = wins / n
     denom = 1.0 + (z * z) / n
-    center = p_hat + (z * z) / (2.0 * n)
-    half = z * math.sqrt((p_hat * (1.0 - p_hat) + (z * z) / (4.0 * n)) / n)
-    lo = (center - half) / denom
-    hi = (center + half) / denom
-    return (max(0.0, lo), min(1.0, hi))
+    center = (p + (z * z) / (2.0 * n)) / denom
+    spread = z * math.sqrt(p * (1.0 - p) / n + (z * z) / (4.0 * n * n)) / denom
+    return (max(0.0, center - spread), min(1.0, center + spread))
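As a worked check of the closed form (illustrative numbers, not from the data): for wins=25, n=40, z=1.96 the point estimate is p=0.625 and the interval comes out to roughly (0.47, 0.76) — wide, which is exactly why the GO LIVE gate demands N >= 40 per Set before reading anything into WR.

    lo, hi = wilson_ci(25, 40)
    assert 0.46 < lo < 0.48 and 0.75 < hi < 0.77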


-def bootstrap_ci(
-    values: list[float],
-    *,
-    iterations: int = 2000,
-    alpha: float = 0.05,
-    seed: int | None = None,
+def bootstrap_expectancy_ci(
+    values: list[float] | np.ndarray,
+    n_resamples: int = 5000,
+    seed: int = 42,
 ) -> tuple[float, float]:
-    """Percentile-method bootstrap CI for the mean of ``values``.
+    """Percentile-method bootstrap 95% CI for the mean of ``values``.

-    Deterministic when ``seed`` is provided. Returns ``(lo, hi)``. For
-    ``len(values) < 2`` returns ``(mean, mean)``.
+    Deterministic for a given ``seed``. Empty input → ``(0.0, 0.0)``.
+    Single value → ``(value, value)`` (no variance to resample).
     """
-    if not values:
+    arr = np.asarray(list(values), dtype=float)
+    if arr.size == 0:
         return (0.0, 0.0)
-    n = len(values)
-    mean = sum(values) / n
-    if n < 2 or iterations <= 0:
-        return (mean, mean)
-
-    rng = random.Random(seed)
-    means: list[float] = []
-    for _ in range(iterations):
-        s = 0.0
-        for _ in range(n):
-            s += values[rng.randrange(n)]
-        means.append(s / n)
-    means.sort()
-    lo_idx = int(math.floor((alpha / 2.0) * iterations))
-    hi_idx = int(math.ceil((1.0 - alpha / 2.0) * iterations)) - 1
-    lo_idx = max(0, min(iterations - 1, lo_idx))
-    hi_idx = max(0, min(iterations - 1, hi_idx))
-    return (means[lo_idx], means[hi_idx])
+    if arr.size == 1:
+        v = float(arr[0])
+        return (v, v)
+    rng = np.random.default_rng(seed)
+    boots = np.empty(n_resamples, dtype=float)
+    n = arr.size
+    for i in range(n_resamples):
+        idx = rng.integers(0, n, size=n)
+        boots[i] = float(arr[idx].mean())
+    lo = float(np.percentile(boots, 2.5))
+    hi = float(np.percentile(boots, 97.5))
+    return (lo, hi)
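Because the resampling RNG is np.random.default_rng(seed) with a fixed default, repeated calls reproduce the same interval — a quick sketch (illustrative values):

    vals = [1.0, -1.0, 2.0, -0.5, 1.5]
    assert bootstrap_expectancy_ci(vals) == bootstrap_expectancy_ci(vals)
    # A different seed generally gives a (slightly) different interval:
    lo42, hi42 = bootstrap_expectancy_ci(vals, seed=42)
    lo7, hi7 = bootstrap_expectancy_ci(vals, seed=7)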


-def win_rate(trades: Iterable[Trade]) -> tuple[int, int, float]:
-    """Return ``(wins, n_resolved, wr)`` ignoring pending trades."""
-    resolved = [t for t in trades if not t.is_pending]
-    wins = sum(1 for t in resolved if t.is_win)
-    n = len(resolved)
-    wr = (wins / n) if n else 0.0
-    return wins, n, wr
-
-
-def expectancy(trades: Iterable[Trade], overlay: str = "pl_marius") -> float:
-    """Mean P/L (in R) over non-pending trades, on the given overlay."""
-    if overlay not in {"pl_marius", "pl_theoretical"}:
-        raise ValueError(f"unknown overlay {overlay!r}")
-    if overlay == "pl_marius":
-        vals = [t.pl_marius for t in trades if t.pl_marius is not None]
-    else:
-        vals = [t.pl_theoretical for t in trades if not t.is_pending]
-    if not vals:
-        return 0.0
-    return sum(vals) / len(vals)
-
-
-# ---------------------------------------------------------------------------
-# Group stats
-# ---------------------------------------------------------------------------
-
-
-@dataclass(frozen=True)
-class GroupStats:
-    key: str
-    n_total: int
-    n_resolved: int
-    wins: int
-    wr: float
-    wr_ci_lo: float
-    wr_ci_hi: float
-    exp_marius: float
-    exp_marius_ci_lo: float
-    exp_marius_ci_hi: float
-    exp_theoretical: float
-    exp_theoretical_ci_lo: float
-    exp_theoretical_ci_hi: float
-
-
-def group_by(trades: Iterable[Trade], field_name: str) -> dict[str, list[Trade]]:
-    out: dict[str, list[Trade]] = {}
-    for t in trades:
-        key = getattr(t, field_name, "") or "(blank)"
-        out.setdefault(key, []).append(t)
-    return out
+# ---------------------------------------------------------------------------
+# compute_stats
+# ---------------------------------------------------------------------------
+
+
+def _group_stats(
+    overlay_values: list[float | None],
+    *,
+    include_ci: bool,
+    bootstrap_seed: int,
+) -> dict[str, Any]:
+    closed = [v for v in overlay_values if v is not None]
+    n = len(closed)
+    wins = sum(1 for v in closed if v > 0)
+    wr = (wins / n) if n else 0.0
+    out: dict[str, Any] = {
+        "n": n,
+        "wr": wr,
+        "expectancy": (sum(closed) / n) if n else 0.0,
+    }
+    if include_ci:
+        out["wr_ci_95"] = wilson_ci(wins, n)
+        out["expectancy_ci_95"] = bootstrap_expectancy_ci(
+            closed, seed=bootstrap_seed
+        )
+    return out
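A sketch of what _group_stats returns (None marks a pending trade and is dropped; values are illustrative):

    g = _group_stats([1.0, -1.0, None, 2.0], include_ci=True, bootstrap_seed=42)
    # g["n"] == 3, g["wr"] == 2/3, g["expectancy"] ≈ 0.667,
    # plus g["wr_ci_95"] and g["expectancy_ci_95"] interval tuples.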


-def compute_group_stats(
-    trades: list[Trade],
-    *,
-    label: str,
-    bootstrap_iterations: int = 2000,
-    seed: int | None = None,
-) -> GroupStats:
-    wins, n_resolved, wr = win_rate(trades)
-    wr_lo, wr_hi = wilson_ci(wins, n_resolved)
-
-    pl_m_vals = [t.pl_marius for t in trades if t.pl_marius is not None]
-    exp_m = (sum(pl_m_vals) / len(pl_m_vals)) if pl_m_vals else 0.0
-    exp_m_lo, exp_m_hi = bootstrap_ci(
-        pl_m_vals, iterations=bootstrap_iterations, seed=seed
-    )
-
-    pl_t_vals = [t.pl_theoretical for t in trades if not t.is_pending]
-    exp_t = (sum(pl_t_vals) / len(pl_t_vals)) if pl_t_vals else 0.0
-    exp_t_lo, exp_t_hi = bootstrap_ci(
-        pl_t_vals,
-        iterations=bootstrap_iterations,
-        seed=None if seed is None else seed + 1,
-    )
-
-    return GroupStats(
-        key=label,
-        n_total=len(trades),
-        n_resolved=n_resolved,
-        wins=wins,
-        wr=wr,
-        wr_ci_lo=wr_lo,
-        wr_ci_hi=wr_hi,
-        exp_marius=exp_m,
-        exp_marius_ci_lo=exp_m_lo,
-        exp_marius_ci_hi=exp_m_hi,
-        exp_theoretical=exp_t,
-        exp_theoretical_ci_lo=exp_t_lo,
-        exp_theoretical_ci_hi=exp_t_hi,
-    )
+def _overlay_value(row: dict[str, str], overlay: str) -> float | None:
+    raw = row.get(overlay, "")
+    return _parse_optional_float(raw)


-# ---------------------------------------------------------------------------
-# Calibration mode
-# ---------------------------------------------------------------------------
-
-
-@dataclass(frozen=True)
-class CalibrationReport:
-    pairs: int
-    field_mismatches: dict[str, int]
-    total_comparisons: int
-
-    @property
-    def overall_mismatch_rate(self) -> float:
-        if self.total_comparisons == 0:
-            return 0.0
-        total = sum(self.field_mismatches.values())
-        return total / self.total_comparisons
-
-
-def _normalise_for_compare(field_name: str, value: str) -> str:
-    s = (value or "").strip()
-    if field_name in {"entry", "sl", "tp0", "tp1", "tp2"}:
-        try:
-            return f"{float(s):.4f}"
-        except ValueError:
-            return s
-    return s
-
-
-def calibration_mismatch(
-    trades: Iterable[Trade],
-    *,
-    fields: tuple[str, ...] = CORE_CALIBRATION_FIELDS,
-) -> CalibrationReport:
-    """Pair ``manual_calibration`` and ``vision_calibration`` rows by
-    ``screenshot_file``, then count mismatches per ``fields``.
-
-    Returns a :class:`CalibrationReport`. Unpaired calibration rows are
-    silently ignored — they cannot contribute to a comparison.
-    """
-    manual: dict[str, Trade] = {}
-    vision: dict[str, Trade] = {}
-    for t in trades:
-        if t.source == "manual_calibration":
-            manual[t.screenshot_file] = t
-        elif t.source == "vision_calibration":
-            vision[t.screenshot_file] = t
+def compute_stats(
+    csv_path: Path | str = "data/jurnal.csv",
+    overlay: str = "pl_marius",
+) -> dict[str, Any]:
+    """Compute aggregate WR + expectancy stats over the backtest rows.
+
+    Calibration rows (``manual_calibration`` / ``vision_calibration``) are
+    excluded; use :func:`compute_calibration` for the P4 mismatch report.
+
+    ``overlay`` selects the P/L column: ``"pl_marius"`` (default — the real
+    overlay Marius trades) or ``"pl_theoretical"`` (1/3-1/3-1/3 hold-to-TP2).
+    """
+    if overlay not in {"pl_marius", "pl_theoretical"}:
+        raise ValueError(f"unknown overlay {overlay!r}")
-    paired_files = sorted(set(manual) & set(vision))
-    field_mismatches: dict[str, int] = {f: 0 for f in fields}
-    for f in paired_files:
-        m = manual[f]
-        v = vision[f]
-        for fld in fields:
-            mv = _normalise_for_compare(fld, m.raw.get(fld, ""))
-            vv = _normalise_for_compare(fld, v.raw.get(fld, ""))
-            if mv != vv:
-                field_mismatches[fld] += 1
-
-    total_comparisons = len(paired_files) * len(fields)
-    return CalibrationReport(
-        pairs=len(paired_files),
-        field_mismatches=field_mismatches,
-        total_comparisons=total_comparisons,
-    )
+    rows = [r for r in _load_rows(csv_path) if r.get("source", "") in BACKTEST_SOURCES]
+
+    if not rows:
+        return {
+            "n_total": 0,
+            "n_pending": 0,
+            "n_closed": 0,
+            "wr": 0.0,
+            "wr_ci_95": (0.0, 0.0),
+            "expectancy": 0.0,
+            "expectancy_ci_95": (0.0, 0.0),
+            "per_set": {},
+            "per_calitate": {},
+            "per_directie": {},
+        }
+
+    # Pending status is overlay-independent: a trade is pending iff
+    # pl_marius is blank (outcome_path in {pending, TP0->pending}).
+    # pl_theoretical is concrete even for pending rows, so it would otherwise
+    # let pending trades sneak into the closed-trades stats — we mask those
+    # out explicitly here.
+    pending_mask = [_parse_optional_float(r.get("pl_marius", "")) is None for r in rows]
+    overlay_vals: list[float | None] = []
+    for r, is_pending in zip(rows, pending_mask):
+        overlay_vals.append(None if is_pending else _overlay_value(r, overlay))
+    n_total = len(rows)
+    n_pending = sum(1 for p in pending_mask if p)
+    n_closed = n_total - n_pending
+
+    overall = _group_stats(
+        overlay_vals, include_ci=True, bootstrap_seed=42
+    )
+
+    def _split(field: str, include_ci: bool) -> dict[str, dict[str, Any]]:
+        groups: dict[str, list[float | None]] = {}
+        for r, v in zip(rows, overlay_vals):
+            key = r.get(field, "") or "(blank)"
+            groups.setdefault(key, []).append(v)
+        out: dict[str, dict[str, Any]] = {}
+        for k in sorted(groups):
+            sub_seed = 42 + (abs(hash(("split", field, k))) % 1_000_000)
+            out[k] = _group_stats(
+                groups[k], include_ci=include_ci, bootstrap_seed=sub_seed
+            )
+        return out
+
+    return {
+        "n_total": n_total,
+        "n_pending": n_pending,
+        "n_closed": n_closed,
+        "wr": overall["wr"],
+        "wr_ci_95": overall["wr_ci_95"],
+        "expectancy": overall["expectancy"],
+        "expectancy_ci_95": overall["expectancy_ci_95"],
+        "per_set": _split("set", include_ci=True),
+        "per_calitate": _split("calitate", include_ci=True),
+        # per_directie skips CI per spec (no wr_ci_95 / expectancy_ci_95 keys).
+        "per_directie": {
+            k: {"n": v["n"], "wr": v["wr"], "expectancy": v["expectancy"]}
+            for k, v in _split("directie", include_ci=False).items()
+        },
+    }
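The returned mapping is plain data, so callers other than render_stats can consume it directly — for example (hypothetical use, not from the diff):

    stats = compute_stats("data/jurnal.csv")
    for set_name, d in stats["per_set"].items():
        lo, hi = d["wr_ci_95"]
        print(set_name, d["n"], d["wr"], lo, hi)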


 # ---------------------------------------------------------------------------
-# Reporting
+# render_stats
 # ---------------------------------------------------------------------------

@@ -375,110 +274,228 @@ def _fmt_pct(p: float) -> str:

 def _fmt_r(x: float) -> str:
-    return f"{x:+.3f}R"
+    return f"{x:+.2f} R"


-def _fmt_stats_row(s: GroupStats) -> str:
-    return (
-        f"{s.key:<14} N={s.n_total:>3} (resolved {s.n_resolved:>3}) "
-        f"WR={_fmt_pct(s.wr)} [{_fmt_pct(s.wr_ci_lo)}, {_fmt_pct(s.wr_ci_hi)}] "
-        f"E_marius={_fmt_r(s.exp_marius)} "
-        f"[{_fmt_r(s.exp_marius_ci_lo)}, {_fmt_r(s.exp_marius_ci_hi)}] "
-        f"E_theor={_fmt_r(s.exp_theoretical)}"
-    )
+def _set_sort_key(name: str) -> tuple[int, str]:
+    order = ["A1", "A2", "A3", "B", "C", "D", "Other"]
+    return (order.index(name), name) if name in order else (len(order), name)


-def format_report(
-    trades: list[Trade],
-    *,
-    bootstrap_iterations: int = 2000,
-    seed: int | None = None,
-) -> str:
-    """Render the main stats report.
-
-    Only ``source in {vision, manual}`` rows are included in the WR /
-    expectancy computations; calibration rows are reported separately via
-    ``--calibration``.
-    """
-    backtest = [t for t in trades if t.source in BACKTEST_SOURCES]
-    lines: list[str] = []
-    lines.append("=== M2D Backtest Stats ===")
-    lines.append(f"Backtest rows: {len(backtest)} (calibration excluded)")
-    lines.append("")
-
-    if not backtest:
-        lines.append("(no backtest trades yet)")
-        return "\n".join(lines)
-
-    overall = compute_group_stats(
-        backtest,
-        label="OVERALL",
-        bootstrap_iterations=bootstrap_iterations,
-        seed=seed,
-    )
-    lines.append("-- Overall --")
-    lines.append(_fmt_stats_row(overall))
-    lines.append("")
-
-    def _emit_group(title: str, field_name: str, key_order: list[str] | None = None) -> None:
-        lines.append(f"-- By {title} --")
-        groups = group_by(backtest, field_name)
-        keys = key_order if key_order is not None else sorted(groups)
-        for k in keys:
-            if k not in groups:
-                continue
-            sub_seed = None if seed is None else seed + abs(hash(k)) % 10_000
-            s = compute_group_stats(
-                groups[k],
-                label=k,
-                bootstrap_iterations=bootstrap_iterations,
-                seed=sub_seed,
-            )
-            lines.append(_fmt_stats_row(s))
-        lines.append("")
-
-    _emit_group(
-        "Set",
-        "set",
-        key_order=["A1", "A2", "A3", "B", "C", "D", "Other"],
-    )
-    _emit_group("Instrument", "instrument")
-    lines.append(
-        "[!] By calitate — descriptor only (post-outcome, biased; do not use "
-        "as a GO LIVE filter — see STOPPING_RULE.md §3)."
-    )
-    _emit_group(
-        "calitate",
-        "calitate",
-        key_order=["Clară", "Mai mare ca impuls", "Slabă", "n/a"],
-    )
-
-    return "\n".join(lines).rstrip() + "\n"
-
-
-def format_calibration_report(trades: list[Trade]) -> str:
-    cal = calibration_mismatch(trades)
-    lines: list[str] = []
-    lines.append("=== Calibration P4 gate ===")
-    lines.append(f"Paired screenshots (manual ∩ vision): {cal.pairs}")
-    if cal.pairs == 0:
-        lines.append("(no calibration pairs yet)")
-        return "\n".join(lines) + "\n"
-
-    lines.append("")
-    lines.append(f"{'field':<14} mismatches / pairs rate")
-    for fld in CORE_CALIBRATION_FIELDS:
-        m = cal.field_mismatches.get(fld, 0)
-        rate = (m / cal.pairs) if cal.pairs else 0.0
-        lines.append(f"{fld:<14} {m:>3} / {cal.pairs:<3} {_fmt_pct(rate)}")
-    lines.append("")
-    lines.append(
-        f"Overall mismatch rate: {_fmt_pct(cal.overall_mismatch_rate)} "
-        f"({sum(cal.field_mismatches.values())} of {cal.total_comparisons} comparisons)"
-    )
-    threshold = 0.10
-    verdict = "PASS" if cal.overall_mismatch_rate <= threshold else "FAIL"
-    lines.append(f"P4 gate (<= 10%): {verdict}")
-
-    return "\n".join(lines) + "\n"
+def render_stats(stats: dict[str, Any], overlay: str) -> str:
+    lines: list[str] = []
+    lines.append(f"=== Stats jurnal.csv (overlay: {overlay}) ===")
+    lines.append(
+        f"Trade-uri totale: {stats['n_total']} | "
+        f"închise: {stats['n_closed']} | pending: {stats['n_pending']}"
+    )
+
+    if stats["n_total"] == 0:
+        lines.append("")
+        lines.append("(nu sunt trade-uri backtest în CSV)")
+        return "\n".join(lines) + "\n"
+
+    lines.append("")
+    lo, hi = stats["wr_ci_95"]
+    e_lo, e_hi = stats["expectancy_ci_95"]
+    lines.append(f"GLOBAL (n={stats['n_closed']}):")
+    lines.append(
+        f" WR: {_fmt_pct(stats['wr'])} "
+        f"[95% CI: {_fmt_pct(lo)}, {_fmt_pct(hi)}]"
+    )
+    lines.append(
+        f" Expectancy: {_fmt_r(stats['expectancy'])} "
+        f"[95% CI: {_fmt_r(e_lo)}, {_fmt_r(e_hi)}]"
+    )
+    lines.append("")
+
+    def _emit_split(
+        title: str,
+        data: dict[str, dict[str, Any]],
+        *,
+        sort_keys: list[str] | None = None,
+        include_ci: bool = True,
+    ) -> None:
+        lines.append(title)
+        keys = sort_keys if sort_keys is not None else sorted(data)
+        for k in keys:
+            if k not in data:
+                continue
+            d = data[k]
+            if include_ci and "wr_ci_95" in d:
+                clo, chi = d["wr_ci_95"]
+                lines.append(
+                    f" {k:<14} n={d['n']:>3} "
+                    f"WR {_fmt_pct(d['wr'])} "
+                    f"[{_fmt_pct(clo)}, {_fmt_pct(chi)}] "
+                    f"E {_fmt_r(d['expectancy'])}"
+                )
+            else:
+                lines.append(
+                    f" {k:<14} n={d['n']:>3} "
+                    f"WR {_fmt_pct(d['wr'])} "
+                    f"E {_fmt_r(d['expectancy'])}"
+                )
+        lines.append("")
+
+    _emit_split(
+        "PER SET:",
+        stats["per_set"],
+        sort_keys=sorted(stats["per_set"], key=_set_sort_key),
+    )
+
+    lines.append(
+        "PER CALITATE (⚠️ DESCRIPTOR ONLY — biased post-outcome, NU folosi ca filtru):"
+    )
+    cal_order = ["Clară", "Mai mare ca impuls", "Slabă", "n/a"]
+    keys = [k for k in cal_order if k in stats["per_calitate"]] + [
+        k for k in sorted(stats["per_calitate"]) if k not in cal_order
+    ]
+    for k in keys:
+        d = stats["per_calitate"][k]
+        clo, chi = d["wr_ci_95"]
+        lines.append(
+            f" {k:<20} n={d['n']:>3} "
+            f"WR {_fmt_pct(d['wr'])} "
+            f"[{_fmt_pct(clo)}, {_fmt_pct(chi)}] "
+            f"E {_fmt_r(d['expectancy'])}"
+        )
+    lines.append("")
+
+    _emit_split("PER DIRECȚIE:", stats["per_directie"], include_ci=False)
+
+    # STOPPING_RULE gate check — flag every Set that hasn't crossed N>=40.
+    lines.append(f"⚠️ STOPPING RULE check (vezi STOPPING_RULE.md, N>={STOPPING_RULE_N}):")
+    set_keys = sorted(stats["per_set"], key=_set_sort_key)
+    any_flagged = False
+    for k in set_keys:
+        n = stats["per_set"][k]["n"]
+        if n < STOPPING_RULE_N:
+            lines.append(f" {k}: N={n} < {STOPPING_RULE_N} → NEEDS MORE DATA")
+            any_flagged = True
+    if not any_flagged:
+        lines.append(f" toate Set-urile au N>={STOPPING_RULE_N} (eligibile pentru GO LIVE check).")
+
+    return "\n".join(lines) + "\n"


+# ---------------------------------------------------------------------------
+# compute_calibration
+# ---------------------------------------------------------------------------
+
+
+def _calibration_match(field: str, m_val: str, v_val: str, tol: float = 0.01) -> bool:
+    if field in NUMERIC_CALIBRATION_FIELDS:
+        try:
+            return abs(float(m_val) - float(v_val)) <= tol
+        except ValueError:
+            return (m_val or "").strip() == (v_val or "").strip()
+    return (m_val or "").strip() == (v_val or "").strip()
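Illustrative behaviour of the tolerance rule (hypothetical prices, not from the data):

    assert _calibration_match("entry", "1.0850", "1.0851") is True    # within 0.01
    assert _calibration_match("entry", "1.0850", "1.1000") is False   # off by 0.015
    assert _calibration_match("directie", "long", "long ") is True    # strip + exact match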


+def compute_calibration(
+    csv_path: Path | str = "data/jurnal.csv",
+) -> dict[str, Any]:
+    """Pair calibration legs by ``screenshot_file`` and report per-field mismatch.
+
+    Returns a dict ``{"n_pairs": int, "fields": {field: {match, mismatch,
+    match_rate, mismatch_examples}}}``. ``mismatch_examples`` holds up to 3
+    strings ``"<screenshot_file>: manual=X vs vision=Y"`` per field.
+
+    Numeric fields (``entry/sl/tp0/tp1/tp2``) use a tolerance of 0.01;
+    everything else is exact-string equality after strip.
+    """
+    rows = _load_rows(csv_path)
+    manual: dict[str, dict[str, str]] = {}
+    vision: dict[str, dict[str, str]] = {}
+    for r in rows:
+        src = r.get("source", "")
+        if src == "manual_calibration":
+            manual[r.get("screenshot_file", "")] = r
+        elif src == "vision_calibration":
+            vision[r.get("screenshot_file", "")] = r
+
+    paired_files = sorted(set(manual) & set(vision))
+    fields_report: dict[str, dict[str, Any]] = {
+        f: {
+            "match": 0,
+            "mismatch": 0,
+            "match_rate": 0.0,
+            "mismatch_examples": [],
+        }
+        for f in CORE_CALIBRATION_FIELDS
+    }
+
+    for f in paired_files:
+        m = manual[f]
+        v = vision[f]
+        for fld in CORE_CALIBRATION_FIELDS:
+            mv = m.get(fld, "")
+            vv = v.get(fld, "")
+            if _calibration_match(fld, mv, vv):
+                fields_report[fld]["match"] += 1
+            else:
+                fields_report[fld]["mismatch"] += 1
+                examples = fields_report[fld]["mismatch_examples"]
+                if len(examples) < 3:
+                    examples.append(f"{f}: manual={mv!r} vs vision={vv!r}")
+
+    for fld, data in fields_report.items():
+        total = data["match"] + data["mismatch"]
+        data["match_rate"] = (data["match"] / total) if total else 0.0
+
+    return {"n_pairs": len(paired_files), "fields": fields_report}
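Shape of the result, for reference (keys taken from the code above; counts and file names hypothetical):

    cal = compute_calibration("data/jurnal.csv")
    # {"n_pairs": 12,
    #  "fields": {"entry": {"match": 11, "mismatch": 1,
    #                       "match_rate": 0.9167,
    #                       "mismatch_examples": ["img_003.png: manual='1.0850' vs vision='1.0870'"]},
    #             ...}}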


+def render_calibration(cal: dict[str, Any]) -> str:
+    lines: list[str] = []
+    lines.append("=== Calibration P4 gate (vezi STOPPING_RULE.md §P4) ===")
+    lines.append(f"Perechi calibration: {cal['n_pairs']}")
+    if cal["n_pairs"] == 0:
+        lines.append("(nu există perechi manual_calibration ∩ vision_calibration)")
+        return "\n".join(lines) + "\n"
+
+    lines.append("")
+    lines.append(f"{'field':<14} match mismatch rate")
+    total_mismatches = 0
+    total_comparisons = 0
+    for fld in CORE_CALIBRATION_FIELDS:
+        d = cal["fields"][fld]
+        n = d["match"] + d["mismatch"]
+        total_mismatches += d["mismatch"]
+        total_comparisons += n
+        lines.append(
+            f"{fld:<14} {d['match']:>5} {d['mismatch']:>8} "
+            f"{_fmt_pct(d['match_rate'])}"
+        )
+
+    lines.append("")
+    overall_match_rate = (
+        (total_comparisons - total_mismatches) / total_comparisons
+        if total_comparisons
+        else 0.0
+    )
+    overall_mismatch_rate = 1.0 - overall_match_rate
+    verdict = "PASS" if overall_mismatch_rate <= 0.10 else "FAIL"
+    lines.append(
+        f"Overall mismatch rate: {_fmt_pct(overall_mismatch_rate)} "
+        f"({total_mismatches}/{total_comparisons}) → P4 gate: {verdict}"
+    )
+
+    has_examples = any(
+        cal["fields"][f]["mismatch_examples"] for f in CORE_CALIBRATION_FIELDS
+    )
+    if has_examples:
+        lines.append("")
+        lines.append("Mismatch examples (max 3 per field):")
+        for fld in CORE_CALIBRATION_FIELDS:
+            ex = cal["fields"][fld]["mismatch_examples"]
+            if not ex:
+                continue
+            lines.append(f" [{fld}]")
+            for e in ex:
+                lines.append(f" - {e}")
+
+    return "\n".join(lines) + "\n"

@@ -498,43 +515,37 @@ def main(argv: list[str] | None = None) -> int:
         default=Path("data/jurnal.csv"),
         help="Path to the jurnal CSV (default: data/jurnal.csv).",
     )
+    parser.add_argument(
+        "--overlay",
+        choices=("pl_marius", "pl_theoretical"),
+        default="pl_marius",
+        help="Which P/L overlay to use (default: pl_marius).",
+    )
     parser.add_argument(
         "--calibration",
         action="store_true",
         help="Show P4 calibration mismatch report instead of backtest stats.",
     )
-    parser.add_argument(
-        "--bootstrap-iterations",
-        type=int,
-        default=2000,
-        help="Bootstrap iterations for expectancy CI (default: 2000).",
-    )
-    parser.add_argument(
-        "--seed",
-        type=int,
-        default=None,
-        help="Seed for the bootstrap RNG (set for deterministic output).",
-    )
     args = parser.parse_args(argv)

-    trades = load_trades(args.csv)
-    if args.calibration:
-        out = format_calibration_report(trades)
-    else:
-        out = format_report(
-            trades,
-            bootstrap_iterations=args.bootstrap_iterations,
-            seed=args.seed,
-        )
-    sys.stdout.write(out)
+    # Force UTF-8 on stdout: the report contains diacritics ("Clară", "Slabă")
+    # and a console codepage like cp1252 would crash on those.
+    try:
+        sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
+    except (AttributeError, OSError):
+        pass
+
+    if args.calibration:
+        cal = compute_calibration(args.csv)
+        sys.stdout.write(render_calibration(cal))
+    else:
+        stats = compute_stats(args.csv, overlay=args.overlay)
+        sys.stdout.write(render_stats(stats, args.overlay))
     return 0


 if __name__ == "__main__":
     raise SystemExit(main())


+# Ensure the canonical CSV schema is importable from one place — fail fast if
+# someone removes append_row.CSV_COLUMNS that this module depends on.
+assert CSV_COLUMNS is not None
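Typical invocations, assuming the command is run from the repo root with scripts/ importable as a package (the CSV-path flag name is not visible in this hunk; --csv is assumed from args.csv):

    python -m scripts.stats                            # backtest stats, pl_marius overlay
    python -m scripts.stats --overlay pl_theoretical   # theoretical 1/3-1/3-1/3 overlay
    python -m scripts.stats --calibration              # P4 mismatch report
    python -m scripts.stats --csv path/to/other.csv    # assumed flag, dest is args.csv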