scripts: regenerate_md + stats + tests (116-144 passing across modules)

This commit is contained in:
Marius
2026-05-13 12:45:05 +03:00
parent ce80151c58
commit 26d084dc4b
6 changed files with 1843 additions and 283 deletions

View File

@@ -1,26 +1,26 @@
"""Tests for scripts/append_row.py."""
"""Tests for scripts/append_row.py — append_extraction pipeline."""
from __future__ import annotations
import csv
import json
import re
import sys
from datetime import datetime
from pathlib import Path
import pytest
from pydantic import ValidationError
import yaml
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.append_row import ( # noqa: E402
CSV_COLUMNS,
VALID_SOURCES,
append_row,
append_row_from_json,
build_row,
read_rows,
ZI_RO_MAP,
append_extraction,
csv_columns,
)
from scripts.vision_schema import parse_extraction_dict # noqa: E402
REPO_ROOT = Path(__file__).resolve().parent.parent
@@ -29,12 +29,12 @@ META_PATH = REPO_ROOT / "data" / "_meta.yaml"
# ---------------------------------------------------------------------------
# helpers / fixtures
# ---------------------------------------------------------------------------
def _buy_payload(**overrides) -> dict:
# 2026-05-13 14:23 UTC == 17:23 RO (EEST, Wed) → Set A2.
# 2026-05-13 14:23 UTC == 17:23 RO (EEST, Wed) → set A2, zi=Mi.
base = {
"screenshot_file": "dia-2026-05-13-1.png",
"data": "2026-05-13",
@@ -61,198 +61,227 @@ def _buy_payload(**overrides) -> dict:
return base
def _write_payload(tmp_path: Path, name: str, **overrides) -> Path:
    """Serialize a buy payload (with *overrides* applied) to tmp_path/name; return the path."""
    target = tmp_path / name
    payload = _buy_payload(**overrides)
    target.write_text(json.dumps(payload), encoding="utf-8")
    return target
def _read_rows(csv_path: Path) -> list[dict[str, str]]:
with csv_path.open("r", encoding="utf-8", newline="") as fh:
return list(csv.DictReader(fh))
@pytest.fixture
def csv_path(tmp_path: Path) -> Path:
    """Per-test CSV target path (the file itself is created lazily by the code under test)."""
    # NOTE(review): a stale duplicate `return tmp_path / "trades.csv"` left over
    # from the rename shadowed this line (unreachable code removed); the newer
    # name "jurnal.csv" is the one the current test suite uses.
    return tmp_path / "jurnal.csv"
# ---------------------------------------------------------------------------
# build_row — computed fields · schema / column layout
# ---------------------------------------------------------------------------
class TestBuildRow:
    """build_row: computed columns (ora_ro/zi/set), P/L overlays, version stamps."""

    def setup_method(self) -> None:
        # Load the shared meta and calendar fixtures once per test method.
        import yaml
        with META_PATH.open("r", encoding="utf-8") as fh:
            self.meta = yaml.safe_load(fh)
        from scripts.calendar_parse import load_calendar
        self.calendar = load_calendar(CALENDAR_PATH)

    def test_happy_path_computed_fields(self) -> None:
        """All derived columns are populated for a plain Buy payload."""
        extr = parse_extraction_dict(_buy_payload())
        row = build_row(extr, "manual", self.meta, self.calendar)
        # 14:23 UTC on 2026-05-13 = 17:23 RO (EEST), Wed → A2
        assert row["ora_ro"] == "17:23"
        assert row["zi"] == "Wed"
        assert row["set"] == "A2"
        # pl_marius for TP0->TP1 with be_moved=True is +0.50R
        assert float(row["pl_marius"]) == pytest.approx(0.50)
        # pl_theoretical for max_reached=TP1 is 0.333
        assert float(row["pl_theoretical"]) == pytest.approx(0.333)
        # version stamps copied from meta
        assert row["indicator_version"] == str(self.meta["indicator_version"])
        assert row["pl_overlay_version"] == str(self.meta["pl_overlay_version"])
        assert row["csv_schema_version"] == str(self.meta["csv_schema_version"])

    def test_pending_overlay_is_blank(self) -> None:
        """A pending outcome leaves pl_marius blank while pl_theoretical stays concrete."""
        extr = parse_extraction_dict(
            _buy_payload(outcome_path="pending", max_reached="TP0")
        )
        row = build_row(extr, "vision", self.meta, self.calendar)
        # pl_marius returns None for pending → empty string in CSV
        assert row["pl_marius"] == ""
        # pl_theoretical always concrete
        assert row["pl_theoretical"] != ""

    def test_invalid_source_rejected(self) -> None:
        """An unknown source label raises ValueError."""
        extr = parse_extraction_dict(_buy_payload())
        with pytest.raises(ValueError):
            build_row(extr, "auto_magic", self.meta, self.calendar)

    def test_all_valid_sources_accepted(self) -> None:
        """Every member of VALID_SOURCES is stamped into the row unchanged."""
        extr = parse_extraction_dict(_buy_payload())
        for s in VALID_SOURCES:
            row = build_row(extr, s, self.meta, self.calendar)
            assert row["source"] == s
def test_csv_columns_canonical_29() -> None:
    """The canonical layout has exactly 29 columns, 'id' first and 'note' last."""
    layout = csv_columns()
    assert layout == list(CSV_COLUMNS)
    assert len(layout) == 29
    assert layout[0] == "id"
    assert layout[-1] == "note"
# ---------------------------------------------------------------------------
# append_extraction — happy path, dedup, atomic writes (core tests, task #9)
# ---------------------------------------------------------------------------
class TestAppendRow:
def test_happy_path_writes_header_and_row(self, csv_path: Path) -> None:
extr = parse_extraction_dict(_buy_payload())
row = append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
assert csv_path.exists()
def test_happy_path(tmp_path: Path, csv_path: Path) -> None:
    """A valid payload appended with source='vision' lands as row id=1 with all fields set."""
    j = _write_payload(tmp_path, "t.json")
    result = append_extraction(
        j, "vision", csv_path, META_PATH, CALENDAR_PATH
    )
    assert result["status"] == "ok", result
    assert result["reason"] == ""
    assert result["id"] == 1
    # Header line must match the canonical column layout.
    with csv_path.open("r", encoding="utf-8", newline="") as fh:
        assert csv.DictReader(fh).fieldnames == list(CSV_COLUMNS)
    # NOTE(review): stale lines from the superseded manual-append test were
    # interleaved here (they referenced an undefined `row` and asserted
    # source == "manual"); removed — only the vision-append assertions remain.
    rows = _read_rows(csv_path)
    assert len(rows) == 1
    r = rows[0]
    assert r["id"] == "1"
    assert r["screenshot_file"] == "dia-2026-05-13-1.png"
    assert r["source"] == "vision"
    assert r["data"] == "2026-05-13"
    assert r["zi"] == "Mi"
    assert r["ora_ro"] == "17:23"
    assert r["ora_utc"] == "14:23"
    assert r["set"] == "A2"
    assert r["instrument"] == "DIA"
    assert r["directie"] == "Buy"
    assert r["be_moved"] == "True"
def test_two_distinct_rows(self, csv_path: Path) -> None:
    """Two different screenshots append as two independent rows."""
    e1 = parse_extraction_dict(_buy_payload(screenshot_file="a.png"))
    e2 = parse_extraction_dict(_buy_payload(screenshot_file="b.png"))
    append_row(e1, "manual", csv_path, META_PATH, CALENDAR_PATH)
    append_row(e2, "manual", csv_path, META_PATH, CALENDAR_PATH)
    rows = read_rows(csv_path)
    assert len(rows) == 2
    assert {r["screenshot_file"] for r in rows} == {"a.png", "b.png"}
def test_dedup_raises(self, csv_path: Path) -> None:
    """Re-appending the identical extraction with the same source raises 'duplicate'."""
    extr = parse_extraction_dict(_buy_payload())
    append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
    with pytest.raises(ValueError, match="duplicate"):
        append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
    # CSV still contains exactly the one row
    assert len(read_rows(csv_path)) == 1
def test_pl_calc_overlay(tmp_path: Path, csv_path: Path) -> None:
    """outcome_path=TP0->TP1, max_reached=TP1 → pl_marius=0.5, pl_theoretical=0.333."""
    payload = _write_payload(tmp_path, "t.json")
    outcome = append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    assert outcome["status"] == "ok"
    first = _read_rows(csv_path)[0]
    assert float(first["pl_marius"]) == pytest.approx(0.50)
    assert float(first["pl_theoretical"]) == pytest.approx(0.333)
def test_dedup_skip(self, csv_path: Path) -> None:
    """on_duplicate='skip' returns the existing row instead of raising."""
    extr = parse_extraction_dict(_buy_payload())
    first = append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
    # Mutate the extraction; the existing row should be returned untouched.
    extr2 = parse_extraction_dict(_buy_payload(note="changed"))
    existing = append_row(
        extr2, "manual", csv_path, META_PATH, CALENDAR_PATH, on_duplicate="skip"
    )
    assert existing["note"] == first["note"] == ""
    assert len(read_rows(csv_path)) == 1
def test_calibration_coexistence(self, csv_path: Path) -> None:
"""manual_calibration + vision_calibration on the SAME screenshot must coexist."""
extr = parse_extraction_dict(_buy_payload())
append_row(extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH)
# Vision leg may differ slightly — change entry by 0.1, still valid.
extr_vision = parse_extraction_dict(
_buy_payload(entry=400.1, confidence="medium")
)
append_row(
extr_vision, "vision_calibration", csv_path, META_PATH, CALENDAR_PATH
)
def test_dedup_same_source(tmp_path: Path, csv_path: Path) -> None:
    """Appending the same payload twice with the same source: the second call is rejected."""
    payload = _write_payload(tmp_path, "t.json")
    first, second = (
        append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
        for _ in range(2)
    )
    assert first["status"] == "ok"
    assert second["status"] == "rejected"
    assert "duplicate" in second["reason"].lower()
    assert second["id"] is None
    assert second["row"] is None
    assert len(_read_rows(csv_path)) == 1
rows = read_rows(csv_path)
assert len(rows) == 2
sources = {r["source"] for r in rows}
assert sources == {"manual_calibration", "vision_calibration"}
# Same screenshot, different source ⇒ no dedup collision.
files = {r["screenshot_file"] for r in rows}
assert files == {extr.screenshot_file}
def test_calibration_duplicate_same_source_rejected(
    self, csv_path: Path
) -> None:
    """Same screenshot AND same calibration source is still a duplicate."""
    extr = parse_extraction_dict(_buy_payload())
    append_row(extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH)
    with pytest.raises(ValueError, match="duplicate"):
        append_row(
            extr, "manual_calibration", csv_path, META_PATH, CALENDAR_PATH
        )
def test_dedup_different_source_ok(tmp_path: Path, csv_path: Path) -> None:
    """Same screenshot_file + different source ⇒ both rows accepted."""
    payload = _write_payload(tmp_path, "t.json")
    results = [
        append_extraction(payload, src, csv_path, META_PATH, CALENDAR_PATH)
        for src in ("manual_calibration", "vision_calibration")
    ]
    assert all(res["status"] == "ok" for res in results)
    rows = _read_rows(csv_path)
    assert len(rows) == 2
    assert {row["source"] for row in rows} == {"manual_calibration", "vision_calibration"}
    # Distinct sequential ids.
    assert {row["id"] for row in rows} == {"1", "2"}
def test_invalid_pydantic_rejected(tmp_path: Path, csv_path: Path) -> None:
    """entry == sl is rejected by pydantic; no CSV is written."""
    bad = _write_payload(tmp_path, "bad.json", entry=399.0, sl=399.0)
    verdict = append_extraction(bad, "vision", csv_path, META_PATH, CALENDAR_PATH)
    assert verdict["status"] == "rejected"
    assert "validation" in verdict["reason"].lower()
    assert not csv_path.exists()
def test_missing_json_file(tmp_path: Path, csv_path: Path) -> None:
    """A nonexistent payload path is rejected and the CSV is never created."""
    ghost = tmp_path / "ghost.json"
    verdict = append_extraction(
        ghost, "vision", csv_path, META_PATH, CALENDAR_PATH
    )
    assert verdict["status"] == "rejected"
    assert "not found" in verdict["reason"].lower()
    assert not csv_path.exists()
def test_id_increments(tmp_path: Path, csv_path: Path) -> None:
    """Each accepted extraction receives the next sequential id, in result and in file."""
    ids = []
    for stem in ("a", "b", "c"):
        payload = _write_payload(
            tmp_path, f"{stem}.json", screenshot_file=f"{stem}.png"
        )
        verdict = append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
        assert verdict["status"] == "ok"
        ids.append(verdict["id"])
    assert ids == [1, 2, 3]
    assert [int(row["id"]) for row in _read_rows(csv_path)] == [1, 2, 3]
def test_set_a2(tmp_path: Path, csv_path: Path) -> None:
    """Wed 2026-05-13 14:30 UTC → 17:30 RO → A2 sweet spot."""
    payload = _write_payload(tmp_path, "t.json", ora_utc="14:30")
    verdict = append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    assert verdict["status"] == "ok"
    first = _read_rows(csv_path)[0]
    assert first["ora_ro"] == "17:30"
    assert first["zi"] == "Mi"
    assert first["set"] == "A2"
def test_set_c_fomc(tmp_path: Path, csv_path: Path) -> None:
    """2026-04-29 18:35 UTC == 21:35 RO (FOMC Powell Press window) → Set C."""
    overrides = {
        "data": "2026-04-29",
        "ora_utc": "18:35",
        "screenshot_file": "fomc-apr.png",
    }
    payload = _write_payload(tmp_path, "t.json", **overrides)
    verdict = append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    assert verdict["status"] == "ok"
    first = _read_rows(csv_path)[0]
    assert first["ora_ro"] == "21:35"
    assert first["set"] == "C"
def test_versions_stamped(tmp_path: Path, csv_path: Path) -> None:
    """Version columns in the appended row mirror the values in _meta.yaml."""
    payload = _write_payload(tmp_path, "t.json")
    append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    stamped = _read_rows(csv_path)[0]
    meta = yaml.safe_load(META_PATH.read_text(encoding="utf-8"))
    for key in ("indicator_version", "pl_overlay_version", "csv_schema_version"):
        assert stamped[key] == str(meta[key])
def test_extracted_at_format(tmp_path: Path, csv_path: Path) -> None:
    """extracted_at is ISO 8601 UTC with a trailing 'Z' and round-trips via fromisoformat."""
    payload = _write_payload(tmp_path, "t.json")
    append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    stamp = _read_rows(csv_path)[0]["extracted_at"]
    # ISO 8601 UTC with trailing 'Z': YYYY-MM-DDTHH:MM:SSZ
    assert re.fullmatch(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", stamp), stamp
    # Round-trip through datetime.fromisoformat after dropping the Z.
    when = datetime.fromisoformat(stamp[:-1])
    assert when.year >= 2026
# ---------------------------------------------------------------------------
# cross-field invalid input / additional safety nets
# ---------------------------------------------------------------------------
class TestInvalidInput:
    """Cross-field validation failures surface before anything touches the CSV."""

    def test_buy_with_inverted_tp_rejected_before_append(
        self, csv_path: Path
    ) -> None:
        # tp1 < tp0 violates Buy ordering: caught at validation, not by append_row.
        with pytest.raises(ValidationError):
            parse_extraction_dict(
                _buy_payload(tp0=401.0, tp1=400.5, tp2=402.0)
            )
        assert not csv_path.exists()  # nothing written

    def test_outcome_path_sl_with_tp1_max_rejected(self, csv_path: Path) -> None:
        """outcome_path=SL contradicting max_reached=TP1 raises ValidationError."""
        with pytest.raises(ValidationError):
            parse_extraction_dict(
                _buy_payload(outcome_path="SL", max_reached="TP1")
            )
        assert not csv_path.exists()

    def test_append_row_from_json_invalid_payload(
        self, tmp_path: Path, csv_path: Path
    ) -> None:
        """An invalid Literal value in the JSON payload propagates as ValidationError."""
        bad = tmp_path / "bad.json"
        payload = _buy_payload(directie="Long")  # invalid Literal
        bad.write_text(json.dumps(payload), encoding="utf-8")
        with pytest.raises(ValidationError):
            append_row_from_json(
                bad, "vision", csv_path, META_PATH, CALENDAR_PATH
            )
        assert not csv_path.exists()
def test_invalid_source_rejected(tmp_path: Path, csv_path: Path) -> None:
    """An unknown source label is rejected up front; nothing is written."""
    payload = _write_payload(tmp_path, "t.json")
    verdict = append_extraction(payload, "auto_magic", csv_path, META_PATH, CALENDAR_PATH)
    assert verdict["status"] == "rejected"
    assert "source" in verdict["reason"].lower()
    assert not csv_path.exists()
# ---------------------------------------------------------------------------
# Atomic write: no temp file remains on disk
# ---------------------------------------------------------------------------
def test_all_valid_sources_accepted(tmp_path: Path, csv_path: Path) -> None:
    """Every entry of VALID_SOURCES yields an accepted append for its own screenshot."""
    for idx, source in enumerate(sorted(VALID_SOURCES)):
        payload = _write_payload(
            tmp_path, f"t{idx}.json", screenshot_file=f"s{idx}.png"
        )
        verdict = append_extraction(payload, source, csv_path, META_PATH, CALENDAR_PATH)
        assert verdict["status"] == "ok", (source, verdict)
    appended = _read_rows(csv_path)
    assert {row["source"] for row in appended} == set(VALID_SOURCES)
class TestAtomicWrite:
def test_no_temp_file_left_behind(self, csv_path: Path) -> None:
    """After a successful append, no '*.tmp' sibling remains next to the CSV."""
    extr = parse_extraction_dict(_buy_payload())
    append_row(extr, "manual", csv_path, META_PATH, CALENDAR_PATH)
    leftovers = [
        p for p in csv_path.parent.iterdir() if p.name.endswith(".tmp")
    ]
    assert leftovers == []
def test_atomic_write_leaves_no_tmp(tmp_path: Path, csv_path: Path) -> None:
    """After an append, no '*.tmp' sibling remains next to the CSV."""
    payload = _write_payload(tmp_path, "t.json")
    append_extraction(payload, "vision", csv_path, META_PATH, CALENDAR_PATH)
    leftovers = [
        entry for entry in csv_path.parent.iterdir() if entry.name.endswith(".tmp")
    ]
    assert leftovers == []
def test_append_row_from_json_roundtrip(
    self, tmp_path: Path, csv_path: Path
) -> None:
    """A payload written to disk round-trips through append_row_from_json into the CSV."""
    good = tmp_path / "good.json"
    good.write_text(json.dumps(_buy_payload()), encoding="utf-8")
    row = append_row_from_json(
        good, "vision", csv_path, META_PATH, CALENDAR_PATH
    )
    assert row["source"] == "vision"
    assert read_rows(csv_path)[0]["screenshot_file"] == row["screenshot_file"]
def test_zi_ro_map_covers_all_weekdays() -> None:
    """Internal sanity: the Romanian-day map covers all 7 short weekday names."""
    english = {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"}
    romanian = {"Lu", "Ma", "Mi", "Jo", "Vi", "Sa", "Du"}
    assert set(ZI_RO_MAP.keys()) == english
    assert set(ZI_RO_MAP.values()) == romanian
def test_malformed_json_rejected(tmp_path: Path, csv_path: Path) -> None:
    """Syntactically broken JSON is rejected and the CSV is never created."""
    broken = tmp_path / "broken.json"
    broken.write_text("{not valid json", encoding="utf-8")
    verdict = append_extraction(broken, "vision", csv_path, META_PATH, CALENDAR_PATH)
    assert verdict["status"] == "rejected"
    reason = verdict["reason"].lower()
    assert "validation" in reason or "json" in reason
    assert not csv_path.exists()