Files
rar-autopass/tests/test_holdout.py
Claude Agent 3fc53534e2 feat(5.15+5.14): CLOSE — fix-uri code-review + embeddings functional
5.15 (propagare design + dashboard editare) si 5.14 (mapare LLM distilata)
inchise dupa /code-review high. 8 buguri reparate TDD:

- HIGH modal nu se deschidea pe randul slim (base.html: trimitere-slim)
- HIGH /repune trunchia prestatii (declaratie incompleta la RAR) -> iterare
  peste existing, codes pozitional
- HIGH embeddings incarca model ~230MB degeaba pe corpus gol -> poarta has_corpus()
- HIGH picker chips gol pe re-render eroare -> conn/account_id pe toate ramurile
- MED obs re-derivat dupa stergere explicita -> _merge_override pastreaza obs=''
- MED mapare salvata fara denumire poluă GOLD -> _record_gold_validation guard
- MED typo nome_prestatie -> nume_prestatie in select /repune
- MED bucketare timp +3h gresita iarna -> SQLite localtime + TZ=Europe/Bucharest

Embeddings WIRE-uit functional (PRD #15, decizie user): ensure_embeddings_corpus
construieste corpus din nomenclator, gated pe AUTOPASS_EMBEDDINGS_ENABLED (default
off). Marime model corectata ~50MB->~230MB (estimare PRD gresita).

Cleanup: hoist load_* din bucla bulk-fix; import re la top.
Regresie: 1256 passed, 1 deselected (live), 0 failed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 20:48:34 +00:00

287 lines
9.7 KiB
Python

"""TDD tests for tools/mapare-llm/holdout.py.
They verify the split logic and the hit-rate computation on a SYNTHETIC fixture (not on real data).
The fixture does not check the actual numbers from the CSV files, only the CORRECTNESS of the functions.
"""
from __future__ import annotations
import sys
import os
# Put tools/mapare-llm/ (resolved relative to this file's parent directory) at the
# front of sys.path so the tests below can import holdout.py directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'tools', 'mapare-llm'))
import pytest
# Synthetic fixture: 7 (name, count) pairs with different frequencies.
# Total volume = 100 + 80 + 50 + 30 + 10 + 1 + 1 = 272
FIXTURE = [
    ("Revizie motor", 100),
    ("Schimb ulei", 80),
    ("Reglat frane", 50),
    ("Diagnosticare", 30),
    ("Curatenie interior", 10),
    ("Altceva rar A", 1),
    ("Altceva rar B", 1),
]
# Sum of all counts in the fixture (272).
FIXTURE_TOTAL_VOL = sum(count for _name, count in FIXTURE)
# Number of distinct names in the fixture (7).
FIXTURE_DISTINCT = len(FIXTURE)
# ---------------------------------------------------------------------------
# compute_volume_coverage
# ---------------------------------------------------------------------------
def test_compute_volume_coverage_sorted_descrescator():
    """The first entry of the result must be the row with the largest count."""
    from holdout import compute_volume_coverage

    raw_rows = [("A", 10), ("B", 90), ("C", 0)]
    # Rows with a zero count are dropped before the call.
    positive_rows = list(filter(lambda row: row[1] > 0, raw_rows))
    coverage = compute_volume_coverage(positive_rows)
    top_name = coverage[0]["denumire"]
    assert top_name == "B"
    top_count = coverage[0]["nr"]
    assert top_count == 90
def test_compute_volume_coverage_cumul():
    """Cumulative volume fraction and cumulative count are correct for each row."""
    from holdout import compute_volume_coverage

    coverage = compute_volume_coverage([("A", 90), ("B", 9), ("C", 1)])  # total = 100
    # Expected order is by descending count: A(90), B(9), C(1).
    # Each tuple: (name, cumulative volume fraction, cumulative count).
    expected = [("A", 0.90, 1), ("B", 0.99, 2), ("C", 1.0, 3)]
    for position, (name, frac, count) in enumerate(expected):
        # Index explicitly so a too-short result still raises instead of passing.
        entry = coverage[position]
        assert entry["denumire"] == name
        assert abs(entry["cumulative_volume_frac"] - frac) < 1e-9
        assert entry["cumulative_count"] == count
def test_compute_volume_coverage_gol():
    """An empty input list yields an empty list (no exception)."""
    from holdout import compute_volume_coverage

    coverage = compute_volume_coverage([])
    assert coverage == []
# ---------------------------------------------------------------------------
# corpus_size_for_threshold
# ---------------------------------------------------------------------------
def test_corpus_size_for_90pct():
    """The number of labels needed for 90% coverage is found correctly."""
    from holdout import corpus_size_for_threshold

    sample = [("A", 90), ("B", 9), ("C", 1)]  # total = 100
    # A alone already covers 90% of the volume, so one label is enough.
    labels_needed = corpus_size_for_threshold(sample, threshold=0.90)
    assert labels_needed == 1
def test_corpus_size_for_99pct():
    """A 99% threshold needs 2 labels (A + B = 99 of 100)."""
    from holdout import corpus_size_for_threshold

    sample = [("A", 90), ("B", 9), ("C", 1)]
    labels_needed = corpus_size_for_threshold(sample, threshold=0.99)
    assert labels_needed == 2
def test_corpus_size_for_100pct():
    """A 100% threshold needs every label."""
    from holdout import corpus_size_for_threshold

    sample = [("A", 90), ("B", 9), ("C", 1)]
    labels_needed = corpus_size_for_threshold(sample, threshold=1.0)
    assert labels_needed == 3
# ---------------------------------------------------------------------------
# compute_hit_rate_at_k
# ---------------------------------------------------------------------------
def test_compute_hit_rate_at_k_1():
    """Top-1 label (A = 90): hit rate = 90 / 100 = 0.90."""
    from holdout import compute_hit_rate_at_k

    sample = [("A", 90), ("B", 9), ("C", 1)]
    hit_rate = compute_hit_rate_at_k(sample, k=1)
    assert abs(hit_rate - 0.90) < 1e-9
def test_compute_hit_rate_at_k_2():
    """Top-2 labels (A + B = 99): hit rate = 0.99."""
    from holdout import compute_hit_rate_at_k

    sample = [("A", 90), ("B", 9), ("C", 1)]
    hit_rate = compute_hit_rate_at_k(sample, k=2)
    assert abs(hit_rate - 0.99) < 1e-9
def test_compute_hit_rate_at_k_depasit():
    """When k exceeds the number of rows, the hit rate is 1.0."""
    from holdout import compute_hit_rate_at_k

    sample = [("A", 90), ("B", 10)]
    hit_rate = compute_hit_rate_at_k(sample, k=100)
    assert abs(hit_rate - 1.0) < 1e-9
def test_compute_hit_rate_at_k_gol():
    """An empty list gives a hit rate of 0.0 (no ZeroDivisionError)."""
    from holdout import compute_hit_rate_at_k

    hit_rate = compute_hit_rate_at_k([], k=10)
    assert hit_rate == 0.0
# ---------------------------------------------------------------------------
# leave_one_out_hit_rate
# ---------------------------------------------------------------------------
def test_leave_one_out_hit_rate_formula():
    """Leave-first-out hit rate equals (total_vol - total_distinct) / total_vol.

    Interpretation: every occurrence after the first one counts as a hit
    (the name is already in the corpus). Singletons (count = 1) contribute
    zero hits.
    """
    from holdout import leave_one_out_hit_rate

    sample = [("A", 10), ("B", 5), ("C", 1)]  # total = 16, distinct = 3
    expected = 13 / 16  # (16 - 3) / 16 = 0.8125
    observed = leave_one_out_hit_rate(sample)
    assert abs(observed - expected) < 1e-9
def test_leave_one_out_hit_rate_toate_singletons():
    """All singletons: hit rate is 0 (every occurrence is a first occurrence)."""
    from holdout import leave_one_out_hit_rate

    only_singletons = [("A", 1), ("B", 1), ("C", 1)]
    observed = leave_one_out_hit_rate(only_singletons)
    assert observed == 0.0
def test_leave_one_out_hit_rate_gol():
    """An empty list returns 0.0 without raising."""
    from holdout import leave_one_out_hit_rate

    observed = leave_one_out_hit_rate([])
    assert observed == 0.0
# ---------------------------------------------------------------------------
# singleton_stats
# ---------------------------------------------------------------------------
def test_singleton_stats_calcul():
    """Singleton statistics are computed correctly."""
    from holdout import singleton_stats

    # Total volume = 102, with 2 singletons among 3 distinct names.
    summary = singleton_stats([("A", 100), ("B", 1), ("C", 1)])
    assert summary["singleton_count"] == 2
    assert summary["total_distinct"] == 3
    volume_frac = summary["singleton_volume_frac"]
    assert abs(volume_frac - 2 / 102) < 1e-9
    distinct_frac = summary["singleton_distinct_frac"]
    assert abs(distinct_frac - 2 / 3) < 1e-9
def test_singleton_stats_fara_singletons():
    """Without singletons, the singleton count and both singleton fractions are 0."""
    from holdout import singleton_stats

    rows = [("A", 5), ("B", 10)]
    stats = singleton_stats(rows)
    assert stats["singleton_count"] == 0
    assert stats["singleton_volume_frac"] == 0.0
    # The distinct-name fraction must be zero as well; previously only the
    # volume fraction was checked even though the test promises "all fractions".
    assert stats["singleton_distinct_frac"] == 0.0
# ---------------------------------------------------------------------------
# normalize_for_match: cheia de potrivire refolosita din app/mapping.py
# ---------------------------------------------------------------------------
def test_normalize_for_match_diacritice():
    """normalize_key maps diacritic and letter-case variants to the same key."""
    from holdout import normalize_key

    # Each pair is (variant, reference); both must normalize to the same key.
    pairs = (
        ("Reparație motor", "Reparatie motor"),  # with vs. without diacritics
        ("REPARATIE MOTOR", "Reparatie motor"),  # upper case vs. mixed case
    )
    for variant, reference in pairs:
        assert normalize_key(variant) == normalize_key(reference)
def test_normalize_for_match_spatii():
    """Runs of multiple spaces collapse, so spacing does not change the key."""
    from holdout import normalize_key

    # Build the multi-space input explicitly so the run of spaces survives any
    # tool that normalizes whitespace in source text; with single spaces only,
    # this test would exercise case-folding and never the space collapsing.
    spaced = "revizie" + " " * 3 + "periodica"
    assert normalize_key(spaced) == normalize_key("REVIZIE PERIODICA")
    # The single-space, lower-case variant must still map to the same key.
    assert normalize_key("revizie periodica") == normalize_key("REVIZIE PERIODICA")
# ---------------------------------------------------------------------------
# run_holdout: structura si verdict
# ---------------------------------------------------------------------------
def test_run_holdout_campuri_obligatorii():
    """run_holdout returns every expected field."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    # Required keys, grouped by theme; the check order matches the listing order.
    identity_fields = ["client", "total_distinct", "total_volume"]
    coverage_fields = ["coverage_at_100", "coverage_at_500", "coverage_at_1000"]
    threshold_fields = ["labels_for_90pct", "frac_for_90pct"]
    hit_rate_fields = ["leave_one_out_hit_rate"]
    singleton_fields = [
        "singleton_count",
        "singleton_distinct_frac",
        "singleton_volume_frac",
    ]
    verdict_fields = ["verdict", "nota"]
    campuri = (
        identity_fields
        + coverage_fields
        + threshold_fields
        + hit_rate_fields
        + singleton_fields
        + verdict_fields
    )
    for camp in campuri:
        assert camp in result, f"Camp lipsa: {camp}"
def test_run_holdout_client_name():
    """The client_name argument is carried through to the result unchanged."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    reported_client = report["client"]
    assert reported_client == "test_client"
def test_run_holdout_verdict_valid():
    """The verdict is one of the defined values."""
    from holdout import run_holdout

    allowed_verdicts = ("SUSTINUTA", "SLABA", "NEVALIDABILA")
    report = run_holdout(FIXTURE, client_name="test_client")
    assert report["verdict"] in allowed_verdicts
def test_run_holdout_total_volum():
    """total_volume equals the sum of the counts in the fixture."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    reported_volume = report["total_volume"]
    assert reported_volume == FIXTURE_TOTAL_VOL
def test_run_holdout_distinct():
    """total_distinct equals the number of rows in the fixture."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    reported_distinct = report["total_distinct"]
    assert reported_distinct == FIXTURE_DISTINCT
def test_run_holdout_verdict_sustinuta_pe_zipf_puternic():
    """On a strongly Zipf-like distribution (one name = 95% of volume) the verdict is SUSTINUTA."""
    from holdout import run_holdout

    # One dominant name with 9500 occurrences, followed by 500 singletons.
    rows = [("REVIZIE", 9500)]
    rows.extend((f"altceva_{i}", 1) for i in range(500))
    report = run_holdout(rows, client_name="zipf")
    assert report["verdict"] == "SUSTINUTA"
def test_run_holdout_verdict_slaba_pe_distributie_plata():
    """On a flat distribution (100 names, equal counts) the verdict is SLABA or NEVALIDABILA."""
    from holdout import run_holdout

    # 100 names, each with the same count of 100 (total volume 10000).
    rows = []
    for i in range(100):
        rows.append((f"op_{i}", 100))
    report = run_holdout(rows, client_name="uniform")
    # 90% of 10000 is 9000, which takes 90 of the 100 names (90% of the labels);
    # the original note expects NEVALIDABILA here, but SLABA is accepted too.
    acceptable = ("SLABA", "NEVALIDABILA")
    assert report["verdict"] in acceptable