5.15 (propagare design + dashboard editare) si 5.14 (mapare LLM distilata) inchise dupa /code-review high. 8 buguri reparate TDD: - HIGH modal nu se deschidea pe randul slim (base.html: trimitere-slim) - HIGH /repune trunchia prestatii (declaratie incompleta la RAR) -> iterare peste existing, codes pozitional - HIGH embeddings incarca model ~230MB degeaba pe corpus gol -> poarta has_corpus() - HIGH picker chips gol pe re-render eroare -> conn/account_id pe toate ramurile - MED obs re-derivat dupa stergere explicita -> _merge_override pastreaza obs='' - MED mapare salvata fara denumire poluă GOLD -> _record_gold_validation guard - MED typo nome_prestatie -> nume_prestatie in select /repune - MED bucketare timp +3h gresita iarna -> SQLite localtime + TZ=Europe/Bucharest Embeddings WIRE-uit functional (PRD #15, decizie user): ensure_embeddings_corpus construieste corpus din nomenclator, gated pe AUTOPASS_EMBEDDINGS_ENABLED (default off). Marime model corectata ~50MB->~230MB (estimare PRD gresita). Cleanup: hoist load_* din bucla bulk-fix; import re la top. Regresie: 1256 passed, 1 deselected (live), 0 failed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
287 lines
9.7 KiB
Python
287 lines
9.7 KiB
Python
"""Teste TDD pentru tools/mapare-llm/holdout.py.
|
|
|
|
Verifica logica de split + calcul hit-rate pe un fixture SINTETIC (nu pe date reale).
|
|
Fixture-ul nu testeaza numerele efective pe CSV-uri, ci CORECTITUDINEA functiilor.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Adaugam tools/mapare-llm/ in path pentru import direct al holdout.py
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'tools', 'mapare-llm'))
|
|
|
|
import pytest
|
|
|
|
# Synthetic fixture: seven labels with different frequencies.
# Total volume = 100 + 80 + 50 + 30 + 10 + 1 + 1 = 272
FIXTURE = [
    ("Revizie motor", 100),
    ("Schimb ulei", 80),
    ("Reglat frane", 50),
    ("Diagnosticare", 30),
    ("Curatenie interior", 10),
    ("Altceva rar A", 1),
    ("Altceva rar B", 1),
]
# Sum of the per-label counts (second tuple element) -> 272.
FIXTURE_TOTAL_VOL = sum(row[1] for row in FIXTURE)
# Number of rows in the fixture -> 7.
FIXTURE_DISTINCT = len(FIXTURE)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# compute_volume_coverage
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_compute_volume_coverage_sorted_descrescator():
    """The first result entry must be the label with the highest count."""
    from holdout import compute_volume_coverage

    candidates = [("A", 10), ("B", 90), ("C", 0)]
    # Zero-count rows are filtered out before the call.
    positive_only = [row for row in candidates if row[1] > 0]
    coverage = compute_volume_coverage(positive_only)

    assert coverage[0]["denumire"] == "B"
    assert coverage[0]["nr"] == 90
|
|
|
|
|
|
def test_compute_volume_coverage_cumul():
    """Cumulative volume fraction and cumulative count are correct per rank."""
    from holdout import compute_volume_coverage

    coverage = compute_volume_coverage([("A", 90), ("B", 9), ("C", 1)])  # total = 100

    # Expected order after the descending sort: A(90), B(9), C(1).
    # Each tuple is (label, cumulative volume fraction, cumulative count).
    expected = [
        ("A", 0.90, 1),
        ("B", 0.99, 2),
        ("C", 1.0, 3),
    ]
    for rank, (label, frac, count) in enumerate(expected):
        assert coverage[rank]["denumire"] == label
        assert abs(coverage[rank]["cumulative_volume_frac"] - frac) < 1e-9
        assert coverage[rank]["cumulative_count"] == count
|
|
|
|
|
|
def test_compute_volume_coverage_gol():
    """An empty input yields an empty list (and raises nothing)."""
    from holdout import compute_volume_coverage

    coverage = compute_volume_coverage([])
    assert coverage == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# corpus_size_for_threshold
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_corpus_size_for_90pct():
    """The number of labels needed for 90% coverage is found correctly."""
    from holdout import corpus_size_for_threshold

    distribution = [("A", 90), ("B", 9), ("C", 1)]  # total = 100
    # "A" alone accounts for 90 of 100, so a single label is enough.
    labels_needed = corpus_size_for_threshold(distribution, threshold=0.90)
    assert labels_needed == 1
|
|
|
|
|
|
def test_corpus_size_for_99pct():
    """A 99% threshold needs two labels (A + B = 99 of 100)."""
    from holdout import corpus_size_for_threshold

    distribution = [("A", 90), ("B", 9), ("C", 1)]
    labels_needed = corpus_size_for_threshold(distribution, threshold=0.99)
    assert labels_needed == 2
|
|
|
|
|
|
def test_corpus_size_for_100pct():
    """A 100% threshold needs every label."""
    from holdout import corpus_size_for_threshold

    distribution = [("A", 90), ("B", 9), ("C", 1)]
    labels_needed = corpus_size_for_threshold(distribution, threshold=1.0)
    assert labels_needed == 3
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# compute_hit_rate_at_k
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_compute_hit_rate_at_k_1():
    """Top-1 label (A = 90): hit-rate is 90/100 = 0.90."""
    from holdout import compute_hit_rate_at_k

    distribution = [("A", 90), ("B", 9), ("C", 1)]
    hit_rate = compute_hit_rate_at_k(distribution, k=1)
    assert abs(hit_rate - 0.90) < 1e-9
|
|
|
|
|
|
def test_compute_hit_rate_at_k_2():
    """Top-2 labels (A + B = 99): hit-rate is 0.99."""
    from holdout import compute_hit_rate_at_k

    distribution = [("A", 90), ("B", 9), ("C", 1)]
    hit_rate = compute_hit_rate_at_k(distribution, k=2)
    assert abs(hit_rate - 0.99) < 1e-9
|
|
|
|
|
|
def test_compute_hit_rate_at_k_depasit():
    """When k exceeds the number of rows, the hit-rate is 1.0."""
    from holdout import compute_hit_rate_at_k

    distribution = [("A", 90), ("B", 10)]
    hit_rate = compute_hit_rate_at_k(distribution, k=100)
    assert abs(hit_rate - 1.0) < 1e-9
|
|
|
|
|
|
def test_compute_hit_rate_at_k_gol():
    """An empty input gives a hit-rate of 0.0 (no ZeroDivisionError)."""
    from holdout import compute_hit_rate_at_k

    hit_rate = compute_hit_rate_at_k([], k=10)
    assert hit_rate == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# leave_one_out_hit_rate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_leave_one_out_hit_rate_formula():
    """Leave-first-out hit-rate: (total_vol - total_distinct) / total_vol.

    Interpretation: every occurrence after the first one counts as a hit
    (the label is already in the corpus). Singletons (count 1) contribute
    zero hits.
    """
    from holdout import leave_one_out_hit_rate

    distribution = [("A", 10), ("B", 5), ("C", 1)]  # total = 16, distinct = 3
    # Formula: (16 - 3) / 16 = 0.8125
    observed = leave_one_out_hit_rate(distribution)
    assert abs(observed - 13 / 16) < 1e-9
|
|
|
|
|
|
def test_leave_one_out_hit_rate_toate_singletons():
    """All singletons: hit-rate is 0 (every occurrence is a first one)."""
    from holdout import leave_one_out_hit_rate

    only_singletons = [("A", 1), ("B", 1), ("C", 1)]
    observed = leave_one_out_hit_rate(only_singletons)
    assert observed == 0.0
|
|
|
|
|
|
def test_leave_one_out_hit_rate_gol():
    """An empty input returns 0.0 without raising."""
    from holdout import leave_one_out_hit_rate

    observed = leave_one_out_hit_rate([])
    assert observed == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# singleton_stats
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_singleton_stats_calcul():
    """Singleton statistics are computed correctly."""
    from holdout import singleton_stats

    # total = 102, two singletons ("B" and "C")
    summary = singleton_stats([("A", 100), ("B", 1), ("C", 1)])

    assert summary["singleton_count"] == 2
    assert summary["total_distinct"] == 3

    expected_volume_frac = 2 / 102
    expected_distinct_frac = 2 / 3
    assert abs(summary["singleton_volume_frac"] - expected_volume_frac) < 1e-9
    assert abs(summary["singleton_distinct_frac"] - expected_distinct_frac) < 1e-9
|
|
|
|
|
|
def test_singleton_stats_fara_singletons():
    """With no singletons, the singleton count and volume fraction are zero."""
    from holdout import singleton_stats

    summary = singleton_stats([("A", 5), ("B", 10)])

    assert summary["singleton_count"] == 0
    assert summary["singleton_volume_frac"] == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# normalize_for_match: cheia de potrivire refolosita din app/mapping.py
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_normalize_for_match_diacritice():
    """normalize_key maps diacritic and case variants to the same key."""
    from holdout import normalize_key

    plain = "Reparatie motor"
    # Variants with diacritics or different casing must normalise to the
    # same key as the plain spelling.
    for variant in ("Reparație motor", "REPARATIE MOTOR"):
        assert normalize_key(variant) == normalize_key(plain)
|
|
|
|
|
|
def test_normalize_for_match_spatii():
    """Runs of spaces collapse, so spacing variants share one key.

    The first assertion is the original check (single spaces, different
    case). The second one actually feeds a run of several spaces, which is
    the behaviour this test is named for; without it the test only repeats
    the case-folding check.
    """
    from holdout import normalize_key

    expected = normalize_key("REVIZIE PERIODICA")
    assert normalize_key("revizie periodica") == expected
    # Multiple consecutive spaces must normalise to the same key.
    assert normalize_key("revizie   periodica") == expected
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_holdout: structura si verdict
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_holdout_campuri_obligatorii():
    """run_holdout returns every expected field."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")

    required_fields = (
        "client",
        "total_distinct",
        "total_volume",
        "coverage_at_100",
        "coverage_at_500",
        "coverage_at_1000",
        "labels_for_90pct",
        "frac_for_90pct",
        "leave_one_out_hit_rate",
        "singleton_count",
        "singleton_distinct_frac",
        "singleton_volume_frac",
        "verdict",
        "nota",
    )
    # Checked one by one so the failure message names the missing field.
    for camp in required_fields:
        assert camp in report, f"Camp lipsa: {camp}"
|
|
|
|
|
|
def test_run_holdout_client_name():
    """client_name is carried through unchanged into the result."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    client = report["client"]
    assert client == "test_client"
|
|
|
|
|
|
def test_run_holdout_verdict_valid():
    """The verdict is one of the defined values."""
    from holdout import run_holdout

    allowed_verdicts = ("SUSTINUTA", "SLABA", "NEVALIDABILA")
    report = run_holdout(FIXTURE, client_name="test_client")
    assert report["verdict"] in allowed_verdicts
|
|
|
|
|
|
def test_run_holdout_total_volum():
    """total_volume equals the sum of the counts in the fixture."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    total_volume = report["total_volume"]
    assert total_volume == FIXTURE_TOTAL_VOL
|
|
|
|
|
|
def test_run_holdout_distinct():
    """total_distinct equals the number of rows in the fixture."""
    from holdout import run_holdout

    report = run_holdout(FIXTURE, client_name="test_client")
    total_distinct = report["total_distinct"]
    assert total_distinct == FIXTURE_DISTINCT
|
|
|
|
|
|
def test_run_holdout_verdict_sustinuta_pe_zipf_puternic():
    """A strongly Zipf-like distribution (one label = 95% of volume) is SUSTINUTA."""
    from holdout import run_holdout

    # One dominant label with 9500 occurrences plus 500 singletons.
    long_tail = [(f"altceva_{idx}", 1) for idx in range(500)]
    distribution = [("REVIZIE", 9500), *long_tail]

    report = run_holdout(distribution, client_name="zipf")
    assert report["verdict"] == "SUSTINUTA"
|
|
|
|
|
|
def test_run_holdout_verdict_slaba_pe_distributie_plata():
    """A flat distribution (100 labels, equal counts) is SLABA or NEVALIDABILA."""
    from holdout import run_holdout

    # 100 labels, each with the same count of 100.
    distribution = [(f"op_{idx}", 100) for idx in range(100)]
    report = run_holdout(distribution, client_name="uniform")

    # 90% of 100 * 100 = 10000 is 9000, which takes 90 of the 100 labels;
    # the original author expected NEVALIDABILA, but SLABA is accepted too.
    acceptable = ("SLABA", "NEVALIDABILA")
    assert report["verdict"] in acceptable
|