"""TDD tests for tools/mapare-llm/holdout.py.

They verify the split logic and the hit-rate computation on a SYNTHETIC
fixture (not on real data). The fixture does not test the actual numbers
found in the CSV files, only the CORRECTNESS of the functions.
"""
from __future__ import annotations

import sys
import os

# Add tools/mapare-llm/ to the import path so holdout.py can be imported
# directly by the tests below.
sys.path.insert(
    0,
    os.path.join(os.path.dirname(__file__), '..', 'tools', 'mapare-llm'),
)

import pytest

# Synthetic fixture: 7 labels with varied frequencies (5 frequent ones plus
# 2 singletons).
# Total volume = 100 + 80 + 50 + 30 + 10 + 1 + 1 = 272
FIXTURE = [
    ("Revizie motor", 100),
    ("Schimb ulei", 80),
    ("Reglat frane", 50),
    ("Diagnosticare", 30),
    ("Curatenie interior", 10),
    ("Altceva rar A", 1),
    ("Altceva rar B", 1),
]

FIXTURE_TOTAL_VOL = sum(n for _, n in FIXTURE)  # 272
FIXTURE_DISTINCT = len(FIXTURE)  # 7


# ---------------------------------------------------------------------------
# compute_volume_coverage
# ---------------------------------------------------------------------------

def test_compute_volume_coverage_sorted_descrescator():
    """The first element must be the one with the largest count."""
    from holdout import compute_volume_coverage

    rows = [("A", 10), ("B", 90), ("C", 0)]
    # The zero-count row is filtered out before the call.
    result = compute_volume_coverage([r for r in rows if r[1] > 0])
    assert result[0]["denumire"] == "B"
    assert result[0]["nr"] == 90


def test_compute_volume_coverage_cumul():
    """The cumulative coverage is computed correctly."""
    from holdout import compute_volume_coverage

    rows = [("A", 90), ("B", 9), ("C", 1)]  # total=100
    result = compute_volume_coverage(rows)

    # Expected order after the descending sort: A(90), B(9), C(1).
    assert result[0]["denumire"] == "A"
    assert abs(result[0]["cumulative_volume_frac"] - 0.90) < 1e-9
    assert result[0]["cumulative_count"] == 1

    assert result[1]["denumire"] == "B"
    assert abs(result[1]["cumulative_volume_frac"] - 0.99) < 1e-9
    assert result[1]["cumulative_count"] == 2

    assert result[2]["denumire"] == "C"
    assert abs(result[2]["cumulative_volume_frac"] - 1.0) < 1e-9
    assert result[2]["cumulative_count"] == 3


def test_compute_volume_coverage_gol():
    """An empty list yields an empty list (no exceptions)."""
    from holdout import compute_volume_coverage

    assert compute_volume_coverage([]) == []


# ---------------------------------------------------------------------------
# corpus_size_for_threshold
# ---------------------------------------------------------------------------

def test_corpus_size_for_90pct():
    """The number of labels needed for 90% coverage is found correctly."""
    from holdout import corpus_size_for_threshold

    rows = [("A", 90), ("B", 9), ("C", 1)]  # total=100
    # A alone = 90% -> a single label is enough.
    assert corpus_size_for_threshold(rows, threshold=0.90) == 1


def test_corpus_size_for_99pct():
    """A 99% threshold needs 2 labels (A+B = 99/100)."""
    from holdout import corpus_size_for_threshold

    rows = [("A", 90), ("B", 9), ("C", 1)]
    assert corpus_size_for_threshold(rows, threshold=0.99) == 2


def test_corpus_size_for_100pct():
    """A 100% threshold needs all the labels."""
    from holdout import corpus_size_for_threshold

    rows = [("A", 90), ("B", 9), ("C", 1)]
    assert corpus_size_for_threshold(rows, threshold=1.0) == 3


# ---------------------------------------------------------------------------
# compute_hit_rate_at_k
# ---------------------------------------------------------------------------

def test_compute_hit_rate_at_k_1():
    """Top-1 label (A=90): hit-rate = 90/100 = 0.90."""
    from holdout import compute_hit_rate_at_k

    rows = [("A", 90), ("B", 9), ("C", 1)]
    assert abs(compute_hit_rate_at_k(rows, k=1) - 0.90) < 1e-9


def test_compute_hit_rate_at_k_2():
    """Top-2 labels (A+B=99): hit-rate = 0.99."""
    from holdout import compute_hit_rate_at_k

    rows = [("A", 90), ("B", 9), ("C", 1)]
    assert abs(compute_hit_rate_at_k(rows, k=2) - 0.99) < 1e-9


def test_compute_hit_rate_at_k_depasit():
    """k larger than the number of rows: hit-rate = 1.0."""
    from holdout import compute_hit_rate_at_k

    rows = [("A", 90), ("B", 10)]
    assert abs(compute_hit_rate_at_k(rows, k=100) - 1.0) < 1e-9


def test_compute_hit_rate_at_k_gol():
    """Empty list: hit-rate = 0.0 (no ZeroDivisionError)."""
    from holdout import compute_hit_rate_at_k

    assert compute_hit_rate_at_k([], k=10) == 0.0


# ---------------------------------------------------------------------------
# leave_one_out_hit_rate
# ---------------------------------------------------------------------------

def test_leave_one_out_hit_rate_formula():
    """Leave-first-out hit-rate: (total_vol - total_distinct) / total_vol.

    Interpretation: every occurrence after the first one is a hit (the label
    is already in the corpus). Singletons (count 1) contribute 0 hits.
    """
    from holdout import leave_one_out_hit_rate

    rows = [("A", 10), ("B", 5), ("C", 1)]  # total=16, distinct=3
    # Formula: (16 - 3) / 16 = 0.8125
    assert abs(leave_one_out_hit_rate(rows) - 13 / 16) < 1e-9


def test_leave_one_out_hit_rate_toate_singletons():
    """All singletons: hit-rate = 0 (every occurrence is a first one)."""
    from holdout import leave_one_out_hit_rate

    rows = [("A", 1), ("B", 1), ("C", 1)]
    assert leave_one_out_hit_rate(rows) == 0.0


def test_leave_one_out_hit_rate_gol():
    """Empty list: returns 0.0 without raising."""
    from holdout import leave_one_out_hit_rate

    assert leave_one_out_hit_rate([]) == 0.0


# ---------------------------------------------------------------------------
# singleton_stats
# ---------------------------------------------------------------------------

def test_singleton_stats_calcul():
    """The singleton statistics are computed correctly."""
    from holdout import singleton_stats

    rows = [("A", 100), ("B", 1), ("C", 1)]  # total=102, 2 singletons
    stats = singleton_stats(rows)
    assert stats["singleton_count"] == 2
    assert stats["total_distinct"] == 3
    assert abs(stats["singleton_volume_frac"] - 2 / 102) < 1e-9
    assert abs(stats["singleton_distinct_frac"] - 2 / 3) < 1e-9


def test_singleton_stats_fara_singletons():
    """No singletons: the count and both singleton fractions are 0."""
    from holdout import singleton_stats

    rows = [("A", 5), ("B", 10)]
    stats = singleton_stats(rows)
    assert stats["singleton_count"] == 0
    assert stats["singleton_volume_frac"] == 0.0
    assert stats["singleton_distinct_frac"] == 0.0


# ---------------------------------------------------------------------------
# normalize_for_match: the matching key reused from app/mapping.py
# ---------------------------------------------------------------------------

def test_normalize_for_match_diacritice():
    """Variants with and without diacritics (or in upper case) share a key."""
    from holdout import normalize_key

    # Spellings with and without diacritics -> same normalized key.
    assert normalize_key("Reparație motor") == normalize_key("Reparatie motor")
    assert normalize_key("REPARATIE MOTOR") == normalize_key("Reparatie motor")


def test_normalize_for_match_spatii():
    """Runs of multiple spaces are collapsed."""
    from holdout import normalize_key

    # The left-hand input deliberately contains several consecutive spaces;
    # with single spaces on both sides only case folding would be exercised.
    assert normalize_key("revizie   periodica") == normalize_key("REVIZIE PERIODICA")


# ---------------------------------------------------------------------------
# run_holdout: structure and verdict
# ---------------------------------------------------------------------------

def test_run_holdout_campuri_obligatorii():
    """run_holdout returns every expected field."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    campuri = [
        "client",
        "total_distinct",
        "total_volume",
        "coverage_at_100",
        "coverage_at_500",
        "coverage_at_1000",
        "labels_for_90pct",
        "frac_for_90pct",
        "leave_one_out_hit_rate",
        "singleton_count",
        "singleton_distinct_frac",
        "singleton_volume_frac",
        "verdict",
        "nota",
    ]
    for camp in campuri:
        assert camp in result, f"Camp lipsa: {camp}"


def test_run_holdout_client_name():
    """client_name is carried over unchanged into the result."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    assert result["client"] == "test_client"


def test_run_holdout_verdict_valid():
    """The verdict is one of the defined values."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    assert result["verdict"] in ("SUSTINUTA", "SLABA", "NEVALIDABILA")


def test_run_holdout_total_volum():
    """total_volume equals the sum of the counts in the fixture."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    assert result["total_volume"] == FIXTURE_TOTAL_VOL


def test_run_holdout_distinct():
    """total_distinct equals the number of rows in the fixture."""
    from holdout import run_holdout

    result = run_holdout(FIXTURE, client_name="test_client")
    assert result["total_distinct"] == FIXTURE_DISTINCT


def test_run_holdout_verdict_sustinuta_pe_zipf_puternic():
    """Strong Zipf distribution (one label = 95% of volume): SUSTINUTA."""
    from holdout import run_holdout

    # 9500 of 10000 occurrences belong to one label; the rest are singletons.
    rows = [("REVIZIE", 9500)] + [(f"altceva_{i}", 1) for i in range(500)]
    result = run_holdout(rows, client_name="zipf")
    assert result["verdict"] == "SUSTINUTA"


def test_run_holdout_verdict_slaba_pe_distributie_plata():
    """Uniform distribution (100 equally frequent labels): SLABA/NEVALIDABILA."""
    from holdout import run_holdout

    rows = [(f"op_{i}", 100) for i in range(100)]  # 100 labels, equal counts
    result = run_holdout(rows, client_name="uniform")
    # 90% of 100*100=10000 is 9000, which needs 90 of the 100 labels (90%),
    # so NEVALIDABILA is expected; SLABA is accepted as well.
    assert result["verdict"] in ("SLABA", "NEVALIDABILA")