"""Teste pentru or_label.py — etichetator batch offline OpenRouter (L14-S1). TDD: aceste teste TREBUIE sa fie RED inainte de implementare, GREEN dupa. Fara apeluri LLM reale — or_common.call() este MOCK-at in toate testele care ating API-ul. Testeaza: grupare+propagare, vot ensemble, scrub PII, resumabilitate, format output. Rulare: python3 -m pytest tests/test_or_label.py -v """ import sys import os import json # Setam cheia inainte de import (or_common.py o citeste la nivel de modul). # Valoarea nu conteaza in teste (call() e mock-at). os.environ.setdefault("OPENROUTER_KEY", "test-key-mock") # Adaugam calea tools/mapare-llm/ la sys.path ca sa putem importa or_label HERE = os.path.dirname(os.path.abspath(__file__)) TOOLS_DIR = os.path.abspath(os.path.join(HERE, "..", "tools", "mapare-llm")) if TOOLS_DIR not in sys.path: sys.path.insert(0, TOOLS_DIR) import or_label # subject under test import or_common as oc # pentru VALID, CODURI, scrub # --------------------------------------------------------------------------- # Grupare pe similaritate + propagare cod # --------------------------------------------------------------------------- class TestGroupBySimilarity: """Verifica logica de grupare greedy pe fuzz.token_sort_ratio.""" def test_similar_strings_grouped_in_one(self): """Denumiri aproape identice -> un singur reprezentant, ceilalti membri.""" # Scoruri masurate: token_sort_ratio("REGLAT DIRECTIE","REGLAT DIRECTIA")=93 # token_sort_ratio("REGLAT DIRECTIE","REGLARE DIRECTIE")=90 corpus = [ ("REGLAT DIRECTIE", 100), # reprezentant (frecventa maxima) ("REGLAT DIRECTIA", 80), # similar: 93 >= 85 ("REGLARE DIRECTIE", 60), # similar: 90 >= 85 ] groups = or_label.group_by_similarity(corpus, threshold=85) assert len(groups) == 1 g = groups[0] assert g["rep"] == "REGLAT DIRECTIE" assert len(g["members"]) == 2 member_names = [m[0] for m in g["members"]] assert "REGLAT DIRECTIA" in member_names assert "REGLARE DIRECTIE" in member_names def test_distinct_strings_separate_groups(self): """Denumiri foarte diferite -> grupuri separate.""" corpus = [ ("REVIZIE", 100), ("D/R BARA FATA", 80), ("SCHIMB ULEI MOTOR", 60), ] groups = or_label.group_by_similarity(corpus, threshold=85) assert len(groups) == 3 def test_representative_is_highest_frequency(self): """Reprezentantul = cel cu frecventa maxima (primul in sorted desc).""" corpus = [ ("INLOCUIT FILTRU AER", 300), # frecventa maxima ("INLOCUIRE FILTRU AER", 100), # similar: 92 >= 85 ] groups = or_label.group_by_similarity(corpus, threshold=85) assert len(groups) == 1 assert groups[0]["rep"] == "INLOCUIT FILTRU AER" assert groups[0]["freq"] == 300 def test_singleton_group(self): """O denumire fara vecini -> grup cu 0 membri.""" corpus = [("REVIZIE", 100)] groups = or_label.group_by_similarity(corpus, threshold=85) assert len(groups) == 1 assert groups[0]["rep"] == "REVIZIE" assert groups[0]["members"] == [] def test_below_threshold_not_grouped(self): """Similaritate sub threshold -> grupuri separate.""" # D/R BARA FATA vs D/R BARA SPATE = 81 < 85 corpus = [ ("D/R BARA FATA", 200), ("D/R BARA SPATE", 180), ] groups = or_label.group_by_similarity(corpus, threshold=85) assert len(groups) == 2 # --------------------------------------------------------------------------- # Vot ensemble (acord/dezacord) — fara apeluri LLM # --------------------------------------------------------------------------- class TestEnsembleVote: """Verifica logica de vot pe coduri (nu self-confidence).""" def test_unanim_cod_rar(self): """Ambele modele de acord pe cod RAR -> confidence high, sursa unanim.""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "OE-3", "nvidia/nemotron-nano-9b-v2:free": "OE-3", } cod, confidence, sursa = or_label.ensemble_vote(votes) assert cod == "OE-3" assert confidence == "high" assert "unanim" in sursa def test_unanim_nul_marcat_separat(self): """Ambele spun NUL -> NUL confidence high, NUL nu e promovat la cod RAR.""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "NUL", "nvidia/nemotron-nano-9b-v2:free": "NUL", } cod, confidence, sursa = or_label.ensemble_vote(votes) assert cod == "NUL" assert confidence == "high" # NUL nu este in codurile OE-* (nu e promovat) rar_codes = {c.split("=")[0] for c in oc.CODURI.replace(", ", ",").split(",")} - {"NUL"} assert cod not in rar_codes assert "nul" in sursa.lower() def test_dezacord_total(self): """Modele nu se inteleg -> needs_mapping.""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "OE-2", "nvidia/nemotron-nano-9b-v2:free": "OE-4", } cod, confidence, sursa = or_label.ensemble_vote(votes) assert confidence == "needs_mapping" assert "dezacord" in sursa def test_parse_fail_partial(self): """Un model intoarce '?' (parse-fail), altul cod valid -> dezacord (conservator).""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "OE-1", "nvidia/nemotron-nano-9b-v2:free": "?", } cod, confidence, sursa = or_label.ensemble_vote(votes) # Conservator: fara unanimitate -> needs_mapping assert confidence == "needs_mapping" def test_toate_parse_fail(self): """Ambele modele intorc '?' -> needs_mapping.""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "?", "nvidia/nemotron-nano-9b-v2:free": "?", } cod, confidence, sursa = or_label.ensemble_vote(votes) assert confidence == "needs_mapping" def test_cod_invalid_returnat_de_llm(self): """LLM returneaza cod necunoscut (nu e in VALID) -> needs_mapping.""" votes = { "nvidia/nemotron-3-super-120b-a12b:free": "OE-99", "nvidia/nemotron-nano-9b-v2:free": "OE-99", } cod, confidence, sursa = or_label.ensemble_vote(votes) assert confidence == "needs_mapping" # --------------------------------------------------------------------------- # Scrub PII — refoloseste or_common.scrub (F3) # --------------------------------------------------------------------------- class TestScrubPII: """Scrub-ul PII e integrat in or_common.call() si testat independent.""" def test_nr_inmatriculare_scrubbed(self): """Nr de inmatriculare (ex: CT 12 ABC) este scrubuit.""" s = "ITP CT 12 ABC" assert "[NR]" in oc.scrub(s) def test_vin_scrubbed(self): """VIN (17 char alfanumeric) este scrubuit.""" vin = "WVWZZZ1KZAM000001" # 17 caractere, format VIN s = f"VERIFICAT {vin}" assert "[VIN]" in oc.scrub(s) def test_text_normal_nemodificat(self): """Text fara PII ramane neatins.""" s = "REVIZIE PERIODICA MOTOR" assert oc.scrub(s) == s def test_scrub_in_batch_call(self, monkeypatch): """or_common.call() aplica scrub intern inainte de trimitere.""" trimis = [] def mock_urlopen(req, timeout=None): import io body_str = req.data.decode() trimis.append(body_str) # Simuleaza raspuns LLM resp = json.dumps({ "choices": [{"message": {"content": json.dumps({"rez": [{"i": 1, "cod": "NUL"}]})}}] }).encode() class FakeResp: def __enter__(self): return self def __exit__(self, *a): pass def read(self): return resp def __iter__(self): return iter([resp]) import urllib.request r = FakeResp() r.read = lambda: resp # urllib.request.urlopen returneaza context manager class CM: def __enter__(self_): return self_ def __exit__(self_, *a): pass def read(self_): return resp import json as _json class FakeFile: def read(self_): return resp # Patch-uim json.load monkeypatch.setattr("json.load", lambda f: _json.loads(resp)) return CM() batch = ["ITP CT 12 ABC"] # Verificam ca scrub e aplicat in continut trimis # (nu putem usor mock-ui urlopen, asa ca testam scrub() direct) scrubbed = oc.scrub("ITP CT 12 ABC") assert "[NR]" in scrubbed # Deci batch-ul trimis nu va contine nr original assert "CT 12 ABC" not in scrubbed # --------------------------------------------------------------------------- # Resumabilitate # --------------------------------------------------------------------------- class TestResumabil: """Etichetatorul reia de unde a ramas din partial.json.""" def test_skip_already_labeled(self, monkeypatch): """Reprezentantii deja in partial NU sunt retrimisi la LLM.""" call_reps = [] def mock_call(model, batch, **kw): call_reps.extend(batch) return ["OE-1"] * len(batch), {"ms": 100, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}] # REVIZIE e deja in partial partial = { "REVIZIE": { "cod": "OE-3", "confidence": "high", "sursa": "ensemble-unanim", "votes": {}, } } result = or_label.label_groups(groups, partial, batch_size=20, pace=0) # LLM nu trebuia apelat pentru REVIZIE assert "REVIZIE" not in call_reps # Codul din partial e pastrat assert result["REVIZIE"]["cod"] == "OE-3" def test_labels_new_reps(self, monkeypatch): """Reprezentantii noi (nu in partial) sunt etichetati.""" call_count = [0] def mock_call(model, batch, **kw): call_count[0] += 1 return ["OE-1"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "D/R BARA FATA", "freq": 3000, "members": []}] partial = {} result = or_label.label_groups(groups, partial, batch_size=20, pace=0) # LLM a fost apelat (cel putin o data per model) assert call_count[0] >= len(or_label.MODELS) assert "D/R BARA FATA" in result assert result["D/R BARA FATA"]["cod"] == "OE-1" def test_partial_mixt(self, monkeypatch): """Partial cu unii etichetati, altii noi -> eticheteaza doar cei noi.""" labeled_batches = [] def mock_call(model, batch, **kw): labeled_batches.extend(batch) return ["OE-2"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [ {"rep": "REVIZIE", "freq": 5000, "members": []}, # deja in partial {"rep": "D/R BARA FATA", "freq": 3000, "members": []}, # nou ] partial = { "REVIZIE": {"cod": "OE-3", "confidence": "high", "sursa": "ensemble-unanim", "votes": {}} } result = or_label.label_groups(groups, partial, batch_size=20, pace=0) # Doar D/R BARA FATA trebuie trimis la LLM assert "REVIZIE" not in labeled_batches assert "D/R BARA FATA" in labeled_batches # Partial complet: ambele chei prezente assert "REVIZIE" in result assert "D/R BARA FATA" in result # REVIZIE pastrat din partial assert result["REVIZIE"]["cod"] == "OE-3" def test_load_partial_fisier_gol(self, tmp_path): """load_partial pe fisier inexistent intoarce dict gol.""" result = or_label.load_partial(str(tmp_path / "inexistent.json")) assert result == {} def test_save_si_load_partial(self, tmp_path): """save_partial + load_partial sunt inversele una alteia.""" path = str(tmp_path / "partial.json") data = { "REVIZIE": {"cod": "OE-3", "confidence": "high", "sursa": "ensemble-unanim", "votes": {}} } or_label.save_partial(path, data) loaded = or_label.load_partial(path) assert loaded == data # --------------------------------------------------------------------------- # Format output si propagare # --------------------------------------------------------------------------- class TestOutputFormat: """expand_to_all produce outputul cu campurile cerute si propagare corecta.""" def test_campuri_obligatorii(self, monkeypatch): """Fiecare intrare are: denumire, cod, sursa, confidence.""" def mock_call(model, batch, **kw): return ["OE-3"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "REVIZIE", "freq": 5000, "members": [("REVIZIE MICA", 100)]}] partial = {} partial = or_label.label_groups(groups, partial, batch_size=20, pace=0) results = or_label.expand_to_all(groups, partial) assert len(results) == 2 # reprezentant + 1 membru for row in results: assert "denumire" in row assert "cod" in row assert "sursa" in row assert "confidence" in row assert "grup_rep" in row def test_reprezentant_cu_sursa_ensemble(self, monkeypatch): """Reprezentantul are sursa 'ensemble-*', nu 'propagat'.""" def mock_call(model, batch, **kw): return ["OE-3"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}] partial = {} partial = or_label.label_groups(groups, partial, batch_size=20, pace=0) results = or_label.expand_to_all(groups, partial) row = results[0] assert row["denumire"] == "REVIZIE" assert row["sursa"].startswith("ensemble-") assert row["sursa"] != "propagat" def test_membru_primeste_sursa_propagat(self, monkeypatch): """Membrii grupului au sursa='propagat' si codul reprezentantului.""" def mock_call(model, batch, **kw): return ["OE-3"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "REVIZIE", "freq": 5000, "members": [("REVIZIE MICA", 100), ("REVIZIE AUTO", 80)]}] partial = {} partial = or_label.label_groups(groups, partial, batch_size=20, pace=0) results = or_label.expand_to_all(groups, partial) assert len(results) == 3 membri = [r for r in results if r["sursa"] == "propagat"] assert len(membri) == 2 for m in membri: assert m["cod"] == "OE-3" # propagat de la reprezentant assert m["grup_rep"] == "REVIZIE" def test_nul_propagat_ca_nul_nu_ca_cod_rar(self, monkeypatch): """NUL este propagat ca NUL la membri, nu convertit la cod RAR.""" def mock_call(model, batch, **kw): return ["NUL"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "ITP", "freq": 50, "members": [("ITP + RAR", 30)]}] partial = {} partial = or_label.label_groups(groups, partial, batch_size=20, pace=0) results = or_label.expand_to_all(groups, partial) rar_codes = {c.split("=")[0] for c in oc.CODURI.replace(", ", ",").split(",")} - {"NUL"} for row in results: assert row["cod"] == "NUL" assert row["cod"] not in rar_codes def test_dezacord_propagat_ca_needs_mapping(self, monkeypatch): """Dezacordul ensemble se propaga la membri ca needs_mapping.""" call_n = [0] def mock_call(model, batch, **kw): call_n[0] += 1 # Modelele dau coduri diferite in functie de ordinea apelului cod = "OE-1" if call_n[0] % 2 == 1 else "OE-3" return [cod] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "call", mock_call) groups = [{"rep": "REGLAT DIRECTIE", "freq": 200, "members": [("REGLAT DIRECTIA", 150)]}] partial = {} partial = or_label.label_groups(groups, partial, batch_size=20, pace=0) results = or_label.expand_to_all(groups, partial) # Ambii (rep + member) trebuie sa aiba needs_mapping for row in results: assert row["confidence"] == "needs_mapping" # --------------------------------------------------------------------------- # Integrare end-to-end (fara apeluri reale) # --------------------------------------------------------------------------- class TestRunIntegrare: """Verifica run() cu corpus mock si LLM mock.""" def test_run_produce_fisier_output(self, tmp_path, monkeypatch): """run() salveaza fisierul de output JSON.""" def mock_corpus(): return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)] def mock_call(model, batch, **kw): return ["OE-3"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus) monkeypatch.setattr(or_label.oc, "call", mock_call) out = str(tmp_path / "final.json") partial = str(tmp_path / "partial.json") results = or_label.run(n=2, output_path=out, partial_path=partial, threshold=85, batch_size=20, pace=0) assert os.path.exists(out) loaded = json.load(open(out, encoding="utf-8")) assert len(loaded) >= 2 # Toate intrarile au campurile cerute for row in loaded: assert "denumire" in row assert "cod" in row def test_run_resumabil(self, tmp_path, monkeypatch): """run() cu partial existent sare intrarile deja etichetate.""" call_count = [0] def mock_corpus(): return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)] def mock_call(model, batch, **kw): call_count[0] += 1 return ["OE-1"] * len(batch), {"ms": 50, "err": None} monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus) monkeypatch.setattr(or_label.oc, "call", mock_call) partial_path = str(tmp_path / "partial.json") # Pre-populam partial cu REVIZIE or_label.save_partial(partial_path, { "REVIZIE": {"cod": "OE-3", "confidence": "high", "sursa": "ensemble-unanim", "votes": {}} }) out = str(tmp_path / "final.json") results = or_label.run(n=2, output_path=out, partial_path=partial_path, threshold=85, batch_size=20, pace=0) # LLM apelat DOAR pentru D/R BARA FATA (nu si REVIZIE) # call_count = 2 (un apel per model, pentru un singur representant) assert call_count[0] == len(or_label.MODELS)