rar-autopass/tests/test_or_label.py

"""Teste pentru or_label.py — etichetator batch offline OpenRouter (L14-S1).

TDD: aceste teste TREBUIE sa fie RED inainte de implementare, GREEN dupa.
Fara apeluri LLM reale — or_common.call() este MOCK-at in toate testele
care ating API-ul. Testeaza: grupare+propagare, vot ensemble, scrub PII,
resumabilitate, format output.

Rulare: python3 -m pytest tests/test_or_label.py -v
"""
import sys
import os
import json

# Set the API key before importing: per the original author's note,
# or_common.py reads it at module level. The value itself is irrelevant
# in tests because call() is mocked.
os.environ.setdefault("OPENROUTER_KEY", "test-key-mock")

# Put tools/mapare-llm/ (relative to this tests directory) on sys.path so
# that or_label and or_common can be imported below.
HERE = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.abspath(os.path.join(HERE, "..", "tools", "mapare-llm"))
if TOOLS_DIR not in sys.path:
    sys.path.insert(0, TOOLS_DIR)

import or_label        # subject under test
import or_common as oc # for VALID, CODURI, scrub


# ---------------------------------------------------------------------------
# Grupare pe similaritate + propagare cod
# ---------------------------------------------------------------------------

class TestGroupBySimilarity:
    """Checks for or_label.group_by_similarity.

    The corpus is a list of (name, frequency) pairs. The similarity scores
    quoted in the comments are the ones recorded by the original author
    (token_sort_ratio); they are not recomputed here.
    """

    def test_similar_strings_grouped_in_one(self):
        """Near-identical names collapse into one group: a rep plus members."""
        # Recorded scores against "REGLAT DIRECTIE":
        #   "REGLAT DIRECTIA"  -> 93 (>= 85)
        #   "REGLARE DIRECTIE" -> 90 (>= 85)
        head = ("REGLAT DIRECTIE", 100)   # highest frequency -> representative
        variant_a = ("REGLAT DIRECTIA", 80)
        variant_b = ("REGLARE DIRECTIE", 60)

        groups = or_label.group_by_similarity(
            [head, variant_a, variant_b], threshold=85
        )

        assert len(groups) == 1
        only = groups[0]
        assert only["rep"] == "REGLAT DIRECTIE"
        assert len(only["members"]) == 2
        names = [member[0] for member in only["members"]]
        assert "REGLAT DIRECTIA" in names
        assert "REGLARE DIRECTIE" in names

    def test_distinct_strings_separate_groups(self):
        """Very different names each end up in their own group."""
        names = ("REVIZIE", "D/R BARA FATA", "SCHIMB ULEI MOTOR")
        freqs = (100, 80, 60)
        corpus = list(zip(names, freqs))

        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 3

    def test_representative_is_highest_frequency(self):
        """The representative is the highest-frequency entry of the group."""
        # Recorded score between the two names: 92 (>= 85).
        corpus = [("INLOCUIT FILTRU AER", 300), ("INLOCUIRE FILTRU AER", 100)]

        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 1
        first = groups[0]
        assert first["rep"] == "INLOCUIT FILTRU AER"
        assert first["freq"] == 300

    def test_singleton_group(self):
        """A name with no neighbours yields a group with zero members."""
        groups = or_label.group_by_similarity([("REVIZIE", 100)], threshold=85)

        assert len(groups) == 1
        lone = groups[0]
        assert lone["rep"] == "REVIZIE"
        assert lone["members"] == []

    def test_below_threshold_not_grouped(self):
        """Pairs scoring under the threshold stay in separate groups."""
        # Recorded score: "D/R BARA FATA" vs "D/R BARA SPATE" = 81 (< 85).
        front = ("D/R BARA FATA", 200)
        rear = ("D/R BARA SPATE", 180)

        groups = or_label.group_by_similarity([front, rear], threshold=85)

        assert len(groups) == 2


# ---------------------------------------------------------------------------
# Vot ensemble (acord/dezacord) — fara apeluri LLM
# ---------------------------------------------------------------------------

class TestEnsembleVote:
    """Checks for or_label.ensemble_vote: voting on codes, no LLM calls.

    Each test feeds a {model_name: code} mapping for two models and inspects
    the returned (cod, confidence, sursa) triple.
    """

    # The two model identifiers used as vote keys throughout this class.
    _MODEL_LARGE = "nvidia/nemotron-3-super-120b-a12b:free"
    _MODEL_SMALL = "nvidia/nemotron-nano-9b-v2:free"

    def _vote(self, large_code, small_code):
        """Run ensemble_vote with one code per model, large model first."""
        ballot = {
            self._MODEL_LARGE: large_code,
            self._MODEL_SMALL: small_code,
        }
        return or_label.ensemble_vote(ballot)

    def test_unanim_cod_rar(self):
        """Both models agree on a RAR code -> high confidence, unanimous source."""
        cod, confidence, sursa = self._vote("OE-3", "OE-3")
        assert cod == "OE-3"
        assert confidence == "high"
        assert "unanim" in sursa

    def test_unanim_nul_marcat_separat(self):
        """Both say NUL -> NUL with high confidence, never promoted to a RAR code."""
        cod, confidence, sursa = self._vote("NUL", "NUL")
        assert cod == "NUL"
        assert confidence == "high"
        # Build the set of real RAR codes from oc.CODURI ("CODE=..." entries
        # separated by commas), leaving NUL out.
        entries = oc.CODURI.replace(", ", ",").split(",")
        rar_codes = {entry.split("=")[0] for entry in entries}
        rar_codes.discard("NUL")
        assert cod not in rar_codes
        assert "nul" in sursa.lower()

    def test_dezacord_total(self):
        """Models disagree on the code -> needs_mapping with a disagreement source."""
        _cod, confidence, sursa = self._vote("OE-2", "OE-4")
        assert confidence == "needs_mapping"
        assert "dezacord" in sursa

    def test_parse_fail_partial(self):
        """One model returns '?' (parse failure), the other a valid code."""
        _cod, confidence, _sursa = self._vote("OE-1", "?")
        # Conservative rule: no unanimity -> needs_mapping.
        assert confidence == "needs_mapping"

    def test_toate_parse_fail(self):
        """Both models return '?' -> needs_mapping."""
        _cod, confidence, _sursa = self._vote("?", "?")
        assert confidence == "needs_mapping"

    def test_cod_invalid_returnat_de_llm(self):
        """Both models agree on a code that is not a known one -> needs_mapping."""
        _cod, confidence, _sursa = self._vote("OE-99", "OE-99")
        assert confidence == "needs_mapping"


# ---------------------------------------------------------------------------
# Scrub PII — refoloseste or_common.scrub (F3)
# ---------------------------------------------------------------------------

class TestScrubPII:
    """Checks for or_common.scrub, the PII scrubber, called directly.

    None of these tests goes through or_common.call() or touches the network;
    they only pin what scrub() does to a single string.
    """

    def test_nr_inmatriculare_scrubbed(self):
        """A licence plate (e.g. CT 12 ABC) is replaced by the [NR] marker."""
        s = "ITP CT 12 ABC"
        assert "[NR]" in oc.scrub(s)

    def test_vin_scrubbed(self):
        """A VIN (17 alphanumeric characters) is replaced by the [VIN] marker."""
        vin = "WVWZZZ1KZAM000001"  # 17 characters, VIN-shaped
        s = f"VERIFICAT {vin}"
        assert "[VIN]" in oc.scrub(s)

    def test_text_normal_nemodificat(self):
        """Text without PII comes back unchanged."""
        s = "REVIZIE PERIODICA MOTOR"
        assert oc.scrub(s) == s

    def test_scrub_in_batch_call(self):
        """A batch item containing a plate no longer contains it after scrub().

        This does not intercept the HTTP request made by or_common.call(); it
        verifies the property the batch path relies on, namely that the
        scrubbed text carries the [NR] marker and not the original plate.
        The previous version built an urlopen mock that was never installed,
        so it exercised nothing beyond these two assertions.
        """
        scrubbed = oc.scrub("ITP CT 12 ABC")
        assert "[NR]" in scrubbed
        # The original plate must not survive in what would be sent.
        assert "CT 12 ABC" not in scrubbed


# ---------------------------------------------------------------------------
# Resumabilitate
# ---------------------------------------------------------------------------

class TestResumabil:
    """Resumability checks: labelling picks up from an existing partial dict.

    or_label.oc.call is replaced with a local fake in every test that would
    otherwise reach the LLM.
    """

    @staticmethod
    def _labeled_entry():
        """Return a fresh partial entry representing an already-labelled rep."""
        return {
            "cod": "OE-3",
            "confidence": "high",
            "sursa": "ensemble-unanim",
            "votes": {},
        }

    def test_skip_already_labeled(self, monkeypatch):
        """Representatives already present in partial are not sent to the LLM."""
        sent = []

        def fake_call(model, batch, **kw):
            for item in batch:
                sent.append(item)
            codes = ["OE-1"] * len(batch)
            return codes, {"ms": 100, "err": None}

        monkeypatch.setattr(or_label.oc, "call", fake_call)

        groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}]
        # REVIZIE is already labelled in the partial state.
        partial = {"REVIZIE": self._labeled_entry()}

        result = or_label.label_groups(groups, partial, batch_size=20, pace=0)

        # The LLM must not have been asked about REVIZIE ...
        assert "REVIZIE" not in sent
        # ... and the code stored in partial is kept.
        assert result["REVIZIE"]["cod"] == "OE-3"

    def test_labels_new_reps(self, monkeypatch):
        """Representatives missing from partial get labelled."""
        calls = 0

        def fake_call(model, batch, **kw):
            nonlocal calls
            calls += 1
            codes = ["OE-1"] * len(batch)
            return codes, {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", fake_call)

        groups = [{"rep": "D/R BARA FATA", "freq": 3000, "members": []}]

        result = or_label.label_groups(groups, {}, batch_size=20, pace=0)

        # The LLM was called at least once per configured model.
        assert calls >= len(or_label.MODELS)
        assert "D/R BARA FATA" in result
        assert result["D/R BARA FATA"]["cod"] == "OE-1"

    def test_partial_mixt(self, monkeypatch):
        """With a mix of labelled and new reps, only the new ones are sent."""
        sent = []

        def fake_call(model, batch, **kw):
            for item in batch:
                sent.append(item)
            codes = ["OE-2"] * len(batch)
            return codes, {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", fake_call)

        already_done = {"rep": "REVIZIE", "freq": 5000, "members": []}
        brand_new = {"rep": "D/R BARA FATA", "freq": 3000, "members": []}
        partial = {"REVIZIE": self._labeled_entry()}

        result = or_label.label_groups(
            [already_done, brand_new], partial, batch_size=20, pace=0
        )

        # Only D/R BARA FATA should have reached the LLM.
        assert "REVIZIE" not in sent
        assert "D/R BARA FATA" in sent
        # The resulting state holds both keys.
        assert "REVIZIE" in result
        assert "D/R BARA FATA" in result
        # REVIZIE keeps the code it had in partial.
        assert result["REVIZIE"]["cod"] == "OE-3"

    def test_load_partial_fisier_gol(self, tmp_path):
        """load_partial on a non-existent file returns an empty dict."""
        missing = tmp_path / "inexistent.json"
        assert or_label.load_partial(str(missing)) == {}

    def test_save_si_load_partial(self, tmp_path):
        """save_partial followed by load_partial round-trips the data."""
        target = str(tmp_path / "partial.json")
        data = {"REVIZIE": self._labeled_entry()}

        or_label.save_partial(target, data)

        assert or_label.load_partial(target) == data


# ---------------------------------------------------------------------------
# Format output si propagare
# ---------------------------------------------------------------------------

class TestOutputFormat:
    """Checks on the rows produced by label_groups followed by expand_to_all.

    Covers the required fields and how the representative's result is
    propagated to the group's members.
    """

    @staticmethod
    def _stub_llm(monkeypatch, code):
        """Replace or_label.oc.call with a fake answering `code` for every item."""
        def fake_call(model, batch, **kw):
            return [code] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", fake_call)

    @staticmethod
    def _label_then_expand(groups):
        """Label `groups` from an empty partial, then expand to per-name rows."""
        labeled = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        return or_label.expand_to_all(groups, labeled)

    def test_campuri_obligatorii(self, monkeypatch):
        """Every row carries denumire, cod, sursa, confidence and grup_rep."""
        self._stub_llm(monkeypatch, "OE-3")
        groups = [
            {"rep": "REVIZIE", "freq": 5000, "members": [("REVIZIE MICA", 100)]}
        ]

        results = self._label_then_expand(groups)

        assert len(results) == 2  # the representative plus one member
        required = ("denumire", "cod", "sursa", "confidence", "grup_rep")
        for row in results:
            for field in required:
                assert field in row

    def test_reprezentant_cu_sursa_ensemble(self, monkeypatch):
        """The representative's source is 'ensemble-*', not 'propagat'."""
        self._stub_llm(monkeypatch, "OE-3")
        groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}]

        results = self._label_then_expand(groups)

        first = results[0]
        assert first["denumire"] == "REVIZIE"
        assert first["sursa"].startswith("ensemble-")
        assert first["sursa"] != "propagat"

    def test_membru_primeste_sursa_propagat(self, monkeypatch):
        """Members get sursa='propagat' together with the representative's code."""
        self._stub_llm(monkeypatch, "OE-3")
        members = [("REVIZIE MICA", 100), ("REVIZIE AUTO", 80)]
        groups = [{"rep": "REVIZIE", "freq": 5000, "members": members}]

        results = self._label_then_expand(groups)

        assert len(results) == 3
        propagated = []
        for row in results:
            if row["sursa"] == "propagat":
                propagated.append(row)
        assert len(propagated) == 2
        for row in propagated:
            assert row["cod"] == "OE-3"      # inherited from the representative
            assert row["grup_rep"] == "REVIZIE"

    def test_nul_propagat_ca_nul_nu_ca_cod_rar(self, monkeypatch):
        """NUL reaches the members as NUL; it is never turned into a RAR code."""
        self._stub_llm(monkeypatch, "NUL")
        groups = [{"rep": "ITP", "freq": 50, "members": [("ITP + RAR", 30)]}]

        results = self._label_then_expand(groups)

        # Real RAR codes parsed out of oc.CODURI, with NUL left out.
        entries = oc.CODURI.replace(", ", ",").split(",")
        rar_codes = {entry.split("=")[0] for entry in entries}
        rar_codes.discard("NUL")
        for row in results:
            assert row["cod"] == "NUL"
            assert row["cod"] not in rar_codes

    def test_dezacord_propagat_ca_needs_mapping(self, monkeypatch):
        """An ensemble disagreement propagates to members as needs_mapping."""
        turn = 0

        def fake_call(model, batch, **kw):
            nonlocal turn
            turn += 1
            # Alternate the answer by call order: odd calls say OE-1, even
            # calls say OE-3, so consecutive calls disagree.
            code = ("OE-3", "OE-1")[turn % 2]
            return [code] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", fake_call)
        groups = [
            {
                "rep": "REGLAT DIRECTIE",
                "freq": 200,
                "members": [("REGLAT DIRECTIA", 150)],
            }
        ]

        results = self._label_then_expand(groups)

        # Both the representative and its member must be needs_mapping.
        for row in results:
            assert row["confidence"] == "needs_mapping"


# ---------------------------------------------------------------------------
# Integrare end-to-end (fara apeluri reale)
# ---------------------------------------------------------------------------

class TestRunIntegrare:
    """End-to-end checks of or_label.run() with a fake corpus and a fake LLM.

    Both or_label.oc.corpus_by_freq and or_label.oc.call are monkeypatched, so
    no real corpus is read and no real API call is made. Output and partial
    files live under pytest's tmp_path.
    """

    def test_run_produce_fisier_output(self, tmp_path, monkeypatch):
        """run() writes the JSON output file with the expected row fields."""
        def mock_corpus():
            return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)]

        def mock_call(model, batch, **kw):
            return ["OE-3"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus)
        monkeypatch.setattr(or_label.oc, "call", mock_call)

        out = str(tmp_path / "final.json")
        partial = str(tmp_path / "partial.json")
        or_label.run(n=2, output_path=out, partial_path=partial,
                     threshold=85, batch_size=20, pace=0)

        assert os.path.exists(out)
        # Read through a context manager so the handle is closed promptly
        # instead of being left for the garbage collector.
        with open(out, encoding="utf-8") as fh:
            loaded = json.load(fh)
        assert len(loaded) >= 2
        # Every row carries the required fields.
        for row in loaded:
            assert "denumire" in row
            assert "cod" in row

    def test_run_resumabil(self, tmp_path, monkeypatch):
        """run() with an existing partial file skips entries already labelled."""
        call_count = [0]

        def mock_corpus():
            return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)]

        def mock_call(model, batch, **kw):
            call_count[0] += 1
            return ["OE-1"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus)
        monkeypatch.setattr(or_label.oc, "call", mock_call)

        partial_path = str(tmp_path / "partial.json")
        # Pre-populate the partial file with REVIZIE already labelled.
        or_label.save_partial(partial_path, {
            "REVIZIE": {"cod": "OE-3", "confidence": "high",
                        "sursa": "ensemble-unanim", "votes": {}}
        })

        out = str(tmp_path / "final.json")
        or_label.run(n=2, output_path=out, partial_path=partial_path,
                     threshold=85, batch_size=20, pace=0)

        # The LLM is called ONLY for D/R BARA FATA (not for REVIZIE):
        # one call per model, for the single remaining representative.
        assert call_count[0] == len(or_label.MODELS)