"""US-002 (PRD 5.18) — etichetator offline multi-backend cu prompt procedural. Toate testele ruleaza FARA retea reala (transport injectabil / inspectie body). Acopera: prompt 3 pasi, envelope json_schema strict + enum, backend selectabil prin env, scrub PII inainte de orice request, garda de truncare. """ from __future__ import annotations # Numele pachetului `tools/mapare-llm` contine cratima -> nu e importabil ca modul. # Incarcam fisierul direct prin importlib pe cale. import importlib.util import os import sys _PATH = os.path.join(os.path.dirname(__file__), "..", "tools", "mapare-llm", "eticheteaza.py") _spec = importlib.util.spec_from_file_location("eticheteaza", _PATH) eticheteaza = importlib.util.module_from_spec(_spec) sys.modules["eticheteaza"] = eticheteaza # necesar pt. @dataclass introspection _spec.loader.exec_module(eticheteaza) def test_construieste_prompt_3pasi(): msgs = eticheteaza.construieste_mesaje(["INLOCUIT PLACUTE FRANA"]) assert isinstance(msgs, list) and msgs[0]["role"] == "system" sys = msgs[0]["content"].upper() # Procedura in 3 pasi explicita. assert "PAS 1" in sys and "PAS 2" in sys and "PAS 3" in sys # Regula NUL + avarie grava doar la accident. assert "NUL" in sys assert "ACCIDENT" in sys # Dezactivare thinking Qwen3 (token /no_think undeva in mesaje). joined = " ".join(m["content"] for m in msgs) assert "/no_think" in joined # User message enumera operatiile. assert "1." in msgs[1]["content"] and "INLOCUIT PLACUTE FRANA" in msgs[1]["content"] def test_envelope_json_schema_strict_si_enum(): backend = eticheteaza.get_backend("lmstudio") body = eticheteaza.construieste_body(["REVIZIE"], backend) rf = body["response_format"] # Envelope COMPLET, NU json_object. assert rf["type"] == "json_schema" js = rf["json_schema"] assert js["strict"] is True assert "name" in js schema = js["schema"] cod_schema = schema["properties"]["rez"]["items"]["properties"]["cod"] # cod = enum peste cele 19 ALL_LABELS (18 coduri + NUL). assert set(cod_schema["enum"]) == set(eticheteaza.ALL_LABELS) assert len(eticheteaza.ALL_LABELS) == 19 assert "NUL" in eticheteaza.ALL_LABELS # temperatura 0 (determinist) si strict items. assert body["temperature"] == 0 assert schema["properties"]["rez"]["items"]["additionalProperties"] is False def test_parseaza_raspuns_si_garda_truncare(): batch = ["A", "B", "C"] # Raspuns complet, ordine amestecata, un cod invalid. content = {"rez": [{"i": 2, "cod": "OE-1"}, {"i": 1, "cod": "NUL"}, {"i": 3, "cod": "INEXISTENT"}]} codes = eticheteaza.parseaza_raspuns(content, len(batch)) assert codes == ["NUL", "OE-1", "?"] # cod invalid -> '?', NU ascuns # Raspuns trunchiat: lipseste pozitia 3 -> '?' pe lipsa, nu eroare. content_trunc = {"rez": [{"i": 1, "cod": "OE-1"}, {"i": 2, "cod": "OE-2"}]} codes2 = eticheteaza.parseaza_raspuns(content_trunc, len(batch)) assert codes2 == ["OE-1", "OE-2", "?"] assert len(codes2) == len(batch) def test_backend_selectabil_env(monkeypatch): # Default = lmstudio (backend aprobat v1, D4). monkeypatch.delenv("ETICHETARE_BACKEND", raising=False) assert eticheteaza.get_backend().name == "lmstudio" # Selectie prin env. monkeypatch.setenv("ETICHETARE_BACKEND", "groq") assert eticheteaza.get_backend().name == "groq" # Endpoint + model configurabile prin env. monkeypatch.setenv("ETICHETARE_BACKEND", "lmstudio") monkeypatch.setenv("ETICHETARE_ENDPOINT", "http://exemplu:1234/v1/chat/completions") monkeypatch.setenv("ETICHETARE_MODEL", "qwen/qwen3-custom") b = eticheteaza.get_backend() assert b.url == "http://exemplu:1234/v1/chat/completions" assert b.model == "qwen/qwen3-custom" def test_scrub_pii_inainte_de_request(monkeypatch): """Nicio placuta/VIN nu ajunge la transport — scrub inainte de orice apel.""" capturat = {} def fake_transport(url, headers, payload, timeout): capturat["payload"] = payload return {"choices": [{"message": {"content": '{"rez":[{"i":1,"cod":"OE-1"}]}'}}]} backend = eticheteaza.get_backend("lmstudio") codes, meta = eticheteaza.call(["VOPSIT USA B 123 ABC"], backend, transport=fake_transport) assert codes == ["OE-1"] body = capturat["payload"] user_content = body["messages"][1]["content"] assert "B 123 ABC" not in user_content assert "[NR]" in user_content assert meta["err"] is None