Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq (3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect ~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7 (nu OE-5), placute frana uzura OE-1 (nu OE-F avarie). US-001..006: prefiltru NUL determinist, etichetator offline, generator seed, seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL 12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO). config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env). Suita: 1387 passed, 1 deselected (live). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
104 lines
4.4 KiB
Python
104 lines
4.4 KiB
Python
"""US-002 (PRD 5.18) — etichetator offline multi-backend cu prompt procedural.
|
|
|
|
Toate testele ruleaza FARA retea reala (transport injectabil / inspectie body).
|
|
Acopera: prompt 3 pasi, envelope json_schema strict + enum, backend selectabil
|
|
prin env, scrub PII inainte de orice request, garda de truncare.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
# Numele pachetului `tools/mapare-llm` contine cratima -> nu e importabil ca modul.
|
|
# Incarcam fisierul direct prin importlib pe cale.
|
|
import importlib.util
|
|
import os
|
|
import sys
|
|
|
|
# The `tools/mapare-llm` directory name contains a hyphen, so it cannot be
# imported as a regular package; the module file is loaded directly by path.
_PATH = os.path.join(os.path.dirname(__file__), "..", "tools", "mapare-llm", "eticheteaza.py")
_spec = importlib.util.spec_from_file_location("eticheteaza", _PATH)
# `spec_from_file_location` may return None and a spec may carry no loader;
# fail with an explicit ImportError instead of an AttributeError on None.
if _spec is None or _spec.loader is None:
    raise ImportError(f"cannot load module 'eticheteaza' from {_PATH}")
eticheteaza = importlib.util.module_from_spec(_spec)
# Registered before execution: needed for @dataclass introspection.
sys.modules["eticheteaza"] = eticheteaza
_spec.loader.exec_module(eticheteaza)
|
|
|
|
|
|
def test_construieste_prompt_3pasi():
    """Verify that `construieste_mesaje` builds the 3-step procedural prompt.

    Checks that the first message is the system message, that its text
    mentions all three steps plus the NUL and ACCIDENT rules, that the
    `/no_think` token appears somewhere in the messages, and that the second
    message enumerates the operation passed in.
    """
    msgs = eticheteaza.construieste_mesaje(["INLOCUIT PLACUTE FRANA"])
    assert isinstance(msgs, list) and msgs[0]["role"] == "system"

    # Named `system_text` (not `sys`) so the module-level `sys` import is not
    # shadowed inside this function. Upper-cased for case-insensitive checks.
    system_text = msgs[0]["content"].upper()

    # The 3-step procedure is spelled out explicitly.
    assert "PAS 1" in system_text and "PAS 2" in system_text and "PAS 3" in system_text

    # NUL rule + severe damage only in case of an accident.
    assert "NUL" in system_text
    assert "ACCIDENT" in system_text

    # Qwen3 thinking is disabled (the /no_think token is somewhere in the messages).
    joined = " ".join(m["content"] for m in msgs)
    assert "/no_think" in joined

    # The user message enumerates the operations.
    assert "1." in msgs[1]["content"] and "INLOCUIT PLACUTE FRANA" in msgs[1]["content"]
|
|
|
|
|
|
def test_envelope_json_schema_strict_si_enum():
    """Verify the request body uses a strict `json_schema` envelope with an enum.

    The body built for the "lmstudio" backend must declare a named, strict
    JSON schema whose `cod` property is an enum over `ALL_LABELS`, use
    temperature 0, and forbid additional properties on result items.
    """
    payload = eticheteaza.construieste_body(
        ["REVIZIE"],
        eticheteaza.get_backend("lmstudio"),
    )
    response_format = payload["response_format"]

    # Full envelope, NOT json_object.
    assert response_format["type"] == "json_schema"
    envelope = response_format["json_schema"]
    assert envelope["strict"] is True
    assert "name" in envelope

    schema = envelope["schema"]
    cod_enum = schema["properties"]["rez"]["items"]["properties"]["cod"]["enum"]

    # cod = enum over the 19 ALL_LABELS (18 codes + NUL).
    labels = eticheteaza.ALL_LABELS
    assert set(cod_enum) == set(labels)
    assert len(labels) == 19
    assert "NUL" in labels

    # Temperature 0 (deterministic) and strict items.
    assert payload["temperature"] == 0
    assert schema["properties"]["rez"]["items"]["additionalProperties"] is False
|
|
|
|
|
|
def test_parseaza_raspuns_si_garda_truncare():
    """Verify `parseaza_raspuns` reorders results and guards against truncation.

    Invalid codes and missing positions must both come back as '?', and the
    returned list must always have one entry per batch item.
    """
    operations = ["A", "B", "C"]
    expected_len = len(operations)

    # Complete response, shuffled order, one invalid code.
    shuffled = {
        "rez": [
            {"i": 2, "cod": "OE-1"},
            {"i": 1, "cod": "NUL"},
            {"i": 3, "cod": "INEXISTENT"},
        ]
    }
    # Invalid code -> '?', NOT hidden.
    assert eticheteaza.parseaza_raspuns(shuffled, expected_len) == ["NUL", "OE-1", "?"]

    # Truncated response: position 3 is missing -> '?' for the gap, no error.
    truncated = {
        "rez": [
            {"i": 1, "cod": "OE-1"},
            {"i": 2, "cod": "OE-2"},
        ]
    }
    parsed = eticheteaza.parseaza_raspuns(truncated, expected_len)
    assert parsed == ["OE-1", "OE-2", "?"]
    assert len(parsed) == expected_len
|
|
|
|
|
|
def test_backend_selectabil_env(monkeypatch):
    """Verify the backend, endpoint and model are selectable via environment.

    Uses the pytest `monkeypatch` fixture so all environment changes are
    undone after the test.
    """
    # With the variable unset, the default is lmstudio (approved v1 backend, D4).
    monkeypatch.delenv("ETICHETARE_BACKEND", raising=False)
    default_backend = eticheteaza.get_backend()
    assert default_backend.name == "lmstudio"

    # Selection through the environment.
    monkeypatch.setenv("ETICHETARE_BACKEND", "groq")
    selected_backend = eticheteaza.get_backend()
    assert selected_backend.name == "groq"

    # Endpoint + model are configurable through the environment as well.
    overrides = (
        ("ETICHETARE_BACKEND", "lmstudio"),
        ("ETICHETARE_ENDPOINT", "http://exemplu:1234/v1/chat/completions"),
        ("ETICHETARE_MODEL", "qwen/qwen3-custom"),
    )
    for variable, value in overrides:
        monkeypatch.setenv(variable, value)

    configured = eticheteaza.get_backend()
    assert configured.url == "http://exemplu:1234/v1/chat/completions"
    assert configured.model == "qwen/qwen3-custom"
|
|
|
|
|
|
def test_scrub_pii_inainte_de_request(monkeypatch):
    """No plate/VIN reaches the transport: scrubbing happens before any call."""
    seen = {}

    def fake_transport(url, headers, payload, timeout):
        # Record what would have been sent and reply with a canned completion.
        seen["payload"] = payload
        canned = '{"rez":[{"i":1,"cod":"OE-1"}]}'
        return {"choices": [{"message": {"content": canned}}]}

    codes, meta = eticheteaza.call(
        ["VOPSIT USA B 123 ABC"],
        eticheteaza.get_backend("lmstudio"),
        transport=fake_transport,
    )
    assert codes == ["OE-1"]

    # Inspect the user message of the captured request body.
    user_message = seen["payload"]["messages"][1]["content"]
    assert "B 123 ABC" not in user_message
    assert "[NR]" in user_message
    assert meta["err"] is None
|