Files
rar-autopass/tests/test_eticheteaza_tool.py
Claude Agent 756f77730f feat(5.18): corpus k-NN exemple etichetate + seed real Haiku (17181 op)
Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE
cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq
(3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect
~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume
piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7
(nu OE-5), placute frana uzura OE-1 (nu OE-F avarie).

US-001..006: prefiltru NUL determinist, etichetator offline, generator seed,
seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings
indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL
12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO).

config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER
populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env).

Suita: 1387 passed, 1 deselected (live).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 06:59:15 +00:00

104 lines
4.4 KiB
Python

"""US-002 (PRD 5.18) — etichetator offline multi-backend cu prompt procedural.
Toate testele ruleaza FARA retea reala (transport injectabil / inspectie body).
Acopera: prompt 3 pasi, envelope json_schema strict + enum, backend selectabil
prin env, scrub PII inainte de orice request, garda de truncare.
"""
from __future__ import annotations
# The package directory `tools/mapare-llm` contains a hyphen, so it cannot be
# imported as a regular module. Load the file directly from its path via importlib.
import importlib.util
import os
import sys
# Location of the tool under test, built relative to this test file's directory.
_PATH = os.path.join(os.path.dirname(__file__), "..", "tools", "mapare-llm", "eticheteaza.py")
_spec = importlib.util.spec_from_file_location("eticheteaza", _PATH)
eticheteaza = importlib.util.module_from_spec(_spec)
# Register the module under its name BEFORE executing it.
sys.modules["eticheteaza"] = eticheteaza # required for @dataclass introspection
_spec.loader.exec_module(eticheteaza)
def test_construieste_prompt_3pasi():
    """Check that the built chat messages carry the 3-step procedural prompt.

    Builds the messages for a single operation and asserts that:
    the first message has the ``system`` role; its upper-cased content
    mentions "PAS 1", "PAS 2" and "PAS 3", plus "NUL" and "ACCIDENT";
    the ``/no_think`` token appears somewhere in the messages; and the
    second message lists the operation with a "1." enumeration marker.
    """
    msgs = eticheteaza.construieste_mesaje(["INLOCUIT PLACUTE FRANA"])
    assert isinstance(msgs, list)
    assert msgs[0]["role"] == "system"

    # Named `system_prompt` (not `sys`) so the imported `sys` module is not
    # shadowed inside this function.
    system_prompt = msgs[0]["content"].upper()

    # The 3-step procedure is spelled out explicitly; one assert per step so
    # a failure points at the step that is missing.
    assert "PAS 1" in system_prompt
    assert "PAS 2" in system_prompt
    assert "PAS 3" in system_prompt

    # NUL rule + severe damage only in case of an accident.
    assert "NUL" in system_prompt
    assert "ACCIDENT" in system_prompt

    # Qwen3 thinking is disabled (the /no_think token appears somewhere in
    # the messages). Checked on the original-case text, not the upper-cased one.
    joined = " ".join(m["content"] for m in msgs)
    assert "/no_think" in joined

    # The user message enumerates the operations.
    user_content = msgs[1]["content"]
    assert "1." in user_content
    assert "INLOCUIT PLACUTE FRANA" in user_content
def test_envelope_json_schema_strict_si_enum():
    """Check the request body's structured-output envelope for the lmstudio backend.

    Asserts that ``response_format`` is a strict ``json_schema`` envelope
    with a name, that the ``cod`` field is an enum equal to ``ALL_LABELS``
    (19 entries, including "NUL"), that temperature is 0, and that result
    items forbid additional properties.
    """
    lm_backend = eticheteaza.get_backend("lmstudio")
    request_body = eticheteaza.construieste_body(["REVIZIE"], lm_backend)
    response_format = request_body["response_format"]

    # FULL envelope, NOT json_object.
    assert response_format["type"] == "json_schema"
    envelope = response_format["json_schema"]
    assert envelope["strict"] is True
    assert "name" in envelope

    # Schema of a single entry of the `rez` array.
    item_schema = envelope["schema"]["properties"]["rez"]["items"]
    cod_field = item_schema["properties"]["cod"]

    # cod = enum over the 19 ALL_LABELS (18 codes + NUL).
    labels = eticheteaza.ALL_LABELS
    assert set(cod_field["enum"]) == set(labels)
    assert len(labels) == 19
    assert "NUL" in labels

    # Temperature 0 (deterministic) and strict items.
    assert request_body["temperature"] == 0
    assert item_schema["additionalProperties"] is False
def test_parseaza_raspuns_si_garda_truncare():
    """Check response parsing: reordering, invalid codes and the truncation guard.

    For a batch of three operations, asserts that results are returned in
    position order, that an unknown code becomes '?', and that a missing
    position also yields '?' so the output length matches the batch length.
    """
    batch = ["A", "B", "C"]
    expected_len = len(batch)

    # Complete response, shuffled order, one invalid code.
    full_reply = {
        "rez": [
            {"i": 2, "cod": "OE-1"},
            {"i": 1, "cod": "NUL"},
            {"i": 3, "cod": "INEXISTENT"},
        ]
    }
    full_codes = eticheteaza.parseaza_raspuns(full_reply, expected_len)
    # Invalid code -> '?', NOT hidden.
    assert full_codes == ["NUL", "OE-1", "?"]

    # Truncated response: position 3 is missing -> '?' for the gap, no error.
    truncated_reply = {
        "rez": [
            {"i": 1, "cod": "OE-1"},
            {"i": 2, "cod": "OE-2"},
        ]
    }
    truncated_codes = eticheteaza.parseaza_raspuns(truncated_reply, expected_len)
    assert truncated_codes == ["OE-1", "OE-2", "?"]
    assert len(truncated_codes) == expected_len
def test_backend_selectabil_env(monkeypatch):
    """Check that the backend, endpoint and model are driven by environment variables.

    Uses pytest's ``monkeypatch`` fixture to unset/set ``ETICHETARE_BACKEND``,
    ``ETICHETARE_ENDPOINT`` and ``ETICHETARE_MODEL`` and asserts on the
    ``name``, ``url`` and ``model`` attributes returned by ``get_backend()``.
    """
    # Default = lmstudio (backend approved for v1, D4).
    monkeypatch.delenv("ETICHETARE_BACKEND", raising=False)
    default_backend = eticheteaza.get_backend()
    assert default_backend.name == "lmstudio"

    # Selection through the environment.
    monkeypatch.setenv("ETICHETARE_BACKEND", "groq")
    groq_backend = eticheteaza.get_backend()
    assert groq_backend.name == "groq"

    # Endpoint + model are configurable through the environment.
    overrides = (
        ("ETICHETARE_BACKEND", "lmstudio"),
        ("ETICHETARE_ENDPOINT", "http://exemplu:1234/v1/chat/completions"),
        ("ETICHETARE_MODEL", "qwen/qwen3-custom"),
    )
    for var_name, var_value in overrides:
        monkeypatch.setenv(var_name, var_value)

    custom_backend = eticheteaza.get_backend()
    assert custom_backend.url == "http://exemplu:1234/v1/chat/completions"
    assert custom_backend.model == "qwen/qwen3-custom"
def test_scrub_pii_inainte_de_request(monkeypatch):
    """No licence plate/VIN reaches the transport — scrubbing happens before any call."""
    sent_payloads = []

    def recording_transport(url, headers, payload, timeout):
        # Stand-in for the real transport: remember what would have been sent
        # and answer with a canned single-result completion.
        sent_payloads.append(payload)
        reply_text = '{"rez":[{"i":1,"cod":"OE-1"}]}'
        return {"choices": [{"message": {"content": reply_text}}]}

    lm_backend = eticheteaza.get_backend("lmstudio")
    codes, meta = eticheteaza.call(
        ["VOPSIT USA B 123 ABC"],
        lm_backend,
        transport=recording_transport,
    )
    assert codes == ["OE-1"]

    # Inspect the user message of the most recent payload handed to the transport.
    user_content = sent_payloads[-1]["messages"][1]["content"]
    assert "B 123 ABC" not in user_content
    assert "[NR]" in user_content
    assert meta["err"] is None