Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq (3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect ~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7 (nu OE-5), placute frana uzura OE-1 (nu OE-F avarie). US-001..006: prefiltru NUL determinist, etichetator offline, generator seed, seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL 12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO). config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env). Suita: 1387 passed, 1 deselected (live). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
151 lines
5.0 KiB
Python
151 lines
5.0 KiB
Python
"""US-005 (PRD 5.18) — embeddings indexeaza corpusul etichetat (NU nomenclatorul).

k-NN peste exemple reale etichetate (denumire_normalizata -> cod, is_nul) e net mai
precis decat peste cele 18 categorii generice. Acopera si simetria corpus/query (F1):
corpusul e text NORMALIZAT, deci query-ul trebuie normalizat la fel inainte de embedding.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
|
|
# Deterministic mock backend: each vector is a character histogram (stable similarity).
class MockBackend:
    """Embedding backend stub that turns text into a 27-bin character histogram.

    Bins 0-25 count the letters A-Z of the upper-cased text; bin 26 counts
    every other character (digits, spaces, punctuation, non-ASCII letters).
    """

    @staticmethod
    def _histogram(text):
        """Return the 27-bin character histogram of a single string."""
        bins = [0.0] * 27
        for char in text.upper():
            # 65 == ord("A"); anything outside A-Z lands in the catch-all bin.
            slot = ord(char) - 65 if "A" <= char <= "Z" else 26
            bins[slot] += 1.0
        return bins

    def embed(self, texts):
        """Return one histogram vector per input text, in input order."""
        return [self._histogram(text) for text in texts]
|
|
|
|
|
|
@pytest.fixture()
def env(monkeypatch):
    """Set up an isolated application environment for one test.

    Points ``AUTOPASS_DB_PATH`` at ``us005.db`` inside a fresh temporary
    directory, sets ``AUTOPASS_WEB_AUTH_REQUIRED`` to ``"false"`` and
    ``AUTOPASS_EMBEDDINGS_ENABLED`` to ``"true"``, clears the cached settings
    and runs ``init_db()``.

    Yields:
        The ``monkeypatch`` fixture, so tests can apply further patches.

    On teardown the settings cache is cleared again and the temporary
    directory is deleted, so repeated runs do not accumulate leftover
    directories and database files.
    """
    import shutil

    tmp = tempfile.mkdtemp()
    try:
        monkeypatch.setenv("AUTOPASS_DB_PATH", os.path.join(tmp, "us005.db"))
        monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
        # US-005 needs embeddings switched ON.
        monkeypatch.setenv("AUTOPASS_EMBEDDINGS_ENABLED", "true")

        # Imported only after the environment is patched, as in the original order.
        from app.config import get_settings

        get_settings.cache_clear()

        from app.db import init_db

        init_db()
        yield monkeypatch
        get_settings.cache_clear()
    finally:
        # Best-effort cleanup: a file that is still locked must not fail the test.
        shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
|
|
@pytest.fixture()
def conn(env):
    """Yield a connection from ``app.db.get_connection()`` and close it afterwards.

    Depends on ``env`` so the connection is opened only after the environment
    has been patched and ``init_db()`` has run.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    connection.close()
|
|
|
|
|
|
def _inject_mock_engine():
    """Replace ``app.embeddings._engine`` with an engine backed by ``MockBackend``.

    Returns:
        The ``app.embeddings`` module, so callers can use its functions directly.
    """
    import app.embeddings as emb
    from app.embeddings import EmbeddingEngine

    mock_engine = EmbeddingEngine(backend=MockBackend())
    emb._engine = mock_engine
    return emb
|
|
|
|
|
|
def _seed_silver(conn, rows):
    """Insert labelled rows into ``mapping_suggestions`` and commit.

    Args:
        conn: Database connection exposing ``executemany`` and ``commit``.
        rows: Iterable of ``(denumire_normalizata, cod, is_nul)`` tuples.

    Every row is stored with ``source='llm_seed'`` and ``confidence=0.7``;
    rows rejected by a table constraint are skipped (``INSERT OR IGNORE``).
    """
    insert_sql = (
        "INSERT OR IGNORE INTO mapping_suggestions "
        "(denumire_normalizata, cod_prestatie, is_nul, source, confidence) VALUES (?, ?, ?, 'llm_seed', 0.7)"
    )
    conn.executemany(insert_sql, rows)
    conn.commit()
|
|
|
|
|
|
def test_corpus_din_mapping_suggestions(conn):
    """The indexed corpus consists of the names seeded into mapping_suggestions."""
    emb = _inject_mock_engine()
    seeded_rows = [
        ("SCHIMB ULEI MOTOR", "OE-3", 0),
        ("INLOCUIT PLACUTE FRANA", "OE-1", 0),
        ("13 X ITP", None, 1),
    ]
    _seed_silver(conn, seeded_rows)

    from app.mapping import ensure_embeddings_corpus

    ensure_embeddings_corpus(conn)
    assert emb.has_corpus()

    # Indexed corpus = the names from mapping_suggestions, NOT from nomenclator_rar.
    indexed_names = {item["denumire"] for item in emb._engine._corpus_items}
    expected_names = {"SCHIMB ULEI MOTOR", "INLOCUIT PLACUTE FRANA", "13 X ITP"}
    assert indexed_names == expected_names
|
|
|
|
|
|
def test_suggest_nearest_intoarce_is_nul(conn):
    """``suggest_nearest`` reports the ``is_nul`` flag and code of the nearest neighbour."""
    emb = _inject_mock_engine()
    seeded_rows = [
        ("SCHIMB ULEI MOTOR", "OE-3", 0),
        ("13 X ITP", None, 1),
    ]
    _seed_silver(conn, seeded_rows)

    from app.mapping import ensure_embeddings_corpus

    ensure_embeddings_corpus(conn)

    # A NUL neighbour is the signal to suppress the suggestion.
    nul_hits = emb.suggest_nearest("13 X ITP", top_k=1)
    assert nul_hits
    assert nul_hits[0]["is_nul"] is True

    labelled_hits = emb.suggest_nearest("SCHIMB ULEI MOTOR", top_k=1)
    assert labelled_hits
    best = labelled_hits[0]
    assert best["is_nul"] is False
    assert best["cod"] == "OE-3"
|
|
|
|
|
|
def test_semnatura_corpus_pe_seed(conn):
    """The corpus signature is stable for unchanged data and changes when a row is added."""
    emb = _inject_mock_engine()
    _seed_silver(conn, [("SCHIMB ULEI MOTOR", "OE-3", 0)])

    from app.mapping import ensure_embeddings_corpus

    ensure_embeddings_corpus(conn)
    initial_signature = emb.corpus_signature()
    assert initial_signature is not None

    # Calling again with no change keeps the same signature (no re-indexing).
    ensure_embeddings_corpus(conn)
    unchanged_signature = emb.corpus_signature()
    assert unchanged_signature == initial_signature

    # Adding a row changes the signature.
    _seed_silver(conn, [("INLOCUIT BATERIE", "OE-1", 0)])
    ensure_embeddings_corpus(conn)
    updated_signature = emb.corpus_signature()
    assert updated_signature != initial_signature
|
|
|
|
|
|
def test_query_normalizat_ca_si_corpusul(conn, monkeypatch):
    """F1 (HIGH): enrich_suggestions queries suggest_nearest with the NORMALIZED text."""
    import app.embeddings as emb

    raw_query = "Schimb Uleiul Motor"
    captured = {}

    def fake_suggest(text, top_k=1):
        # Record the text that reaches the embeddings layer.
        captured["text"] = text
        return [{"cod": "OE-3", "is_nul": False, "similaritate": 0.99}]

    monkeypatch.setattr(emb, "has_corpus", lambda: True)
    monkeypatch.setattr(emb, "suggest_nearest", fake_suggest)

    from app.mapping import enrich_suggestions

    enrich_suggestions(conn, raw_query)

    # The corpus holds denumire_normalizata, so the query must be normalized the same way.
    from app.mapping import normalize_for_match

    seen_text = captured["text"]
    assert seen_text == normalize_for_match(raw_query)
    assert seen_text == "SCHIMB ULEIUL MOTOR"
|
|
|
|
|
|
def test_degradare_gratioasa_pastrata(conn, monkeypatch):
    """A backend that raises must not make ensure/enrich raise.

    Installs an ``EmbeddingEngine`` whose backend always fails and checks that
    ``ensure_embeddings_corpus`` and ``enrich_suggestions`` still complete, and
    that the enrich result contains the ``"sugestie_principala"`` key.
    """
    import app.embeddings as emb
    from app.embeddings import EmbeddingEngine

    class BrokenBackend:
        """Backend whose ``embed`` always fails, simulating an unavailable model."""

        def embed(self, texts):
            raise RuntimeError("model indisponibil")

    # Install via monkeypatch so the previous engine is put back at teardown;
    # a plain assignment would leave the failing engine in the module global
    # for every test that runs afterwards. raising=False keeps the plain
    # assignment's tolerance of a missing attribute.
    monkeypatch.setattr(
        emb, "_engine", EmbeddingEngine(backend=BrokenBackend()), raising=False
    )
    _seed_silver(conn, [("SCHIMB ULEI MOTOR", "OE-3", 0)])

    from app.mapping import ensure_embeddings_corpus, enrich_suggestions

    ensure_embeddings_corpus(conn)  # must not raise
    out = enrich_suggestions(conn, "SCHIMB ULEI")  # must not raise
    assert "sugestie_principala" in out
|