"""US-005 (PRD 5.18) — embeddings indexeaza corpusul etichetat (NU nomenclatorul). k-NN peste exemple reale etichetate (denumire_normalizata -> cod, is_nul) e net mai precis decat peste cele 18 categorii generice. Acopera si simetria corpus/query (F1): corpusul e text NORMALIZAT, deci query-ul trebuie normalizat la fel inainte de embedding. """ from __future__ import annotations import math import os import tempfile import pytest # Backend mock determinist: vector = histograma de caractere (similaritate stabila). class MockBackend: def embed(self, texts): out = [] for t in texts: v = [0.0] * 27 for ch in t.upper(): if "A" <= ch <= "Z": v[ord(ch) - 65] += 1.0 else: v[26] += 1.0 out.append(v) return out @pytest.fixture() def env(monkeypatch): tmp = tempfile.mkdtemp() monkeypatch.setenv("AUTOPASS_DB_PATH", os.path.join(tmp, "us005.db")) monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false") monkeypatch.setenv("AUTOPASS_EMBEDDINGS_ENABLED", "true") # US-005 are nevoie de embeddings ON from app.config import get_settings get_settings.cache_clear() from app.db import init_db init_db() yield monkeypatch get_settings.cache_clear() @pytest.fixture() def conn(env): from app.db import get_connection c = get_connection() yield c c.close() def _inject_mock_engine(): import app.embeddings as emb from app.embeddings import EmbeddingEngine emb._engine = EmbeddingEngine(backend=MockBackend()) return emb def _seed_silver(conn, rows): """rows = [(denumire_normalizata, cod, is_nul)].""" conn.executemany( "INSERT OR IGNORE INTO mapping_suggestions " "(denumire_normalizata, cod_prestatie, is_nul, source, confidence) VALUES (?, ?, ?, 'llm_seed', 0.7)", rows, ) conn.commit() def test_corpus_din_mapping_suggestions(conn): emb = _inject_mock_engine() _seed_silver(conn, [ ("SCHIMB ULEI MOTOR", "OE-3", 0), ("INLOCUIT PLACUTE FRANA", "OE-1", 0), ("13 X ITP", None, 1), ]) from app.mapping import ensure_embeddings_corpus ensure_embeddings_corpus(conn) assert emb.has_corpus() # Corpusul indexat = denumirile din mapping_suggestions, NU din nomenclator_rar. texte = {it["denumire"] for it in emb._engine._corpus_items} assert texte == {"SCHIMB ULEI MOTOR", "INLOCUIT PLACUTE FRANA", "13 X ITP"} def test_suggest_nearest_intoarce_is_nul(conn): emb = _inject_mock_engine() _seed_silver(conn, [ ("SCHIMB ULEI MOTOR", "OE-3", 0), ("13 X ITP", None, 1), ]) from app.mapping import ensure_embeddings_corpus ensure_embeddings_corpus(conn) res = emb.suggest_nearest("13 X ITP", top_k=1) assert res and res[0]["is_nul"] is True # vecin NUL -> semnal de supresie res2 = emb.suggest_nearest("SCHIMB ULEI MOTOR", top_k=1) assert res2 and res2[0]["is_nul"] is False assert res2[0]["cod"] == "OE-3" def test_semnatura_corpus_pe_seed(conn): emb = _inject_mock_engine() _seed_silver(conn, [("SCHIMB ULEI MOTOR", "OE-3", 0)]) from app.mapping import ensure_embeddings_corpus ensure_embeddings_corpus(conn) sig1 = emb.corpus_signature() assert sig1 is not None # Re-apel fara schimbare -> aceeasi semnatura (nu re-indexeaza). ensure_embeddings_corpus(conn) assert emb.corpus_signature() == sig1 # Adaugare rand -> semnatura se schimba. _seed_silver(conn, [("INLOCUIT BATERIE", "OE-1", 0)]) ensure_embeddings_corpus(conn) assert emb.corpus_signature() != sig1 def test_query_normalizat_ca_si_corpusul(conn, monkeypatch): """F1 (HIGH): enrich_suggestions interogheaza suggest_nearest cu textul NORMALIZAT.""" import app.embeddings as emb captura = {} monkeypatch.setattr(emb, "has_corpus", lambda: True) def fake_suggest(text, top_k=1): captura["text"] = text return [{"cod": "OE-3", "is_nul": False, "similaritate": 0.99}] monkeypatch.setattr(emb, "suggest_nearest", fake_suggest) from app.mapping import enrich_suggestions enrich_suggestions(conn, "Schimb Uleiul Motor") # Corpusul e denumire_normalizata -> query-ul trebuie normalizat la fel. from app.mapping import normalize_for_match assert captura["text"] == normalize_for_match("Schimb Uleiul Motor") assert captura["text"] == "SCHIMB ULEIUL MOTOR" def test_degradare_gratioasa_pastrata(conn): """Backend care arunca -> ensure + enrich NU arunca exceptie.""" import app.embeddings as emb from app.embeddings import EmbeddingEngine class BrokenBackend: def embed(self, texts): raise RuntimeError("model indisponibil") emb._engine = EmbeddingEngine(backend=BrokenBackend()) _seed_silver(conn, [("SCHIMB ULEI MOTOR", "OE-3", 0)]) from app.mapping import ensure_embeddings_corpus, enrich_suggestions ensure_embeddings_corpus(conn) # nu arunca out = enrich_suggestions(conn, "SCHIMB ULEI") # nu arunca assert "sugestie_principala" in out