""" Teste pentru app/embeddings.py -- modul embedding in-proces (L14-S4). Structura: (a) backend MOCK (vectori deterministi) -- index + suggest_nearest (b) degradare gratioasa: backend None/broken -> is_available()=False, suggest_nearest()=[] fara exceptie (c) test real fastembed, skip daca nu e instalat (marker slow) """ import math import pytest from app import embeddings as emb from app.embeddings import EmbeddingEngine # --------------------------------------------------------------------------- # # Helpers # # --------------------------------------------------------------------------- # def _vec(text: str, dim: int = 8) -> list: """Vector determinist bazat pe hash-ul textului (mock pur, fara retea).""" h = abs(hash(text)) components = [(h >> (i * 5)) & 0x1F for i in range(dim)] norm = math.sqrt(sum(c * c for c in components)) or 1.0 return [c / norm for c in components] class MockBackend: """Backend embedding determinist pentru teste.""" def embed(self, texts: list) -> list: return [_vec(t) for t in texts] # --------------------------------------------------------------------------- # # (a) Mock backend -- index + suggest_nearest # # --------------------------------------------------------------------------- # def test_index_and_suggest_nearest_mock(): """Cel mai apropiat vecin al unui text identic == el insusi.""" corpus = [ {"denumire": "SCHIMB ULEI", "cod": "OE-3"}, {"denumire": "REPARATIE MOTOR", "cod": "OE-1"}, {"denumire": "VERIFICARE DIRECTIE", "cod": "OE-4"}, ] engine = EmbeddingEngine(backend=MockBackend()) engine.index_corpus(corpus) results = engine.suggest_nearest("SCHIMB ULEI", top_k=1) assert results, "Trebuie sa returneze cel putin un rezultat" assert results[0]["cod"] == "OE-3" assert 0.0 <= results[0]["similaritate"] <= 1.0 + 1e-9 def test_suggest_nearest_top_k_respects_limit(): """suggest_nearest(top_k=2) nu returneaza mai mult de 2 rezultate.""" corpus = [ {"denumire": "SCHIMB ULEI MOTOR", "cod": "OE-3"}, {"denumire": "REVIZIE COMPLETA", "cod": "OE-3"}, {"denumire": "REPARATIE MOTOR", "cod": "OE-1"}, {"denumire": "INLOCUIT FRANA", "cod": "OE-2"}, ] engine = EmbeddingEngine(backend=MockBackend()) engine.index_corpus(corpus) results = engine.suggest_nearest("ULEI MOTOR", top_k=2) assert len(results) <= 2 def test_suggest_nearest_sorted_descending(): """Rezultatele sunt sortate descrescator dupa similaritate.""" corpus = [ {"denumire": "SCHIMB ULEI", "cod": "OE-3"}, {"denumire": "REPARATIE MOTOR", "cod": "OE-1"}, {"denumire": "VERIFICARE FRANURI", "cod": "OE-2"}, ] engine = EmbeddingEngine(backend=MockBackend()) engine.index_corpus(corpus) results = engine.suggest_nearest("SCHIMB ULEI", top_k=3) scores = [r["similaritate"] for r in results] assert scores == sorted(scores, reverse=True) def test_suggest_nearest_returns_dict_with_required_keys(): """Fiecare rezultat contine 'cod' si 'similaritate'.""" corpus = [{"denumire": "SCHIMB ULEI", "cod": "OE-3"}] engine = EmbeddingEngine(backend=MockBackend()) engine.index_corpus(corpus) results = engine.suggest_nearest("SCHIMB ULEI", top_k=1) assert results assert "cod" in results[0] assert "similaritate" in results[0] def test_index_empty_corpus(): """suggest_nearest pe corpus gol returneaza [].""" engine = EmbeddingEngine(backend=MockBackend()) engine.index_corpus([]) assert engine.suggest_nearest("CEVA", top_k=3) == [] def test_suggest_nearest_before_index(): """suggest_nearest fara index_corpus returneaza [].""" engine = EmbeddingEngine(backend=MockBackend()) assert engine.suggest_nearest("CEVA", top_k=3) == [] def test_engine_is_available_with_backend(): """is_available() = True cand backend-ul e furnizat.""" engine = EmbeddingEngine(backend=MockBackend()) assert engine.is_available() is True # --------------------------------------------------------------------------- # # (b) Degradare gratioasa -- backend None / arunca # # --------------------------------------------------------------------------- # def test_is_available_false_when_backend_none(): """is_available() = False cand backend = None.""" engine = EmbeddingEngine(backend=None) assert engine.is_available() is False def test_suggest_nearest_returns_empty_when_backend_none(): """suggest_nearest = [] fara exceptie cand backend = None.""" engine = EmbeddingEngine(backend=None) result = engine.suggest_nearest("CEVA", top_k=3) assert result == [] def test_index_corpus_no_exception_when_backend_none(): """index_corpus nu arunca exceptie cand backend = None.""" engine = EmbeddingEngine(backend=None) engine.index_corpus([{"denumire": "CEVA", "cod": "OE-1"}]) # nu arunca def test_suggest_nearest_no_exception_on_backend_error(): """suggest_nearest prinde exceptia din backend si returneaza [].""" class BrokenBackend: def embed(self, texts): raise RuntimeError("backend broke") corpus = [{"denumire": "SCHIMB ULEI", "cod": "OE-3"}] engine = EmbeddingEngine(backend=BrokenBackend()) engine.index_corpus(corpus) # index poate esua silentios # suggest_nearest nu trebuie sa arunce exceptie result = engine.suggest_nearest("SCHIMB ULEI", top_k=1) assert result == [] def test_index_corpus_no_exception_on_backend_error(): """index_corpus nu arunca exceptie cand backend-ul arunca la embed.""" class BrokenBackend: def embed(self, texts): raise ValueError("embed error") engine = EmbeddingEngine(backend=BrokenBackend()) engine.index_corpus([{"denumire": "CEVA", "cod": "OE-1"}]) # corpus ramane gol, suggest_nearest returneaza [] assert engine.suggest_nearest("CEVA") == [] # --------------------------------------------------------------------------- # # API la nivel de modul (singleton global) # # --------------------------------------------------------------------------- # def test_module_level_is_available_no_exception(): """Apelul global is_available() nu arunca exceptie.""" result = emb.is_available() assert isinstance(result, bool) def test_module_level_suggest_nearest_no_exception(): """Apelul global suggest_nearest() nu arunca exceptie.""" result = emb.suggest_nearest("SCHIMB ULEI MOTOR", top_k=3) assert isinstance(result, list) def test_module_level_index_corpus_no_exception(): """Apelul global index_corpus() nu arunca exceptie.""" corpus = [{"denumire": "REPARATIE", "cod": "OE-1"}] emb.index_corpus(corpus) # nu trebuie sa arunce # --------------------------------------------------------------------------- # # (c) Test real fastembed -- skip daca modelul nu e descarcat # # --------------------------------------------------------------------------- # try: import fastembed as _fe _FASTEMBED_AVAILABLE = True except ImportError: _FASTEMBED_AVAILABLE = False @pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat") def test_fastembed_backend_is_available_type(): """is_available() returneaza bool (indiferent daca modelul e descarcat sau nu).""" result = emb.is_available() assert isinstance(result, bool) @pytest.mark.slow @pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat") def test_fastembed_real_embedding_similarity(): """Test real end-to-end: denumiri similare au similaritate mai mare decat cele diferite. Necesita download model la prima rulare (~220MB). Skip cu: pytest -m 'not slow'. """ from app.embeddings import EmbeddingEngine, FastEmbedBackend backend = FastEmbedBackend() engine = EmbeddingEngine(backend=backend) corpus = [ {"denumire": "schimb ulei motor", "cod": "OE-3"}, {"denumire": "reparatie motor cutie viteze", "cod": "OE-1"}, {"denumire": "verificare directie volan", "cod": "OE-4"}, ] engine.index_corpus(corpus) results = engine.suggest_nearest("schimb ulei", top_k=3) assert results, "Trebuie sa returneze cel putin un rezultat" # 'schimb ulei' trebuie sa fie mai aproape de 'schimb ulei motor' (OE-3) assert results[0]["cod"] == "OE-3", ( f"Asteptat OE-3 ca primul rezultat, primit: {results}" )