Files
rar-autopass/tests/test_embeddings.py
Claude Agent 3fc53534e2 feat(5.15+5.14): CLOSE — fix-uri code-review + embeddings functional
5.15 (propagare design + dashboard editare) si 5.14 (mapare LLM distilata)
inchise dupa /code-review high. 8 buguri reparate TDD:

- HIGH modal nu se deschidea pe randul slim (base.html: trimitere-slim)
- HIGH /repune trunchia prestatii (declaratie incompleta la RAR) -> iterare
  peste existing, codes pozitional
- HIGH embeddings incarca model ~230MB degeaba pe corpus gol -> poarta has_corpus()
- HIGH picker chips gol pe re-render eroare -> conn/account_id pe toate ramurile
- MED obs re-derivat dupa stergere explicita -> _merge_override pastreaza obs=''
- MED mapare salvata fara denumire poluă GOLD -> _record_gold_validation guard
- MED typo nome_prestatie -> nume_prestatie in select /repune
- MED bucketare timp +3h gresita iarna -> SQLite localtime + TZ=Europe/Bucharest

Embeddings WIRE-uit functional (PRD #15, decizie user): ensure_embeddings_corpus
construieste corpus din nomenclator, gated pe AUTOPASS_EMBEDDINGS_ENABLED (default
off). Marime model corectata ~50MB->~230MB (estimare PRD gresita).

Cleanup: hoist load_* din bucla bulk-fix; import re la top.
Regresie: 1256 passed, 1 deselected (live), 0 failed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 20:48:34 +00:00

234 lines
8.4 KiB
Python

"""
Teste pentru app/embeddings.py -- modul embedding in-proces (L14-S4).
Structura:
(a) backend MOCK (vectori deterministi) -- index + suggest_nearest
(b) degradare gratioasa: backend None/broken -> is_available()=False,
suggest_nearest()=[] fara exceptie
(c) test real fastembed, skip daca nu e instalat (marker slow)
"""
import math
import pytest
from app import embeddings as emb
from app.embeddings import EmbeddingEngine
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
def _vec(text: str, dim: int = 8) -> list:
    """Return a reproducible pseudo-embedding for *text* (pure mock, no network).

    The vector is derived from a SHA-256 digest of the text instead of the
    built-in ``hash()``. String hashes are salted per interpreter process
    (unless ``PYTHONHASHSEED`` is pinned), so ``hash()``-based vectors change
    from one test run to the next and a failing run cannot be reproduced.

    Args:
        text: Text to embed.
        dim: Number of vector components. Each component consumes 5 bits of
            the 256-bit digest; components whose bits fall beyond the digest
            are 0.

    Returns:
        A list of ``dim`` non-negative floats scaled to unit L2 norm (an
        all-zero vector is returned unchanged).
    """
    import hashlib  # local import: keeps the helper usable on its own

    # "surrogatepass" lets any str be encoded, as hash() accepted any str.
    digest = hashlib.sha256(text.encode("utf-8", "surrogatepass")).digest()
    h = int.from_bytes(digest, "big")
    # Slice the integer into 5-bit chunks, one per component (values 0..31).
    components = [(h >> (i * 5)) & 0x1F for i in range(dim)]
    # Fall back to 1.0 so an all-zero vector does not divide by zero.
    norm = math.sqrt(sum(c * c for c in components)) or 1.0
    return [c / norm for c in components]
class MockBackend:
    """Stand-in embedding backend for tests; vectors come from ``_vec``."""

    def embed(self, texts: list) -> list:
        """Return one ``_vec`` vector per entry of *texts*, in input order."""
        vectors = []
        for text in texts:
            vectors.append(_vec(text))
        return vectors
# --------------------------------------------------------------------------- #
# (a) Mock backend -- index + suggest_nearest #
# --------------------------------------------------------------------------- #
def test_index_and_suggest_nearest_mock():
    """The nearest neighbour of a text present in the corpus is that same entry."""
    entries = [
        {"denumire": name, "cod": code}
        for name, code in (
            ("SCHIMB ULEI", "OE-3"),
            ("REPARATIE MOTOR", "OE-1"),
            ("VERIFICARE DIRECTIE", "OE-4"),
        )
    ]
    engine = EmbeddingEngine(backend=MockBackend())
    engine.index_corpus(entries)

    hits = engine.suggest_nearest("SCHIMB ULEI", top_k=1)

    assert hits, "Trebuie sa returneze cel putin un rezultat"
    best = hits[0]
    assert best["cod"] == "OE-3"
    # Small tolerance above 1.0 allows for floating-point rounding.
    assert 0.0 <= best["similaritate"] <= 1.0 + 1e-9
def test_suggest_nearest_top_k_respects_limit():
    """suggest_nearest(top_k=2) yields no more than two results."""
    limit = 2
    engine = EmbeddingEngine(backend=MockBackend())
    engine.index_corpus(
        [
            {"denumire": "SCHIMB ULEI MOTOR", "cod": "OE-3"},
            {"denumire": "REVIZIE COMPLETA", "cod": "OE-3"},
            {"denumire": "REPARATIE MOTOR", "cod": "OE-1"},
            {"denumire": "INLOCUIT FRANA", "cod": "OE-2"},
        ]
    )

    hits = engine.suggest_nearest("ULEI MOTOR", top_k=limit)

    assert limit >= len(hits)
def test_suggest_nearest_sorted_descending():
    """Results come back ordered by similarity, highest first."""
    engine = EmbeddingEngine(backend=MockBackend())
    engine.index_corpus(
        [
            {"denumire": "SCHIMB ULEI", "cod": "OE-3"},
            {"denumire": "REPARATIE MOTOR", "cod": "OE-1"},
            {"denumire": "VERIFICARE FRANURI", "cod": "OE-2"},
        ]
    )

    hits = engine.suggest_nearest("SCHIMB ULEI", top_k=3)

    similarities = [hit["similaritate"] for hit in hits]
    descending = sorted(similarities, reverse=True)
    assert similarities == descending
def test_suggest_nearest_returns_dict_with_required_keys():
    """Every result carries the 'cod' and 'similaritate' keys."""
    engine = EmbeddingEngine(backend=MockBackend())
    engine.index_corpus([{"denumire": "SCHIMB ULEI", "cod": "OE-3"}])

    hits = engine.suggest_nearest("SCHIMB ULEI", top_k=1)

    assert hits
    first = hits[0]
    for required_key in ("cod", "similaritate"):
        assert required_key in first
def test_index_empty_corpus():
    """suggest_nearest after indexing an empty corpus returns []."""
    engine = EmbeddingEngine(backend=MockBackend())
    engine.index_corpus([])

    hits = engine.suggest_nearest("CEVA", top_k=3)

    assert hits == []
def test_suggest_nearest_before_index():
    """suggest_nearest without a prior index_corpus call returns []."""
    unindexed_engine = EmbeddingEngine(backend=MockBackend())

    hits = unindexed_engine.suggest_nearest("CEVA", top_k=3)

    assert hits == []
def test_engine_is_available_with_backend():
    """is_available() is True when a backend is supplied."""
    available = EmbeddingEngine(backend=MockBackend()).is_available()
    assert available is True
# --------------------------------------------------------------------------- #
# (b) Degradare gratioasa -- backend None / arunca #
# --------------------------------------------------------------------------- #
def test_is_available_false_when_backend_none():
    """is_available() is False when the backend is None."""
    available = EmbeddingEngine(backend=None).is_available()
    assert available is False
def test_suggest_nearest_returns_empty_when_backend_none():
    """suggest_nearest returns [] without raising when the backend is None."""
    backendless_engine = EmbeddingEngine(backend=None)
    assert backendless_engine.suggest_nearest("CEVA", top_k=3) == []
def test_index_corpus_no_exception_when_backend_none():
    """index_corpus does not raise when the backend is None."""
    backendless_engine = EmbeddingEngine(backend=None)
    entries = [{"denumire": "CEVA", "cod": "OE-1"}]
    # No assertion: the test passes as long as this call does not raise.
    backendless_engine.index_corpus(entries)
def test_suggest_nearest_no_exception_on_backend_error():
    """suggest_nearest is expected to absorb a backend error and return []."""

    class BrokenBackend:
        """Backend whose embed() always fails."""

        def embed(self, texts):
            raise RuntimeError("backend broke")

    engine = EmbeddingEngine(backend=BrokenBackend())
    # Indexing is allowed to fail silently here.
    engine.index_corpus([{"denumire": "SCHIMB ULEI", "cod": "OE-3"}])

    # The lookup itself must not raise.
    hits = engine.suggest_nearest("SCHIMB ULEI", top_k=1)

    assert hits == []
def test_index_corpus_no_exception_on_backend_error():
    """index_corpus does not raise when the backend's embed() raises."""

    class BrokenBackend:
        """Backend whose embed() always fails."""

        def embed(self, texts):
            raise ValueError("embed error")

    engine = EmbeddingEngine(backend=BrokenBackend())
    entries = [{"denumire": "CEVA", "cod": "OE-1"}]
    engine.index_corpus(entries)

    # Nothing should have been indexed, so the lookup yields [].
    hits = engine.suggest_nearest("CEVA")
    assert hits == []
# --------------------------------------------------------------------------- #
# API la nivel de modul (singleton global) #
# --------------------------------------------------------------------------- #
def test_module_level_is_available_no_exception():
    """The module-level is_available() call does not raise and returns a bool."""
    availability = emb.is_available()
    assert isinstance(availability, bool)
def test_module_level_suggest_nearest_no_exception():
    """The module-level suggest_nearest() call does not raise and returns a list."""
    suggestions = emb.suggest_nearest("SCHIMB ULEI MOTOR", top_k=3)
    assert isinstance(suggestions, list)
def test_module_level_index_corpus_no_exception():
    """The module-level index_corpus() call does not raise."""
    # No assertion: the test passes as long as this call does not raise.
    emb.index_corpus([{"denumire": "REPARATIE", "cod": "OE-1"}])
# --------------------------------------------------------------------------- #
# (c) Real fastembed tests -- skipped when fastembed is not installed #
# --------------------------------------------------------------------------- #
# Probe for the optional fastembed dependency; the flag drives the skipif
# markers on the fastembed tests.
try:
    import fastembed as _fe
except ImportError:
    _FASTEMBED_AVAILABLE = False
else:
    _FASTEMBED_AVAILABLE = True
@pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat")
def test_fastembed_backend_is_available_type():
    """is_available() returns a bool whether or not the model is downloaded."""
    availability = emb.is_available()
    assert isinstance(availability, bool)
@pytest.mark.slow
@pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat")
def test_fastembed_real_embedding_similarity():
    """Real end-to-end check: the closest label to a query is the similar one.

    The first run needs to download the model (~220MB).
    Deselect with: pytest -m 'not slow'.
    """
    from app.embeddings import EmbeddingEngine, FastEmbedBackend

    engine = EmbeddingEngine(backend=FastEmbedBackend())
    engine.index_corpus(
        [
            {"denumire": "schimb ulei motor", "cod": "OE-3"},
            {"denumire": "reparatie motor cutie viteze", "cod": "OE-1"},
            {"denumire": "verificare directie volan", "cod": "OE-4"},
        ]
    )

    results = engine.suggest_nearest("schimb ulei", top_k=3)

    assert results, "Trebuie sa returneze cel putin un rezultat"
    # 'schimb ulei' should land closest to 'schimb ulei motor' (OE-3).
    top_code = results[0]["cod"]
    assert top_code == "OE-3", (
        f"Asteptat OE-3 ca primul rezultat, primit: {results}"
    )