5.15 (propagare design + dashboard editare) si 5.14 (mapare LLM distilata) inchise dupa /code-review high. 8 buguri reparate TDD: - HIGH modal nu se deschidea pe randul slim (base.html: trimitere-slim) - HIGH /repune trunchia prestatii (declaratie incompleta la RAR) -> iterare peste existing, codes pozitional - HIGH embeddings incarca model ~230MB degeaba pe corpus gol -> poarta has_corpus() - HIGH picker chips gol pe re-render eroare -> conn/account_id pe toate ramurile - MED obs re-derivat dupa stergere explicita -> _merge_override pastreaza obs='' - MED mapare salvata fara denumire polua GOLD -> _record_gold_validation guard - MED typo nome_prestatie -> nume_prestatie in select /repune - MED bucketare timp +3h gresita iarna -> SQLite localtime + TZ=Europe/Bucharest Embeddings WIRE-uit functional (PRD #15, decizie user): ensure_embeddings_corpus construieste corpus din nomenclator, gated pe AUTOPASS_EMBEDDINGS_ENABLED (default off). Marime model corectata ~50MB->~230MB (estimare PRD gresita). Cleanup: hoist load_* din bucla bulk-fix; import re la top. Regresie: 1256 passed, 1 deselected (live), 0 failed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
234 lines
8.4 KiB
Python
234 lines
8.4 KiB
Python
"""
|
|
Teste pentru app/embeddings.py -- modul embedding in-proces (L14-S4).
|
|
|
|
Structura:
|
|
(a) backend MOCK (vectori deterministi) -- index + suggest_nearest
|
|
(b) degradare gratioasa: backend None/broken -> is_available()=False,
|
|
suggest_nearest()=[] fara exceptie
|
|
(c) test real fastembed, skip daca nu e instalat (marker slow)
|
|
"""
|
|
import math
|
|
import pytest
|
|
|
|
from app import embeddings as emb
|
|
from app.embeddings import EmbeddingEngine
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Helpers #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
def _vec(text: str, dim: int = 8) -> list:
    """Return a deterministic, unit-length mock embedding for *text*.

    The vector is derived from a SHA-256 digest of the UTF-8 encoded text,
    so the same text yields the same vector in every process. The built-in
    ``hash()`` is salted per interpreter run for ``str`` and would make test
    failures non-reproducible. No network or model is involved.

    Args:
        text: Text to embed.
        dim: Number of components in the returned vector.

    Returns:
        A list of ``dim`` non-negative floats. It has Euclidean norm 1.0
        unless every component is zero, in which case it is returned as-is.
    """
    import hashlib

    digest = hashlib.sha256(text.encode("utf-8")).digest()
    h = int.from_bytes(digest, "big")
    # Consecutive 5-bit slices of the digest, each in the range 0..31.
    components = [(h >> (i * 5)) & 0x1F for i in range(dim)]
    # "or 1.0" avoids division by zero for an all-zero vector.
    norm = math.sqrt(sum(c * c for c in components)) or 1.0
    return [c / norm for c in components]
|
|
|
|
|
|
class MockBackend:
    """Deterministic embedding backend used by the tests."""

    def embed(self, texts: list) -> list:
        """Return one ``_vec`` vector per input text, in input order."""
        vectors = []
        for text in texts:
            vectors.append(_vec(text))
        return vectors
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# (a) Mock backend -- index + suggest_nearest #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
def test_index_and_suggest_nearest_mock():
    """Querying with a text that is in the corpus returns that entry first."""
    entries = [
        {"denumire": "SCHIMB ULEI", "cod": "OE-3"},
        {"denumire": "REPARATIE MOTOR", "cod": "OE-1"},
        {"denumire": "VERIFICARE DIRECTIE", "cod": "OE-4"},
    ]
    eng = EmbeddingEngine(backend=MockBackend())
    eng.index_corpus(entries)

    hits = eng.suggest_nearest("SCHIMB ULEI", top_k=1)
    assert hits, "Trebuie sa returneze cel putin un rezultat"
    best = hits[0]
    assert best["cod"] == "OE-3"
    # Small epsilon tolerates floating-point rounding just above 1.0.
    assert 0.0 <= best["similaritate"] <= 1.0 + 1e-9
|
|
|
|
|
|
def test_suggest_nearest_top_k_respects_limit():
    """suggest_nearest(top_k=2) returns at most two results."""
    entries = [
        {"denumire": "SCHIMB ULEI MOTOR", "cod": "OE-3"},
        {"denumire": "REVIZIE COMPLETA", "cod": "OE-3"},
        {"denumire": "REPARATIE MOTOR", "cod": "OE-1"},
        {"denumire": "INLOCUIT FRANA", "cod": "OE-2"},
    ]
    eng = EmbeddingEngine(backend=MockBackend())
    eng.index_corpus(entries)

    hits = eng.suggest_nearest("ULEI MOTOR", top_k=2)
    assert 2 >= len(hits)
|
|
|
|
|
|
def test_suggest_nearest_sorted_descending():
    """Results come back ordered by descending similarity."""
    entries = [
        {"denumire": "SCHIMB ULEI", "cod": "OE-3"},
        {"denumire": "REPARATIE MOTOR", "cod": "OE-1"},
        {"denumire": "VERIFICARE FRANURI", "cod": "OE-2"},
    ]
    eng = EmbeddingEngine(backend=MockBackend())
    eng.index_corpus(entries)

    hits = eng.suggest_nearest("SCHIMB ULEI", top_k=3)
    similarities = [hit["similaritate"] for hit in hits]
    expected_order = sorted(similarities, reverse=True)
    assert similarities == expected_order
|
|
|
|
|
|
def test_suggest_nearest_returns_dict_with_required_keys():
    """The first result exposes both the 'cod' and 'similaritate' keys."""
    eng = EmbeddingEngine(backend=MockBackend())
    eng.index_corpus([{"denumire": "SCHIMB ULEI", "cod": "OE-3"}])

    hits = eng.suggest_nearest("SCHIMB ULEI", top_k=1)
    assert hits
    first = hits[0]
    assert "cod" in first
    assert "similaritate" in first
|
|
|
|
|
|
def test_index_empty_corpus():
    """After indexing an empty corpus, suggest_nearest returns []."""
    eng = EmbeddingEngine(backend=MockBackend())
    eng.index_corpus([])
    hits = eng.suggest_nearest("CEVA", top_k=3)
    assert hits == []
|
|
|
|
|
|
def test_suggest_nearest_before_index():
    """Without a prior index_corpus call, suggest_nearest returns []."""
    eng = EmbeddingEngine(backend=MockBackend())
    hits = eng.suggest_nearest("CEVA", top_k=3)
    assert hits == []
|
|
|
|
|
|
def test_engine_is_available_with_backend():
    """is_available() is exactly True when a backend is supplied."""
    available = EmbeddingEngine(backend=MockBackend()).is_available()
    assert available is True
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# (b) Degradare gratioasa -- backend None / arunca #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
def test_is_available_false_when_backend_none():
    """is_available() is exactly False when the backend is None."""
    available = EmbeddingEngine(backend=None).is_available()
    assert available is False
|
|
|
|
|
|
def test_suggest_nearest_returns_empty_when_backend_none():
    """With backend=None, suggest_nearest returns [] and does not raise."""
    eng = EmbeddingEngine(backend=None)
    hits = eng.suggest_nearest("CEVA", top_k=3)
    assert hits == []
|
|
|
|
|
|
def test_index_corpus_no_exception_when_backend_none():
    """With backend=None, index_corpus completes without raising."""
    eng = EmbeddingEngine(backend=None)
    entries = [{"denumire": "CEVA", "cod": "OE-1"}]
    # The test passes as long as this call does not raise.
    eng.index_corpus(entries)
|
|
|
|
|
|
def test_suggest_nearest_no_exception_on_backend_error():
    """suggest_nearest returns [] without raising when embed() always fails."""

    class _RaisingBackend:
        """Backend whose embed() raises RuntimeError on every call."""

        def embed(self, texts):
            raise RuntimeError("backend broke")

    eng = EmbeddingEngine(backend=_RaisingBackend())
    # Indexing is allowed to fail silently here.
    eng.index_corpus([{"denumire": "SCHIMB ULEI", "cod": "OE-3"}])

    # The query must not propagate the backend exception.
    hits = eng.suggest_nearest("SCHIMB ULEI", top_k=1)
    assert hits == []
|
|
|
|
|
|
def test_index_corpus_no_exception_on_backend_error():
    """index_corpus does not raise when the backend's embed() raises."""

    class _RaisingBackend:
        """Backend whose embed() raises ValueError on every call."""

        def embed(self, texts):
            raise ValueError("embed error")

    eng = EmbeddingEngine(backend=_RaisingBackend())
    eng.index_corpus([{"denumire": "CEVA", "cod": "OE-1"}])
    # Nothing could be indexed, so a later query is expected to return [].
    hits = eng.suggest_nearest("CEVA")
    assert hits == []
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# API la nivel de modul (singleton global) #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
def test_module_level_is_available_no_exception():
    """The module-level is_available() does not raise and returns a bool."""
    available = emb.is_available()
    assert isinstance(available, bool)
|
|
|
|
|
|
def test_module_level_suggest_nearest_no_exception():
    """The module-level suggest_nearest() does not raise and returns a list."""
    hits = emb.suggest_nearest("SCHIMB ULEI MOTOR", top_k=3)
    assert isinstance(hits, list)
|
|
|
|
|
|
def test_module_level_index_corpus_no_exception():
    """The module-level index_corpus() completes without raising."""
    # NOTE(review): presumably this indexes into a shared module-level
    # engine; confirm the state does not leak into other tests.
    entries = [{"denumire": "REPARATIE", "cod": "OE-1"}]
    # The test passes as long as this call does not raise.
    emb.index_corpus(entries)
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# (c) Real fastembed test -- skipped when fastembed is not installed          #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
# Probe for the optional fastembed dependency; the flag drives the skipif
# markers on the tests below.
try:
    import fastembed as _fe
except ImportError:
    _FASTEMBED_AVAILABLE = False
else:
    _FASTEMBED_AVAILABLE = True
|
|
|
|
|
|
@pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat")
def test_fastembed_backend_is_available_type():
    """With fastembed installed, module-level is_available() returns a bool.

    The value itself is not checked, only its type.
    """
    available = emb.is_available()
    assert isinstance(available, bool)
|
|
|
|
|
|
@pytest.mark.slow
@pytest.mark.skipif(not _FASTEMBED_AVAILABLE, reason="fastembed nu e instalat")
def test_fastembed_real_embedding_similarity():
    """End-to-end check using the real FastEmbedBackend.

    Indexes three service names and asserts that the query "schimb ulei"
    ranks the "schimb ulei motor" entry (code OE-3) first.

    Requires a model download on first run (~220MB). Deselect with:
    pytest -m 'not slow'.
    """
    from app.embeddings import EmbeddingEngine, FastEmbedBackend

    eng = EmbeddingEngine(backend=FastEmbedBackend())
    eng.index_corpus(
        [
            {"denumire": "schimb ulei motor", "cod": "OE-3"},
            {"denumire": "reparatie motor cutie viteze", "cod": "OE-1"},
            {"denumire": "verificare directie volan", "cod": "OE-4"},
        ]
    )

    hits = eng.suggest_nearest("schimb ulei", top_k=3)
    assert hits, "Trebuie sa returneze cel putin un rezultat"
    # 'schimb ulei' should be closest to 'schimb ulei motor' (OE-3).
    assert hits[0]["cod"] == "OE-3", (
        f"Asteptat OE-3 ca primul rezultat, primit: {hits}"
    )
|