Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq (3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect ~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7 (nu OE-5), placute frana uzura OE-1 (nu OE-F avarie). US-001..006: prefiltru NUL determinist, etichetator offline, generator seed, seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL 12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO). config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env). Suita: 1387 passed, 1 deselected (live). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
134 lines
5.0 KiB
Python
134 lines
5.0 KiB
Python
"""US-006 (PRD 5.18) — enrich_suggestions = pre-filtru NUL + k-NN pe corpus etichetat.
|
|
|
|
Ordinea de precedenta: pre-filtru NUL -> (daca NUL: fara cod) altfel GOLD partajat >
|
|
exact (SILVER) > k-NN embeddings. k-NN sub prag -> abtinere. Vecin k-NN NUL -> supresie.
|
|
Invariant #13: nimic din asta nu intra in resolve_prestatii/load_mapping.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture()
def env(monkeypatch, tmp_path):
    """Provide an isolated, freshly initialised application environment.

    The database path is placed inside pytest's per-test ``tmp_path``
    directory, so the directory and the DB file are managed (and eventually
    removed) by pytest instead of being leaked by a bare ``mkdtemp()`` call.
    Web auth is switched off and embeddings are switched on through
    environment variables, the cached settings are dropped, and ``init_db()``
    is run.

    Yields:
        The ``monkeypatch`` fixture, so dependants can apply further patches.
    """
    db_file = os.path.join(str(tmp_path), "us006.db")
    monkeypatch.setenv("AUTOPASS_DB_PATH", db_file)
    monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
    monkeypatch.setenv("AUTOPASS_EMBEDDINGS_ENABLED", "true")

    # App modules are imported only after the environment has been patched.
    # NOTE(review): presumably get_settings() reads these variables and caches
    # the result, which is why the cache is cleared here -- confirm.
    from app.config import get_settings

    get_settings.cache_clear()

    from app.db import init_db

    init_db()

    yield monkeypatch

    # Drop the settings cached during this test so they do not carry over.
    get_settings.cache_clear()
|
|
|
|
|
|
@pytest.fixture()
def conn(env):
    """Yield a connection from ``app.db.get_connection`` and close it afterwards.

    Requests ``env`` so the environment is patched and ``init_db()`` has run
    before the connection is opened.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    # Teardown: release the connection once the test is done with it.
    connection.close()
|
|
|
|
|
|
def _silver(conn, denumire_norm, cod, is_nul=0):
    """Insert one SILVER row into ``mapping_suggestions`` and commit.

    The row is written with ``source='llm_seed'`` and ``confidence=0.7``.
    ``INSERT OR IGNORE`` is used, so a row that conflicts with a table
    constraint is silently skipped.

    Args:
        conn: Connection exposing ``execute`` and ``commit``.
        denumire_norm: Value stored in ``denumire_normalizata``.
        cod: Value stored in ``cod_prestatie``.
        is_nul: Value stored in ``is_nul`` (defaults to ``0``).
    """
    sql = (
        "INSERT OR IGNORE INTO mapping_suggestions "
        "(denumire_normalizata, cod_prestatie, is_nul, source, confidence) VALUES (?, ?, ?, 'llm_seed', 0.7)"
    )
    params = (denumire_norm, cod, is_nul)
    conn.execute(sql, params)
    conn.commit()
|
|
|
|
|
|
def _mock_embedding(monkeypatch, cod, sim, is_nul=False):
    """Patch ``app.embeddings`` so the nearest-neighbour lookup returns one canned hit.

    ``has_corpus`` is replaced with a function that always returns ``True`` and
    ``suggest_nearest`` with one that returns a single-element list holding
    the given code, NUL flag and similarity, whatever text it is asked about.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used to install the stubs.
        cod: Value reported under the ``"cod"`` key.
        sim: Value reported under the ``"similaritate"`` key.
        is_nul: Value reported under the ``"is_nul"`` key.
    """
    import app.embeddings as embeddings_module

    def fake_has_corpus():
        return True

    def fake_suggest_nearest(text, top_k=1):
        return [{"cod": cod, "is_nul": is_nul, "similaritate": sim}]

    monkeypatch.setattr(embeddings_module, "has_corpus", fake_has_corpus)
    monkeypatch.setattr(embeddings_module, "suggest_nearest", fake_suggest_nearest)
|
|
|
|
|
|
def test_prefiltru_nul_supreseaza_inainte_de_knn(conn, monkeypatch):
    """The NUL pre-filter must short-circuit before the k-NN lookup is consulted."""
    import app.embeddings as embeddings_module

    # Flag flipped by the spy if the k-NN lookup is ever reached.
    knn_state = {"da": False}

    def spy_suggest_nearest(text, top_k=1):
        # The embedding WOULD suggest a confident code if it were asked.
        knn_state["da"] = True
        return [{"cod": "OE-1", "is_nul": False, "similaritate": 0.99}]

    monkeypatch.setattr(embeddings_module, "has_corpus", lambda: True)
    monkeypatch.setattr(embeddings_module, "suggest_nearest", spy_suggest_nearest)

    from app.mapping import enrich_suggestions

    result = enrich_suggestions(conn, "13 X ITP")

    # Non-operation -> no code is suggested and the NUL source is flagged.
    assert result["sugestie_principala"] is None
    assert result["surse"]["nul"] is True
    # The k-NN lookup was not even queried.
    assert knn_state["da"] is False
|
|
|
|
|
|
def test_precedenta_gold_exact_embedding(conn, monkeypatch):
    """Precedence is shared GOLD > exact SILVER > embedding, checked by removing one source at a time."""
    from app.shared_store import record_human_validation
    from app.mapping import enrich_suggestions, normalize_for_match

    den = "OPERATIE DE TEST UNICA"
    norm = normalize_for_match(den)

    # Arrange: each of the three sources proposes a different code.
    record_human_validation(conn, den, "OE-1")  # shared GOLD
    _silver(conn, norm, "OE-2")  # exact SILVER
    _mock_embedding(monkeypatch, "OE-3", 0.99)  # embedding
    conn.commit()

    # All three present -> GOLD wins.
    with_gold = enrich_suggestions(conn, den)
    assert with_gold["sugestie_principala"] == {"cod_prestatie": "OE-1", "sursa": "gold_partajat"}

    # GOLD removed -> SILVER wins.
    conn.execute("DELETE FROM shared_mappings")
    conn.commit()
    with_silver = enrich_suggestions(conn, den)
    assert with_silver["sugestie_principala"]["sursa"] == "silver"
    assert with_silver["sugestie_principala"]["cod_prestatie"] == "OE-2"

    # GOLD and SILVER removed -> the embedding wins.
    conn.execute("DELETE FROM mapping_suggestions")
    conn.commit()
    with_embedding = enrich_suggestions(conn, den)
    assert with_embedding["sugestie_principala"] == {"cod_prestatie": "OE-3", "sursa": "embedding"}
|
|
|
|
|
|
def test_prag_similaritate(conn, monkeypatch):
    """A k-NN hit just above ``EMB_MIN_SIMILARITATE`` is reported as the embedding source."""
    from app.mapping import enrich_suggestions, EMB_MIN_SIMILARITATE

    just_above = EMB_MIN_SIMILARITATE + 0.01
    _mock_embedding(monkeypatch, "OE-3", just_above)

    result = enrich_suggestions(conn, "CEVA NEVAZUT")

    assert result["surse"]["embedding"] == "OE-3"
|
|
|
|
|
|
def test_abtinere_sub_prag(conn, monkeypatch):
    """A k-NN hit just below ``EMB_MIN_SIMILARITATE`` yields no suggestion at all."""
    from app.mapping import enrich_suggestions, EMB_MIN_SIMILARITATE

    just_below = EMB_MIN_SIMILARITATE - 0.01
    _mock_embedding(monkeypatch, "OE-3", just_below)

    result = enrich_suggestions(conn, "CEVA NEVAZUT")

    # Below the threshold -> abstain.
    assert result["surse"]["embedding"] is None
    assert result["sugestie_principala"] is None
|
|
|
|
|
|
def test_vecin_knn_nul_supreseaza(conn, monkeypatch):
    """A NUL nearest neighbour above the threshold suppresses the suggestion."""
    from app.mapping import enrich_suggestions

    # NUL neighbour with a similarity well above the threshold.
    _mock_embedding(monkeypatch, None, 0.99, is_nul=True)

    result = enrich_suggestions(conn, "CEVA CARE SEAMANA CU GUNOI")
    sources = result["surse"]

    # NUL -> produces no code, and the NUL flag is raised.
    assert sources["embedding"] is None
    assert sources["nul"] is True
    assert result["sugestie_principala"] is None
|
|
|
|
|
|
def test_invariant_13_resolve_neatins(conn):
    """Regression #13: a populated SILVER row must NOT cause auto-resolution in resolve_prestatii."""
    from app.mapping import resolve_prestatii, normalize_for_match

    silver_key = normalize_for_match("OPERATIE X")
    _silver(conn, silver_key, "OE-1")

    operations = [{"cod_op_service": "OPERATIE X", "denumire": "OPERATIE X"}]
    resolved, unmapped = resolve_prestatii(operations, mapping={}, valid_codes={"OE-1"})

    # Stays unmapped: the code stored in SILVER is NOT picked up.
    assert resolved[0]["cod_prestatie"] is None
    assert unmapped
    assert unmapped[0]["cod_op_service"] == "OPERATIE X"
|