Files
rar-autopass/tests/test_enrich_corpus_etichetat.py
Claude Agent 756f77730f feat(5.18): corpus k-NN exemple etichetate + seed real Haiku (17181 op)
Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE
cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq
(3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect
~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume
piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7
(nu OE-5), placute frana uzura OE-1 (nu OE-F avarie).

US-001..006: prefiltru NUL determinist, etichetator offline, generator seed,
seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings
indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL
12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO).

config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER
populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env).

Suita: 1387 passed, 1 deselected (live).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 06:59:15 +00:00

134 lines
5.0 KiB
Python

"""US-006 (PRD 5.18) — enrich_suggestions = pre-filtru NUL + k-NN pe corpus etichetat.
Ordinea de precedenta: pre-filtru NUL -> (daca NUL: fara cod) altfel GOLD partajat >
exact (SILVER) > k-NN embeddings. k-NN sub prag -> abtinere. Vecin k-NN NUL -> supresie.
Invariant #13: nimic din asta nu intra in resolve_prestatii/load_mapping.
"""
from __future__ import annotations
import os
import tempfile
import pytest
@pytest.fixture()
def env(monkeypatch, tmp_path):
    """Point the application at an isolated database and initialise it.

    Sets ``AUTOPASS_DB_PATH`` to a file inside pytest's per-test ``tmp_path``
    (instead of an unmanaged ``tempfile.mkdtemp()`` directory that was never
    removed), disables web auth, enables embeddings, then calls ``init_db()``.

    Yields:
        The ``monkeypatch`` fixture, so dependants can patch further.
    """
    monkeypatch.setenv("AUTOPASS_DB_PATH", str(tmp_path / "us006.db"))
    monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
    monkeypatch.setenv("AUTOPASS_EMBEDDINGS_ENABLED", "true")

    # Imported only after the environment is patched, as in the original.
    from app.config import get_settings

    # Drop any cached settings; presumably so the variables above are re-read.
    get_settings.cache_clear()

    from app.db import init_db

    init_db()
    yield monkeypatch
    # Clear again on teardown so this test's settings are not left cached.
    get_settings.cache_clear()
@pytest.fixture()
def conn(env):
    """Yield a connection from ``app.db.get_connection`` and close it afterwards.

    Requests ``env`` so the environment is patched and ``init_db()`` has run
    before the connection is opened.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    connection.close()
def _silver(conn, denumire_norm, cod, is_nul=0):
    """Insert one ``llm_seed`` row into ``mapping_suggestions`` and commit.

    Args:
        conn: Database connection exposing ``execute`` and ``commit``.
        denumire_norm: Value stored in ``denumire_normalizata``.
        cod: Value stored in ``cod_prestatie``.
        is_nul: Value stored in ``is_nul`` (defaults to 0).

    The statement is ``INSERT OR IGNORE``; ``source`` is fixed to
    ``'llm_seed'`` and ``confidence`` to ``0.7``.
    """
    statement = (
        "INSERT OR IGNORE INTO mapping_suggestions "
        "(denumire_normalizata, cod_prestatie, is_nul, source, confidence) VALUES (?, ?, ?, 'llm_seed', 0.7)"
    )
    values = (denumire_norm, cod, is_nul)
    conn.execute(statement, values)
    conn.commit()
def _mock_embedding(monkeypatch, cod, sim, is_nul=False):
    """Replace the embedding lookup with a stub returning one fixed neighbour.

    Patches ``app.embeddings.has_corpus`` to report ``True`` and
    ``app.embeddings.suggest_nearest`` to return a single-item list carrying
    the given ``cod``, ``is_nul`` and ``sim`` (as ``similaritate``).
    """
    import app.embeddings as emb

    def _corpus_available():
        return True

    def _fixed_neighbour(text, top_k=1):
        # The query text and top_k are ignored; the same neighbour is always returned.
        return [{"cod": cod, "is_nul": is_nul, "similaritate": sim}]

    monkeypatch.setattr(emb, "has_corpus", _corpus_available)
    monkeypatch.setattr(emb, "suggest_nearest", _fixed_neighbour)
def test_prefiltru_nul_supreseaza_inainte_de_knn(conn, monkeypatch):
    """The NUL pre-filter must short-circuit before k-NN is ever queried."""
    import app.embeddings as emb

    # The embedding WOULD suggest a code; the spy records whether it is consulted.
    knn_queries = []

    def spy(text, top_k=1):
        knn_queries.append(text)
        return [{"cod": "OE-1", "is_nul": False, "similaritate": 0.99}]

    monkeypatch.setattr(emb, "has_corpus", lambda: True)
    monkeypatch.setattr(emb, "suggest_nearest", spy)

    # Imported after patching, matching the original statement order.
    from app.mapping import enrich_suggestions

    result = enrich_suggestions(conn, "13 X ITP")

    assert result["sugestie_principala"] is None  # non-operation -> no code
    assert result["surse"]["nul"] is True
    assert knn_queries == []  # k-NN was not even queried
def test_precedenta_gold_exact_embedding(conn, monkeypatch):
    """Precedence check: shared GOLD > exact SILVER > embedding."""
    from app.shared_store import record_human_validation
    from app.mapping import enrich_suggestions, normalize_for_match

    denumire = "OPERATIE DE TEST UNICA"
    normalized = normalize_for_match(denumire)

    # Each of the three sources proposes a different code.
    record_human_validation(conn, denumire, "OE-1")  # shared GOLD
    _silver(conn, normalized, "OE-2")  # exact SILVER
    _mock_embedding(monkeypatch, "OE-3", 0.99)  # embedding
    conn.commit()

    with_gold = enrich_suggestions(conn, denumire)
    assert with_gold["sugestie_principala"] == {"cod_prestatie": "OE-1", "sursa": "gold_partajat"}

    # Without GOLD, SILVER wins.
    conn.execute("DELETE FROM shared_mappings")
    conn.commit()
    principal = enrich_suggestions(conn, denumire)["sugestie_principala"]
    assert principal["sursa"] == "silver"
    assert principal["cod_prestatie"] == "OE-2"

    # Without GOLD and without SILVER, the embedding wins.
    conn.execute("DELETE FROM mapping_suggestions")
    conn.commit()
    embedding_only = enrich_suggestions(conn, denumire)
    assert embedding_only["sugestie_principala"] == {"cod_prestatie": "OE-3", "sursa": "embedding"}
def test_prag_similaritate(conn, monkeypatch):
    """A neighbour just above ``EMB_MIN_SIMILARITATE`` is reported as the embedding source."""
    from app.mapping import enrich_suggestions, EMB_MIN_SIMILARITATE

    just_above = EMB_MIN_SIMILARITATE + 0.01
    _mock_embedding(monkeypatch, "OE-3", just_above)

    sources = enrich_suggestions(conn, "CEVA NEVAZUT")["surse"]
    assert sources["embedding"] == "OE-3"
def test_abtinere_sub_prag(conn, monkeypatch):
    """A neighbour just below ``EMB_MIN_SIMILARITATE`` yields no suggestion at all."""
    from app.mapping import enrich_suggestions, EMB_MIN_SIMILARITATE

    just_below = EMB_MIN_SIMILARITATE - 0.01
    _mock_embedding(monkeypatch, "OE-3", just_below)

    result = enrich_suggestions(conn, "CEVA NEVAZUT")
    assert result["surse"]["embedding"] is None  # below threshold -> abstain
    assert result["sugestie_principala"] is None
def test_vecin_knn_nul_supreseaza(conn, monkeypatch):
    """A NUL nearest neighbour suppresses the suggestion instead of producing a code."""
    from app.mapping import enrich_suggestions

    # NUL neighbour with similarity 0.99 and no code attached.
    _mock_embedding(monkeypatch, None, 0.99, is_nul=True)

    result = enrich_suggestions(conn, "CEVA CARE SEAMANA CU GUNOI")
    sources = result["surse"]
    assert sources["embedding"] is None  # NUL -> produces no code
    assert sources["nul"] is True
    assert result["sugestie_principala"] is None
def test_invariant_13_resolve_neatins(conn):
    """Regression #13: a populated SILVER row must NOT auto-resolve in resolve_prestatii."""
    from app.mapping import resolve_prestatii, normalize_for_match

    operation = "OPERATIE X"
    _silver(conn, normalize_for_match(operation), "OE-1")

    rows = [{"cod_op_service": operation, "denumire": operation}]
    resolved, unmapped = resolve_prestatii(rows, mapping={}, valid_codes={"OE-1"})

    # Stays unmapped: the SILVER code is not picked up.
    assert resolved[0]["cod_prestatie"] is None
    assert unmapped and unmapped[0]["cod_op_service"] == "OPERATIE X"