feat(5.18): corpus k-NN exemple etichetate + seed real Haiku (17181 op)
Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq (3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect ~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7 (nu OE-5), placute frana uzura OE-1 (nu OE-F avarie). US-001..006: prefiltru NUL determinist, etichetator offline, generator seed, seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL 12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO). config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env). Suita: 1387 passed, 1 deselected (live). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
113
tests/test_operatii_seed.py
Normal file
113
tests/test_operatii_seed.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""US-004 (PRD 5.18) — seeder corpus etichetat in mapping_suggestions (SILVER).
|
||||
|
||||
INSERT OR IGNORE din artefactul comis -> SILVER nu mai e gol in productie.
|
||||
NB (F10): confirmarile UMANE stau in shared_mappings, NU aici; deci INSERT OR IGNORE
|
||||
pastreaza codul LLM existent la re-seed (v1 = ignore, nu upsert).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture()
def env(monkeypatch):
    """Provide an isolated, initialised database environment for one test.

    Creates a fresh temporary directory, points ``AUTOPASS_DB_PATH`` at
    ``us004.db`` inside it, sets ``AUTOPASS_WEB_AUTH_REQUIRED`` to
    ``"false"``, clears the ``get_settings`` cache and runs ``init_db()``.

    Yields:
        str: path of the temporary directory.

    On teardown the settings cache is cleared again and the temporary
    directory is removed (best effort), so repeated test runs do not
    accumulate stray directories and database files.

    NOTE(review): the tests in this module assert exact row counts in
    ``mapping_suggestions``, so they presumably rely on ``init_db()`` not
    seeding that table in the test environment -- confirm.
    """
    import shutil

    tmp = tempfile.mkdtemp()
    try:
        monkeypatch.setenv("AUTOPASS_DB_PATH", os.path.join(tmp, "us004.db"))
        monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
        # Imported only after the environment is patched, as in the original.
        from app.config import get_settings

        get_settings.cache_clear()
        from app.db import init_db

        init_db()
        yield tmp
        get_settings.cache_clear()
    finally:
        # mkdtemp() never cleans up after itself; ignore_errors keeps the
        # teardown from failing if a file in the directory is still held open.
        shutil.rmtree(tmp, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.fixture()
def conn(env):
    """Yield a connection from ``app.db.get_connection`` and close it afterwards.

    Requests ``env`` so that fixture has run before the connection is opened.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    connection.close()
|
||||
|
||||
|
||||
def _scrie_seed(tmp, items) -> str:
    """Write *items* as JSON to ``operatii-etichetate.json`` inside *tmp*.

    The file is UTF-8 encoded and non-ASCII characters are written verbatim
    (``ensure_ascii=False``).

    Returns:
        The path of the file that was written.
    """
    seed_path = os.path.join(tmp, "operatii-etichetate.json")
    with open(seed_path, "w", encoding="utf-8") as out:
        out.write(json.dumps(items, ensure_ascii=False))
    return seed_path
|
||||
|
||||
|
||||
# Seed entry that carries a code and is not flagged as NUL.
SEED_OE = {
    "denumire": "SCHIMB ULEI MOTOR",
    "denumire_normalizata": "SCHIMB ULEI MOTOR",
    "cod": "OE-3",
    "is_nul": False,
    "source": "llm_seed",
    "confidence": 0.7,
}

# Seed entry flagged as NUL: it has no code ("cod" is None).
SEED_NUL = {
    "denumire": "13 X ITP",
    "denumire_normalizata": "13 X ITP",
    "cod": None,
    "is_nul": True,
    "source": "llm_seed",
    "confidence": 0.7,
}
|
||||
|
||||
|
||||
def test_seed_populeaza_mapping_suggestions(env, conn):
    """Seeding one coded entry inserts a matching row into mapping_suggestions."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    assert inserted == 1

    query = (
        "SELECT cod_prestatie, source, confidence FROM mapping_suggestions "
        "WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    )
    stored = conn.execute(query).fetchone()
    # The stored row mirrors the code, source and confidence of the seed entry.
    assert stored["cod_prestatie"] == "OE-3"
    assert stored["source"] == "llm_seed"
    assert abs(stored["confidence"] - 0.7) < 1e-9
|
||||
|
||||
|
||||
def test_is_nul_din_seed(env, conn):
    """A NUL seed entry is stored with is_nul = 1 and a NULL code."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_NUL])
    seed_operatii_etichetate(conn, seed_file)
    conn.commit()

    query = "SELECT cod_prestatie, is_nul FROM mapping_suggestions WHERE denumire_normalizata = '13 X ITP'"
    stored = conn.execute(query).fetchone()
    assert stored["is_nul"] == 1
    # Honours the CHECK constraint (NUL -> code is NULL).
    assert stored["cod_prestatie"] is None
|
||||
|
||||
|
||||
def test_insert_or_ignore_nu_clobber(env, conn):
    """Seeding must not overwrite a row that already exists for the same key."""
    from app.operatii_seed import seed_operatii_etichetate

    # A pre-existing row (e.g. from embeddings) on the same key, with a different code.
    preexisting_insert = (
        "INSERT INTO mapping_suggestions (denumire_normalizata, cod_prestatie, is_nul, source, confidence) "
        "VALUES ('SCHIMB ULEI MOTOR', 'OE-1', 0, 'embedding', 0.5)"
    )
    conn.execute(preexisting_insert)
    conn.commit()

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    # INSERT OR IGNORE -> nothing is overwritten.
    assert inserted == 0

    query = "SELECT cod_prestatie, source FROM mapping_suggestions WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    stored = conn.execute(query).fetchone()
    # The existing row is left untouched.
    assert stored["cod_prestatie"] == "OE-1"
    assert stored["source"] == "embedding"
|
||||
|
||||
|
||||
def test_idempotent_la_reinit(env, conn):
    """Running the seeder twice on the same file inserts rows only the first time."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE, SEED_NUL])

    first_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    second_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()

    assert first_run == 2
    # The second run must not duplicate rows.
    assert second_run == 0

    count_row = conn.execute("SELECT COUNT(*) AS n FROM mapping_suggestions").fetchone()
    total = count_row["n"]
    assert total == 2
|
||||
|
||||
|
||||
def test_seed_inexistent_e_noop(env, conn):
    """A seed path that does not exist yields zero inserted rows."""
    from app.operatii_seed import seed_operatii_etichetate

    missing_path = os.path.join(env, "nu-exista.json")
    inserted = seed_operatii_etichetate(conn, missing_path)
    assert inserted == 0
|
||||
Reference in New Issue
Block a user