Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq (3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect ~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7 (nu OE-5), placute frana uzura OE-1 (nu OE-F avarie). US-001..006: prefiltru NUL determinist, etichetator offline, generator seed, seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL 12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO). config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env). Suita: 1387 passed, 1 deselected (live). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
114 lines
3.8 KiB
Python
114 lines
3.8 KiB
Python
"""US-004 (PRD 5.18) — seeder for the labelled corpus into mapping_suggestions (SILVER).

INSERT OR IGNORE from the committed artifact -> SILVER is no longer empty in production.
NB (F10): HUMAN confirmations live in shared_mappings, NOT here; therefore INSERT OR IGNORE
keeps the existing LLM code on re-seed (v1 = ignore, not upsert).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture()
def env(monkeypatch):
    """Provide an isolated, freshly initialised database for one test.

    Creates a new temporary directory, points ``AUTOPASS_DB_PATH`` at a
    ``us004.db`` file inside it, sets ``AUTOPASS_WEB_AUTH_REQUIRED`` to
    ``"false"``, clears the ``get_settings`` cache so the overrides are
    re-read, and runs ``init_db()``.

    Yields:
        str: path of the temporary directory; tests write their seed
        files there.

    On teardown (and also when setup itself fails) the settings cache is
    cleared again and the temporary directory is removed, so neither
    cached settings nor files on disk leak into other tests.

    NOTE(review): the tests built on this fixture assume
    ``mapping_suggestions`` is empty right after ``init_db()``. If
    ``init_db`` seeds the table by default, seeding must be disabled for
    the test run — confirm against the project configuration.
    """
    # Local import, in line with the lazy-import style used in this module.
    import shutil

    tmp = tempfile.mkdtemp()
    try:
        # Environment overrides must be in place before settings are read.
        monkeypatch.setenv("AUTOPASS_DB_PATH", os.path.join(tmp, "us004.db"))
        monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
        from app.config import get_settings

        get_settings.cache_clear()
        try:
            from app.db import init_db

            init_db()
            yield tmp
        finally:
            # Drop settings that still point at the temporary database.
            get_settings.cache_clear()
    finally:
        # Best-effort cleanup: never fail a test over a leftover file.
        shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
|
|
@pytest.fixture()
def conn(env):
    """Yield a database connection and close it once the test is done.

    Depends on ``env`` so that the connection is only requested after the
    temporary database has been configured and initialised.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    connection.close()
|
|
|
|
|
|
def _scrie_seed(tmp, items) -> str:
    """Write ``items`` as JSON to ``operatii-etichetate.json`` inside ``tmp``.

    The file is UTF-8 encoded and non-ASCII characters are written as-is
    (``ensure_ascii=False``).

    Args:
        tmp: directory in which the seed file is created.
        items: JSON-serialisable payload (the tests pass a list of dicts).

    Returns:
        The full path of the file that was written.
    """
    seed_path = os.path.join(tmp, "operatii-etichetate.json")
    with open(seed_path, "w", encoding="utf-8") as out:
        json.dump(items, out, ensure_ascii=False)
    return seed_path
|
|
|
|
|
|
# Seed entry for a regular labelled operation (carries a code, is_nul False).
SEED_OE = {
    "denumire": "SCHIMB ULEI MOTOR",
    "denumire_normalizata": "SCHIMB ULEI MOTOR",
    "cod": "OE-3",
    "is_nul": False,
    "source": "llm_seed",
    "confidence": 0.7,
}

# Seed entry flagged as NUL: no code, is_nul True.
SEED_NUL = {
    "denumire": "13 X ITP",
    "denumire_normalizata": "13 X ITP",
    "cod": None,
    "is_nul": True,
    "source": "llm_seed",
    "confidence": 0.7,
}
|
|
|
|
|
|
def test_seed_populeaza_mapping_suggestions(env, conn):
    """Seeding one labelled operation creates the matching row.

    Verifies the seeder's return value as well as the stored code, source
    and confidence of the ``SCHIMB ULEI MOTOR`` row.
    """
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    assert inserted == 1

    query = (
        "SELECT cod_prestatie, source, confidence FROM mapping_suggestions "
        "WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    )
    stored = conn.execute(query).fetchone()
    assert stored["cod_prestatie"] == "OE-3"
    assert stored["source"] == "llm_seed"
    # Confidence is a float: compare within a tolerance, not exactly.
    assert -1e-9 < stored["confidence"] - 0.7 < 1e-9
|
|
|
|
|
|
def test_is_nul_din_seed(env, conn):
    """A seed entry flagged ``is_nul`` is stored with ``is_nul = 1`` and no code."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_operatii_etichetate(conn, _scrie_seed(env, [SEED_NUL]))
    conn.commit()

    query = "SELECT cod_prestatie, is_nul FROM mapping_suggestions WHERE denumire_normalizata = '13 X ITP'"
    stored = conn.execute(query).fetchone()
    assert stored["is_nul"] == 1
    # Honours the CHECK constraint (NUL -> code is NULL).
    assert stored["cod_prestatie"] is None
|
|
|
|
|
|
def test_insert_or_ignore_nu_clobber(env, conn):
    """Seeding must not overwrite a row that already exists for the same key."""
    from app.operatii_seed import seed_operatii_etichetate

    # A pre-existing row (e.g. from embeddings) on the same key, with a different code.
    insert_existing = (
        "INSERT INTO mapping_suggestions (denumire_normalizata, cod_prestatie, is_nul, source, confidence) "
        "VALUES ('SCHIMB ULEI MOTOR', 'OE-1', 0, 'embedding', 0.5)"
    )
    conn.execute(insert_existing)
    conn.commit()

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    # INSERT OR IGNORE -> nothing is overwritten, so nothing is reported.
    assert inserted == 0

    query = "SELECT cod_prestatie, source FROM mapping_suggestions WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    stored = conn.execute(query).fetchone()
    # The existing row stays untouched.
    assert stored["cod_prestatie"] == "OE-1"
    assert stored["source"] == "embedding"
|
|
|
|
|
|
def test_idempotent_la_reinit(env, conn):
    """Running the seeder twice on the same file adds the rows only once."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE, SEED_NUL])

    first_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    second_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()

    assert first_run == 2
    # The second run must not duplicate anything.
    assert second_run == 0

    count_row = conn.execute("SELECT COUNT(*) AS n FROM mapping_suggestions").fetchone()
    assert count_row["n"] == 2
|
|
|
|
|
|
def test_seed_inexistent_e_noop(env, conn):
    """Pointing the seeder at a file that does not exist returns 0."""
    from app.operatii_seed import seed_operatii_etichetate

    missing_file = os.path.join(env, "nu-exista.json")
    assert seed_operatii_etichetate(conn, missing_file) == 0
|