Files
rar-autopass/tests/test_operatii_seed.py
Claude Agent 756f77730f feat(5.18): corpus k-NN exemple etichetate + seed real Haiku (17181 op)
Seed app/data/operatii-etichetate.json regenerat cu subagenti Haiku pe TOATE
cele 17181 operatii distincte (ordine frecventa, 100%), inlocuind seed-ul Groq
(3758). Validare Haiku vs Groq pe 157 op etichetate: la dezacorduri Haiku corect
~22/30, Groq ~0. Haiku prinde gunoiul ratat de Groq (ITP, chirie anvelope, nume
piese fara actiune): NUL 2200 (12.8%) vs ~7.6% Groq; adaptare electronica OE-7
(nu OE-5), placute frana uzura OE-1 (nu OE-F avarie).

US-001..006: prefiltru NUL determinist, etichetator offline, generator seed,
seeder mapping_suggestions (in init_db, gated seed_operatii_enabled), embeddings
indexeaza corpus etichetat, enrich NUL+kNN. Distributie seed: OE-1 80.1%, NUL
12.8%, OE-2 3.5%, restul rar (OE-4/3/7/8/R/I/5, AITLV, R-ODO).

config: seed_operatii_enabled=True + embeddings_enabled=True implicit (SILVER
populat + sugestii semantice; ambele suggestion-only, dezactivabile prin env).

Suita: 1387 passed, 1 deselected (live).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 06:59:15 +00:00

114 lines
3.8 KiB
Python

"""US-004 (PRD 5.18) — seeder corpus etichetat in mapping_suggestions (SILVER).
INSERT OR IGNORE din artefactul comis -> SILVER nu mai e gol in productie.
NB (F10): confirmarile UMANE stau in shared_mappings, NU aici; deci INSERT OR IGNORE
pastreaza codul LLM existent la re-seed (v1 = ignore, nu upsert).
"""
from __future__ import annotations
import json
import os
import tempfile
import pytest
@pytest.fixture()
def env(monkeypatch):
    """Provide an initialised database inside a throwaway directory.

    Sets ``AUTOPASS_DB_PATH`` to ``us004.db`` in a fresh temporary directory
    and ``AUTOPASS_WEB_AUTH_REQUIRED`` to ``"false"``, clears the
    ``get_settings`` cache (presumably so settings are re-read from the
    patched environment), then runs ``init_db()``.

    Yields:
        str: path of the temporary directory; tests also write their seed
        file into it.

    On teardown the settings cache is cleared again and the temporary
    directory is deleted, so repeated runs do not accumulate stale
    directories and database files.
    """
    import shutil  # local import: only needed for the teardown cleanup

    tmp = tempfile.mkdtemp()
    try:
        monkeypatch.setenv("AUTOPASS_DB_PATH", os.path.join(tmp, "us004.db"))
        monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")
        # Imported only after the environment is patched, as in the original.
        from app.config import get_settings

        get_settings.cache_clear()
        try:
            from app.db import init_db

            init_db()
            yield tmp
        finally:
            # Reset the cache even when init_db() fails, so later tests do
            # not see settings built for this fixture.
            get_settings.cache_clear()
    finally:
        # mkdtemp() leaves deletion to the caller; ignore_errors keeps a
        # still-locked database file from turning cleanup into a test error.
        shutil.rmtree(tmp, ignore_errors=True)
@pytest.fixture()
def conn(env):
    """Yield a connection from ``get_connection()`` and close it on teardown.

    Depends on ``env`` so that fixture's setup has run before the connection
    is opened.
    """
    from app.db import get_connection

    connection = get_connection()
    yield connection
    connection.close()
def _scrie_seed(tmp, items) -> str:
    """Write ``items`` as JSON to ``operatii-etichetate.json`` inside ``tmp``.

    The file is UTF-8 encoded and non-ASCII characters are written as-is
    (``ensure_ascii=False``).

    Returns:
        The path of the written file.
    """
    seed_path = os.path.join(tmp, "operatii-etichetate.json")
    payload = json.dumps(items, ensure_ascii=False)
    with open(seed_path, "w", encoding="utf-8") as out:
        out.write(payload)
    return seed_path
# Seed entry for an operation labelled with a concrete code.
SEED_OE = {
    "denumire": "SCHIMB ULEI MOTOR",
    "denumire_normalizata": "SCHIMB ULEI MOTOR",
    "cod": "OE-3",
    "is_nul": False,
    "source": "llm_seed",
    "confidence": 0.7,
}

# Seed entry flagged as NUL: it carries no code.
SEED_NUL = {
    "denumire": "13 X ITP",
    "denumire_normalizata": "13 X ITP",
    "cod": None,
    "is_nul": True,
    "source": "llm_seed",
    "confidence": 0.7,
}
def test_seed_populeaza_mapping_suggestions(env, conn):
    """Seeding one labelled operation stores it in ``mapping_suggestions``."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    assert inserted == 1

    query = (
        "SELECT cod_prestatie, source, confidence FROM mapping_suggestions "
        "WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    )
    stored = conn.execute(query).fetchone()

    # Code, source and confidence must match the seed entry.
    assert stored["cod_prestatie"] == "OE-3"
    assert stored["source"] == "llm_seed"
    assert abs(stored["confidence"] - 0.7) < 1e-9
def test_is_nul_din_seed(env, conn):
    """A NUL seed entry is stored with ``is_nul = 1`` and no code."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_NUL])
    seed_operatii_etichetate(conn, seed_file)
    conn.commit()

    query = (
        "SELECT cod_prestatie, is_nul FROM mapping_suggestions WHERE denumire_normalizata = '13 X ITP'"
    )
    stored = conn.execute(query).fetchone()

    assert stored["is_nul"] == 1
    assert stored["cod_prestatie"] is None  # honours the CHECK constraint (NUL -> NULL code)
def test_insert_or_ignore_nu_clobber(env, conn):
    """Seeding must not overwrite a row that already exists for the same key."""
    from app.operatii_seed import seed_operatii_etichetate

    # A pre-existing row (e.g. from embeddings) on the same key, with a different code.
    insert_existing = (
        "INSERT INTO mapping_suggestions (denumire_normalizata, cod_prestatie, is_nul, source, confidence) "
        "VALUES ('SCHIMB ULEI MOTOR', 'OE-1', 0, 'embedding', 0.5)"
    )
    conn.execute(insert_existing)
    conn.commit()

    seed_file = _scrie_seed(env, [SEED_OE])
    inserted = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    assert inserted == 0  # INSERT OR IGNORE -> nothing is overwritten

    query = (
        "SELECT cod_prestatie, source FROM mapping_suggestions WHERE denumire_normalizata = 'SCHIMB ULEI MOTOR'"
    )
    stored = conn.execute(query).fetchone()
    assert stored["cod_prestatie"] == "OE-1"  # the existing row stays untouched
    assert stored["source"] == "embedding"
def test_idempotent_la_reinit(env, conn):
    """Running the seeder twice on the same file inserts rows only once."""
    from app.operatii_seed import seed_operatii_etichetate

    seed_file = _scrie_seed(env, [SEED_OE, SEED_NUL])

    first_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()
    second_run = seed_operatii_etichetate(conn, seed_file)
    conn.commit()

    assert first_run == 2
    assert second_run == 0  # the second run does not duplicate rows

    count_row = conn.execute("SELECT COUNT(*) AS n FROM mapping_suggestions").fetchone()
    assert count_row["n"] == 2
def test_seed_inexistent_e_noop(env, conn):
    """Pointing the seeder at a missing file reports zero inserted rows."""
    from app.operatii_seed import seed_operatii_etichetate

    missing_file = os.path.join(env, "nu-exista.json")
    inserted = seed_operatii_etichetate(conn, missing_file)
    assert inserted == 0