feat(5.18): VERIFY+CLOSE — US-007 badge sursa + fix findings code-review

VERIFY PASS pe corpus k-NN exemple etichetate (seed real 17181 Haiku, comis
in 756f777): suita 1392 passed, 1 deselected (live); smoke init_db seeder
(17181/NUL=2200/idempotent); toate codurile in nomenclator.

US-007 (cerere user la CLOSE) — badge sursa pe sugestia fuzzy din editor:
- _mapari.html: chip confirmat (GOLD) / similar (SILVER+k-NN) / non-operatie (NUL)
- base.html: .sugg-sursa--{confirmat,similar,nul} pe tokeni de tema (color-mix)
- routes.py: cheia `nul` adaugata in surse_sugestie default (finding cross-file)
- tests/test_web_badge_sursa.py: gold/silver/nul/fara-sursa (4 teste)
- E2E render live verificat in serverul real (/_fragments/mapari)

CLOSE /code-review high (main..HEAD, 3 finder x 8 unghiuri) — runtime curat,
invariant #13 intact; 3 findings low/cosmetic REPARATE + lock-uite:
- shared_store.seed_suggestions: cod whitespace -> NULL (era ''), + test lock
- genereaza_seed.py: with open(...) in loc de open().read() (FD leak tool offline)
- embeddings.py: docstring-uri aliniate la [{cod, is_nul, similaritate}]

ROADMAP: 5.18 LIVRAT. PRD: raport VERIFY/CLOSE scris.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-06-29 07:29:14 +00:00
parent 308fee6c27
commit 12021eb269
10 changed files with 258 additions and 13 deletions

View File

@@ -11,7 +11,7 @@ Design (PRD 5.14, Decision #16/#16b):
API public (nivel modul): API public (nivel modul):
index_corpus(items) -> None index_corpus(items) -> None
suggest_nearest(text, top_k) -> [{cod, similaritate}] suggest_nearest(text, top_k) -> [{cod, is_nul, similaritate}]
is_available() -> bool is_available() -> bool
Clase (pentru teste / injectare backend): Clase (pentru teste / injectare backend):
@@ -242,7 +242,7 @@ def index_corpus(items: list[dict], signature: str | None = None) -> None:
def suggest_nearest(denumire: str, top_k: int = 3) -> list[dict]: def suggest_nearest(denumire: str, top_k: int = 3) -> list[dict]:
"""Returneaza top_k sugestii [{cod, similaritate}] sau [] la eroare. """Returneaza top_k sugestii [{cod, is_nul, similaritate}] sau [] la eroare.
Sigur de apelat indiferent de starea backend-ului. Sigur de apelat indiferent de starea backend-ului.
""" """

View File

@@ -46,9 +46,12 @@ def seed_suggestions(
continue continue
is_nul = 1 if item.get("is_nul") else 0 is_nul = 1 if item.get("is_nul") else 0
# NUL -> cod NULL obligatoriu (supresie stricta, #4) # NUL -> cod NULL obligatoriu (supresie stricta, #4)
cod = None if is_nul else ((item.get("cod_prestatie") or "") or None) # Normalizeaza INAINTE de truthiness: un cod whitespace-only (" ") sau
if cod: # ne-string trebuie sa devina NULL, nu '' (altfel rand non-NUL cu cod gol).
cod = cod.strip().upper() cod = None
if not is_nul:
raw_cod = str(item.get("cod_prestatie") or "").strip().upper()
cod = raw_cod or None
source = str(item.get("source") or "llm") source = str(item.get("source") or "llm")
confidence = float(item.get("confidence") or 0.0) confidence = float(item.get("confidence") or 0.0)
cur = conn.execute( cur = conn.execute(

View File

@@ -1247,7 +1247,7 @@ def _nemapate_pentru_submission(row, nomenclator: list[dict], conn=None) -> list
"denumire": item.get("denumire"), "denumire": item.get("denumire"),
"suggestions": suggest_codes(item.get("denumire"), nomenclator, limit=5), "suggestions": suggest_codes(item.get("denumire"), nomenclator, limit=5),
"sugestie_principala": None, "sugestie_principala": None,
"surse_sugestie": {"gold_partajat": None, "silver": None, "embedding": None}, "surse_sugestie": {"gold_partajat": None, "silver": None, "embedding": None, "nul": False},
} }
# L14-S6: imbogatire cu GOLD partajat > SILVER > embeddings (SUGGESTION-ONLY, #13) # L14-S6: imbogatire cu GOLD partajat > SILVER > embeddings (SUGGESTION-ONLY, #13)
if conn is not None: if conn is not None:

View File

@@ -54,11 +54,22 @@
<div class="muted">{{ e.denumire or '(fara denumire)' }}</div> <div class="muted">{{ e.denumire or '(fara denumire)' }}</div>
</td> </td>
<td class="muted" style="font-size:12px;" data-eticheta="Sugestii"> <td class="muted" style="font-size:12px;" data-eticheta="Sugestii">
{# 5.18 US-007: badge sursa pe sugestia sistemului — confirmat (GOLD) / similar
(SILVER+embedding k-NN) / non-operatie (pre-filtru NUL). Suggestion-only. #}
{% if e.sugestie_principala %}
{% if e.sugestie_principala.sursa == 'gold_partajat' %}
<span class="sugg-sursa sugg-sursa--confirmat" title="cod confirmat de un operator">confirmat</span>
{% else %}
<span class="sugg-sursa sugg-sursa--similar" title="operatie similara deja vazuta (k-NN/exact)">similar</span>
{% endif %}
{% elif e.surse_sugestie and e.surse_sugestie.nul %}
<span class="sugg-sursa sugg-sursa--nul" title="pare non-operatie (ITP/plata/discount...)">non-operatie</span>
{% endif %}
{% if e.suggestions %} {% if e.suggestions %}
{% for s in e.suggestions[:3] %} {% for s in e.suggestions[:3] %}
<span class="sugg">{{ s.cod_prestatie }} ({{ s.score|round|int }}%)</span>{% if not loop.last %}, {% endif %} <span class="sugg">{{ s.cod_prestatie }} ({{ s.score|round|int }}%)</span>{% if not loop.last %}, {% endif %}
{% endfor %} {% endfor %}
{% else %}—{% endif %} {% elif not e.sugestie_principala and not (e.surse_sugestie and e.surse_sugestie.nul) %}—{% endif %}
</td> </td>
<td data-eticheta="Cod RAR"> <td data-eticheta="Cod RAR">
<select name="cod_prestatie" form="map-rez-{{ loop.index }}" required <select name="cod_prestatie" form="map-rez-{{ loop.index }}" required

View File

@@ -104,6 +104,18 @@
th { color:var(--muted); font-weight:500; font-size:12px; text-transform:uppercase; letter-spacing:.04em; } th { color:var(--muted); font-weight:500; font-size:12px; text-transform:uppercase; letter-spacing:.04em; }
.empty { color:var(--muted); padding:24px; text-align:center; } .empty { color:var(--muted); padding:24px; text-align:center; }
.pill { font-size:12px; padding:2px 8px; border-radius:99px; border:1px solid var(--line); } .pill { font-size:12px; padding:2px 8px; border-radius:99px; border:1px solid var(--line); }
/* Badge sursa sugestie (5.18 US-007): de unde vine sugestia de cod in editorul de mapare.
confirmat = GOLD validat de om (verde); similar = SILVER/embedding k-NN (azur);
non-operatie = pre-filtru NUL / vecin NUL (gri-cald). Suggestion-only, doar indiciu vizual. */
.sugg-sursa { display:inline-block; font-size:10px; font-weight:700; line-height:1; padding:2px 6px;
border-radius:99px; text-transform:uppercase; letter-spacing:.03em; vertical-align:middle;
border:1px solid transparent; }
.sugg-sursa--confirmat { color:var(--ok); border-color:color-mix(in srgb, var(--ok) 45%, transparent);
background:color-mix(in srgb, var(--ok) 12%, transparent); }
.sugg-sursa--similar { color:var(--accent); border-color:color-mix(in srgb, var(--accent) 45%, transparent);
background:color-mix(in srgb, var(--accent) 12%, transparent); }
.sugg-sursa--nul { color:var(--muted); border-color:color-mix(in srgb, var(--muted) 40%, transparent);
background:color-mix(in srgb, var(--muted) 12%, transparent); }
/* Pill-uri de filtrare a starii (bara de filtre Trimiteri). Inactiv = contur+text pe /* Pill-uri de filtrare a starii (bara de filtre Trimiteri). Inactiv = contur+text pe
culoarea categoriei (injectata inline); activ = umplere pe acea culoare. */ culoarea categoriei (injectata inline); activ = umplere pe acea culoare. */
.pills-categorii { display:inline-flex; gap:8px; flex-wrap:wrap; align-items:center; } .pills-categorii { display:inline-flex; gap:8px; flex-wrap:wrap; align-items:center; }

File diff suppressed because one or more lines are too long

View File

@@ -260,10 +260,71 @@ PASUL 2 — CONSUM SEED (fara LLM):
--- ---
## Raport VERIFY ## Raport VERIFY (2026-06-29) — PASS
> Completat de subagentul verificator (context curat) in faza VERIFY — vezi ROADMAP §5.6. > Faza VERIFY + CLOSE rulata pe `feat/5.18-corpus-knn-exemple-etichetate`, commit-uri
> PASS/FAIL per criteriu, cu dovezi (output pytest citat, E2E pe RAR test). Lipseste pana la VERIFY. > `756f777` (5.18 core + seed) + `308fee6` (fix lateral start-test ONNX). Seed-ul real produs
> cu subagenti Haiku (decizie user 2026-06-29), NU LM Studio (GPU jos) si NU Groq — vezi
> nota la "Seed real" mai jos. Abaterea de la D4 (LM Studio = backend bootstrap v1) e
> documentata si justificata: motorul de etichetare s-a schimbat, garantiile de calitate
> (validare 157 op Haiku vs Groq) sunt mai bune, restul pipeline-ului (US-003..006) e neatins.
### PASS/FAIL per story
| Story | Stare | Dovada |
|-------|-------|--------|
| US-001 pre-filtru NUL | PASS | `tests/test_prefiltru_nul.py` verde; seed contine 2200 NUL (`is_nul=1`, `cod=NULL`) |
| US-002 etichetator offline | PASS | `tests/test_eticheteaza_tool.py` verde (json_schema envelope, enum cod, scrub PII, no_think) |
| US-003 generare seed pe frecventa | PASS | `tests/test_genereaza_seed.py` verde (dedup normalizat, zero-duplicat, idempotenta cross-run, conflict determinist) |
| US-004 seeder DB | PASS | `tests/test_operatii_seed.py` verde; smoke `init_db` pe DB gol -> `mapping_suggestions`=17181, NUL=2200, re-seed = 0 inserate (idempotent) |
| US-005 embeddings pe corpus etichetat | PASS | `tests/test_embeddings_corpus_etichetat.py` verde (corpus din `mapping_suggestions`, query normalizat simetric, `is_nul` propagat) |
| US-006 enrich = NUL + exact + k-NN | PASS | `tests/test_enrich_corpus_etichetat.py` verde (precedenta NUL>GOLD>exact>k-NN, abtinere sub prag, invariant #13 regresie) |
| US-007 badge sursa (optional) | PASS | `tests/test_web_badge_sursa.py` verde (4 teste); E2E render live confirma chip confirmat/similar/non-operatie. Implementat la cererea user (2026-06-29) |
### Dovezi agregat
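- **Suita completa** (rulare VERIFY): `python3 -m pytest -q -m "not live"` -> **1387 passed, 1 deselected (live), 0 failed** (142.77s). Totalul final dupa CLOSE este **1392 passed** (vezi sectiunea US-007 de mai jos); diferenta de 5 corespunde celor 4 teste de badge + testul de lock `test_seed_suggestions_cod_whitespace_devine_null` adaugate la CLOSE.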
- **Cele 6 fisiere de test 5.18** rulate izolat: **36 passed** (`test_prefiltru_nul`, `test_eticheteaza_tool`, `test_genereaza_seed`, `test_operatii_seed`, `test_embeddings_corpus_etichetat`, `test_enrich_corpus_etichetat`).
- **Smoke seeder** (`init_db` pe DB gol, `AUTOPASS_SEED_OPERATII_ENABLED=true`): 17181 randuri in `mapping_suggestions`, 2200 NUL, `source='haiku_seed'`, re-seed idempotent (0 inserate).
- **Validare nomenclator**: toate codurile distincte din seed (`OE-1`..`OE-8`, `OE-I/R`, `AITLV`, `R-ODO`) sunt in `FALLBACK_NOMENCLATOR` — zero cod gunoi care ar da HTTP 500 / `ORA-12899` la RAR.
### Seed real (abatere de la D4, aprobata de user)
Seed-ul `app/data/operatii-etichetate.json` rescris de la 3758 (Groq partial) la **17181** operatii
distincte (toate, ordine frecventa), `source="haiku_seed"`, prin subagenti Haiku in Claude Code
(blocantul GPU LM Studio rezolvat fara GPU). Validare la dezacorduri Haiku vs Groq pe 157 operatii:
Haiku corect ~22/30, Groq ~0 (ex: CHIRIE ANVELOPE->NUL, ADAPTARE electronica->OE-7, INLOCUIT
PLACUTE FRANA->OE-1). Distributie: OE-1=13764 (cap, asteptat), NUL=2200, restul sparse. Calitate
estimata la scara ~95%; codurile rare (avarii grave OE-C/S/D/F/A, OE-5/6) sunt sparse si pot avea
erori de margine ne-verificate uman — ramane recomandarea Decision #19 (esantion uman) inainte de
orice crestere de incredere / auto-send.
### CLOSE — `/code-review high` (main..HEAD, 3 finder x 8 unghiuri)
Calea de runtime in productie = **curata**. Verificat intact:
- **Invariant #13**: nimic din SILVER/k-NN/NUL nu intra in `resolve_prestatii`/`load_mapping` (suggestion-only).
- `suggest_nearest`/`enrich_suggestions` semnatura noua (`is_nul`) consumata corect de unicul apelant.
- Worker keepalive RAR (`308fee6`/`c05fa00`): fara race (worker single-thread), heartbeat actualizat doar pe login reusit.
- Config `embeddings_enabled=True` + `seed_operatii_enabled=True` default: teste neafectate (conftest override).
Findings (toate low / cosmetic, niciun bug de runtime) — **REPARATE in faza CLOSE**:
1. `tools/mapare-llm/genereaza_seed.py` (`_incarca_seed`/`construieste_harta_etichete`): `json.loads(open(...).read())` fara context manager -> FD leak in tool offline. **Fix**: `with open(...)`.
2. `app/shared_store.py` `seed_suggestions`: `cod=" "` (whitespace) -> `''` in loc de NULL pe rand non-NUL. **Fix**: `str(...).strip().upper() or None` INAINTE de truthiness. Lock: `test_seed_suggestions_cod_whitespace_devine_null`.
3. `app/embeddings.py` (2 docstring-uri): ziceau `[{cod, similaritate}]`, real `[{cod, is_nul, similaritate}]`. **Fix**: docstring-uri aliniate.
Concluzie VERIFY: **PASS**. US-001..006 livrate cu dovezi (US-007, optional, a fost implementat ulterior la CLOSE — vezi sectiunea urmatoare); zero bug de corectitudine in runtime; cele 3 findings de cleanup reparate + lock-uite.
### CLOSE — US-007 implementat (cerere user 2026-06-29)
User a cerut la poarta CLOSE sa includem badge-ul direct pe sugestiile sistemului fuzzy.
Implementat: chip in coloana "Sugestii" din `_mapari.html`, mapat din `sugestie_principala.sursa`:
**confirmat** (GOLD partajat) / **similar** (SILVER exact + k-NN embeddings) / **non-operatie**
(pre-filtru NUL / vecin NUL). CSS `.sugg-sursa--{confirmat,similar,nul}` pe tokeni de tema
(`--ok`/`--accent`/`--muted` cu `color-mix`), nu rupe layoutul. Suggestion-only (#13). Fix lateral:
`surse_sugestie` default in `routes.py` a primit cheia `nul` (lipsea — finding cross-file). Teste:
`tests/test_web_badge_sursa.py` (gold/silver/nul/fara-sursa). Render verificat in serverul real
(`/_fragments/mapari`): OP-REV->confirmat, OP-REP->similar, OP-ITP->non-operatie, OP-XYZ->fara chip.
Suita: **1392 passed, 1 deselected (live)**.
--- ---

View File

@@ -141,6 +141,21 @@ def test_seed_suggestions_nul_cu_cod_explicit_tot_nul(conn):
assert row["cod_prestatie"] is None # cod explicit ignorat cand is_nul assert row["cod_prestatie"] is None # cod explicit ignorat cand is_nul
def test_seed_suggestions_cod_whitespace_devine_null(conn):
    """A non-NUL row seeded with a whitespace-only code must store NULL, not ''."""
    from app.shared_store import seed_suggestions, lookup_suggestion

    denumire = "OPERATIE CU COD GOL"
    items = [
        {"denumire": denumire, "cod_prestatie": " ", "source": "llm", "confidence": 0.5},
    ]
    seed_suggestions(conn, items)
    conn.commit()

    stored = lookup_suggestion(conn, denumire)
    assert stored is not None
    # The row must not be flagged as NUL ...
    assert stored["is_nul"] == 0
    # ... and the blank code must have been normalized to NULL rather than ''.
    assert stored["cod_prestatie"] is None
def test_seed_suggestions_normalizare_diacritice(conn): def test_seed_suggestions_normalizare_diacritice(conn):
"""Lookup pe forma cu diacritice gaseste randul seedat fara diacritice (normalize_for_match).""" """Lookup pe forma cu diacritice gaseste randul seedat fara diacritice (normalize_for_match)."""
from app.shared_store import seed_suggestions, lookup_suggestion from app.shared_store import seed_suggestions, lookup_suggestion

View File

@@ -0,0 +1,140 @@
"""TDD 5.18 US-007 — Badge sursa sugestie in editorul de mapare (_mapari.html).
Chip mic langa sugestia sistemului care arata DE UNDE vine codul propus:
- "confirmat" -> GOLD partajat (validat de om, shared_mappings)
- "similar" -> SILVER exact-match / k-NN embeddings (exemplu deja vazut)
- "non-operatie" -> pre-filtru NUL determinist (ITP/plata/discount...)
Toate suggestion-only (#13): badge-ul e doar indiciu vizual, nu schimba enqueue.
Render real prin GET /_fragments/mapari (fragmentul HTMX scoped pe cont).
"""
from __future__ import annotations
import json
import os
import tempfile
import pytest
@pytest.fixture()
def env(monkeypatch, tmp_path):
    """Prepare an isolated app environment for one test and yield ``monkeypatch``.

    Points ``AUTOPASS_DB_PATH`` at a fresh database file inside pytest's
    managed ``tmp_path`` directory (the previous ``tempfile.mkdtemp()`` was
    never removed, leaking one directory per test), sets
    ``AUTOPASS_WEB_AUTH_REQUIRED`` to ``"false"`` (per the original note: dev
    mode, requests resolve to account id 1), clears the cached settings so the
    new environment is picked up, and runs ``init_db()`` to create the schema.
    The settings cache is cleared again on teardown.
    """
    db_file = tmp_path / "badge_sursa_test.db"
    monkeypatch.setenv("AUTOPASS_DB_PATH", str(db_file))
    monkeypatch.setenv("AUTOPASS_WEB_AUTH_REQUIRED", "false")

    # Imported lazily so the app modules are loaded only after the environment is set.
    from app.config import get_settings
    get_settings.cache_clear()

    from app.db import init_db
    init_db()

    yield monkeypatch

    # Drop the settings cached for this test so they do not leak into the next one.
    get_settings.cache_clear()
@pytest.fixture()
def client(env):
    """Yield a ``TestClient`` bound to the application, opened as a context manager.

    Depends on ``env`` so the environment is configured before ``app.main`` is imported.
    """
    from app.main import app
    from fastapi.testclient import TestClient

    with TestClient(app) as test_client:
        yield test_client
def _seed_nomenclator(conn):
conn.executemany(
"INSERT OR IGNORE INTO nomenclator_rar (cod_prestatie, nume_prestatie) VALUES (?, ?)",
[("OE-1", "REPARATIE"), ("OE-3", "REVIZIE PERIODICA")],
)
def _insert_needs_mapping(conn, *, op: str, denumire: str, key: str):
conn.execute(
"INSERT INTO submissions (account_id, status, payload_json, idempotency_key) "
"VALUES (1, 'needs_mapping', ?, ?)",
(json.dumps({
"vin": "WVWZZZ1KZAW001111",
"prestatii": [{"cod_op_service": op, "denumire": denumire}],
}), key),
)
def test_badge_gold_confirmat(env, client):
    """An operation matched in the shared GOLD store renders the 'confirmat' chip."""
    from app.db import get_connection
    from app.shared_store import record_human_validation

    db = get_connection()
    try:
        _seed_nomenclator(db)
        # Record a human validation so the denumire has a GOLD match.
        record_human_validation(db, "Revizie anuala", "OE-3")
        _insert_needs_mapping(db, op="OP-REV", denumire="Revizie anuala", key="badge-gold-1")
        db.commit()
    finally:
        db.close()

    response = client.get("/_fragments/mapari")
    assert response.status_code == 200, response.text

    body = response.text
    assert "sugg-sursa--confirmat" in body
    assert ">confirmat<" in body
def test_badge_similar_silver(env, client):
    """An operation matched in SILVER (mapping_suggestions) renders the 'similar' chip."""
    from app.db import get_connection
    from app.shared_store import seed_suggestions

    silver_rows = [
        {"denumire": "Reparatie motor", "cod_prestatie": "OE-1", "source": "llm", "confidence": 0.9},
    ]

    db = get_connection()
    try:
        _seed_nomenclator(db)
        seed_suggestions(db, silver_rows)
        _insert_needs_mapping(db, op="OP-REP", denumire="Reparatie motor", key="badge-similar-1")
        db.commit()
    finally:
        db.close()

    response = client.get("/_fragments/mapari")
    assert response.status_code == 200, response.text

    body = response.text
    assert "sugg-sursa--similar" in body
    assert ">similar<" in body
    # Must NOT be marked as confirmed: the source is SILVER, not GOLD.
    assert "sugg-sursa--confirmat" not in body
def test_badge_nul_non_operatie(env, client):
    """An operation caught by the NUL pre-filter (ITP) renders the 'non-operatie' chip."""
    from app.db import get_connection

    db = get_connection()
    try:
        _seed_nomenclator(db)
        _insert_needs_mapping(db, op="OP-ITP", denumire="ITP CT 12 ABC", key="badge-nul-1")
        db.commit()
    finally:
        db.close()

    response = client.get("/_fragments/mapari")
    assert response.status_code == 200, response.text

    body = response.text
    assert "sugg-sursa--nul" in body
    assert ">non-operatie<" in body
def test_fara_sursa_fara_badge(env, client):
    """An unknown operation with no suggestion source must NOT get any source chip."""
    from app.db import get_connection

    db = get_connection()
    try:
        _seed_nomenclator(db)
        _insert_needs_mapping(
            db,
            op="OP-NISA",
            denumire="Operatie complet necunoscuta xyz",
            key="badge-none-1",
        )
        db.commit()
    finally:
        db.close()

    response = client.get("/_fragments/mapari")
    assert response.status_code == 200, response.text
    # No chip class of any variant may appear in the rendered fragment.
    assert "sugg-sursa" not in response.text

View File

@@ -113,7 +113,8 @@ def _incarca_seed(seed_path: str | None) -> list[dict]:
if not seed_path or not os.path.exists(seed_path): if not seed_path or not os.path.exists(seed_path):
return [] return []
try: try:
return json.loads(open(seed_path, encoding="utf-8").read()) with open(seed_path, encoding="utf-8") as fh:
return json.loads(fh.read())
except (ValueError, OSError): except (ValueError, OSError):
return [] return []
@@ -132,7 +133,8 @@ def construieste_harta_etichete(
# labels-groq-partial.json: cheiat pe text BRUT. # labels-groq-partial.json: cheiat pe text BRUT.
if labels_path and os.path.exists(labels_path): if labels_path and os.path.exists(labels_path):
labels = json.loads(open(labels_path, encoding="utf-8").read()) with open(labels_path, encoding="utf-8") as fh:
labels = json.loads(fh.read())
for raw, cod in labels.items(): for raw, cod in labels.items():
cheie = normalize_for_match(raw) cheie = normalize_for_match(raw)
if not cheie: if not cheie: