feat(5.15+5.14): CLOSE — fix-uri code-review + embeddings functional
5.15 (propagare design + dashboard editare) si 5.14 (mapare LLM distilata) inchise dupa /code-review high.

8 buguri reparate TDD:
- HIGH modal nu se deschidea pe randul slim (base.html: trimitere-slim)
- HIGH /repune trunchia prestatii (declaratie incompleta la RAR) -> iterare peste existing, codes pozitional
- HIGH embeddings incarca model ~230MB degeaba pe corpus gol -> poarta has_corpus()
- HIGH picker chips gol pe re-render eroare -> conn/account_id pe toate ramurile
- MED obs re-derivat dupa stergere explicita -> _merge_override pastreaza obs=''
- MED mapare salvata fara denumire polua GOLD -> _record_gold_validation guard
- MED typo nome_prestatie -> nume_prestatie in select /repune
- MED bucketare timp +3h gresita iarna -> SQLite localtime + TZ=Europe/Bucharest

Embeddings WIRE-uit functional (PRD #15, decizie user): ensure_embeddings_corpus construieste corpus din nomenclator, gated pe AUTOPASS_EMBEDDINGS_ENABLED (default off). Marime model corectata ~50MB->~230MB (estimare PRD gresita).

Cleanup: hoist load_* din bucla bulk-fix; import re la top.

Regresie: 1256 passed, 1 deselected (live), 0 failed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
491
tests/test_or_label.py
Normal file
491
tests/test_or_label.py
Normal file
@@ -0,0 +1,491 @@
|
||||
"""Teste pentru or_label.py — etichetator batch offline OpenRouter (L14-S1).
|
||||
|
||||
TDD: aceste teste TREBUIE sa fie RED inainte de implementare, GREEN dupa.
|
||||
Fara apeluri LLM reale — or_common.call() este MOCK-at in toate testele
|
||||
care ating API-ul. Testeaza: grupare+propagare, vot ensemble, scrub PII,
|
||||
resumabilitate, format output.
|
||||
|
||||
Rulare: python3 -m pytest tests/test_or_label.py -v
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
|
||||
# Set the key before importing: or_common.py reads it at module level.
# The value is irrelevant in tests because call() is mocked.
os.environ.setdefault("OPENROUTER_KEY", "test-key-mock")

# Put tools/mapare-llm/ on sys.path so that or_label can be imported.
HERE = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.abspath(os.path.join(HERE, "..", "tools", "mapare-llm"))
if TOOLS_DIR not in sys.path:
    sys.path.insert(0, TOOLS_DIR)
|
||||
|
||||
import or_label # subject under test
|
||||
import or_common as oc # pentru VALID, CODURI, scrub
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Grupare pe similaritate + propagare cod
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGroupBySimilarity:
    """Check the greedy grouping logic based on fuzz.token_sort_ratio."""

    def test_similar_strings_grouped_in_one(self):
        """Near-identical names -> a single representative, the rest members."""
        # Measured scores:
        #   token_sort_ratio("REGLAT DIRECTIE", "REGLAT DIRECTIA") = 93
        #   token_sort_ratio("REGLAT DIRECTIE", "REGLARE DIRECTIE") = 90
        corpus = [
            ("REGLAT DIRECTIE", 100),  # representative (highest frequency)
            ("REGLAT DIRECTIA", 80),   # similar: 93 >= 85
            ("REGLARE DIRECTIE", 60),  # similar: 90 >= 85
        ]
        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 1
        group = groups[0]
        assert group["rep"] == "REGLAT DIRECTIE"
        assert len(group["members"]) == 2
        member_names = [member[0] for member in group["members"]]
        assert "REGLAT DIRECTIA" in member_names
        assert "REGLARE DIRECTIE" in member_names

    def test_distinct_strings_separate_groups(self):
        """Very different names -> separate groups."""
        corpus = [
            ("REVIZIE", 100),
            ("D/R BARA FATA", 80),
            ("SCHIMB ULEI MOTOR", 60),
        ]
        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 3

    def test_representative_is_highest_frequency(self):
        """The representative is the highest-frequency entry (first when sorted desc)."""
        corpus = [
            ("INLOCUIT FILTRU AER", 300),   # highest frequency
            ("INLOCUIRE FILTRU AER", 100),  # similar: 92 >= 85
        ]
        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 1
        assert groups[0]["rep"] == "INLOCUIT FILTRU AER"
        assert groups[0]["freq"] == 300

    def test_singleton_group(self):
        """A name with no neighbours -> a group with zero members."""
        groups = or_label.group_by_similarity([("REVIZIE", 100)], threshold=85)

        assert len(groups) == 1
        assert groups[0]["rep"] == "REVIZIE"
        assert groups[0]["members"] == []

    def test_below_threshold_not_grouped(self):
        """Similarity below the threshold -> separate groups."""
        # D/R BARA FATA vs D/R BARA SPATE = 81 < 85
        corpus = [
            ("D/R BARA FATA", 200),
            ("D/R BARA SPATE", 180),
        ]
        groups = or_label.group_by_similarity(corpus, threshold=85)

        assert len(groups) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Vot ensemble (acord/dezacord) — fara apeluri LLM
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEnsembleVote:
    """Check the voting logic on codes (not on model self-confidence)."""

    # Model identifiers used as vote keys throughout these tests.
    MODEL_A = "nvidia/nemotron-3-super-120b-a12b:free"
    MODEL_B = "nvidia/nemotron-nano-9b-v2:free"

    @classmethod
    def _votes(cls, cod_a, cod_b):
        """Return a votes mapping with one answer per model."""
        return {cls.MODEL_A: cod_a, cls.MODEL_B: cod_b}

    def test_unanim_cod_rar(self):
        """Both models agree on a RAR code -> confidence high, source unanimous."""
        cod, confidence, sursa = or_label.ensemble_vote(self._votes("OE-3", "OE-3"))

        assert cod == "OE-3"
        assert confidence == "high"
        assert "unanim" in sursa

    def test_unanim_nul_marcat_separat(self):
        """Both say NUL -> NUL with confidence high; NUL is not promoted to a RAR code."""
        cod, confidence, sursa = or_label.ensemble_vote(self._votes("NUL", "NUL"))

        assert cod == "NUL"
        assert confidence == "high"
        # NUL is not among the OE-* codes (it is not promoted).
        rar_codes = {c.split("=")[0] for c in oc.CODURI.replace(", ", ",").split(",")} - {"NUL"}
        assert cod not in rar_codes
        assert "nul" in sursa.lower()

    def test_dezacord_total(self):
        """The models disagree -> needs_mapping."""
        _, confidence, sursa = or_label.ensemble_vote(self._votes("OE-2", "OE-4"))

        assert confidence == "needs_mapping"
        assert "dezacord" in sursa

    def test_parse_fail_partial(self):
        """One model returns '?' (parse failure), the other a valid code -> disagreement."""
        _, confidence, _ = or_label.ensemble_vote(self._votes("OE-1", "?"))

        # Conservative: without unanimity the result is needs_mapping.
        assert confidence == "needs_mapping"

    def test_toate_parse_fail(self):
        """Both models return '?' -> needs_mapping."""
        _, confidence, _ = or_label.ensemble_vote(self._votes("?", "?"))

        assert confidence == "needs_mapping"

    def test_cod_invalid_returnat_de_llm(self):
        """The LLM returns an unknown code (not in VALID) -> needs_mapping."""
        _, confidence, _ = or_label.ensemble_vote(self._votes("OE-99", "OE-99"))

        assert confidence == "needs_mapping"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scrub PII — refoloseste or_common.scrub (F3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScrubPII:
    """PII scrubbing from or_common, tested independently via oc.scrub()."""

    def test_nr_inmatriculare_scrubbed(self):
        """A licence plate number (e.g. CT 12 ABC) is scrubbed."""
        assert "[NR]" in oc.scrub("ITP CT 12 ABC")

    def test_vin_scrubbed(self):
        """A VIN (17 alphanumeric characters) is scrubbed."""
        vin = "WVWZZZ1KZAM000001"  # 17 characters, VIN format
        assert "[VIN]" in oc.scrub(f"VERIFICAT {vin}")

    def test_text_normal_nemodificat(self):
        """Text without PII is left untouched."""
        text = "REVIZIE PERIODICA MOTOR"
        assert oc.scrub(text) == text

    def test_scrub_in_batch_call(self, monkeypatch):
        """The scrubbed form of a batch item no longer holds the original plate.

        NOTE: this checks oc.scrub() directly. The HTTP transport behind
        or_common.call() is not mocked here, so call() itself is never
        invoked; the previous never-installed urlopen stub was dead code and
        has been removed. ``monkeypatch`` stays in the signature but is
        currently unused.
        """
        scrubbed = oc.scrub("ITP CT 12 ABC")

        assert "[NR]" in scrubbed
        # Hence a batch built from the scrubbed text cannot leak the plate.
        assert "CT 12 ABC" not in scrubbed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resumabilitate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResumabil:
    """The labeller resumes from where it stopped, using partial.json."""

    @staticmethod
    def _revizie_partial():
        """Return a fresh partial dict in which REVIZIE is already labelled."""
        return {
            "REVIZIE": {
                "cod": "OE-3",
                "confidence": "high",
                "sursa": "ensemble-unanim",
                "votes": {},
            }
        }

    def test_skip_already_labeled(self, monkeypatch):
        """Representatives already in the partial are NOT sent to the LLM again."""
        sent = []

        def mock_call(model, batch, **kw):
            sent.extend(batch)
            return ["OE-1"] * len(batch), {"ms": 100, "err": None}

        monkeypatch.setattr(or_label.oc, "call", mock_call)

        groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}]
        result = or_label.label_groups(
            groups, self._revizie_partial(), batch_size=20, pace=0
        )

        # The LLM must not have been called for REVIZIE.
        assert "REVIZIE" not in sent
        # The code from the partial is kept.
        assert result["REVIZIE"]["cod"] == "OE-3"

    def test_labels_new_reps(self, monkeypatch):
        """New representatives (not in the partial) get labelled."""
        call_count = [0]

        def mock_call(model, batch, **kw):
            call_count[0] += 1
            return ["OE-1"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", mock_call)

        groups = [{"rep": "D/R BARA FATA", "freq": 3000, "members": []}]
        result = or_label.label_groups(groups, {}, batch_size=20, pace=0)

        # The LLM was called (at least once per model).
        assert call_count[0] >= len(or_label.MODELS)
        assert "D/R BARA FATA" in result
        assert result["D/R BARA FATA"]["cod"] == "OE-1"

    def test_partial_mixt(self, monkeypatch):
        """Partial with some labelled and some new entries -> only new ones are labelled."""
        sent = []

        def mock_call(model, batch, **kw):
            sent.extend(batch)
            return ["OE-2"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", mock_call)

        groups = [
            {"rep": "REVIZIE", "freq": 5000, "members": []},        # already in partial
            {"rep": "D/R BARA FATA", "freq": 3000, "members": []},  # new
        ]
        result = or_label.label_groups(
            groups, self._revizie_partial(), batch_size=20, pace=0
        )

        # Only D/R BARA FATA must be sent to the LLM.
        assert "REVIZIE" not in sent
        assert "D/R BARA FATA" in sent
        # Complete partial: both keys present.
        assert "REVIZIE" in result
        assert "D/R BARA FATA" in result
        # REVIZIE is kept from the partial.
        assert result["REVIZIE"]["cod"] == "OE-3"

    def test_load_partial_fisier_gol(self, tmp_path):
        """load_partial on a non-existent file returns an empty dict."""
        result = or_label.load_partial(str(tmp_path / "inexistent.json"))

        assert result == {}

    def test_save_si_load_partial(self, tmp_path):
        """save_partial followed by load_partial round-trips the data."""
        path = str(tmp_path / "partial.json")
        data = self._revizie_partial()

        or_label.save_partial(path, data)

        assert or_label.load_partial(path) == data
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Format output si propagare
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestOutputFormat:
    """expand_to_all yields rows with the required fields and correct propagation."""

    @staticmethod
    def _constant_call(cod):
        """Build a stand-in for ``oc.call`` that answers ``cod`` for every item."""
        def mock_call(model, batch, **kw):
            return [cod] * len(batch), {"ms": 50, "err": None}

        return mock_call

    def test_campuri_obligatorii(self, monkeypatch):
        """Every row has: denumire, cod, sursa, confidence, grup_rep."""
        monkeypatch.setattr(or_label.oc, "call", self._constant_call("OE-3"))

        groups = [{"rep": "REVIZIE", "freq": 5000,
                   "members": [("REVIZIE MICA", 100)]}]
        partial = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        results = or_label.expand_to_all(groups, partial)

        assert len(results) == 2  # representative + 1 member
        for row in results:
            assert "denumire" in row
            assert "cod" in row
            assert "sursa" in row
            assert "confidence" in row
            assert "grup_rep" in row

    def test_reprezentant_cu_sursa_ensemble(self, monkeypatch):
        """The representative has an 'ensemble-*' source, not 'propagat'."""
        monkeypatch.setattr(or_label.oc, "call", self._constant_call("OE-3"))

        groups = [{"rep": "REVIZIE", "freq": 5000, "members": []}]
        partial = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        results = or_label.expand_to_all(groups, partial)

        row = results[0]
        assert row["denumire"] == "REVIZIE"
        assert row["sursa"].startswith("ensemble-")
        assert row["sursa"] != "propagat"

    def test_membru_primeste_sursa_propagat(self, monkeypatch):
        """Group members get sursa='propagat' and the representative's code."""
        monkeypatch.setattr(or_label.oc, "call", self._constant_call("OE-3"))

        groups = [{"rep": "REVIZIE", "freq": 5000,
                   "members": [("REVIZIE MICA", 100), ("REVIZIE AUTO", 80)]}]
        partial = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        results = or_label.expand_to_all(groups, partial)

        assert len(results) == 3
        membri = [r for r in results if r["sursa"] == "propagat"]
        assert len(membri) == 2
        for membru in membri:
            assert membru["cod"] == "OE-3"  # propagated from the representative
            assert membru["grup_rep"] == "REVIZIE"

    def test_nul_propagat_ca_nul_nu_ca_cod_rar(self, monkeypatch):
        """NUL is propagated to members as NUL, not converted to a RAR code."""
        monkeypatch.setattr(or_label.oc, "call", self._constant_call("NUL"))

        groups = [{"rep": "ITP", "freq": 50,
                   "members": [("ITP + RAR", 30)]}]
        partial = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        results = or_label.expand_to_all(groups, partial)

        rar_codes = {c.split("=")[0] for c in oc.CODURI.replace(", ", ",").split(",")} - {"NUL"}
        for row in results:
            assert row["cod"] == "NUL"
            assert row["cod"] not in rar_codes

    def test_dezacord_propagat_ca_needs_mapping(self, monkeypatch):
        """Ensemble disagreement is propagated to members as needs_mapping."""
        call_n = [0]

        def mock_call(model, batch, **kw):
            call_n[0] += 1
            # The answer alternates with call order, so successive calls disagree.
            cod = "OE-1" if call_n[0] % 2 == 1 else "OE-3"
            return [cod] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "call", mock_call)

        groups = [{"rep": "REGLAT DIRECTIE", "freq": 200,
                   "members": [("REGLAT DIRECTIA", 150)]}]
        partial = or_label.label_groups(groups, {}, batch_size=20, pace=0)
        results = or_label.expand_to_all(groups, partial)

        # Both rows (representative + member) must be needs_mapping.
        for row in results:
            assert row["confidence"] == "needs_mapping"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integrare end-to-end (fara apeluri reale)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRunIntegrare:
    """Check run() with a mocked corpus and a mocked LLM."""

    def test_run_produce_fisier_output(self, tmp_path, monkeypatch):
        """run() writes the JSON output file."""
        def mock_corpus():
            return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)]

        def mock_call(model, batch, **kw):
            return ["OE-3"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus)
        monkeypatch.setattr(or_label.oc, "call", mock_call)

        out = str(tmp_path / "final.json")
        partial = str(tmp_path / "partial.json")
        or_label.run(n=2, output_path=out, partial_path=partial,
                     threshold=85, batch_size=20, pace=0)

        assert os.path.exists(out)
        # Context manager so the handle is closed even if json.load raises.
        with open(out, encoding="utf-8") as fh:
            loaded = json.load(fh)
        assert len(loaded) >= 2
        # Every entry carries the required fields.
        for row in loaded:
            assert "denumire" in row
            assert "cod" in row

    def test_run_resumabil(self, tmp_path, monkeypatch):
        """run() with an existing partial skips entries that are already labelled."""
        call_count = [0]

        def mock_corpus():
            return [("REVIZIE", 5000), ("D/R BARA FATA", 3000)]

        def mock_call(model, batch, **kw):
            call_count[0] += 1
            return ["OE-1"] * len(batch), {"ms": 50, "err": None}

        monkeypatch.setattr(or_label.oc, "corpus_by_freq", mock_corpus)
        monkeypatch.setattr(or_label.oc, "call", mock_call)

        partial_path = str(tmp_path / "partial.json")
        # Pre-populate the partial with REVIZIE.
        or_label.save_partial(partial_path, {
            "REVIZIE": {"cod": "OE-3", "confidence": "high",
                        "sursa": "ensemble-unanim", "votes": {}}
        })

        out = str(tmp_path / "final.json")
        or_label.run(n=2, output_path=out, partial_path=partial_path,
                     threshold=85, batch_size=20, pace=0)

        # The LLM is called ONLY for D/R BARA FATA (not for REVIZIE):
        # one call per model, for a single representative.
        assert call_count[0] == len(or_label.MODELS)
|
||||
Reference in New Issue
Block a user