Files
rar-autopass/tools/mapare-llm/eval_det.py
Claude Agent 9031f81908 feat(mapare-llm): pivot PRD 5.14 + tooling etichetare OpenRouter
PRD 5.14 rescris cu pivotul arhitectural: LLM doar etichetator OFFLINE,
runtime = clasificator local fara API (fuzzy + embeddings), baza de
cunostinte GOLD partajata cross-account (validarea unui service ajuta
toate). Decizia 8 (corpus per-cont) SUPERSEDED.

Tooling nou OpenRouter (free, familia NVIDIA Nemotron): or_common.py
(client + corpus pe frecventa, cheie din .env) + or_modeltest.py
(comparatie modele, acord ensemble vs Groq). Masurat: super-120b +
nano-9b fiabile, 3/3 unanim pe 87% volum; ultra-550b aruncat.

Corpus real (4 CSV service, coloana NR=frecventa) + etichete Groq
bootstrap incluse ca date de masurare.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 14:10:10 +00:00

82 lines
3.0 KiB
Python

"""Evaluate a DETERMINISTIC classifier (no AI at runtime) built from the
Groq labels. 90/10 split: 'train' on 90% (exact lookup + fuzzy NN +
Naive Bayes over tokens), test on the unseen 10%. Measure coverage + accuracy
per layer and overall, against the Groq labels (the reference)."""
import json, re, unicodedata, random, math, time, os
from collections import defaultdict, Counter
from rapidfuzz import process, fuzz
# Path of the JSON labels file read by this script. The default is the
# original scratchpad location; set MAPARE_LABELS_JSON to point the script at
# a different file without editing the source.
OUT = os.environ.get(
    "MAPARE_LABELS_JSON",
    "/tmp/claude-1000/-workspace-autopass/4177677c-7995-4fab-bbd5-16735cb335e3/scratchpad/labels.json",
)
# Fixed seed so the shuffle (and therefore the train/test split) is repeatable.
random.seed(7)
def norm(s):
    """Return the canonical matching key for the text ``s``.

    The text is upper-cased and NFD-decomposed, characters of Unicode
    category ``Mn`` (the combining marks left by the decomposition) are
    dropped, every character outside ``A-Z``, ``0-9``, ``/`` and space is
    replaced by a space, and whitespace runs are collapsed and trimmed.
    """
    decomposed = unicodedata.normalize('NFD', s.upper())
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    cleaned = re.sub(r'[^A-Z0-9/ ]', ' ', ''.join(kept))
    collapsed = re.sub(r'\s+', ' ', cleaned)
    return collapsed.strip()
def toks(s):
    """Return the space-separated tokens of ``norm(s)`` that are longer than one character."""
    words = norm(s).split()
    return list(filter(lambda word: len(word) > 1, words))
# Load the labelled corpus. ``.items()`` below requires the file to hold a
# JSON object (key -> label). The file is opened in a ``with`` block so the
# handle is closed, and decoded as UTF-8 explicitly instead of relying on the
# platform's locale encoding.
with open(OUT, encoding="utf-8") as fh:
    labels = json.load(fh)
items = list(labels.items())
random.shuffle(items)
# 90/10 split of the shuffled pairs: first 90% for training, the rest for testing.
cut = int(len(items) * 0.9)
train = items[:cut]
test = items[cut:]
print(f"{len(items)} etichetate | train {len(train)} | test {len(test)}")
# With fewer than two labelled items one side of the split is empty and the
# later stages fail with an unrelated error; stop here with a clear message.
if not train or not test:
    raise SystemExit(
        f"Need at least 2 labelled items to build a train/test split, got {len(items)}"
    )
# --- layer 1: exact lookup on the normalized text ---
# Built in train order: when several train entries normalize to the same key,
# the label of the last one wins (conflicts are expected to be rare).
exact = {norm(op_text): code for op_text, code in train}
# --- layer 2: fuzzy nearest neighbour (rapidfuzz) ---
# Parallel lists: normalized train text and its label, in train order.
train_norm = []
train_code = []
for op_text, code in train:
    train_norm.append(norm(op_text))
    train_code.append(code)
# Normalized text -> label; the FIRST label seen for a given key is kept.
norm2code = {}
for key, code in zip(train_norm, train_code):
    if key not in norm2code:
        norm2code[key] = code
# Candidate strings handed to the fuzzy matcher (unique keys, first-seen order).
choices = list(norm2code)
# Minimum match score for a fuzzy hit to be accepted.
FUZZ_THR = 88
# --- layer 3: Naive Bayes over tokens (learned from the labels) ---
# Number of train entries per label.
classes = Counter(code for _, code in train)
# Log prior of each label: log(share of train entries carrying that label).
prior = {}
for code, count in classes.items():
    prior[code] = math.log(count / len(train))
# tok_cnt[label][token] = occurrences of the token under that label;
# tok_tot[label] = total token occurrences under that label.
tok_cnt = defaultdict(lambda: defaultdict(int))
tok_tot = defaultdict(int)
vocab = set()
for op_text, code in train:
    for token in toks(op_text):
        tok_cnt[code][token] += 1
        tok_tot[code] += 1
        vocab.add(token)
# Vocabulary size, used as the smoothing term in the token likelihoods.
V = len(vocab)
def nb(op):
    """Return the label with the highest Naive Bayes log score for ``op``.

    The score of a label is its log prior plus, for every token of ``op``,
    ``log((count + 1) / (total + V))`` (add-one smoothing over the training
    vocabulary). On ties the first label in the iteration order of
    ``classes`` wins. Returns ``None`` when ``classes`` is empty.
    """
    # Tokenize once: the tokens do not depend on the class being scored.
    tokens = toks(op)
    best = None
    bests = -1e18
    for c in classes:
        # Read with .get so scoring never inserts zero entries into the
        # trained defaultdict tables.
        counts = tok_cnt.get(c, {})
        denom = tok_tot.get(c, 0) + V
        s = prior[c]
        for t in tokens:
            s += math.log((counts.get(t, 0) + 1) / denom)
        if s > bests:
            bests = s
            best = c
    return best
# Most frequent label in the training set; the last-resort answer of predict().
MAJ = classes.most_common(1)[0][0]


def predict(op):
    """Classify ``op`` through the layered deterministic pipeline.

    Layers are tried in order and the first one that answers wins:
    exact lookup of the normalized text, fuzzy match against the training
    keys (score >= ``FUZZ_THR``), Naive Bayes when ``op`` has at least one
    token, and finally the majority label.

    Returns a ``(label, layer)`` tuple where ``layer`` is one of
    ``"exact"``, ``"fuzzy"``, ``"nb"`` or ``"default"``.
    """
    n = norm(op)
    if n in exact:
        return exact[n], "exact"
    # score_cutoff makes rapidfuzz return None when no candidate reaches the
    # threshold and lets it discard weak candidates early; accepted matches
    # are the same as filtering on the score afterwards.
    m = process.extractOne(n, choices, scorer=fuzz.WRatio, score_cutoff=FUZZ_THR)
    if m is not None:
        return norm2code[m[0]], "fuzzy"
    if toks(op):
        return nb(op), "nb"
    return MAJ, "default"
# --- evaluation on the held-out test items ---
# perf_counter is the clock intended for measuring short elapsed intervals
# (monotonic, highest available resolution); wall-clock time.time() is not.
t0 = time.perf_counter()
# layer/tot: predictions answered by each layer (both hold the same counts);
# ok: predictions of each layer that equal the reference label.
layer = Counter()
ok = Counter()
tot = Counter()
# Sample of misclassified items (at most 25 kept, first 20 printed below).
mis = []
for op, truth in test:
    pred, lyr = predict(op)
    layer[lyr] += 1
    tot[lyr] += 1
    if pred == truth:
        ok[lyr] += 1
    elif len(mis) < 25:
        mis.append((op, pred, truth, lyr))
dt = time.perf_counter() - t0

TOTAL = len(test)
OKALL = sum(ok.values())
print(f"\nPredictie {TOTAL} cazuri in {dt:.2f}s ({1000*dt/TOTAL:.2f} ms/op) - FARA AI")
print(f"\nACURATETE GLOBALA (vs Groq): {OKALL}/{TOTAL} = {100*OKALL/TOTAL:.1f}%")
# Per-layer table: hits, correct hits, accuracy within the layer, and the
# layer's share of all test items (coverage). Layers with no hits are skipped.
print(f"\n{'strat':<8}{'aparitii':>9}{'corecte':>9}{'acuratete':>11}{'acoperire':>11}")
for lyr in ["exact", "fuzzy", "nb", "default"]:
    if tot[lyr]:
        print(f"{lyr:<8}{tot[lyr]:>9}{ok[lyr]:>9}{100*ok[lyr]/tot[lyr]:>10.1f}%{100*tot[lyr]/TOTAL:>10.1f}%")
print("\nExemple gresite (pred != Groq):")
for op, p, t, l in mis[:20]:
    print(f" [{l}] {op:<42} pred={p:<6} groq={t}")