Files
rar-autopass/tools/mapare-llm/bigtest.py
Claude Agent 9031f81908 feat(mapare-llm): pivot PRD 5.14 + tooling etichetare OpenRouter
PRD 5.14 rescris cu pivotul arhitectural: LLM doar etichetator OFFLINE,
runtime = clasificator local fara API (fuzzy + embeddings), baza de
cunostinte GOLD partajata cross-account (validarea unui service ajuta
toate). Decizia 8 (corpus per-cont) SUPERSEDED.

Tooling nou OpenRouter (free, familia NVIDIA Nemotron): or_common.py
(client + corpus pe frecventa, cheie din .env) + or_modeltest.py
(comparatie modele, acord ensemble vs Groq). Masurat: super-120b +
nano-9b fiabile, 3/3 unanim pe 87% volum; ultra-550b aruncat.

Corpus real (4 CSV service, coloana NR=frecventa) + etichete Groq
bootstrap incluse ca date de masurare.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 14:10:10 +00:00

86 lines
4.8 KiB
Python

import json, urllib.request, math, time, csv, glob, random, re, unicodedata
# Benchmark configuration:
#   IP    - host of the embedding HTTP service queried by embed()
#   MODEL - name of the embedding model sent with every request
#   N     - number of operations drawn into the random sample
IP = "10.0.20.161"
MODEL = "nomic-embed-text"
N = 140

# Fixed seed so the random sample is the same on every run.
random.seed(42)
def embed(t):
    """Return the embedding vector the embedding service produces for ``t``.

    Sends a JSON POST to ``http://{IP}:11434/api/embeddings`` carrying the
    model name, the text as ``prompt`` and ``keep_alive`` set to ``"30m"``,
    then returns the ``"embedding"`` field of the JSON reply.

    Errors raised by ``urllib`` (connection failure, timeout, HTTP error),
    by JSON decoding, or by a missing ``"embedding"`` key are not caught.
    """
    payload = {"model": MODEL, "prompt": t, "keep_alive": "30m"}
    request = urllib.request.Request(
        f"http://{IP}:11434/api/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    # The reply is parsed while the connection is still open.
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.load(response)
    return reply["embedding"]
def cos(a, b):
    """Return the cosine similarity of the numeric vectors ``a`` and ``b``.

    The dot product pairs elements with ``zip``, so if the vectors differ in
    length only the common prefix contributes to it, while each norm is
    computed over the full vector.

    Returns ``0.0`` when either vector has zero norm (including empty
    vectors), where the similarity is undefined, instead of raising
    ``ZeroDivisionError``.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    denom = norm_a * norm_b
    if denom == 0:
        # Zero-norm input: no direction to compare, report "no similarity".
        return 0.0
    return dot / denom
# Anchor descriptions (Romanian text), keyed by operation code.
# Each description is embedded once into the `anc` list; a sampled operation
# is then assigned the code whose anchor embedding has the highest cosine
# similarity to the operation's own embedding.
ANCORE={
"OE-1":"REPARATIE: inlocuit sau reparat piesa defecta: placute frana, kit ambreaj, kit distributie, amortizoare, rulment, toba esapament, alternator, pompa, radiator, demontat montat piesa, vopsit usa aripa, reparat caroserie, vulcanizare, sudura",
"OE-2":"INTRETINERE curenta: aerisit frane, gresat, curatat, verificat si completat nivele lichide, intretinere periodica minora",
"OE-3":"REVIZIE PERIODICA la kilometri: schimb ulei motor si filtru ulei, filtru aer polen combustibil, revizie 15000 30000 60000 km",
"OE-4":"REGLARE FUNCTIONALA: reglaj faruri, geometrie directie, supape, calibrare senzori, adaptare actuator, fara inlocuire piese",
"OE-5":"MODIFICARE CONSTRUCTIVA: montare carlig remorcare, GPL, transformare omologata",
"OE-6":"RECONSTRUCTIE vehicul avariat sever dupa dauna totala",
"OE-7":"ACTUALIZARE SOFTWARE: update software calculator, programare ECU, codare module, flash",
"OE-8":"INLOCUIRE SEZONIERA A ANVELOPELOR: montat anvelope iarna vara, schimb sezonier cauciucuri",
"OE-D":"AVARIE GRAVA sistem directie in urma unui accident",
"OE-F":"AVARIE GRAVA sistem franare in urma unui accident",
"OE-C":"AVARIE GRAVA structura caroserie in urma unui accident",
"OE-S":"AVARIE GRAVA structura sasiu in urma unui accident",
"OE-R":"AVARIE GRAVA sistem retinere airbag centuri in urma unui accident",
"OE-A":"AVARIE GRAVA sistem ADAS asistenta condus in urma unui accident",
"OE-I":"ISTORIC INDICATIE ODOMETRU vehicule inmatriculate anterior in alte tari",
"AITLV":"INREGISTRARE ATELIER inspectie tahografe limitatoare viteza",
"R-ODO":"REPARATIE ODOMETRU kilometraj ceas bord",
"I-ODO":"INLOCUIRE ODOMETRU kilometraj ceas bord"}
# Reference heuristic (clear-cut cases only), used as an agreement signal.
def norm(s):
    """Return ``s`` upper-cased with combining diacritical marks removed.

    The text is upper-cased, decomposed with Unicode NFD, and every character
    in category ``Mn`` (non-spacing mark) is dropped, so accented letters
    collapse to their base letter.
    """
    decomposed = unicodedata.normalize('NFD', s.upper())
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)
def heur(op):
    """Return a keyword-based reference label for ``op``, or ``None``.

    The text is first normalised with ``norm`` (upper case, no diacritics).
    Rules are tried in a fixed priority order and the first match wins:

    1. ``"NUL"``  - the text matches the exclusion pattern (ITP, ACHITAT,
       CONF., FACTUR, MANOPERA, DEPLAS, or a whole string shaped like 1-2
       letters, 2-3 digits, 3 letters - presumably a licence plate).
    2. ``"OE-8"`` - mentions ANVELOP, CAUCIUC or JANT.
    3. ``"OE-7"`` - mentions SOFTWARE, CODARE, PROGRAMARE or UPDATE.
    4. ``"OE-3"`` - mentions REVIZIE, FILTRU ULEI, or both ULEI and MOTOR.
    5. ``"OE-4"`` - mentions REGLAJ, REGLARE, GEOMETRIE or CALIBRARE.
    6. ``"OE-2"`` - mentions AERISIT, GRESAT or CURATAT.
    7. ``"OE-1"`` - starts with INLOC(UIT) or D/R, or mentions REPARAT,
       VOPSIT, SCHIMBAT or MONTAT.

    ``None`` means the heuristic has no opinion and the operation is left
    out of the agreement evaluation.
    """
    text = norm(op)

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the text.
        return any(keyword in text for keyword in keywords)

    excluded = re.search(
        r'\bITP\b|ACHITAT|CONF\.|FACTUR|MANOPERA|DEPLAS|^[A-Z]{1,2} ?\d{2,3} ?[A-Z]{3}$',
        text,
    )
    if excluded:
        return "NUL"
    if mentions('ANVELOP', 'CAUCIUC', 'JANT'):
        return "OE-8"
    if mentions('SOFTWARE', 'CODARE', 'PROGRAMARE', 'UPDATE'):
        return "OE-7"
    if mentions('REVIZIE', 'FILTRU ULEI') or ('ULEI' in text and 'MOTOR' in text):
        return "OE-3"
    if mentions('REGLAJ', 'REGLARE', 'GEOMETRIE', 'CALIBRARE'):
        return "OE-4"
    if mentions('AERISIT', 'GRESAT', 'CURATAT'):
        return "OE-2"
    if (text.startswith(('INLOCUIT', 'INLOC', 'D/R'))
            or mentions('REPARAT', 'VOPSIT', 'SCHIMBAT', 'MONTAT')):
        return "OE-1"
    # Unknown -> not evaluated.
    return None
# Load every distinct, non-empty operation text (second column) from the
# semicolon-delimited service CSV files, then draw the random sample.
ops = set()
for f in sorted(glob.glob("/workspace/autopass/docs/operatii-service/*.csv")):
    # The with-block guarantees the file handle is closed; newline="" is the
    # mode the csv module documents for files handed to csv.reader.
    with open(f, encoding="utf-8", errors="replace", newline="") as fh:
        rows = csv.reader(fh, delimiter=";")
        next(rows, None)  # skip the header row (no-op for an empty file)
        for r in rows:
            if len(r) > 1 and r[1].strip():
                ops.add(r[1].strip())
# Sorting before sampling makes the sample depend only on the seed and the
# set of texts, not on set iteration order.
ops = sorted(ops)
# NOTE: random.sample raises ValueError when fewer than N operations exist.
sample = random.sample(ops, N)
print(f"{len(ops)} operatii distincte; esantion random {N}\n", flush=True)
# Embed every anchor description once, up front. t0 is also the start time
# for the total duration reported after the sample has been classified.
t0 = time.time()
anc = list(zip(ANCORE, map(embed, ANCORE.values())))  # [(code, vector), ...]
print(f"ancore embed: {time.time()-t0:.0f}s", flush=True)
# Confidence threshold: a best similarity below this is reported as "LOW".
THR = 0.55

# Classify each sampled operation by its nearest anchor. Every entry of res
# is (operation text, best code, best similarity, margin over the runner-up).
res = []
for op in sample:
    vec = embed(op)
    scored = [(cos(vec, anchor_vec), code) for code, anchor_vec in anc]
    scored.sort(reverse=True)  # highest similarity first
    sim, cod = scored[0]
    sim2 = scored[1][0]
    res.append((op, cod, sim, sim - sim2))
# Statistics over the classified sample.
from collections import Counter

dist = Counter(cod for _, cod, _, _ in res)          # predictions per code
acc = sum(1 for _, _, s, _ in res if s >= THR)       # at or above threshold
amb = sum(1 for _, _, _, m in res if m < 0.03)       # top-2 margin too small

# Agreement with the heuristic on the clear-cut subset. The heuristic label
# is computed once per operation and reused below instead of calling heur()
# again for the filter, the count and the disagreement listing.
labels = {op: heur(op) for op, _, _, _ in res}
ev = [(op, cod) for op, cod, _, _ in res if labels[op] is not None]
agree = sum(1 for op, cod in ev if labels[op] == cod)

elapsed = time.time() - t0
# max(..., 1) keeps the per-op time and percentages defined for an empty
# sample, matching the guard already used for the heuristic percentage.
print(f"\n--- {N} operatii in {elapsed:.0f}s ({elapsed/max(N,1):.1f}s/op) ---")
print("Distributie coduri:", dict(dist.most_common()))
print(f"Peste prag {THR}: {acc}/{N} ({100*acc//max(N,1)}%) | Ambigue(marja<0.03): {amb}")
print(f"Acord cu heuristica pe {len(ev)} cazuri clare: {agree}/{len(ev)} ({100*agree//max(len(ev),1)}%)")

print("\nDEZACORDURI fata de heuristica (de inspectat):")
for op, cod in ev:
    if labels[op] != cod:
        print(f" {op:<40} embed={cod:<6} heur={labels[op]}")

print("\nESANTION (primele 45):")
for op, cod, sim, m in res[:45]:
    # LOW takes precedence over AMB; an empty flag means a confident match.
    flag = "LOW" if sim < THR else ("AMB" if m < 0.03 else "")
    print(f" {op:<42}{cod:<6}{sim:.3f} {flag}")