import csv
import glob
import json
import math
import random
import re
import time
import unicodedata
import urllib.request
from collections import Counter

IP = "10.0.20.161"
MODEL = "nomic-embed-text"
N = 140  # number of distinct operations drawn for the evaluation sample
random.seed(42)  # fixed seed so the random sample is reproducible


def embed(t):
    """Return the embedding vector for text *t*.

    POSTs a JSON body (model name, prompt, ``keep_alive`` of 30 minutes) to
    ``http://{IP}:11434/api/embeddings`` with a 120 second timeout and returns
    the ``"embedding"`` field of the JSON reply.

    Network errors and timeouts raised by ``urlopen`` propagate to the caller;
    a reply without an ``"embedding"`` field raises ``KeyError``.
    """
    req = urllib.request.Request(
        f"http://{IP}:11434/api/embeddings",
        data=json.dumps({"model": MODEL, "prompt": t, "keep_alive": "30m"}).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=120) as r:
        return json.load(r)["embedding"]


def cos(a, b):
    """Return the cosine similarity of the numeric sequences *a* and *b*.

    Returns 0.0 when either vector has zero norm (the similarity is undefined
    there) instead of raising ``ZeroDivisionError``.
    """
    dot = sum(x * y for x, y in zip(a, b))
    denom = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    if denom == 0.0:
        return 0.0
    return dot / denom


# Anchor descriptions: one reference text per operation code. Every sampled
# operation is assigned the code whose anchor embedding is most similar to it.
ANCORE = {
    "OE-1": "REPARATIE: inlocuit sau reparat piesa defecta: placute frana, kit ambreaj, kit distributie, amortizoare, rulment, toba esapament, alternator, pompa, radiator, demontat montat piesa, vopsit usa aripa, reparat caroserie, vulcanizare, sudura",
    "OE-2": "INTRETINERE curenta: aerisit frane, gresat, curatat, verificat si completat nivele lichide, intretinere periodica minora",
    "OE-3": "REVIZIE PERIODICA la kilometri: schimb ulei motor si filtru ulei, filtru aer polen combustibil, revizie 15000 30000 60000 km",
    "OE-4": "REGLARE FUNCTIONALA: reglaj faruri, geometrie directie, supape, calibrare senzori, adaptare actuator, fara inlocuire piese",
    "OE-5": "MODIFICARE CONSTRUCTIVA: montare carlig remorcare, GPL, transformare omologata",
    "OE-6": "RECONSTRUCTIE vehicul avariat sever dupa dauna totala",
    "OE-7": "ACTUALIZARE SOFTWARE: update software calculator, programare ECU, codare module, flash",
    "OE-8": "INLOCUIRE SEZONIERA A ANVELOPELOR: montat anvelope iarna vara, schimb sezonier cauciucuri",
    "OE-D": "AVARIE GRAVA sistem directie in urma unui accident",
    "OE-F": "AVARIE GRAVA sistem franare in urma unui accident",
    "OE-C": "AVARIE GRAVA structura caroserie in urma unui accident",
    "OE-S": "AVARIE GRAVA structura sasiu in urma unui accident",
    "OE-R": "AVARIE GRAVA sistem retinere airbag centuri in urma unui accident",
    "OE-A": "AVARIE GRAVA sistem ADAS asistenta condus in urma unui accident",
    "OE-I": "ISTORIC INDICATIE ODOMETRU vehicule inmatriculate anterior in alte tari",
    "AITLV": "INREGISTRARE ATELIER inspectie tahografe limitatoare viteza",
    "R-ODO": "REPARATIE ODOMETRU kilometraj ceas bord",
    "I-ODO": "INLOCUIRE ODOMETRU kilometraj ceas bord",
}


# Reference heuristic (clear-cut cases only), used as an agreement signal
# against the embedding-based classification.
def norm(s):
    """Return *s* upper-cased with combining marks (diacritics) removed.

    The text is NFD-decomposed so accented letters split into a base letter
    plus combining marks, then every character of category ``Mn`` is dropped.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s.upper())
        if unicodedata.category(c) != 'Mn'
    )


# Entries that are not service operations at all. Compiled once instead of on
# every heur() call. The last alternative matches a whole string made of 1-2
# letters, 2-3 digits and 3 letters (presumably a licence-plate number --
# confirm against the data).
_NUL_RE = re.compile(r'\bITP\b|ACHITAT|CONF\.|FACTUR|MANOPERA|DEPLAS|^[A-Z]{1,2} ?\d{2,3} ?[A-Z]{3}$')


def heur(op):
    """Classify operation text *op* with keyword rules.

    Returns ``"NUL"`` for entries matched by the non-operation pattern, an
    ``"OE-*"`` code for the first keyword rule that matches, or ``None`` when
    no rule applies (such operations are left out of the agreement check).
    Rules are tested in order, so earlier rules take precedence.
    """
    s = norm(op)
    if _NUL_RE.search(s):
        return "NUL"
    if 'ANVELOP' in s or 'CAUCIUC' in s or 'JANT' in s:
        return "OE-8"
    if 'SOFTWARE' in s or 'CODARE' in s or 'PROGRAMARE' in s or 'UPDATE' in s:
        return "OE-7"
    if 'REVIZIE' in s or ('ULEI' in s and 'MOTOR' in s) or 'FILTRU ULEI' in s:
        return "OE-3"
    if 'REGLAJ' in s or 'REGLARE' in s or 'GEOMETRIE' in s or 'CALIBRARE' in s:
        return "OE-4"
    if 'AERISIT' in s or 'GRESAT' in s or 'CURATAT' in s:
        return "OE-2"
    # 'INLOC' already covers 'INLOCUIT', so a single prefix is enough.
    if (s.startswith(('INLOC', 'D/R')) or 'REPARAT' in s or 'VOPSIT' in s
            or 'SCHIMBAT' in s or 'MONTAT' in s):
        return "OE-1"
    return None  # unknown -> not evaluated


# Load all distinct operations: second column of every ';'-delimited CSV file.
ops = set()
for f in sorted(glob.glob("/workspace/autopass/docs/operatii-service/*.csv")):
    # newline="" is what the csv module expects; the with-block closes the
    # file handle, which the previous bare open() never did.
    with open(f, encoding="utf-8", errors="replace", newline="") as fh:
        rows = csv.reader(fh, delimiter=";")
        next(rows, None)  # skip the first row (presumably the header)
        for r in rows:
            if len(r) > 1 and r[1].strip():
                ops.add(r[1].strip())
# Sort before sampling so the seeded sample does not depend on set order.
ops = sorted(ops)
sample = random.sample(ops, N)
print(f"{len(ops)} operatii distincte; esantion random {N}\n", flush=True)

t0 = time.time()
# Embed every anchor description once, up front.
anc = [(c, embed(d)) for c, d in ANCORE.items()]
print(f"ancore embed: {time.time()-t0:.0f}s", flush=True)

THR = 0.55  # confidence threshold
# One tuple per sampled operation:
# (operation text, best code, best similarity, margin over the runner-up).
res = []
for op in sample:
    e = embed(op)
    rang = sorted(((cos(e, v), c) for c, v in anc), reverse=True)
    sim, cod = rang[0]
    sim2 = rang[1][0]
    res.append((op, cod, sim, sim - sim2))

# Statistics
dist = Counter(c for _, c, _, _ in res)
acc = sum(1 for _, _, s, _ in res if s >= THR)
amb = sum(1 for _, _, _, m in res if m < 0.03)
# acord cu heuristica pe
subsetul clar ev=[(op,cod) for (op,cod,_,_) in res if heur(op) is not None] agree=sum(1 for op,cod in ev if heur(op)==cod) print(f"\n--- {N} operatii in {time.time()-t0:.0f}s ({(time.time()-t0)/N:.1f}s/op) ---") print("Distributie coduri:", dict(dist.most_common())) print(f"Peste prag {THR}: {acc}/{N} ({100*acc//N}%) | Ambigue(marja<0.03): {amb}") print(f"Acord cu heuristica pe {len(ev)} cazuri clare: {agree}/{len(ev)} ({100*agree//max(len(ev),1)}%)") print("\nDEZACORDURI fata de heuristica (de inspectat):") for op,cod in ev: if heur(op)!=cod: print(f" {op:<40} embed={cod:<6} heur={heur(op)}") print("\nESANTION (primele 45):") for op,cod,sim,m in res[:45]: flag="LOW" if sim