PRD 5.14 rescris cu pivotul arhitectural: LLM doar etichetator OFFLINE, runtime = clasificator local fara API (fuzzy + embeddings), baza de cunostinte GOLD partajata cross-account (validarea unui service ajuta toate). Decizia 8 (corpus per-cont) SUPERSEDED. Tooling nou OpenRouter (free, familia NVIDIA Nemotron): or_common.py (client + corpus pe frecventa, cheie din .env) + or_modeltest.py (comparatie modele, acord ensemble vs Groq). Masurat: super-120b + nano-9b fiabile, 3/3 unanim pe 87% volum; ultra-550b aruncat. Corpus real (4 CSV service, coloana NR=frecventa) + etichete Groq bootstrap incluse ca date de masurare. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
86 lines
4.8 KiB
Python
86 lines
4.8 KiB
Python
import json, urllib.request, math, time, csv, glob, random, re, unicodedata
|
|
|
|
# Address of the embedding server queried by embed() (port 11434).
IP = "10.0.20.161"
# Name of the embedding model sent with every request.
MODEL = "nomic-embed-text"
# Number of distinct operations drawn into the random evaluation sample.
N = 140

# Fixed seed so the random sample is the same on every run.
random.seed(42)
|
|
|
|
def embed(t):
    """Return the embedding vector of text ``t`` from the embedding server.

    Sends a JSON POST to ``http://{IP}:11434/api/embeddings`` carrying the
    model name (``MODEL``), the text as ``prompt`` and ``keep_alive`` set to
    ``"30m"``, then returns the ``"embedding"`` field of the JSON reply.

    Errors raised by ``urllib`` (connection failure, HTTP error, timeout
    after 120 seconds) propagate to the caller, as does ``KeyError`` when the
    reply has no ``"embedding"`` field.
    """
    payload = {"model": MODEL, "prompt": t, "keep_alive": "30m"}
    request = urllib.request.Request(
        f"http://{IP}:11434/api/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.load(response)
    return reply["embedding"]
|
|
def cos(a, b):
    """Return the cosine similarity of the numeric vectors ``a`` and ``b``.

    The dot product is taken over ``zip(a, b)``, so when the lengths differ
    the extra components of the longer vector are left out of the dot product
    (they still contribute to that vector's norm).

    Returns 0.0 when either vector has zero norm (including empty vectors)
    instead of raising ``ZeroDivisionError``: a zero vector has no direction,
    so it is treated as similar to nothing.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    denom = norm_a * norm_b
    if denom == 0:
        return 0.0
    return dot / denom
|
|
|
|
# Anchor texts, keyed by operation code. Each text is embedded once and every
# sampled operation is assigned the code whose anchor is most similar to it.
# Insertion order is kept as in the original literal.
ANCORE = {
    "OE-1": "REPARATIE: inlocuit sau reparat piesa defecta: placute frana, kit ambreaj, kit distributie, amortizoare, rulment, toba esapament, alternator, pompa, radiator, demontat montat piesa, vopsit usa aripa, reparat caroserie, vulcanizare, sudura",
    "OE-2": "INTRETINERE curenta: aerisit frane, gresat, curatat, verificat si completat nivele lichide, intretinere periodica minora",
    "OE-3": "REVIZIE PERIODICA la kilometri: schimb ulei motor si filtru ulei, filtru aer polen combustibil, revizie 15000 30000 60000 km",
    "OE-4": "REGLARE FUNCTIONALA: reglaj faruri, geometrie directie, supape, calibrare senzori, adaptare actuator, fara inlocuire piese",
    "OE-5": "MODIFICARE CONSTRUCTIVA: montare carlig remorcare, GPL, transformare omologata",
    "OE-6": "RECONSTRUCTIE vehicul avariat sever dupa dauna totala",
    "OE-7": "ACTUALIZARE SOFTWARE: update software calculator, programare ECU, codare module, flash",
    "OE-8": "INLOCUIRE SEZONIERA A ANVELOPELOR: montat anvelope iarna vara, schimb sezonier cauciucuri",
    "OE-D": "AVARIE GRAVA sistem directie in urma unui accident",
    "OE-F": "AVARIE GRAVA sistem franare in urma unui accident",
    "OE-C": "AVARIE GRAVA structura caroserie in urma unui accident",
    "OE-S": "AVARIE GRAVA structura sasiu in urma unui accident",
    "OE-R": "AVARIE GRAVA sistem retinere airbag centuri in urma unui accident",
    "OE-A": "AVARIE GRAVA sistem ADAS asistenta condus in urma unui accident",
    "OE-I": "ISTORIC INDICATIE ODOMETRU vehicule inmatriculate anterior in alte tari",
    "AITLV": "INREGISTRARE ATELIER inspectie tahografe limitatoare viteza",
    "R-ODO": "REPARATIE ODOMETRU kilometraj ceas bord",
    "I-ODO": "INLOCUIRE ODOMETRU kilometraj ceas bord",
}
|
|
|
|
# Reference heuristic (clear-cut cases only), used as an agreement signal.


def norm(s):
    """Return ``s`` upper-cased with combining marks (diacritics) removed.

    The text is upper-cased, decomposed to NFD so that accents become separate
    combining characters, and every character in Unicode category ``Mn``
    (nonspacing mark) is dropped.
    """
    decomposed = unicodedata.normalize("NFD", s.upper())
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(kept)
|
|
def heur(op):
    """Return a keyword-based reference code for ``op``, or ``None``.

    The text is normalised with ``norm()`` and the rules are tried in order;
    the first one that matches wins:

    * ``"NUL"``  - ITP as a whole word, ACHITAT, ``CONF.``, FACTUR, MANOPERA,
      DEPLAS, or a whole string of 1-2 letters, 2-3 digits and 3 letters
      (presumably a registration plate - confirm).
    * ``"OE-8"`` - ANVELOP, CAUCIUC or JANT.
    * ``"OE-7"`` - SOFTWARE, CODARE, PROGRAMARE or UPDATE.
    * ``"OE-3"`` - REVIZIE, FILTRU ULEI, or both ULEI and MOTOR.
    * ``"OE-4"`` - REGLAJ, REGLARE, GEOMETRIE or CALIBRARE.
    * ``"OE-2"`` - AERISIT, GRESAT or CURATAT.
    * ``"OE-1"`` - starts with INLOC or D/R, or contains REPARAT, VOPSIT,
      SCHIMBAT or MONTAT.

    ``None`` means no rule matched, so the operation is left out of the
    agreement evaluation.
    """
    s = norm(op)

    def has(*keywords):
        # True when at least one keyword occurs anywhere in the text.
        return any(keyword in s for keyword in keywords)

    if re.search(r'\bITP\b|ACHITAT|CONF\.|FACTUR|MANOPERA|DEPLAS|^[A-Z]{1,2} ?\d{2,3} ?[A-Z]{3}$', s):
        return "NUL"
    if has('ANVELOP', 'CAUCIUC', 'JANT'):
        return "OE-8"
    if has('SOFTWARE', 'CODARE', 'PROGRAMARE', 'UPDATE'):
        return "OE-7"
    if has('REVIZIE', 'FILTRU ULEI') or ('ULEI' in s and 'MOTOR' in s):
        return "OE-3"
    if has('REGLAJ', 'REGLARE', 'GEOMETRIE', 'CALIBRARE'):
        return "OE-4"
    if has('AERISIT', 'GRESAT', 'CURATAT'):
        return "OE-2"
    # The 'INLOC' prefix already covers every string starting with 'INLOCUIT'.
    if s.startswith(('INLOC', 'D/R')) or has('REPARAT', 'VOPSIT', 'SCHIMBAT', 'MONTAT'):
        return "OE-1"
    return None  # unknown -> not evaluated
|
|
|
|
# Load every distinct operation text (second column) from the service CSVs.
CSV_GLOB = "/workspace/autopass/docs/operatii-service/*.csv"
ops = set()
for f in sorted(glob.glob(CSV_GLOB)):
    # The context manager closes each file as soon as it has been read;
    # newline="" is what the csv module requires of files handed to a reader.
    with open(f, encoding="utf-8", errors="replace", newline="") as fh:
        rows = csv.reader(fh, delimiter=";")
        next(rows, None)  # skip the first row (presumably the header)
        for r in rows:
            if len(r) > 1 and r[1].strip():
                ops.add(r[1].strip())
ops = sorted(ops)
if not ops:
    # Stop with a clear message instead of an opaque failure further down.
    raise SystemExit(f"Nicio operatie gasita in {CSV_GLOB}")
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is clamped; N is rebound so later per-N statistics stay correct.
N = min(N, len(ops))
sample = random.sample(ops, N)
print(f"{len(ops)} operatii distincte; esantion random {N}\n", flush=True)
|
|
|
|
# Embed every anchor text once. t0 is also the start time used by the timing
# line of the final report.
t0 = time.time()
anc = [
    (code, embed(description))
    for code, description in ANCORE.items()
]
print(f"ancore embed: {time.time()-t0:.0f}s", flush=True)
|
|
|
|
THR = 0.55  # confidence threshold on the best similarity

# One tuple per sampled operation:
# (text, best code, best similarity, margin over the runner-up).
res = []
for op in sample:
    vec = embed(op)
    scored = [(cos(vec, anchor_vec), code) for code, anchor_vec in anc]
    # Highest similarity first; equal similarities fall back to code order.
    scored.sort(reverse=True)
    best_sim, best_code = scored[0]
    second_sim = scored[1][0]
    res.append((op, best_code, best_sim, best_sim - second_sim))
|
|
|
|
# Statistics and report.
from collections import Counter

# How often each code was chosen as the best match.
dist = Counter(cod for _, cod, _, _ in res)
# Operations whose best similarity reaches the confidence threshold.
acc = sum(sim >= THR for _, _, sim, _ in res)
# Operations whose best match beats the runner-up by less than 0.03.
amb = sum(m < 0.03 for _, _, _, m in res)

# Agreement with the heuristic on the subset it can label. heur() is evaluated
# once per operation and its answer is kept next to the embedding's code.
# NOTE(review): heur() can return "NUL", which is not an ANCORE key, so those
# cases always count as disagreements - confirm this is intended.
ev = []
for op, cod, _, _ in res:
    ref = heur(op)
    if ref is not None:
        ev.append((op, cod, ref))
agree = sum(cod == ref for _, cod, ref in ev)

print(f"\n--- {N} operatii in {time.time()-t0:.0f}s ({(time.time()-t0)/N:.1f}s/op) ---")
print("Distributie coduri:", dict(dist.most_common()))
print(f"Peste prag {THR}: {acc}/{N} ({100*acc//N}%) | Ambigue(marja<0.03): {amb}")
print(f"Acord cu heuristica pe {len(ev)} cazuri clare: {agree}/{len(ev)} ({100*agree//max(len(ev),1)}%)")

print("\nDEZACORDURI fata de heuristica (de inspectat):")
for op, cod, ref in ev:
    if ref != cod:
        print(f" {op:<40} embed={cod:<6} heur={ref}")

print("\nESANTION (primele 45):")
for op, cod, sim, m in res[:45]:
    # LOW takes precedence over AMB; confident, unambiguous rows get no flag.
    if sim < THR:
        flag = "LOW"
    elif m < 0.03:
        flag = "AMB"
    else:
        flag = ""
    print(f" {op:<42}{cod:<6}{sim:.3f} {flag}")
|