feat(mapare-llm): pivot PRD 5.14 + tooling etichetare OpenRouter
PRD 5.14 rescris cu pivotul arhitectural: LLM doar etichetator OFFLINE, runtime = clasificator local fara API (fuzzy + embeddings), baza de cunostinte GOLD partajata cross-account (validarea unui service ajuta toate). Decizia 8 (corpus per-cont) SUPERSEDED. Tooling nou OpenRouter (free, familia NVIDIA Nemotron): or_common.py (client + corpus pe frecventa, cheie din .env) + or_modeltest.py (comparatie modele, acord ensemble vs Groq). Masurat: super-120b + nano-9b fiabile, 3/3 unanim pe 87% volum; ultra-550b aruncat. Corpus real (4 CSV service, coloana NR=frecventa) + etichete Groq bootstrap incluse ca date de masurare. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
85
tools/mapare-llm/bigtest.py
Normal file
85
tools/mapare-llm/bigtest.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import json, urllib.request, math, time, csv, glob, random, re, unicodedata
|
||||
|
||||
# Host of the embeddings server that embed() sends its requests to.
IP = "10.0.20.161"
# Model name sent in the "model" field of every embedding request.
MODEL = "nomic-embed-text"
# Number of distinct operations drawn into the random evaluation sample.
N = 140
|
||||
# Fixed seed so the random.sample() draw later in the script is reproducible between runs.
random.seed(42)
|
||||
|
||||
def embed(t):
    """Return the embedding vector for text *t* from the embeddings HTTP endpoint.

    POSTs a JSON body (model name, prompt, ``keep_alive`` of ``"30m"``) to
    ``http://{IP}:11434/api/embeddings`` and returns the ``"embedding"`` field
    of the JSON response. The request times out after 120 seconds; urllib and
    JSON errors propagate to the caller.

    NOTE(review): the port and path look like an Ollama server -- confirm.
    """
    payload = {"model": MODEL, "prompt": t, "keep_alive": "30m"}
    request = urllib.request.Request(
        f"http://{IP}:11434/api/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        body = json.load(response)
    return body["embedding"]
|
||||
def cos(a, b):
    """Return the cosine similarity of the numeric vectors *a* and *b*.

    Components are paired with ``zip``, so when the lengths differ the extra
    components of the longer vector are ignored in the dot product (they still
    contribute to that vector's norm).

    Returns 0.0 when either vector has zero norm (including empty vectors),
    instead of raising ``ZeroDivisionError``.
    """
    dot = sum(x * y for x, y in zip(a, b))
    denom = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    # A zero denominator means at least one vector carries no direction;
    # treat that as "no similarity" rather than crashing the whole run.
    if denom == 0:
        return 0.0
    return dot / denom
|
||||
|
||||
# Anchor descriptions: one free-text description per target code. Each
# description is embedded once and every operation is assigned the code whose
# anchor embedding is most similar to the operation's embedding.
ANCORE = {
    "OE-1": "REPARATIE: inlocuit sau reparat piesa defecta: placute frana, kit ambreaj, kit distributie, amortizoare, rulment, toba esapament, alternator, pompa, radiator, demontat montat piesa, vopsit usa aripa, reparat caroserie, vulcanizare, sudura",
    "OE-2": "INTRETINERE curenta: aerisit frane, gresat, curatat, verificat si completat nivele lichide, intretinere periodica minora",
    "OE-3": "REVIZIE PERIODICA la kilometri: schimb ulei motor si filtru ulei, filtru aer polen combustibil, revizie 15000 30000 60000 km",
    "OE-4": "REGLARE FUNCTIONALA: reglaj faruri, geometrie directie, supape, calibrare senzori, adaptare actuator, fara inlocuire piese",
    "OE-5": "MODIFICARE CONSTRUCTIVA: montare carlig remorcare, GPL, transformare omologata",
    "OE-6": "RECONSTRUCTIE vehicul avariat sever dupa dauna totala",
    "OE-7": "ACTUALIZARE SOFTWARE: update software calculator, programare ECU, codare module, flash",
    "OE-8": "INLOCUIRE SEZONIERA A ANVELOPELOR: montat anvelope iarna vara, schimb sezonier cauciucuri",
    "OE-D": "AVARIE GRAVA sistem directie in urma unui accident",
    "OE-F": "AVARIE GRAVA sistem franare in urma unui accident",
    "OE-C": "AVARIE GRAVA structura caroserie in urma unui accident",
    "OE-S": "AVARIE GRAVA structura sasiu in urma unui accident",
    "OE-R": "AVARIE GRAVA sistem retinere airbag centuri in urma unui accident",
    "OE-A": "AVARIE GRAVA sistem ADAS asistenta condus in urma unui accident",
    "OE-I": "ISTORIC INDICATIE ODOMETRU vehicule inmatriculate anterior in alte tari",
    "AITLV": "INREGISTRARE ATELIER inspectie tahografe limitatoare viteza",
    "R-ODO": "REPARATIE ODOMETRU kilometraj ceas bord",
    "I-ODO": "INLOCUIRE ODOMETRU kilometraj ceas bord",
}
|
||||
|
||||
# Reference heuristic (clear-cut cases only), used as an agreement signal
|
||||
def norm(s):
    """Return *s* upper-cased with combining marks (diacritics) removed.

    The text is upper-cased, decomposed with NFD so accents become separate
    combining characters, and every character of Unicode category ``Mn``
    (nonspacing mark) is dropped.
    """
    decomposed = unicodedata.normalize('NFD', s.upper())
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
|
||||
def heur(op):
    """Return a rule-based reference code for operation text *op*, or None.

    The text is normalised with ``norm`` (upper case, no diacritics) and then
    checked against keyword rules in a fixed priority order; the first rule
    that matches decides the result:

    * ``"NUL"``  -- administrative/non-operation rows matched by the regex
      (ITP, ACHITAT, CONF., FACTUR, MANOPERA, DEPLAS, or a whole line shaped
      like 1-2 letters, 2-3 digits, 3 letters -- presumably a licence plate).
    * ``"OE-8"`` -- tyre/rim keywords.
    * ``"OE-7"`` -- software/coding/programming keywords.
    * ``"OE-3"`` -- periodic service / engine oil / oil filter keywords.
    * ``"OE-4"`` -- adjustment/geometry/calibration keywords.
    * ``"OE-2"`` -- bleeding/greasing/cleaning keywords.
    * ``"OE-1"`` -- replace/repair/paint/fit keywords.

    Returns None when no rule fires, meaning the case is not evaluated.
    """
    s = norm(op)

    def has(*keys):
        # True when any of the given keywords occurs anywhere in the text.
        return any(k in s for k in keys)

    if re.search(r'\bITP\b|ACHITAT|CONF\.|FACTUR|MANOPERA|DEPLAS|^[A-Z]{1,2} ?\d{2,3} ?[A-Z]{3}$', s):
        return "NUL"
    if has('ANVELOP', 'CAUCIUC', 'JANT'):
        return "OE-8"
    if has('SOFTWARE', 'CODARE', 'PROGRAMARE', 'UPDATE'):
        return "OE-7"
    if has('REVIZIE', 'FILTRU ULEI') or ('ULEI' in s and 'MOTOR' in s):
        return "OE-3"
    if has('REGLAJ', 'REGLARE', 'GEOMETRIE', 'CALIBRARE'):
        return "OE-4"
    if has('AERISIT', 'GRESAT', 'CURATAT'):
        return "OE-2"
    if s.startswith(('INLOCUIT', 'INLOC', 'D/R')) or has('REPARAT', 'VOPSIT', 'SCHIMBAT', 'MONTAT'):
        return "OE-1"
    # Unknown -> this operation is left out of the agreement evaluation.
    return None
|
||||
|
||||
from collections import Counter

# --- Load every distinct operation text (second ';'-separated column) --------
ops = set()
for f in sorted(glob.glob("/workspace/autopass/docs/operatii-service/*.csv")):
    # The context manager closes each file; newline="" is what the csv module
    # expects so that quoted fields containing line breaks are parsed correctly.
    with open(f, encoding="utf-8", errors="replace", newline="") as fh:
        rows = csv.reader(fh, delimiter=";")
        next(rows, None)  # skip the header row
        for r in rows:
            if len(r) > 1 and r[1].strip():
                ops.add(r[1].strip())
ops = sorted(ops)
if not ops:
    # Nothing to evaluate; stop before the per-operation averages divide by zero.
    raise SystemExit("Nicio operatie gasita in fisierele CSV")

# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the corpus size; all statistics use the real size.
n_sample = min(N, len(ops))
sample = random.sample(ops, n_sample)
print(f"{len(ops)} operatii distincte; esantion random {n_sample}\n", flush=True)

# --- Embed the anchor descriptions once --------------------------------------
t0 = time.time()
anc = [(c, embed(d)) for c, d in ANCORE.items()]
print(f"ancore embed: {time.time()-t0:.0f}s", flush=True)

THR = 0.55      # confidence threshold on the best similarity
AMB_THR = 0.03  # a best-vs-second margin below this counts as ambiguous

# --- Classify each sampled operation by nearest anchor ------------------------
# Each result is (operation, best code, best similarity, margin to runner-up).
res = []
for op in sample:
    e = embed(op)
    rang = sorted(((cos(e, v), c) for c, v in anc), reverse=True)
    sim, cod = rang[0]
    sim2 = rang[1][0]
    res.append((op, cod, sim, sim - sim2))

# --- Statistics ---------------------------------------------------------------
dist = Counter(c for _, c, _, _ in res)
acc = sum(1 for _, _, s, _ in res if s >= THR)
amb = sum(1 for _, _, _, m in res if m < AMB_THR)

# Agreement with the heuristic on the clear-cut subset. The heuristic label is
# computed once per operation (sampled operations are distinct) and reused.
ref = {op: heur(op) for op, _, _, _ in res}
ev = [(op, cod) for op, cod, _, _ in res if ref[op] is not None]
agree = sum(1 for op, cod in ev if ref[op] == cod)

# Elapsed time is measured from t0, so it also includes the anchor embeddings.
elapsed = time.time() - t0
print(f"\n--- {n_sample} operatii in {elapsed:.0f}s ({elapsed/n_sample:.1f}s/op) ---")
print("Distributie coduri:", dict(dist.most_common()))
print(f"Peste prag {THR}: {acc}/{n_sample} ({100*acc//n_sample}%) | Ambigue(marja<{AMB_THR}): {amb}")
print(f"Acord cu heuristica pe {len(ev)} cazuri clare: {agree}/{len(ev)} ({100*agree//max(len(ev),1)}%)")

print("\nDEZACORDURI fata de heuristica (de inspectat):")
for op, cod in ev:
    if ref[op] != cod:
        print(f" {op:<40} embed={cod:<6} heur={ref[op]}")

print("\nESANTION (primele 45):")
for op, cod, sim, m in res[:45]:
    flag = "LOW" if sim < THR else ("AMB" if m < AMB_THR else "")
    print(f" {op:<42}{cod:<6}{sim:.3f} {flag}")
|
||||
Reference in New Issue
Block a user