"""Etichetator offline operatii service -> coduri RAR (US-002, PRD 5.18). Backend implicit = **LM Studio local** (Qwen3-4B, GPU RX 6600M via Tailscale), backend-ul APROBAT pentru bootstrap-ul v1 (decizia D4). Groq / OpenRouter raman fallback-uri interschimbabile, dar NU sunt calea aprobata pentru v1. Particularitati care justifica un tool NOU (nu reuse de `or_common.call`): - LM Studio RESPINGE `response_format: json_object` (eroare 400). Cere envelope `json_schema` STRICT complet: {"type":"json_schema","json_schema":{...,"strict":true}}. - `cod` e ENUM peste cele 19 etichete (18 coduri RAR + NUL) -> modelul nu poate inventa coduri; orice abatere e prinsa de garda de truncare ('?'). - Qwen3 emite `...` daca nu dezactivam thinking-ul -> umfla tokeni/latenta sub structured output strict. Punem `/no_think` in promptul de sistem. Setari conservatoare OBLIGATORII pe GPU-box (a facut shutdown sub sarcina 2026-06-29, probabil termic/alimentare): in LM Studio incarca modelul cu `n_parallel=1`, `n_ctx=4096`, batch 32-40, monitorizeaza temperatura. NU mari batch/context fara headroom termic. Vezi memorie `lmstudio-gpu-etichetare`. Reutilizeaza din `or_common`: scrub-ul PII (F3) si lista de coduri. """ from __future__ import annotations import json import os import sys import time import urllib.error import urllib.request from dataclasses import dataclass # --- Coduri + scrub PII: sursa de adevar = or_common (acelasi nomenclator de etichete) --- import importlib.util as _ilu _OR_PATH = os.path.join(os.path.dirname(__file__), "or_common.py") _spec = _ilu.spec_from_file_location("or_common", _OR_PATH) or_common = _ilu.module_from_spec(_spec) sys.modules.setdefault("or_common", or_common) _spec.loader.exec_module(or_common) scrub = or_common.scrub # VIN/placuta -> [VIN]/[NR] # Cele 19 etichete (18 coduri RAR + NUL), extrase din CODURI (sursa unica or_common). 
# Keep only the text to the left of "=" in each comma-separated CODURI entry.
ALL_LABELS: list[str] = [
    c.split("=")[0].strip() for c in or_common.CODURI.replace(", ", ",").split(",")
]
# Import-time sanity check: exactly 19 labels are expected, NUL included.
assert "NUL" in ALL_LABELS and len(ALL_LABELS) == 19, ALL_LABELS
_VALID = set(ALL_LABELS)

# --------------------------------------------------------------------------- #
# Procedural 3-step prompt (versioned)                                         #
# --------------------------------------------------------------------------- #
PROMPT_VERSION = "3pasi-v1"

_CODURI_LISTA = or_common.CODURI

# System prompt sent to the model. It is runtime text (Romanian on purpose):
# any edit changes model behavior and should come with a PROMPT_VERSION bump.
SYS = (
    "Esti expert RAR AUTOPASS. Clasifici fiecare operatie de service-auto in EXACT unul "
    "din aceste coduri:\n" + _CODURI_LISTA + "\n\n"
    "Urmeaza PROCEDURA in 3 pasi, in ordine:\n"
    "PAS 1 (non-operatie -> NUL): daca textul NU e o operatie tehnica de service "
    "(ITP, plata/achitat, discount/reducere, taxa, nr inmatriculare/placuta, manopera "
    "generica, sau DOAR un nume de piesa fara actiune) -> cod = NUL. Opreste-te.\n"
    "PAS 2 (avarie din ACCIDENT -> avarie grava): foloseste codurile de avarie grava DOAR "
    "pentru daune in urma unui accident, pe sistemul avariat:\n"
    " caroserie/structura rezistenta -> OE-C; sasiu -> OE-S; directie -> OE-D; "
    "franare -> OE-F; sistem de retinere/airbag -> OE-R; ADAS (asistenta condus) -> OE-A.\n"
    " Reparatiile curente, de uzura (NU dintr-un accident) NU sunt avarii grave -> mergi la PAS 3.\n"
    "PAS 3 (operatie obisnuita): \n"
    " inlocuire / D-R / reparare / vopsire / retus piese -> OE-1 (REPARATIE);\n"
    " schimb ulei motor + filtre -> OE-3 (REVIZIE PERIODICA);\n"
    " aerisit / gresat / completat nivele -> OE-2 (INTRETINERE);\n"
    " reglare functionala (geometrie directie, faruri, ralanti) -> OE-4;\n"
    " actualizare/programare software -> OE-7; schimb sezonier anvelope -> OE-8;\n"
    " istoric/reparatie/inlocuire odometru -> OE-I / R-ODO / I-ODO; tahograf -> AITLV.\n\n"
    "Raspunde DOAR cu JSON conform schemei. /no_think"
)


def construieste_mesaje(batch: list[str]) -> list[dict]:
    """Build the chat messages: the procedural system prompt plus one user message.

    The user message lists the operations one per line, numbered from 1
    (``"1. <text>"``). Each item goes through ``scrub`` (PII scrub) first.
    """
    user = "\n".join(f"{i}. {scrub(o)}" for i, o in enumerate(batch, start=1))
    return [
        {"role": "system", "content": SYS},
        {"role": "user", "content": user},
    ]


# --------------------------------------------------------------------------- #
# Strict json_schema (complete envelope; LM Studio rejects json_object)        #
# --------------------------------------------------------------------------- #
def _response_format() -> dict:
    """Return the ``response_format`` envelope: a strict JSON schema.

    The schema describes ``{"rez": [{"i": <int>, "cod": <label>}, ...]}`` where
    ``cod`` is restricted to ``ALL_LABELS`` and no extra properties are allowed.
    """
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "etichete_operatii",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "rez": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "i": {"type": "integer"},
                                "cod": {"type": "string", "enum": ALL_LABELS},
                            },
                            "required": ["i", "cod"],
                            "additionalProperties": False,
                        },
                    }
                },
                "required": ["rez"],
                "additionalProperties": False,
            },
        },
    }


# --------------------------------------------------------------------------- #
# Backends (LM Studio default; Groq / OpenRouter fallback)                     #
# --------------------------------------------------------------------------- #
@dataclass
class Backend:
    """Resolved connection settings for one chat-completions backend."""

    name: str  # key into _BACKENDS ("lmstudio", "groq", "openrouter")
    url: str  # full chat-completions endpoint URL
    model: str  # model identifier sent in the request body
    api_key: str | None = None  # bearer token; None -> no Authorization header


# Default LM Studio endpoint = GPU box on Tailscale (memory note lmstudio-gpu-etichetare).
_DEFAULT_LMSTUDIO_URL = "http://100.64.151.22:1234/v1/chat/completions"

# Per-backend defaults; "key_env" names the environment variable holding the API key.
_BACKENDS = {
    "lmstudio": {"url": _DEFAULT_LMSTUDIO_URL, "model": "qwen/qwen3-4b", "key_env": None},
    "groq": {
        "url": "https://api.groq.com/openai/v1/chat/completions",
        "model": "llama-3.3-70b-versatile",
        "key_env": "GROQ_KEY",
    },
    "openrouter": {
        "url": "https://openrouter.ai/api/v1/chat/completions",
        "model": "qwen/qwen3-4b:free",
        "key_env": "OPENROUTER_KEY",
    },
}


def get_backend(name: str | None = None) -> Backend:
    """Build the backend from the environment. Default = lmstudio (D4).

    Overrides: ETICHETARE_BACKEND (used when ``name`` is empty),
    ETICHETARE_ENDPOINT, ETICHETARE_MODEL. The API key (Groq / OpenRouter) is
    read from the environment variable named by the backend's ``key_env``;
    LM Studio has no ``key_env`` and gets no key. A key variable that is unset
    yields ``api_key=None``; it is not treated as an error here.

    Raises:
        ValueError: if the resolved backend name is not a key of ``_BACKENDS``.
    """
    name = (name or os.environ.get("ETICHETARE_BACKEND") or "lmstudio").strip().lower()
    if name not in _BACKENDS:
        raise ValueError(f"backend necunoscut: {name} (alege din {list(_BACKENDS)})")
    cfg = _BACKENDS[name]
    url = os.environ.get("ETICHETARE_ENDPOINT") or cfg["url"]
    model = os.environ.get("ETICHETARE_MODEL") or cfg["model"]
    api_key = os.environ.get(cfg["key_env"]) if cfg["key_env"] else None
    return Backend(name=name, url=url, model=model, api_key=api_key)


def construieste_body(batch: list[str], backend: Backend) -> dict:
    """Build the OpenAI-compatible request body with the strict json_schema envelope."""
    return {
        "model": backend.model,
        "messages": construieste_mesaje(batch),
        "temperature": 0,
        "response_format": _response_format(),
    }


# --------------------------------------------------------------------------- #
# Parsing + truncation guard                                                   #
# --------------------------------------------------------------------------- #
def parseaza_raspuns(content: dict, n: int) -> list[str]:
    """Map a ``{"rez": [{i, cod}]}`` response to a list parallel to the batch (len n).

    Truncation/validation guard (F8): missing positions OR codes outside the
    enum become '?'; they are NOT silently hidden. The caller logs how many
    '?' remain. Entries whose ``i`` is missing or not convertible to ``int``
    are skipped; when an index repeats, the last entry wins.
    """
    by_i: dict[int, str] = {}
    for x in content.get("rez") or []:
        try:
            idx = int(x["i"])
        except (KeyError, TypeError, ValueError, OverflowError):
            # OverflowError: json.loads accepts Infinity, and int(inf) raises it.
            continue
        cod = str(x.get("cod") or "").strip().upper()
        by_i[idx] = cod if cod in _VALID else "?"
    # Items are numbered from 1 in the prompt, hence the i + 1 lookup.
    return [by_i.get(i + 1, "?") for i in range(n)]


# --------------------------------------------------------------------------- #
# Transport (injectable in tests)                                              #
# --------------------------------------------------------------------------- #
def _urllib_transport(url: str, headers: dict, payload: dict, timeout: int) -> dict:
    """Send ``payload`` as a JSON request body to ``url`` and return the decoded JSON reply."""
    data = json.dumps(payload).encode()
    req = urllib.request.Request(url, data=data, headers=headers)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return json.load(r)


# HTTP statuses treated as transient and therefore retried.
_RETRYABLE_HTTP = (429, 500, 502, 503)


def _retry_delay(err: urllib.error.HTTPError, attempt: int) -> float:
    """Return the seconds to wait before retrying after a transient HTTP error.

    A usable numeric ``Retry-After`` header wins; otherwise exponential backoff
    capped at 30 s is used. A missing header, an HTTP-date value, zero, negative,
    NaN or infinite values all fall back to the backoff instead of raising.
    """
    backoff = min(2 ** attempt, 30)
    hdrs = getattr(err, "headers", None)
    raw = hdrs.get("retry-after") if hdrs is not None else None
    try:
        wait = float(raw)
    except (TypeError, ValueError):
        return backoff
    # NaN fails both comparisons; infinity is excluded by the upper bound.
    return wait if 0 < wait < float("inf") else backoff


def call(
    batch: list[str],
    backend: Backend,
    *,
    timeout: int = 180,
    max_attempts: int = 5,
    transport=None,
) -> tuple[list[str], dict]:
    """Run one request for one batch. Returns ``(codes, meta)``.

    codes: list parallel to ``batch``; '?' on positions without a valid answer
        (guard F8).
    meta: ``{ms, err, missing}``; ``missing`` = how many '?' remain
        (truncation / invalid code), ``err`` is None on success.
    transport: ``callable(url, headers, payload, timeout) -> dict`` returning the
        OpenAI-style response (injectable in tests; default urllib).

    Failures never propagate as exceptions (other than BaseException such as
    KeyboardInterrupt): the whole batch degrades to '?' and ``meta["err"]`` says
    why. An empty batch returns ``([], meta)`` without contacting the backend.
    """
    n = len(batch)
    # Monotonic clock: the measured duration is immune to wall-clock adjustments.
    t0 = time.monotonic()

    def _meta(err: str | None, missing: int) -> dict:
        return {"ms": int((time.monotonic() - t0) * 1000), "err": err, "missing": missing}

    if not batch:
        # Nothing to label; do not spend a model call on an empty prompt.
        return [], _meta(None, 0)

    transport = transport or _urllib_transport
    body = construieste_body(batch, backend)
    headers = {"Content-Type": "application/json", "User-Agent": "Mozilla/5.0"}
    if backend.api_key:
        headers["Authorization"] = f"Bearer {backend.api_key}"

    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            resp = transport(backend.url, headers, body, timeout)
            content = json.loads(resp["choices"][0]["message"]["content"])
            codes = parseaza_raspuns(content, n)
        except urllib.error.HTTPError as e:
            if e.code not in _RETRYABLE_HTTP:
                return ["?"] * n, _meta(f"HTTP {e.code}", n)
            if not is_last:
                # No point sleeping when no further attempt will follow.
                time.sleep(_retry_delay(e, attempt))
        except Exception as e:  # noqa: BLE001 - graceful degradation, batch becomes '?'
            if is_last:
                return ["?"] * n, _meta(type(e).__name__, n)
            time.sleep(min(2 ** attempt, 20))
        else:
            return codes, _meta(None, codes.count("?"))
    # Reached when every attempt hit a retryable HTTP status (or max_attempts <= 0).
    return ["?"] * n, _meta("max_attempts", n)


if __name__ == "__main__":
    # Manual sanity check: one small batch on the configured backend (default lmstudio).
    probe = sys.argv[1:] or ["13 X ITP", "INLOCUIT PLACUTE FRANA FATA", "SCHIMB ULEI MOTOR SI FILTRE"]
    b = get_backend()
    print(f"backend={b.name} url={b.url} model={b.model}")
    codes, meta = call(probe, b)
    for op, c in zip(probe, codes):
        print(f" {c:6} {op}")
    print("meta:", meta)