"""
Match GoMag SKUs → ROA id_articol by matching order lines on unit price.

For each matched order-invoice pair, compare lines by price to discover mappings.
Output: SQL for nom_articole codmat updates + CSV for ARTICOLE_TERTI mappings.

Run as a script. All work happens in main(); importing the module only defines
the helpers, so the matching logic can be exercised without a database.
"""
import csv
import os
import sqlite3
import sys
from collections import defaultdict
from contextlib import closing
from datetime import date
from difflib import SequenceMatcher
from itertools import combinations

ORACLE_CLIENT_DIR = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
SQLITE_PATH = r'C:\gomag-vending\api\data\import.db'
OUT_DIR = r'C:\gomag-vending\scripts\output'

# NOTE(review): credentials are hard-coded in source; consider moving them to
# the environment or a config file that is not committed.
ORACLE_USER = 'VENDING'
ORACLE_PASSWORD = 'ROMFASTSOFT'
ORACLE_DSN = 'ROA'

# Invoice lines with these codmat values are not products and are never matched.
NON_PRODUCT_CODMATS = ('TRANSPORT', 'DISCOUNT')

# An invoice is a candidate for an order only within this many calendar days.
MAX_DATE_GAP_DAYS = 3


def parse_day(value):
    """Return the date held in the first 10 characters of *value* (YYYY-MM-DD).

    Returns None when the value does not start with a valid ISO date
    (including None itself).
    """
    try:
        return date.fromisoformat(str(value)[:10])
    except ValueError:
        return None


def net_of_vat(amount, vat_pct):
    """Strip VAT from *amount*; a falsy *vat_pct* leaves the amount unchanged."""
    return amount / (1 + vat_pct / 100) if vat_pct else amount


def sql_literal(value):
    """Return *value* as a single-quoted SQL string literal with quotes doubled."""
    return "'" + str(value).replace("'", "''") + "'"


def one_line(text, limit):
    """Collapse whitespace (including newlines) in *text* and cut it to *limit* chars.

    Used for text placed inside `--` SQL comments, where a newline would end
    the comment and let the remainder be read as SQL.
    """
    return ' '.join((text or '').split())[:limit]


# --- Load GoMag orders ---

def load_orders(db_path):
    """Load all GoMag orders, each with its lines under the 'items' key."""
    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect(db_path)) as db:
        db.row_factory = sqlite3.Row
        c = db.cursor()
        c.execute("SELECT order_number, order_date, customer_name, status, order_total FROM orders ORDER BY order_date DESC")
        orders = [dict(r) for r in c.fetchall()]
        for order in orders:
            c.execute("SELECT sku, product_name, quantity, price, vat FROM order_items WHERE order_number = ? ORDER BY sku", (order['order_number'],))
            order['items'] = [dict(r) for r in c.fetchall()]
    return orders


# --- Load Oracle invoices ---

def connect_oracle():
    """Initialise the Oracle client libraries and open the ROA connection."""
    # Imported here so the pure matching helpers stay importable on machines
    # without the Oracle driver installed.
    import oracledb

    os.environ['PATH'] = ORACLE_CLIENT_DIR + ';' + os.environ.get('PATH', '')
    oracledb.init_oracle_client()
    return oracledb.connect(user=ORACLE_USER, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)


def load_invoices(conn, min_date, max_date):
    """Load invoices dated within [min_date - 3, max_date + 3] and their lines.

    *min_date* / *max_date* are 'YYYY-MM-DD' strings. Returns a list of invoice
    dicts, each with its detail lines under the 'items' key.
    """
    cur = conn.cursor()
    cur.execute("""
        SELECT v.id_vanzare, v.numar_act, v.serie_act,
               TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
               v.total_fara_tva, v.total_cu_tva, v.id_part,
               p.denumire as partener, p.prenume
        FROM vanzari v
        LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
        WHERE v.sters = 0
          AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 3
          AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 3
          AND v.total_cu_tva > 0
        ORDER BY v.data_act DESC
    """, [min_date, max_date])

    invoices = []
    inv_map = {}
    for r in cur:
        inv = {
            'id_vanzare': r[0],
            'numar_act': r[1],
            'serie_act': r[2] or '',
            'data_act': r[3],
            'total_fara_tva': float(r[4] or 0),
            'total_cu_tva': float(r[5] or 0),
            'id_part': r[6],
            'partener': ((r[7] or '') + ' ' + (r[8] or '')).strip(),
            'items': [],
        }
        invoices.append(inv)
        inv_map[inv['id_vanzare']] = inv

    # Detail lines are fetched in batches of 500 ids per IN (...) list.
    inv_ids = [inv['id_vanzare'] for inv in invoices]
    for i in range(0, len(inv_ids), 500):
        batch = inv_ids[i:i+500]
        placeholders = ",".join([f":d{j}" for j in range(len(batch))])
        params = {f"d{j}": did for j, did in enumerate(batch)}
        cur.execute(f"""
            SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire,
                   vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
            FROM vanzari_detalii vd
            LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
            WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0
            ORDER BY vd.id_vanzare, vd.id_articol
        """, params)
        for r in cur:
            inv_map[r[0]]['items'].append({
                'id_articol': r[1],
                'codmat': r[2],
                'denumire': r[3],
                'cantitate': float(r[4] or 0),
                'pret': float(r[5] or 0),
                'pret_cu_tva': float(r[6] or 0),
                'tva_pct': float(r[7] or 0),
            })
    return invoices


# --- Match orders → invoices ---

def normalize_name(name):
    """Upper-case a customer name and strip common company/person prefixes."""
    if not name:
        return ''
    n = name.strip().upper()
    for old, new in [('S.R.L.', 'SRL'), ('S.R.L', 'SRL'), ('SC ', ''), ('PFA ', ''), ('PF ', '')]:
        n = n.replace(old, new)
    return n


def name_similarity(n1, n2):
    """Return a 0..1 similarity of two names after normalisation.

    When the first name has at least two words it is also compared with its
    words reversed ("first last" vs "last first"); the better ratio wins.
    """
    nn1 = normalize_name(n1)
    nn2 = normalize_name(n2)
    if not nn1 or not nn2:
        return 0
    sim1 = SequenceMatcher(None, nn1, nn2).ratio()
    words1 = nn1.split()
    if len(words1) >= 2:
        sim2 = SequenceMatcher(None, ' '.join(reversed(words1)), nn2).ratio()
        return max(sim1, sim2)
    return sim1


def match_orders_to_invoices(orders, invoices):
    """Greedily pair each order with its best-scoring unused invoice.

    Orders are processed from the largest total down. A candidate invoice must
    be dated within MAX_DATE_GAP_DAYS calendar days and have a total within
    15% or 15 currency units of the order total. The score blends name
    similarity (45%), total closeness (40%) and date closeness (15%); pairs
    scoring 0.45 or less are discarded.

    Returns a list of {'order', 'invoice', 'score'} dicts.
    """
    # Parse every invoice date once rather than once per (order, invoice) pair.
    inv_days = {inv['id_vanzare']: parse_day(inv['data_act']) for inv in invoices}
    date_scores = {0: 1, 1: 0.7, 2: 0.4}

    matches = []
    used_invoices = set()
    for order in sorted(orders, key=lambda o: -(o['order_total'] or 0)):
        order_day = parse_day(order['order_date'])
        if order_day is None:
            continue  # no usable date, so no invoice can qualify
        order_total = order['order_total'] or 0

        best_match = None
        best_score = 0
        for inv in invoices:
            if inv['id_vanzare'] in used_invoices:
                continue
            inv_day = inv_days[inv['id_vanzare']]
            if inv_day is None:
                continue
            # Real calendar distance: subtracting YYYYMMDD integers is wrong
            # across month/year boundaries (Jan 31 → Feb 1 would read as 70).
            date_diff = abs((order_day - inv_day).days)
            if date_diff > MAX_DATE_GAP_DAYS:
                continue

            total_diff = abs(order_total - inv['total_cu_tva'])
            total_pct = total_diff / max(order_total, 0.01) * 100
            if total_pct > 15 and total_diff > 15:
                continue

            sim = name_similarity(order['customer_name'] or '', inv['partener'])
            date_score = date_scores.get(date_diff, 0.2)
            total_score = 1 - min(total_pct / 100, 1)
            score = sim * 0.45 + total_score * 0.40 + date_score * 0.15
            if score > best_score:
                best_score = score
                best_match = inv

        if best_match and best_score > 0.45:
            matches.append({'order': order, 'invoice': best_match, 'score': best_score})
            used_invoices.add(best_match['id_vanzare'])
    return matches


# --- Match line items by PRICE ---

def pair_by_value(go_items, roa_items, go_remaining, roa_remaining,
                  go_value, roa_value, tolerance):
    """Pair each remaining GoMag line with the first remaining ROA line whose
    value is within *tolerance*; paired indices are removed from both lists."""
    pairs = []
    for gi_idx in list(go_remaining):
        target = go_value(go_items[gi_idx])
        for ri_idx in roa_remaining:
            if abs(target - roa_value(roa_items[ri_idx])) < tolerance:
                pairs.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break  # lists were mutated; stop iterating them
    return pairs


def match_items(go_items, roa_items):
    """Match GoMag order lines to ROA invoice lines.

    GoMag prices include VAT, ROA 'pret' excludes it, so GoMag values are
    converted to net before comparing. Returns a list of
    (go_index, [roa_index, ...]) tuples; a two-element list means one GoMag
    line was matched to a pair of ROA lines.
    """
    def go_line_net(gi):
        return net_of_vat(gi['quantity'] * gi['price'], gi['vat'])

    def roa_line(ri):
        return ri['cantitate'] * ri['pret']

    go_remaining = list(range(len(go_items)))
    roa_remaining = list(range(len(roa_items)))

    # Pass 1: match by net line total (qty * unit price)
    item_matches = pair_by_value(go_items, roa_items, go_remaining, roa_remaining,
                                 go_line_net, roa_line, 0.50)

    # Pass 2: match by net unit price (qty may differ but price is the same)
    item_matches += pair_by_value(go_items, roa_items, go_remaining, roa_remaining,
                                  lambda gi: net_of_vat(gi['price'], gi['vat']),
                                  lambda ri: ri['pret'], 0.02)

    # Pass 3: exactly one line left on each side → pair them
    if len(go_remaining) == 1 and len(roa_remaining) == 1:
        item_matches.append((go_remaining[0], [roa_remaining[0]]))
        go_remaining = []
        roa_remaining = []

    # Pass 4: 1:N — one GoMag line equals the combined total of two ROA lines
    for gi_idx in list(go_remaining):
        if len(roa_remaining) < 2:
            break
        target = go_line_net(go_items[gi_idx])
        # combinations() snapshots roa_remaining, so removing below is safe.
        for ri_idx1, ri_idx2 in combinations(roa_remaining, 2):
            combined = roa_line(roa_items[ri_idx1]) + roa_line(roa_items[ri_idx2])
            if abs(target - combined) < 1.0:
                item_matches.append((gi_idx, [ri_idx1, ri_idx2]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx1)
                roa_remaining.remove(ri_idx2)
                break
    return item_matches


def collect_observations(matches):
    """Build SKU → list of observed (article, quantity ratio) mappings.

    Each observation has 'type' 'simple' (ratio ≈ 1), 'repack' (other ratio)
    or 'set' (one GoMag line matched to several ROA lines).
    """
    sku_observations = defaultdict(list)
    for m in matches:
        o = m['order']
        inv = m['invoice']
        go_items = o['items']
        # Exclude transport/discount and non-positive quantities from ROA
        roa_items = [ri for ri in inv['items']
                     if ri['cantitate'] > 0 and ri['codmat'] not in NON_PRODUCT_CODMATS]

        for gi_idx, ri_indices in match_items(go_items, roa_items):
            gi = go_items[gi_idx]
            ris = [roa_items[i] for i in ri_indices]
            is_set = len(ris) > 1
            go_line_fara = net_of_vat(gi['quantity'] * gi['price'], gi['vat'])
            for ri in ris:
                qty_ratio = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
                obs = {
                    'id_articol': ri['id_articol'],
                    'codmat': ri['codmat'],
                    'denumire': ri['denumire'],
                    'go_qty': gi['quantity'],
                    'roa_qty': ri['cantitate'],
                    'qty_ratio': round(qty_ratio, 4),
                    'go_price': gi['price'],
                    'roa_pret': ri['pret'],
                    'product_name': gi['product_name'],
                    'order': o['order_number'],
                    'factura': f"VM{inv['numar_act']}",
                }
                if is_set:
                    # Share of the GoMag line value carried by this component.
                    ri_line = ri['cantitate'] * ri['pret']
                    obs['type'] = 'set'
                    obs['procent_pret'] = round(ri_line / go_line_fara * 100, 2) if go_line_fara else 0
                else:
                    obs['type'] = 'simple' if abs(qty_ratio - 1) < 0.01 else 'repack'
                sku_observations[gi['sku']].append(obs)
    return sku_observations


# --- Analyze observations: find consistent mappings ---

def classify_observations(sku_observations):
    """Sort SKUs into output buckets.

    Returns a dict with:
      'simple_update'      SKU → info, for nom_articole UPDATE (1:1 mapping)
      'repack_csv'         (SKU, codmat) → info, for ARTICOLE_TERTI
      'set_csv'            (SKU, codmat) → info incl. procent_pret
      'inconsistent'       SKU → observations needing manual review
      'already_has_codmat' SKU → first observation (SKU already equals codmat)
    """
    simple_update = {}
    repack_csv = {}
    set_csv = {}
    inconsistent = {}
    already_has_codmat = {}

    for sku, obs_list in sorted(sku_observations.items()):
        # Any observation showing SKU == CODMAT means it is already mapped
        if any(obs.get('codmat') == sku for obs in obs_list):
            already_has_codmat[sku] = obs_list[0]
            continue

        if any(obs['type'] == 'set' for obs in obs_list):
            # Complex set — keep the first observation per component article
            components = {}
            for obs in obs_list:
                if obs['type'] == 'set':
                    components.setdefault(obs['id_articol'], obs)
            if len(components) >= 2:
                for art_id, obs in components.items():
                    codmat = obs['codmat'] or f"ID:{art_id}"
                    set_csv[(sku, codmat)] = {
                        'id_articol': art_id,
                        'cantitate_roa': obs['qty_ratio'],
                        'procent_pret': obs['procent_pret'],
                        'denumire': obs['denumire'],
                        'product_name': obs['product_name'],
                    }
            else:
                # A "set" with fewer than two distinct articles cannot be
                # exported as one; send it to manual review.
                inconsistent[sku] = obs_list
            continue

        by_articol = defaultdict(list)
        for obs in obs_list:
            by_articol[obs['id_articol']].append(obs)

        if len(by_articol) != 1:
            # Multiple different articles for same SKU across orders
            inconsistent[sku] = obs_list
            continue

        # All observations point to the same article
        (art_id, art_obs), = by_articol.items()
        obs = art_obs[0]
        ratios = [a['qty_ratio'] for a in art_obs]
        avg_ratio = sum(ratios) / len(ratios)
        if not all(abs(r - avg_ratio) < 0.01 for r in ratios):
            inconsistent[sku] = obs_list
        elif abs(avg_ratio - 1.0) < 0.01:
            # Simple 1:1
            simple_update[sku] = {
                'id_articol': art_id,
                'codmat_actual': obs['codmat'],
                'denumire': obs['denumire'],
                'product_name': obs['product_name'],
                'observations': len(art_obs),
            }
        else:
            # Repackaging
            codmat = obs['codmat'] or f"ID:{art_id}"
            repack_csv[(sku, codmat)] = {
                'id_articol': art_id,
                'cantitate_roa': round(avg_ratio, 3),
                'denumire': obs['denumire'],
                'product_name': obs['product_name'],
                'observations': len(art_obs),
            }

    return {
        'simple_update': simple_update,
        'repack_csv': repack_csv,
        'set_csv': set_csv,
        'inconsistent': inconsistent,
        'already_has_codmat': already_has_codmat,
    }


# --- Output ---

def print_results(result):
    """Print the per-bucket summary to stdout."""
    simple_update = result['simple_update']
    repack_csv = result['repack_csv']
    set_csv = result['set_csv']
    inconsistent = result['inconsistent']

    print(f"\n{'='*80}")
    print(f"RESULTS")
    print(f"{'='*80}")

    print(f"\n--- Already mapped (SKU == CODMAT): {len(result['already_has_codmat'])} ---")

    # denumire comes from a LEFT JOIN and may be None, hence the `or ''` guards.
    print(f"\n--- Simple 1:1 → UPDATE nom_articole SET codmat = SKU: {len(simple_update)} ---")
    for sku, info in sorted(simple_update.items()):
        print(f" {sku:25s} → id_articol={info['id_articol']:6d} codmat_actual='{info['codmat_actual'] or ''}' [{(info['denumire'] or '')[:40]}] ({info['observations']} obs)")

    print(f"\n--- Repackaging → ARTICOLE_TERTI: {len(repack_csv)} ---")
    for (sku, codmat), info in sorted(repack_csv.items()):
        print(f" {sku:25s} → {codmat:15s} x{info['cantitate_roa']} id_art={info['id_articol']} [{(info['denumire'] or '')[:35]}] ({info['observations']} obs)")

    print(f"\n--- Complex sets → ARTICOLE_TERTI: {len(set_csv)} ---")
    for (sku, codmat), info in sorted(set_csv.items()):
        print(f" {sku:25s} → {codmat:15s} {info['procent_pret']:6.2f}% x{info['cantitate_roa']} [{(info['denumire'] or '')[:35]}]")

    print(f"\n--- Inconsistent (different articles across orders): {len(inconsistent)} ---")
    for sku, obs_list in sorted(inconsistent.items()):
        # Sorted for a stable, reproducible listing.
        arts = sorted({(o['id_articol'], (o['denumire'] or '')[:30]) for o in obs_list}, key=str)
        print(f" {sku:25s} → {len(arts)} different articles: {'; '.join(f'id={a[0]}({a[1]})' for a in arts)}")


def write_outputs(out_dir, result):
    """Write update_codmat.sql and the three CSV files into *out_dir*."""
    simple_update = result['simple_update']
    repack_csv = result['repack_csv']
    set_csv = result['set_csv']
    inconsistent = result['inconsistent']

    os.makedirs(out_dir, exist_ok=True)

    # Write SQL for simple updates
    with open(os.path.join(out_dir, 'update_codmat.sql'), 'w', encoding='utf-8') as f:
        f.write("-- UPDATE nom_articole: set codmat = GoMag SKU for 1:1 mappings\n")
        f.write("-- Generated from invoice-order matching\n")
        f.write("-- VERIFY BEFORE RUNNING!\n\n")
        for sku, info in sorted(simple_update.items()):
            f.write(f"-- {one_line(info['product_name'], 60)} → {one_line(info['denumire'], 60)}\n")
            f.write(f"-- Current codmat: '{one_line(info['codmat_actual'], 60)}' | {info['observations']} order matches\n")
            # The SKU is external data: quote it so an apostrophe cannot break
            # out of the string literal.
            f.write(f"UPDATE nom_articole SET codmat = {sql_literal(sku)} WHERE id_articol = {info['id_articol']} AND sters = 0;\n\n")

    # Write CSV for repackaging (ARTICOLE_TERTI format)
    with open(os.path.join(out_dir, 'repack_mappings.csv'), 'w', newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa', 'observations'])
        for (sku, codmat), info in sorted(repack_csv.items()):
            w.writerow([sku, codmat, info['cantitate_roa'], 100, info['id_articol'], info['product_name'], info['denumire'], info['observations']])

    # Write CSV for sets
    with open(os.path.join(out_dir, 'set_mappings.csv'), 'w', newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa'])
        for (sku, codmat), info in sorted(set_csv.items()):
            w.writerow([sku, codmat, info['cantitate_roa'], info['procent_pret'], info['id_articol'], info['product_name'], info['denumire']])

    # Write inconsistent for manual review
    with open(os.path.join(out_dir, 'inconsistent_skus.csv'), 'w', newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow(['sku', 'product_name', 'id_articol', 'codmat', 'denumire_roa', 'qty_ratio', 'type', 'order', 'factura'])
        for sku, obs_list in sorted(inconsistent.items()):
            for obs in obs_list:
                w.writerow([sku, obs['product_name'], obs['id_articol'], obs['codmat'] or '', obs['denumire'], obs['qty_ratio'], obs['type'], obs['order'], obs['factura']])

    print(f"\nOutput written to {out_dir}:")
    print(f" update_codmat.sql - {len(simple_update)} SQL updates for nom_articole")
    print(f" repack_mappings.csv - {len(repack_csv)} repackaging mappings")
    print(f" set_mappings.csv - {len(set_csv)} complex set mappings")
    print(f" inconsistent_skus.csv - {len(inconsistent)} SKUs needing manual review")


def main():
    """Run the full pipeline: load, match, classify, report, write files."""
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')

    orders = load_orders(SQLITE_PATH)
    print(f"Loaded {len(orders)} GoMag orders")

    # Only orders with a parseable date can define the invoice date window.
    order_days = [d for d in (parse_day(o['order_date']) for o in orders) if d is not None]
    if not order_days:
        sys.exit("No GoMag orders with a valid order_date; nothing to match.")

    conn = connect_oracle()
    try:
        invoices = load_invoices(conn, min(order_days).isoformat(), max(order_days).isoformat())
    finally:
        conn.close()
    print(f"Loaded {len(invoices)} Oracle invoices")

    matches = match_orders_to_invoices(orders, invoices)
    print(f"Matched: {len(matches)} orders → invoices")

    sku_observations = collect_observations(matches)
    print(f"\n{'='*80}")
    print(f"ANALYSIS: {len(sku_observations)} unique SKUs with observations")
    print(f"{'='*80}")

    result = classify_observations(sku_observations)
    print_results(result)
    write_outputs(OUT_DIR, result)


if __name__ == "__main__":
    main()