""" Match ALL GoMag orders (SQLite) with manual invoices (Oracle vanzari) by date + client name + total value. Then compare line items to discover SKU → CODMAT mappings. """ import oracledb import os import sys import sqlite3 import csv from difflib import SequenceMatcher sys.stdout.reconfigure(encoding='utf-8', errors='replace') os.environ['PATH'] = r'C:\app\Server\product\18.0.0\dbhomeXE\bin' + ';' + os.environ.get('PATH','') oracledb.init_oracle_client() # --- Step 1: Get ALL GoMag orders from SQLite --- print("=" * 80) print("STEP 1: Loading ALL GoMag orders from SQLite") print("=" * 80) db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db') db.row_factory = sqlite3.Row c = db.cursor() # ALL orders, not just IMPORTED c.execute(""" SELECT order_number, order_date, customer_name, status, id_comanda, order_total, billing_name, shipping_name FROM orders ORDER BY order_date DESC """) orders = [dict(r) for r in c.fetchall()] # Get order items for order in orders: c.execute(""" SELECT sku, product_name, quantity, price, vat, mapping_status FROM order_items WHERE order_number = ? ORDER BY sku """, (order['order_number'],)) order['items'] = [dict(r) for r in c.fetchall()] db.close() by_status = {} for o in orders: by_status.setdefault(o['status'], 0) by_status[o['status']] += 1 print(f"Loaded {len(orders)} GoMag orders: {by_status}") # --- Step 2: Get Oracle invoices with date range matching orders --- print() print("=" * 80) print("STEP 2: Loading Oracle invoices (vanzari + detalii)") print("=" * 80) conn = oracledb.connect(user='VENDING', password='ROMFASTSOFT', dsn='ROA') cur = conn.cursor() # Get date range from orders min_date = min(str(o['order_date'])[:10] for o in orders) max_date = max(str(o['order_date'])[:10] for o in orders) print(f"Order date range: {min_date} to {max_date}") # Get vanzari in that range (with some margin) cur.execute(""" SELECT v.id_vanzare, v.numar_act, v.serie_act, TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act, v.total_fara_tva, v.total_cu_tva, v.id_part, p.denumire as partener, p.prenume FROM vanzari v LEFT JOIN nom_parteneri p ON v.id_part = p.id_part WHERE v.sters = 0 AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 2 AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 2 AND v.total_cu_tva > 0 ORDER BY v.data_act DESC """, [min_date, max_date]) invoices = [] for r in cur: inv = { 'id_vanzare': r[0], 'numar_act': r[1], 'serie_act': r[2] or '', 'data_act': r[3], 'total_fara_tva': float(r[4] or 0), 'total_cu_tva': float(r[5] or 0), 'id_part': r[6], 'partener': ((r[7] or '') + ' ' + (r[8] or '')).strip(), } invoices.append(inv) print(f"Loaded {len(invoices)} Oracle invoices in range {min_date} - {max_date}") # Get detail lines for ALL invoices in one batch inv_ids = [inv['id_vanzare'] for inv in invoices] inv_map = {inv['id_vanzare']: inv for inv in invoices} for inv in invoices: inv['items'] = [] # Batch fetch details for i in range(0, len(inv_ids), 500): batch = inv_ids[i:i+500] placeholders = ",".join([f":d{j}" for j in range(len(batch))]) params = {f"d{j}": did for j, did in enumerate(batch)} cur.execute(f""" SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire, vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav FROM vanzari_detalii vd LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0 ORDER BY vd.id_vanzare, vd.id_articol """, params) for r in cur: inv_map[r[0]]['items'].append({ 'id_articol': r[1], 'codmat': r[2], 'denumire': r[3], 'cantitate': float(r[4] or 0), 'pret': float(r[5] or 0), 'pret_cu_tva': float(r[6] or 0), 'tva_pct': float(r[7] or 0), }) conn.close() # --- Step 3: Fuzzy matching --- print() print("=" * 80) print("STEP 3: Matching orders → invoices (date + name + total)") print("=" * 80) def normalize_name(name): if not name: return '' n = name.strip().upper() for old, new in [('S.R.L.', 'SRL'), ('S.R.L', 'SRL'), ('SC ', ''), ('PFA ', ''), ('PF ', '')]: n = n.replace(old, new) return n def name_similarity(n1, n2): nn1 = normalize_name(n1) nn2 = normalize_name(n2) if not nn1 or not nn2: return 0 # Also try reversed word order (GoMag: "Popescu Ion", ROA: "ION POPESCU") sim1 = SequenceMatcher(None, nn1, nn2).ratio() words1 = nn1.split() if len(words1) >= 2: reversed1 = ' '.join(reversed(words1)) sim2 = SequenceMatcher(None, reversed1, nn2).ratio() return max(sim1, sim2) return sim1 matches = [] unmatched_orders = [] used_invoices = set() # Sort orders by total descending (match big orders first - more unique) orders_sorted = sorted(orders, key=lambda o: -(o['order_total'] or 0)) for order in orders_sorted: best_match = None best_score = 0 order_date = str(order['order_date'])[:10] order_total = order['order_total'] or 0 order_name = order['customer_name'] or '' for inv in invoices: if inv['id_vanzare'] in used_invoices: continue # Date match (must be within +/- 2 days) try: od = int(order_date.replace('-','')) id_ = int(inv['data_act'].replace('-','')) date_diff = abs(od - id_) except: continue if date_diff > 2: continue # Total match (within 10% or 10 lei — more lenient for transport/discount) total_diff = abs(order_total - inv['total_cu_tva']) total_pct = total_diff / max(order_total, 0.01) * 100 if total_pct > 15 and total_diff > 15: continue # Name similarity sim = name_similarity(order_name, inv['partener']) # Also check billing_name/shipping_name sim2 = name_similarity(order.get('billing_name') or '', inv['partener']) sim3 = name_similarity(order.get('shipping_name') or '', inv['partener']) sim = max(sim, sim2, sim3) # Score date_score = 1 if date_diff == 0 else (0.7 if date_diff == 1 else 0.3) total_score = 1 - min(total_pct / 100, 1) score = sim * 0.45 + total_score * 0.40 + date_score * 0.15 if score > best_score: best_score = score best_match = inv if best_match and best_score > 0.45: matches.append({ 'order': order, 'invoice': best_match, 'score': best_score, }) used_invoices.add(best_match['id_vanzare']) else: unmatched_orders.append(order) print(f"Matched: {len(matches)} | Unmatched orders: {len(unmatched_orders)}") matched_statuses = {} for m in matches: s = m['order']['status'] matched_statuses.setdefault(s, 0) matched_statuses[s] += 1 print(f"Matched by status: {matched_statuses}") # --- Step 4: Compare line items --- print() print("=" * 80) print("STEP 4: Line item comparison") print("=" * 80) simple_mappings = [] repack_mappings = [] complex_mappings = [] unresolved = [] match_details = [] for m in matches: o = m['order'] inv = m['invoice'] go_items = o['items'] # Filter out TRANSPORT and DISCOUNT from ROA items roa_items = [ri for ri in inv['items'] if ri['codmat'] not in ('TRANSPORT', 'DISCOUNT', None, '') and ri['cantitate'] > 0] roa_transport = [ri for ri in inv['items'] if ri['codmat'] in ('TRANSPORT', 'DISCOUNT') or ri['cantitate'] < 0] detail = { 'order_number': o['order_number'], 'customer': o['customer_name'], 'order_total': o['order_total'], 'factura': f"{inv['serie_act']}{inv['numar_act']}", 'inv_total': inv['total_cu_tva'], 'score': m['score'], 'go_items': len(go_items), 'roa_items': len(roa_items), 'matched_items': [], 'unresolved_items': [], } go_remaining = list(range(len(go_items))) roa_remaining = list(range(len(roa_items))) item_matches = [] # Pass 1: exact match by codmat (SKU == CODMAT) for gi_idx in list(go_remaining): gi = go_items[gi_idx] for ri_idx in list(roa_remaining): ri = roa_items[ri_idx] if ri['codmat'] and gi['sku'] == ri['codmat']: item_matches.append((gi_idx, [ri_idx])) go_remaining.remove(gi_idx) roa_remaining.remove(ri_idx) break # Pass 2: match by total value (qty * price) for gi_idx in list(go_remaining): gi = go_items[gi_idx] go_total_cu = gi['quantity'] * gi['price'] go_total_fara = go_total_cu / (1 + gi['vat']/100) if gi['vat'] else go_total_cu for ri_idx in list(roa_remaining): ri = roa_items[ri_idx] roa_total_fara = ri['cantitate'] * ri['pret'] roa_total_cu = ri['cantitate'] * ri['pret_cu_tva'] if (abs(go_total_fara - roa_total_fara) < 1.0 or abs(go_total_cu - roa_total_cu) < 1.0 or abs(go_total_cu - roa_total_fara) < 1.0): item_matches.append((gi_idx, [ri_idx])) go_remaining.remove(gi_idx) roa_remaining.remove(ri_idx) break # Pass 3: 1:1 positional match (if same count remaining) if len(go_remaining) == len(roa_remaining) == 1: item_matches.append((go_remaining[0], [roa_remaining[0]])) go_remaining = [] roa_remaining = [] # Pass 4: 1:N by combined total for gi_idx in list(go_remaining): gi = go_items[gi_idx] go_total_cu = gi['quantity'] * gi['price'] go_total_fara = go_total_cu / (1 + gi['vat']/100) if gi['vat'] else go_total_cu if len(roa_remaining) >= 2: # Try all pairs found = False for i_pos, ri_idx1 in enumerate(roa_remaining): for ri_idx2 in roa_remaining[i_pos+1:]: ri1 = roa_items[ri_idx1] ri2 = roa_items[ri_idx2] combined_fara = ri1['cantitate'] * ri1['pret'] + ri2['cantitate'] * ri2['pret'] combined_cu = ri1['cantitate'] * ri1['pret_cu_tva'] + ri2['cantitate'] * ri2['pret_cu_tva'] if (abs(go_total_fara - combined_fara) < 2.0 or abs(go_total_cu - combined_cu) < 2.0): item_matches.append((gi_idx, [ri_idx1, ri_idx2])) go_remaining.remove(gi_idx) roa_remaining.remove(ri_idx1) roa_remaining.remove(ri_idx2) found = True break if found: break # Classify matches for gi_idx, ri_indices in item_matches: gi = go_items[gi_idx] ris = [roa_items[i] for i in ri_indices] if len(ris) == 1: ri = ris[0] if gi['sku'] == ri['codmat']: # Already mapped (SKU == CODMAT) detail['matched_items'].append(f"ALREADY: {gi['sku']} == {ri['codmat']}") simple_mappings.append({ 'sku': gi['sku'], 'codmat': ri['codmat'], 'id_articol': ri['id_articol'], 'type': 'already_equal', 'product_name': gi['product_name'], 'denumire': ri['denumire'], 'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'], 'go_price': gi['price'], 'roa_pret': ri['pret'], }) elif abs(gi['quantity'] - ri['cantitate']) < 0.01: # Simple 1:1 different codmat detail['matched_items'].append(f"SIMPLE: {gi['sku']} → {ri['codmat']}") simple_mappings.append({ 'sku': gi['sku'], 'codmat': ri['codmat'], 'id_articol': ri['id_articol'], 'type': 'simple', 'product_name': gi['product_name'], 'denumire': ri['denumire'], 'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'], 'go_price': gi['price'], 'roa_pret': ri['pret'], }) else: # Repackaging cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1 detail['matched_items'].append(f"REPACK: {gi['sku']} → {ri['codmat']} x{cantitate_roa:.3f}") repack_mappings.append({ 'sku': gi['sku'], 'codmat': ri['codmat'], 'id_articol': ri['id_articol'], 'cantitate_roa': round(cantitate_roa, 3), 'product_name': gi['product_name'], 'denumire': ri['denumire'], 'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'], }) else: # Complex set go_total_cu = gi['quantity'] * gi['price'] go_total_fara = go_total_cu / (1 + gi['vat']/100) if gi['vat'] else go_total_cu for ri in ris: ri_total = ri['cantitate'] * ri['pret'] pct = round(ri_total / go_total_fara * 100, 2) if go_total_fara else 0 cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1 detail['matched_items'].append(f"SET: {gi['sku']} → {ri['codmat']} {pct}%") complex_mappings.append({ 'sku': gi['sku'], 'codmat': ri['codmat'], 'id_articol': ri['id_articol'], 'cantitate_roa': round(cantitate_roa, 3), 'procent_pret': pct, 'product_name': gi['product_name'], 'denumire': ri['denumire'], }) for gi_idx in go_remaining: gi = go_items[gi_idx] remaining_roa = [roa_items[i] for i in roa_remaining] detail['unresolved_items'].append(gi['sku']) unresolved.append({ 'sku': gi['sku'], 'product_name': gi['product_name'], 'quantity': gi['quantity'], 'price': gi['price'], 'order': o['order_number'], 'factura': f"{inv['serie_act']}{inv['numar_act']}", 'roa_remaining': '; '.join([f"{r['codmat'] or '?'}({r['cantitate']}x{r['pret']:.2f}={r['denumire'][:30]})" for r in remaining_roa]), }) match_details.append(detail) # --- Step 5: Deduplicate and summarize --- print() print("=" * 80) print("STEP 5: SUMMARY") print("=" * 80) # Deduplicate simple seen_simple_equal = {} seen_simple_new = {} for m in simple_mappings: key = (m['sku'], m['codmat']) if m['type'] == 'already_equal': seen_simple_equal[key] = m else: seen_simple_new[key] = m seen_repack = {} for m in repack_mappings: key = (m['sku'], m['codmat']) if key not in seen_repack: seen_repack[key] = m seen_complex = {} for m in complex_mappings: key = (m['sku'], m['codmat']) if key not in seen_complex: seen_complex[key] = m # Deduplicate unresolved SKUs seen_unresolved_skus = {} for u in unresolved: if u['sku'] not in seen_unresolved_skus: seen_unresolved_skus[u['sku']] = u print(f"\n--- Already mapped (SKU == CODMAT in nom_articole): {len(seen_simple_equal)} unique ---") for key, m in sorted(seen_simple_equal.items()): print(f" {m['sku']:25s} = {m['codmat']:15s} | {(m['product_name'] or '')[:40]}") print(f"\n--- NEW simple 1:1 (SKU != CODMAT, same qty): {len(seen_simple_new)} unique ---") for key, m in sorted(seen_simple_new.items()): print(f" {m['sku']:25s} → {m['codmat']:15s} | GoMag: {(m['product_name'] or '')[:30]} → ROA: {(m['denumire'] or '')[:30]}") print(f"\n--- Repackaging (different qty): {len(seen_repack)} unique ---") for key, m in sorted(seen_repack.items()): print(f" {m['sku']:25s} → {m['codmat']:15s} x{m['cantitate_roa']} | {(m['product_name'] or '')[:30]} → {(m['denumire'] or '')[:30]}") print(f"\n--- Complex sets (1 SKU → N CODMATs): {len(seen_complex)} unique ---") for key, m in sorted(seen_complex.items()): print(f" {m['sku']:25s} → {m['codmat']:15s} {m['procent_pret']:6.2f}% | {(m['product_name'] or '')[:30]} → {(m['denumire'] or '')[:30]}") print(f"\n--- Unresolved (unique SKUs): {len(seen_unresolved_skus)} ---") for sku, u in sorted(seen_unresolved_skus.items()): print(f" {sku:25s} | {(u['product_name'] or '')[:40]} | example: order={u['order']}") print(f"\n--- Unmatched orders (no invoice found): {len(unmatched_orders)} ---") for o in unmatched_orders[:20]: print(f" {o['order_number']:>12s} | {str(o['order_date'])[:10]} | {(o['customer_name'] or '')[:30]:30s} | {o['order_total'] or 0:10.2f} | {o['status']}") if len(unmatched_orders) > 20: print(f" ... and {len(unmatched_orders) - 20} more") # --- Write output files --- out_dir = r'C:\gomag-vending\scripts\output' os.makedirs(out_dir, exist_ok=True) # Full match report with open(os.path.join(out_dir, 'match_report.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['order_number', 'customer', 'order_total', 'factura', 'inv_total', 'score', 'go_items', 'roa_items', 'matched', 'unresolved']) for d in match_details: w.writerow([d['order_number'], d['customer'], d['order_total'], d['factura'], d['inv_total'], f"{d['score']:.2f}", d['go_items'], d['roa_items'], '; '.join(d['matched_items']), '; '.join(d['unresolved_items'])]) # New simple mappings (SKU → CODMAT where SKU != CODMAT) with open(os.path.join(out_dir, 'simple_new_mappings.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa', 'go_qty', 'roa_qty', 'go_price', 'roa_pret']) for m in seen_simple_new.values(): w.writerow([m['sku'], m['codmat'], m['id_articol'], m['product_name'], m['denumire'], m['go_qty'], m['roa_qty'], m['go_price'], m['roa_pret']]) # Repackaging CSV with open(os.path.join(out_dir, 'repack_mappings.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa']) for m in seen_repack.values(): w.writerow([m['sku'], m['codmat'], m['cantitate_roa'], 100, m['product_name'], m['denumire']]) # Complex sets CSV with open(os.path.join(out_dir, 'complex_mappings.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa']) for m in seen_complex.values(): w.writerow([m['sku'], m['codmat'], round(m['cantitate_roa'], 3), m['procent_pret'], m['product_name'], m['denumire']]) # Unresolved with open(os.path.join(out_dir, 'unresolved.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['sku', 'product_name', 'quantity', 'price', 'order', 'factura', 'roa_remaining_items']) for u in unresolved: w.writerow([u['sku'], u['product_name'], u['quantity'], u['price'], u['order'], u['factura'], u['roa_remaining']]) # Already equal (for reference) with open(os.path.join(out_dir, 'already_mapped.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa']) for m in seen_simple_equal.values(): w.writerow([m['sku'], m['codmat'], m['id_articol'], m['product_name'], m['denumire']]) # Unmatched orders with open(os.path.join(out_dir, 'unmatched_orders.csv'), 'w', newline='', encoding='utf-8') as f: w = csv.writer(f) w.writerow(['order_number', 'order_date', 'customer_name', 'status', 'order_total', 'items_count']) for o in unmatched_orders: w.writerow([o['order_number'], str(o['order_date'])[:10], o['customer_name'], o['status'], o['order_total'], len(o['items'])]) print(f"\nOutput written to {out_dir}:") print(f" match_report.csv - {len(match_details)} matched order-invoice pairs") print(f" already_mapped.csv - {len(seen_simple_equal)} SKU==CODMAT (already OK)") print(f" simple_new_mappings.csv - {len(seen_simple_new)} new SKU→CODMAT (need codmat in nom_articole or ARTICOLE_TERTI)") print(f" repack_mappings.csv - {len(seen_repack)} repackaging") print(f" complex_mappings.csv - {len(seen_complex)} complex sets") print(f" unresolved.csv - {len(unresolved)} unresolved item lines") print(f" unmatched_orders.csv - {len(unmatched_orders)} orders without invoice match")