Files
gomag-vending/scripts/match_all.py
Claude Agent 3d73d9e422 add: scripts for invoice-order matching and SKU discovery
Analysis scripts to match GoMag orders with Oracle invoices by
date/client/total, then compare line items by price to discover
SKU → id_articol mappings. Generates SQL for nom_articole codmat
updates and CSV for ARTICOLE_TERTI repackaging/set mappings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 12:01:51 +00:00

534 lines
21 KiB
Python

"""
Match ALL GoMag orders (SQLite) with manual invoices (Oracle vanzari)
by date + client name + total value.
Then compare line items to discover SKU → CODMAT mappings.
"""
import oracledb
import os
import sys
import sqlite3
import csv
from difflib import SequenceMatcher
# Force UTF-8 on stdout (unencodable characters are replaced instead of
# raising) - the report below prints non-ASCII characters such as arrows.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Put the Oracle XE bin directory first on PATH before initialising the
# client - presumably so init_oracle_client() can locate the client libraries.
oracle_bin_dir = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
inherited_path = os.environ.get('PATH', '')
os.environ['PATH'] = ';'.join((oracle_bin_dir, inherited_path))
oracledb.init_oracle_client()
# --- Step 1: Get ALL GoMag orders from SQLite ---
print("=" * 80)
print("STEP 1: Loading ALL GoMag orders from SQLite")
print("=" * 80)

db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    # sqlite3.Row lets each row be turned into a dict keyed by column name.
    db.row_factory = sqlite3.Row
    c = db.cursor()

    # ALL orders, not just IMPORTED
    c.execute("""
        SELECT order_number, order_date, customer_name, status,
               id_comanda, order_total, billing_name, shipping_name
        FROM orders
        ORDER BY order_date DESC
    """)
    orders = [dict(r) for r in c.fetchall()]

    # Attach the line items to each order under the 'items' key.
    for order in orders:
        c.execute("""
            SELECT sku, product_name, quantity, price, vat, mapping_status
            FROM order_items
            WHERE order_number = ?
            ORDER BY sku
        """, (order['order_number'],))
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    # Close the connection even when one of the queries above fails.
    db.close()

# Count orders per status for the progress line (plain dict so the printed
# representation stays "{status: count, ...}").
by_status = {}
for o in orders:
    by_status[o['status']] = by_status.get(o['status'], 0) + 1
print(f"Loaded {len(orders)} GoMag orders: {by_status}")
# --- Step 2: Get Oracle invoices with date range matching orders ---
print()
print("=" * 80)
print("STEP 2: Loading Oracle invoices (vanzari + detalii)")
print("=" * 80)

# Date range of the orders. Only the first 10 characters of order_date are
# used (expected to be YYYY-MM-DD, the format passed to TO_DATE below).
# Orders without a date are ignored here so that a NULL does not end up as the
# literal string 'None' in the query parameters.
order_days = [str(o['order_date'])[:10] for o in orders if o['order_date']]
if not order_days:
    # min()/max() would raise ValueError on an empty sequence.
    sys.exit("No dated GoMag orders loaded - nothing to match against Oracle.")
min_date = min(order_days)
max_date = max(order_days)
print(f"Order date range: {min_date} to {max_date}")

# NOTE(review): credentials are hard-coded in source; consider moving them to
# environment variables or a config file that is not committed.
conn = oracledb.connect(user='VENDING', password='ROMFASTSOFT', dsn='ROA')
try:
    cur = conn.cursor()

    # Get vanzari in that range, widened by 2 days on each side.
    cur.execute("""
        SELECT v.id_vanzare, v.numar_act, v.serie_act,
               TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
               v.total_fara_tva, v.total_cu_tva, v.id_part,
               p.denumire as partener, p.prenume
        FROM vanzari v
        LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
        WHERE v.sters = 0
          AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 2
          AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 2
          AND v.total_cu_tva > 0
        ORDER BY v.data_act DESC
    """, [min_date, max_date])

    invoices = []
    for r in cur:
        invoices.append({
            'id_vanzare': r[0],
            'numar_act': r[1],
            'serie_act': r[2] or '',
            'data_act': r[3],
            'total_fara_tva': float(r[4] or 0),
            'total_cu_tva': float(r[5] or 0),
            'id_part': r[6],
            # Partner display name = denumire + prenume, either may be NULL.
            'partener': ((r[7] or '') + ' ' + (r[8] or '')).strip(),
            # Filled by the batched detail query below.
            'items': [],
        })
    print(f"Loaded {len(invoices)} Oracle invoices in range {min_date} - {max_date}")

    # Get detail lines for ALL invoices, 500 ids per query (presumably to stay
    # below Oracle's limit on the number of expressions in an IN list).
    inv_ids = [inv['id_vanzare'] for inv in invoices]
    inv_map = {inv['id_vanzare']: inv for inv in invoices}
    for i in range(0, len(inv_ids), 500):
        batch = inv_ids[i:i + 500]
        # Only generated bind names (:d0, :d1, ...) are interpolated into the
        # SQL text; the id values themselves are passed as bind parameters.
        placeholders = ",".join(f":d{j}" for j in range(len(batch)))
        params = {f"d{j}": did for j, did in enumerate(batch)}
        cur.execute(f"""
            SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire,
                   vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
            FROM vanzari_detalii vd
            LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
            WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0
            ORDER BY vd.id_vanzare, vd.id_articol
        """, params)
        for r in cur:
            inv_map[r[0]]['items'].append({
                'id_articol': r[1],
                'codmat': r[2],
                'denumire': r[3],
                'cantitate': float(r[4] or 0),
                'pret': float(r[5] or 0),
                'pret_cu_tva': float(r[6] or 0),
                'tva_pct': float(r[7] or 0),
            })
finally:
    # Release the Oracle connection even if a query fails.
    conn.close()
# --- Step 3: Fuzzy matching ---
# Section banner: blank line, rule, title, rule.
for banner_line in ("", "=" * 80,
                    "STEP 3: Matching orders → invoices (date + name + total)",
                    "=" * 80):
    print(banner_line)
def normalize_name(name):
    """Return an upper-cased, trimmed name with company noise removed.

    "S.R.L." / "S.R.L" are collapsed to "SRL", and the legal-form tokens
    "SC", "PFA" and "PF" are dropped when they are followed by a space.

    The tokens are removed only when they start a word. Plain substring
    replacement would also eat the tail of ordinary names, e.g.
    "FRANCISC POPESCU" -> "FRANCIPOPESCU".

    Args:
        name: Raw customer/partner name; may be None or empty.

    Returns:
        The normalized name, or '' when ``name`` is falsy.
    """
    import re  # local import: the file's import block does not include re

    if not name:
        return ''
    n = name.strip().upper()
    # Longer spelling first so "S.R.L." does not leave a stray trailing dot.
    n = n.replace('S.R.L.', 'SRL').replace('S.R.L', 'SRL')
    # \b anchors the token at the start of a word; the trailing space is
    # consumed so no double space is left behind.
    return re.sub(r'\b(?:SC|PFA|PF) ', '', n)
def name_similarity(n1, n2):
    """Return a similarity ratio between two names after normalization.

    Both names go through normalize_name(). When the first name has at least
    two words, it is also compared with its words in reverse order
    (GoMag: "Popescu Ion", ROA: "ION POPESCU") and the better ratio is
    returned. Returns 0 when either normalized name is empty.
    """
    left = normalize_name(n1)
    right = normalize_name(n2)
    if not (left and right):
        return 0

    # Candidate spellings of the first name: as given, plus word-reversed.
    candidates = [left]
    tokens = left.split()
    if len(tokens) >= 2:
        candidates.append(' '.join(tokens[::-1]))

    return max(SequenceMatcher(None, cand, right).ratio() for cand in candidates)
# Calendar arithmetic is needed for the +/- 2 day window. Subtracting YYYYMMDD
# integers is wrong across month/year boundaries (2026-02-28 vs 2026-03-01
# would give 73 instead of 1). Imported here because the file's import block
# does not include datetime.
from datetime import date

matches = []
unmatched_orders = []
used_invoices = set()

# Parse every invoice date once; invoices whose date cannot be parsed are
# never candidates.
invoice_days = {}
for inv in invoices:
    try:
        invoice_days[inv['id_vanzare']] = date.fromisoformat(inv['data_act'])
    except (TypeError, ValueError):
        continue

# Sort orders by total descending (match big orders first - more unique)
orders_sorted = sorted(orders, key=lambda o: -(o['order_total'] or 0))

for order in orders_sorted:
    order_total = order['order_total'] or 0

    try:
        order_day = date.fromisoformat(str(order['order_date'])[:10])
    except ValueError:
        # Without a usable date no invoice can fall inside the date window.
        unmatched_orders.append(order)
        continue

    # Any of the three names on the order may be the one used on the invoice.
    order_names = (
        order['customer_name'],
        order.get('billing_name'),
        order.get('shipping_name'),
    )

    best_match = None
    best_score = 0
    for inv in invoices:
        inv_id = inv['id_vanzare']
        if inv_id in used_invoices:
            continue  # each invoice is assigned to at most one order

        inv_day = invoice_days.get(inv_id)
        if inv_day is None:
            continue

        # Date match (must be within +/- 2 days)
        date_diff = abs((order_day - inv_day).days)
        if date_diff > 2:
            continue

        # Total match: rejected only when the difference exceeds BOTH 15% and
        # 15 lei (lenient, to tolerate transport/discount lines).
        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        if total_pct > 15 and total_diff > 15:
            continue

        # Best name similarity over customer/billing/shipping name.
        sim = max(name_similarity(nm or '', inv['partener']) for nm in order_names)

        # Weighted score: name 45%, total 40%, date 15%.
        date_score = 1 if date_diff == 0 else (0.7 if date_diff == 1 else 0.3)
        total_score = 1 - min(total_pct / 100, 1)
        score = sim * 0.45 + total_score * 0.40 + date_score * 0.15

        if score > best_score:
            best_score = score
            best_match = inv

    if best_match and best_score > 0.45:
        matches.append({
            'order': order,
            'invoice': best_match,
            'score': best_score,
        })
        used_invoices.add(best_match['id_vanzare'])
    else:
        unmatched_orders.append(order)

print(f"Matched: {len(matches)} | Unmatched orders: {len(unmatched_orders)}")

# Matched orders per status (plain dict so it prints as "{status: count}").
matched_statuses = {}
for m in matches:
    s = m['order']['status']
    matched_statuses[s] = matched_statuses.get(s, 0) + 1
print(f"Matched by status: {matched_statuses}")
# --- Step 4: Compare line items ---
# Imported here because the file's import block does not include itertools.
from itertools import combinations

print()
print("=" * 80)
print("STEP 4: Line item comparison")
print("=" * 80)


def _go_line_totals(gi):
    """Return (total incl. VAT, total excl. VAT) for one GoMag order line.

    quantity * price is treated as the VAT-inclusive total; the exclusive
    total is derived from the line's vat percentage when one is set.
    """
    total_cu = gi['quantity'] * gi['price']
    total_fara = total_cu / (1 + gi['vat'] / 100) if gi['vat'] else total_cu
    return total_cu, total_fara


simple_mappings = []
repack_mappings = []
complex_mappings = []
unresolved = []
match_details = []

for m in matches:
    o = m['order']
    inv = m['invoice']
    go_items = o['items']
    factura = f"{inv['serie_act']}{inv['numar_act']}"

    # Keep only real article lines: drop TRANSPORT/DISCOUNT, lines without a
    # codmat and lines with non-positive quantity.
    roa_items = [ri for ri in inv['items']
                 if ri['codmat'] not in ('TRANSPORT', 'DISCOUNT', None, '')
                 and ri['cantitate'] > 0]

    detail = {
        'order_number': o['order_number'],
        'customer': o['customer_name'],
        'order_total': o['order_total'],
        'factura': factura,
        'inv_total': inv['total_cu_tva'],
        'score': m['score'],
        'go_items': len(go_items),
        'roa_items': len(roa_items),
        'matched_items': [],
        'unresolved_items': [],
    }

    # Indices of the lines not yet paired on each side.
    go_remaining = list(range(len(go_items)))
    roa_remaining = list(range(len(roa_items)))
    # Each entry: (GoMag line index, [ROA line indices]).
    item_matches = []

    # Pass 1: exact match by codmat (SKU == CODMAT)
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        for ri_idx in roa_remaining:
            ri = roa_items[ri_idx]
            if ri['codmat'] and gi['sku'] == ri['codmat']:
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break  # list was mutated; stop iterating it

    # Pass 2: match by line total (qty * price), within 1.0 on any of:
    # excl-VAT vs excl-VAT, incl-VAT vs incl-VAT, GoMag incl-VAT vs ROA excl-VAT.
    for gi_idx in list(go_remaining):
        go_total_cu, go_total_fara = _go_line_totals(go_items[gi_idx])
        for ri_idx in roa_remaining:
            ri = roa_items[ri_idx]
            roa_total_fara = ri['cantitate'] * ri['pret']
            roa_total_cu = ri['cantitate'] * ri['pret_cu_tva']
            if (abs(go_total_fara - roa_total_fara) < 1.0 or
                    abs(go_total_cu - roa_total_cu) < 1.0 or
                    abs(go_total_cu - roa_total_fara) < 1.0):
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break  # list was mutated; stop iterating it

    # Pass 3: match by elimination - only when exactly one line is left on
    # each side.
    if len(go_remaining) == len(roa_remaining) == 1:
        item_matches.append((go_remaining[0], [roa_remaining[0]]))
        go_remaining = []
        roa_remaining = []

    # Pass 4: 1:2 match - one GoMag line against the combined total of a pair
    # of ROA lines (within 2.0, excl-VAT or incl-VAT).
    for gi_idx in list(go_remaining):
        go_total_cu, go_total_fara = _go_line_totals(go_items[gi_idx])
        # combinations() snapshots roa_remaining, and yields nothing when
        # fewer than two lines are left.
        for ri_idx1, ri_idx2 in combinations(roa_remaining, 2):
            ri1 = roa_items[ri_idx1]
            ri2 = roa_items[ri_idx2]
            combined_fara = ri1['cantitate'] * ri1['pret'] + ri2['cantitate'] * ri2['pret']
            combined_cu = (ri1['cantitate'] * ri1['pret_cu_tva']
                           + ri2['cantitate'] * ri2['pret_cu_tva'])
            if (abs(go_total_fara - combined_fara) < 2.0 or
                    abs(go_total_cu - combined_cu) < 2.0):
                item_matches.append((gi_idx, [ri_idx1, ri_idx2]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx1)
                roa_remaining.remove(ri_idx2)
                break

    # Classify matches
    for gi_idx, ri_indices in item_matches:
        gi = go_items[gi_idx]
        ris = [roa_items[i] for i in ri_indices]

        if len(ris) == 1:
            ri = ris[0]
            if gi['sku'] == ri['codmat']:
                # Already mapped (SKU == CODMAT)
                detail['matched_items'].append(f"ALREADY: {gi['sku']} == {ri['codmat']}")
                simple_mappings.append({
                    'sku': gi['sku'], 'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'type': 'already_equal',
                    'product_name': gi['product_name'], 'denumire': ri['denumire'],
                    'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'],
                    'go_price': gi['price'], 'roa_pret': ri['pret'],
                })
            elif abs(gi['quantity'] - ri['cantitate']) < 0.01:
                # Simple 1:1, different codmat but same quantity
                detail['matched_items'].append(f"SIMPLE: {gi['sku']} → {ri['codmat']}")
                simple_mappings.append({
                    'sku': gi['sku'], 'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'type': 'simple',
                    'product_name': gi['product_name'], 'denumire': ri['denumire'],
                    'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'],
                    'go_price': gi['price'], 'roa_pret': ri['pret'],
                })
            else:
                # Repackaging: ROA units per one GoMag unit
                cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
                detail['matched_items'].append(
                    f"REPACK: {gi['sku']} → {ri['codmat']} x{cantitate_roa:.3f}")
                repack_mappings.append({
                    'sku': gi['sku'], 'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'cantitate_roa': round(cantitate_roa, 3),
                    'product_name': gi['product_name'], 'denumire': ri['denumire'],
                    'go_qty': gi['quantity'], 'roa_qty': ri['cantitate'],
                })
        else:
            # Complex set: one SKU maps to several ROA articles; record each
            # article's share of the GoMag excl-VAT line total.
            _, go_total_fara = _go_line_totals(gi)
            for ri in ris:
                ri_total = ri['cantitate'] * ri['pret']
                pct = round(ri_total / go_total_fara * 100, 2) if go_total_fara else 0
                cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
                detail['matched_items'].append(f"SET: {gi['sku']} → {ri['codmat']} {pct}%")
                complex_mappings.append({
                    'sku': gi['sku'], 'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'cantitate_roa': round(cantitate_roa, 3),
                    'procent_pret': pct,
                    'product_name': gi['product_name'], 'denumire': ri['denumire'],
                })

    # Whatever is still unpaired on the GoMag side is reported as unresolved,
    # together with the ROA lines that were left over on the same invoice.
    if go_remaining:
        roa_left = '; '.join(
            f"{r['codmat'] or '?'}({r['cantitate']}x{r['pret']:.2f}={(r['denumire'] or '')[:30]})"
            for r in (roa_items[i] for i in roa_remaining)
        )
        for gi_idx in go_remaining:
            gi = go_items[gi_idx]
            detail['unresolved_items'].append(gi['sku'])
            unresolved.append({
                'sku': gi['sku'],
                'product_name': gi['product_name'],
                'quantity': gi['quantity'],
                'price': gi['price'],
                'order': o['order_number'],
                'factura': factura,
                'roa_remaining': roa_left,
            })

    match_details.append(detail)
# --- Step 5: Deduplicate and summarize ---
print()
print("=" * 80)
print("STEP 5: SUMMARY")
print("=" * 80)

# Deduplicate simple mappings by (sku, codmat); the last occurrence wins.
seen_simple_equal = {}
seen_simple_new = {}
for m in simple_mappings:
    bucket = seen_simple_equal if m['type'] == 'already_equal' else seen_simple_new
    bucket[(m['sku'], m['codmat'])] = m

# Deduplicate repack / complex mappings by (sku, codmat); the first
# occurrence wins.
seen_repack = {}
for m in repack_mappings:
    seen_repack.setdefault((m['sku'], m['codmat']), m)

seen_complex = {}
for m in complex_mappings:
    seen_complex.setdefault((m['sku'], m['codmat']), m)

# Deduplicate unresolved SKUs, keeping the first example of each.
seen_unresolved_skus = {}
for u in unresolved:
    seen_unresolved_skus.setdefault(u['sku'], u)

print(f"\n--- Already mapped (SKU == CODMAT in nom_articole): {len(seen_simple_equal)} unique ---")
for _, m in sorted(seen_simple_equal.items()):
    print(f" {m['sku']:25s} = {m['codmat']:15s} | {(m['product_name'] or '')[:40]}")

print(f"\n--- NEW simple 1:1 (SKU != CODMAT, same qty): {len(seen_simple_new)} unique ---")
for _, m in sorted(seen_simple_new.items()):
    print(f" {m['sku']:25s} → {m['codmat']:15s} | GoMag: {(m['product_name'] or '')[:30]} → ROA: {(m['denumire'] or '')[:30]}")

print(f"\n--- Repackaging (different qty): {len(seen_repack)} unique ---")
for _, m in sorted(seen_repack.items()):
    print(f" {m['sku']:25s} → {m['codmat']:15s} x{m['cantitate_roa']} | {(m['product_name'] or '')[:30]} → {(m['denumire'] or '')[:30]}")

print(f"\n--- Complex sets (1 SKU → N CODMATs): {len(seen_complex)} unique ---")
for _, m in sorted(seen_complex.items()):
    print(f" {m['sku']:25s} → {m['codmat']:15s} {m['procent_pret']:6.2f}% | {(m['product_name'] or '')[:30]} → {(m['denumire'] or '')[:30]}")

print(f"\n--- Unresolved (unique SKUs): {len(seen_unresolved_skus)} ---")
for sku, u in sorted(seen_unresolved_skus.items()):
    print(f" {sku:25s} | {(u['product_name'] or '')[:40]} | example: order={u['order']}")

# Only the first 20 unmatched orders are listed on screen.
print(f"\n--- Unmatched orders (no invoice found): {len(unmatched_orders)} ---")
for o in unmatched_orders[:20]:
    print(f" {o['order_number']:>12s} | {str(o['order_date'])[:10]} | {(o['customer_name'] or '')[:30]:30s} | {o['order_total'] or 0:10.2f} | {o['status']}")
if len(unmatched_orders) > 20:
    print(f" ... and {len(unmatched_orders) - 20} more")
# --- Write output files ---
out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)


def _dump_csv(filename, header, rows):
    """Write one UTF-8 CSV into out_dir: the header row, then every data row."""
    with open(os.path.join(out_dir, filename), 'w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        writer.writerows(rows)


# Full match report: one row per matched order/invoice pair.
_dump_csv(
    'match_report.csv',
    ['order_number', 'customer', 'order_total', 'factura', 'inv_total', 'score',
     'go_items', 'roa_items', 'matched', 'unresolved'],
    ([d['order_number'], d['customer'], d['order_total'],
      d['factura'], d['inv_total'], f"{d['score']:.2f}",
      d['go_items'], d['roa_items'],
      '; '.join(d['matched_items']),
      '; '.join(d['unresolved_items'])]
     for d in match_details),
)

# New simple mappings (SKU → CODMAT where SKU != CODMAT)
_dump_csv(
    'simple_new_mappings.csv',
    ['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa',
     'go_qty', 'roa_qty', 'go_price', 'roa_pret'],
    ([m['sku'], m['codmat'], m['id_articol'], m['product_name'], m['denumire'],
      m['go_qty'], m['roa_qty'], m['go_price'], m['roa_pret']]
     for m in seen_simple_new.values()),
)

# Repackaging CSV (procent_pret is always 100 for a single target article)
_dump_csv(
    'repack_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    ([m['sku'], m['codmat'], m['cantitate_roa'], 100, m['product_name'], m['denumire']]
     for m in seen_repack.values()),
)

# Complex sets CSV
_dump_csv(
    'complex_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    ([m['sku'], m['codmat'], round(m['cantitate_roa'], 3), m['procent_pret'],
      m['product_name'], m['denumire']]
     for m in seen_complex.values()),
)

# Unresolved (every unresolved line, not deduplicated)
_dump_csv(
    'unresolved.csv',
    ['sku', 'product_name', 'quantity', 'price', 'order', 'factura', 'roa_remaining_items'],
    ([u['sku'], u['product_name'], u['quantity'], u['price'],
      u['order'], u['factura'], u['roa_remaining']]
     for u in unresolved),
)

# Already equal (for reference)
_dump_csv(
    'already_mapped.csv',
    ['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa'],
    ([m['sku'], m['codmat'], m['id_articol'], m['product_name'], m['denumire']]
     for m in seen_simple_equal.values()),
)

# Unmatched orders
_dump_csv(
    'unmatched_orders.csv',
    ['order_number', 'order_date', 'customer_name', 'status', 'order_total', 'items_count'],
    ([o['order_number'], str(o['order_date'])[:10], o['customer_name'],
      o['status'], o['order_total'], len(o['items'])]
     for o in unmatched_orders),
)

# Recap of what was written and how many rows each file holds.
print(f"\nOutput written to {out_dir}:")
print(f" match_report.csv - {len(match_details)} matched order-invoice pairs")
print(f" already_mapped.csv - {len(seen_simple_equal)} SKU==CODMAT (already OK)")
print(f" simple_new_mappings.csv - {len(seen_simple_new)} new SKU→CODMAT (need codmat in nom_articole or ARTICOLE_TERTI)")
print(f" repack_mappings.csv - {len(seen_repack)} repackaging")
print(f" complex_mappings.csv - {len(seen_complex)} complex sets")
print(f" unresolved.csv - {len(unresolved)} unresolved item lines")
print(f" unmatched_orders.csv - {len(unmatched_orders)} orders without invoice match")