add: scripts for invoice-order matching and SKU discovery

Analysis scripts to match GoMag orders with Oracle invoices by
date/client/total, then compare line items by price to discover
SKU → id_articol mappings. Generates SQL for nom_articole codmat
updates and CSV for ARTICOLE_TERTI repackaging/set mappings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-03-17 12:01:51 +00:00
parent dafc2df0d4
commit 3d73d9e422
14 changed files with 2451 additions and 0 deletions

442
scripts/match_invoices.py Normal file
View File

@@ -0,0 +1,442 @@
"""
Match GoMag orders (SQLite) with manual invoices (Oracle vanzari)
by date + client name + total value.
Then compare line items to discover SKU → CODMAT mappings.
"""
import csv
import os
import sqlite3
import sys
from datetime import date
from difflib import SequenceMatcher

import oracledb
# Re-encode console output as UTF-8; characters that cannot be encoded are
# replaced instead of raising.
sys.stdout.reconfigure(errors='replace', encoding='utf-8')

# Put the Oracle XE bin directory at the front of PATH before the Oracle
# client is initialised (presumably so its libraries can be located).
oracle_bin_dir = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
os.environ['PATH'] = ';'.join((oracle_bin_dir, os.environ.get('PATH', '')))
oracledb.init_oracle_client()
# --- Step 1: Get GoMag orders from SQLite ---
print("=" * 80)
print("STEP 1: Loading GoMag orders from SQLite")
print("=" * 80)

# Orders with status IMPORTED or ALREADY_IMPORTED from the last 10 days,
# newest first.
ORDERS_SQL = """
SELECT order_number, order_date, customer_name, status,
id_comanda, order_total, billing_name, shipping_name
FROM orders
WHERE status IN ('IMPORTED', 'ALREADY_IMPORTED')
AND order_date >= date('now', '-10 days')
ORDER BY order_date DESC
"""

# Line items of a single order, looked up by order number.
ORDER_ITEMS_SQL = """
SELECT sku, product_name, quantity, price, vat, mapping_status
FROM order_items
WHERE order_number = ?
ORDER BY sku
"""

db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    # sqlite3.Row lets each row be converted to a plain dict keyed by column.
    db.row_factory = sqlite3.Row
    c = db.cursor()

    c.execute(ORDERS_SQL)
    orders = [dict(r) for r in c.fetchall()]

    # Attach the line items to every order under the 'items' key.
    for order in orders:
        c.execute(ORDER_ITEMS_SQL, (order['order_number'],))
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    # Close the connection even when one of the queries raises.
    db.close()

print(f"Loaded {len(orders)} GoMag orders")
for o in orders:
    print(f" {o['order_number']:>10s} | {str(o['order_date'])[:10]} | {(o['customer_name'] or '')[:35]:35s} | {o['order_total'] or 0:10.2f} | {len(o['items'])} items")
# --- Step 2: Get Oracle invoices (vanzari) with detail lines ---
print()
print("=" * 80)
print("STEP 2: Loading Oracle invoices (vanzari + detalii)")
print("=" * 80)

# Invoice headers with sters = 0 dated within the last 10 days, joined to the
# partner table for the partner name; newest first.
INVOICES_SQL = """
SELECT v.id_vanzare, v.numar_act, v.serie_act,
TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
v.total_fara_tva, v.total_cu_tva, v.id_part,
p.denumire as partener, p.prenume
FROM vanzari v
LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
WHERE v.sters = 0 AND v.data_act >= SYSDATE - 10
ORDER BY v.data_act DESC
"""

# Detail lines (sters = 0) of one invoice, joined to the article table.
# The LEFT JOIN means codmat/denumire can be NULL for a line.
INVOICE_LINES_SQL = """
SELECT vd.id_articol, a.codmat, a.denumire,
vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
FROM vanzari_detalii vd
LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
WHERE vd.id_vanzare = :1 AND vd.sters = 0
ORDER BY vd.id_articol
"""

# Connection settings may be supplied through ROA_DB_USER / ROA_DB_PASSWORD /
# ROA_DB_DSN; the previous literals remain as fallbacks so existing runs keep
# working.
# NOTE(review): the fallback password is still stored in source control —
# remove it once the environment variables are configured everywhere.
conn = oracledb.connect(
    user=os.environ.get('ROA_DB_USER', 'VENDING'),
    password=os.environ.get('ROA_DB_PASSWORD', 'ROMFASTSOFT'),
    dsn=os.environ.get('ROA_DB_DSN', 'ROA'),
)
try:
    cur = conn.cursor()

    # Invoice headers; partner display name is "denumire prenume" when a
    # prenume value is present.
    cur.execute(INVOICES_SQL)
    invoices = []
    for r in cur:
        inv = {
            'id_vanzare': r[0],
            'numar_act': r[1],
            'serie_act': r[2],
            'data_act': r[3],
            'total_fara_tva': float(r[4] or 0),
            'total_cu_tva': float(r[5] or 0),
            'id_part': r[6],
            'partener': (r[7] or '') + (' ' + r[8] if r[8] else ''),
        }
        invoices.append(inv)

    # Detail lines for each invoice; NULL numerics are loaded as 0.0.
    for inv in invoices:
        cur.execute(INVOICE_LINES_SQL, [inv['id_vanzare']])
        inv['items'] = []
        for r in cur:
            inv['items'].append({
                'id_articol': r[0],
                'codmat': r[1],
                'denumire': r[2],
                'cantitate': float(r[3] or 0),
                'pret': float(r[4] or 0),
                'pret_cu_tva': float(r[5] or 0),
                'tva_pct': float(r[6] or 0),
            })
finally:
    # Close the Oracle connection even when a query raises.
    conn.close()

print(f"Loaded {len(invoices)} Oracle invoices")
for inv in invoices:
    print(f" {inv['serie_act']}{str(inv['numar_act']):>6s} | {inv['data_act']} | {inv['partener'][:35]:35s} | {inv['total_cu_tva']:10.2f} | {len(inv['items'])} items")
# --- Step 3: Fuzzy matching ---
# Banner announcing the order-to-invoice matching stage.
step3_rule = "=" * 80
print()
print(step3_rule)
print("STEP 3: Matching orders → invoices (date + name + total)")
print(step3_rule)
def normalize_name(name):
    """Return *name* in a canonical form suitable for fuzzy comparison.

    The name is upper-cased, leading/trailing whitespace is removed, runs of
    internal whitespace are collapsed to a single space, and the dotted
    company suffix ``S.R.L.`` / ``S.R.L`` is rewritten as ``SRL``.

    Falsy input (``None`` or an empty string) yields ``''``.
    """
    if not name:
        return ''
    # split()/join trims the ends and collapses tabs, newlines and repeated
    # spaces, so "POPESCU  ION" and "POPESCU ION" compare as equal.
    collapsed = ' '.join(name.split())
    return collapsed.upper().replace('S.R.L.', 'SRL').replace('S.R.L', 'SRL')
def name_similarity(n1, n2):
    """Return a similarity ratio in [0.0, 1.0] between two party names.

    Both names are passed through normalize_name() and compared with
    difflib.SequenceMatcher. If either normalized name is empty the result
    is 0.0.
    """
    left = normalize_name(n1)
    right = normalize_name(n2)
    # SequenceMatcher.ratio() reports 1.0 for two empty sequences; two missing
    # names are no evidence of a match, so they must not score as identical.
    if not left or not right:
        return 0.0
    return SequenceMatcher(None, left, right).ratio()
def _days_apart(day_a, day_b):
    """Return the absolute number of calendar days between two ISO dates.

    Both arguments are expected to be 'YYYY-MM-DD' strings. Returns None when
    either value cannot be parsed, so the caller can skip the pair.
    """
    try:
        return abs((date.fromisoformat(day_a) - date.fromisoformat(day_b)).days)
    except (TypeError, ValueError):
        return None


# Greedy assignment: orders are processed in list order and each invoice can
# be claimed by at most one order.
matches = []
unmatched_orders = []
used_invoices = set()
for order in orders:
    best_match = None
    best_score = 0
    order_date = str(order['order_date'])[:10]
    order_total = order['order_total'] or 0
    order_name = order['customer_name'] or ''
    for inv in invoices:
        if inv['id_vanzare'] in used_invoices:
            continue
        # Date match (must be same day or +/- 1 day). Real calendar
        # arithmetic is required here: subtracting YYYYMMDD integers reports
        # 73 for 02-28 vs 03-01 and would reject adjacent days that straddle
        # a month or year boundary.
        date_diff = _days_apart(order_date, inv['data_act'])
        if date_diff is None or date_diff > 1:
            continue
        # Total match: accepted when within 5% OR within 5 currency units.
        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        if total_pct > 5 and total_diff > 5:
            continue
        # Name similarity
        sim = name_similarity(order_name, inv['partener'])
        # Score: name similarity (weight 0.5) + total closeness (weight 0.4)
        # + date match (weight 0.1; a one-day offset earns half credit).
        total_score = sim * 0.5 + (1 - min(total_pct/100, 1)) * 0.4 + (1 if date_diff == 0 else 0.5) * 0.1
        if total_score > best_score:
            best_score = total_score
            best_match = inv
    if best_match and best_score > 0.3:
        matches.append({
            'order': order,
            'invoice': best_match,
            'score': best_score,
        })
        used_invoices.add(best_match['id_vanzare'])
    else:
        unmatched_orders.append(order)

print(f"\nMatched: {len(matches)} | Unmatched orders: {len(unmatched_orders)}")
print()
for m in matches:
    o = m['order']
    inv = m['invoice']
    # order_total may be NULL in SQLite; format it as 0 like the other reports.
    print(f" ORDER {o['order_number']} ({(o['customer_name'] or '')[:25]}, {o['order_total'] or 0:.2f})")
    print(f" ↔ FACT {inv['serie_act']}{inv['numar_act']} ({inv['partener'][:25]}, {inv['total_cu_tva']:.2f}) [score={m['score']:.2f}]")
    print()
if unmatched_orders:
    print("Unmatched orders:")
    for o in unmatched_orders:
        print(f" {o['order_number']} | {(o['customer_name'] or '')[:35]} | {o['order_total'] or 0:.2f}")
# --- Step 4: Compare line items for matched pairs ---
# For every matched order/invoice pair, pair GoMag lines with ROA lines by
# comparing line totals, then classify each pairing as a simple mapping, a
# repackaging, or a set (one GoMag line covered by two ROA lines).
print()
print("=" * 80)
print("STEP 4: Line item comparison for matched orders")
print("=" * 80)
# Result buckets, accumulated across all matched pairs.
simple_mappings = []  # SKU → CODMAT, same qty/price → update nom_articole
repack_mappings = []  # SKU → CODMAT, different qty → ARTICOLE_TERTI
complex_mappings = []  # 1 SKU → N CODMATs → ARTICOLE_TERTI with procent_pret
unresolved = []  # Cannot determine mapping
for m in matches:
    o = m['order']
    inv = m['invoice']
    go_items = o['items']
    roa_items = inv['items']
    print(f"\n--- ORDER {o['order_number']} ↔ FACT {inv['serie_act']}{inv['numar_act']} ---")
    print(f" GoMag: {len(go_items)} items | ROA: {len(roa_items)} items")
    # Show items side by side
    # NOTE(review): product_name is sliced without a None guard here (unlike
    # denumire/codmat below) — confirm the column is never NULL.
    print(f" GoMag items:")
    for gi in go_items:
        print(f" SKU={gi['sku']:20s} qty={gi['quantity']:6.1f} price={gi['price']:10.2f} [{gi['product_name'][:40]}]")
    print(f" ROA items:")
    for ri in roa_items:
        print(f" COD={str(ri['codmat'] or ''):20s} qty={ri['cantitate']:6.1f} pret={ri['pret']:10.4f} [{(ri['denumire'] or '')[:40]}]")
    # Try matching by price (unit price with VAT).
    # GoMag price is usually VAT-inclusive, while ROA `pret` can be without
    # VAT, so several total comparisons are attempted.
    # Index lists of lines not yet paired on either side.
    go_remaining = list(range(len(go_items)))
    roa_remaining = list(range(len(roa_items)))
    # Each entry is (GoMag index, [ROA index, ...]).
    item_matches = []
    # First pass: exact 1:1 by total value (qty * price)
    # Iterates over copies because the *_remaining lists are mutated inside.
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_total = gi['quantity'] * gi['price']
        # Total with the VAT rate divided out; unchanged when vat is missing/0.
        go_total_fara = go_total / (1 + gi['vat']/100) if gi['vat'] else go_total
        for ri_idx in list(roa_remaining):
            ri = roa_items[ri_idx]
            roa_total = ri['cantitate'] * ri['pret']
            roa_total_cu = ri['cantitate'] * ri['pret_cu_tva']
            # Match by total (without VAT or with VAT), tolerance 0.5.
            # The third test compares the raw GoMag total with the ROA
            # `pret` total as-is.
            if (abs(go_total_fara - roa_total) < 0.5 or
                    abs(go_total - roa_total_cu) < 0.5 or
                    abs(go_total - roa_total) < 0.5):
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                # First acceptable ROA line wins (greedy).
                break
    # Second pass: 1:N matching (one GoMag item → multiple ROA items)
    # Only pairs of ROA lines are tried; tolerance is 1.0.
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_total = gi['quantity'] * gi['price']
        go_total_fara = go_total / (1 + gi['vat']/100) if gi['vat'] else go_total
        # Try combinations of remaining ROA items
        if len(roa_remaining) >= 2:
            # Try pairs
            for i, ri_idx1 in enumerate(roa_remaining):
                for ri_idx2 in roa_remaining[i+1:]:
                    ri1 = roa_items[ri_idx1]
                    ri2 = roa_items[ri_idx2]
                    combined_total = ri1['cantitate'] * ri1['pret'] + ri2['cantitate'] * ri2['pret']
                    combined_total_cu = ri1['cantitate'] * ri1['pret_cu_tva'] + ri2['cantitate'] * ri2['pret_cu_tva']
                    if (abs(go_total_fara - combined_total) < 1.0 or
                            abs(go_total - combined_total_cu) < 1.0):
                        item_matches.append((gi_idx, [ri_idx1, ri_idx2]))
                        go_remaining.remove(gi_idx)
                        roa_remaining.remove(ri_idx1)
                        roa_remaining.remove(ri_idx2)
                        break
                else:
                    # Inner loop finished without a match: try the next
                    # first element of the pair.
                    continue
                # Inner loop broke out on a match: stop the pair search too,
                # since roa_remaining was just mutated.
                break
    # Report matches
    for gi_idx, ri_indices in item_matches:
        gi = go_items[gi_idx]
        ris = [roa_items[i] for i in ri_indices]
        if len(ris) == 1:
            ri = ris[0]
            # Same quantity?
            if abs(gi['quantity'] - ri['cantitate']) < 0.01:
                # Simple 1:1
                entry = {
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'go_qty': gi['quantity'],
                    'roa_qty': ri['cantitate'],
                    'go_price': gi['price'],
                    'roa_pret': ri['pret'],
                    'order': o['order_number'],
                    'factura': f"{inv['serie_act']}{inv['numar_act']}",
                }
                simple_mappings.append(entry)
                print(f" ✓ SIMPLE: {gi['sku']}{ri['codmat']} (qty {gi['quantity']}={ri['cantitate']})")
            else:
                # Repackaging: totals agree but quantities differ, so record
                # the ROA-per-GoMag quantity ratio (1 if GoMag qty is 0).
                cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
                entry = {
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'cantitate_roa': round(cantitate_roa, 3),
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'go_qty': gi['quantity'],
                    'roa_qty': ri['cantitate'],
                    'order': o['order_number'],
                    'factura': f"{inv['serie_act']}{inv['numar_act']}",
                }
                repack_mappings.append(entry)
                print(f" ✓ REPACK: {gi['sku']}{ri['codmat']} (qty {gi['quantity']}{ri['cantitate']}, ratio={cantitate_roa:.3f})")
        else:
            # Complex set: one entry per ROA line, each with its share of the
            # GoMag total (VAT divided out) expressed as a percentage.
            go_total = gi['quantity'] * gi['price']
            go_total_fara = go_total / (1 + gi['vat']/100) if gi['vat'] else go_total
            for ri in ris:
                ri_total = ri['cantitate'] * ri['pret']
                pct = round(ri_total / go_total_fara * 100, 2) if go_total_fara else 0
                entry = {
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    # Not rounded here; rounding happens when the CSV is written.
                    'cantitate_roa': ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1,
                    'procent_pret': pct,
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'order': o['order_number'],
                    'factura': f"{inv['serie_act']}{inv['numar_act']}",
                }
                complex_mappings.append(entry)
                print(f" ✓ SET: {gi['sku']}{ri['codmat']} ({pct}%)")
    # Unresolved: GoMag lines left unpaired. Each record carries every ROA
    # line of this invoice that is still unpaired, as candidates for review.
    for gi_idx in go_remaining:
        gi = go_items[gi_idx]
        unresolved.append({
            'sku': gi['sku'],
            'product_name': gi['product_name'],
            'quantity': gi['quantity'],
            'price': gi['price'],
            'order': o['order_number'],
            'factura': f"{inv['serie_act']}{inv['numar_act']}",
            'roa_remaining': [roa_items[i] for i in roa_remaining],
        })
        print(f" ? UNRESOLVED: {gi['sku']} ({gi['product_name'][:40]})")
# --- Step 5: Summary and output ---
print()
print("=" * 80)
print("STEP 5: SUMMARY")
print("=" * 80)


def _first_per_pair(mappings):
    """Return a dict keeping the first mapping seen per (sku, codmat) pair.

    Insertion order follows the input order, so iteration over the result
    yields mappings in the order they were first discovered.
    """
    unique = {}
    for entry in mappings:
        unique.setdefault((entry['sku'], entry['codmat']), entry)
    return unique


def _cell(value):
    """Return *value* as text for fixed-width printing; None becomes ''."""
    return '' if value is None else str(value)


# Deduplicate mappings
seen_simple = _first_per_pair(simple_mappings)
seen_repack = _first_per_pair(repack_mappings)
seen_complex = _first_per_pair(complex_mappings)

# codmat/denumire come from a LEFT JOIN and may be None; applying a format
# spec such as ':15s' to None raises TypeError, so every cell is converted
# to text first.
print(f"\nSimple 1:1 (update nom_articole.codmat = SKU): {len(seen_simple)} unique")
for m in seen_simple.values():
    print(f" {_cell(m['sku']):25s}{_cell(m['codmat']):15s} | {(m['product_name'] or '')[:35]}{(m['denumire'] or '')[:35]}")
print(f"\nRepackaging (ARTICOLE_TERTI with cantitate_roa): {len(seen_repack)} unique")
for m in seen_repack.values():
    print(f" {_cell(m['sku']):25s}{_cell(m['codmat']):15s} x{m['cantitate_roa']} | {(m['product_name'] or '')[:30]}{(m['denumire'] or '')[:30]}")
print(f"\nComplex sets (ARTICOLE_TERTI with procent_pret): {len(seen_complex)} unique")
for m in seen_complex.values():
    print(f" {_cell(m['sku']):25s}{_cell(m['codmat']):15s} {m['procent_pret']}% | {(m['product_name'] or '')[:30]}{(m['denumire'] or '')[:30]}")
print(f"\nUnresolved: {len(unresolved)}")
for u in unresolved:
    print(f" {_cell(u['sku']):25s} | {(u['product_name'] or '')[:40]} | order={u['order']}")
# --- Write CSVs ---
out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)


def _dump_csv(filename, header, rows):
    """Write *header* followed by *rows* to *filename* in out_dir (UTF-8 CSV)."""
    with open(os.path.join(out_dir, filename), 'w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        writer.writerows(rows)


# Simple mappings CSV (for verification before SQL update)
_dump_csv(
    'simple_mappings.csv',
    ['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa', 'go_qty', 'roa_qty', 'go_price', 'roa_pret', 'order', 'factura'],
    (
        [row['sku'], row['codmat'], row['id_articol'], row['product_name'], row['denumire'], row['go_qty'], row['roa_qty'], row['go_price'], row['roa_pret'], row['order'], row['factura']]
        for row in seen_simple.values()
    ),
)

# Repackaging CSV (for ARTICOLE_TERTI import); procent_pret is always 100.
_dump_csv(
    'repack_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    (
        [row['sku'], row['codmat'], row['cantitate_roa'], 100, row['product_name'], row['denumire']]
        for row in seen_repack.values()
    ),
)

# Complex sets CSV (for ARTICOLE_TERTI import); the quantity ratio is rounded
# to three decimals at write time.
_dump_csv(
    'complex_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    (
        [row['sku'], row['codmat'], round(row['cantitate_roa'], 3), row['procent_pret'], row['product_name'], row['denumire']]
        for row in seen_complex.values()
    ),
)

# Unresolved CSV; the leftover ROA lines are flattened into one
# "codmat(qty x price)" cell separated by '; '.
_dump_csv(
    'unresolved.csv',
    ['sku', 'product_name', 'quantity', 'price', 'order', 'factura', 'roa_remaining_items'],
    (
        [
            item['sku'], item['product_name'], item['quantity'], item['price'], item['order'], item['factura'],
            '; '.join([f"{r['codmat']}({r['cantitate']}x{r['pret']:.2f})" for r in item['roa_remaining']]),
        ]
        for item in unresolved
    ),
)
def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so a SKU or CODMAT containing an
    apostrophe cannot terminate the literal early.
    """
    return "'" + str(value).replace("'", "''") + "'"


# SQL script for simple mappings (update nom_articole)
with open(os.path.join(out_dir, 'update_codmat.sql'), 'w', encoding='utf-8') as f:
    f.write("-- Simple SKU → CODMAT: set SKU as CODMAT in nom_articole\n")
    f.write("-- VERIFY BEFORE RUNNING!\n\n")
    for m in seen_simple.values():
        codmat = m['codmat']
        sku = m['sku']
        # Names may be None (LEFT JOIN) and may contain line breaks; a line
        # break inside a '--' comment would turn the remainder into SQL, so
        # whitespace is collapsed onto a single line.
        label = f"{(m['product_name'] or '')[:50]}{(m['denumire'] or '')[:50]}"
        f.write(f"-- {' '.join(label.split())}\n")
        if not codmat or not sku:
            # Without both codes the UPDATE would target a bogus literal such
            # as codmat = 'None'; leave a marker for manual review instead.
            f.write(f"-- SKIPPED: missing SKU or CODMAT (id_articol={m['id_articol']})\n\n")
            continue
        f.write(f"UPDATE nom_articole SET codmat = {_sql_literal(sku)} WHERE codmat = {_sql_literal(codmat)} AND sters = 0;\n\n")
print(f"\nOutput written to {out_dir}:")
print(f" simple_mappings.csv - {len(seen_simple)} rows (verify, then run update_codmat.sql)")
print(f" repack_mappings.csv - {len(seen_repack)} rows (import via /api/mappings/import-csv)")
print(f" complex_mappings.csv - {len(seen_complex)} rows (import via /api/mappings/import-csv)")
print(f" unresolved.csv - {len(unresolved)} rows (manual review needed)")
print(f" update_codmat.sql - SQL for simple mappings")