add: scripts for invoice-order matching and SKU discovery
Analysis scripts to match GoMag orders with Oracle invoices by date/client/total, then compare line items by price to discover SKU → id_articol mappings. Generates SQL for nom_articole codmat updates and CSV for ARTICOLE_TERTI repackaging/set mappings. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
442
scripts/match_invoices.py
Normal file
442
scripts/match_invoices.py
Normal file
@@ -0,0 +1,442 @@
|
||||
"""
|
||||
Match GoMag orders (SQLite) with manual invoices (Oracle vanzari)
|
||||
by date + client name + total value.
|
||||
Then compare line items to discover SKU → CODMAT mappings.
|
||||
"""
|
||||
import csv
import os
import sqlite3
import sys
from datetime import date
from difflib import SequenceMatcher

import oracledb
|
||||
|
||||
# Re-encode stdout as UTF-8 (unencodable characters are replaced) so the
# arrows and check marks printed by this script do not break the Windows console.
sys.stdout.reconfigure(errors='replace', encoding='utf-8')

# Put the Oracle XE client directory at the front of PATH before initialising
# the Oracle client libraries (presumably so the client DLLs can be located).
_oracle_client_bin = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
os.environ['PATH'] = ';'.join((_oracle_client_bin, os.environ.get('PATH', '')))
oracledb.init_oracle_client()
|
||||
|
||||
# --- Step 1: Get GoMag orders from SQLite ---
print("=" * 80)
print("STEP 1: Loading GoMag orders from SQLite")
print("=" * 80)

db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    # Row objects can be turned into plain dicts keyed by column name.
    db.row_factory = sqlite3.Row
    c = db.cursor()

    # Orders whose status is IMPORTED or ALREADY_IMPORTED, dated within the
    # last 10 days, newest first.
    c.execute("""
        SELECT order_number, order_date, customer_name, status,
               id_comanda, order_total, billing_name, shipping_name
        FROM orders
        WHERE status IN ('IMPORTED', 'ALREADY_IMPORTED')
          AND order_date >= date('now', '-10 days')
        ORDER BY order_date DESC
    """)
    orders = [dict(r) for r in c.fetchall()]

    # Attach the line items to each order under the 'items' key.
    for order in orders:
        c.execute("""
            SELECT sku, product_name, quantity, price, vat, mapping_status
            FROM order_items
            WHERE order_number = ?
            ORDER BY sku
        """, (order['order_number'],))
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    # Release the SQLite handle even when one of the queries above raises.
    db.close()

print(f"Loaded {len(orders)} GoMag orders")
for o in orders:
    # str() keeps the ':>10s' format spec valid when order_number is stored as an integer.
    print(f" {str(o['order_number']):>10s} | {str(o['order_date'])[:10]} | {(o['customer_name'] or '')[:35]:35s} | {o['order_total'] or 0:10.2f} | {len(o['items'])} items")
|
||||
|
||||
# --- Step 2: Get Oracle invoices (vanzari) with detail lines ---
print()
print("=" * 80)
print("STEP 2: Loading Oracle invoices (vanzari + detalii)")
print("=" * 80)

# NOTE(review): the credentials used to be hard-coded only. They can now be
# overridden through environment variables; the previous values remain as
# defaults so existing runs behave the same. Consider removing the password
# default from source control.
conn = oracledb.connect(
    user=os.environ.get('ROA_ORACLE_USER', 'VENDING'),
    password=os.environ.get('ROA_ORACLE_PASSWORD', 'ROMFASTSOFT'),
    dsn=os.environ.get('ROA_ORACLE_DSN', 'ROA'),
)
try:
    cur = conn.cursor()

    # Invoice headers (not flagged as deleted, dated within the last 10 days)
    # together with the partner name columns.
    cur.execute("""
        SELECT v.id_vanzare, v.numar_act, v.serie_act,
               TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
               v.total_fara_tva, v.total_cu_tva, v.id_part,
               p.denumire as partener, p.prenume
        FROM vanzari v
        LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
        WHERE v.sters = 0 AND v.data_act >= SYSDATE - 10
        ORDER BY v.data_act DESC
    """)

    invoices = []
    for r in cur:
        invoices.append({
            'id_vanzare': r[0],
            'numar_act': r[1],
            'serie_act': r[2],
            'data_act': r[3],
            'total_fara_tva': float(r[4] or 0),
            'total_cu_tva': float(r[5] or 0),
            'id_part': r[6],
            # denumire followed by prenume when the latter is present
            'partener': (r[7] or '') + (' ' + r[8] if r[8] else ''),
        })

    # Detail lines for each invoice, stored under the 'items' key.
    for inv in invoices:
        cur.execute("""
            SELECT vd.id_articol, a.codmat, a.denumire,
                   vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
            FROM vanzari_detalii vd
            LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
            WHERE vd.id_vanzare = :1 AND vd.sters = 0
            ORDER BY vd.id_articol
        """, [inv['id_vanzare']])
        inv['items'] = [
            {
                'id_articol': r[0],
                'codmat': r[1],
                'denumire': r[2],
                'cantitate': float(r[3] or 0),
                'pret': float(r[4] or 0),
                'pret_cu_tva': float(r[5] or 0),
                'tva_pct': float(r[6] or 0),
            }
            for r in cur
        ]
finally:
    # Close the Oracle session even when a query above raises.
    conn.close()

print(f"Loaded {len(invoices)} Oracle invoices")
for inv in invoices:
    print(f" {inv['serie_act']}{str(inv['numar_act']):>6s} | {inv['data_act']} | {inv['partener'][:35]:35s} | {inv['total_cu_tva']:10.2f} | {len(inv['items'])} items")
|
||||
|
||||
# --- Step 3: Fuzzy matching ---
# Blank line, then the section title framed by two 80-character rules.
print(
    "",
    "=" * 80,
    "STEP 3: Matching orders → invoices (date + name + total)",
    "=" * 80,
    sep="\n",
)
|
||||
|
||||
def normalize_name(name):
    """Return *name* in a canonical form suitable for fuzzy comparison.

    The name is upper-cased, leading/trailing whitespace is removed, runs of
    internal whitespace are collapsed to a single space, and the company
    suffix spellings ``S.R.L.`` / ``S.R.L`` are rewritten as ``SRL``.

    A falsy *name* (``None`` or an empty string) yields ``''``.
    """
    if not name:
        return ''
    # split()/join trims the ends and collapses double spaces, which would
    # otherwise lower the similarity ratio of two otherwise equal names.
    canonical = ' '.join(name.upper().split())
    # Replace the dotted form with the trailing dot first so no stray '.' is left.
    return canonical.replace('S.R.L.', 'SRL').replace('S.R.L', 'SRL')
|
||||
|
||||
def name_similarity(n1, n2):
    """Return the SequenceMatcher ratio (0.0-1.0) of the two normalized names."""
    left = normalize_name(n1)
    right = normalize_name(n2)
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
|
||||
|
||||
def _days_apart(day_a, day_b):
    """Return the absolute gap in calendar days between two 'YYYY-MM-DD' strings.

    Returns None when either value cannot be parsed as an ISO date, so the
    caller can treat the pair as "no date match" instead of crashing.
    """
    try:
        return abs((date.fromisoformat(day_a) - date.fromisoformat(day_b)).days)
    except (TypeError, ValueError):
        return None


matches = []
unmatched_orders = []
used_invoices = set()   # id_vanzare values already paired with an order

# Greedy assignment: each order takes the best-scoring invoice still free.
for order in orders:
    best_match = None
    best_score = 0

    order_date = str(order['order_date'])[:10]
    order_total = order['order_total'] or 0
    order_name = order['customer_name'] or ''

    for inv in invoices:
        if inv['id_vanzare'] in used_invoices:
            continue

        # Date match (must be same day or +/- 1 day). Real date arithmetic is
        # required here: subtracting YYYYMMDD integers gives 70 for
        # 01-31 vs 02-01 and would reject adjacent days across month ends.
        date_diff = _days_apart(order_date, inv['data_act'])
        if date_diff is None or date_diff > 1:
            continue

        # Total match: rejected only when off by more than 5% AND more than 5 lei.
        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        if total_pct > 5 and total_diff > 5:
            continue

        # Name similarity
        sim = name_similarity(order_name, inv['partener'])

        # Score: name similarity (weight 0.5) + total closeness (0.4) + date match (0.1)
        total_score = sim * 0.5 + (1 - min(total_pct/100, 1)) * 0.4 + (1 if date_diff == 0 else 0.5) * 0.1

        if total_score > best_score:
            best_score = total_score
            best_match = inv

    if best_match and best_score > 0.3:
        matches.append({
            'order': order,
            'invoice': best_match,
            'score': best_score,
        })
        used_invoices.add(best_match['id_vanzare'])
    else:
        unmatched_orders.append(order)

print(f"\nMatched: {len(matches)} | Unmatched orders: {len(unmatched_orders)}")
print()

for m in matches:
    o = m['order']
    inv = m['invoice']
    # 'or 0' mirrors the guard used for unmatched orders below; a NULL total
    # would otherwise raise on the '.2f' format spec.
    print(f" ORDER {o['order_number']} ({(o['customer_name'] or '')[:25]}, {o['order_total'] or 0:.2f})")
    print(f" ↔ FACT {inv['serie_act']}{inv['numar_act']} ({inv['partener'][:25]}, {inv['total_cu_tva']:.2f}) [score={m['score']:.2f}]")
    print()

if unmatched_orders:
    print("Unmatched orders:")
    for o in unmatched_orders:
        print(f" {o['order_number']} | {(o['customer_name'] or '')[:35]} | {o['order_total'] or 0:.2f}")
|
||||
|
||||
# --- Step 4: Compare line items for matched pairs ---
print()
print("=" * 80)
print("STEP 4: Line item comparison for matched orders")
print("=" * 80)


def _gomag_line_totals(gi):
    """Return (total, total_fara) for a GoMag order line.

    total is quantity * price; total_fara is that amount with the line's VAT
    percentage divided out (the script treats the GoMag price as usually
    VAT-inclusive). When vat is falsy both values are equal.
    """
    total = gi['quantity'] * gi['price']
    total_fara = total / (1 + gi['vat'] / 100) if gi['vat'] else total
    return total, total_fara


def _roa_line_totals(ri):
    """Return (cantitate * pret, cantitate * pret_cu_tva) for an invoice line."""
    return ri['cantitate'] * ri['pret'], ri['cantitate'] * ri['pret_cu_tva']


simple_mappings = []   # SKU → CODMAT, same qty/price → update nom_articole
repack_mappings = []   # SKU → CODMAT, different qty → ARTICOLE_TERTI
complex_mappings = []  # 1 SKU → N CODMATs → ARTICOLE_TERTI with procent_pret
unresolved = []        # Cannot determine mapping

for m in matches:
    o = m['order']
    inv = m['invoice']
    go_items = o['items']
    roa_items = inv['items']
    factura = f"{inv['serie_act']}{inv['numar_act']}"

    print(f"\n--- ORDER {o['order_number']} ↔ FACT {factura} ---")
    print(f" GoMag: {len(go_items)} items | ROA: {len(roa_items)} items")

    # Show items side by side. sku/product_name are guarded the same way the
    # ROA columns are, so a NULL value cannot crash the report.
    print(f" GoMag items:")
    for gi in go_items:
        print(f" SKU={str(gi['sku'] or ''):20s} qty={gi['quantity']:6.1f} price={gi['price']:10.2f} [{(gi['product_name'] or '')[:40]}]")
    print(f" ROA items:")
    for ri in roa_items:
        print(f" COD={str(ri['codmat'] or ''):20s} qty={ri['cantitate']:6.1f} pret={ri['pret']:10.4f} [{(ri['denumire'] or '')[:40]}]")

    # Lines are matched by value. GoMag price is usually with TVA, ROA pret
    # can be fara TVA, so several combinations are tried.
    go_remaining = list(range(len(go_items)))    # indexes of GoMag lines not yet matched
    roa_remaining = list(range(len(roa_items)))  # indexes of ROA lines not yet matched
    item_matches = []                            # (GoMag index, [ROA indexes])

    # First pass: 1:1 by line total (qty * price), tolerance 0.5
    for gi_idx in list(go_remaining):
        go_total, go_total_fara = _gomag_line_totals(go_items[gi_idx])

        for ri_idx in list(roa_remaining):
            roa_total, roa_total_cu = _roa_line_totals(roa_items[ri_idx])

            # Match by total (fara TVA or cu TVA)
            if (abs(go_total_fara - roa_total) < 0.5 or
                    abs(go_total - roa_total_cu) < 0.5 or
                    abs(go_total - roa_total) < 0.5):
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break

    # Second pass: 1:N matching (one GoMag item → a pair of ROA items), tolerance 1.0
    for gi_idx in list(go_remaining):
        go_total, go_total_fara = _gomag_line_totals(go_items[gi_idx])

        # Search first and mutate afterwards, so roa_remaining is never
        # modified while it is being iterated.
        pair = None
        for pos, ri_idx1 in enumerate(roa_remaining):
            net1, gross1 = _roa_line_totals(roa_items[ri_idx1])
            for ri_idx2 in roa_remaining[pos + 1:]:
                net2, gross2 = _roa_line_totals(roa_items[ri_idx2])
                if (abs(go_total_fara - (net1 + net2)) < 1.0 or
                        abs(go_total - (gross1 + gross2)) < 1.0):
                    pair = (ri_idx1, ri_idx2)
                    break
            if pair is not None:
                break

        if pair is not None:
            item_matches.append((gi_idx, list(pair)))
            go_remaining.remove(gi_idx)
            roa_remaining.remove(pair[0])
            roa_remaining.remove(pair[1])

    # Report matches
    for gi_idx, ri_indices in item_matches:
        gi = go_items[gi_idx]
        ris = [roa_items[i] for i in ri_indices]

        if len(ris) == 1:
            ri = ris[0]
            if abs(gi['quantity'] - ri['cantitate']) < 0.01:
                # Simple 1:1 (same quantity on both sides)
                simple_mappings.append({
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'go_qty': gi['quantity'],
                    'roa_qty': ri['cantitate'],
                    'go_price': gi['price'],
                    'roa_pret': ri['pret'],
                    'order': o['order_number'],
                    'factura': factura,
                })
                print(f" ✓ SIMPLE: {gi['sku']} → {ri['codmat']} (qty {gi['quantity']}={ri['cantitate']})")
            else:
                # Repackaging: ROA units per GoMag unit (1 when the GoMag quantity is zero)
                cantitate_roa = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
                repack_mappings.append({
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'cantitate_roa': round(cantitate_roa, 3),
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'go_qty': gi['quantity'],
                    'roa_qty': ri['cantitate'],
                    'order': o['order_number'],
                    'factura': factura,
                })
                print(f" ✓ REPACK: {gi['sku']} → {ri['codmat']} (qty {gi['quantity']}→{ri['cantitate']}, ratio={cantitate_roa:.3f})")
        else:
            # Complex set: each ROA line gets its share of the GoMag net total
            _, go_total_fara = _gomag_line_totals(gi)
            for ri in ris:
                ri_total = ri['cantitate'] * ri['pret']
                pct = round(ri_total / go_total_fara * 100, 2) if go_total_fara else 0
                complex_mappings.append({
                    'sku': gi['sku'],
                    'codmat': ri['codmat'],
                    'id_articol': ri['id_articol'],
                    'cantitate_roa': ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1,
                    'procent_pret': pct,
                    'product_name': gi['product_name'],
                    'denumire': ri['denumire'],
                    'order': o['order_number'],
                    'factura': factura,
                })
                print(f" ✓ SET: {gi['sku']} → {ri['codmat']} ({pct}%)")

    # Unresolved: GoMag lines left over after both passes, recorded together
    # with the ROA lines that were still free.
    for gi_idx in go_remaining:
        gi = go_items[gi_idx]
        unresolved.append({
            'sku': gi['sku'],
            'product_name': gi['product_name'],
            'quantity': gi['quantity'],
            'price': gi['price'],
            'order': o['order_number'],
            'factura': factura,
            'roa_remaining': [roa_items[i] for i in roa_remaining],
        })
        print(f" ? UNRESOLVED: {gi['sku']} ({(gi['product_name'] or '')[:40]})")
|
||||
|
||||
# --- Step 5: Summary and output ---
print()
print("=" * 80)
print("STEP 5: SUMMARY")
print("=" * 80)


def _dedupe_by_sku_codmat(mappings):
    """Return {(sku, codmat): mapping}, keeping the first mapping seen per key."""
    unique = {}
    for mapping in mappings:
        unique.setdefault((mapping['sku'], mapping['codmat']), mapping)
    return unique


def _cell(value):
    """Return value as text ('' for None) so width format specs such as ':25s' cannot raise."""
    return '' if value is None else str(value)


# Deduplicate mappings
seen_simple = _dedupe_by_sku_codmat(simple_mappings)
seen_repack = _dedupe_by_sku_codmat(repack_mappings)
seen_complex = _dedupe_by_sku_codmat(complex_mappings)

# codmat comes from a LEFT JOIN and may be None; formatting None with ':15s'
# raises TypeError, hence _cell() on the padded columns below.
print(f"\nSimple 1:1 (update nom_articole.codmat = SKU): {len(seen_simple)} unique")
for m in seen_simple.values():
    print(f" {_cell(m['sku']):25s} → {_cell(m['codmat']):15s} | {(m['product_name'] or '')[:35]} ↔ {(m['denumire'] or '')[:35]}")

print(f"\nRepackaging (ARTICOLE_TERTI with cantitate_roa): {len(seen_repack)} unique")
for m in seen_repack.values():
    print(f" {_cell(m['sku']):25s} → {_cell(m['codmat']):15s} x{m['cantitate_roa']} | {(m['product_name'] or '')[:30]} ↔ {(m['denumire'] or '')[:30]}")

print(f"\nComplex sets (ARTICOLE_TERTI with procent_pret): {len(seen_complex)} unique")
for m in seen_complex.values():
    print(f" {_cell(m['sku']):25s} → {_cell(m['codmat']):15s} {m['procent_pret']}% | {(m['product_name'] or '')[:30]} ↔ {(m['denumire'] or '')[:30]}")

print(f"\nUnresolved: {len(unresolved)}")
for u in unresolved:
    print(f" {_cell(u['sku']):25s} | {(u['product_name'] or '')[:40]} | order={u['order']}")
|
||||
|
||||
# --- Write CSVs ---
out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)


def _write_csv(filename, header, rows):
    """Write header followed by rows to out_dir/filename as UTF-8 CSV."""
    with open(os.path.join(out_dir, filename), 'w', newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow(header)
        w.writerows(rows)


def _sql_literal(value):
    """Return value as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def _comment_text(value, limit=50):
    """Return value truncated to limit characters and forced onto one line.

    A line break inside a '--' comment would end the comment and let the rest
    of the text be read as SQL.
    """
    return (value or '')[:limit].replace('\r', ' ').replace('\n', ' ')


# Simple mappings CSV (for verification before SQL update)
_write_csv(
    'simple_mappings.csv',
    ['sku', 'codmat', 'id_articol', 'product_name_gomag', 'denumire_roa', 'go_qty', 'roa_qty', 'go_price', 'roa_pret', 'order', 'factura'],
    ([m['sku'], m['codmat'], m['id_articol'], m['product_name'], m['denumire'], m['go_qty'], m['roa_qty'], m['go_price'], m['roa_pret'], m['order'], m['factura']]
     for m in seen_simple.values()),
)

# Repackaging CSV (for ARTICOLE_TERTI import); procent_pret is always 100 here
_write_csv(
    'repack_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    ([m['sku'], m['codmat'], m['cantitate_roa'], 100, m['product_name'], m['denumire']]
     for m in seen_repack.values()),
)

# Complex sets CSV (for ARTICOLE_TERTI import)
_write_csv(
    'complex_mappings.csv',
    ['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'product_name_gomag', 'denumire_roa'],
    ([m['sku'], m['codmat'], round(m['cantitate_roa'], 3), m['procent_pret'], m['product_name'], m['denumire']]
     for m in seen_complex.values()),
)

# Unresolved CSV; the ROA lines still free are flattened into one text column
_write_csv(
    'unresolved.csv',
    ['sku', 'product_name', 'quantity', 'price', 'order', 'factura', 'roa_remaining_items'],
    ([u['sku'], u['product_name'], u['quantity'], u['price'], u['order'], u['factura'],
      '; '.join([f"{r['codmat']}({r['cantitate']}x{r['pret']:.2f})" for r in u['roa_remaining']])]
     for u in unresolved),
)

# SQL script for simple mappings (update nom_articole)
with open(os.path.join(out_dir, 'update_codmat.sql'), 'w', encoding='utf-8') as f:
    f.write("-- Simple SKU → CODMAT: set SKU as CODMAT in nom_articole\n")
    f.write("-- VERIFY BEFORE RUNNING!\n\n")
    for m in seen_simple.values():
        f.write(f"-- {_comment_text(m['product_name'])} → {_comment_text(m['denumire'])}\n")
        if m['codmat']:
            where = f"codmat = {_sql_literal(m['codmat'])}"
        elif m['id_articol'] is not None:
            # The article has no codmat yet (NULL from the LEFT JOIN), so
            # "codmat = 'None'" would match nothing; target the row by id.
            where = f"id_articol = {int(m['id_articol'])}"
        else:
            f.write("-- SKIPPED: invoice line has neither codmat nor id_articol\n\n")
            continue
        f.write(f"UPDATE nom_articole SET codmat = {_sql_literal(m['sku'])} WHERE {where} AND sters = 0;\n\n")

print(f"\nOutput written to {out_dir}:")
print(f" simple_mappings.csv - {len(seen_simple)} rows (verify, then run update_codmat.sql)")
print(f" repack_mappings.csv - {len(seen_repack)} rows (import via /api/mappings/import-csv)")
print(f" complex_mappings.csv - {len(seen_complex)} rows (import via /api/mappings/import-csv)")
print(f" unresolved.csv - {len(unresolved)} rows (manual review needed)")
print(f" update_codmat.sql - SQL for simple mappings")
|
||||
Reference in New Issue
Block a user