add: scripts for invoice-order matching and SKU discovery
Analysis scripts to match GoMag orders with Oracle invoices by date/client/total, then compare line items by price to discover SKU → id_articol mappings. Generates SQL for nom_articole codmat updates and CSV for ARTICOLE_TERTI repackaging/set mappings. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
325
scripts/compare_detail.py
Normal file
325
scripts/compare_detail.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""
|
||||
Generate detailed comparison CSV: GoMag orders vs Oracle invoices
|
||||
Side-by-side view for manual analysis.
|
||||
"""
|
||||
import oracledb
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
import csv
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
# Emit UTF-8 on stdout; characters that cannot be encoded are replaced
# instead of raising.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Put the Oracle XE bin directory in front of the existing PATH before the
# Oracle client is initialised (presumably so its libraries can be located).
_oracle_bin = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
os.environ['PATH'] = ';'.join((_oracle_bin, os.environ.get('PATH', '')))
oracledb.init_oracle_client()
|
||||
|
||||
# --- Load GoMag orders ---
# Reads every order from the local SQLite import database and attaches its
# line items under order['items'].  The connection is closed in `finally`
# so it is released even if one of the queries raises.
db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    db.row_factory = sqlite3.Row  # rows convert cleanly to dicts by column name
    c = db.cursor()

    c.execute("""
        SELECT order_number, order_date, customer_name, status,
               id_comanda, order_total, billing_name, shipping_name
        FROM orders ORDER BY order_date DESC
    """)
    orders = [dict(r) for r in c.fetchall()]

    # One item query per order; items are sorted by SKU.
    for order in orders:
        c.execute("""
            SELECT sku, product_name, quantity, price, vat, mapping_status
            FROM order_items WHERE order_number = ? ORDER BY sku
        """, (order['order_number'],))
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    db.close()

print(f"Loaded {len(orders)} GoMag orders")
|
||||
|
||||
# --- Load Oracle invoices ---
# The invoice date window is derived from the loaded orders.  Orders without
# a date are ignored here: str(None) would otherwise take part in the string
# min/max and end up being passed to TO_DATE.
_order_days = [str(o['order_date'])[:10] for o in orders if o['order_date']]
if not _order_days:
    # min()/max() on an empty sequence would raise ValueError; stop cleanly.
    sys.exit("No dated GoMag orders loaded - nothing to compare against Oracle.")
min_date = min(_order_days)
max_date = max(_order_days)

# NOTE(review): credentials used to be hard-coded only.  They can now be
# overridden through ORACLE_USER / ORACLE_PASSWORD / ORACLE_DSN; the previous
# literals remain as fallbacks for backward compatibility and should be
# removed from source control once the environment is configured.
conn = oracledb.connect(
    user=os.environ.get('ORACLE_USER', 'VENDING'),
    password=os.environ.get('ORACLE_PASSWORD', 'ROMFASTSOFT'),
    dsn=os.environ.get('ORACLE_DSN', 'ROA'),
)
try:
    cur = conn.cursor()

    # Non-deleted invoices with a positive total, dated within 3 days on
    # either side of the order date range.
    cur.execute("""
        SELECT v.id_vanzare, v.numar_act, v.serie_act,
               TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
               v.total_fara_tva, v.total_cu_tva, v.id_part,
               p.denumire as partener, p.prenume
        FROM vanzari v
        LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
        WHERE v.sters = 0
          AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 3
          AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 3
          AND v.total_cu_tva > 0
        ORDER BY v.data_act DESC
    """, [min_date, max_date])

    invoices = []
    inv_map = {}  # id_vanzare -> invoice dict, used to attach detail rows
    for r in cur:
        inv = {
            'id_vanzare': r[0], 'numar_act': r[1], 'serie_act': r[2] or '',
            'data_act': r[3], 'total_fara_tva': float(r[4] or 0),
            'total_cu_tva': float(r[5] or 0), 'id_part': r[6],
            # Partner display name: denumire + prenume, NULLs treated as ''.
            'partener': ((r[7] or '') + ' ' + (r[8] or '')).strip(),
            'items': [],
        }
        invoices.append(inv)
        inv_map[inv['id_vanzare']] = inv

    # Batch fetch details, 500 invoice ids per query (presumably to stay
    # under Oracle's IN-list size limit - confirm).  Only bind-variable
    # names are interpolated into the SQL text; the ids travel as binds.
    inv_ids = [inv['id_vanzare'] for inv in invoices]
    for i in range(0, len(inv_ids), 500):
        batch = inv_ids[i:i + 500]
        placeholders = ",".join(f":d{j}" for j in range(len(batch)))
        params = {f"d{j}": did for j, did in enumerate(batch)}
        cur.execute(f"""
            SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire,
                   vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
            FROM vanzari_detalii vd
            LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
            WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0
            ORDER BY vd.id_vanzare, vd.id_articol
        """, params)
        for r in cur:
            inv_map[r[0]]['items'].append({
                'id_articol': r[1], 'codmat': r[2], 'denumire': r[3],
                'cantitate': float(r[4] or 0), 'pret': float(r[5] or 0),
                'pret_cu_tva': float(r[6] or 0), 'tva_pct': float(r[7] or 0),
            })
finally:
    # Release the Oracle session even when a query fails.
    conn.close()

print(f"Loaded {len(invoices)} Oracle invoices")
|
||||
|
||||
# --- Fuzzy matching orders → invoices ---
|
||||
# Legal-form markers dropped from names before comparison.
_LEGAL_FORM_TOKENS = frozenset(('SC', 'PFA', 'PF'))


def normalize_name(name):
    """Return an upper-cased, trimmed name with legal-form noise removed.

    'S.R.L.' / 'S.R.L' are rewritten to 'SRL'.  The markers SC, PFA and PF
    are dropped, but only when they stand alone as a space-delimited word
    that is followed by another word.  Matching whole tokens (instead of the
    raw substrings 'SC ', 'PFA ', 'PF ') keeps names that merely end in those
    letters intact, e.g. 'FRANCISC POPA' is no longer turned into
    'FRANCIPOPA'.

    Args:
        name: Raw customer/partner name; may be None or empty.

    Returns:
        The normalized name, or '' when `name` is falsy.
    """
    if not name:
        return ''
    n = name.strip().upper()
    n = n.replace('S.R.L.', 'SRL').replace('S.R.L', 'SRL')

    tokens = n.split(' ')
    last = len(tokens) - 1
    # A marker in the last position has no trailing space, so it is kept,
    # exactly as the original 'MARKER ' substring rule would.
    kept = [t for i, t in enumerate(tokens)
            if i == last or t not in _LEGAL_FORM_TOKENS]
    return ' '.join(kept)
|
||||
|
||||
def name_similarity(n1, n2):
    """Score how alike two names are after normalization.

    Both names go through normalize_name().  The second name is compared
    against the first name as-is and, when the first name has two or more
    words, also against the first name with its word order reversed; the
    higher SequenceMatcher ratio wins.

    Args:
        n1: First name (the one whose word order may be reversed).
        n2: Second name.

    Returns:
        0 if either normalized name is empty, otherwise the best ratio.
    """
    left = normalize_name(n1)
    right = normalize_name(n2)
    if not (left and right):
        return 0

    variants = [left]
    tokens = left.split()
    if len(tokens) >= 2:
        # Cover "First Last" vs "Last First" style differences.
        variants.append(' '.join(tokens[::-1]))

    return max(SequenceMatcher(None, variant, right).ratio() for variant in variants)
|
||||
|
||||
def _parse_day(text):
    """Return the date for a 'YYYY-MM-DD' string, or None if unparseable."""
    from datetime import date  # local import: only this block needs it

    try:
        return date.fromisoformat(text)
    except (TypeError, ValueError):
        return None


# Greedy one-to-one matching: orders are processed from the largest total
# down, and each invoice can be claimed by at most one order.
matches = []
used_invoices = set()
orders_sorted = sorted(orders, key=lambda o: -(o['order_total'] or 0))

# Parse each invoice date once instead of once per (order, invoice) pair.
_invoice_days = {inv['id_vanzare']: _parse_day(inv['data_act']) for inv in invoices}

for order in orders_sorted:
    best_match = None
    best_score = 0
    order_day = _parse_day(str(order['order_date'])[:10])
    order_total = order['order_total'] or 0
    order_name = order['customer_name'] or ''

    # An order without a usable date cannot be compared to any invoice.
    candidates = invoices if order_day is not None else []

    for inv in candidates:
        if inv['id_vanzare'] in used_invoices:
            continue

        inv_day = _invoice_days[inv['id_vanzare']]
        if inv_day is None:
            continue

        # Real calendar distance in days.  The previous YYYYMMDD integer
        # subtraction broke across month/year boundaries (2024-01-31 vs
        # 2024-02-01 gave 70 instead of 1) and discarded valid candidates.
        date_diff = abs((order_day - inv_day).days)
        if date_diff > 3:
            continue

        # Reject only when the totals differ by more than 15% AND by more
        # than 15 in absolute value.
        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        if total_pct > 15 and total_diff > 15:
            continue

        # Best name similarity across customer, billing and shipping names.
        sim = max(
            name_similarity(order_name, inv['partener']),
            name_similarity(order.get('billing_name') or '', inv['partener']),
            name_similarity(order.get('shipping_name') or '', inv['partener']),
        )

        date_score = 1 if date_diff == 0 else (0.7 if date_diff == 1 else (0.4 if date_diff == 2 else 0.2))
        total_score = 1 - min(total_pct / 100, 1)
        score = sim * 0.45 + total_score * 0.40 + date_score * 0.15

        if score > best_score:
            best_score = score
            best_match = inv

    if best_match and best_score > 0.45:
        matches.append({'order': order, 'invoice': best_match, 'score': best_score})
        used_invoices.add(best_match['id_vanzare'])
    else:
        matches.append({'order': order, 'invoice': None, 'score': 0})

# Sort by order date
matches.sort(key=lambda m: str(m['order']['order_date']), reverse=True)
|
||||
|
||||
# --- Write detailed comparison CSV ---
# Layout per row: 12 GoMag columns, '|', 11 ROA columns, '|', 3 indicator
# columns.  Order/invoice header cells are filled only on the first row of
# each group; following rows carry just the line items, paired by position.
out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)

with open(os.path.join(out_dir, 'comparatie_detaliata.csv'), 'w', newline='', encoding='utf-8-sig') as f:
    w = csv.writer(f, delimiter=';')

    w.writerow([
        'NR_COMANDA_GOMAG', 'DATA_COMANDA', 'CLIENT_GOMAG', 'STATUS_IMPORT',
        'TOTAL_COMANDA_GOMAG',
        'NR_ARTICOLE_GOMAG', 'SKU_GOMAG', 'PRODUS_GOMAG', 'QTY_GOMAG', 'PRET_GOMAG', 'TVA_GOMAG',
        'LINIE_TOTAL_GOMAG',
        '|',
        'FACTURA_ROA', 'DATA_FACTURA', 'CLIENT_ROA', 'TOTAL_FACTURA_ROA',
        'NR_ARTICOLE_ROA', 'CODMAT_ROA', 'PRODUS_ROA', 'QTY_ROA', 'PRET_ROA', 'TVA_ROA',
        'LINIE_TOTAL_ROA',
        '|',
        'MATCH_SCORE', 'DIFF_TOTAL', 'SKU_EQ_CODMAT',
    ])

    for m in matches:
        o = m['order']
        inv = m['invoice']
        go_items = o['items']
        roa_items = inv['items'] if inv else []

        # At least one row per order, even when neither side has items.
        max_lines = max(len(go_items), len(roa_items), 1)

        order_total = o['order_total'] or 0
        inv_total = inv['total_cu_tva'] if inv else 0
        diff_total = round(order_total - inv_total, 2) if inv else ''

        for idx in range(max_lines):
            gi = go_items[idx] if idx < len(go_items) else None
            ri = roa_items[idx] if idx < len(roa_items) else None
            first_row = idx == 0

            # GoMag side
            if first_row:
                go_order = o['order_number']
                go_date = str(o['order_date'])[:10]
                go_client = o['customer_name'] or ''
                go_status = o['status']
                go_total = order_total
                go_nr_art = len(go_items)
            else:
                go_order = go_date = go_client = go_status = go_total = go_nr_art = ''

            if gi:
                go_sku = gi['sku'] or ''
                go_prod = gi['product_name'] or ''
                go_qty = gi['quantity']
                go_price = gi['price']
                go_vat = gi['vat']
                # NULL quantity/price count as 0 here instead of raising
                # TypeError; the raw values are still written as-is.
                go_line_total = round((gi['quantity'] or 0) * (gi['price'] or 0), 2)
            else:
                go_sku = go_prod = go_qty = go_price = go_vat = go_line_total = ''

            # ROA side
            if first_row and inv:
                roa_fact = f"{inv['serie_act']}{inv['numar_act']}"
                roa_date = inv['data_act']
                roa_client = inv['partener']
                roa_total = inv_total
                roa_nr_art = len(roa_items)
            else:
                roa_fact = roa_date = roa_client = roa_total = roa_nr_art = ''

            if ri:
                roa_codmat = ri['codmat'] or ''
                roa_prod = ri['denumire'] or ''
                roa_qty = ri['cantitate']
                roa_price = ri['pret']
                roa_vat = ri['tva_pct']
                # Quantity sign is dropped for the line total; abs() also
                # avoids writing "-0.0" for zero-quantity lines.
                roa_line_total = round(abs(ri['cantitate']) * ri['pret'], 2)
            else:
                roa_codmat = roa_prod = roa_qty = roa_price = roa_vat = roa_line_total = ''

            # Match indicators (first row of the group only)
            if first_row:
                score = round(m['score'], 2) if m['score'] else ''
                diff = diff_total
            else:
                score = ''
                diff = ''

            # 'DA' when both sides have a non-empty code and they are equal.
            sku_eq = 'DA' if (gi and ri and go_sku and roa_codmat and go_sku == roa_codmat) else ''

            w.writerow([
                go_order, go_date, go_client, go_status,
                go_total,
                go_nr_art, go_sku, go_prod, go_qty, go_price, go_vat,
                go_line_total,
                '|',
                roa_fact, roa_date, roa_client, roa_total,
                roa_nr_art, roa_codmat, roa_prod, roa_qty, roa_price, roa_vat,
                roa_line_total,
                '|',
                score, diff, sku_eq,
            ])

    # Empty separator row before unmatched invoice summary
    w.writerow([])
    w.writerow(['--- FACTURI ROA FARA COMANDA GOMAG ---'])
    w.writerow([])

    unmatched_inv = [inv for inv in invoices if inv['id_vanzare'] not in used_invoices]
    unmatched_inv.sort(key=lambda x: x['data_act'], reverse=True)

    # Invoices without any detail line produce no row here.
    for inv in unmatched_inv:
        for idx, ri in enumerate(inv['items']):
            if idx == 0:
                inv_head = [
                    f"{inv['serie_act']}{inv['numar_act']}", inv['data_act'],
                    inv['partener'], inv['total_cu_tva'],
                    len(inv['items']),
                ]
            else:
                inv_head = [''] * 5

            w.writerow(
                [''] * 12          # GoMag side stays empty
                + ['|']
                + inv_head
                + [
                    ri['codmat'] or '', ri['denumire'] or '',
                    ri['cantitate'], ri['pret'], ri['tva_pct'],
                    round(ri['cantitate'] * ri['pret'], 2),
                    '|', '', '', '',
                ]
            )
|
||||
|
||||
# Final console summary: match counts and where the CSV was written.
_matched_count = len([m for m in matches if m['invoice']])
_csv_path = os.path.join(out_dir, 'comparatie_detaliata.csv')

print("\nDone!")
print(f"Matched: {_matched_count} / {len(orders)} orders")
print(f"Unmatched invoices: {len(unmatched_inv)}")
print("\nOutput: " + _csv_path)
print("Open in Excel (separator: ;, encoding: UTF-8)")
|
||||
Reference in New Issue
Block a user