Files
gomag-vending/scripts/match_by_price.py
Claude Agent 3d73d9e422 add: scripts for invoice-order matching and SKU discovery
Analysis scripts to match GoMag orders with Oracle invoices by
date/client/total, then compare line items by price to discover
SKU → id_articol mappings. Generates SQL for nom_articole codmat
updates and CSV for ARTICOLE_TERTI repackaging/set mappings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 12:01:51 +00:00

415 lines
17 KiB
Python

"""
Match GoMag SKUs → ROA id_articol by matching order lines on unit price.
For each matched order-invoice pair, compare lines by price to discover mappings.
Output: SQL for nom_articole codmat updates + CSV for ARTICOLE_TERTI mappings.
"""
import csv
import os
import sqlite3
import sys
from collections import defaultdict
from datetime import date
from difflib import SequenceMatcher

import oracledb
# Switch stdout to UTF-8, substituting any character that cannot be encoded.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Put the Oracle home bin directory at the front of PATH before the client is
# initialised (presumably so the client libraries are picked up from there —
# confirm against the target machine).
_oracle_bin_dir = r'C:\app\Server\product\18.0.0\dbhomeXE\bin'
os.environ['PATH'] = ';'.join([_oracle_bin_dir, os.environ.get('PATH', '')])
oracledb.init_oracle_client()
# --- Load GoMag orders ---
# Read every order and its line items from the local SQLite import database.
# The try/finally guarantees the connection is closed even when a query fails.
db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    db.row_factory = sqlite3.Row
    c = db.cursor()
    c.execute("SELECT order_number, order_date, customer_name, status, order_total FROM orders ORDER BY order_date DESC")
    orders = [dict(r) for r in c.fetchall()]
    for order in orders:
        # Line items are attached to each order dict under the 'items' key.
        c.execute("SELECT sku, product_name, quantity, price, vat FROM order_items WHERE order_number = ? ORDER BY sku", (order['order_number'],))
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    db.close()
print(f"Loaded {len(orders)} GoMag orders")
# --- Load Oracle invoices ---
# The invoice date window is derived from the orders that actually have a date.
# str(None)[:10] is the literal 'None', which would otherwise be passed to
# TO_DATE, so undated orders are left out of the window computation.
order_days = [str(o['order_date'])[:10] for o in orders if o['order_date']]
if not order_days:
    # min()/max() on an empty sequence would raise an opaque ValueError.
    sys.exit("No GoMag orders with an order_date were loaded; nothing to match against Oracle invoices.")
min_date = min(order_days)
max_date = max(order_days)

# NOTE(review): credentials are hard-coded here; consider moving them to configuration.
conn = oracledb.connect(user='VENDING', password='ROMFASTSOFT', dsn='ROA')
cur = conn.cursor()

# Invoice headers: non-deleted, positive total, dated within 3 days of the
# order date range on either side.
cur.execute("""
    SELECT v.id_vanzare, v.numar_act, v.serie_act,
    TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
    v.total_fara_tva, v.total_cu_tva, v.id_part,
    p.denumire as partener, p.prenume
    FROM vanzari v
    LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
    WHERE v.sters = 0 AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 3
    AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 3 AND v.total_cu_tva > 0
    ORDER BY v.data_act DESC
""", [min_date, max_date])
invoices = []
inv_map = {}  # id_vanzare → invoice dict, used to attach detail lines below
for r in cur:
    inv = {
        'id_vanzare': r[0], 'numar_act': r[1], 'serie_act': r[2] or '',
        'data_act': r[3], 'total_fara_tva': float(r[4] or 0),
        'total_cu_tva': float(r[5] or 0), 'id_part': r[6],
        # Partner display name is "denumire prenume" with missing parts dropped.
        'partener': ((r[7] or '') + ' ' + (r[8] or '')).strip(),
        'items': [],
    }
    invoices.append(inv)
    inv_map[inv['id_vanzare']] = inv

# Invoice lines are fetched in batches of 500 ids, each id bound as a named
# parameter (:d0, :d1, ...) inside the IN list.
inv_ids = [inv['id_vanzare'] for inv in invoices]
for i in range(0, len(inv_ids), 500):
    batch = inv_ids[i:i+500]
    placeholders = ",".join([f":d{j}" for j in range(len(batch))])
    params = {f"d{j}": did for j, did in enumerate(batch)}
    cur.execute(f"""
        SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire,
        vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
        FROM vanzari_detalii vd
        LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
        WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0
        ORDER BY vd.id_vanzare, vd.id_articol
    """, params)
    for r in cur:
        inv_map[r[0]]['items'].append({
            'id_articol': r[1], 'codmat': r[2], 'denumire': r[3],
            'cantitate': float(r[4] or 0), 'pret': float(r[5] or 0),
            'pret_cu_tva': float(r[6] or 0), 'tva_pct': float(r[7] or 0),
        })
print(f"Loaded {len(invoices)} Oracle invoices")
# --- Match orders → invoices by date, total and customer name ---
def normalize_name(name):
    """Return *name* trimmed, upper-cased and with legal-form markers normalised.

    ``S.R.L.`` and ``S.R.L`` are rewritten to ``SRL``. The standalone words
    ``SC``, ``PFA`` and ``PF`` are dropped when they are followed by a space.
    Only whole space-separated words are removed, so a name that merely ends
    in those letters (e.g. ``FRANCISC``) is left intact.

    Returns '' when *name* is falsy (None or empty).
    """
    if not name:
        return ''
    n = name.strip().upper()
    # Longer spelling first so the trailing dot is consumed with it.
    for old, new in (('S.R.L.', 'SRL'), ('S.R.L', 'SRL')):
        n = n.replace(old, new)
    legal_forms = {'SC', 'PFA', 'PF'}
    words = n.split(' ')
    last = len(words) - 1
    # The final word is never followed by a space, so it is always kept.
    kept = [w for i, w in enumerate(words) if i == last or w not in legal_forms]
    return ' '.join(kept)
def name_similarity(n1, n2):
    """Score how alike two names are after normalisation.

    Both names are passed through normalize_name. If either one normalises to
    an empty string the result is 0. Otherwise the SequenceMatcher ratio of
    the two is computed; when the first name has at least two words, the ratio
    with its words in reverse order is computed as well and the larger of the
    two ratios is returned.
    """
    left = normalize_name(n1)
    right = normalize_name(n2)
    if not (left and right):
        return 0
    direct = SequenceMatcher(None, left, right).ratio()
    tokens = left.split()
    if len(tokens) < 2:
        return direct
    # Try the first name with its word order flipped as well.
    flipped = ' '.join(tokens[::-1])
    return max(direct, SequenceMatcher(None, flipped, right).ratio())
matches = []
used_invoices = set()


def _parse_day(value):
    """Return the date held in the first 10 characters of *value*, or None.

    The text is expected to start with 'YYYY-MM-DD'; anything that does not
    parse (including None) yields None instead of raising.
    """
    try:
        return date.fromisoformat(str(value)[:10])
    except ValueError:
        return None


# Parse each invoice date once instead of once per (order, invoice) pair.
invoice_days = {inv['id_vanzare']: _parse_day(inv['data_act']) for inv in invoices}

# Largest orders pick their invoice first; each invoice is used at most once.
orders_sorted = sorted(orders, key=lambda o: -(o['order_total'] or 0))
for order in orders_sorted:
    best_match = None
    best_score = 0
    order_day = _parse_day(order['order_date'])
    if order_day is None:
        # No usable date: no invoice can qualify for this order.
        continue
    order_total = order['order_total'] or 0
    for inv in invoices:
        if inv['id_vanzare'] in used_invoices:
            continue
        inv_day = invoice_days[inv['id_vanzare']]
        if inv_day is None:
            continue
        # Real calendar-day gap. Subtracting YYYYMMDD integers would report
        # e.g. 73 for 2026-02-28 vs 2026-03-01 and reject the pair.
        date_diff = abs((order_day - inv_day).days)
        if date_diff > 3:
            continue
        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        # Reject only when the totals differ by more than 15 in both
        # absolute and percentage terms.
        if total_pct > 15 and total_diff > 15:
            continue
        sim = name_similarity(order['customer_name'] or '', inv['partener'])
        date_score = 1 if date_diff == 0 else (0.7 if date_diff == 1 else (0.4 if date_diff == 2 else 0.2))
        total_score = 1 - min(total_pct / 100, 1)
        # Weighted blend: name 45%, total 40%, date 15%.
        score = sim * 0.45 + total_score * 0.40 + date_score * 0.15
        if score > best_score:
            best_score = score
            best_match = inv
    if best_match and best_score > 0.45:
        matches.append({'order': order, 'invoice': best_match, 'score': best_score})
        used_invoices.add(best_match['id_vanzare'])
print(f"Matched: {len(matches)} orders → invoices")
# --- Match line items by PRICE ---
# For each matched pair, match GoMag items → ROA items on amounts net of VAT,
# in four passes of decreasing strictness.
# Discovery: SKU → (id_articol, codmat, denumire, qty_ratio)


def _net_of_vat(amount, vat_pct):
    """Return *amount* with VAT removed; a falsy *vat_pct* leaves it unchanged."""
    return amount / (1 + vat_pct / 100) if vat_pct else amount


# Collect all discovered mappings: sku → list of observations
sku_observations = defaultdict(list)
for m in matches:
    o = m['order']
    inv = m['invoice']
    go_items = o['items']
    # Only positive-quantity ROA lines other than transport/discount take part.
    roa_items = [ri for ri in inv['items'] if ri['cantitate'] > 0
                 and ri['codmat'] not in ('TRANSPORT', 'DISCOUNT')]
    # Indices of items not yet paired, on each side.
    go_remaining = list(range(len(go_items)))
    roa_remaining = list(range(len(roa_items)))
    item_matches = []  # (GoMag index, [ROA indices])

    # Pass 1: match by line total without VAT (GoMag price includes VAT,
    # ROA pret does not), tolerance 0.50.
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_line_fara = _net_of_vat(gi['quantity'] * gi['price'], gi['vat'])
        for ri_idx in list(roa_remaining):
            ri = roa_items[ri_idx]
            roa_line = ri['cantitate'] * ri['pret']
            if abs(go_line_fara - roa_line) < 0.50:
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break

    # Pass 2: match by unit price (quantities may differ), tolerance 0.02.
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_price_fara = _net_of_vat(gi['price'], gi['vat'])
        for ri_idx in list(roa_remaining):
            ri = roa_items[ri_idx]
            if abs(go_price_fara - ri['pret']) < 0.02:
                item_matches.append((gi_idx, [ri_idx]))
                go_remaining.remove(gi_idx)
                roa_remaining.remove(ri_idx)
                break

    # Pass 3: exactly one item left on each side — pair them without any
    # price check.
    if len(go_remaining) == 1 and len(roa_remaining) == 1:
        item_matches.append((go_remaining[0], [roa_remaining[0]]))
        go_remaining = []
        roa_remaining = []

    # Pass 4: 1:N — one GoMag item against the combined total of two ROA
    # items, tolerance 1.0.
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_line_fara = _net_of_vat(gi['quantity'] * gi['price'], gi['vat'])
        if len(roa_remaining) >= 2:
            for i_pos, ri_idx1 in enumerate(roa_remaining):
                for ri_idx2 in roa_remaining[i_pos+1:]:
                    ri1 = roa_items[ri_idx1]
                    ri2 = roa_items[ri_idx2]
                    combined = ri1['cantitate'] * ri1['pret'] + ri2['cantitate'] * ri2['pret']
                    if abs(go_line_fara - combined) < 1.0:
                        item_matches.append((gi_idx, [ri_idx1, ri_idx2]))
                        go_remaining.remove(gi_idx)
                        roa_remaining.remove(ri_idx1)
                        roa_remaining.remove(ri_idx2)
                        break
                else:
                    # No partner found for ri_idx1; try the next first item.
                    continue
                # A pair was found: stop searching for this GoMag item.
                break

    # Record one observation per (GoMag item, ROA item) pairing.
    for gi_idx, ri_indices in item_matches:
        gi = go_items[gi_idx]
        ris = [roa_items[i] for i in ri_indices]
        is_set = len(ris) > 1
        if is_set:
            go_line_fara = _net_of_vat(gi['quantity'] * gi['price'], gi['vat'])
        for ri in ris:
            qty_ratio = ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1
            obs = {
                'id_articol': ri['id_articol'],
                'codmat': ri['codmat'],
                'denumire': ri['denumire'],
                'go_qty': gi['quantity'],
                'roa_qty': ri['cantitate'],
                'qty_ratio': round(qty_ratio, 4),
                'go_price': gi['price'],
                'roa_pret': ri['pret'],
                'product_name': gi['product_name'],
                'order': o['order_number'],
                'factura': f"VM{inv['numar_act']}",
            }
            if is_set:
                # Complex set: also record this component's share of the
                # GoMag line value, in percent.
                ri_line = ri['cantitate'] * ri['pret']
                obs['type'] = 'set'
                obs['procent_pret'] = round(ri_line / go_line_fara * 100, 2) if go_line_fara else 0
            else:
                # A ratio within 0.01 of 1 is a plain 1:1 mapping; anything
                # else is a repack.
                obs['type'] = 'simple' if abs(qty_ratio - 1) < 0.01 else 'repack'
            sku_observations[gi['sku']].append(obs)
conn.close()
# --- Analyze observations: find consistent mappings ---
print(f"\n{'='*80}")
print(f"ANALYSIS: {len(sku_observations)} unique SKUs with observations")
print(f"{'='*80}")
# Each SKU ends up in exactly one of these buckets.
simple_update = {}  # SKU → {id_articol, codmat, denumire} — for nom_articole UPDATE
repack_csv = {}  # (SKU, codmat) → {cantitate_roa} — for ARTICOLE_TERTI
set_csv = {}  # (SKU, codmat) → {cantitate_roa, procent_pret}
inconsistent = {}  # SKU → list of conflicting observations
already_has_codmat = {}  # SKU already equals codmat
for sku, obs_list in sorted(sku_observations.items()):
    # Any observation whose codmat already equals the SKU means it is mapped.
    if any(obs.get('codmat') == sku for obs in obs_list):
        already_has_codmat[sku] = obs_list[0]
        continue

    if any(obs['type'] == 'set' for obs in obs_list):
        # Complex set — keep the first observation seen for each component.
        components = {}
        for obs in obs_list:
            if obs['type'] == 'set':
                components.setdefault(obs['id_articol'], obs)
        if len(components) >= 2:
            for art_id, obs in components.items():
                codmat = obs['codmat'] or f"ID:{art_id}"
                set_csv[(sku, codmat)] = {
                    'id_articol': art_id,
                    'cantitate_roa': obs['qty_ratio'],
                    'procent_pret': obs['procent_pret'],
                    'denumire': obs['denumire'],
                    'product_name': obs['product_name'],
                }
        else:
            # A "set" with fewer than two distinct articles cannot be written
            # as a component list; flag it for manual review so the SKU does
            # not vanish from every output.
            inconsistent[sku] = obs_list
        continue

    # Group by id_articol: a usable simple/repack mapping needs exactly one.
    by_articol = defaultdict(list)
    for obs in obs_list:
        by_articol[obs['id_articol']].append(obs)
    if len(by_articol) != 1:
        # Multiple different articles for same SKU across orders
        inconsistent[sku] = obs_list
        continue

    art_id, art_obs = next(iter(by_articol.items()))
    obs = art_obs[0]
    # All quantity ratios must agree with their mean to within 0.01.
    ratios = [ob['qty_ratio'] for ob in art_obs]
    avg_ratio = sum(ratios) / len(ratios)
    if not all(abs(r - avg_ratio) < 0.01 for r in ratios):
        inconsistent[sku] = obs_list
    elif abs(avg_ratio - 1.0) < 0.01:
        # Simple 1:1
        simple_update[sku] = {
            'id_articol': art_id,
            'codmat_actual': obs['codmat'],
            'denumire': obs['denumire'],
            'product_name': obs['product_name'],
            'observations': len(art_obs),
        }
    else:
        # Repackaging
        codmat = obs['codmat'] or f"ID:{art_id}"
        repack_csv[(sku, codmat)] = {
            'id_articol': art_id,
            'cantitate_roa': round(avg_ratio, 3),
            'denumire': obs['denumire'],
            'product_name': obs['product_name'],
            'observations': len(art_obs),
        }
# --- Output ---
# Create the output directory and print a per-bucket summary to the console.
# denumire comes from a LEFT JOIN and may be None, so it is defaulted to ''
# before slicing.
out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)
print(f"\n{'='*80}")
print(f"RESULTS")
print(f"{'='*80}")
print(f"\n--- Already mapped (SKU == CODMAT): {len(already_has_codmat)} ---")
print(f"\n--- Simple 1:1 → UPDATE nom_articole SET codmat = SKU: {len(simple_update)} ---")
for sku, info in sorted(simple_update.items()):
    shown = (info['denumire'] or '')[:40]
    print(f" {sku:25s} → id_articol={info['id_articol']:6d} codmat_actual='{info['codmat_actual'] or ''}' [{shown}] ({info['observations']} obs)")
print(f"\n--- Repackaging → ARTICOLE_TERTI: {len(repack_csv)} ---")
for (sku, codmat), info in sorted(repack_csv.items()):
    shown = (info['denumire'] or '')[:35]
    print(f" {sku:25s}{codmat:15s} x{info['cantitate_roa']} id_art={info['id_articol']} [{shown}] ({info['observations']} obs)")
print(f"\n--- Complex sets → ARTICOLE_TERTI: {len(set_csv)} ---")
for (sku, codmat), info in sorted(set_csv.items()):
    shown = (info['denumire'] or '')[:35]
    print(f" {sku:25s}{codmat:15s} {info['procent_pret']:6.2f}% x{info['cantitate_roa']} [{shown}]")
print(f"\n--- Inconsistent (different articles across orders): {len(inconsistent)} ---")
for sku, obs_list in sorted(inconsistent.items()):
    # Distinct (id_articol, truncated name) pairs seen for this SKU.
    arts = {(ob['id_articol'], (ob['denumire'] or '')[:30]) for ob in obs_list}
    listing = '; '.join(f'id={a[0]}({a[1]})' for a in arts)
    print(f" {sku:25s}{len(arts)} different articles: {listing}")
# Write SQL for simple updates


def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so a SKU containing an apostrophe
    cannot terminate the literal early.
    """
    return "'" + str(value).replace("'", "''") + "'"


def _one_line(text, limit=None):
    """Return up to *limit* characters of *text* with line breaks turned into spaces.

    None is treated as ''. Keeping the text on one line prevents it from
    escaping the ``--`` comment it is written into.
    """
    return (text or '').replace('\r', ' ').replace('\n', ' ')[:limit]


with open(os.path.join(out_dir, 'update_codmat.sql'), 'w', encoding='utf-8') as f:
    f.write("-- UPDATE nom_articole: set codmat = GoMag SKU for 1:1 mappings\n")
    f.write("-- Generated from invoice-order matching\n")
    f.write("-- VERIFY BEFORE RUNNING!\n\n")
    for sku, info in sorted(simple_update.items()):
        # Two comment lines of context, then the statement itself.
        f.write(f"-- {_one_line(info['product_name'], 60)}{_one_line(info['denumire'], 60)}\n")
        f.write(f"-- Current codmat: '{_one_line(info['codmat_actual'])}' | {info['observations']} order matches\n")
        f.write(f"UPDATE nom_articole SET codmat = {_sql_quote(sku)} WHERE id_articol = {info['id_articol']} AND sters = 0;\n\n")
# Repackaging mappings (ARTICOLE_TERTI format); procent_pret is always 100.
repack_path = os.path.join(out_dir, 'repack_mappings.csv')
with open(repack_path, 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa', 'observations'])
    w.writerows(
        [key[0], key[1], row['cantitate_roa'], 100, row['id_articol'], row['product_name'], row['denumire'], row['observations']]
        for key, row in sorted(repack_csv.items())
    )

# Set mappings: one row per (SKU, component) with its price percentage.
sets_path = os.path.join(out_dir, 'set_mappings.csv')
with open(sets_path, 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa'])
    w.writerows(
        [key[0], key[1], row['cantitate_roa'], row['procent_pret'], row['id_articol'], row['product_name'], row['denumire']]
        for key, row in sorted(set_csv.items())
    )

# Inconsistent SKUs: every raw observation is dumped for manual review.
review_path = os.path.join(out_dir, 'inconsistent_skus.csv')
with open(review_path, 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'product_name', 'id_articol', 'codmat', 'denumire_roa', 'qty_ratio', 'type', 'order', 'factura'])
    w.writerows(
        [bad_sku, ob['product_name'], ob['id_articol'], ob['codmat'] or '',
         ob['denumire'], ob['qty_ratio'], ob['type'], ob['order'], ob['factura']]
        for bad_sku, bad_obs in sorted(inconsistent.items())
        for ob in bad_obs
    )

# Final summary of what was written and how many entries each file holds.
print(f"\nOutput written to {out_dir}:")
for summary_line in (
    f" update_codmat.sql - {len(simple_update)} SQL updates for nom_articole",
    f" repack_mappings.csv - {len(repack_csv)} repackaging mappings",
    f" set_mappings.csv - {len(set_csv)} complex set mappings",
    f" inconsistent_skus.csv - {len(inconsistent)} SKUs needing manual review",
):
    print(summary_line)