# Analysis scripts to match GoMag orders with Oracle invoices by date/client/total, then compare line items by price to discover SKU → id_articol mappings.
# Generates SQL for nom_articole codmat updates and CSV for ARTICOLE_TERTI repackaging/set mappings.
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
# 415 lines · 17 KiB · Python
"""
|
|
Match GoMag SKUs → ROA id_articol by matching order lines on unit price.
|
|
For each matched order-invoice pair, compare lines by price to discover mappings.
|
|
Output: SQL for nom_articole codmat updates + CSV for ARTICOLE_TERTI mappings.
|
|
"""
|
|
import csv
import os
import sqlite3
import sys
from collections import defaultdict
from datetime import date
from difflib import SequenceMatcher

import oracledb
|
|
|
|
# Emit UTF-8 on stdout and substitute any character that cannot be encoded,
# so printing never raises an encoding error.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Put the Oracle client binaries at the front of PATH, then initialise the
# Oracle client libraries used by python-oracledb.
_previous_path = os.environ.get('PATH', '')
os.environ['PATH'] = ';'.join([r'C:\app\Server\product\18.0.0\dbhomeXE\bin', _previous_path])
oracledb.init_oracle_client()
|
|
|
|
# --- Load GoMag orders ---
# Read every order (newest first) and attach its line items, sorted by SKU,
# under the 'items' key of the order dict.
db = sqlite3.connect(r'C:\gomag-vending\api\data\import.db')
try:
    db.row_factory = sqlite3.Row
    c = db.cursor()
    c.execute("SELECT order_number, order_date, customer_name, status, order_total FROM orders ORDER BY order_date DESC")
    orders = [dict(r) for r in c.fetchall()]
    for order in orders:
        c.execute(
            "SELECT sku, product_name, quantity, price, vat FROM order_items WHERE order_number = ? ORDER BY sku",
            (order['order_number'],),
        )
        order['items'] = [dict(r) for r in c.fetchall()]
finally:
    # Close the connection even when one of the queries above fails.
    db.close()
print(f"Loaded {len(orders)} GoMag orders")
|
|
|
|
# --- Load Oracle invoices ---
# Without orders there is no date window to query, and min()/max() below
# would raise ValueError, so stop with a clear message instead.
if not orders:
    sys.exit("No GoMag orders loaded - nothing to match against Oracle invoices")

# NOTE(review): the credentials used to be hard-coded only. They can now be
# overridden through ORACLE_USER / ORACLE_PASSWORD / ORACLE_DSN; the previous
# values remain as defaults so existing runs behave the same. Consider
# removing the defaults from source control.
conn = oracledb.connect(
    user=os.environ.get('ORACLE_USER', 'VENDING'),
    password=os.environ.get('ORACLE_PASSWORD', 'ROMFASTSOFT'),
    dsn=os.environ.get('ORACLE_DSN', 'ROA'),
)
cur = conn.cursor()

# Date window covered by the orders (first 10 characters = 'YYYY-MM-DD').
order_days = [str(o['order_date'])[:10] for o in orders]
min_date = min(order_days)
max_date = max(order_days)

# Non-deleted invoices with a positive total, from 3 days before the first
# order to 3 days after the last one.
cur.execute("""
    SELECT v.id_vanzare, v.numar_act, v.serie_act,
           TO_CHAR(v.data_act, 'YYYY-MM-DD') as data_act,
           v.total_fara_tva, v.total_cu_tva, v.id_part,
           p.denumire as partener, p.prenume
    FROM vanzari v
    LEFT JOIN nom_parteneri p ON v.id_part = p.id_part
    WHERE v.sters = 0 AND v.data_act >= TO_DATE(:1, 'YYYY-MM-DD') - 3
      AND v.data_act <= TO_DATE(:2, 'YYYY-MM-DD') + 3 AND v.total_cu_tva > 0
    ORDER BY v.data_act DESC
""", [min_date, max_date])

invoices = []
inv_map = {}  # id_vanzare -> invoice dict, used to attach line items below
for (id_vanzare, numar_act, serie_act, data_act, total_fara_tva,
     total_cu_tva, id_part, denumire, prenume) in cur:
    inv = {
        'id_vanzare': id_vanzare,
        'numar_act': numar_act,
        'serie_act': serie_act or '',
        'data_act': data_act,
        'total_fara_tva': float(total_fara_tva or 0),
        'total_cu_tva': float(total_cu_tva or 0),
        'id_part': id_part,
        # Partner display name: "denumire prenume" with missing parts dropped.
        'partener': ((denumire or '') + ' ' + (prenume or '')).strip(),
        'items': [],
    }
    invoices.append(inv)
    inv_map[inv['id_vanzare']] = inv

# Fetch the invoice lines in batches of 500 ids, one bind variable per id
# (presumably to stay below Oracle's IN-list size limit - confirm).
inv_ids = [inv['id_vanzare'] for inv in invoices]
for i in range(0, len(inv_ids), 500):
    batch = inv_ids[i:i + 500]
    placeholders = ",".join(f":d{j}" for j in range(len(batch)))
    params = {f"d{j}": did for j, did in enumerate(batch)}
    cur.execute(f"""
        SELECT vd.id_vanzare, vd.id_articol, a.codmat, a.denumire,
               vd.cantitate, vd.pret, vd.pret_cu_tva, vd.proc_tvav
        FROM vanzari_detalii vd
        LEFT JOIN nom_articole a ON vd.id_articol = a.id_articol
        WHERE vd.id_vanzare IN ({placeholders}) AND vd.sters = 0
        ORDER BY vd.id_vanzare, vd.id_articol
    """, params)
    for r in cur:
        inv_map[r[0]]['items'].append({
            'id_articol': r[1], 'codmat': r[2], 'denumire': r[3],
            'cantitate': float(r[4] or 0), 'pret': float(r[5] or 0),
            'pret_cu_tva': float(r[6] or 0), 'tva_pct': float(r[7] or 0),
        })

print(f"Loaded {len(invoices)} Oracle invoices")
|
|
|
|
# --- Match orders → invoices (same as before) ---
|
|
def normalize_name(name):
    """Return *name* stripped, upper-cased and with legal-form markers normalised.

    'S.R.L.' / 'S.R.L' are rewritten to 'SRL'. The standalone words 'SC',
    'PFA' and 'PF' are dropped when another word follows them. They are
    matched as whole space-separated words only, so names that merely end
    in those letters (e.g. 'FRESC', 'STAMPF') are left intact.

    Args:
        name: Customer or partner name; may be None or empty.

    Returns:
        The normalised name, or '' when *name* is falsy.
    """
    if not name:
        return ''
    n = name.strip().upper()
    for old, new in (('S.R.L.', 'SRL'), ('S.R.L', 'SRL')):
        n = n.replace(old, new)
    # Split on single spaces (not arbitrary whitespace) so repeated spaces
    # survive unchanged. A marker in last position has no trailing space
    # and is therefore kept.
    words = n.split(' ')
    last = len(words) - 1
    return ' '.join(
        word for pos, word in enumerate(words)
        if pos == last or word not in ('SC', 'PFA', 'PF')
    )
|
|
|
|
def name_similarity(n1, n2):
    """Return a similarity ratio between two names after normalisation.

    Both names go through normalize_name(). When the first name has at least
    two words it is also compared with its words in reverse order, and the
    better of the two SequenceMatcher ratios is returned.

    Returns:
        0 when either normalised name is empty, otherwise the best ratio.
    """
    left = normalize_name(n1)
    right = normalize_name(n2)
    if not (left and right):
        return 0

    candidates = [left]
    tokens = left.split()
    if len(tokens) >= 2:
        candidates.append(' '.join(tokens[::-1]))
    return max(SequenceMatcher(None, candidate, right).ratio() for candidate in candidates)
|
|
|
|
def _parse_iso_date(value):
    """Return the date in the first 10 characters of *value*, or None if unparsable."""
    try:
        return date.fromisoformat(str(value)[:10])
    except ValueError:
        return None


matches = []
used_invoices = set()
# Orders are processed from the largest total down; each invoice can be
# claimed by one order only.
orders_sorted = sorted(orders, key=lambda o: -(o['order_total'] or 0))

# Parse every invoice date once instead of once per order.
invoice_days = {inv['id_vanzare']: _parse_iso_date(inv['data_act']) for inv in invoices}

for order in orders_sorted:
    best_match = None
    best_score = 0
    order_day = _parse_iso_date(order['order_date'])
    if order_day is None:
        # No usable date: no invoice can be compared with this order.
        continue
    order_total = order['order_total'] or 0

    for inv in invoices:
        if inv['id_vanzare'] in used_invoices:
            continue
        inv_day = invoice_days[inv['id_vanzare']]
        if inv_day is None:
            continue
        # Real calendar distance in days. Subtracting YYYYMMDD integers
        # breaks across month and year boundaries.
        date_diff = abs((order_day - inv_day).days)
        if date_diff > 3:
            continue

        total_diff = abs(order_total - inv['total_cu_tva'])
        total_pct = total_diff / max(order_total, 0.01) * 100
        # Reject only when the totals differ by more than 15% AND more than 15.
        if total_pct > 15 and total_diff > 15:
            continue

        sim = name_similarity(order['customer_name'] or '', inv['partener'])
        date_score = 1 if date_diff == 0 else (0.7 if date_diff == 1 else (0.4 if date_diff == 2 else 0.2))
        total_score = 1 - min(total_pct / 100, 1)
        score = sim * 0.45 + total_score * 0.40 + date_score * 0.15
        if score > best_score:
            best_score = score
            best_match = inv

    if best_match and best_score > 0.45:
        matches.append({'order': order, 'invoice': best_match, 'score': best_score})
        used_invoices.add(best_match['id_vanzare'])

print(f"Matched: {len(matches)} orders → invoices")
|
|
|
|
# --- Match line items by PRICE ---
# For each matched pair, match GoMag items → ROA items by line total (qty * price)
# Discovery: SKU → (id_articol, codmat, denumire, qty_ratio)


def _net_amount(gross, vat_pct):
    """Return *gross* without VAT; a falsy VAT percentage leaves it unchanged."""
    return gross / (1 + vat_pct / 100) if vat_pct else gross


def _roa_line_total(ri):
    """Return the line total (quantity * unit price) of a ROA invoice line."""
    return ri['cantitate'] * ri['pret']


def _qty_ratio(gi, ri):
    """Return ROA quantity per GoMag quantity, or 1 when the GoMag quantity is falsy."""
    return ri['cantitate'] / gi['quantity'] if gi['quantity'] else 1


def _find_pair(target, roa_items, candidates):
    """Return the first two candidate indices whose combined line totals are within 1.0 of *target*, else None."""
    for pos, first in enumerate(candidates):
        for second in candidates[pos + 1:]:
            combined = _roa_line_total(roa_items[first]) + _roa_line_total(roa_items[second])
            if abs(target - combined) < 1.0:
                return first, second
    return None


def _observation(kind, gi, ri, order_number, numar_act):
    """Build one SKU → article observation record of the given *kind*."""
    return {
        'type': kind,
        'id_articol': ri['id_articol'],
        'codmat': ri['codmat'],
        'denumire': ri['denumire'],
        'go_qty': gi['quantity'],
        'roa_qty': ri['cantitate'],
        'qty_ratio': round(_qty_ratio(gi, ri), 4),
        'go_price': gi['price'],
        'roa_pret': ri['pret'],
        'product_name': gi['product_name'],
        'order': order_number,
        'factura': f"VM{numar_act}",
    }


# Collect all discovered mappings: sku → list of observations
sku_observations = defaultdict(list)

for m in matches:
    o = m['order']
    inv = m['invoice']
    go_items = o['items']
    # Exclude transport/discount lines and non-positive quantities from ROA
    roa_items = [ri for ri in inv['items']
                 if ri['cantitate'] > 0 and ri['codmat'] not in ('TRANSPORT', 'DISCOUNT')]

    # Indices still unmatched on each side; item_matches holds
    # (GoMag index, [ROA indices]) pairs.
    go_remaining = list(range(len(go_items)))
    roa_remaining = list(range(len(roa_items)))
    item_matches = []

    # Pass 1: match by line total without VAT (GoMag prices include VAT)
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_line_fara = _net_amount(gi['quantity'] * gi['price'], gi['vat'])
        hit = next((ri_idx for ri_idx in roa_remaining
                    if abs(go_line_fara - _roa_line_total(roa_items[ri_idx])) < 0.50), None)
        if hit is not None:
            item_matches.append((gi_idx, [hit]))
            go_remaining.remove(gi_idx)
            roa_remaining.remove(hit)

    # Pass 2: match by unit price (for items where qty might differ but price is same)
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_price_fara = _net_amount(gi['price'], gi['vat'])
        hit = next((ri_idx for ri_idx in roa_remaining
                    if abs(go_price_fara - roa_items[ri_idx]['pret']) < 0.02), None)
        if hit is not None:
            item_matches.append((gi_idx, [hit]))
            go_remaining.remove(gi_idx)
            roa_remaining.remove(hit)

    # Pass 3: exactly one item left on each side → pair them
    if len(go_remaining) == 1 and len(roa_remaining) == 1:
        item_matches.append((go_remaining[0], [roa_remaining[0]]))
        go_remaining = []
        roa_remaining = []

    # Pass 4: 1:N — one GoMag item matches two ROA items by combined total
    for gi_idx in list(go_remaining):
        gi = go_items[gi_idx]
        go_line_fara = _net_amount(gi['quantity'] * gi['price'], gi['vat'])
        pair = _find_pair(go_line_fara, roa_items, roa_remaining)
        if pair is not None:
            item_matches.append((gi_idx, list(pair)))
            go_remaining.remove(gi_idx)
            roa_remaining.remove(pair[0])
            roa_remaining.remove(pair[1])

    # Record observations
    for gi_idx, ri_indices in item_matches:
        gi = go_items[gi_idx]
        ris = [roa_items[i] for i in ri_indices]

        if len(ris) == 1:
            ri = ris[0]
            # A quantity ratio of ~1 is a plain 1:1 mapping; anything else
            # is treated as repackaging.
            kind = 'simple' if abs(_qty_ratio(gi, ri) - 1) < 0.01 else 'repack'
            sku_observations[gi['sku']].append(
                _observation(kind, gi, ri, o['order_number'], inv['numar_act']))
        else:
            # Complex set: record each component with its share of the
            # GoMag line total (percent).
            go_line_fara = _net_amount(gi['quantity'] * gi['price'], gi['vat'])
            for ri in ris:
                obs = _observation('set', gi, ri, o['order_number'], inv['numar_act'])
                obs['procent_pret'] = (
                    round(_roa_line_total(ri) / go_line_fara * 100, 2) if go_line_fara else 0
                )
                sku_observations[gi['sku']].append(obs)

conn.close()
|
|
|
|
# --- Analyze observations: find consistent mappings ---
print(f"\n{'='*80}")
print(f"ANALYSIS: {len(sku_observations)} unique SKUs with observations")
print(f"{'='*80}")

# For each SKU, check if all observations agree on the same id_articol
simple_update = {}       # SKU → {id_articol, codmat, denumire} — for nom_articole UPDATE
repack_csv = {}          # (SKU, codmat) → {cantitate_roa} — for ARTICOLE_TERTI
set_csv = {}             # (SKU, codmat) → {cantitate_roa, procent_pret}
inconsistent = {}        # SKU → list of conflicting observations
already_has_codmat = {}  # SKU already equals codmat

for sku, obs_list in sorted(sku_observations.items()):
    # Group by id_articol
    by_articol = defaultdict(list)
    for obs in obs_list:
        by_articol[obs['id_articol']].append(obs)

    # Any observation showing SKU == CODMAT means the mapping already exists.
    if any(obs.get('codmat') == sku for obs in obs_list):
        already_has_codmat[sku] = obs_list[0]
        continue

    types = {obs['type'] for obs in obs_list}

    if 'set' in types:
        # Complex set — keep the first 'set' observation per component article.
        components = {}
        for obs in obs_list:
            if obs['type'] == 'set':
                components.setdefault(obs['id_articol'], obs)
        if len(components) >= 2:
            for art_id, obs in components.items():
                codmat = obs['codmat'] or f"ID:{art_id}"
                set_csv[(sku, codmat)] = {
                    'id_articol': art_id,
                    'cantitate_roa': obs['qty_ratio'],
                    'procent_pret': obs['procent_pret'],
                    'denumire': obs['denumire'],
                    'product_name': obs['product_name'],
                }
        # NOTE(review): a set with fewer than two distinct component articles
        # is skipped and reported nowhere - confirm this is intended.
        continue

    if len(by_articol) != 1:
        # Multiple different articles for same SKU across orders
        inconsistent[sku] = obs_list
        continue

    # All observations point to the same article.
    art_id, art_obs = next(iter(by_articol.items()))
    obs = art_obs[0]

    # The quantity ratios must agree with their average to within 0.01.
    ratios = [entry['qty_ratio'] for entry in art_obs]
    avg_ratio = sum(ratios) / len(ratios)
    if not all(abs(r - avg_ratio) < 0.01 for r in ratios):
        inconsistent[sku] = obs_list
        continue

    if abs(avg_ratio - 1.0) < 0.01:
        # Simple 1:1
        simple_update[sku] = {
            'id_articol': art_id,
            'codmat_actual': obs['codmat'],
            'denumire': obs['denumire'],
            'product_name': obs['product_name'],
            'observations': len(art_obs),
        }
    else:
        # Repackaging
        codmat = obs['codmat'] or f"ID:{art_id}"
        repack_csv[(sku, codmat)] = {
            'id_articol': art_id,
            'cantitate_roa': round(avg_ratio, 3),
            'denumire': obs['denumire'],
            'product_name': obs['product_name'],
            'observations': len(art_obs),
        }
|
|
|
|
# --- Output ---
def _text(value):
    """Return *value* as text, mapping None to an empty string."""
    return '' if value is None else str(value)


def _comment_text(value, width):
    """Return *value* flattened to one line and cut to *width* characters, safe inside a `--` SQL comment."""
    return _text(value).replace('\r', ' ').replace('\n', ' ')[:width]


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal with embedded quotes doubled."""
    return "'" + _text(value).replace("'", "''") + "'"


out_dir = r'C:\gomag-vending\scripts\output'
os.makedirs(out_dir, exist_ok=True)

print(f"\n{'='*80}")
print(f"RESULTS")
print(f"{'='*80}")

print(f"\n--- Already mapped (SKU == CODMAT): {len(already_has_codmat)} ---")

# denumire / product_name can be None (article missing from nom_articole via
# the LEFT JOIN), so they go through _text() before slicing.
print(f"\n--- Simple 1:1 → UPDATE nom_articole SET codmat = SKU: {len(simple_update)} ---")
for sku, info in sorted(simple_update.items()):
    print(f" {sku:25s} → id_articol={info['id_articol']:6d} codmat_actual='{info['codmat_actual'] or ''}' [{_text(info['denumire'])[:40]}] ({info['observations']} obs)")

print(f"\n--- Repackaging → ARTICOLE_TERTI: {len(repack_csv)} ---")
for (sku, codmat), info in sorted(repack_csv.items()):
    print(f" {sku:25s} → {codmat:15s} x{info['cantitate_roa']} id_art={info['id_articol']} [{_text(info['denumire'])[:35]}] ({info['observations']} obs)")

print(f"\n--- Complex sets → ARTICOLE_TERTI: {len(set_csv)} ---")
for (sku, codmat), info in sorted(set_csv.items()):
    print(f" {sku:25s} → {codmat:15s} {info['procent_pret']:6.2f}% x{info['cantitate_roa']} [{_text(info['denumire'])[:35]}]")

print(f"\n--- Inconsistent (different articles across orders): {len(inconsistent)} ---")
for sku, obs_list in sorted(inconsistent.items()):
    arts = set((o['id_articol'], _text(o['denumire'])[:30]) for o in obs_list)
    art_summary = '; '.join(f'id={a[0]}({a[1]})' for a in arts)
    print(f" {sku:25s} → {len(arts)} different articles: {art_summary}")

# Write SQL for simple updates
with open(os.path.join(out_dir, 'update_codmat.sql'), 'w', encoding='utf-8') as f:
    f.write("-- UPDATE nom_articole: set codmat = GoMag SKU for 1:1 mappings\n")
    f.write("-- Generated from invoice-order matching\n")
    f.write("-- VERIFY BEFORE RUNNING!\n\n")
    for sku, info in sorted(simple_update.items()):
        # Names come from external data: keep them on one comment line, and
        # quote the SKU so an apostrophe cannot break the statement.
        f.write(f"-- {_comment_text(info['product_name'], 60)} → {_comment_text(info['denumire'], 60)}\n")
        f.write(f"-- Current codmat: '{_comment_text(info['codmat_actual'], 200)}' | {info['observations']} order matches\n")
        f.write(f"UPDATE nom_articole SET codmat = {_sql_literal(sku)} WHERE id_articol = {info['id_articol']} AND sters = 0;\n\n")

# Write CSV for repackaging (ARTICOLE_TERTI format); procent_pret is always 100 here.
with open(os.path.join(out_dir, 'repack_mappings.csv'), 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa', 'observations'])
    for (sku, codmat), info in sorted(repack_csv.items()):
        w.writerow([sku, codmat, info['cantitate_roa'], 100, info['id_articol'], info['product_name'], info['denumire'], info['observations']])

# Write CSV for sets
with open(os.path.join(out_dir, 'set_mappings.csv'), 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'codmat', 'cantitate_roa', 'procent_pret', 'id_articol', 'product_name_gomag', 'denumire_roa'])
    for (sku, codmat), info in sorted(set_csv.items()):
        w.writerow([sku, codmat, info['cantitate_roa'], info['procent_pret'], info['id_articol'], info['product_name'], info['denumire']])

# Write inconsistent for manual review: one row per observation
with open(os.path.join(out_dir, 'inconsistent_skus.csv'), 'w', newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerow(['sku', 'product_name', 'id_articol', 'codmat', 'denumire_roa', 'qty_ratio', 'type', 'order', 'factura'])
    for sku, obs_list in sorted(inconsistent.items()):
        for obs in obs_list:
            w.writerow([sku, obs['product_name'], obs['id_articol'], obs['codmat'] or '',
                        obs['denumire'], obs['qty_ratio'], obs['type'], obs['order'], obs['factura']])

print(f"\nOutput written to {out_dir}:")
print(f" update_codmat.sql - {len(simple_update)} SQL updates for nom_articole")
print(f" repack_mappings.csv - {len(repack_csv)} repackaging mappings")
print(f" set_mappings.csv - {len(set_csv)} complex set mappings")
print(f" inconsistent_skus.csv - {len(inconsistent)} SKUs needing manual review")
|