fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs

- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector
  number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94);
  include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when
  billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before
  TIER 1; resolve id_localitate before search; TIER 1 now matches on
  id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-07 13:48:49 +00:00
parent 5b4b317636
commit 0f817b2130
8 changed files with 214 additions and 106 deletions

View File

@@ -1,12 +1,8 @@
import oracledb
import aiosqlite
import sqlite3
import json
import re
import unicodedata
import logging
import os
from pathlib import Path
from .config import settings
logger = logging.getLogger(__name__)
@@ -375,33 +371,7 @@ def init_sqlite():
def _backfill_address_mismatch(conn):
"""Recompute address_mismatch from stored address JSON for all orders."""
_ADDR_WORDS = re.compile(
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
)
def norm(s):
s = unicodedata.normalize('NFD', s or '')
s = re.sub(r'[\u0300-\u036f]', '', s).upper()
s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s)
def addr_match(gomag_json, roa_json):
if not gomag_json or not roa_json:
return True
try:
g = json.loads(gomag_json) if isinstance(gomag_json, str) else gomag_json
r = json.loads(roa_json) if isinstance(roa_json, str) else roa_json
except (json.JSONDecodeError, TypeError):
return True
g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '')
r_region = norm(r.get('judet') or '')
return g_street == r_street and g_city == r_city and g_region == r_region
from .services.sync_service import _addr_match
try:
rows = conn.execute("""
SELECT order_number, adresa_livrare_gomag, adresa_livrare_roa,
@@ -411,8 +381,8 @@ def _backfill_address_mismatch(conn):
""").fetchall()
updated = 0
for r in rows:
livr_ok = addr_match(r[1], r[2])
fact_ok = addr_match(r[3], r[4])
livr_ok = _addr_match(r[1], r[2])
fact_ok = _addr_match(r[3], r[4])
new_val = 1 if (not livr_ok or not fact_ok) else 0
conn.execute(
"UPDATE orders SET address_mismatch = ? WHERE order_number = ?",

View File

@@ -338,28 +338,32 @@ def import_single_order(order, id_pol: int = None, id_sectie: int = None, app_se
# Different person: use shipping address for BOTH billing and shipping in ROA
addr_fact_id = addr_livr_id
else:
# Same person: use billing address as-is
id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
# Same person: compute billing addr, short-circuit if identical to shipping
billing_addr = format_address_for_oracle(
order.billing.address, order.billing.city, order.billing.region
)
cur.callproc("PACK_IMPORT_PARTENERI.cauta_sau_creeaza_adresa", [
partner_id, billing_addr,
order.billing.phone or "",
order.billing.email or "",
id_adresa_fact
])
addr_fact_id = id_adresa_fact.getvalue()
if addr_livr_id and order.shipping and billing_addr == shipping_addr:
# billing = shipping: reuse addr_livr_id to avoid duplicate Oracle address
addr_fact_id = addr_livr_id
else:
id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
cur.callproc("PACK_IMPORT_PARTENERI.cauta_sau_creeaza_adresa", [
partner_id, billing_addr,
order.billing.phone or "",
order.billing.email or "",
id_adresa_fact
])
addr_fact_id = id_adresa_fact.getvalue()
if addr_fact_id is None:
cur.execute("SELECT PACK_IMPORT_PARTENERI.get_last_error FROM dual")
plsql_err = cur.fetchone()[0]
err_msg = f"Billing address creation failed for partner {partner_id}"
if plsql_err:
err_msg += f": {plsql_err}"
logger.error(f"Order {order_number}: {err_msg}")
result["error"] = err_msg
return result
if addr_fact_id is None:
cur.execute("SELECT PACK_IMPORT_PARTENERI.get_last_error FROM dual")
plsql_err = cur.fetchone()[0]
err_msg = f"Billing address creation failed for partner {partner_id}"
if plsql_err:
err_msg += f": {plsql_err}"
logger.error(f"Order {order_number}: {err_msg}")
result["error"] = err_msg
return result
if addr_fact_id is not None:
result["id_adresa_facturare"] = int(addr_fact_id)

View File

@@ -31,8 +31,9 @@ def _addr_match(gomag_json, roa_json):
except (json.JSONDecodeError, TypeError):
return True
_ADDR_WORDS = re.compile(
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
r'\bSECTORUL\s*\d*'
r'|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))'
)
def norm(s):
s = unicodedata.normalize('NFD', s or '')
@@ -40,7 +41,7 @@ def _addr_match(gomag_json, roa_json):
s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s)
g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or ''))
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '')

View File

@@ -850,11 +850,11 @@ function addrMatch(gomag, roa) {
/**
 * Normalize an address fragment into a key for tolerant comparison.
 *
 * Steps, in order: decompose to NFD and drop combining diacritics,
 * upper-case, remove "SECTORUL <n>" together with its number, remove the
 * address keywords (STR, NR, BL, SC, AP, ET, ...) including when they are
 * glued to a digit such as "SC1" or "AP94", then drop every character that
 * is not A-Z or 0-9.
 *
 * @param {?string} s Raw address text; null/undefined/'' are treated as ''.
 * @returns {string} Normalized comparison key (may be empty).
 */
function norm(s) {
    return (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '')
        .toUpperCase()
        // Keep this pattern identical to the one compiled in the Python
        // _addr_match. The leading SECTORUL\s*\d* branch removes the sector
        // number as well; without it "BUCURESTI SECTORUL 3" would normalize
        // to "BUCURESTI3" here while the backend yields "BUCURESTI".
        // (?:\b|(?=\d)) lets a keyword end either at a word boundary or
        // directly before a digit, which catches glued forms like "SC1".
        .replace(/\bSECTORUL\s*\d*|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
        .replace(/[^A-Z0-9]/g, '');
}
const gStreet = norm(gomag.address || gomag.strada || '');
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||''));
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
const gCity = norm(gomag.city || gomag.localitate || '');
const rCity = norm(roa.localitate || '');
const gRegion = norm(gomag.region || gomag.judet || '');

View File

@@ -168,7 +168,7 @@
<script>window.ROOT_PATH = "{{ rp }}";</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="{{ rp }}/static/js/shared.js?v=32"></script>
<script src="{{ rp }}/static/js/shared.js?v=33"></script>
<script>
// Dark mode toggle
function toggleDarkMode() {