fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs
- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94); include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before TIER 1; resolve id_localitate before search; TIER 1 now matches on id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,12 +1,8 @@
|
||||
import oracledb
|
||||
import aiosqlite
|
||||
import sqlite3
|
||||
import json
|
||||
import re
|
||||
import unicodedata
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from .config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -375,33 +371,7 @@ def init_sqlite():
|
||||
|
||||
def _backfill_address_mismatch(conn):
|
||||
"""Recompute address_mismatch from stored address JSON for all orders."""
|
||||
_ADDR_WORDS = re.compile(
|
||||
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
|
||||
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
|
||||
)
|
||||
|
||||
def norm(s):
|
||||
s = unicodedata.normalize('NFD', s or '')
|
||||
s = re.sub(r'[\u0300-\u036f]', '', s).upper()
|
||||
s = _ADDR_WORDS.sub('', s)
|
||||
return re.sub(r'[^A-Z0-9]', '', s)
|
||||
|
||||
def addr_match(gomag_json, roa_json):
|
||||
if not gomag_json or not roa_json:
|
||||
return True
|
||||
try:
|
||||
g = json.loads(gomag_json) if isinstance(gomag_json, str) else gomag_json
|
||||
r = json.loads(roa_json) if isinstance(roa_json, str) else roa_json
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return True
|
||||
g_street = norm(g.get('address') or g.get('strada') or '')
|
||||
r_street = norm((r.get('strada') or '') + (r.get('numar') or ''))
|
||||
g_city = norm(g.get('city') or g.get('localitate') or '')
|
||||
r_city = norm(r.get('localitate') or '')
|
||||
g_region = norm(g.get('region') or g.get('judet') or '')
|
||||
r_region = norm(r.get('judet') or '')
|
||||
return g_street == r_street and g_city == r_city and g_region == r_region
|
||||
|
||||
from .services.sync_service import _addr_match
|
||||
try:
|
||||
rows = conn.execute("""
|
||||
SELECT order_number, adresa_livrare_gomag, adresa_livrare_roa,
|
||||
@@ -411,8 +381,8 @@ def _backfill_address_mismatch(conn):
|
||||
""").fetchall()
|
||||
updated = 0
|
||||
for r in rows:
|
||||
livr_ok = addr_match(r[1], r[2])
|
||||
fact_ok = addr_match(r[3], r[4])
|
||||
livr_ok = _addr_match(r[1], r[2])
|
||||
fact_ok = _addr_match(r[3], r[4])
|
||||
new_val = 1 if (not livr_ok or not fact_ok) else 0
|
||||
conn.execute(
|
||||
"UPDATE orders SET address_mismatch = ? WHERE order_number = ?",
|
||||
|
||||
Reference in New Issue
Block a user