fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs
- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94); include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before TIER 1; resolve id_localitate before search; TIER 1 now matches on id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -733,6 +733,47 @@ class TestAddrMatch:
|
||||
roa = json.dumps({"strada": "STRADA VASILE GOLDIS", "numar": "19", "bloc": "", "scara": "", "apart": "", "localitate": "ALBA IULIA", "judet": "ALBA"})
|
||||
assert _addr_match(gomag, roa) is False
|
||||
|
||||
def test_sectorul_in_city(self):
    """A GoMag city of 'Municipiul București' matches a ROA locality of 'BUCURESTI SECTORUL 1'."""
    import json

    from app.services.sync_service import _addr_match

    # GoMag side: free-text address plus city/region.
    gomag_fields = {
        "address": "Bld Decebal 24",
        "city": "Municipiul București",
        "region": "Bucuresti",
    }
    # ROA side: structured address whose locality carries the sector suffix.
    roa_fields = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "localitate": "BUCURESTI SECTORUL 1",
        "judet": "BUCURESTI",
    }

    matched = _addr_match(json.dumps(gomag_fields), json.dumps(roa_fields))
    assert matched is True
|
||||
|
||||
def test_keyword_digit_gluing(self):
    """Keywords written without a space before their number ('nr24', 'sc1', 'ap94') still match."""
    import json

    from app.services.sync_service import _addr_match

    # GoMag side: every keyword except 'bl' is glued directly to its digits.
    gomag_fields = {
        "address": "Bld Decebal nr24 bl S2B sc1 ap94",
        "city": "Bucuresti",
        "region": "Bucuresti",
    }
    # ROA side: the same address split into its structured fields.
    roa_fields = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "bloc": "S2B",
        "scara": "1",
        "apart": "94",
        "localitate": "BUCURESTI",
        "judet": "BUCURESTI",
    }

    matched = _addr_match(json.dumps(gomag_fields), json.dumps(roa_fields))
    assert matched is True
|
||||
|
||||
def test_etaj_in_street(self):
    """A GoMag address ending in 'Etaj 7' matches a ROA record that has an 'etaj' field."""
    import json

    from app.services.sync_service import _addr_match

    # GoMag side: the floor is embedded in the free-text address.
    gomag_fields = {
        "address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7",
        "city": "Bucuresti",
        "region": "Bucuresti",
    }
    # ROA side: the floor is stored in its own 'etaj' field.
    roa_fields = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "bloc": "S2B",
        "scara": "1",
        "apart": "94",
        "etaj": "7",
        "localitate": "BUCURESTI",
        "judet": "BUCURESTI",
    }

    matched = _addr_match(json.dumps(gomag_fields), json.dumps(roa_fields))
    assert matched is True
|
||||
|
||||
def test_billing_equals_shipping_short_circuit(self):
    """Identical billing and shipping inputs satisfy the short-circuit condition.

    This only evaluates the predicate (a truthy shipping address id and
    equal formatted addresses) locally; it does not call the import code
    path that would reuse the id.
    """
    from app.services.import_service import format_address_for_oracle

    same_inputs = ("Bld Decebal 24", "Bucuresti", "Bucuresti")
    shipping_addr = format_address_for_oracle(*same_inputs)
    billing_addr = format_address_for_oracle(*same_inputs)
    addr_livr_id = 123

    # Simulated short-circuit condition: id present and addresses equal.
    reuse_shipping_id = addr_livr_id and billing_addr == shipping_addr
    assert reuse_shipping_id
|
||||
|
||||
def test_billing_differs_shipping_no_short_circuit(self):
    """Different billing and shipping inputs do not satisfy the short-circuit condition.

    As with the positive case, only the predicate is evaluated here.
    """
    from app.services.import_service import format_address_for_oracle

    shipping_addr = format_address_for_oracle("Str. Victoriei 10", "Cluj", "Cluj")
    billing_addr = format_address_for_oracle("Bld Decebal 24", "Bucuresti", "Bucuresti")
    addr_livr_id = 123

    # The id is truthy, so the condition can only fail on address inequality.
    reuse_shipping_id = addr_livr_id and billing_addr == shipping_addr
    assert not reuse_shipping_id
|
||||
|
||||
|
||||
class TestFormatAddressForOracle:
|
||||
"""Tests for format_address_for_oracle city stripping."""
|
||||
|
||||
Reference in New Issue
Block a user