fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs

- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector
  number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94);
  include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when
  billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before
  TIER 1; resolve id_localitate before search; TIER 1 now matches on
  id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-07 13:48:49 +00:00
parent 5b4b317636
commit 0f817b2130
8 changed files with 214 additions and 106 deletions

View File

@@ -733,6 +733,47 @@ class TestAddrMatch:
roa = json.dumps({"strada": "STRADA VASILE GOLDIS", "numar": "19", "bloc": "", "scara": "", "apart": "", "localitate": "ALBA IULIA", "judet": "ALBA"})
assert _addr_match(gomag, roa) is False
def test_sectorul_in_city(self):
"""GoMag 'Municipiul București' matches ROA 'BUCURESTI SECTORUL 1'."""
from app.services.sync_service import _addr_match
import json
g = json.dumps({"address": "Bld Decebal 24", "city": "Municipiul București", "region": "Bucuresti"})
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "localitate": "BUCURESTI SECTORUL 1", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True
def test_keyword_digit_gluing(self):
"""Keywords glued to digits like 'sc1', 'ap94' are stripped correctly."""
from app.services.sync_service import _addr_match
import json
g = json.dumps({"address": "Bld Decebal nr24 bl S2B sc1 ap94", "city": "Bucuresti", "region": "Bucuresti"})
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True
def test_etaj_in_street(self):
"""GoMag address with 'etaj 7' matches ROA with etaj field."""
from app.services.sync_service import _addr_match
import json
g = json.dumps({"address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7", "city": "Bucuresti", "region": "Bucuresti"})
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True
def test_billing_equals_shipping_short_circuit(self):
"""Short-circuit condition: billing == shipping → reuse addr_livr_id."""
from app.services.import_service import format_address_for_oracle
shipping_addr = format_address_for_oracle("Bld Decebal 24", "Bucuresti", "Bucuresti")
billing_addr = format_address_for_oracle("Bld Decebal 24", "Bucuresti", "Bucuresti")
addr_livr_id = 123
# Simulate the short-circuit condition
assert addr_livr_id and billing_addr == shipping_addr
def test_billing_differs_shipping_no_short_circuit(self):
"""When billing != shipping, short-circuit does NOT apply."""
from app.services.import_service import format_address_for_oracle
shipping_addr = format_address_for_oracle("Str. Victoriei 10", "Cluj", "Cluj")
billing_addr = format_address_for_oracle("Bld Decebal 24", "Bucuresti", "Bucuresti")
addr_livr_id = 123
assert not (addr_livr_id and billing_addr == shipping_addr)
class TestFormatAddressForOracle:
"""Tests for format_address_for_oracle city stripping."""