fix(address): extract scara/etaj/apartament from comma-less addresses

Oracle parser failed to extract sc/ap/et when GoMag addresses had no
commas. Added REGEXP_REPLACE to insert commas before address keywords
in v_strada before the comma-split, ensuring the token parser always
fires. Also added 5 Oracle integration tests calling
parseaza_adresa_semicolon directly, and improved diacritics handling
in addr_match (Python + JS).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-08 22:00:17 +00:00
parent f049b0bf12
commit f48c2d62c6
5 changed files with 122 additions and 5 deletions

View File

@@ -615,6 +615,19 @@ class TestAddrMatch:
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True
def test_addr_match_diacritics(self):
"""Romanian diacritics (â, ș, ț, î) are normalized same as Oracle storage."""
from app.services.sync_service import _addr_match
import json
# â→a, î→i in city name
g = json.dumps({"address": "Str. Morii 208", "city": "Sf\u00e2ntu Ilie", "region": "Suceava"})
r = json.dumps({"strada": "MORII", "numar": "208", "localitate": "SFANTU ILIE", "judet": "SUCEAVA"})
assert _addr_match(g, r) is True
# ș→s, ț→t in street
g2 = json.dumps({"address": "Str. \u0218oseaua \u021a\u0103rii 5", "city": "Bucure\u0219ti", "region": "Bucure\u0219ti"})
r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g2, r2) is True
def test_billing_equals_shipping_short_circuit(self):
"""Short-circuit condition: billing == shipping → reuse addr_livr_id."""
from app.services.import_service import format_address_for_oracle