fix(address): extract scara/etaj/apartament from comma-less addresses
Oracle parser failed to extract sc/ap/et when GoMag addresses had no commas. Added REGEXP_REPLACE to insert commas before address keywords in v_strada before the comma-split, ensuring the token parser always fires. Also added 5 Oracle integration tests calling parseaza_adresa_semicolon directly, and improved diacritics handling in addr_match (Python + JS). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -93,7 +93,8 @@ def app_settings(client):
|
||||
resp = client.get("/api/sync/schedule")
|
||||
assert resp.status_code == 200
|
||||
import sqlite3
|
||||
db_path = os.environ.get("SQLITE_DB_PATH", os.path.join(_script_dir, "orders.db"))
|
||||
from app.config import settings as _s
|
||||
db_path = _s.SQLITE_DB_PATH if os.path.isabs(_s.SQLITE_DB_PATH) else os.path.join(_script_dir, _s.SQLITE_DB_PATH)
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
rows = conn.execute("SELECT key, value FROM app_settings").fetchall()
|
||||
@@ -273,6 +274,97 @@ class TestAddressRulesE2E:
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test: parsare componente adresă (strada, numar, bloc, scara, apart, etaj)
|
||||
# Apelează direct parseaza_adresa_semicolon din Oracle — fără import comandă.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAddressComponentParsing:
    """Check address-component extraction by calling parseaza_adresa_semicolon directly."""

    def _parse_address(self, oracle_pool, address, city="Bucuresti", region="Bucuresti"):
        """Run the Oracle address parser on one address and return its components.

        The address is first combined with ``city`` and ``region`` by
        ``format_address_for_oracle``; that formatted string is the first
        argument of ``PACK_IMPORT_PARTENERI.parseaza_adresa_semicolon``.

        Returns a dict with the keys ``strada``, ``numar``, ``bloc``,
        ``scara``, ``apart``, ``etaj``, ``localitate`` and ``judet``.
        The ``sector`` variable is bound for the call but not returned.
        """
        from app.services.import_service import format_address_for_oracle

        # (name, size passed to cursor.var) for every bound variable, listed in
        # the positional order the procedure receives them after the input string.
        bind_specs = (
            ("judet", 200),
            ("localitate", 200),
            ("strada", 100),
            ("numar", 100),
            ("sector", 100),
            ("bloc", 30),
            ("scara", 10),
            ("apart", 10),
            ("etaj", 20),
        )
        # Keys exposed to the tests, in the order of the returned dict.
        result_keys = (
            "strada",
            "numar",
            "bloc",
            "scara",
            "apart",
            "etaj",
            "localitate",
            "judet",
        )

        oracle_input = format_address_for_oracle(address, city, region)

        connection = oracle_pool.acquire()
        try:
            with connection.cursor() as cursor:
                bound = {name: cursor.var(str, size) for name, size in bind_specs}
                cursor.callproc(
                    "PACK_IMPORT_PARTENERI.parseaza_adresa_semicolon",
                    [oracle_input, *bound.values()],
                )
                return {key: bound[key].getvalue() for key in result_keys}
        finally:
            # Always hand the connection back to the pool, even if the call fails.
            oracle_pool.release(connection)

    def test_full_address_all_components(self, oracle_pool):
        """Full address with nr, bl, sc, ap: every component is pulled out of the street."""
        parsed = self._parse_address(
            oracle_pool,
            "Bd. 1 Decembrie 1918 nr. 26 bl. 6 sc. 2 ap. 36",
        )
        assert parsed["numar"] == "26", f"numar={parsed['numar']}"
        assert parsed["bloc"] == "6", f"bloc={parsed['bloc']}"
        assert parsed["scara"] == "2", f"scara={parsed['scara']}"
        assert parsed["apart"] == "36", f"apart={parsed['apart']}"
        # The keywords must not be left behind in the street part.
        street = parsed["strada"] or ""
        assert "SC" not in street, f"SC ramas in strada: {parsed['strada']}"
        assert "AP" not in street, f"AP ramas in strada: {parsed['strada']}"

    def test_alphanumeric_bloc_and_letter_scara(self, oracle_pool):
        """Alphanumeric block (VN9) and letter staircase (A), plus floor."""
        parsed = self._parse_address(
            oracle_pool,
            "Strada Becatei nr 29 bl. VN9 sc. A et. 10 ap. 42",
        )
        assert parsed["numar"] == "29", f"numar={parsed['numar']}"
        assert parsed["bloc"] == "VN9", f"bloc={parsed['bloc']}"
        assert parsed["scara"] == "A", f"scara={parsed['scara']}"
        assert parsed["etaj"] == "10", f"etaj={parsed['etaj']}"
        assert parsed["apart"] == "42", f"apart={parsed['apart']}"

    def test_address_without_commas_uppercase(self, oracle_pool):
        """Uppercase address without commas: keywords are separated by spaces only."""
        parsed = self._parse_address(
            oracle_pool,
            "STR DACIA NR 15 BLOC Z2 SC 1 AP 7 ET 3",
        )
        assert parsed["numar"] == "15", f"numar={parsed['numar']}"
        assert parsed["bloc"] == "Z2", f"bloc={parsed['bloc']}"
        assert parsed["scara"] == "1", f"scara={parsed['scara']}"
        assert parsed["apart"] == "7", f"apart={parsed['apart']}"
        assert parsed["etaj"] == "3", f"etaj={parsed['etaj']}"

    def test_address_with_existing_commas(self, oracle_pool):
        """Address that already contains commas: parsing must not break."""
        parsed = self._parse_address(
            oracle_pool,
            "Str Victoriei, nr. 10, bl. A1, sc. B, et. 2, ap. 15",
        )
        assert parsed["numar"] == "10", f"numar={parsed['numar']}"
        assert parsed["bloc"] == "A1", f"bloc={parsed['bloc']}"
        assert parsed["scara"] == "B", f"scara={parsed['scara']}"
        assert parsed["etaj"] == "2", f"etaj={parsed['etaj']}"
        assert parsed["apart"] == "15", f"apart={parsed['apart']}"

    def test_no_keywords_street_unchanged(self, oracle_pool):
        """Plain address with no keywords: the street stays intact."""
        parsed = self._parse_address(oracle_pool, "Strada Victoriei 10")
        street = parsed["strada"] or ""
        assert "VICTORIEI" in street, f"strada={parsed['strada']}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test regresie: comenzi existente în SQLite
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -615,6 +615,19 @@ class TestAddrMatch:
|
||||
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
|
||||
assert _addr_match(g, r) is True
|
||||
|
||||
def test_addr_match_diacritics(self):
    """Romanian diacritics (â, ș, ț, î) are normalized same as Oracle storage."""
    import json

    from app.services.sync_service import _addr_match

    # Each pair is (address with diacritics, stored uppercase ASCII form);
    # both sides are JSON-encoded before being handed to _addr_match.
    cases = (
        # â→a in city name
        (
            {"address": "Str. Morii 208", "city": "Sf\u00e2ntu Ilie", "region": "Suceava"},
            {"strada": "MORII", "numar": "208", "localitate": "SFANTU ILIE", "judet": "SUCEAVA"},
        ),
        # ș→s, ț→t in street (and ș→s in city/region)
        (
            {
                "address": "Str. \u0218oseaua \u021a\u0103rii 5",
                "city": "Bucure\u0219ti",
                "region": "Bucure\u0219ti",
            },
            {"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"},
        ),
    )

    for with_diacritics, stored in cases:
        assert _addr_match(json.dumps(with_diacritics), json.dumps(stored)) is True
|
||||
|
||||
def test_billing_equals_shipping_short_circuit(self):
|
||||
"""Short-circuit condition: billing == shipping → reuse addr_livr_id."""
|
||||
from app.services.import_service import format_address_for_oracle
|
||||
|
||||
Reference in New Issue
Block a user