fix(address): extract scara/etaj/apartament from comma-less addresses

The Oracle parser failed to extract sc/ap/et (scara/apartament/etaj)
when GoMag addresses contained no commas. Added a REGEXP_REPLACE that
inserts commas before address keywords (nr, bl, sc, ap, et and their
long forms) in v_strada before the comma-split, so the token parser
always fires. Also added 5 Oracle integration tests that call
parseaza_adresa_semicolon directly, and switched diacritics handling
in addr_match (Python + JS) from NFD combining-mark stripping to an
explicit diacritics translation table.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-08 22:00:17 +00:00
parent f049b0bf12
commit f48c2d62c6
5 changed files with 122 additions and 5 deletions

View File

@@ -2,7 +2,6 @@ import asyncio
import json import json
import logging import logging
import re import re
import unicodedata
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
@@ -36,8 +35,7 @@ def _addr_match(gomag_json, roa_json):
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))' r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))'
) )
def norm(s): def norm(s):
s = unicodedata.normalize('NFD', s or '') s = (s or '').translate(import_service._DIACRITICS).upper()
s = re.sub(r'[\u0300-\u036f]', '', s).upper()
s = _ADDR_WORDS.sub('', s) s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s) return re.sub(r'[^A-Z0-9]', '', s)
g_street = norm(g.get('address') or g.get('strada') or '') g_street = norm(g.get('address') or g.get('strada') or '')

View File

@@ -822,8 +822,13 @@ function fmtAddr(a) {
function addrMatch(gomag, roa) { function addrMatch(gomag, roa) {
if (!gomag || !roa) return true; // can't compare if (!gomag || !roa) return true; // can't compare
const _DIAC = {
'\u0103':'a','\u00e2':'a','\u00ee':'i','\u0219':'s','\u021b':'t',
'\u0102':'A','\u00c2':'A','\u00ce':'I','\u0218':'S','\u021a':'T',
'\u015f':'s','\u0163':'t','\u015e':'S','\u0162':'T'
};
function norm(s) { function norm(s) {
return (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '') return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
.toUpperCase() .toUpperCase()
.replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '') .replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
.replace(/[^A-Z0-9]/g, ''); .replace(/[^A-Z0-9]/g, '');

View File

@@ -12,6 +12,7 @@ CREATE OR REPLACE PACKAGE PACK_IMPORT_PARTENERI AS
-- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada -- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc) -- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- 07.04.2026 - fix localitate necunoscuta: SOUNDEX fuzzy match (TIER L2) + pastreaza judetul in L3 -- 07.04.2026 - fix localitate necunoscuta: SOUNDEX fuzzy match (TIER L2) + pastreaza judetul in L3
-- 08.04.2026 - fix parser: inserare virgule in strada inainte de comma-split (sc/ap/et nu se extrageau fara virgula)
-- ==================================================================== -- ====================================================================
-- CONSTANTS -- CONSTANTS
@@ -441,6 +442,7 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
END separa_nume_prenume; END separa_nume_prenume;
-- 31.03.2026 - parser inteligent: split numar in bloc/scara/apart/etaj (fix ORA-12899 pe NUMAR max 10 chars) -- 31.03.2026 - parser inteligent: split numar in bloc/scara/apart/etaj (fix ORA-12899 pe NUMAR max 10 chars)
-- 08.04.2026 - fix: inserare virgule in strada inainte de comma-split (sc/ap/et nu se extrageau fara virgula)
PROCEDURE parseaza_adresa_semicolon(p_adresa_text IN VARCHAR2, PROCEDURE parseaza_adresa_semicolon(p_adresa_text IN VARCHAR2,
p_judet OUT VARCHAR2, p_judet OUT VARCHAR2,
p_localitate OUT VARCHAR2, p_localitate OUT VARCHAR2,
@@ -518,6 +520,13 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
p_strada := SUBSTR(v_componente(3), 1, 100); p_strada := SUBSTR(v_componente(3), 1, 100);
v_strada := p_strada; v_strada := p_strada;
-- 08.04.2026 - insert commas before address keywords so comma-split always fires
-- Reuses same regex as v_raw_numar comma insertion (lines below)
-- Ex: "Str X nr 26 bl 6 sc 2 ap 36" → "Str X,nr 26,bl 6,sc 2,ap 36"
v_strada := REGEXP_REPLACE(v_strada,
'(\s)(BLOC|BL|SCARA|SC|APARTAMENT|APART|AP|ETAJ|ET|NUMARUL|NUMAR|NR)(\s|\.|\d)',
',\2\3', 1, 0, 'i');
-- Separa strada de tot ce e dupa prima virgula -- Separa strada de tot ce e dupa prima virgula
v_pozitie := INSTR(v_strada, ','); v_pozitie := INSTR(v_strada, ',');
IF v_pozitie > 0 THEN IF v_pozitie > 0 THEN

View File

@@ -93,7 +93,8 @@ def app_settings(client):
resp = client.get("/api/sync/schedule") resp = client.get("/api/sync/schedule")
assert resp.status_code == 200 assert resp.status_code == 200
import sqlite3 import sqlite3
db_path = os.environ.get("SQLITE_DB_PATH", os.path.join(_script_dir, "orders.db")) from app.config import settings as _s
db_path = _s.SQLITE_DB_PATH if os.path.isabs(_s.SQLITE_DB_PATH) else os.path.join(_script_dir, _s.SQLITE_DB_PATH)
conn = sqlite3.connect(db_path) conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row conn.row_factory = sqlite3.Row
rows = conn.execute("SELECT key, value FROM app_settings").fetchall() rows = conn.execute("SELECT key, value FROM app_settings").fetchall()
@@ -273,6 +274,97 @@ class TestAddressRulesE2E:
) )
# ---------------------------------------------------------------------------
# Test: parsare componente adresă (strada, numar, bloc, scara, apart, etaj)
# Apelează direct parseaza_adresa_semicolon din Oracle — fără import comandă.
# ---------------------------------------------------------------------------
class TestAddressComponentParsing:
    """Check address-component extraction by calling parseaza_adresa_semicolon directly."""

    @staticmethod
    def _assert_fields(addr, expected):
        """Assert each expected field in order, reporting ``field=value`` on mismatch."""
        for field, wanted in expected.items():
            assert addr[field] == wanted, f"{field}={addr[field]}"

    def _parse_address(self, oracle_pool, address, city="Bucuresti", region="Bucuresti"):
        """Run the Oracle parser on one address and return the parsed components.

        The address, city and region are first combined by
        ``format_address_for_oracle``; its result is the procedure's input
        argument. The returned dict has the keys strada, numar, bloc, scara,
        apart, etaj, localitate and judet. ``sector`` is bound as an OUT
        parameter but is not part of the returned dict.
        """
        from app.services.import_service import format_address_for_oracle

        # OUT binds in the positional order the procedure is called with,
        # each paired with the size argument given to cursor.var().
        out_specs = (
            ("judet", 200),
            ("localitate", 200),
            ("strada", 100),
            ("numar", 100),
            ("sector", 100),
            ("bloc", 30),
            ("scara", 10),
            ("apart", 10),
            ("etaj", 20),
        )
        returned_keys = (
            "strada", "numar", "bloc", "scara",
            "apart", "etaj", "localitate", "judet",
        )

        oracle_input = format_address_for_oracle(address, city, region)
        connection = oracle_pool.acquire()
        try:
            with connection.cursor() as cursor:
                binds = {name: cursor.var(str, size) for name, size in out_specs}
                cursor.callproc(
                    "PACK_IMPORT_PARTENERI.parseaza_adresa_semicolon",
                    [oracle_input, *binds.values()],
                )
                return {name: binds[name].getvalue() for name in returned_keys}
        finally:
            # Always hand the connection back, even if the call raised.
            oracle_pool.release(connection)

    def test_full_address_all_components(self, oracle_pool):
        """Full address with nr, bl, sc, ap: every component is pulled out of the street."""
        addr = self._parse_address(
            oracle_pool,
            "Bd. 1 Decembrie 1918 nr. 26 bl. 6 sc. 2 ap. 36",
        )
        self._assert_fields(
            addr,
            {"numar": "26", "bloc": "6", "scara": "2", "apart": "36"},
        )
        # The extracted keywords must no longer appear in the street itself.
        for keyword in ("SC", "AP"):
            assert keyword not in (addr["strada"] or ""), (
                f"{keyword} ramas in strada: {addr['strada']}"
            )

    def test_alphanumeric_bloc_and_letter_scara(self, oracle_pool):
        """Alphanumeric block (VN9) and letter staircase (A), plus a floor."""
        addr = self._parse_address(
            oracle_pool,
            "Strada Becatei nr 29 bl. VN9 sc. A et. 10 ap. 42",
        )
        self._assert_fields(
            addr,
            {"numar": "29", "bloc": "VN9", "scara": "A", "etaj": "10", "apart": "42"},
        )

    def test_address_without_commas_uppercase(self, oracle_pool):
        """Uppercase address without commas: keywords separated only by spaces."""
        addr = self._parse_address(
            oracle_pool,
            "STR DACIA NR 15 BLOC Z2 SC 1 AP 7 ET 3",
        )
        self._assert_fields(
            addr,
            {"numar": "15", "bloc": "Z2", "scara": "1", "apart": "7", "etaj": "3"},
        )

    def test_address_with_existing_commas(self, oracle_pool):
        """Address that already contains commas: parsing must not regress."""
        addr = self._parse_address(
            oracle_pool,
            "Str Victoriei, nr. 10, bl. A1, sc. B, et. 2, ap. 15",
        )
        self._assert_fields(
            addr,
            {"numar": "10", "bloc": "A1", "scara": "B", "etaj": "2", "apart": "15"},
        )

    def test_no_keywords_street_unchanged(self, oracle_pool):
        """Plain address without keywords: the street name is still present."""
        addr = self._parse_address(oracle_pool, "Strada Victoriei 10")
        street = addr["strada"] or ""
        assert "VICTORIEI" in street, f"strada={addr['strada']}"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Test regresie: comenzi existente în SQLite # Test regresie: comenzi existente în SQLite
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@@ -615,6 +615,19 @@ class TestAddrMatch:
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"}) r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True assert _addr_match(g, r) is True
def test_addr_match_diacritics(self):
    """Romanian diacritics in the GoMag address match their ASCII forms on the ROA side."""
    import json
    from app.services.sync_service import _addr_match

    cases = [
        # â -> a in the city name
        (
            {"address": "Str. Morii 208", "city": "Sf\u00e2ntu Ilie", "region": "Suceava"},
            {"strada": "MORII", "numar": "208", "localitate": "SFANTU ILIE", "judet": "SUCEAVA"},
        ),
        # Ș -> S, Ț -> T, ă -> a in the street; ș -> s in city and region
        (
            {"address": "Str. \u0218oseaua \u021a\u0103rii 5", "city": "Bucure\u0219ti", "region": "Bucure\u0219ti"},
            {"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"},
        ),
    ]
    for gomag_addr, roa_addr in cases:
        assert _addr_match(json.dumps(gomag_addr), json.dumps(roa_addr)) is True
def test_billing_equals_shipping_short_circuit(self): def test_billing_equals_shipping_short_circuit(self):
"""Short-circuit condition: billing == shipping → reuse addr_livr_id.""" """Short-circuit condition: billing == shipping → reuse addr_livr_id."""
from app.services.import_service import format_address_for_oracle from app.services.import_service import format_address_for_oracle