From f48c2d62c62c07e2971144bc1b6d733dc7d3dee6 Mon Sep 17 00:00:00 2001
From: Claude Agent
Date: Wed, 8 Apr 2026 22:00:17 +0000
Subject: [PATCH] fix(address): extract scara/etaj/apartament from comma-less addresses

Oracle parser failed to extract sc/ap/et when GoMag addresses had no commas.
Added REGEXP_REPLACE to insert commas before address keywords in v_strada
before the comma-split, ensuring the token parser always fires.

Also added 5 Oracle integration tests calling parseaza_adresa_semicolon
directly, and improved diacritics handling in addr_match (Python + JS).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 api/app/services/sync_service.py                  |  4 +-
 api/app/static/js/shared.js                       |  7 +-
 .../05_pack_import_parteneri.pck                  |  9 ++
 api/tests/test_address_rules_oracle.py            | 94 ++++++++++++++++++-
 api/tests/test_business_rules.py                  | 13 +++
 5 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/api/app/services/sync_service.py b/api/app/services/sync_service.py
index 877008a..a58b3eb 100644
--- a/api/app/services/sync_service.py
+++ b/api/app/services/sync_service.py
@@ -2,7 +2,6 @@ import asyncio
 import json
 import logging
 import re
-import unicodedata
 import uuid
 from datetime import datetime, timedelta
 from zoneinfo import ZoneInfo
@@ -36,8 +35,7 @@ def _addr_match(gomag_json, roa_json):
         r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))'
     )
     def norm(s):
-        s = unicodedata.normalize('NFD', s or '')
-        s = re.sub(r'[\u0300-\u036f]', '', s).upper()
+        s = (s or '').translate(import_service._DIACRITICS).upper()
         s = _ADDR_WORDS.sub('', s)
         return re.sub(r'[^A-Z0-9]', '', s)
     g_street = norm(g.get('address') or g.get('strada') or '')
diff --git a/api/app/static/js/shared.js b/api/app/static/js/shared.js
index 027a383..7861e3d 100644
--- a/api/app/static/js/shared.js
+++ b/api/app/static/js/shared.js
@@ -822,8 +822,13 @@ function fmtAddr(a) {
 
 function addrMatch(gomag, roa) {
     if (!gomag || !roa) return true; // can't compare
+    const _DIAC = {
+        '\u0103':'a','\u00e2':'a','\u00ee':'i','\u0219':'s','\u021b':'t',
+        '\u0102':'A','\u00c2':'A','\u00ce':'I','\u0218':'S','\u021a':'T',
+        '\u015f':'s','\u0163':'t','\u015e':'S','\u0162':'T'
+    };
     function norm(s) {
-        return (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '')
+        return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
             .toUpperCase()
             .replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
             .replace(/[^A-Z0-9]/g, '');
diff --git a/api/database-scripts/05_pack_import_parteneri.pck b/api/database-scripts/05_pack_import_parteneri.pck
index e0c8d1d..9b531d1 100644
--- a/api/database-scripts/05_pack_import_parteneri.pck
+++ b/api/database-scripts/05_pack_import_parteneri.pck
@@ -12,6 +12,7 @@ CREATE OR REPLACE PACKAGE PACK_IMPORT_PARTENERI AS
     -- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada
     -- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
     -- 07.04.2026 - fix localitate necunoscuta: SOUNDEX fuzzy match (TIER L2) + pastreaza judetul in L3
+    -- 08.04.2026 - fix parser: inserare virgule in strada inainte de comma-split (sc/ap/et nu se extrageau fara virgula)
 
     -- ====================================================================
     -- CONSTANTS
@@ -441,6 +442,7 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
     END separa_nume_prenume;
 
     -- 31.03.2026 - parser inteligent: split numar in bloc/scara/apart/etaj (fix ORA-12899 pe NUMAR max 10 chars)
+    -- 08.04.2026 - fix: inserare virgule in strada inainte de comma-split (sc/ap/et nu se extrageau fara virgula)
     PROCEDURE parseaza_adresa_semicolon(p_adresa_text IN VARCHAR2,
                                         p_judet OUT VARCHAR2,
                                         p_localitate OUT VARCHAR2,
@@ -518,6 +520,13 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
             p_strada := SUBSTR(v_componente(3), 1, 100);
             v_strada := p_strada;
 
+            -- 08.04.2026 - insert commas before address keywords so comma-split always fires
+            -- Reuses same regex as v_raw_numar comma insertion (lines below)
+            -- Ex: "Str X nr 26 bl 6 sc 2 ap 36" → "Str X,nr 26,bl 6,sc 2,ap 36"
+            v_strada := REGEXP_REPLACE(v_strada,
+                '(\s)(BLOC|BL|SCARA|SC|APARTAMENT|APART|AP|ETAJ|ET|NUMARUL|NUMAR|NR)(\s|\.|\d)',
+                ',\2\3', 1, 0, 'i');
+
             -- Separa strada de tot ce e dupa prima virgula
             v_pozitie := INSTR(v_strada, ',');
             IF v_pozitie > 0 THEN
diff --git a/api/tests/test_address_rules_oracle.py b/api/tests/test_address_rules_oracle.py
index 028be88..e587553 100644
--- a/api/tests/test_address_rules_oracle.py
+++ b/api/tests/test_address_rules_oracle.py
@@ -93,7 +93,8 @@ def app_settings(client):
     resp = client.get("/api/sync/schedule")
     assert resp.status_code == 200
     import sqlite3
-    db_path = os.environ.get("SQLITE_DB_PATH", os.path.join(_script_dir, "orders.db"))
+    from app.config import settings as _s
+    db_path = _s.SQLITE_DB_PATH if os.path.isabs(_s.SQLITE_DB_PATH) else os.path.join(_script_dir, _s.SQLITE_DB_PATH)
     conn = sqlite3.connect(db_path)
     conn.row_factory = sqlite3.Row
     rows = conn.execute("SELECT key, value FROM app_settings").fetchall()
@@ -273,6 +274,97 @@ class TestAddressRulesE2E:
         )
 
 
+# ---------------------------------------------------------------------------
+# Test: parsare componente adresă (strada, numar, bloc, scara, apart, etaj)
+# Apelează direct parseaza_adresa_semicolon din Oracle — fără import comandă.
+# ---------------------------------------------------------------------------
+
+class TestAddressComponentParsing:
+    """Verifică extragerea componentelor adresei direct prin parseaza_adresa_semicolon."""
+
+    def _parse_address(self, oracle_pool, address, city="Bucuresti", region="Bucuresti"):
+        """Call Oracle parseaza_adresa_semicolon and return parsed components."""
+        from app.services.import_service import format_address_for_oracle
+        formatted = format_address_for_oracle(address, city, region)
+
+        conn = oracle_pool.acquire()
+        try:
+            with conn.cursor() as cur:
+                p_judet = cur.var(str, 200)
+                p_localitate = cur.var(str, 200)
+                p_strada = cur.var(str, 100)
+                p_numar = cur.var(str, 100)
+                p_sector = cur.var(str, 100)
+                p_bloc = cur.var(str, 30)
+                p_scara = cur.var(str, 10)
+                p_apart = cur.var(str, 10)
+                p_etaj = cur.var(str, 20)
+
+                cur.callproc("PACK_IMPORT_PARTENERI.parseaza_adresa_semicolon", [
+                    formatted, p_judet, p_localitate, p_strada, p_numar,
+                    p_sector, p_bloc, p_scara, p_apart, p_etaj
+                ])
+
+                return {
+                    "strada": p_strada.getvalue(),
+                    "numar": p_numar.getvalue(),
+                    "bloc": p_bloc.getvalue(),
+                    "scara": p_scara.getvalue(),
+                    "apart": p_apart.getvalue(),
+                    "etaj": p_etaj.getvalue(),
+                    "localitate": p_localitate.getvalue(),
+                    "judet": p_judet.getvalue(),
+                }
+        finally:
+            oracle_pool.release(conn)
+
+    def test_full_address_all_components(self, oracle_pool):
+        """Adresa completă cu nr, bl, sc, ap — toate componentele se extrag din strada."""
+        addr = self._parse_address(oracle_pool,
+            "Bd. 1 Decembrie 1918 nr. 26 bl. 6 sc. 2 ap. 36")
+        assert addr["numar"] == "26", f"numar={addr['numar']}"
+        assert addr["bloc"] == "6", f"bloc={addr['bloc']}"
+        assert addr["scara"] == "2", f"scara={addr['scara']}"
+        assert addr["apart"] == "36", f"apart={addr['apart']}"
+        assert "SC" not in (addr["strada"] or ""), f"SC ramas in strada: {addr['strada']}"
+        assert "AP" not in (addr["strada"] or ""), f"AP ramas in strada: {addr['strada']}"
+
+    def test_alphanumeric_bloc_and_letter_scara(self, oracle_pool):
+        """Bloc alfanumeric (VN9) și scara literă (A) + etaj."""
+        addr = self._parse_address(oracle_pool,
+            "Strada Becatei nr 29 bl. VN9 sc. A et. 10 ap. 42")
+        assert addr["numar"] == "29", f"numar={addr['numar']}"
+        assert addr["bloc"] == "VN9", f"bloc={addr['bloc']}"
+        assert addr["scara"] == "A", f"scara={addr['scara']}"
+        assert addr["etaj"] == "10", f"etaj={addr['etaj']}"
+        assert addr["apart"] == "42", f"apart={addr['apart']}"
+
+    def test_address_without_commas_uppercase(self, oracle_pool):
+        """Adresa uppercase fără virgule — keywords spațiu-separate."""
+        addr = self._parse_address(oracle_pool,
+            "STR DACIA NR 15 BLOC Z2 SC 1 AP 7 ET 3")
+        assert addr["numar"] == "15", f"numar={addr['numar']}"
+        assert addr["bloc"] == "Z2", f"bloc={addr['bloc']}"
+        assert addr["scara"] == "1", f"scara={addr['scara']}"
+        assert addr["apart"] == "7", f"apart={addr['apart']}"
+        assert addr["etaj"] == "3", f"etaj={addr['etaj']}"
+
+    def test_address_with_existing_commas(self, oracle_pool):
+        """Adresa care deja are virgule — nu se strică parsarea."""
+        addr = self._parse_address(oracle_pool,
+            "Str Victoriei, nr. 10, bl. A1, sc. B, et. 2, ap. 15")
+        assert addr["numar"] == "10", f"numar={addr['numar']}"
+        assert addr["bloc"] == "A1", f"bloc={addr['bloc']}"
+        assert addr["scara"] == "B", f"scara={addr['scara']}"
+        assert addr["etaj"] == "2", f"etaj={addr['etaj']}"
+        assert addr["apart"] == "15", f"apart={addr['apart']}"
+
+    def test_no_keywords_street_unchanged(self, oracle_pool):
+        """Adresa simplă fără keywords — strada rămâne intactă."""
+        addr = self._parse_address(oracle_pool, "Strada Victoriei 10")
+        assert "VICTORIEI" in (addr["strada"] or ""), f"strada={addr['strada']}"
+
+
 # ---------------------------------------------------------------------------
 # Test regresie: comenzi existente în SQLite
 # ---------------------------------------------------------------------------
diff --git a/api/tests/test_business_rules.py b/api/tests/test_business_rules.py
index 21ec675..8e530d3 100644
--- a/api/tests/test_business_rules.py
+++ b/api/tests/test_business_rules.py
@@ -615,6 +615,19 @@ class TestAddrMatch:
         r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
         assert _addr_match(g, r) is True
 
+    def test_addr_match_diacritics(self):
+        """Romanian diacritics (â, ș, ț, î) are normalized same as Oracle storage."""
+        from app.services.sync_service import _addr_match
+        import json
+        # â→a, î→i in city name
+        g = json.dumps({"address": "Str. Morii 208", "city": "Sf\u00e2ntu Ilie", "region": "Suceava"})
+        r = json.dumps({"strada": "MORII", "numar": "208", "localitate": "SFANTU ILIE", "judet": "SUCEAVA"})
+        assert _addr_match(g, r) is True
+        # ș→s, ț→t in street
+        g2 = json.dumps({"address": "Str. \u0218oseaua \u021a\u0103rii 5", "city": "Bucure\u0219ti", "region": "Bucure\u0219ti"})
+        r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
+        assert _addr_match(g2, r2) is True
+
     def test_billing_equals_shipping_short_circuit(self):
         """Short-circuit condition: billing == shipping → reuse addr_livr_id."""
         from app.services.import_service import format_address_for_oracle