fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs

- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector
  number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94);
  include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when
  billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before
  TIER 1; resolve id_localitate before search; TIER 1 now matches on
  id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-07 13:48:49 +00:00
parent 5b4b317636
commit 0f817b2130
8 changed files with 214 additions and 106 deletions

View File

@@ -10,6 +10,7 @@ CREATE OR REPLACE PACKAGE PACK_IMPORT_PARTENERI AS
-- 06.04.2026 - fix strip_diacritics: UNISTR encoding-safe (TRANSLATE cu UTF-8 literal se corupea pe Windows)
-- 06.04.2026 - fix TIER 1: strip_diacritics si pe localitate (nu doar strada)
-- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- ====================================================================
-- CONSTANTS
@@ -973,16 +974,63 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
v_apart,
v_etaj);
-- 01.04.2026 - cautare adresa pe strada + diacritics + id_loc validation
-- 06.04.2026 - strip_diacritics si pe localitate (fix: 'FĂLTICENI' vs 'FALTICENI')
-- TIER 1: county + city + street (all diacritics normalized) + valid id_loc
-- 07.04.2026 - normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before TIER 1
-- A generic Bucharest locality is rewritten to its sector-specific form, but
-- only when a non-blank sector value is available; otherwise v_localitate is
-- left untouched. TRIM(NULL) is NULL, so one check covers NULL and blank sectors.
IF TRIM(v_sector) IS NOT NULL
   AND UPPER(TRIM(v_localitate)) IN ('BUCURESTI',
                                      'MUN BUCURESTI',
                                      'MUN. BUCURESTI',
                                      'MUNICIPIUL BUCURESTI') THEN
    v_localitate := 'BUCURESTI SECTORUL ' || TRIM(v_sector);
END IF;
-- Resolve id_judet before TIER 1
-- Reads the county id for v_judet from syn_nom_judete, considering only rows
-- with sters = 0 (presumably the soft-delete flag — confirm against the schema).
-- When no row matches, the package default N_ID_JUD_DEFAULT is used instead.
-- Other exceptions (e.g. TOO_MANY_ROWS) are not handled by this block.
BEGIN
    SELECT j.id_judet
      INTO v_id_judet
      FROM syn_nom_judete j
     WHERE j.sters = 0
       AND j.judet = v_judet;
EXCEPTION
    WHEN NO_DATA_FOUND THEN
        v_id_judet := N_ID_JUD_DEFAULT;
END;
-- Resolve id_localitate before TIER 1
-- Step 1: within county v_id_judet, pick the locality (inactiv = 0, sters = 0)
--         whose strip_diacritics(localitate) equals strip_diacritics(v_localitate).
-- Step 2: if none matches, fall back to the first locality of that county in
--         localitate order.
-- Step 3: if the county has no eligible locality at all, use the package
--         defaults for locality, county and country.
--
-- Fix: ROWNUM is assigned to rows BEFORE the ORDER BY of the same query block
-- is applied, so filtering on a "rownum rn" alias computed next to the ORDER BY
-- returned an arbitrary row, not the first one in sort order. The sort now
-- happens in an inline view and ROWNUM = 1 is applied outside it. id_loc is
-- added as a tie-breaker so repeated imports resolve to the same row, which
-- matters because TIER 1 matches on id_loc.
-- NOTE(review): when step 2 is taken, the resolved id_loc is unrelated to the
-- locality text received; confirm that TIER 1 matching on it is intended.
BEGIN
    SELECT id_loc, id_judet, id_tara
      INTO v_id_localitate, v_id_judet, v_id_tara
      FROM (SELECT l.id_loc, l.id_judet, l.id_tara
              FROM syn_nom_localitati l
             WHERE l.id_judet = v_id_judet
               AND strip_diacritics(l.localitate) = strip_diacritics(v_localitate)
               AND l.inactiv = 0
               AND l.sters = 0
             ORDER BY l.localitate, l.id_loc)
     WHERE ROWNUM = 1;
EXCEPTION
    WHEN NO_DATA_FOUND THEN
        -- No locality with a matching name: take the first one of the county.
        BEGIN
            SELECT id_loc, id_judet, id_tara
              INTO v_id_localitate, v_id_judet, v_id_tara
              FROM (SELECT l.id_loc, l.id_judet, l.id_tara
                      FROM syn_nom_localitati l
                     WHERE l.id_judet = v_id_judet
                       AND l.inactiv = 0
                       AND l.sters = 0
                     ORDER BY l.localitate, l.id_loc)
             WHERE ROWNUM = 1;
        EXCEPTION
            WHEN NO_DATA_FOUND THEN
                -- County has no eligible locality: use the package defaults.
                v_id_localitate := N_ID_LOCALITATE_DEFAULT;
                v_id_judet := N_ID_JUD_DEFAULT;
                v_id_tara := N_ID_TARA_DEFAULT;
        END;
END;
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- TIER 1: match pe id_loc + strada (evita duplicate MUNICIPIUL BUCURESTI vs BUCURESTI SECTORUL X)
begin
select id_adresa into p_id_adresa from (
select id_adresa
from vadrese_parteneri
where id_part = p_id_part
and strip_diacritics(judet) = strip_diacritics(v_judet)
and strip_diacritics(localitate) = strip_diacritics(v_localitate)
and id_loc = v_id_localitate
and strip_diacritics(strada) = strip_diacritics(v_strada)
and id_loc IS NOT NULL
order by principala desc, id_adresa desc
@@ -993,50 +1041,6 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
-- Adaug o adresa
if p_id_adresa is null then
-- look up the county
-- Reads the county id for v_judet from syn_nom_judete (rows with sters = 0
-- only); when no row matches, the package default N_ID_JUD_DEFAULT is used.
-- Other exceptions (e.g. TOO_MANY_ROWS) are not handled by this block.
begin
    select j.id_judet
      into v_id_judet
      from syn_nom_judete j
     where j.sters = 0
       and j.judet = v_judet;
exception
    when NO_DATA_FOUND then
        v_id_judet := N_ID_JUD_DEFAULT;
end;
-- look up the locality (strip_diacritics on both sides — fixes encoding mismatch)
-- Step 1: within county v_id_judet, pick the locality (inactiv = 0, sters = 0)
--         whose strip_diacritics(localitate) equals strip_diacritics(v_localitate).
-- Step 2: if none matches, fall back to the first locality of that county in
--         localitate order.
-- Step 3: if the county has no eligible locality, use the package defaults.
--
-- Fix: ROWNUM is assigned before the ORDER BY of the same query block is
-- applied, so filtering on a "rownum rn" alias computed next to the ORDER BY
-- returned an arbitrary row. The sort now happens in an inline view and
-- rownum = 1 is applied outside it; id_loc is added as a tie-breaker so the
-- choice is deterministic.
begin
    select id_loc, id_judet, id_tara
      into v_id_localitate, v_id_judet, v_id_tara
      from (select l.id_loc, l.id_judet, l.id_tara
              from syn_nom_localitati l
             where l.id_judet = v_id_judet
               and strip_diacritics(l.localitate) = strip_diacritics(v_localitate)
               and l.inactiv = 0
               and l.sters = 0
             order by l.localitate, l.id_loc)
     where rownum = 1;
exception
    when NO_DATA_FOUND then
        -- No locality with a matching name: take the first one of the county.
        begin
            select id_loc, id_judet, id_tara
              into v_id_localitate, v_id_judet, v_id_tara
              from (select l.id_loc, l.id_judet, l.id_tara
                      from syn_nom_localitati l
                     where l.id_judet = v_id_judet
                       and l.inactiv = 0
                       and l.sters = 0
                     order by l.localitate, l.id_loc)
             where rownum = 1;
        exception
            when NO_DATA_FOUND then
                -- County has no eligible locality: use the package defaults.
                v_id_localitate := N_ID_LOCALITATE_DEFAULT;
                v_id_judet := N_ID_JUD_DEFAULT;
                v_id_tara := N_ID_TARA_DEFAULT;
        end;
end;
-- 01.04.2026 - apply strip_diacritics when storing addresses
-- Both the street and the locality are overwritten in place with their
-- strip_diacritics form before the statements that follow use them.
v_strada := strip_diacritics(v_strada);
v_localitate := strip_diacritics(v_localitate);