fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs

- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector
  number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94);
  include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when
  billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before
  TIER 1; resolve id_localitate before search; TIER 1 now matches on
  id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit
  (billing equal to shipping, billing different from shipping)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-07 13:48:49 +00:00
parent 5b4b317636
commit 0f817b2130
8 changed files with 214 additions and 106 deletions

View File

@@ -1,12 +1,8 @@
import oracledb
import aiosqlite
import sqlite3
import json
import re
import unicodedata
import logging
import os
from pathlib import Path
from .config import settings
logger = logging.getLogger(__name__)
@@ -375,33 +371,7 @@ def init_sqlite():
def _backfill_address_mismatch(conn):
"""Recompute address_mismatch from stored address JSON for all orders."""
_ADDR_WORDS = re.compile(
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
)
def norm(s):
s = unicodedata.normalize('NFD', s or '')
s = re.sub(r'[\u0300-\u036f]', '', s).upper()
s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s)
def addr_match(gomag_json, roa_json):
if not gomag_json or not roa_json:
return True
try:
g = json.loads(gomag_json) if isinstance(gomag_json, str) else gomag_json
r = json.loads(roa_json) if isinstance(roa_json, str) else roa_json
except (json.JSONDecodeError, TypeError):
return True
g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '')
r_region = norm(r.get('judet') or '')
return g_street == r_street and g_city == r_city and g_region == r_region
from .services.sync_service import _addr_match
try:
rows = conn.execute("""
SELECT order_number, adresa_livrare_gomag, adresa_livrare_roa,
@@ -411,8 +381,8 @@ def _backfill_address_mismatch(conn):
""").fetchall()
updated = 0
for r in rows:
livr_ok = addr_match(r[1], r[2])
fact_ok = addr_match(r[3], r[4])
livr_ok = _addr_match(r[1], r[2])
fact_ok = _addr_match(r[3], r[4])
new_val = 1 if (not livr_ok or not fact_ok) else 0
conn.execute(
"UPDATE orders SET address_mismatch = ? WHERE order_number = ?",

View File

@@ -338,11 +338,15 @@ def import_single_order(order, id_pol: int = None, id_sectie: int = None, app_se
# Different person: use shipping address for BOTH billing and shipping in ROA
addr_fact_id = addr_livr_id
else:
# Same person: use billing address as-is
id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
# Same person: compute billing addr, short-circuit if identical to shipping
billing_addr = format_address_for_oracle(
order.billing.address, order.billing.city, order.billing.region
)
if addr_livr_id and order.shipping and billing_addr == shipping_addr:
# billing = shipping: reuse addr_livr_id to avoid duplicate Oracle address
addr_fact_id = addr_livr_id
else:
id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
cur.callproc("PACK_IMPORT_PARTENERI.cauta_sau_creeaza_adresa", [
partner_id, billing_addr,
order.billing.phone or "",

View File

@@ -31,8 +31,9 @@ def _addr_match(gomag_json, roa_json):
except (json.JSONDecodeError, TypeError):
return True
_ADDR_WORDS = re.compile(
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
r'\bSECTORUL\s*\d*'
r'|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))'
)
def norm(s):
s = unicodedata.normalize('NFD', s or '')
@@ -40,7 +41,7 @@ def _addr_match(gomag_json, roa_json):
s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s)
g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or ''))
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '')

View File

@@ -850,11 +850,11 @@ function addrMatch(gomag, roa) {
/**
 * Reduce an address fragment to a comparable key.
 *
 * Steps: decompose to NFD and drop combining marks (diacritics), upper-case,
 * remove address keywords, then drop every character that is not A-Z or 0-9.
 *
 * Keyword removal mirrors the Python `_ADDR_WORDS` pattern:
 *   - `SECTORUL` is removed together with an optional trailing sector number,
 *     so "BUCURESTI SECTORUL 1" and "BUCURESTI" produce the same key;
 *   - any other keyword is removed when followed by a word boundary OR a
 *     digit, which catches glued forms such as "SC1" or "AP94".
 *
 * @param {string|null|undefined} s raw address text; falsy values yield ''.
 * @returns {string} normalized key containing only A-Z and 0-9.
 */
function norm(s) {
    const withoutDiacritics = (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    return withoutDiacritics
        .toUpperCase()
        // The SECTORUL branch must come first so that it consumes the sector number too.
        .replace(/\bSECTORUL\s*\d*|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
        .replace(/[^A-Z0-9]/g, '');
}
const gStreet = norm(gomag.address || gomag.strada || '');
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||''));
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
const gCity = norm(gomag.city || gomag.localitate || '');
const rCity = norm(roa.localitate || '');
const gRegion = norm(gomag.region || gomag.judet || '');

View File

@@ -168,7 +168,7 @@
<script>window.ROOT_PATH = "{{ rp }}";</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="{{ rp }}/static/js/shared.js?v=32"></script>
<script src="{{ rp }}/static/js/shared.js?v=33"></script>
<script>
// Dark mode toggle
function toggleDarkMode() {

View File

@@ -10,6 +10,7 @@ CREATE OR REPLACE PACKAGE PACK_IMPORT_PARTENERI AS
-- 06.04.2026 - fix strip_diacritics: UNISTR encoding-safe (TRANSLATE cu UTF-8 literal se corupea pe Windows)
-- 06.04.2026 - fix TIER 1: strip_diacritics si pe localitate (nu doar strada)
-- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- ====================================================================
-- CONSTANTS
@@ -973,16 +974,63 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
v_apart,
v_etaj);
-- 01.04.2026 - cautare adresa pe strada + diacritics + id_loc validation
-- 06.04.2026 - strip_diacritics si pe localitate (fix: 'FĂLTICENI' vs 'FALTICENI')
-- TIER 1: county + city + street (all diacritics normalized) + valid id_loc
-- 07.04.2026 - normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before TIER 1
IF UPPER(TRIM(v_localitate)) IN ('MUNICIPIUL BUCURESTI', 'MUN BUCURESTI', 'MUN. BUCURESTI', 'BUCURESTI') THEN
IF v_sector IS NOT NULL AND TRIM(v_sector) IS NOT NULL THEN
v_localitate := 'BUCURESTI SECTORUL ' || TRIM(v_sector);
END IF;
END IF;
-- Resolve id_judet inainte de TIER 1
BEGIN
SELECT id_judet INTO v_id_judet
FROM syn_nom_judete
WHERE judet = v_judet
AND sters = 0;
EXCEPTION
WHEN NO_DATA_FOUND THEN v_id_judet := N_ID_JUD_DEFAULT;
END;
-- Resolve id_localitate inainte de TIER 1
BEGIN
SELECT id_loc, id_judet, id_tara
INTO v_id_localitate, v_id_judet, v_id_tara
FROM (SELECT id_loc, id_judet, id_tara, rownum rn
FROM syn_nom_localitati l
WHERE id_judet = v_id_judet
AND strip_diacritics(localitate) = strip_diacritics(v_localitate)
AND inactiv = 0
AND sters = 0
ORDER BY localitate)
WHERE rn = 1;
EXCEPTION
WHEN NO_DATA_FOUND THEN
BEGIN
SELECT id_loc, id_judet, id_tara
INTO v_id_localitate, v_id_judet, v_id_tara
FROM (SELECT id_loc, id_judet, id_tara, rownum rn
FROM syn_nom_localitati l
WHERE id_judet = v_id_judet
AND inactiv = 0
AND sters = 0
ORDER BY localitate)
WHERE rn = 1;
EXCEPTION
WHEN NO_DATA_FOUND THEN
v_id_localitate := N_ID_LOCALITATE_DEFAULT;
v_id_judet := N_ID_JUD_DEFAULT;
v_id_tara := N_ID_TARA_DEFAULT;
END;
END;
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- TIER 1: match pe id_loc + strada (evita duplicate MUNICIPIUL BUCURESTI vs BUCURESTI SECTORUL X)
begin
select id_adresa into p_id_adresa from (
select id_adresa
from vadrese_parteneri
where id_part = p_id_part
and strip_diacritics(judet) = strip_diacritics(v_judet)
and strip_diacritics(localitate) = strip_diacritics(v_localitate)
and id_loc = v_id_localitate
and strip_diacritics(strada) = strip_diacritics(v_strada)
and id_loc IS NOT NULL
order by principala desc, id_adresa desc
@@ -993,50 +1041,6 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
-- Adaug o adresa
if p_id_adresa is null then
-- caut judetul
begin
select id_judet
into v_id_judet
from syn_nom_judete
where judet = v_judet
and sters = 0;
exception
when NO_DATA_FOUND then
v_id_judet := N_ID_JUD_DEFAULT;
end;
-- caut localitatea (strip_diacritics pe ambele parti — fix encoding mismatch)
begin
select id_loc, id_judet, id_tara
into v_id_localitate, v_id_judet, v_id_tara
from (select id_loc, id_judet, id_tara, rownum rn
from syn_nom_localitati l
where id_judet = v_id_judet
and strip_diacritics(localitate) = strip_diacritics(v_localitate)
and inactiv = 0
and sters = 0
order by localitate)
where rn = 1;
exception
when NO_DATA_FOUND then
begin
select id_loc, id_judet, id_tara
into v_id_localitate, v_id_judet, v_id_tara
from (select id_loc, id_judet, id_tara, rownum rn
from syn_nom_localitati l
where id_judet = v_id_judet
and inactiv = 0
and sters = 0
order by localitate)
where rn = 1;
exception
when NO_DATA_FOUND then
v_id_localitate := N_ID_LOCALITATE_DEFAULT;
v_id_judet := N_ID_JUD_DEFAULT;
v_id_tara := N_ID_TARA_DEFAULT;
end;
end;
-- 01.04.2026 - strip_diacritics la stocare adrese
v_strada := strip_diacritics(v_strada);
v_localitate := strip_diacritics(v_localitate);

View File

@@ -733,6 +733,47 @@ class TestAddrMatch:
roa = json.dumps({"strada": "STRADA VASILE GOLDIS", "numar": "19", "bloc": "", "scara": "", "apart": "", "localitate": "ALBA IULIA", "judet": "ALBA"})
assert _addr_match(gomag, roa) is False
def test_sectorul_in_city(self):
    """A GoMag city of 'Municipiul București' must match ROA 'BUCURESTI SECTORUL 1'."""
    import json
    from app.services.sync_service import _addr_match

    # Both sides are serialized to JSON strings before being compared.
    gomag_addr = {"address": "Bld Decebal 24", "city": "Municipiul București", "region": "Bucuresti"}
    roa_addr = {"strada": "BLD DECEBAL", "numar": "24", "localitate": "BUCURESTI SECTORUL 1", "judet": "BUCURESTI"}

    matched = _addr_match(json.dumps(gomag_addr), json.dumps(roa_addr))
    assert matched is True
def test_keyword_digit_gluing(self):
    """Keywords written without a space before their number ('nr24', 'sc1', 'ap94') are still stripped."""
    import json
    from app.services.sync_service import _addr_match

    # GoMag delivers everything in one free-text field; ROA stores the parts separately.
    gomag_addr = {"address": "Bld Decebal nr24 bl S2B sc1 ap94", "city": "Bucuresti", "region": "Bucuresti"}
    roa_addr = {"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "localitate": "BUCURESTI", "judet": "BUCURESTI"}

    matched = _addr_match(json.dumps(gomag_addr), json.dumps(roa_addr))
    assert matched is True
def test_etaj_in_street(self):
    """A GoMag address ending in 'Etaj 7' matches a ROA record that carries the floor in its 'etaj' field."""
    import json
    from app.services.sync_service import _addr_match

    gomag_addr = {"address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7", "city": "Bucuresti", "region": "Bucuresti"}
    roa_addr = {"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"}

    matched = _addr_match(json.dumps(gomag_addr), json.dumps(roa_addr))
    assert matched is True
def test_billing_equals_shipping_short_circuit(self):
    """Identical billing and shipping inputs satisfy the short-circuit predicate.

    Only the predicate itself is evaluated here (a truthy shipping address id
    plus equal formatted address strings); the order import routine is not
    called by this test.
    """
    from app.services.import_service import format_address_for_oracle

    address_parts = ("Bld Decebal 24", "Bucuresti", "Bucuresti")
    shipping_addr = format_address_for_oracle(*address_parts)
    billing_addr = format_address_for_oracle(*address_parts)
    addr_livr_id = 123

    # Same expression shape as the condition being simulated.
    assert addr_livr_id and billing_addr == shipping_addr
def test_billing_differs_shipping_no_short_circuit(self):
    """Different billing and shipping inputs must NOT satisfy the short-circuit predicate.

    Only the predicate is evaluated over the two formatted address strings;
    the order import routine is not called by this test.
    """
    from app.services.import_service import format_address_for_oracle

    shipping_parts = ("Str. Victoriei 10", "Cluj", "Cluj")
    billing_parts = ("Bld Decebal 24", "Bucuresti", "Bucuresti")
    shipping_addr = format_address_for_oracle(*shipping_parts)
    billing_addr = format_address_for_oracle(*billing_parts)
    addr_livr_id = 123

    assert not (addr_livr_id and billing_addr == shipping_addr)
class TestFormatAddressForOracle:
"""Tests for format_address_for_oracle city stripping."""

View File

@@ -0,0 +1,88 @@
-- cleanup_duplicate_addresses.sql
-- Diagnostic and cleanup script for duplicate Oracle partner addresses
-- Run on ROA Oracle database AFTER deploying 07.04.2026 PL/SQL fix
-- IMPORTANT: Review Step 2 output BEFORE running Step 3 COMMIT
--
-- Definitions used by every step below:
--   * duplicate group = active rows (sters = 0) of vadrese_parteneri that share
--     id_part, id_loc and strip_diacritics(strada);
--   * keep_id         = lowest id_adresa of the group (the surviving address);
--   * duplicates      = every other id_adresa of the group (not only the highest
--     one), so groups with 3 or more rows are consolidated in a single run.
-- Rows that are already soft-deleted are ignored, which makes the script safe
-- to re-run.
-- NOTE(review): the TIER 1 lookup in PACK_IMPORT_PARTENERI orders candidates by
-- "principala desc, id_adresa desc". Keeping the lowest id may therefore
-- soft-delete an address flagged principala = 1 - confirm with the Step 1
-- output before running Step 3.
-- =============================================================================
-- STEP 1: Diagnostic — find partners with duplicate addresses (same id_loc + strada)
-- =============================================================================
SELECT p.id_part,
       p.denumire,
       strip_diacritics(a.strada) AS strada_norm,
       a.id_loc,
       COUNT(*)                   AS nr_duplicate,
       MIN(a.id_adresa)           AS keep_id,
       -- Highest duplicate id only; when nr_duplicate > 2 Step 3 merges every
       -- id of the group other than keep_id.
       MAX(a.id_adresa)           AS dup_id
  FROM vadrese_parteneri a
  JOIN syn_parteneri p ON p.id_part = a.id_part
 WHERE a.id_loc IS NOT NULL
   AND a.strada IS NOT NULL
   AND a.sters = 0
 GROUP BY p.id_part, p.denumire, strip_diacritics(a.strada), a.id_loc
HAVING COUNT(*) > 1
 ORDER BY nr_duplicate DESC, p.denumire;
-- =============================================================================
-- STEP 2: FK references for each duplicate address
-- Review this before proceeding to Step 3
-- =============================================================================
WITH dup AS (
    -- Every active address that is not the keep_id of its group.
    SELECT id_adresa
      FROM (SELECT a.id_adresa,
                   MIN(a.id_adresa) OVER (
                       PARTITION BY a.id_part, strip_diacritics(a.strada), a.id_loc
                   ) AS keep_id
              FROM vadrese_parteneri a
             WHERE a.id_loc IS NOT NULL
               AND a.strada IS NOT NULL
               AND a.sters = 0)
     WHERE id_adresa <> keep_id
)
SELECT 'LIVRARE' AS tip,
       c.numar_comanda,
       c.id_adresa_livrare AS id_adresa
  FROM comenzi c
 WHERE c.id_adresa_livrare IN (SELECT id_adresa FROM dup)
UNION ALL
SELECT 'FACTURARE',
       c.numar_comanda,
       c.id_adresa_facturare
  FROM comenzi c
 WHERE c.id_adresa_facturare IN (SELECT id_adresa FROM dup)
 ORDER BY id_adresa;
-- =============================================================================
-- STEP 3: Consolidation — update FK references, then soft-delete duplicates
-- IMPORTANT: Run STEP 1 and 2 first. Manual COMMIT required after review.
-- Enable server output in your client to see the DBMS_OUTPUT lines.
-- =============================================================================
-- Update comenzi references from dup_id → keep_id
BEGIN
    FOR rec IN (
        -- One row per duplicate (not per group), so groups of any size are
        -- fully merged into their keep_id.
        SELECT id_adresa AS dup_id, keep_id
          FROM (SELECT id_adresa,
                       MIN(id_adresa) OVER (
                           PARTITION BY id_part, strip_diacritics(strada), id_loc
                       ) AS keep_id
                  FROM vadrese_parteneri
                 WHERE id_loc IS NOT NULL
                   AND strada IS NOT NULL
                   AND sters = 0)
         WHERE id_adresa <> keep_id
         ORDER BY keep_id, id_adresa
    ) LOOP
        UPDATE comenzi SET id_adresa_livrare = rec.keep_id
         WHERE id_adresa_livrare = rec.dup_id;
        UPDATE comenzi SET id_adresa_facturare = rec.keep_id
         WHERE id_adresa_facturare = rec.dup_id;
        -- Soft-delete duplicate address
        UPDATE vadrese_parteneri SET sters = 1
         WHERE id_adresa = rec.dup_id;
        DBMS_OUTPUT.PUT_LINE('Merged dup_id=' || rec.dup_id || ' → keep_id=' || rec.keep_id);
    END LOOP;
END;
/
-- COMMIT; -- Uncomment after reviewing DBMS_OUTPUT
-- =============================================================================
-- STEP 4: Find addresses with principala=1 and strada IS NULL (empty principals)
-- =============================================================================
SELECT a.id_adresa, a.id_part, p.denumire, a.principala
  FROM vadrese_parteneri a
  JOIN syn_parteneri p ON p.id_part = a.id_part
 WHERE a.principala = 1
   -- Oracle treats '' as NULL, so "TRIM(strada) = ''" can never be true;
   -- TRIM(...) IS NULL covers NULL, empty and whitespace-only streets.
   AND TRIM(a.strada) IS NULL
   AND a.sters = 0
 ORDER BY p.denumire;