fix(address): normalize SECTORUL + etaj in addr_match, fix Oracle duplicate addrs

- _addr_match / addrMatch: add SECTORUL\s*\d* branch to strip sector
  number; add (?:\b|(?=\d)) to catch glued keywords (sc1, ap94);
  include etaj field in rStreet concat
- database.py: replace duplicate addr_match impl with import from sync_service
- import_service.py: short-circuit billing addr Oracle call when
  billing == shipping (avoids duplicate address creation)
- PL/SQL: normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before
  TIER 1; resolve id_localitate before search; TIER 1 now matches on
  id_loc instead of text locality
- Add scripts/cleanup_duplicate_addresses.sql for manual prod cleanup
- Add 5 new tests: sectorul, keyword+digit gluing, etaj, short-circuit, no short-circuit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-07 13:48:49 +00:00
parent 5b4b317636
commit 0f817b2130
8 changed files with 214 additions and 106 deletions

View File

@@ -1,12 +1,8 @@
import oracledb import oracledb
import aiosqlite import aiosqlite
import sqlite3 import sqlite3
import json
import re
import unicodedata
import logging import logging
import os import os
from pathlib import Path
from .config import settings from .config import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -375,33 +371,7 @@ def init_sqlite():
def _backfill_address_mismatch(conn): def _backfill_address_mismatch(conn):
"""Recompute address_mismatch from stored address JSON for all orders.""" """Recompute address_mismatch from stored address JSON for all orders."""
_ADDR_WORDS = re.compile( from .services.sync_service import _addr_match
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b'
)
def norm(s):
s = unicodedata.normalize('NFD', s or '')
s = re.sub(r'[\u0300-\u036f]', '', s).upper()
s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s)
def addr_match(gomag_json, roa_json):
if not gomag_json or not roa_json:
return True
try:
g = json.loads(gomag_json) if isinstance(gomag_json, str) else gomag_json
r = json.loads(roa_json) if isinstance(roa_json, str) else roa_json
except (json.JSONDecodeError, TypeError):
return True
g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '')
r_region = norm(r.get('judet') or '')
return g_street == r_street and g_city == r_city and g_region == r_region
try: try:
rows = conn.execute(""" rows = conn.execute("""
SELECT order_number, adresa_livrare_gomag, adresa_livrare_roa, SELECT order_number, adresa_livrare_gomag, adresa_livrare_roa,
@@ -411,8 +381,8 @@ def _backfill_address_mismatch(conn):
""").fetchall() """).fetchall()
updated = 0 updated = 0
for r in rows: for r in rows:
livr_ok = addr_match(r[1], r[2]) livr_ok = _addr_match(r[1], r[2])
fact_ok = addr_match(r[3], r[4]) fact_ok = _addr_match(r[3], r[4])
new_val = 1 if (not livr_ok or not fact_ok) else 0 new_val = 1 if (not livr_ok or not fact_ok) else 0
conn.execute( conn.execute(
"UPDATE orders SET address_mismatch = ? WHERE order_number = ?", "UPDATE orders SET address_mismatch = ? WHERE order_number = ?",

View File

@@ -338,28 +338,32 @@ def import_single_order(order, id_pol: int = None, id_sectie: int = None, app_se
# Different person: use shipping address for BOTH billing and shipping in ROA # Different person: use shipping address for BOTH billing and shipping in ROA
addr_fact_id = addr_livr_id addr_fact_id = addr_livr_id
else: else:
# Same person: use billing address as-is # Same person: compute billing addr, short-circuit if identical to shipping
id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
billing_addr = format_address_for_oracle( billing_addr = format_address_for_oracle(
order.billing.address, order.billing.city, order.billing.region order.billing.address, order.billing.city, order.billing.region
) )
cur.callproc("PACK_IMPORT_PARTENERI.cauta_sau_creeaza_adresa", [ if addr_livr_id and order.shipping and billing_addr == shipping_addr:
partner_id, billing_addr, # billing = shipping: reuse addr_livr_id to avoid duplicate Oracle address
order.billing.phone or "", addr_fact_id = addr_livr_id
order.billing.email or "", else:
id_adresa_fact id_adresa_fact = cur.var(oracledb.DB_TYPE_NUMBER)
]) cur.callproc("PACK_IMPORT_PARTENERI.cauta_sau_creeaza_adresa", [
addr_fact_id = id_adresa_fact.getvalue() partner_id, billing_addr,
order.billing.phone or "",
order.billing.email or "",
id_adresa_fact
])
addr_fact_id = id_adresa_fact.getvalue()
if addr_fact_id is None: if addr_fact_id is None:
cur.execute("SELECT PACK_IMPORT_PARTENERI.get_last_error FROM dual") cur.execute("SELECT PACK_IMPORT_PARTENERI.get_last_error FROM dual")
plsql_err = cur.fetchone()[0] plsql_err = cur.fetchone()[0]
err_msg = f"Billing address creation failed for partner {partner_id}" err_msg = f"Billing address creation failed for partner {partner_id}"
if plsql_err: if plsql_err:
err_msg += f": {plsql_err}" err_msg += f": {plsql_err}"
logger.error(f"Order {order_number}: {err_msg}") logger.error(f"Order {order_number}: {err_msg}")
result["error"] = err_msg result["error"] = err_msg
return result return result
if addr_fact_id is not None: if addr_fact_id is not None:
result["id_adresa_facturare"] = int(addr_fact_id) result["id_adresa_facturare"] = int(addr_fact_id)

View File

@@ -31,8 +31,9 @@ def _addr_match(gomag_json, roa_json):
except (json.JSONDecodeError, TypeError): except (json.JSONDecodeError, TypeError):
return True return True
_ADDR_WORDS = re.compile( _ADDR_WORDS = re.compile(
r'\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|' r'\bSECTORUL\s*\d*'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|ORAS)\b' r'|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|'
r'ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))'
) )
def norm(s): def norm(s):
s = unicodedata.normalize('NFD', s or '') s = unicodedata.normalize('NFD', s or '')
@@ -40,7 +41,7 @@ def _addr_match(gomag_json, roa_json):
s = _ADDR_WORDS.sub('', s) s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s) return re.sub(r'[^A-Z0-9]', '', s)
g_street = norm(g.get('address') or g.get('strada') or '') g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '')) r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '') g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '') r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '') g_region = norm(g.get('region') or g.get('judet') or '')

View File

@@ -850,11 +850,11 @@ function addrMatch(gomag, roa) {
function norm(s) {
    // Canonical form used to compare address fragments:
    //   1. strip diacritics (NFD + remove combining marks) and upper-case;
    //   2. drop "SECTORUL <n>" together with its number, so that
    //      "BUCURESTI SECTORUL 1" compares equal to "BUCURESTI";
    //   3. drop address keywords, also when glued to a digit ("SC1", "AP94");
    //   4. keep only A-Z and 0-9.
    // The keyword pattern is the same one the Python _addr_match uses; keep
    // the two in sync, otherwise the UI and the backend disagree on mismatches.
    return (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '')
        .toUpperCase()
        .replace(/\bSECTORUL\s*\d*|\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
        .replace(/[^A-Z0-9]/g, '');
}
const gStreet = norm(gomag.address || gomag.strada || ''); const gStreet = norm(gomag.address || gomag.strada || '');
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'')); const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
const gCity = norm(gomag.city || gomag.localitate || ''); const gCity = norm(gomag.city || gomag.localitate || '');
const rCity = norm(roa.localitate || ''); const rCity = norm(roa.localitate || '');
const gRegion = norm(gomag.region || gomag.judet || ''); const gRegion = norm(gomag.region || gomag.judet || '');

View File

@@ -168,7 +168,7 @@
<script>window.ROOT_PATH = "{{ rp }}";</script> <script>window.ROOT_PATH = "{{ rp }}";</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="{{ rp }}/static/js/shared.js?v=32"></script> <script src="{{ rp }}/static/js/shared.js?v=33"></script>
<script> <script>
// Dark mode toggle // Dark mode toggle
function toggleDarkMode() { function toggleDarkMode() {

View File

@@ -10,6 +10,7 @@ CREATE OR REPLACE PACKAGE PACK_IMPORT_PARTENERI AS
-- 06.04.2026 - fix strip_diacritics: UNISTR encoding-safe (TRANSLATE cu UTF-8 literal se corupea pe Windows) -- 06.04.2026 - fix strip_diacritics: UNISTR encoding-safe (TRANSLATE cu UTF-8 literal se corupea pe Windows)
-- 06.04.2026 - fix TIER 1: strip_diacritics si pe localitate (nu doar strada) -- 06.04.2026 - fix TIER 1: strip_diacritics si pe localitate (nu doar strada)
-- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada -- 07.04.2026 - fix parser adrese: inserare virgule inaintea keywords, tokeni lipiti (Ap78), strip localitate din strada
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- ==================================================================== -- ====================================================================
-- CONSTANTS -- CONSTANTS
@@ -973,16 +974,63 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
v_apart, v_apart,
v_etaj); v_etaj);
-- 01.04.2026 - cautare adresa pe strada + diacritics + id_loc validation -- 07.04.2026 - normalize MUNICIPIUL BUCURESTI → BUCURESTI SECTORUL X before TIER 1
-- 06.04.2026 - strip_diacritics si pe localitate (fix: 'FĂLTICENI' vs 'FALTICENI') IF UPPER(TRIM(v_localitate)) IN ('MUNICIPIUL BUCURESTI', 'MUN BUCURESTI', 'MUN. BUCURESTI', 'BUCURESTI') THEN
-- TIER 1: county + city + street (all diacritics normalized) + valid id_loc IF v_sector IS NOT NULL AND TRIM(v_sector) IS NOT NULL THEN
v_localitate := 'BUCURESTI SECTORUL ' || TRIM(v_sector);
END IF;
END IF;
-- Resolve id_judet inainte de TIER 1
BEGIN
SELECT id_judet INTO v_id_judet
FROM syn_nom_judete
WHERE judet = v_judet
AND sters = 0;
EXCEPTION
WHEN NO_DATA_FOUND THEN v_id_judet := N_ID_JUD_DEFAULT;
END;
-- Resolve id_localitate inainte de TIER 1
BEGIN
SELECT id_loc, id_judet, id_tara
INTO v_id_localitate, v_id_judet, v_id_tara
FROM (SELECT id_loc, id_judet, id_tara, rownum rn
FROM syn_nom_localitati l
WHERE id_judet = v_id_judet
AND strip_diacritics(localitate) = strip_diacritics(v_localitate)
AND inactiv = 0
AND sters = 0
ORDER BY localitate)
WHERE rn = 1;
EXCEPTION
WHEN NO_DATA_FOUND THEN
BEGIN
SELECT id_loc, id_judet, id_tara
INTO v_id_localitate, v_id_judet, v_id_tara
FROM (SELECT id_loc, id_judet, id_tara, rownum rn
FROM syn_nom_localitati l
WHERE id_judet = v_id_judet
AND inactiv = 0
AND sters = 0
ORDER BY localitate)
WHERE rn = 1;
EXCEPTION
WHEN NO_DATA_FOUND THEN
v_id_localitate := N_ID_LOCALITATE_DEFAULT;
v_id_judet := N_ID_JUD_DEFAULT;
v_id_tara := N_ID_TARA_DEFAULT;
END;
END;
-- 07.04.2026 - fix duplicate: normalize localitate + resolve id_localitate inainte de TIER 1 (match pe id_loc)
-- TIER 1: match pe id_loc + strada (evita duplicate MUNICIPIUL BUCURESTI vs BUCURESTI SECTORUL X)
begin begin
select id_adresa into p_id_adresa from ( select id_adresa into p_id_adresa from (
select id_adresa select id_adresa
from vadrese_parteneri from vadrese_parteneri
where id_part = p_id_part where id_part = p_id_part
and strip_diacritics(judet) = strip_diacritics(v_judet) and id_loc = v_id_localitate
and strip_diacritics(localitate) = strip_diacritics(v_localitate)
and strip_diacritics(strada) = strip_diacritics(v_strada) and strip_diacritics(strada) = strip_diacritics(v_strada)
and id_loc IS NOT NULL and id_loc IS NOT NULL
order by principala desc, id_adresa desc order by principala desc, id_adresa desc
@@ -993,50 +1041,6 @@ CREATE OR REPLACE PACKAGE BODY PACK_IMPORT_PARTENERI AS
-- Adaug o adresa -- Adaug o adresa
if p_id_adresa is null then if p_id_adresa is null then
-- caut judetul
begin
select id_judet
into v_id_judet
from syn_nom_judete
where judet = v_judet
and sters = 0;
exception
when NO_DATA_FOUND then
v_id_judet := N_ID_JUD_DEFAULT;
end;
-- caut localitatea (strip_diacritics pe ambele parti — fix encoding mismatch)
begin
select id_loc, id_judet, id_tara
into v_id_localitate, v_id_judet, v_id_tara
from (select id_loc, id_judet, id_tara, rownum rn
from syn_nom_localitati l
where id_judet = v_id_judet
and strip_diacritics(localitate) = strip_diacritics(v_localitate)
and inactiv = 0
and sters = 0
order by localitate)
where rn = 1;
exception
when NO_DATA_FOUND then
begin
select id_loc, id_judet, id_tara
into v_id_localitate, v_id_judet, v_id_tara
from (select id_loc, id_judet, id_tara, rownum rn
from syn_nom_localitati l
where id_judet = v_id_judet
and inactiv = 0
and sters = 0
order by localitate)
where rn = 1;
exception
when NO_DATA_FOUND then
v_id_localitate := N_ID_LOCALITATE_DEFAULT;
v_id_judet := N_ID_JUD_DEFAULT;
v_id_tara := N_ID_TARA_DEFAULT;
end;
end;
-- 01.04.2026 - strip_diacritics la stocare adrese -- 01.04.2026 - strip_diacritics la stocare adrese
v_strada := strip_diacritics(v_strada); v_strada := strip_diacritics(v_strada);
v_localitate := strip_diacritics(v_localitate); v_localitate := strip_diacritics(v_localitate);

View File

@@ -733,6 +733,47 @@ class TestAddrMatch:
roa = json.dumps({"strada": "STRADA VASILE GOLDIS", "numar": "19", "bloc": "", "scara": "", "apart": "", "localitate": "ALBA IULIA", "judet": "ALBA"}) roa = json.dumps({"strada": "STRADA VASILE GOLDIS", "numar": "19", "bloc": "", "scara": "", "apart": "", "localitate": "ALBA IULIA", "judet": "ALBA"})
assert _addr_match(gomag, roa) is False assert _addr_match(gomag, roa) is False
def test_sectorul_in_city(self):
    """City 'Municipiul București' must compare equal to ROA 'BUCURESTI SECTORUL 1'."""
    import json
    from app.services.sync_service import _addr_match

    gomag_payload = {
        "address": "Bld Decebal 24",
        "city": "Municipiul București",
        "region": "Bucuresti",
    }
    roa_payload = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "localitate": "BUCURESTI SECTORUL 1",
        "judet": "BUCURESTI",
    }
    matched = _addr_match(json.dumps(gomag_payload), json.dumps(roa_payload))
    assert matched is True
def test_keyword_digit_gluing(self):
    """Keywords written without a space before the number ('nr24', 'sc1', 'ap94') are still removed."""
    import json
    from app.services.sync_service import _addr_match

    gomag_payload = {
        "address": "Bld Decebal nr24 bl S2B sc1 ap94",
        "city": "Bucuresti",
        "region": "Bucuresti",
    }
    roa_payload = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "bloc": "S2B",
        "scara": "1",
        "apart": "94",
        "localitate": "BUCURESTI",
        "judet": "BUCURESTI",
    }
    matched = _addr_match(json.dumps(gomag_payload), json.dumps(roa_payload))
    assert matched is True
def test_etaj_in_street(self):
    """A trailing 'Etaj 7' in the GoMag street line matches the ROA record's etaj field."""
    import json
    from app.services.sync_service import _addr_match

    gomag_payload = {
        "address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7",
        "city": "Bucuresti",
        "region": "Bucuresti",
    }
    roa_payload = {
        "strada": "BLD DECEBAL",
        "numar": "24",
        "bloc": "S2B",
        "scara": "1",
        "apart": "94",
        "etaj": "7",
        "localitate": "BUCURESTI",
        "judet": "BUCURESTI",
    }
    matched = _addr_match(json.dumps(gomag_payload), json.dumps(roa_payload))
    assert matched is True
def test_billing_equals_shipping_short_circuit(self):
    """Identical billing and shipping inputs format to equal Oracle address strings.

    Only the comparison behind the short-circuit is evaluated here; the
    import routine itself is not called by this test.
    """
    from app.services.import_service import format_address_for_oracle

    same_inputs = ("Bld Decebal 24", "Bucuresti", "Bucuresti")
    shipping_addr = format_address_for_oracle(*same_inputs)
    billing_addr = format_address_for_oracle(*same_inputs)
    addr_livr_id = 123
    # A truthy shipping address id plus equal formatted strings means "reuse".
    addresses_equal = billing_addr == shipping_addr
    assert addr_livr_id and addresses_equal
def test_billing_differs_shipping_no_short_circuit(self):
    """Different billing and shipping inputs must not satisfy the reuse condition.

    Only the string comparison is evaluated; the import routine is not called.
    """
    from app.services.import_service import format_address_for_oracle

    shipping_addr = format_address_for_oracle("Str. Victoriei 10", "Cluj", "Cluj")
    billing_addr = format_address_for_oracle("Bld Decebal 24", "Bucuresti", "Bucuresti")
    addr_livr_id = 123
    # De Morgan form of: not (addr_livr_id and billing_addr == shipping_addr)
    assert (not addr_livr_id) or billing_addr != shipping_addr
class TestFormatAddressForOracle: class TestFormatAddressForOracle:
"""Tests for format_address_for_oracle city stripping.""" """Tests for format_address_for_oracle city stripping."""

View File

@@ -0,0 +1,88 @@
-- cleanup_duplicate_addresses.sql
-- Diagnostic and cleanup script for duplicate Oracle partner addresses
-- Run on ROA Oracle database AFTER deploying 07.04.2026 PL/SQL fix
-- IMPORTANT: Review Step 2 output BEFORE running Step 3 COMMIT
-- =============================================================================
-- STEP 1: Diagnostic — find partners with duplicate addresses (same id_loc + strada)
-- =============================================================================
-- One row per (partner, locality, normalized street) group that has more than
-- one live address. keep_id is the lowest id_adresa in the group, dup_id the
-- highest; when nr_duplicate > 2 there are further duplicates between the two.
-- Soft-deleted rows (sters <> 0) are excluded so that addresses already merged
-- are not reported again.
SELECT p.id_part,
       p.denumire,
       strip_diacritics(a.strada) AS strada_norm,
       a.id_loc,
       COUNT(*) AS nr_duplicate,
       MIN(a.id_adresa) AS keep_id,
       MAX(a.id_adresa) AS dup_id
FROM vadrese_parteneri a
JOIN syn_parteneri p ON p.id_part = a.id_part
WHERE a.id_loc IS NOT NULL
  AND a.strada IS NOT NULL
  AND a.sters = 0
GROUP BY p.id_part, p.denumire, strip_diacritics(a.strada), a.id_loc
HAVING COUNT(*) > 1
ORDER BY nr_duplicate DESC, p.denumire;
-- =============================================================================
-- STEP 2: FK references for each duplicate address
-- Review this before proceeding to Step 3
-- =============================================================================
-- Orders that still point at a duplicate address.
-- dup_addr holds every live address that is NOT the lowest id_adresa of its
-- (partner, normalized street, locality) group, so groups with three or more
-- rows are fully covered, not only their highest id.
WITH dup_addr AS (
    SELECT id_adresa
    FROM (
        SELECT a.id_adresa,
               MIN(a.id_adresa) OVER (
                   PARTITION BY a.id_part, strip_diacritics(a.strada), a.id_loc
               ) AS keep_id
        FROM vadrese_parteneri a
        WHERE a.id_loc IS NOT NULL
          AND a.strada IS NOT NULL
          AND a.sters = 0
    )
    WHERE id_adresa <> keep_id
)
SELECT 'LIVRARE' AS tip,
       c.numar_comanda,
       c.id_adresa_livrare AS id_adresa
FROM comenzi c
WHERE c.id_adresa_livrare IN (SELECT id_adresa FROM dup_addr)
UNION ALL
SELECT 'FACTURARE',
       c.numar_comanda,
       c.id_adresa_facturare
FROM comenzi c
WHERE c.id_adresa_facturare IN (SELECT id_adresa FROM dup_addr)
ORDER BY id_adresa;
-- =============================================================================
-- STEP 3: Consolidation — update FK references, then soft-delete duplicates
-- IMPORTANT: Run STEP 1 and 2 first. Manual COMMIT required after review.
-- =============================================================================
-- Repoint comenzi from every duplicate address to the kept one (lowest
-- id_adresa of its group), then soft-delete the duplicate.
-- Every extra row of a group is merged in a single pass, and rows that are
-- already soft-deleted (sters <> 0) are skipped so a re-run does not process
-- them again.
-- NOTE(review): the kept row is chosen by id only; confirm that soft-deleting a
-- duplicate flagged principala = 1 is acceptable before committing.
-- SQL*Plus/SQLcl: enable DBMS_OUTPUT so the merge log below is displayed.
SET SERVEROUTPUT ON
BEGIN
    FOR rec IN (
        SELECT dup_id, keep_id
        FROM (
            SELECT id_adresa AS dup_id,
                   MIN(id_adresa) OVER (
                       PARTITION BY id_part, strip_diacritics(strada), id_loc
                   ) AS keep_id
            FROM vadrese_parteneri
            WHERE id_loc IS NOT NULL
              AND strada IS NOT NULL
              AND sters = 0
        )
        WHERE dup_id <> keep_id
        ORDER BY keep_id, dup_id
    ) LOOP
        UPDATE comenzi SET id_adresa_livrare = rec.keep_id
        WHERE id_adresa_livrare = rec.dup_id;
        UPDATE comenzi SET id_adresa_facturare = rec.keep_id
        WHERE id_adresa_facturare = rec.dup_id;
        -- Soft-delete duplicate address
        UPDATE vadrese_parteneri SET sters = 1
        WHERE id_adresa = rec.dup_id;
        DBMS_OUTPUT.PUT_LINE('Merged dup_id=' || rec.dup_id || ' → keep_id=' || rec.keep_id);
    END LOOP;
END;
/
-- COMMIT; -- Uncomment after reviewing DBMS_OUTPUT
-- =============================================================================
-- STEP 4: Find addresses with principala=1 and strada IS NULL (empty principals)
-- =============================================================================
-- Live principal addresses whose street is missing or blank.
-- Oracle treats '' as NULL, so a comparison with '' is never true; TRIM of an
-- empty or whitespace-only value yields NULL, which IS NULL catches together
-- with genuinely NULL streets.
SELECT a.id_adresa, a.id_part, p.denumire, a.principala
FROM vadrese_parteneri a
JOIN syn_parteneri p ON p.id_part = a.id_part
WHERE a.principala = 1
  AND TRIM(a.strada) IS NULL
  AND a.sters = 0
ORDER BY p.denumire;