fix(address): use SOUNDEX city matching and strip SECTORUL from city

Fixes false negatives where city spellings differ slightly (e.g.
"Sfântu Ilie" vs "SFINTU ILIE") or ROA stores "BUCURESTI SECTORUL 1"
while GoMag sends "Municipiul București". Both backend (_addr_match)
and frontend (addrMatch) now use identical SOUNDEX logic mirroring
Oracle's implementation.

Also fixes field order: etaj before apart in r_street concatenation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-08 22:31:36 +00:00
parent f48c2d62c6
commit 5eba87976b
4 changed files with 64 additions and 6 deletions

View File

@@ -38,13 +38,34 @@ def _addr_match(gomag_json, roa_json):
s = (s or '').translate(import_service._DIACRITICS).upper() s = (s or '').translate(import_service._DIACRITICS).upper()
s = _ADDR_WORDS.sub('', s) s = _ADDR_WORDS.sub('', s)
return re.sub(r'[^A-Z0-9]', '', s) return re.sub(r'[^A-Z0-9]', '', s)
def _soundex(s):
    """Return the 4-character SOUNDEX code of ``s`` for fuzzy city comparison.

    Implements the algorithm Oracle documents for its SOUNDEX function
    (Knuth): keep the first character, drop A, E, H, I, O, U, W and Y,
    map the remaining consonants to digits, collapse runs of letters that
    share a digit, and pad/truncate to four characters.

    Args:
        s: Text to encode. The coding table only contains upper-case A-Z;
            the caller in this file passes already-normalised text.

    Returns:
        ``''`` for empty input, otherwise the first character of ``s``
        followed by up to three digits, right-padded with ``'0'``.
    """
    if not s:
        return ''
    _code = {'B': '1', 'F': '1', 'P': '1', 'V': '1',
             'C': '2', 'G': '2', 'J': '2', 'K': '2',
             'Q': '2', 'S': '2', 'X': '2', 'Z': '2',
             'D': '3', 'T': '3', 'L': '4', 'M': '5', 'N': '5', 'R': '6'}
    result = s[0]
    # The first character's own code takes part in duplicate suppression.
    prev = _code.get(s[0], '0')
    for c in s[1:]:
        if len(result) >= 4:
            break
        if c in 'AEIOUY':
            # Vowels (Y included) are dropped but separate equal codes, so
            # the same digit may be emitted again after them.
            prev = '0'
        elif c not in 'HW':
            # H and W are dropped and do NOT separate equal codes, hence
            # prev is left untouched for them.
            d = _code.get(c, '')
            if d:
                if d != prev:
                    result += d
                prev = d
    return result.ljust(4, '0')
g_street = norm(g.get('address') or g.get('strada') or '') g_street = norm(g.get('address') or g.get('strada') or '')
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or '')) r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('etaj') or '') + (r.get('apart') or ''))
g_city = norm(g.get('city') or g.get('localitate') or '') g_city = norm(g.get('city') or g.get('localitate') or '')
r_city = norm(r.get('localitate') or '') r_city = norm(r.get('localitate') or '')
g_region = norm(g.get('region') or g.get('judet') or '') g_region = norm(g.get('region') or g.get('judet') or '')
r_region = norm(r.get('judet') or '') r_region = norm(r.get('judet') or '')
return g_street == r_street and g_city == r_city and g_region == r_region return g_street == r_street and _soundex(g_city) == _soundex(r_city) and g_region == r_region
# Sync state # Sync state

View File

@@ -830,16 +830,33 @@ function addrMatch(gomag, roa) {
function norm(s) { function norm(s) {
return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c) return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
.toUpperCase() .toUpperCase()
.replace(/\bSECTORUL\s*\d*/g, '')
.replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '') .replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
.replace(/[^A-Z0-9]/g, ''); .replace(/[^A-Z0-9]/g, '');
} }
/**
 * Return the 4-character SOUNDEX code of `s` for fuzzy city comparison.
 *
 * Implements the algorithm Oracle documents for its SOUNDEX function
 * (Knuth): keep the first character, drop A, E, H, I, O, U, W and Y, map
 * the remaining consonants to digits, collapse runs of letters sharing a
 * digit, and pad/truncate to four characters.
 *
 * @param {string} s Text to encode; the coding table only contains
 *   upper-case A-Z, and the caller here passes already-normalised text.
 * @returns {string} '' for empty input, otherwise the first character
 *   followed by up to three digits, right-padded with '0'.
 */
function soundex(s) {
  if (!s) return '';
  const code = {B:1,F:1,P:1,V:1,C:2,G:2,J:2,K:2,Q:2,S:2,X:2,Z:2,
                D:3,T:3,L:4,M:5,N:5,R:6};
  // The first character's own code takes part in duplicate suppression.
  let result = s[0], prev = code[s[0]] || 0;
  for (let i = 1; i < s.length && result.length < 4; i++) {
    const c = s[i];
    if ('AEIOUY'.includes(c)) {
      // Vowels (Y included) are dropped but separate equal codes.
      prev = 0;
    } else if (c !== 'H' && c !== 'W') {
      // H and W are dropped and do NOT separate equal codes.
      const d = code[c];
      if (d) {
        if (d !== prev) result += d;
        prev = d;
      }
    }
  }
  return result.padEnd(4, '0');
}
const gStreet = norm(gomag.address || gomag.strada || ''); const gStreet = norm(gomag.address || gomag.strada || '');
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||'')); const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.etaj||'') + (roa.apart||''));
const gCity = norm(gomag.city || gomag.localitate || ''); const gCity = norm(gomag.city || gomag.localitate || '');
const rCity = norm(roa.localitate || ''); const rCity = norm(roa.localitate || '');
const gRegion = norm(gomag.region || gomag.judet || ''); const gRegion = norm(gomag.region || gomag.judet || '');
const rRegion = norm(roa.judet || ''); const rRegion = norm(roa.judet || '');
return gStreet === rStreet && gCity === rCity && gRegion === rRegion; return gStreet === rStreet && soundex(gCity) === soundex(rCity) && gRegion === rRegion;
} }
function hasEfacturaRisk(roa) { function hasEfacturaRisk(roa) {

View File

@@ -167,7 +167,7 @@
<script>window.ROOT_PATH = "{{ rp }}";</script> <script>window.ROOT_PATH = "{{ rp }}";</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="{{ rp }}/static/js/shared.js?v=41"></script> <script src="{{ rp }}/static/js/shared.js?v=42"></script>
<script> <script>
// Dark mode toggle // Dark mode toggle
function toggleDarkMode() { function toggleDarkMode() {

View File

@@ -611,7 +611,7 @@ class TestAddrMatch:
"""GoMag address with 'etaj 7' matches ROA with etaj field.""" """GoMag address with 'etaj 7' matches ROA with etaj field."""
from app.services.sync_service import _addr_match from app.services.sync_service import _addr_match
import json import json
g = json.dumps({"address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7", "city": "Bucuresti", "region": "Bucuresti"}) g = json.dumps({"address": "Bld Decebal 24 Bl S2B Sc 1 Et 7 Ap 94", "city": "Bucuresti", "region": "Bucuresti"})
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"}) r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g, r) is True assert _addr_match(g, r) is True
@@ -628,6 +628,26 @@ class TestAddrMatch:
r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"}) r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
assert _addr_match(g2, r2) is True assert _addr_match(g2, r2) is True
def test_sectorul_digit_stripping(self):
    """ROA city 'BUCURESTI SECTORUL 1' must match GoMag 'Municipiul București'."""
    import json
    from app.services.sync_service import _addr_match

    gomag_addr = {
        "address": "Bd. 1 Decembrie 1918 26",
        "city": "Municipiul București",
        "region": "Bucuresti",
    }
    roa_addr = {
        "strada": "BD 1 DECEMBRIE 1918",
        "numar": "26",
        "localitate": "BUCURESTI SECTORUL 1",
        "judet": "BUCURESTI",
    }
    matched = _addr_match(json.dumps(gomag_addr), json.dumps(roa_addr))
    assert matched is True
def test_addr_match_soundex_city(self):
    """SOUNDEX city matching: SFANTU ILIE ≈ SFINTU ILIE (as in Oracle L2)."""
    import json
    from app.services.sync_service import _addr_match

    # Positive case: the two city spellings differ slightly.
    gomag_similar = json.dumps({
        "address": "Str. Morii 208",
        "city": "Sfântu Ilie",
        "region": "Suceava",
    })
    roa_similar = json.dumps({
        "strada": "MORII",
        "numar": "208",
        "localitate": "SFINTU ILIE",
        "judet": "SUCEAVA",
    })
    assert _addr_match(gomag_similar, roa_similar) is True

    # Negative case: a completely different city must not produce a match.
    gomag_other = json.dumps({
        "address": "Str. Morii 208",
        "city": "Cluj",
        "region": "Cluj",
    })
    roa_other = json.dumps({
        "strada": "MORII",
        "numar": "208",
        "localitate": "TIMISOARA",
        "judet": "CLUJ",
    })
    assert _addr_match(gomag_other, roa_other) is False
def test_billing_equals_shipping_short_circuit(self): def test_billing_equals_shipping_short_circuit(self):
"""Short-circuit condition: billing == shipping → reuse addr_livr_id.""" """Short-circuit condition: billing == shipping → reuse addr_livr_id."""
from app.services.import_service import format_address_for_oracle from app.services.import_service import format_address_for_oracle