fix(address): use SOUNDEX city matching and strip SECTORUL from city
Fixes false negatives where city spellings differ slightly (e.g. "Sfântu Ilie" vs "SFINTU ILIE") or ROA stores "BUCURESTI SECTORUL 1" while GoMag sends "Municipiul București". Both backend (_addr_match) and frontend (addrMatch) now use identical SOUNDEX logic mirroring Oracle's implementation. Also fixes field order: etaj before apart in r_street concatenation. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -38,13 +38,34 @@ def _addr_match(gomag_json, roa_json):
|
|||||||
s = (s or '').translate(import_service._DIACRITICS).upper()
|
s = (s or '').translate(import_service._DIACRITICS).upper()
|
||||||
s = _ADDR_WORDS.sub('', s)
|
s = _ADDR_WORDS.sub('', s)
|
||||||
return re.sub(r'[^A-Z0-9]', '', s)
|
return re.sub(r'[^A-Z0-9]', '', s)
|
||||||
|
def _soundex(s):
|
||||||
|
"""SOUNDEX matching Oracle's implementation — for city fuzzy compare."""
|
||||||
|
if not s:
|
||||||
|
return ''
|
||||||
|
_code = {'B':'1','F':'1','P':'1','V':'1',
|
||||||
|
'C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2',
|
||||||
|
'D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}
|
||||||
|
result = s[0]
|
||||||
|
prev = _code.get(s[0], '0')
|
||||||
|
for c in s[1:]:
|
||||||
|
if len(result) >= 4:
|
||||||
|
break
|
||||||
|
if c in 'AEIOU':
|
||||||
|
prev = '0'
|
||||||
|
elif c not in 'HW':
|
||||||
|
d = _code.get(c, '')
|
||||||
|
if d and d != prev:
|
||||||
|
result += d
|
||||||
|
if d:
|
||||||
|
prev = d
|
||||||
|
return result.ljust(4, '0')
|
||||||
g_street = norm(g.get('address') or g.get('strada') or '')
|
g_street = norm(g.get('address') or g.get('strada') or '')
|
||||||
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
|
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('etaj') or '') + (r.get('apart') or ''))
|
||||||
g_city = norm(g.get('city') or g.get('localitate') or '')
|
g_city = norm(g.get('city') or g.get('localitate') or '')
|
||||||
r_city = norm(r.get('localitate') or '')
|
r_city = norm(r.get('localitate') or '')
|
||||||
g_region = norm(g.get('region') or g.get('judet') or '')
|
g_region = norm(g.get('region') or g.get('judet') or '')
|
||||||
r_region = norm(r.get('judet') or '')
|
r_region = norm(r.get('judet') or '')
|
||||||
return g_street == r_street and g_city == r_city and g_region == r_region
|
return g_street == r_street and _soundex(g_city) == _soundex(r_city) and g_region == r_region
|
||||||
|
|
||||||
|
|
||||||
# Sync state
|
# Sync state
|
||||||
|
|||||||
@@ -830,16 +830,33 @@ function addrMatch(gomag, roa) {
|
|||||||
function norm(s) {
|
function norm(s) {
|
||||||
return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
|
return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
|
||||||
.toUpperCase()
|
.toUpperCase()
|
||||||
|
.replace(/\bSECTORUL\s*\d*/g, '')
|
||||||
.replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
|
.replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
|
||||||
.replace(/[^A-Z0-9]/g, '');
|
.replace(/[^A-Z0-9]/g, '');
|
||||||
}
|
}
|
||||||
|
function soundex(s) {
|
||||||
|
if (!s) return '';
|
||||||
|
const code = {B:1,F:1,P:1,V:1,C:2,G:2,J:2,K:2,Q:2,S:2,X:2,Z:2,
|
||||||
|
D:3,T:3,L:4,M:5,N:5,R:6};
|
||||||
|
let result = s[0], prev = code[s[0]] || 0;
|
||||||
|
for (let i = 1; i < s.length && result.length < 4; i++) {
|
||||||
|
const c = s[i];
|
||||||
|
if ('AEIOU'.includes(c)) { prev = 0; }
|
||||||
|
else if (c !== 'H' && c !== 'W') {
|
||||||
|
const d = code[c];
|
||||||
|
if (d && d !== prev) result += d;
|
||||||
|
if (d) prev = d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result.padEnd(4, '0');
|
||||||
|
}
|
||||||
const gStreet = norm(gomag.address || gomag.strada || '');
|
const gStreet = norm(gomag.address || gomag.strada || '');
|
||||||
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
|
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.etaj||'') + (roa.apart||''));
|
||||||
const gCity = norm(gomag.city || gomag.localitate || '');
|
const gCity = norm(gomag.city || gomag.localitate || '');
|
||||||
const rCity = norm(roa.localitate || '');
|
const rCity = norm(roa.localitate || '');
|
||||||
const gRegion = norm(gomag.region || gomag.judet || '');
|
const gRegion = norm(gomag.region || gomag.judet || '');
|
||||||
const rRegion = norm(roa.judet || '');
|
const rRegion = norm(roa.judet || '');
|
||||||
return gStreet === rStreet && gCity === rCity && gRegion === rRegion;
|
return gStreet === rStreet && soundex(gCity) === soundex(rCity) && gRegion === rRegion;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasEfacturaRisk(roa) {
|
function hasEfacturaRisk(roa) {
|
||||||
|
|||||||
@@ -167,7 +167,7 @@
|
|||||||
|
|
||||||
<script>window.ROOT_PATH = "{{ rp }}";</script>
|
<script>window.ROOT_PATH = "{{ rp }}";</script>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
|
||||||
<script src="{{ rp }}/static/js/shared.js?v=41"></script>
|
<script src="{{ rp }}/static/js/shared.js?v=42"></script>
|
||||||
<script>
|
<script>
|
||||||
// Dark mode toggle
|
// Dark mode toggle
|
||||||
function toggleDarkMode() {
|
function toggleDarkMode() {
|
||||||
|
|||||||
@@ -611,7 +611,7 @@ class TestAddrMatch:
|
|||||||
"""GoMag address with 'etaj 7' matches ROA with etaj field."""
|
"""GoMag address with 'etaj 7' matches ROA with etaj field."""
|
||||||
from app.services.sync_service import _addr_match
|
from app.services.sync_service import _addr_match
|
||||||
import json
|
import json
|
||||||
g = json.dumps({"address": "Bld Decebal 24 Bl S2B Sc 1 Ap 94 Etaj 7", "city": "Bucuresti", "region": "Bucuresti"})
|
g = json.dumps({"address": "Bld Decebal 24 Bl S2B Sc 1 Et 7 Ap 94", "city": "Bucuresti", "region": "Bucuresti"})
|
||||||
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
|
r = json.dumps({"strada": "BLD DECEBAL", "numar": "24", "bloc": "S2B", "scara": "1", "apart": "94", "etaj": "7", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
|
||||||
assert _addr_match(g, r) is True
|
assert _addr_match(g, r) is True
|
||||||
|
|
||||||
@@ -628,6 +628,26 @@ class TestAddrMatch:
|
|||||||
r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
|
r2 = json.dumps({"strada": "SOSEAUA TARII", "numar": "5", "localitate": "BUCURESTI", "judet": "BUCURESTI"})
|
||||||
assert _addr_match(g2, r2) is True
|
assert _addr_match(g2, r2) is True
|
||||||
|
|
||||||
|
def test_sectorul_digit_stripping(self):
|
||||||
|
"""'BUCURESTI SECTORUL 1' trebuie să egaleze 'Municipiul București'."""
|
||||||
|
from app.services.sync_service import _addr_match
|
||||||
|
import json
|
||||||
|
g = json.dumps({"address": "Bd. 1 Decembrie 1918 26", "city": "Municipiul București", "region": "Bucuresti"})
|
||||||
|
r = json.dumps({"strada": "BD 1 DECEMBRIE 1918", "numar": "26", "localitate": "BUCURESTI SECTORUL 1", "judet": "BUCURESTI"})
|
||||||
|
assert _addr_match(g, r) is True
|
||||||
|
|
||||||
|
def test_addr_match_soundex_city(self):
|
||||||
|
"""SOUNDEX city matching: SFANTU ILIE ≈ SFINTU ILIE (ca in Oracle L2)."""
|
||||||
|
from app.services.sync_service import _addr_match
|
||||||
|
import json
|
||||||
|
g = json.dumps({"address": "Str. Morii 208", "city": "Sfântu Ilie", "region": "Suceava"})
|
||||||
|
r = json.dumps({"strada": "MORII", "numar": "208", "localitate": "SFINTU ILIE", "judet": "SUCEAVA"})
|
||||||
|
assert _addr_match(g, r) is True
|
||||||
|
# Negative test: city complet diferit nu trebuie sa dea match
|
||||||
|
g2 = json.dumps({"address": "Str. Morii 208", "city": "Cluj", "region": "Cluj"})
|
||||||
|
r2 = json.dumps({"strada": "MORII", "numar": "208", "localitate": "TIMISOARA", "judet": "CLUJ"})
|
||||||
|
assert _addr_match(g2, r2) is False
|
||||||
|
|
||||||
def test_billing_equals_shipping_short_circuit(self):
|
def test_billing_equals_shipping_short_circuit(self):
|
||||||
"""Short-circuit condition: billing == shipping → reuse addr_livr_id."""
|
"""Short-circuit condition: billing == shipping → reuse addr_livr_id."""
|
||||||
from app.services.import_service import format_address_for_oracle
|
from app.services.import_service import format_address_for_oracle
|
||||||
|
|||||||
Reference in New Issue
Block a user