fix(address): use SOUNDEX city matching and strip SECTORUL from city
Fixes false negatives where city spellings differ slightly (e.g. "Sfântu Ilie" vs "SFINTU ILIE") or ROA stores "BUCURESTI SECTORUL 1" while GoMag sends "Municipiul București". Both backend (_addr_match) and frontend (addrMatch) now use identical SOUNDEX logic mirroring Oracle's implementation. Also fixes field order: etaj before apart in r_street concatenation. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -38,13 +38,34 @@ def _addr_match(gomag_json, roa_json):
|
||||
s = (s or '').translate(import_service._DIACRITICS).upper()
|
||||
s = _ADDR_WORDS.sub('', s)
|
||||
return re.sub(r'[^A-Z0-9]', '', s)
|
||||
def _soundex(s):
|
||||
"""SOUNDEX matching Oracle's implementation — for city fuzzy compare."""
|
||||
if not s:
|
||||
return ''
|
||||
_code = {'B':'1','F':'1','P':'1','V':'1',
|
||||
'C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2',
|
||||
'D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}
|
||||
result = s[0]
|
||||
prev = _code.get(s[0], '0')
|
||||
for c in s[1:]:
|
||||
if len(result) >= 4:
|
||||
break
|
||||
if c in 'AEIOU':
|
||||
prev = '0'
|
||||
elif c not in 'HW':
|
||||
d = _code.get(c, '')
|
||||
if d and d != prev:
|
||||
result += d
|
||||
if d:
|
||||
prev = d
|
||||
return result.ljust(4, '0')
|
||||
g_street = norm(g.get('address') or g.get('strada') or '')
|
||||
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
|
||||
r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('etaj') or '') + (r.get('apart') or ''))
|
||||
g_city = norm(g.get('city') or g.get('localitate') or '')
|
||||
r_city = norm(r.get('localitate') or '')
|
||||
g_region = norm(g.get('region') or g.get('judet') or '')
|
||||
r_region = norm(r.get('judet') or '')
|
||||
return g_street == r_street and g_city == r_city and g_region == r_region
|
||||
return g_street == r_street and _soundex(g_city) == _soundex(r_city) and g_region == r_region
|
||||
|
||||
|
||||
# Sync state
|
||||
|
||||
@@ -830,16 +830,33 @@ function addrMatch(gomag, roa) {
|
||||
/**
 * Normalise an address fragment so two spellings can be compared.
 *
 * Steps, in order: map the listed Romanian diacritic characters through
 * `_DIAC` (defined outside this function), upper-case, drop
 * `SECTORUL <digits>`, drop common address filler words (STR, NR, BL, ...),
 * then strip everything that is not `A-Z` or `0-9`.
 *
 * @param {?string} s - Raw address fragment; null/undefined/empty gives ''.
 * @returns {string} Upper-case alphanumeric key.
 */
function norm(s) {
  return (s || '')
    .replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
    .toUpperCase()
    // Runs before the word list so the sector number is removed together
    // with the word (e.g. "BUCURESTI SECTORUL 1" -> "BUCURESTI ").
    .replace(/\bSECTORUL\s*\d*/g, '')
    // The trailing alternative `(?=\d)` lets a filler word glued to a number
    // ("NR5", "AP12") be stripped while keeping the number itself.
    .replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
    .replace(/[^A-Z0-9]/g, '');
}
|
||||
/**
 * Return the 4-character SOUNDEX code of `s` for fuzzy city comparison.
 *
 * Intended to produce the same codes as the backend `_soundex` helper, which
 * in turn mirrors Oracle's SOUNDEX.
 *
 * @param {?string} s - City name; the caller passes the output of `norm`
 *   (upper-case, only `A-Z` and `0-9`).
 * @returns {string} '' for empty input; otherwise the first character of `s`
 *   followed by up to three digits, right-padded with '0' to length 4.
 */
function soundex(s) {
  if (!s) return '';
  // Consonant groups -> digit; letters absent from the table carry no code.
  const code = {
    B: 1, F: 1, P: 1, V: 1,
    C: 2, G: 2, J: 2, K: 2, Q: 2, S: 2, X: 2, Z: 2,
    D: 3, T: 3, L: 4, M: 5, N: 5, R: 6,
  };
  // The first character is kept verbatim; its code only seeds `prev` so an
  // immediately following letter of the same group is not emitted again.
  let result = s[0];
  let prev = code[s[0]] || 0;
  for (let i = 1; i < s.length && result.length < 4; i++) {
    const c = s[i];
    if ('AEIOU'.includes(c)) {
      // A vowel separates equal codes: the same digit may repeat after it.
      prev = 0;
    } else if (c !== 'H' && c !== 'W') {
      // H and W are skipped entirely and do NOT separate equal codes.
      // NOTE(review): 'Y' and digits also land here with no code, so they
      // neither emit a digit nor reset `prev` -- confirm against Oracle.
      const d = code[c];
      if (d && d !== prev) result += d;
      if (d) prev = d;
    }
  }
  return result.padEnd(4, '0');
}
|
||||
const gStreet = norm(gomag.address || gomag.strada || '');
|
||||
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
|
||||
const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.etaj||'') + (roa.apart||''));
|
||||
const gCity = norm(gomag.city || gomag.localitate || '');
|
||||
const rCity = norm(roa.localitate || '');
|
||||
const gRegion = norm(gomag.region || gomag.judet || '');
|
||||
const rRegion = norm(roa.judet || '');
|
||||
return gStreet === rStreet && gCity === rCity && gRegion === rRegion;
|
||||
return gStreet === rStreet && soundex(gCity) === soundex(rCity) && gRegion === rRegion;
|
||||
}
|
||||
|
||||
function hasEfacturaRisk(roa) {
|
||||
|
||||
@@ -167,7 +167,7 @@
|
||||
|
||||
<script>window.ROOT_PATH = "{{ rp }}";</script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script src="{{ rp }}/static/js/shared.js?v=41"></script>
|
||||
<script src="{{ rp }}/static/js/shared.js?v=42"></script>
|
||||
<script>
|
||||
// Dark mode toggle
|
||||
function toggleDarkMode() {
|
||||
|
||||
Reference in New Issue
Block a user