diff --git a/api/app/services/sync_service.py b/api/app/services/sync_service.py
index a58b3eb..c83a098 100644
--- a/api/app/services/sync_service.py
+++ b/api/app/services/sync_service.py
@@ -38,13 +38,43 @@ def _addr_match(gomag_json, roa_json):
         s = (s or '').translate(import_service._DIACRITICS).upper()
         s = _ADDR_WORDS.sub('', s)
         return re.sub(r'[^A-Z0-9]', '', s)
+    def _soundex(s):
+        """Return the 4-character SOUNDEX code of a normalised string.
+
+        Meant to mirror Oracle's SOUNDEX for the fuzzy city compare: keep
+        the first character, map consonants to digits, drop a digit that
+        repeats the previous one (H and W do not break such a run; vowels
+        and Y do), stop at four characters and pad with '0'.
+        Returns '' for empty input.
+        """
+        if not s:
+            return ''
+        _code = {'B':'1','F':'1','P':'1','V':'1',
+                 'C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2',
+                 'D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}
+        result = s[0]
+        prev = _code.get(s[0], '0')
+        for c in s[1:]:
+            if len(result) >= 4:
+                break
+            # Y separates equal codes exactly like a vowel does.
+            if c in 'AEIOUY':
+                prev = '0'
+            elif c not in 'HW':
+                # Characters with no code (e.g. digits) leave prev alone.
+                d = _code.get(c, '')
+                if d and d != prev:
+                    result += d
+                if d:
+                    prev = d
+        return result.ljust(4, '0')
     g_street = norm(g.get('address') or g.get('strada') or '')
-    r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('apart') or '') + (r.get('etaj') or ''))
+    r_street = norm((r.get('strada') or '') + (r.get('numar') or '') + (r.get('bloc') or '') + (r.get('scara') or '') + (r.get('etaj') or '') + (r.get('apart') or ''))
     g_city = norm(g.get('city') or g.get('localitate') or '')
     r_city = norm(r.get('localitate') or '')
     g_region = norm(g.get('region') or g.get('judet') or '')
     r_region = norm(r.get('judet') or '')
-    return g_street == r_street and g_city == r_city and g_region == r_region
+    return g_street == r_street and _soundex(g_city) == _soundex(r_city) and g_region == r_region
 
 
 # Sync state
diff --git a/api/app/static/js/shared.js b/api/app/static/js/shared.js
index 7861e3d..f1efda9 100644
--- a/api/app/static/js/shared.js
+++ b/api/app/static/js/shared.js
@@ -830,16 +830,36 @@ function addrMatch(gomag, roa) {
     function norm(s) {
         return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
             .toUpperCase()
+            .replace(/\bSECTORUL\s*\d*/g, '')
             .replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
             .replace(/[^A-Z0-9]/g, '');
     }
+    // Four-character SOUNDEX code of an already-normalised string ('' if empty).
+    // Kept in step with _soundex in sync_service.py: vowels and Y separate equal
+    // codes, H and W do not, characters without a code are skipped.
+    function soundex(s) {
+        if (!s) return '';
+        const code = {B:1,F:1,P:1,V:1,C:2,G:2,J:2,K:2,Q:2,S:2,X:2,Z:2,
+                      D:3,T:3,L:4,M:5,N:5,R:6};
+        let result = s[0], prev = code[s[0]] || 0;
+        for (let i = 1; i < s.length && result.length < 4; i++) {
+            const c = s[i];
+            if ('AEIOUY'.includes(c)) { prev = 0; }
+            else if (c !== 'H' && c !== 'W') {
+                const d = code[c];
+                if (d && d !== prev) result += d;
+                if (d) prev = d;
+            }
+        }
+        return result.padEnd(4, '0');
+    }
     const gStreet = norm(gomag.address || gomag.strada || '');
-    const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.apart||'') + (roa.etaj||''));
+    const rStreet = norm((roa.strada||'') + (roa.numar||'') + (roa.bloc||'') + (roa.scara||'') + (roa.etaj||'') + (roa.apart||''));
     const gCity = norm(gomag.city || gomag.localitate || '');
     const rCity = norm(roa.localitate || '');
     const gRegion = norm(gomag.region || gomag.judet || '');
     const rRegion = norm(roa.judet || '');
-    return gStreet === rStreet && gCity === rCity && gRegion === rRegion;
+    return gStreet === rStreet && soundex(gCity) === soundex(rCity) && gRegion === rRegion;
 }
 
 function hasEfacturaRisk(roa) {
diff --git a/api/app/templates/base.html b/api/app/templates/base.html
index f8c526a..a4e0342 100644
--- a/api/app/templates/base.html
+++ b/api/app/templates/base.html
@@ -167,7 +167,7 @@
-
+