fix(address): extract scara/etaj/apartament from comma-less addresses

The Oracle parser failed to extract scara/etaj/apartament (sc/et/ap)
when GoMag addresses had no commas. Added a REGEXP_REPLACE that inserts
commas before address keywords in v_strada ahead of the comma-split, so
the token parser always fires. Also added 5 Oracle integration tests
that call parseaza_adresa_semicolon directly, and improved diacritics
handling in addr_match (Python) and addrMatch (JS).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-08 22:00:17 +00:00
parent f049b0bf12
commit f48c2d62c6
5 changed files with 122 additions and 5 deletions

View File

@@ -822,8 +822,13 @@ function fmtAddr(a) {
function addrMatch(gomag, roa) {
if (!gomag || !roa) return true; // can't compare
const _DIAC = {
'\u0103':'a','\u00e2':'a','\u00ee':'i','\u0219':'s','\u021b':'t',
'\u0102':'A','\u00c2':'A','\u00ce':'I','\u0218':'S','\u021a':'T',
'\u015f':'s','\u0163':'t','\u015e':'S','\u0162':'T'
};
function norm(s) {
return (s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '')
return (s || '').replace(/[\u0103\u00e2\u00ee\u0219\u021b\u0102\u00c2\u00ce\u0218\u021a\u015f\u0163\u015e\u0162]/g, c => _DIAC[c] || c)
.toUpperCase()
.replace(/\b(STR|STRADA|NR|NUMAR|NUMARUL|BL|BLOC|SC|SCARA|AP|APART|APARTAMENT|ET|ETAJ|COM|COMUNA|SAT|MUN|MUNICIPIUL|JUD|JUDETUL|CARTIER|PARTER|SECTOR|SECTORUL|ORAS)(?:\b|(?=\d))/g, '')
.replace(/[^A-Z0-9]/g, '');