*!* CLEAR
*!* ?strtranx([ana are 1234567890.1234 lei], [\s\d+\.\d\s], [=TRANSFORM($1, "999 999 999 999.99")])
*?strtranx([ana are <<1234567890.1234>> lei], [<<], [=TRANSFORM($1, "AA")])
*!* RETURN
*==========================================================
* Regular-expression helpers for Visual FoxPro built on the
* "vbscript.regexp" COM object, preceded by a small demo script.
* NOTE: the demo stops at the first RETURN below. The statements after
* it are kept as usage examples and are not executed.
*==========================================================
CLEAR
*-- http://www.cornerstonenw.com/article_id_parsing3.htm
SET STEP ON
lcSourceString = [ana are mere 123,345 678 ad]
LOCAL laItems[10]
lnResults = GetRegExpAll(lcSourceString, '\d+', @laItems)
SET STEP ON
RETURN

strTest = [ab cd2""$$£]
?strTest
?StripNonAscii(strTest)
*-- replace non a-z09 with "" case-insensitive
? strtranx([Ab ra /ca\d&abr'a],"[^a-z0-9]",[],1,,1)
RETURN

*-- count words
? OccursRegExp("\b(\w+)\b", [the then quick quick brown fox fox]) && prints 7
*-- count repeatedwords
? OccursRegExp("\b(\w+)\s\1\b", [the then quick quick brown fox fox]) && prints 2
*-- replace first and second lower-case "a"
? strtranx([Abracadabra],[a],[*],1,2) && prints Abr*c*dabra
*-- replace first and second "a" case-insensitive
? strtranx([Abracadabra],[a],[*],1,2,1) && prints *br*cadabra
*-- locate the replacement targets
? strtranx([Abracadabra],[^a|a$],[*],1,2,0) && Abracadabr*
? strtranx([Abracadabra],[^a|a$],[*],1,2,1) && *bracadabr*
lcText = "The cost, is $123,345.75. "
*-- convert the commas
lcText = strtranx( m.lcText, "(\d{1,3})\,(\d{1,}) ","$1 $2" )
*-- convert the decimals
? strtranx( m.lcText, "(\d{1,3})\.(\d{1,})", "$1,$2" )
** prints "The cost, is $123 345,75."
*-- add 1 to all digits
? strtranx( [ABC123], "(\d)", [=TRANSFORM(VAL($1)+1)] )
** prints "ABC234"
*-- convert all dates to long format
? strtranx( [the date is: 7/18/2004 ] , [(\d{1,2}/\d{1,2}/\d{4})], [=TRANSFORM(CTOD($1),"@YL")])
** prints "the date is: Sunday, July 18, 2004"

*----------------------------------------------------------
* StrtranRegExp
* Replaces every match of tcPattern in tcSourceString with tcReplace
* (global, multiline, case-sensitive) and returns the resulting string.
* tcReplace may use the regexp engine's own $1..$n back-references.
*----------------------------------------------------------
FUNCTION StrtranRegExp( tcSourceString, tcPattern, tcReplace )
    LOCAL loRE, lcResult
    loRE = CREATEOBJECT("vbscript.regexp")
    WITH loRE
        .PATTERN = m.tcPattern
        .GLOBAL = .T.
        .multiline = .T.
        lcResult = .REPLACE( m.tcSourceString, m.tcReplace )
    ENDWITH
    * Return after ENDWITH so the WITH block is always closed normally.
    RETURN m.lcResult
ENDFUNC

*----------------------------------------------------------
* OccursRegExp
* Returns the number of matches of tcPattern found in tcText
* (global, multiline, case-sensitive).
*----------------------------------------------------------
FUNCTION OccursRegExp(tcPattern, tcText)
    LOCAL loRE, loMatches, lnResult
    loRE = CREATEOBJECT("vbscript.regexp")
    WITH loRE
        .PATTERN = m.tcPattern
        .GLOBAL = .T.
        .multiline = .T.
    ENDWITH
    loMatches = loRE.Execute( m.tcText )
    lnResult = loMatches.COUNT
    loMatches = NULL
    RETURN m.lnResult
ENDFUNC

*----------------------------------------------------------
* strtranx
* Regular-expression counterpart of STRTRAN().
*   tcSearched    - text to search. A NULL or EMPTY() value is returned
*                   as-is (NULL becomes '').
*   tcSearchFor   - regular-expression pattern
*   tcReplacement - replacement text. $1..$n are replaced with the
*                   sub-matches. When it starts with "=", the remainder
*                   is EVALUATE()d as a VFP expression for every match.
*   tnStart       - first occurrence to replace (default 1)
*   tnNumber      - number of occurrences to replace (default: all)
*   tnFlag        - 1 = case-insensitive, anything else = case-sensitive
* Returns the transformed text. A replacement expression that fails to
* evaluate is substituted with "[[ERROR #n message]]".
*----------------------------------------------------------
FUNCTION strtranx(tcSearched, ;
        tcSearchFor, ;
        tcReplacement, ;
        tnStart, tnNumber, ;
        tnFlag )
    LOCAL loRE, lcText, lnShift, lcCommand, ;
        loMatch, loMatches, lnI, lnK, lcSubMatch, ;
        llEvaluate, lcReplaceText, lcReplacement, ;
        lnStart, lnNumber, lnLast, loCol, lcKey, loErr

    IF EMPTY(NVL(m.tcSearched, ''))
        RETURN NVL(m.tcSearched, '')
    ENDIF

    loRE = CREATEOBJECT("vbscript.regexp")
    WITH loRE
        .PATTERN = m.tcSearchFor
        .GLOBAL = .T.
        .multiline = .T.
        .ignorecase = IIF(VARTYPE(m.tnFlag) = [N], m.tnFlag = 1, .F.)
    ENDWITH

    lcReplacement = m.tcReplacement
    *--- are we evaluating?
    * LEFT() with == keeps the prefix test independent of SET EXACT.
    llEvaluate = ( LEFT(m.lcReplacement, 1) == [=] )
    IF m.llEvaluate
        lcReplacement = SUBSTR( m.lcReplacement, 2 )
    ENDIF

    * Occurrence numbers are 1-based. Anything lower is treated as 1.
    lnStart = IIF(VARTYPE(m.tnStart) = [N], MAX(1, m.tnStart), 1)
    lnNumber = IIF(VARTYPE(m.tnNumber) = [N], m.tnNumber, -1)

    IF m.lnStart > 1 OR m.lnNumber # -1 OR m.llEvaluate
        lcText = m.tcSearched
        lnShift = 1
        loMatches = loRE.Execute( m.lcText )
        loCol = CREATEOBJECT([collection])

        * tnNumber is a COUNT of occurrences, as in STRTRAN(), so the last
        * occurrence handled is start + number - 1, capped at the match count.
        IF m.lnNumber = -1
            lnLast = loMatches.COUNT
        ELSE
            lnLast = MIN(m.lnStart + m.lnNumber - 1, loMatches.COUNT)
        ENDIF

        FOR lnK = m.lnStart TO m.lnLast
            loMatch = loMatches.ITEM(m.lnK - 1) && zero based
            lcCommand = m.lcReplacement

            * Substitute the highest-numbered placeholder first so that
            * "$1" does not clobber the prefix of "$10", "$11", ...
            FOR lnI = loMatch.SubMatches.COUNT TO 1 STEP -1
                lcSubMatch = loMatch.SubMatches(m.lnI - 1) && zero based
                IF VARTYPE(m.lcSubMatch) # [C]
                    * a group that did not take part in the match
                    lcSubMatch = []
                ENDIF
                IF m.llEvaluate
                    * "escape" the string we are about to use in an evaluation.
                    * it is important to escape due to possible delim chars (like ", ' etc)
                    * malicious content, or VFP line-length violations.
                    * The expression references the value through the collection
                    * instead of embedding it literally.
                    lcKey = ALLTRIM(TRANSFORM(m.lnK) + [_] + TRANSFORM(m.lnI))
                    loCol.ADD( m.lcSubMatch, m.lcKey )
                    lcSubMatch = [loCol.item('] + m.lcKey + [')]
                ENDIF
                lcCommand = STRTRAN( m.lcCommand, "$" + ALLTRIM( STR( m.lnI ) ), m.lcSubMatch )
            ENDFOR

            IF m.llEvaluate
                TRY
                    lcReplaceText = EVALUATE( m.lcCommand )
                CATCH TO loErr
                    lcReplaceText = "[[ERROR #" + TRANSFORM(loErr.ERRORNO) + [ ] + loErr.MESSAGE + "]]"
                ENDTRY
                IF VARTYPE(m.lcReplaceText) # [C]
                    * STUFF() needs character data, so convert other result types
                    lcReplaceText = TRANSFORM(m.lcReplaceText)
                ENDIF
            ELSE
                lcReplaceText = m.lcCommand
            ENDIF

            * FirstIndex is zero based. lnShift starts at 1 and tracks how far
            * earlier replacements have moved the rest of the text.
            lcText = STUFF( m.lcText, loMatch.FirstIndex + m.lnShift, loMatch.LENGTH, m.lcReplaceText )
            lnShift = m.lnShift + LEN( m.lcReplaceText ) - loMatch.LENGTH
        ENDFOR
    ELSE
        * Plain "replace everything": let the regexp engine do the work.
        lcText = loRE.REPLACE( m.tcSearched, m.tcReplacement )
    ENDIF
    RETURN m.lcText
ENDFUNC

*=====================
* StripNonAscii
* Replaces every character of tcSourceString that is not matched by the
* character class below with tcReplaceString (default: empty string).
* The pattern line is kept at column 0 so no indentation leaks into it.
*=====================
FUNCTION StripNonAscii
    LPARAMETERS tcSourceString, tcReplaceString
    LOCAL lcPattern, lcReplace, lcReturn
    TEXT TO lcPattern NOSHOW
[^A-Za-z 0-9 \.,\?'""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~]
    ENDTEXT
    lcReplace = IIF(VARTYPE(m.tcReplaceString) == 'C', m.tcReplaceString, "")
    lcReturn = strtranx( m.tcSourceString, m.lcPattern, m.lcReplace, 1, , 1)
    RETURN m.lcReturn
ENDFUNC && StripNonAscii

*=====================
* GetRegExp
* Returns the text of the tnOccurence-th match of tcPattern in
* tcSourceString, or '' when there is no such match.
* E.g. the locality from a text shaped like: STREET NUMBER LOCALITY
*   tcSourceString: Bld. Stefan cel Mare 14 Tirgu Neamt
*   tcPattern: [A-Za-z\s]+$ = (letter or space) at least once at the end of the line = Tirgu Neamt
*   tcPattern: \d+[A-Za-z\s]+$ = digits, then (letter or space) at least once at the end of the line = 14 Tirgu Neamt
*   tnOccurence: 1-based match number. Defaults to 1 when empty or not numeric.
*=====================
FUNCTION GetRegExp
    LPARAMETERS tcSourceString, tcPattern, tnOccurence
    LOCAL loRE, loMatches, lcResult, lnOccurence
    lcResult = ''
    lnOccurence = IIF(VARTYPE(m.tnOccurence) == 'N' AND !EMPTY(m.tnOccurence), m.tnOccurence, 1)
    loRE = CREATEOBJECT("vbscript.regexp")
    WITH loRE
        .PATTERN = m.tcPattern
        .GLOBAL = .T.
        .multiline = .T.
    ENDWITH
    loMatches = loRE.Execute( m.tcSourceString )
    * The lower bound keeps a negative occurrence number away from Item().
    IF m.lnOccurence >= 1 AND loMatches.COUNT >= m.lnOccurence
        lcResult = loMatches.Item(m.lnOccurence - 1).Value
    ENDIF
    loMatches = NULL
    RETURN m.lcResult
ENDFUNC && GetRegExp

*=====================
* GetRegExpAll
* Returns the number of matches of tcPattern in tcSourceString and hands
* the matched texts back through taItems (pass it by reference with @):
*   - taItems is an array: it is redimensioned to the match count and
*     taItems[1..result] receive the matches. With zero matches the
*     array is left untouched.
*   - otherwise: taItems receives the matches as one comma-separated
*     string (e.g. a list of invoice numbers).
* E.g. all numbers in a text: lnMatches = GetRegExpAll(lcSourceString, '\d+', @laMatches)
*   tcSourceString: Bld. Stefan cel Mare 14 Tirgu Neamt
*   tcPattern: [A-Za-z\s]+$ = (letter or space) at least once at the end of the line = Tirgu Neamt
*   tcPattern: \d+[A-Za-z\s]+$ = digits, then (letter or space) at least once at the end of the line = 14 Tirgu Neamt
*=====================
FUNCTION GetRegExpAll
    LPARAMETERS tcSourceString, tcPattern, taItems
    EXTERNAL ARRAY taItems
    LOCAL loRE, loMatches, lnResults, lnItem, llIsArray

    * TYPE('taItems') reports the type of the first ELEMENT of an array,
    * never "A". Probing ALEN() is the reliable way to detect an array.
    llIsArray = ( TYPE("ALEN(taItems)") == "N" )
    IF !m.llIsArray
        taItems = ""
    ENDIF

    loRE = CREATEOBJECT("vbscript.regexp")
    WITH loRE
        .PATTERN = m.tcPattern
        .GLOBAL = .T.
        .multiline = .T.
    ENDWITH
    loMatches = loRE.Execute( m.tcSourceString )
    lnResults = loMatches.COUNT

    IF m.llIsArray
        * DIMENSION cannot create a zero-length array, so only resize
        * when there is something to store.
        IF m.lnResults > 0
            DIMENSION taItems[m.lnResults]
            FOR lnItem = 1 TO m.lnResults
                taItems[m.lnItem] = loMatches.Item(m.lnItem - 1).Value
            ENDFOR
        ENDIF
    ELSE
        FOR lnItem = 1 TO m.lnResults
            taItems = m.taItems + IIF(m.lnItem > 1, ",", "") + loMatches.Item(m.lnItem - 1).Value
        ENDFOR
    ENDIF
    loMatches = NULL
    RETURN m.lnResults
ENDFUNC && GetRegExpAll