SplitExtended – improved VB split function with group characters

Based on SplitEx by Chip Pearson, SplitExtended is optimised to be over double the speed, with fixes and additional features;

Features over the standard VB Split are;

  • grouping characters, no longer split strings in quotes,
  • ignore consecutive delimiters, while preserving those in  grouping characters,
  • option to remove grouping characters, start and end quotes can be removed, and
  • double grouping characters inside a grouping character is converted to single grouping characters, double quotes inside quotes are converted to single quotes.

Download modSplitExtended.bas

Code here;

Option Explicit

'====================================================================================
' SplitExtended
'
'
' Based on SplitEx by Chip Pearson http://www.cpearson.com/Excel/Split.aspx
'
' Improvements include;
' - over double the speed of SplitEx
' - doesn't remove double Delimiters in groups
' - double GroupChar in groups treated as escaped and converted to single
'====================================================================================
Public Function SplitExtended(ByRef InString As String, _
ByVal Delimiter As String, _
Optional ByVal GroupChar As String = vbNullString, _
Optional ByVal IgnoreConsecutiveDelimiters As Boolean = False, _
Optional ByVal DeleteGroupCharacters As Boolean = False) As String()

Dim arr() As String

Dim InGroupReplace As String
Dim consectGroupReplace As String

Dim S As String

S = InString
Dim i As Long, j As Long

If LenB(S) = 0 Then
'string is empty so return an unbound array (similar to original Split)
ElseIf LenB(Delimiter) = 0 Then
'Delimiter is empty so return an array with the first element the string
ReDim arr(0)
arr(0) = S
ElseIf InStrB(1, S, Delimiter, vbBinaryCompare) = 0 Then
'Delimiter is not found in the string, return an array with the first element of the string
ReDim arr(0)
arr(0) = S
Else

'find a unique character in string s, that isn't the Delimiter
'it can be unique AND the group character as this means there won't be any grouping
i = -1
Do While LenB(InGroupReplace) = 0
i = i + 1
'the character is unique
If InStrB(1, S, ChrW$(i), vbBinaryCompare) = 0 Then
'the character is not the delimiter
If StrComp(ChrW$(i), Delimiter, vbBinaryCompare) <> 0 Then
InGroupReplace = ChrW$(i)
End If
End If
Loop

'if DeleteGroupCharacters is enabled, it is common to double the GroupChar if that character is needed
'replace all consecutive group characters with consectGroupReplace
If DeleteGroupCharacters Then
'only if there are consecutive GroupChar
If InStrB(1, S, GroupChar & GroupChar, vbBinaryCompare) > 0 Then
'find a unique character in string s, that isn't the Delimiter and isn't inGroupReplace
Do While LenB(consectGroupReplace) = 0
i = i + 1
'the character is unique
If InStrB(1, S, ChrW$(i), vbBinaryCompare) = 0 Then
'the character is not the delimiter
If StrComp(ChrW$(i), Delimiter, vbBinaryCompare) <> 0 Then
'the character is not the GroupChar
If StrComp(ChrW$(i), GroupChar, vbBinaryCompare) <> 0 Then
consectGroupReplace = ChrW$(i)
End If
End If
End If
Loop

'once the character is found, replace all double GroupChar
S = Replace(S, GroupChar & GroupChar, consectGroupReplace, 1, -1)
End If
End If

'replace any Delimiter occuring in a group with inGroupReplace
i = InStr(1, S, GroupChar, vbBinaryCompare)
j = InStr(i + Len(GroupChar), S, GroupChar, vbBinaryCompare)
Do While i > 0 And j > 0
If j > i Then
'mid$(s, 1, i) = Replace(mid$(s, 1, i), Delimiter, inGroupReplace, 1, -1, vbBinaryCompare)
'mid$(s, j) = Replace(mid$(s, j), Delimiter, inGroupReplace, 1, -1, vbBinaryCompare)
Mid$(S, i, j - i) = Replace(Mid$(S, i, j - i), Delimiter, InGroupReplace, 1, -1, vbBinaryCompare)
Else
S = Replace(S, Delimiter, InGroupReplace, 1, -1, vbBinaryCompare)
End If

i = InStr(j + Len(GroupChar), S, GroupChar, vbBinaryCompare)
j = InStr(i + Len(GroupChar), S, GroupChar, vbBinaryCompare)
Loop

'remove any consecutive delimiters, iteratively
If IgnoreConsecutiveDelimiters Then
Do While InStrB(1, S, Delimiter & Delimiter, vbBinaryCompare) > 0
S = Replace(S, Delimiter & Delimiter, Delimiter, 1, -1, vbBinaryCompare)
Loop
End If

'perform the split
arr = Split(S, Delimiter, -1, vbBinaryCompare)

'loop through the array and restore the special characters
For i = 0 To UBound(arr)
If InStrB(1, arr(i), InGroupReplace, vbBinaryCompare) > 0 Then
arr(i) = Replace(arr(i), InGroupReplace, Delimiter, 1, -1, vbBinaryCompare)
End If
If DeleteGroupCharacters Then
If InStrB(1, arr(i), GroupChar, vbBinaryCompare) > 0 Then
arr(i) = Replace(arr(i), GroupChar, vbNullString, 1, -1, vbBinaryCompare)
End If
If InStrB(1, arr(i), consectGroupReplace, vbBinaryCompare) > 0 Then
arr(i) = Replace(arr(i), consectGroupReplace, GroupChar, 1, -1, vbBinaryCompare)
End If
End If
Next i
End If

SplitExtended = arr

End Function

Posted

in

by

Comments

Leave a Reply

Your email address will not be published. Required fields are marked *