I have been tasked with rewriting a vba script that replaces circumflex characters in a csv with their normal counterparts. The current script just uses a bunch of replace functions for every character. This is the current script:
'i is the current row, this whole thing sits in a loop to loop through all rows in the csv
.Cells(i, 4) = Replace(.Cells(i, 4), "Á", "A")
.Cells(i, 5) = Replace(.Cells(i, 5), "Á", "A")
.Cells(i, 7) = Replace(.Cells(i, 7), "Á", "A")
.Cells(i, 9) = Replace(.Cells(i, 9), "Á", "A")
.Cells(i, 4) = Replace(.Cells(i, 4), "À", "A")
.Cells(i, 5) = Replace(.Cells(i, 5), "À", "A")
.Cells(i, 7) = Replace(.Cells(i, 7), "À", "A")
.Cells(i, 9) = Replace(.Cells(i, 9), "À", "A")
.Cells(i, 4) = Replace(.Cells(i, 4), "Â", "A")
.Cells(i, 5) = Replace(.Cells(i, 5), "Â", "A")
.Cells(i, 7) = Replace(.Cells(i, 7), "Â", "A")
.Cells(i, 9) = Replace(.Cells(i, 9), "Â", "A")
.Cells(i, 4) = Replace(.Cells(i, 4), "Å", "A")
.Cells(i, 5) = Replace(.Cells(i, 5), "Å", "A")
.Cells(i, 7) = Replace(.Cells(i, 7), "Å", "A")
.Cells(i, 9) = Replace(.Cells(i, 9), "Å", "A")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ã", "A")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ã", "A")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ã", "A")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ã", "A")
.Cells(i, 4) = Replace(.Cells(i, 4), "à", "a")
.Cells(i, 5) = Replace(.Cells(i, 5), "à", "a")
.Cells(i, 7) = Replace(.Cells(i, 7), "à", "a")
.Cells(i, 9) = Replace(.Cells(i, 9), "à", "a")
.Cells(i, 4) = Replace(.Cells(i, 4), "á", "a")
.Cells(i, 5) = Replace(.Cells(i, 5), "á", "a")
.Cells(i, 7) = Replace(.Cells(i, 7), "á", "a")
.Cells(i, 9) = Replace(.Cells(i, 9), "á", "a")
.Cells(i, 4) = Replace(.Cells(i, 4), "â", "a")
.Cells(i, 5) = Replace(.Cells(i, 5), "â", "a")
.Cells(i, 7) = Replace(.Cells(i, 7), "â", "a")
.Cells(i, 9) = Replace(.Cells(i, 9), "â", "a")
.Cells(i, 4) = Replace(.Cells(i, 4), "å", "a")
.Cells(i, 5) = Replace(.Cells(i, 5), "å", "a")
.Cells(i, 7) = Replace(.Cells(i, 7), "å", "a")
.Cells(i, 9) = Replace(.Cells(i, 9), "å", "a")
.Cells(i, 4) = Replace(.Cells(i, 4), "ã", "a")
.Cells(i, 5) = Replace(.Cells(i, 5), "ã", "a")
.Cells(i, 7) = Replace(.Cells(i, 7), "ã", "a")
.Cells(i, 9) = Replace(.Cells(i, 9), "ã", "a")
.Cells(i, 4) = Replace(.Cells(i, 4), "É", "E")
.Cells(i, 5) = Replace(.Cells(i, 5), "É", "E")
.Cells(i, 7) = Replace(.Cells(i, 7), "É", "E")
.Cells(i, 9) = Replace(.Cells(i, 9), "É", "E")
.Cells(i, 4) = Replace(.Cells(i, 4), "È", "E")
.Cells(i, 5) = Replace(.Cells(i, 5), "È", "E")
.Cells(i, 7) = Replace(.Cells(i, 7), "È", "E")
.Cells(i, 9) = Replace(.Cells(i, 9), "È", "E")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ê", "E")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ê", "E")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ê", "E")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ê", "E")
.Cells(i, 4) = Replace(.Cells(i, 4), "é", "e")
.Cells(i, 5) = Replace(.Cells(i, 5), "é", "e")
.Cells(i, 7) = Replace(.Cells(i, 7), "é", "e")
.Cells(i, 9) = Replace(.Cells(i, 9), "é", "e")
.Cells(i, 4) = Replace(.Cells(i, 4), "è", "e")
.Cells(i, 5) = Replace(.Cells(i, 5), "è", "e")
.Cells(i, 7) = Replace(.Cells(i, 7), "è", "e")
.Cells(i, 9) = Replace(.Cells(i, 9), "è", "e")
.Cells(i, 4) = Replace(.Cells(i, 4), "ê", "e")
.Cells(i, 5) = Replace(.Cells(i, 5), "ê", "e")
.Cells(i, 7) = Replace(.Cells(i, 7), "ê", "e")
.Cells(i, 9) = Replace(.Cells(i, 9), "ê", "e")
.Cells(i, 4) = Replace(.Cells(i, 4), "Í", "I")
.Cells(i, 5) = Replace(.Cells(i, 5), "Í", "I")
.Cells(i, 7) = Replace(.Cells(i, 7), "Í", "I")
.Cells(i, 9) = Replace(.Cells(i, 9), "Í", "I")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ì", "I")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ì", "I")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ì", "I")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ì", "I")
.Cells(i, 4) = Replace(.Cells(i, 4), "Î", "I")
.Cells(i, 5) = Replace(.Cells(i, 5), "Î", "I")
.Cells(i, 7) = Replace(.Cells(i, 7), "Î", "I")
.Cells(i, 9) = Replace(.Cells(i, 9), "Î", "I")
.Cells(i, 4) = Replace(.Cells(i, 4), "í", "i")
.Cells(i, 5) = Replace(.Cells(i, 5), "í", "i")
.Cells(i, 7) = Replace(.Cells(i, 7), "í", "i")
.Cells(i, 9) = Replace(.Cells(i, 9), "í", "i")
.Cells(i, 4) = Replace(.Cells(i, 4), "ì", "i")
.Cells(i, 5) = Replace(.Cells(i, 5), "ì", "i")
.Cells(i, 7) = Replace(.Cells(i, 7), "ì", "i")
.Cells(i, 9) = Replace(.Cells(i, 9), "ì", "i")
.Cells(i, 4) = Replace(.Cells(i, 4), "î", "i")
.Cells(i, 5) = Replace(.Cells(i, 5), "î", "i")
.Cells(i, 7) = Replace(.Cells(i, 7), "î", "i")
.Cells(i, 9) = Replace(.Cells(i, 9), "î", "i")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ó", "O")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ó", "O")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ó", "O")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ó", "O")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ò", "O")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ò", "O")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ò", "O")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ò", "O")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ô", "O")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ô", "O")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ô", "O")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ô", "O")
.Cells(i, 4) = Replace(.Cells(i, 4), "Õ", "O")
.Cells(i, 5) = Replace(.Cells(i, 5), "Õ", "O")
.Cells(i, 7) = Replace(.Cells(i, 7), "Õ", "O")
.Cells(i, 9) = Replace(.Cells(i, 9), "Õ", "O")
.Cells(i, 4) = Replace(.Cells(i, 4), "ó", "o")
.Cells(i, 5) = Replace(.Cells(i, 5), "ó", "o")
.Cells(i, 7) = Replace(.Cells(i, 7), "ó", "o")
.Cells(i, 9) = Replace(.Cells(i, 9), "ó", "o")
.Cells(i, 4) = Replace(.Cells(i, 4), "ò", "o")
.Cells(i, 5) = Replace(.Cells(i, 5), "ò", "o")
.Cells(i, 7) = Replace(.Cells(i, 7), "ò", "o")
.Cells(i, 9) = Replace(.Cells(i, 9), "ò", "o")
.Cells(i, 4) = Replace(.Cells(i, 4), "ô", "o")
.Cells(i, 5) = Replace(.Cells(i, 5), "ô", "o")
.Cells(i, 7) = Replace(.Cells(i, 7), "ô", "o")
.Cells(i, 9) = Replace(.Cells(i, 9), "ô", "o")
.Cells(i, 4) = Replace(.Cells(i, 4), "õ", "o")
.Cells(i, 5) = Replace(.Cells(i, 5), "õ", "o")
.Cells(i, 7) = Replace(.Cells(i, 7), "õ", "o")
.Cells(i, 9) = Replace(.Cells(i, 9), "õ", "o")
.Cells(i, 4) = Replace(.Cells(i, 4), "ø", "o")
.Cells(i, 5) = Replace(.Cells(i, 5), "ø", "o")
.Cells(i, 7) = Replace(.Cells(i, 7), "ø", "o")
.Cells(i, 9) = Replace(.Cells(i, 9), "ø", "o")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ú", "U")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ú", "U")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ú", "U")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ú", "U")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ù", "U")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ù", "U")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ù", "U")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ù", "U")
.Cells(i, 4) = Replace(.Cells(i, 4), "Û", "U")
.Cells(i, 5) = Replace(.Cells(i, 5), "Û", "U")
.Cells(i, 7) = Replace(.Cells(i, 7), "Û", "U")
.Cells(i, 9) = Replace(.Cells(i, 9), "Û", "U")
.Cells(i, 4) = Replace(.Cells(i, 4), "ú", "u")
.Cells(i, 5) = Replace(.Cells(i, 5), "ú", "u")
.Cells(i, 7) = Replace(.Cells(i, 7), "ú", "u")
.Cells(i, 9) = Replace(.Cells(i, 9), "ú", "u")
.Cells(i, 4) = Replace(.Cells(i, 4), "ù", "u")
.Cells(i, 5) = Replace(.Cells(i, 5), "ù", "u")
.Cells(i, 7) = Replace(.Cells(i, 7), "ù", "u")
.Cells(i, 9) = Replace(.Cells(i, 9), "ù", "u")
.Cells(i, 4) = Replace(.Cells(i, 4), "û", "u")
.Cells(i, 5) = Replace(.Cells(i, 5), "û", "u")
.Cells(i, 7) = Replace(.Cells(i, 7), "û", "u")
.Cells(i, 9) = Replace(.Cells(i, 9), "û", "u")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ý", "Y")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ý", "Y")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ý", "Y")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ý", "Y")
.Cells(i, 4) = Replace(.Cells(i, 4), "ý", "y")
.Cells(i, 5) = Replace(.Cells(i, 5), "ý", "y")
.Cells(i, 7) = Replace(.Cells(i, 7), "ý", "y")
.Cells(i, 9) = Replace(.Cells(i, 9), "ý", "y")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ç", "C")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ç", "C")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ç", "C")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ç", "C")
.Cells(i, 4) = Replace(.Cells(i, 4), "ç", "c")
.Cells(i, 5) = Replace(.Cells(i, 5), "ç", "c")
.Cells(i, 7) = Replace(.Cells(i, 7), "ç", "c")
.Cells(i, 9) = Replace(.Cells(i, 9), "ç", "c")
.Cells(i, 4) = Replace(.Cells(i, 4), "Ñ", "N")
.Cells(i, 5) = Replace(.Cells(i, 5), "Ñ", "N")
.Cells(i, 7) = Replace(.Cells(i, 7), "Ñ", "N")
.Cells(i, 9) = Replace(.Cells(i, 9), "Ñ", "N")
.Cells(i, 4) = Replace(.Cells(i, 4), "ñ", "n")
.Cells(i, 5) = Replace(.Cells(i, 5), "ñ", "n")
.Cells(i, 7) = Replace(.Cells(i, 7), "ñ", "n")
.Cells(i, 9) = Replace(.Cells(i, 9), "ñ", "n")
.Cells(i, 4) = Replace(.Cells(i, 4), "š", "s")
.Cells(i, 5) = Replace(.Cells(i, 5), "š", "s")
.Cells(i, 7) = Replace(.Cells(i, 7), "š", "s")
.Cells(i, 9) = Replace(.Cells(i, 9), "š", "s")
.Cells(i, 4) = Replace(.Cells(i, 4), "é", "e")
.Cells(i, 5) = Replace(.Cells(i, 5), "é", "e")
.Cells(i, 7) = Replace(.Cells(i, 7), "é", "e")
.Cells(i, 9) = Replace(.Cells(i, 9), "é", "e")
I'm looking to rewrite this monstrosity into a easier to read and better to maintain script.
Im aware i can already simplyfie this by using loops and grouping different to-replace-Characters together. But this still doestn really satisfy me, I want to know if theres a better way to go about this that im not seeing.
CodePudding user response:
Return unaccented base characters
You may profit from a match of a current character within an alphabetic array Application.Match(curChar, abc)
; omitting the optional 3rd argument allows to return the closest alphabetical value position in an ordered list of letters, i.e. the unaccented base letter.
The match result no
(a Variant) shows a valid numeric position number for all alphabetical counterparts, any other symbol checks (thus Not IsNumeric
or caught by IsError
) will be used to return the unchanged input character.
Matched letters will be converted to uppercase if the original letter is uppercase.
Function unaccented(ByVal txt As String) As String
'1) assign alphabet letters to array
Dim abc As Variant: abc = [Char(Column(A:Z) 96)] ' Excel only shortcut!
'2) match base characters in alphabet
Dim i As Long
For i = 1 To Len(txt)
'get current character
Dim curChar As String: curChar = Mid(txt, i, 1)
'get alphabetical base position
Dim no As Variant: no = Application.Match(curChar, abc) ' without 3rd arg 0 !!
'provide for case sensibility
If IsNumeric(no) Then
curChar = IIf(curChar = UCase(curChar), UCase(abc(no)), abc(no))
End If
'assemble total string to be returned
unaccented = unaccented & curChar
Next i
End Function
CodePudding user response:
Using a Dictionary Object to hold the character mappings and a Regular Expression to do the replacement. It might not be quicker but it should be easier to maintain, just update the arrays ar1 and ar2.
Option Explicit
Sub ConvertToAscii()
Dim wb As Workbook, ws As Worksheet
Dim regex As Object, dict As Object
Dim i As Long, lastrow As Long, t0 As Single: t0 = Timer
' character map
Set dict = BuildCharacterMap()
' regex
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
End With
' worksheet
Set wb = ThisWorkbook
Set ws = wb.Sheets(1)
Application.ScreenUpdating = False
With ws
lastrow = .Cells(.Rows.Count, 4).End(xlUp).Row
For i = 2 To lastrow
Call ChangeChar(.Cells(i, 4), regex, dict)
Call ChangeChar(.Cells(i, 5), regex, dict)
Call ChangeChar(.Cells(i, 7), regex, dict)
Call ChangeChar(.Cells(i, 9), regex, dict)
.Cells(i, 10) = "X"
Next
End With
Application.ScreenUpdating = True
MsgBox lastrow - 1 & " rows processed", vbInformation, Format(Timer - t0, "0.0 secs")
End Sub
Sub ChangeChar(cell As Range, ByRef regex, ByRef dict)
Dim k, s As String
s = Replace(cell.Value, "é", "e") ' special case
For Each k In dict.keys
regex.Pattern = "[" & dict(k) & "]"
s = regex.Replace(s, k)
Next
cell.Value = s
End Sub
Function BuildCharacterMap() As Object
Dim dict, ar1, ar2, a, v
' character mappings
ar1 = Array("o ø", "C Ç", "s š")
ar2 = Array("A ÁÀÂÅÃ", "E ÉÈÊ", "I ÍÌÎ", "N Ñ", "O ÓÒÔÕ", "U ÚÙÛ", "Y Ý")
' build character map as dictionary
Set dict = CreateObject("Scripting.Dictionary")
dict.CompareMode = vbBinaryCompare
' upper and lower cases
For Each v In ar2
a = Split(v, " ")
dict.Add a(0), a(1)
dict.Add LCase(a(0)), LCase(a(1))
Next
' single case
For Each v In ar1
a = Split(v, " ")
If dict.exists(a(0)) Then
dict(a(0)) = dict(a(0)) & a(1)
Else
dict.Add a(0), a(1)
End If
Next
Set BuildCharacterMap = dict
End Function