Home > Net >  Replace different types of characters with circumflex with their non-circumflex counterpart
Replace different types of characters with circumflex with their non-circumflex counterpart

Time:01-31

I have been tasked with rewriting a vba script that replaces circumflex characters in a csv with their normal counterparts. The current script just uses a bunch of replace functions for every character. This is the current script:

'i is the current row, this whole thing sits in a loop to loop through all rows in the csv 

            .Cells(i, 4) = Replace(.Cells(i, 4), "Á", "A")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Á", "A")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Á", "A")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Á", "A")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "À", "A")
            .Cells(i, 5) = Replace(.Cells(i, 5), "À", "A")
            .Cells(i, 7) = Replace(.Cells(i, 7), "À", "A")
            .Cells(i, 9) = Replace(.Cells(i, 9), "À", "A")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Â", "A")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Â", "A")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Â", "A")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Â", "A")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Å", "A")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Å", "A")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Å", "A")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Å", "A")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ã", "A")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ã", "A")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ã", "A")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ã", "A")
                        
            .Cells(i, 4) = Replace(.Cells(i, 4), "à", "a")
            .Cells(i, 5) = Replace(.Cells(i, 5), "à", "a")
            .Cells(i, 7) = Replace(.Cells(i, 7), "à", "a")
            .Cells(i, 9) = Replace(.Cells(i, 9), "à", "a")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "á", "a")
            .Cells(i, 5) = Replace(.Cells(i, 5), "á", "a")
            .Cells(i, 7) = Replace(.Cells(i, 7), "á", "a")
            .Cells(i, 9) = Replace(.Cells(i, 9), "á", "a")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "â", "a")
            .Cells(i, 5) = Replace(.Cells(i, 5), "â", "a")
            .Cells(i, 7) = Replace(.Cells(i, 7), "â", "a")
            .Cells(i, 9) = Replace(.Cells(i, 9), "â", "a")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "å", "a")
            .Cells(i, 5) = Replace(.Cells(i, 5), "å", "a")
            .Cells(i, 7) = Replace(.Cells(i, 7), "å", "a")
            .Cells(i, 9) = Replace(.Cells(i, 9), "å", "a")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ã", "a")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ã", "a")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ã", "a")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ã", "a")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "É", "E")
            .Cells(i, 5) = Replace(.Cells(i, 5), "É", "E")
            .Cells(i, 7) = Replace(.Cells(i, 7), "É", "E")
            .Cells(i, 9) = Replace(.Cells(i, 9), "É", "E")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "È", "E")
            .Cells(i, 5) = Replace(.Cells(i, 5), "È", "E")
            .Cells(i, 7) = Replace(.Cells(i, 7), "È", "E")
            .Cells(i, 9) = Replace(.Cells(i, 9), "È", "E")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ê", "E")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ê", "E")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ê", "E")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ê", "E")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "é", "e")
            .Cells(i, 5) = Replace(.Cells(i, 5), "é", "e")
            .Cells(i, 7) = Replace(.Cells(i, 7), "é", "e")
            .Cells(i, 9) = Replace(.Cells(i, 9), "é", "e")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "è", "e")
            .Cells(i, 5) = Replace(.Cells(i, 5), "è", "e")
            .Cells(i, 7) = Replace(.Cells(i, 7), "è", "e")
            .Cells(i, 9) = Replace(.Cells(i, 9), "è", "e")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ê", "e")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ê", "e")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ê", "e")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ê", "e")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Í", "I")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Í", "I")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Í", "I")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Í", "I")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ì", "I")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ì", "I")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ì", "I")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ì", "I")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Î", "I")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Î", "I")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Î", "I")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Î", "I")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "í", "i")
            .Cells(i, 5) = Replace(.Cells(i, 5), "í", "i")
            .Cells(i, 7) = Replace(.Cells(i, 7), "í", "i")
            .Cells(i, 9) = Replace(.Cells(i, 9), "í", "i")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ì", "i")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ì", "i")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ì", "i")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ì", "i")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "î", "i")
            .Cells(i, 5) = Replace(.Cells(i, 5), "î", "i")
            .Cells(i, 7) = Replace(.Cells(i, 7), "î", "i")
            .Cells(i, 9) = Replace(.Cells(i, 9), "î", "i")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ó", "O")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ó", "O")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ó", "O")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ó", "O")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ò", "O")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ò", "O")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ò", "O")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ò", "O")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ô", "O")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ô", "O")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ô", "O")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ô", "O")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Õ", "O")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Õ", "O")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Õ", "O")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Õ", "O")
                        
            .Cells(i, 4) = Replace(.Cells(i, 4), "ó", "o")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ó", "o")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ó", "o")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ó", "o")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ò", "o")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ò", "o")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ò", "o")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ò", "o")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ô", "o")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ô", "o")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ô", "o")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ô", "o")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "õ", "o")
            .Cells(i, 5) = Replace(.Cells(i, 5), "õ", "o")
            .Cells(i, 7) = Replace(.Cells(i, 7), "õ", "o")
            .Cells(i, 9) = Replace(.Cells(i, 9), "õ", "o")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ø", "o")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ø", "o")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ø", "o")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ø", "o")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ú", "U")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ú", "U")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ú", "U")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ú", "U")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ù", "U")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ù", "U")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ù", "U")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ù", "U")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Û", "U")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Û", "U")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Û", "U")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Û", "U")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ú", "u")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ú", "u")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ú", "u")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ú", "u")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ù", "u")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ù", "u")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ù", "u")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ù", "u")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "û", "u")
            .Cells(i, 5) = Replace(.Cells(i, 5), "û", "u")
            .Cells(i, 7) = Replace(.Cells(i, 7), "û", "u")
            .Cells(i, 9) = Replace(.Cells(i, 9), "û", "u")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ý", "Y")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ý", "Y")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ý", "Y")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ý", "Y")
                        
            .Cells(i, 4) = Replace(.Cells(i, 4), "ý", "y")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ý", "y")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ý", "y")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ý", "y")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ç", "C")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ç", "C")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ç", "C")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ç", "C")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ç", "c")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ç", "c")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ç", "c")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ç", "c")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "Ñ", "N")
            .Cells(i, 5) = Replace(.Cells(i, 5), "Ñ", "N")
            .Cells(i, 7) = Replace(.Cells(i, 7), "Ñ", "N")
            .Cells(i, 9) = Replace(.Cells(i, 9), "Ñ", "N")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "ñ", "n")
            .Cells(i, 5) = Replace(.Cells(i, 5), "ñ", "n")
            .Cells(i, 7) = Replace(.Cells(i, 7), "ñ", "n")
            .Cells(i, 9) = Replace(.Cells(i, 9), "ñ", "n")
            
            .Cells(i, 4) = Replace(.Cells(i, 4), "š", "s")
            .Cells(i, 5) = Replace(.Cells(i, 5), "š", "s")
            .Cells(i, 7) = Replace(.Cells(i, 7), "š", "s")
            .Cells(i, 9) = Replace(.Cells(i, 9), "š", "s")
                       
            .Cells(i, 4) = Replace(.Cells(i, 4), "é", "e")
            .Cells(i, 5) = Replace(.Cells(i, 5), "é", "e")
            .Cells(i, 7) = Replace(.Cells(i, 7), "é", "e")
            .Cells(i, 9) = Replace(.Cells(i, 9), "é", "e")

I'm looking to rewrite this monstrosity into a easier to read and better to maintain script.

Im aware i can already simplyfie this by using loops and grouping different to-replace-Characters together. But this still doestn really satisfy me, I want to know if theres a better way to go about this that im not seeing.

CodePudding user response:

Return unaccented base characters

You may profit from a match of a current character within an alphabetic array Application.Match(curChar, abc); omitting the optional 3rd argument allows to return the closest alphabetical value position in an ordered list of letters, i.e. the unaccented base letter.

The match result no (a Variant) shows a valid numeric position number for all alphabetical counterparts, any other symbol checks (thus Not IsNumeric or caught by IsError) will be used to return the unchanged input character.

Matched letters will be converted to uppercase if the original letter is uppercase.

Function unaccented(ByVal txt As String) As String
'1) assign alphabet letters to array
    Dim abc As Variant: abc = [Char(Column(A:Z) 96)]    ' Excel only shortcut!
'2) match base characters in alphabet
    Dim i As Long
    For i = 1 To Len(txt)
        'get current character
        Dim curChar As String: curChar = Mid(txt, i, 1)
        'get alphabetical base position
        Dim no As Variant:  no = Application.Match(curChar, abc)   ' without 3rd arg 0 !!
        'provide for case sensibility
        If IsNumeric(no) Then
            curChar = IIf(curChar = UCase(curChar), UCase(abc(no)), abc(no))
        End If
        'assemble total string to be returned
        unaccented = unaccented & curChar
    Next i
End Function

CodePudding user response:

Using a Dictionary Object to hold the character mappings and a Regular Expression to do the replacement. It might not be quicker but it should be easier to maintain, just update the arrays ar1 and ar2.

Option Explicit

Sub ConvertToAscii()

    Dim wb As Workbook, ws As Worksheet
    Dim regex As Object, dict As Object
    Dim i As Long, lastrow As Long, t0 As Single: t0 = Timer
   
    ' character map
    Set dict = BuildCharacterMap()
   
    ' regex
    Set regex = CreateObject("VBScript.RegExp")
    With regex
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
    End With
    
    ' worksheet
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    Application.ScreenUpdating = False
    With ws
        lastrow = .Cells(.Rows.Count, 4).End(xlUp).Row
        For i = 2 To lastrow
            Call ChangeChar(.Cells(i, 4), regex, dict)
            Call ChangeChar(.Cells(i, 5), regex, dict)
            Call ChangeChar(.Cells(i, 7), regex, dict)
            Call ChangeChar(.Cells(i, 9), regex, dict)
            .Cells(i, 10) = "X"
        Next
    End With
    Application.ScreenUpdating = True
    MsgBox lastrow - 1 & " rows processed", vbInformation, Format(Timer - t0, "0.0 secs")
   
End Sub

Sub ChangeChar(cell As Range, ByRef regex, ByRef dict)

    Dim k, s As String
    s = Replace(cell.Value, "é", "e") ' special case
    For Each k In dict.keys
        regex.Pattern = "[" & dict(k) & "]"
        s = regex.Replace(s, k)
    Next
    cell.Value = s

End Sub

Function BuildCharacterMap() As Object

    Dim dict, ar1, ar2, a, v
    ' character mappings
    ar1 = Array("o ø", "C Ç", "s š")
    ar2 = Array("A ÁÀÂÅÃ", "E ÉÈÊ", "I ÍÌÎ", "N Ñ", "O ÓÒÔÕ", "U ÚÙÛ", "Y Ý")

    ' build character map as dictionary
    Set dict = CreateObject("Scripting.Dictionary")
    dict.CompareMode = vbBinaryCompare
   
    ' upper and lower cases
    For Each v In ar2
        a = Split(v, " ")
        dict.Add a(0), a(1)
        dict.Add LCase(a(0)), LCase(a(1))
    Next
   
    ' single case
    For Each v In ar1
        a = Split(v, " ")
        If dict.exists(a(0)) Then
            dict(a(0)) = dict(a(0)) & a(1)
        Else
            dict.Add a(0), a(1)
        End If
    Next
    
    Set BuildCharacterMap = dict

End Function
  • Related