My team generates files from webscraping where one of the columns is company name, but the source doesn't normalize, so the values end in things like Pvt. Ltd., Pvt. Ltd, Pvt Ltd., Co., Corp., Corporation
, etc., but these variants must be removed for the next step in our process.
My macro works, but it takes literally 8 minutes to run on the one Company column with 1626 typical records. I know the problem is related to how many replacements and loops I'm doing, and I've read about things like:
Dim xArray As Variant
xArray = Array(" -*", " (*", " –*", " ,*", ", *", "No Experienced Listed")
For Each cell In Selection
temp = cell.Value
If InStr(1, temp, xArray) > 0 Then ' << BUT this line is generating Type Mismatch error - WHY?
cell.Value = Replace(temp, xArray, "")
End If
Next
But I don't know if the above (even if you told me how to fix) would noticeably help.
What's a more efficient way to code the macro below?
Set ws = ActiveSheet
MsgBox ("Please wait up to 8 minutes for macro to complete after clicking OK button. Ignore any Excel Not Responding warning atop screen or slowly spinning icon in middle of screen.")
Application.ScreenUpdating = False
For Each cell In Selection
cell.Value = Trim(cell)
temp = cell.Value
cell.Value = StrConv(temp, vbProperCase)
Next
For Each cell In Selection
If InStr(cell, " -") > 0 Then 'get rid of space hyphen and anything after
cell.Replace " -*", ""
End If
If InStr(cell, " ,") > 0 Then 'get rid of comma and anything after
cell.Replace ",*", ""
End If
If InStr(cell, " (") > 0 Then 'get rid of space left paren and anything after
cell.Replace " (*", ""
End If
If cell.Value = "No Experience Listed" Then 'clear cell
cell.Value = ""
End If
Next
'reduced number of replacements needed below via strTextProperCase = StrConv(strText, vbProperCase)
'inserted above per https://www.automateexcel.com/vba/upper-lower-proper-case-functions/
For Each cell In Selection
If cell <> "" Then cell.Value = Trim(cell)
If Left(cell, 4) = "The " Then cell.Value = Right(cell, Len(cell) - 4)
If Right(cell, 5) = " Inc." Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 4) = " Inc" Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 8) = " Pvt.Ltd" Then cell.Value = Left(cell, Len(cell) - 8)
If Right(cell, 10) = " Pvt. Ltd." Then cell.Value = Left(cell, Len(cell) - 10)
If Right(cell, 9) = " Pvt. Ltd" Then cell.Value = Left(cell, Len(cell) - 9)
If Right(cell, 9) = " Pvt Ltd." Then cell.Value = Left(cell, Len(cell) - 9)
If Right(cell, 8) = " Pvt Ltd" Then cell.Value = Left(cell, Len(cell) - 7)
If Right(cell, 5) = " Pvt." Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 4) = " Pvt" Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 7) = " L.L.C." Then cell.Value = Left(cell, Len(cell) - 7)
If Right(cell, 8) = " Private" Then cell.Value = Left(cell, Len(cell) - 8)
If Right(cell, 5) = " Ltd." Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 4) = " Ltd" Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 4) = " LLC" Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 6) = ", L.P." Then cell.Value = Left(cell, Len(cell) - 6)
If Right(cell, 5) = " B.V." Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 5) = " L.P." Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 4) = " Llp" Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 3) = " Lp" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 12) = " Corporation" Then cell.Value = Left(cell, Len(cell) - 12)
If Right(cell, 6) = " Corp." Then cell.Value = Left(cell, Len(cell) - 6)
If Right(cell, 8) = " Limited" Then cell.Value = Left(cell, Len(cell) - 8)
If Right(cell, 6) = " & Co." Then cell.Value = Left(cell, Len(cell) - 6)
If Right(cell, 5) = " & Co" Then cell.Value = Left(cell, Len(cell) - 5)
If Right(cell, 4) = " Co." Then cell.Value = Left(cell, Len(cell) - 4)
If Right(cell, 3) = " Co" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 3) = " Ag" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 10) = " Companies" Then cell.Value = Left(cell, Len(cell) - 10)
If Right(cell, 8) = " Company" Then cell.Value = Left(cell, Len(cell) - 8)
If Right(cell, 13) = " Incorporated" Then cell.Value = Left(cell, Len(cell) - 13)
If Right(cell, 3) = " In" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 3) = " Lt" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 3) = " Ag" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 2) = " S" Then cell.Value = Left(cell, Len(cell) - 3)
If Right(cell, 4) = " B.V" Then cell.Value = Left(cell, Len(cell) - 4)
Next
End If
Application.Goto Reference:=ws.Range("B1")
Application.ScreenUpdating = True
End Sub
CodePudding user response:
How to gain efficiency
- Looping through cells by means of vba is time consuming: assign the relevant column to a (1) datafield array & (2) loop through the elements eventually.
- Split the company informations into tokens.
- Structure your code and break it down into the necessary steps; essentially you want to remove (2a) articles in the first token, (2b) company suffixes in the last token as well as (2c) possible prefixes like
Private, Pvt
etc. This can be done via help proceduremark
in only three steps - thus you avoid to read each cell value dozens of times
(see Tim's valuable comment). - (2d:e) Change the array elements with the condensed company infos and
- (3) Write the entire array back to the original range (or a neighbouring column when testing).
The following procedure should give you an idea how to procede
(possible example call: Condense Sheet1.Range("A2:A6000")
)
Sub Condense(rng As Range)
'0. Definitions
Dim articles As String: articles = "The La (No)"
Dim corps As String: corps = "Ltd Corporation comp company (Listed)"
Dim privs As String: privs = "Pvt Pvt. Private (Experience)"
'1. Get data
Dim data As Variant
data = rng.Resize(, 1).Value ' accept only 1st column
'2. Loop through data and mark tokens to be removed
Dim i As Long
For i = 1 To UBound(data)
Dim tokens As Variant
tokens = Split(data(i, 1))
'2a) Mark articles in first token
mark tokens, 0, articles
'2b) Mark corporations in last token
Dim last As Long
last = UBound(tokens)
mark tokens, last, corps
'2c) Mark possible corporation prefix Private etc.
If last - 1 Then
mark tokens, last - 1, privs
End If
'2d) Remove marked tokens via negative filtering
tokens = Filter(tokens, "