First, I must mention that I am using Excel for Mac, so any code suggestions needs to work for a Mac using Office 365.
I have a large dataset that has nine columns of names. I want to delete the entire row if the same name is in multiple columns in the same row
Example dataset:
So all of these rows would be deleted because:
Jason
appearstwice
in row1
Jason
appears3
times in row2
Jason
appears4
times in row3
Sam
appearstwice
in row4
Fred
appears3
times in row5
So no matter how many times a name is repeated in the same row of data, I want to delete that entirerow.
My code is below. This code works but it crashes with a large dataset. I know there has to be a faster, more efficient way to write this code so that it can handle a large dataset. Plus, my code is too repetitive. There has to be a way to make the code more simple. Anyway, here's the code.
'<---- ***** DELETE ANY ROWS WHERE SAME NAME APPEARS TWICE (OR MORE) IN THAT ROW
Sub RemoveDuplicateRows()
Dim Lastrow As Long
Dim Lrow As Long
Lastrow = Range("A" & Rows.Count).End(xlUp).row
For Lrow = Lastrow To 2 Step -1
If Cells(Lrow, "A").Value = Cells(Lrow, "B").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "C").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "D").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "E").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "F").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "A").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "C").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "D").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "E").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "F").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "B").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "D").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "E").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "F").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "C").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "D").Value = Cells(Lrow, "E").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "D").Value = Cells(Lrow, "F").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "D").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "D").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "D").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "E").Value = Cells(Lrow, "F").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "E").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "E").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "E").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "F").Value = Cells(Lrow, "G").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "F").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "F").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "G").Value = Cells(Lrow, "H").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "G").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
ElseIf Cells(Lrow, "H").Value = Cells(Lrow, "I").Value Then
Cells(Lrow, "A").EntireRow.Delete
End If
Next Lrow
End Sub
CodePudding user response:
Let's say your data looks like this
Code
Option Explicit
Sub Sample()
Dim ws As Worksheet
Dim Ar As Variant
Dim lRow As Long, lCol As Long
Dim i As Long, j As Long, k As Long, l As Long
'~~> Set this to the relevant sheet
Set ws = Sheet1
With ws
If Application.WorksheetFunction.CountA(.Cells) = 0 Then Exit Sub
'~~> Find last row and column
lRow = .Cells.Find(What:="*", _
After:=.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
lCol = .Cells.Find(What:="*", _
After:=.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByColumns, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Column
'~~> Get the data into an array
Ar = .Range(.Cells(1, 1), .Cells(lRow, lCol))
End With
'~~> Clear the rows in an array for the required condition
For i = LBound(Ar) To UBound(Ar)
For j = 1 To lCol
For k = 2 To lCol
'~~> An additional check to see if the compared cell is not blank
If Ar(i, j) = Ar(i, k) And Len(Trim(Ar(i, 1))) <> 0 And j <> k Then
For l = 1 To lCol: Ar(i, l) = "": Next l
Exit For
End If
Next k
Next j
Next i
Dim delRange As Range
With ws
'~~> Clear data for output
.Cells.Clear
'~~> Get the data back in the worksheet
.Range("A1").Resize(lRow, lCol).Value = Ar
If Application.WorksheetFunction.CountA(.Cells) = 0 Then Exit Sub
'~~> Find the new last row
lRow = .Cells.Find(What:="*", _
After:=.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
'~~> Check for blank rows
For i = 1 To lRow
If Application.WorksheetFunction.CountA(.Range(.Cells(i, 1), .Cells(i, lCol))) = 0 Then
If delRange Is Nothing Then
Set delRange = .Rows(i)
Else
Set delRange = Union(delRange, .Rows(i))
End If
End If
Next i
'~~> If blank rows found then delete them in one go
If Not delRange Is Nothing Then delRange.Delete shift:=xlUp
End With
End Sub
In Action
CodePudding user response:
With a Little Research, I found the below function which would remove the duplicates within the same cell.
Function RemoveDupeWords(text As String, Optional delimiter As String = " ") As String
Dim dictionary As Object
Dim x, part
Set dictionary = CreateObject("Scripting.Dictionary")
dictionary.CompareMode = vbTextCompare
For Each x In Split(text, delimiter)
part = Trim(x)
If part <> "" And Not dictionary.Exists(part) Then
dictionary.Add part, Nothing
End If
Next
If dictionary.Count > 0 Then
RemoveDupeWords = Join(dictionary.keys, delimiter)
Else
RemoveDupeWords = ""
End If
Set dictionary = Nothing
End Function
Apply this function as a Formula in the range in which you need the Solution.
RemoveDupeWords(text, [delimiter])
Where:
Text (required) - a string or cell from which you want to delete repeated text. Delimiter (optional) - the delimiter that the repeated text is separated by. If omitted, a space is used for the delimiter. The function is not case-sensitive, meaning that lowercase and uppercase letters are treated as the same characters.
Source: AbleBits