How to remove duplicates cells with different data ordering
A header |
---|
it is amazing day |
it is amazing |
amazing day |
amazing day it is |
Expected Result
A header |
---|
it is amazing day |
it is amazing |
amazing day |
Note That my Cells Strings could be up to 7
CodePudding user response:
Split the string into words, sort them to create a key and use a dictionary to find duplicates.
Option Explicit
Sub RemoveDuplicates()
Dim ws As Worksheet, ar, s As String
Dim lastrow As Long, i As Long, n As Long
Dim dict As Object, key As String, rng As Range
Set dict = CreateObject("Scripting.Dictionary")
Set ws = Sheet1
With ws
lastrow = .Cells(.Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
' split and sort words into key
s = Application.Trim(.Cells(i, "A"))
ar = bubblesort(Split(s, " "))
key = Join(ar, " ")
' check dupicate
If dict.exists(key) Then
.Cells(i, "B") = "Duplicated"
If n = 0 Then
Set rng = .Rows(i)
Else
Set rng = Application.Union(rng, .Rows(i))
End If
n = n 1
Else
.Cells(i, "B") = "Unique"
dict.Add key, i
End If
Next
End With
' delete duplicates
If n > 0 Then
If MsgBox(n & " duplicates found, do you want to delete them ?", vbYesNo) = vbYes Then
rng.Delete
End If
Else
MsgBox "No duplicates found", vbInformation
End If
End Sub
Function bubblesort(ar)
Dim a As Long, b As Long, tmp As String
For a = 0 To UBound(ar)
For b = a 1 To UBound(ar)
If CStr(ar(a)) > CStr(ar(b)) Then
tmp = ar(a)
ar(a) = ar(b)
ar(b) = tmp
End If
Next
Next
bubblesort = ar
End Function