I have two Strings in different cells for example
ADSGPINDTDANPR
RGTELDDGIQADSGPINDTDANPRY VPGYY ESQSDDPHFHEK
Another example
LADNS TFDDDLDDLTPSKMKPANFKGD
RSLA FDDDLDDLTPSKMKPANFKGDYG
Also character sequences can have gaps for similar sequences like RGX in following example
LADNS TFDDDLDDLTPSKMKPANFKGD
RSLA FDDDLDDLTPSRGXKMKPANFKGDYG
What I want to do is Highlight both the sequences as shown in above example in Bold And Italic but in color using VBA code.
CodePudding user response:
Tough challenge, and I'm not sure if it's that feasible with Excel alone. Assuming that:
- You will not allow the 1st entry to have gaps;
- You allow for 0 gaps in between in the 2nd entry;
- You are looking for the longest match between both entries;
- You have ms365;
You may try the below answer that I based of on a formula first, see the below screenshot:
Formula in C1
:
=LET(x,SCAN(,UNIQUE(TOCOL(MID(A1,SEQUENCE(LEN(A1)),SEQUENCE(1,LEN(A1))))),LAMBDA(a,b,TEXTJOIN("*",,MID(b,SEQUENCE(1,LEN(b)),1)))),y,SEARCH(x,B1),z,SORTBY(HSTACK(x,y),LEN(x)*(ISNUMBER(y)),-1),SUBSTITUTE(TAKE(FILTER(z,ISNUMBER(INDEX(z,,2))),1,1),"*",))
The above will identify the longest substring that has a match with 0 gaps in between. This is going to be the input to the below macro:
Sub Test()
Dim ws As Worksheet, lr As Long, x As Long, y As Long, z As Long, a As Long, arr As Variant, s As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
lr = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
arr = ws.Range("A1:C" & lr)
For x = LBound(arr) To UBound(arr)
s = arr(x, 3)
'Format column A:A
ws.Cells(x, 1).Characters(InStr(1, ws.Cells(x, 1).Value, s), Len(s)).Font.Bold = True
ws.Cells(x, 1).Characters(InStr(1, ws.Cells(x, 1).Value, s), Len(s)).Font.Italic = True
'Format column B:B
z = 0
For y = 1 To Len(s)
z = InStr(z 1, ws.Cells(x, 2).Value, Mid(s, y, 1))
ws.Cells(x, 2).Characters(z, 1).Font.Bold = True
ws.Cells(x, 2).Characters(z, 1).Font.Italic = True
Next
Next
End Sub
The results look like:
CodePudding user response:
It's really nice that we have these two as a reference https://www.sciencedirect.com/science/article/pii/S0890540114000765 https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
Here is the main function which is returning all subsequences forming the LCS (if you join all the keys you get the LCS in reverse subsequence form). Function returns a dictionary where keys are subsequences, and values are arrays with 2 elements (position of subsequence in seqA and position of subsequence in seqB).
Function GetLCSSubSequenceDict(seqA As String, seqB As String) As Object
Set GetLCSSubSequenceDict = Nothing
Dim i As Long, n As Long
n = Len(seqA)
If n = 0 Then: Exit Function
Dim j As Long, m As Long
m = Len(seqB)
If m = 0 Then: Exit Function
Dim T() As Long
ReDim T(0 To n, 0 To m)
'Building up table
For i = 1 To n
For j = 1 To m
If Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
'bitwise max
T(i, j) = T(i - 1, j) Xor ((T(i - 1, j) Xor T(i, j - 1)) And --(T(i - 1, j) < T(i, j - 1)))
Else
T(i, j) = T(i - 1, j - 1) 1
End If
Next j
Next i
Dim subseqKey As String
Dim subseqABDict As Object
Set subseqABDict = CreateObject("Scripting.Dictionary")
'Backtracking and building up dict of subsequences
'key = subsequence
'value = array(starting pos of the key in seqA,starting pos of the key in seqB)
i = n
j = m
Do While (i > 0 And j > 0)
If Not Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
subseqKey = Mid$(seqA, i, 1) & subseqKey
i = i - 1
j = j - 1
ElseIf T(i - 1, j) > T(i, j - 1) Then
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i 1, j 1)
subseqKey = vbNullString
End If
i = i - 1
Else
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i 1, j 1)
subseqKey = vbNullString
End If
j = j - 1
End If
Loop
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i 1, j 1)
End If
Set GetLCSSubSequenceDict = subseqABDict
Set subseqABDict = Nothing
End Function
Time complexity of the function is O ( Len(seqA) * Len(seqB) ), for those who are interested.
Following is a show case of setting font properties on 2 ranges seqA and seqB.
Sub test()
Dim seqA As Range
Dim seqB As Range
Set seqA = Range("A4")
Set seqB = Range("B4")
Dim fontColor As Long
fontColor = RGB(84, 84, 84)
Dim subseqKey As Variant
Dim lcsSubSequenceDict As Object
Set lcsSubSequenceDict = GetLCSSubSequenceDict(seqA.Value2, seqB.Value2)
'gives subsequences in reversed order, since we used backtracking
'MsgBox Join(lcsSubSequenceDict.keys())
If lcsSubSequenceDict Is Nothing Then: Exit Sub
For Each subseqKey In lcsSubSequenceDict
With seqA.Characters(lcsSubSequenceDict(subseqKey)(0), Len(subseqKey)).Font
.color = fontColor
.Bold = True
.Italic = True
End With
With seqB.Characters(lcsSubSequenceDict(subseqKey)(1), Len(subseqKey)).Font
.color = fontColor
.Bold = True
.Italic = True
End With
Next subseqKey
Set lcsSubSequenceDict = Nothing
Set seqA = Nothing
Set seqB = Nothing
End Sub