Hello smart human beings out there
I have this setup in my Excel
Basically, what I'm trying to achieve here is automatically grab every single string from column A (and paste to column H) and return the frequency in column I. The script is below
Sub WordCountTester()
Dim d As Object, k, i As Long, ws As Worksheet
Set ws = ActiveSheet
With ws.ListObjects("Table3")
If Not .DataBodyRange Is Nothing Then
.DataBodyRange.Delete
End If
End With
Set d = WordCounts(ws.Range("A2:A" & ws.Cells(Rows.Count, "A").End(xlUp).Row), _
ws.Range("F2:F" & ws.Cells(Rows.Count, "F").End(xlUp).Row))
'list words and frequencies
For Each k In d.keys
ws.Range("H2").Resize(1, 2).Offset(i, 0).Value = Array(k, d(k))
i = i 1
Next k
End Sub
'rngTexts = range with text to be word-counted, defined in set d= above
'rngExclude = 'range with words to exclude from count, defined in set d= above
Public Function WordCounts(rngTexts As Range, rngExclude As Range) As Object 'dictionary
Dim words, c As Range, dict As Object, regexp As Object, w, wd As String, m
Set dict = CreateObject("scripting.dictionary")
Set regexp = CreateObject("VBScript.RegExp") 'see link below for reference
With regexp
.Global = True
.MultiLine = True
.ignorecase = True
.Pattern = "[\dA-Z-]{3,}" 'at least 3 characters
End With
'loop over input range
For Each c In rngTexts.Cells
If Len(c.Value) > 0 Then
Set words = regexp.Execute(LCase(c.Value))
'loop over matches
For Each w In words
wd = w.Value 'the text of the match
If Len(wd) > 1 Then 'EDIT: ignore single characters
'increment count if the word is not found in the "excluded" range
If IsError(Application.Match(wd, rngExclude, 0)) Then
dict(wd) = dict(wd) 1
End If
End If '>1 char
Next w
End If
Next c
Set WordCounts = dict
End Function
However, it currently count the string with 1 word only. I want to count strings with 2 and 3 words (and I will consider drive-by as 2 words). Can someone please tell me where in this code I have to fix to achieve that? I still want to keep column F there because there can be 2- or 3- word strings that I want to exclude. Thanks!
CodePudding user response:
If you changed your mind and consider that also two words pairs 2-3, 4-5, 6-7 and so on are necessary, please test the next solution:
Private Sub WordPairsCountTester()
Dim d As Object, k, i As Long, ws As Worksheet, arrFin
Set ws = ActiveSheet
'list words pairs and frequencies
Set d = WordPairCounts(ws.Range("A2:A" & ws.cells(rows.count, "A").End(xlUp).row), _
ws.Range("F2:F" & ws.cells(rows.count, "F").End(xlUp).row))
arrFin = Application.Transpose(Array(d.Keys, d.items)) 'place the dictionary in an array
ws.Range("H2").Resize(UBound(arrFin), 2).Value = arrFin 'drop the array content at once
End Sub
Private Function WordPairCounts(rngTexts As Range, rngExclude As Range) As Object
Dim dict As Object, arr, arrCell, i As Long, pairWd As String, j As Long
arr = rngTexts.Value 'place the range in an array for faster iteration
Set dict = CreateObject("scripting.dictionary")
For i = 1 To UBound(arr) 'iterate between the array elements
arrCell = Split(arr(i, 1)) 'split the string by default delimiter (space)
For j = 0 To UBound(arrCell) - 1 'iterate between the array elements
pairWd = arrCell(j) & " " & arrCell(j 1) 'create a string from two neighbour words (1-2, 2-3, 3-4 and so on)
If IsError(Application.match(pairWd, rngExclude, 0)) Then
dict(pairWd) = dict(pairWd) 1 'place the unique pairs as keys and add occurrences as items
End If
Next j
Next i
Set WordPairCounts = dict 'return the above created dictionary
End Function
Edited:
The function able to deal with 3 neighbor words:
Private Function WordPairCounts3(rngTexts As Range, rngExclude As Range) As Object
Dim dict As Object, arr, arrCell, i As Long, pairWd As String, j As Long
arr = rngTexts.Value 'place the range in an array for faster iteration
Set dict = CreateObject("scripting.dictionary")
For i = 1 To UBound(arr) 'iterate between the array elements
arrCell = Split(arr(i, 1)) 'split the string by default delimiter (space)
For j = 0 To UBound(arrCell) - 2 'iterate between the array elements
pairWd = arrCell(j) & " " & arrCell(j 1) & " " & arrCell(j 2) 'create a string from 3 neighbour words
If IsError(Application.match(pairWd, rngExclude, 0)) Then
dict(pairWd) = dict(pairWd) 1 'place the unique pairs as keys and add occurrences as items
End If
Next j
Next i
Set WordPairCounts3 = dict 'return the above created dictionary
End Function
Not tested, but it should work, I think.