The image below shows the example table of data I have (columns A to B), and the table on the right (columns e to f) shows my desired output.
I just don't know where to start with this one, as I need to combine all the different DATA items together for each individual id. Each ID is unique, but there can be any number of the same ID. The data can be replicated many times in the same ID and also include different data.
The data items will always be comma separated if more than 1 data item, and can be a mixture of numbers and letters of multiple lengths (even though my example shows single characters). The required data is what's always between each comma, where commas exist (i.e. except for single data items.)
The IDs are always numerical.
Therefore I'm struggling to come up with some VBA code for Excel 2010 (being a novice with VBA) to achieve this requirement. Any help would be very appreciated?
CodePudding user response:
Combine Unique and Delimited Data Using a Dictionary of Dictionaries
Option Explicit
Sub CombineData()
' Source
Const sName As String = "Sheet1"
Const sDelimiter As String = ", "
' Destination
Const dName As String = "Sheet1"
Const dFirstCellAddress As String = "E2"
Const dDelimiter As String = ", "
' Source range to an array.
Dim Data As Variant
Dim rCount As Long
With ThisWorkbook.Worksheets(sName).Range("A1").CurrentRegion
rCount = .Rows.Count - 1
If rCount < 1 Then Exit Sub ' no data or only headers
Data = .Resize(rCount, 2).Offset(1).Value
End With
' Array to a dictionary of dictionaries.
Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
dict.CompareMode = vbTextCompare
Dim Key As Variant
Dim Item As Variant
Dim r As Long
Dim n As Long
For r = 1 To rCount
Item = CStr(Data(r, 2))
If Not IsError(Item) Then
If Len(Item) > 0 Then
Key = Data(r, 1)
If Not IsError(Key) Then
If Len(Key) > 0 Then
Item = Split(Item, sDelimiter)
If Not dict.Exists(Key) Then
Set dict(Key) = CreateObject("Scripting.Dictionary")
End If
For n = 0 To UBound(Item)
dict(Key)(Item(n)) = Empty
Next n
End If
End If
End If
End If
Next r
rCount = dict.Count
If rCount = 0 Then Exit Sub ' only error values or blanks
' Dictionary of dictionaries to the array.
ReDim Data(1 To rCount, 1 To 2)
r = 0
For Each Key In dict.Keys
r = r 1
Data(r, 1) = Key
Data(r, 2) = Join(dict(Key).Keys, dDelimiter)
Next Key
' Array to the destination range.
With ThisWorkbook.Worksheets(dName).Range(dFirstCellAddress).Resize(, 2)
.Resize(rCount).Value = Data
.Resize(.Worksheet.Rows.Count - .Row - rCount 1).Offset(rCount).Clear
End With
MsgBox "Data combined.", vbInformation
End Sub
CodePudding user response:
Please, use the next way. As I said in my comment, it uses a dictionary to extract the unique keys and other four arrays to keep intermediary values and build the final one. The following code is able to deal with both (possible) separators: comma "," and comma followed by one or more spaces ", ", ", ", " ,". It works only in memory and should be very fast, even for large ranges:
Sub extractUniqueIDsUniqueData()
Dim sh As Worksheet, lastR As Long, arr, arrItem, arrIt, arrFin
Dim i As Long, mtch, El, dict As Object
Set sh = ActiveSheet
lastR = sh.Range("A" & sh.rows.count).End(xlUp).row
arr = sh.Range("A2:B" & lastR).value 'place the range in an array for faster iteration
Set dict = CreateObject("Scripting.Dictionary")
'fill the dictionary:
For i = 1 To UBound(arr)
If Not dict.Exists(arr(i, 1)) Then
arrItem = Split(Replace(arr(i, 2), " ", ""), ",") 'replace spaces before splitting
dict.Add arr(i, 1), arrItem
Else
If arr(i, 2) <> "" Then 'skip the empty strings in B:B
arrIt = Split(Replace(arr(i, 2), " ", ""), ",")
arrItem = dict(arr(i, 1))
If UBound(arrItem) = -1 Then 'if no any element in the item array
arrItem = arrIt 'use the existing processed B:B value instead
Else
For Each El In arrIt
mtch = Application.match(El, arrItem, 0)
If IsError(mtch) Then 'not existing in the item array
ReDim Preserve arrItem(UBound(arrItem) 1)
arrItem(UBound(arrItem)) = El 'add the new element in the item array
End If
Next El
End If
dict(arr(i, 1)) = arrItem 'place the array back as dictionary item
End If
End If
Next i
'Process the dictionary content:
ReDim arrFin(1 To dict.count 1, 1 To 2) 'redim the array to keep all dictionary elements
'fill the header in the final array:
arrFin(1, 1) = "FinalList": arrFin(1, 2) = "Combined DATA"
'fill the rest of the final array rows
For i = 0 To dict.count - 1
arrFin(i 2, 1) = dict.Keys()(i)
arrFin(i 2, 2) = Join(dict.items()(i), ", ")
Next i
'drop the final array content at once:
With sh.Range("E1").Resize(UBound(arrFin), UBound(arrFin, 2))
.value = arrFin
.EntireColumn.AutoFit
End With
MsgBox "Ready..."
End Sub
CodePudding user response:
This generates the combined output per the input data provided. The code uses Dictionaries
to assist in getting to the unique sets of values.
Option Explicit
Public Sub Test()
Dim sourceWksht As Worksheet
Set sourceWksht = Application.ActiveWorkbook.Worksheets.("Sheet1")
Dim rawData As Variant
rawData = sourceWksht.Range("A2:B12").Value2
Dim rawInputDictionary As Dictionary
Set rawInputDictionary = New Dictionary
Dim csvValue As String
Dim rawIndex As Long
For rawIndex = LBound(rawData, 1) To UBound(rawData, 1)
csvValue = Trim$(rawData(rawIndex, 2))
If Not rawInputDictionary.Exists(rawData(rawIndex, 1)) And Len(csvValue) > 0 Then
rawInputDictionary.Add rawData(rawIndex, 1), csvValue
ElseIf Len(csvValue) > 0 Then
rawInputDictionary.Item(rawData(rawIndex, 1)) _
= rawInputDictionary.Item(rawData(rawIndex, 1)) & "," & csvValue
End If
Next
GenerateOutput rawInputDictionary, sourceWksht
End Sub
Private Sub GenerateOutput(ByVal rawInputDictionary As Dictionary, ByVal wksht As Worksheet)
Dim outputArray As Variant
ReDim outputArray(1 To rawInputDictionary.Count, 1 To 2)
Dim outputArrayIndex As Long
outputArrayIndex = 1
Dim idKey As Variant
For Each idKey In rawInputDictionary.Keys
outputArray(outputArrayIndex, 1) = idKey
outputArray(outputArrayIndex, 2) = GenerateCombinedData(rawInputDictionary.Item(idKey))
outputArrayIndex = outputArrayIndex 1
Next
Dim outputRange As Range
Set outputRange = wksht.Range("E2:F" & CStr(rawInputDictionary.Count 1))
outputRange.Value = outputArray
End Sub
Private Function GenerateCombinedData(ByVal idValues As String) As String
Dim combinedData As String
combinedData = vbNullString
Dim outputDictionary As Dictionary
Set outputDictionary = New Dictionary
Dim valuesArrayIndex As Long
Dim valuesArray As Variant
valuesArray = Split(idValues, ",")
For valuesArrayIndex = LBound(valuesArray) To UBound(valuesArray)
If Not outputDictionary.Exists(valuesArray(valuesArrayIndex)) Then
combinedData = combinedData & valuesArray(valuesArrayIndex) & ","
'Use the outputDictionary 'Keys' to ignore duplicate values
outputDictionary.Add valuesArray(valuesArrayIndex), ""
End If
Next
'Trim the trailing comma
combinedData = Left$(combinedData, Len(combinedData) - 1)
GenerateCombinedData = combinedData
End Function