Home > database >  Concatenate duplicate values by specific values
Concatenate duplicate values by specific values

Time:12-29

On Excel, I am trying to put in a single line both colors separated by semicolon. If the cod and name are equal it must concatenate in just a single line both colors.

What I have:

enter image description here

What I am trying to get:

enter image description here

CodePudding user response:

You can do this in Power Query, using the Table.Group method and a custom aggregation.

Based on your comment in your question, I assumed you did not want to retain the original Color column (first row only), but that is easily added back if not the case.

To use Power Query

  • Select some cell in your Data Table
  • Data => Get&Transform => from Table/Range
  • When the PQ Editor opens: Home => Advanced Editor
  • Make note of the Table Name in Line 2
  • Paste the M Code below in place of what you see
  • Change the Table name in line 2 back to what was generated originally.
  • Read the comments and explore the Applied Steps to understand the algorithm

M Code

let

//Change next line to reflect actual data source
    Source = Excel.CurrentWorkbook(){[Name="Table17"]}[Content],

//set data types
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Cod", type text}, {"Name", type text}, {"Color", type text}}),

//Group by Cod amd Name, then aggregate by combining the colors
    #"Grouped Rows" = Table.Group(#"Changed Type", {"Cod","Name"}, {
        {"Colors", each Text.Combine(List.Distinct(_[Color]),";")}})
in
    #"Grouped Rows"

enter image description here

CodePudding user response:

Please, use the next code. It uses a dictionary (loaded from an array) to extract unique codes, then processes its content:

Sub CondenseFruitsTable()
  Dim sh As Worksheet, lastR As Long, arr, arrFin, mtch, dict As Object, i As Long
  
  Set sh = ActiveSheet 'use here the sheet you need
  lastR = sh.Range("A" & sh.rows.count).End(xlUp).row 'last row on column A:A
 
  arr = sh.Range("A1:C" & lastR).Value2 'place the range in an array for faster iteration/processing
  Set dict = CreateObject("Scripting.Dictionary")
  For i = 2 To UBound(arr)
    If Not dict.Exists(arr(i, 1)) Then
        dict(arr(i, 1)) = arr(i, 2) & "|" & arr(i, 3)
    Else
        mtch = Application.match(arr(i, 3), Split(Split(dict(arr(i, 1)), "|")(1), ";"), 0)
        If IsError(mtch) Then 'if the color does  not already exist:
            dict(arr(i, 1)) = dict(arr(i, 1)) & ";" & arr(i, 3)
        End If
    End If
  Next i
  
  'redim the final array to also include the header:
  ReDim arrFin(1 To dict.count   1, 1 To 3)
  
  arrFin(1, 1) = arr(1, 1): arrFin(1, 2) = arr(1, 2): arrFin(1, 3) = arr(1, 3)
  For i = 0 To dict.count - 1
        arrFin(i   2, 1) = CStr(dict.keys()(i))
        arrFin(i   2, 2) = Split(dict.Items()(i), "|")(0)
        arrFin(i   2, 3) = Split(dict.Items()(i), "|")(1)
  Next i
  'drop the result and format a little:
  With sh.Range("F1").Resize(UBound(arrFin), 3)
        .Columns(1).NumberFormat = "@"
        .Value2 = arrFin
        .EntireColumn.AutoFit
  End With
End Sub

CodePudding user response:

Uniquify Table Data

enter image description here

Sub UniquifyTable()
    
    ' Define constants.
    Const UNI_COL As Long = 1 ' or 2
    Const JOIN_COL As Long = 3
    Const JOIN_DELIMITER As String = "; "
    Const NEW_COLUMN_TITLE As String = "Color (sep. Semicolon)"

    With ActiveSheet.Range("A1").CurrentRegion
        
        ' Unique to dictionary.
        
        Dim Data: Data = .Columns(UNI_COL).Value
        Dim uDict As Object: Set uDict = CreateObject("Scripting.Dictionary")
        uDict.CompareMode = vbTextCompare
        Dim srCount As Long: srCount = .Rows.Count
        
        Dim r As Long
        
        For r = 2 To srCount ' skip headers
            If Not uDict.Exists(Data(r, UNI_COL)) Then
                Set uDict(Data(r, UNI_COL)) = New Collection
            End If
            uDict(Data(r, UNI_COL)).Add r
        Next r
        
        ' Source to array.
        
        Dim scCount As Long: scCount = .Columns.Count
        Data = .Value
        
        ' Overwrite array with results.
        
        ' Resize (add column).
        ReDim Preserve Data(1 To srCount, 1 To scCount   1)
        Data(1, scCount   1) = NEW_COLUMN_TITLE
        
        Dim jDict As Object: Set jDict = CreateObject("Scripting.Dictionary")
        jDict.CompareMode = vbTextCompare
        
        Dim Key, Item, c As Long, IsNewRow As Boolean
        r = 1 ' skip headers
        
        ' Populate top.
        For Each Key In uDict.Keys
            r = r   1
            For Each Item In uDict(Key)
                If jDict.Count = 0 Then
                    For c = 1 To scCount
                        Data(r, c) = Data(Item, c)
                    Next c
                End If
                If Not jDict.Exists(Data(Item, JOIN_COL)) Then
                    jDict(Data(Item, JOIN_COL)) = Empty
                End If
            Next Item
            Data(r, c) = Join(jDict.Keys, JOIN_DELIMITER)
            jDict.RemoveAll ' reset for next iteration
        Next Key
        
        ' Clear bottom.
        For r = r   1 To srCount
            For c = 1 To scCount
                Data(r, c) = Empty
            Next c
        Next r
        
        ' Write back to worksheet.
        
        .Resize(, c).Value = Data
        .Columns(c).AutoFit
        
    End With

End Sub
  • Related