I have a CSV export with the following layout:
ID | Section A | Section B | ... |
---|---|---|---|
1 | val1, val2 | val3, val4, val5 | ... |
2 | val6, val7, val8 | val9, val10 | ... |
And I'd like to have the dataset transformed to the following output:
ID | Section | Value |
---|---|---|
1 | Section A | val1 |
1 | Section A | val2 |
1 | Section B | val3 |
1 | Section B | val4 |
1 | Section B | val5 |
2 | Section A | val6 |
2 | Section A | val7 |
2 | Section A | val8 |
2 | Section B | val9 |
2 | Section B | val10 |
... | ... | ... |
My challenge is that I have to deal with tens of thousands of these rows, so I want to be able to apply the same solution to the entire dataset.
I figured out how to transpose the comma separated values into multiple rows, but can't seem to figure out how to programmatically pair up the transposed values with the corresponding ID
and Section
.
Below is the VBA code I have for the transposition, in case anyone wants to see it:
Sub TransposeTest()
Dim rng As Range
Dim inputRng As Range, outputRng As Range
titleTxt = "Transpose Test"
Set inputRng = Application.Selection.Range("A1")
Set inputRng = Application.InputBox("Input cell:", titleTxt, inputRng.Address, Type:=8)
Set outputRng = Application.InputBox("Output cell:", titleTxt, Type:=8)
output = VBA.Split(inputRng.Range("A1").Value, ",")
outputRng.Resize(UBound(output) - LBound(output) 1).Value = Application.Transpose(output)
End Sub
CodePudding user response:
You can benefit from arrays and split to transform data easily:
Sub test()
Dim i As Long, j As Long, k As Long, s As Long
Dim MyData As Variant
Dim SubData As Variant
Dim TotalColumns As Long
MyData = Range("A1").CurrentRegion.Value
TotalColumns = Range("A1").CurrentRegion.Columns.Count
Range("E1").Value = "ID"
Range("F1").Value = "Section"
Range("G1").Value = "Value"
k = 2 'initial row to paste data
For i = 2 To UBound(MyData) Step 1
For j = 2 To TotalColumns Step 1 'j=2 because first column because it's ID
SubData = Split(MyData(i, j), ", ")
For s = 0 To UBound(SubData) Step 1
Range("E" & k).Value = MyData(i, 1) 'ID always in first column
Range("F" & k).Value = MyData(1, j) 'Section always in first row
Range("G" & k).Value = SubData(s)
k = k 1
Next s
Next j
Next i
Erase SubData
Erase MyData
End Sub
I got the output in same worksheet but code can be easily adapted to make it in a different worksheet.
CodePudding user response:
If the 9 cells in your table were configured as an Excel table named 'Data' then the M code below:
let
Source = Excel.CurrentWorkbook(){[Name="Data"]}[Content],
#"Changed Type" = Table.TransformColumnTypes(Source,{{"ID", Int64.Type}, {"Section A", type text}, {"Section B", type text}}),
#"Split Column by Delimiter" = Table.ExpandListColumn(Table.TransformColumns(#"Changed Type", {{"Section A", Splitter.SplitTextByDelimiter(",", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Section A"),
#"Split Column by Delimiter1" = Table.ExpandListColumn(Table.TransformColumns(#"Split Column by Delimiter", {{"Section B", Splitter.SplitTextByDelimiter(",", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Section B"),
#"Unpivoted Other Columns" = Table.UnpivotOtherColumns(#"Split Column by Delimiter1", {"ID"}, "Attribute", "Value"),
#"Renamed Columns" = Table.RenameColumns(#"Unpivoted Other Columns",{{"Attribute", "Section"}}),
#"Trimmed Text" = Table.TransformColumns(#"Renamed Columns",{{"Value", Text.Trim, type text}}),
#"Removed Duplicates" = Table.Distinct(#"Trimmed Text")
in
#"Removed Duplicates"
would render this result in PowerQuery (you would need to have a Split Column by Delimiter step for each of your Sections)