I have a set of files, some have no suffix and some with different suffixes. I would like to segregate the file names irrespective of their suffixes and list them along with the range of their row indices in the same spreadsheet. Below is the example and my failed code. Also attached the spreadsheet snapshot. Can you please help? Any new code/logic is welcome.
Input:
Row index | Filename |
---|---|
1 | File1 |
2 | File2_a |
3 | File2_b |
4 | File2_c |
5 | File3_a |
6 | File3_b |
Output:
Filename | Row indices range | |
---|---|---|
File1 | 1 | 1 |
File2 | 2 | 4 |
File3 | 5 | 6 |
VBA code
Sub GetUniqueFiles()
Dim sh As Worksheet
Set sh = ThisWorkbook.Sheets("Sheet1")
lastrow = sh1.Range("A" & Rows.Count).End(xlUp).Row
SameFile = False ' Flag to compare 2 consecutive file names
i = 3: j = 3
While i <= (lastrow - 1)
name_curt = sh.Range("B" & i).Value
name_next = sh.Range("B" & i 1).Value
file_curt = Split(name_curt, "_")(0)
file_next = Split(name_next, "_")(0)
If file_curt <> file_next Then
sh.Range("D" & j).Value = file_curt
k1 = i
sh.Range("E" & j).Value = k1
sh.Range("F" & j).Value = k2
i = i 1: j = j 1
ElseIf file_curt = file_next Then
SameFile = True
sh.Range("B" & j).Value = file_curt
k1 = i
While SameFile
i = i 1
name_curt = sh.Range("B" & i).Value
name_next = sh.Range("B" & i 1).Value
file_curt = Split(name_curt, "_")(0)
file_next = Split(name_next, "_")(0)
Wend
End If
Wend
End Sub
CodePudding user response:
Try this:
Sub GetUniqueFiles()
Dim sh As Worksheet, m, i, indx, rw As Range, f As String, r As Long
Set sh = ThisWorkbook.Sheets("Sheet1")
Set rw = sh.Range("A3:B3") 'first input row
Do While Application.CountA(rw) = 2 'loop while have data
indx = rw.Cells(1).Value
f = Split(rw.Cells(2).Value, "_")(0) ' "base" file name
m = Application.Match(f, sh.Columns("D"), 0) 'see if already listed
If IsError(m) Then 'not already listed ?
r = sh.Cells(Rows.Count, "D").End(xlUp).Row 1 'next empty row
sh.Cells(r, "D").Value = f 'file name
sh.Cells(r, "E").Value = indx ' "first seen" index
m = r
End If
sh.Cells(r, "F").Value = indx ' "last seen" index
Set rw = rw.Offset(1, 0) 'next input row
Loop
End Sub
CodePudding user response:
You can solve this without VBA. Add something like this in column C:
=LEFT(B2,IFERROR(FIND("_",B2)-1,LEN(B2) 1))
It will cut the underscore and everything after it. Next step is counting the distinct values. I would go for a pivot table, but there are lots of other ways.
CodePudding user response:
You can obtain your desired output using Power Query
, available in Windows Excel 2010 and Office 365 Excel
- Select some cell in your original table
Data => Get&Transform => From Table/Range
orFrom within sheet
- When the PQ UI opens, navigate to
Home => Advanced Editor
- Make note of the Table Name in Line 2 of the code.
- Replace the existing code with the M-Code below
- Change the table name in line 2 of the pasted code to your "real" table name
- Examine any comments, and also the
Applied Steps
window, to better understand the algorithm and steps
M Code
let
//Change table name in next line to actual table name in your workbook
Source = Excel.CurrentWorkbook(){[Name="Table22"]}[Content],
//split on the underscore and remove the splitted suffix
#"Split Column by Delimiter" = Table.SplitColumn(Source, "Filename",
Splitter.SplitTextByDelimiter("_", QuoteStyle.Csv), {"Filename", "Filename.2"}),
//set data types -- frequently a good idea in PQ
#"Changed Type1" = Table.TransformColumnTypes(#"Split Column by Delimiter",{
{"Filename", type text}, {"Filename.2", type text}}),
#"Removed Columns" = Table.RemoveColumns(#"Changed Type1",{"Filename.2"}),
//Group by file name and extract the lowest and highest rows
#"Grouped Rows" = Table.Group(#"Removed Columns", {"Filename"}, {
{"Start Row", each List.Min([Row index]), type number},
{"End Row", each List.Max([Row index]), type number}})
in
#"Grouped Rows"
CodePudding user response:
For execution speed and resource conservation it's best to minimize interaction with the worksheet from VBA. For example the following references the worksheet precisely twice no matter how long the list of files. Don't underestimate the value of restricting worksheet interaction from VBA.
Sub GetUniqueFiles()
Dim c&, i&, a$, b$, vIn, vOut
Const FILES_IN$ = "b3"
Const FILES_OUT$ = "d3"
With Range(FILES_IN)
vIn = .Resize(.End(xlDown).Row - .Row 1)
End With
ReDim vOut(1 To UBound(vIn), 1 To 3)
For i = 1 To UBound(vIn)
b = Split(vIn(i, 1), "_")(0)
If a <> b Then
a = b
c = c 1
vOut(c, 1) = b
vOut(c, 2) = i
If c > 1 Then vOut(c - 1, 3) = i - 1
End If
Next
If c > 1 Then vOut(c, 3) = i - 1
Range(FILES_OUT).Resize(UBound(vIn), 3) = vOut
End Sub