The below mention code can successfully count the required tags in an XML files and also provides name of file and tag count in excel sheet. I have just one query that currently it only reads the folder individually. However if there are 300 folders in a parent folder i need to select each folder every time. Is there anyway if anyone can amend the code so that if there are 300 folders in a parent folder in read each and every file (XML) in all subfolders. This will be very helpful for me.
I have tried to do it my own but this is beyond my capacity.
Option Explicit
Sub process_folder()
Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
Set wb = ThisWorkbook
Set ws = wb.Sheets(1)
ws.UsedRange.Clear
ws.Range("A1:C1") = Array("Source", "<Headline> Tag Count")
iRow = 1
' create FSO and regular expression pattern
Dim FSO As Object, ts As Object, regEx As Object, txt As String
Set FSO = CreateObject("Scripting.FileSystemObject")
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.pattern = "<Headline>(.*)</Headline>"
End With
'Opens the folder picker dialog to allow user selection
Dim myfolder As String, myfile As String, n As Long
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Please select a folder"
.Show
.AllowMultiSelect = False
If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
MsgBox "You did not select a folder"
Exit Sub
End If
myfolder = .SelectedItems(1) & "\" 'Assign selected folder to MyFolder
End With
'Loop through all files in a folder until DIR cannot find anymore
Application.ScreenUpdating = False
myfile = Dir(myfolder & "*.xml")
Do While myfile <> ""
iRow = iRow 1
ws.Cells(iRow, 1) = myfile
' open file and read all lines
Set ts = FSO.OpenTextFile(myfolder & myfile)
txt = ts.ReadAll
ts.Close
' count pattern matches
Dim m As Object
If regEx.test(txt) Then
Set m = regEx.Execute(txt)
ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
ws.Cells(iRow, 3) = m.Count
Else
ws.Cells(iRow, 2) = "No tags"
ws.Cells(iRow, 3) = 0
End If
myfile = Dir 'DIR gets the next file in the folder
Loop
' results
ws.UsedRange.Columns.AutoFit
Application.ScreenUpdating = True
End Sub
CodePudding user response:
Use Subfolders property of the parent folder object.
Option Explicit
Sub process_folder()
Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
Set wb = ThisWorkbook
Set ws = wb.Sheets(1)
ws.UsedRange.Clear
ws.Range("A1:B1") = Array("Source", "<Headline> Tag Count")
iRow = 1
' create FSO and regular expression pattern
Dim fso As Object, ts As Object, regEx As Object, txt As String
Set fso = CreateObject("Scripting.FileSystemObject")
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = "<Headline>(.*)</Headline>"
End With
'Opens the folder picker dialog to allow user selection
Dim myfolder, myfile As String, n As Long
Dim parentfolder As String, oParent
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Please select a folder"
.Show
.AllowMultiSelect = False
If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
MsgBox "You did not select a folder"
Exit Sub
End If
parentfolder = .SelectedItems(1) 'Assign selected folder to ParentFolder
End With
Set oParent = fso.getFolder(parentfolder)
For Each myfolder In oParent.subfolders
'Loop through all files in a folder until DIR cannot find anymore
Application.ScreenUpdating = False
myfile = Dir(myfolder & "\*.xml")
Do While myfile <> ""
iRow = iRow 1
ws.Cells(iRow, 1) = myfolder & "\" & myfile
' open file and read all lines
Set ts = fso.OpenTextFile(myfolder & "\" & myfile)
txt = ts.ReadAll
ts.Close
' count pattern matches
Dim m As Object
If regEx.test(txt) Then
Set m = regEx.Execute(txt)
ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
ws.Cells(iRow, 3) = m.Count
Else
ws.Cells(iRow, 2) = "No tags"
ws.Cells(iRow, 3) = 0
End If
Debug.Print myfile
myfile = Dir 'DIR gets the next file in the folder
Loop
' results
ws.UsedRange.Columns.AutoFit
Next
Application.ScreenUpdating = True
MsgBox "Done"
End Sub
CodePudding user response:
Loop Through All Folders and Subfolders
- In this post under the title Subfolder Paths to Collection, you can find the
CollSubfolderPaths
function, which will return the paths of all folders and their subfolders in a collection. - In your code, you could utilize it in the following way.
Sub process_folder()
' Preceding code...
Application.ScreenUpdating = False
' Return the paths of all folders and subfolders in a collection.
Dim MyFolders As Collection: Set MyFolders = CollSubfolderPaths(myfolder)
Dim Item As Variant
' Loop through the items in the collection.
For Each Item In MyFolders
' Get the first file.
myfile = Dir(Item & "\" & "*.xml")
'Loop through all files in a folder until DIR cannot find anymore
Do While myfile <> ""
' The same code...
Loop
Next Item
' results
ws.UsedRange.Columns.AutoFit
Application.ScreenUpdating = True
End Sub