Home > Software engineering >  To count tags in XML from folders and subfolders
To count tags in XML from folders and subfolders

Time:01-18

The below mention code can successfully count the required tags in an XML files and also provides name of file and tag count in excel sheet. I have just one query that currently it only reads the folder individually. However if there are 300 folders in a parent folder i need to select each folder every time. Is there anyway if anyone can amend the code so that if there are 300 folders in a parent folder in read each and every file (XML) in all subfolders. This will be very helpful for me.

I have tried to do it my own but this is beyond my capacity.

Option Explicit

Sub process_folder()

    Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    ws.UsedRange.Clear
    ws.Range("A1:C1") = Array("Source", "<Headline> Tag Count")
    iRow = 1
    
    ' create FSO and regular expression pattern
    Dim FSO As Object, ts As Object, regEx As Object, txt As String
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set regEx = CreateObject("VBScript.RegExp")
    With regEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .pattern = "<Headline>(.*)</Headline>"
   
        
    End With

    'Opens the folder picker dialog to allow user selection
    Dim myfolder As String, myfile As String, n As Long
    With Application.FileDialog(msoFileDialogFolderPicker)
        .Title = "Please select a folder"
        .Show
        .AllowMultiSelect = False
        If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
            MsgBox "You did not select a folder"
            Exit Sub
        End If
        myfolder = .SelectedItems(1) & "\" 'Assign selected folder to MyFolder
    End With
    
    'Loop through all files in a folder until DIR cannot find anymore
    Application.ScreenUpdating = False
    myfile = Dir(myfolder & "*.xml")
    Do While myfile <> ""
    
        iRow = iRow   1
        ws.Cells(iRow, 1) = myfile
    
        ' open file and read all lines
        Set ts = FSO.OpenTextFile(myfolder & myfile)
        txt = ts.ReadAll
        ts.Close
                                   
        ' count pattern matches
        Dim m As Object
        If regEx.test(txt) Then
            Set m = regEx.Execute(txt)
            ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
            ws.Cells(iRow, 3) = m.Count
            
        Else
            ws.Cells(iRow, 2) = "No tags"
            ws.Cells(iRow, 3) = 0
        End If

        myfile = Dir 'DIR gets the next file in the folder
    Loop
    
    ' results
    ws.UsedRange.Columns.AutoFit
    Application.ScreenUpdating = True
    

End Sub

CodePudding user response:

Use Subfolders property of the parent folder object.

Option Explicit

Sub process_folder()

    Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    ws.UsedRange.Clear
    ws.Range("A1:B1") = Array("Source", "<Headline> Tag Count")
    iRow = 1
    
    ' create FSO and regular expression pattern
    Dim fso As Object, ts As Object, regEx As Object, txt As String
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set regEx = CreateObject("VBScript.RegExp")
    With regEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .Pattern = "<Headline>(.*)</Headline>"
    End With

    'Opens the folder picker dialog to allow user selection
    Dim myfolder, myfile As String, n As Long
    Dim parentfolder As String, oParent
   
    With Application.FileDialog(msoFileDialogFolderPicker)
        .Title = "Please select a folder"
        .Show
        .AllowMultiSelect = False
        If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
            MsgBox "You did not select a folder"
            Exit Sub
        End If
        parentfolder = .SelectedItems(1) 'Assign selected folder to ParentFolder
    End With
    
    Set oParent = fso.getFolder(parentfolder)
    For Each myfolder In oParent.subfolders
    
        'Loop through all files in a folder until DIR cannot find anymore
        Application.ScreenUpdating = False
        myfile = Dir(myfolder & "\*.xml")
        Do While myfile <> ""
        
            iRow = iRow   1
            ws.Cells(iRow, 1) = myfolder & "\" & myfile
        
            ' open file and read all lines
            Set ts = fso.OpenTextFile(myfolder & "\" & myfile)
            txt = ts.ReadAll
            ts.Close
                                       
            ' count pattern matches
            Dim m As Object
            If regEx.test(txt) Then
                Set m = regEx.Execute(txt)
                ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
                ws.Cells(iRow, 3) = m.Count
                
            Else
                ws.Cells(iRow, 2) = "No tags"
                ws.Cells(iRow, 3) = 0
            End If
    
            Debug.Print myfile
            myfile = Dir 'DIR gets the next file in the folder
        Loop
        
        ' results
        ws.UsedRange.Columns.AutoFit
    Next
    Application.ScreenUpdating = True
    MsgBox "Done"

End Sub

CodePudding user response:

Loop Through All Folders and Subfolders

  • In this post under the title Subfolder Paths to Collection, you can find the CollSubfolderPaths function, which will return the paths of all folders and their subfolders in a collection.
  • In your code, you could utilize it in the following way.
Sub process_folder()
    
    ' Preceding code...
    
    Application.ScreenUpdating = False
    
    ' Return the paths of all folders and subfolders in a collection.
    Dim MyFolders As Collection: Set MyFolders = CollSubfolderPaths(myfolder)
    
    Dim Item As Variant
    
    ' Loop through the items in the collection.
    For Each Item In MyFolders
        ' Get the first file.
        myfile = Dir(Item & "\" & "*.xml")
        'Loop through all files in a folder until DIR cannot find anymore
        Do While myfile <> ""
            ' The same code...
        Loop
    
    Next Item

    ' results
    ws.UsedRange.Columns.AutoFit
    Application.ScreenUpdating = True

End Sub
  • Related