I have a fairly large excel file (Think 65,000 rows).
Within the excel file, only two columns matter for this exercise: CCNumber and FileFound (Col BC/BD).
I am trying to use a for loop to loop through the 65,000 rows and compare the CCNumber (ID) against a folder of files (30,000 files), and then if an id matches/isnt found print "Available" or "Not Found" in the FileFound column - As below:
Sub LoopFiles
Dim fileName As Variant, csheet As Variant
fileName = Dir("Some\Directory\Here\*pdf")
Dim CCNums As Range
Set CCNums = Range("BC4:BC68512")
Application.ScreenUpdating = False
While fileName <> ""
ID = Left(fileName,6) 'id is a 6 digit numeric number, strip away everything else
For Each CCNum in CCNums
csheet = Left(CCNum, 6)
if(ID = csheet) Then
CCNum.Offset(0,1).Value = "Available"
Else
CCNum.Offset(0,1).Value = "Not Found"
End If
Next CCNum
fileName = Dir
Wend
Application.ScreenUpdating = True
End Sub
The above is hilariously inefficient and it takes forever. Is there a way I can speed this up, or am I just going to have to sit here and wait for the spinning wheel of doom to stop.
CodePudding user response:
Instead of looping through a file list you can directly check with Dir
and wildcards if a file exists.
eg. you can use Dir("C:\Temp\myNumber*.pdf")
to find a file that is named myNumberAndUnusefulText.pdf
. So if you use fileName = Dir("Some\Directory\Here\" & CSheet & "*.pdf")
it will return the file name of a file that starts with the number in CSheet
.
Further reading all the values into an array first and then processing the array makes your code much faster. Reading and writing actions to cells use a lot of overhead and therefore are slow. By reading the values into an array you reduce it to just one cell reading and one cell writing action.
Option Explicit
Public Sub LoopFilesImproved()
Dim CCNums As Range
Set CCNums = ThisWorkbook.Worksheets("Sheet1").Range("BC4:BC68512") ' always specify in which sheet a range is!
' define output range
Dim Output As Range
Set Output = CCNums.Offset(ColumnOffset:=1)
' read output range into array for faster processing
Dim OutputValues() As Variant
OutputValues = Output.Value2
' read all values into an array for faster processing
Dim CCNumsValues() As Variant
CCNumsValues = CCNums.Value2
' loop through numbers and check if a file exists
Dim iCCNum As Long
For iCCNum = LBound(CCNumsValues, 1) To UBound(CCNumsValues, 1)
Dim CSheet As String
CSheet = Left$(CCNumsValues(iCCNum, 1), 6)
Dim fileName As String
fileName = Dir("Some\Directory\Here\" & CSheet & "*.pdf")
If fileName <> vbNullString Then
OutputValues(iCCNum, 1) = "Available"
Else
OutputValues(iCCNum, 1) = "Not Found"
End If
Next iCCNum
' write array values back to cell
Output.Value2 = OutputValues
End Sub
CodePudding user response:
You can try first collecting all of the file names into a dictionary - from that point the check will be fast...
Sub LoopFiles()
Dim dictFiles As Object, arrCC, arrAv, rngCC As Range, r As Long
Set dictFiles = FileIds("Some\Directory\Here\*pdf") 'collect all the file Id's
Set rngCC = ActiveSheet.Range("BC4:BC68512")
arrCC = rngCC.Value
ReDim arrAv(1 To UBound(arrCC, 1), 1 To 1) 'size the "available?" array
For r = 1 To UBound(arrCC, 1) 'loop data from BC
id = Left(arrCC(r, 1), 6) 'extract the id
arrAv(r, 1) = IIf(dict.exists(id), "Available", "Not found")
Next r
rngCC.Offset(0, 1).Value = arrAv 'populate availability in BD
End Sub
'scan all files matching the `folderPath` pattern, and return a Dictionary object
' with keys equal to the first 6 characters of the file names
Function FileIds(folderPath As String)
Dim dict As Object, f, id
Set dict = CreateObject("scripting.dictionary")
f = Dir(folderPath)
Do While Len(f) > 0
If Len(f) >= 10 Then dict(Left(f, 6)) = True 'need at least 10 chars with the extension
f = Dir()
Loop
Set FileIds = dict
End Function
CodePudding user response:
instead of looping through the range for every file, use the WorksheetFunction.Match() function to locate the value in the range that matches the filename.