I am populating a userform list using a collection which pulls from a list in a separate worksheet with over 200,000 records (non unique). However it takes an hour to populate the list because of the number of records. Is there any way to optimize this and make the userform initialize faster, possibly by reducing the 200k records into a smaller list with only unique values before then trying to load it into the collection?
LastRow = SourceSheet.Cells(Rows.Count, 3).End(xlUp).Row
On Error Resume Next
Set Coll = New Collection
'Open a With structure for the ListBox control.
With ClientInput
.Clear
For Each cell In SourceSheet.Range("B2:B" & LastRow)
'Only attempt to populate cells containing a text or value.
If Len(cell.Value) <> 0 Then
Err.Clear
Coll.Add cell.Text, cell.Text
If Err.Number = 0 Then .AddItem cell.Text
End If
Next cell
blnUnsorted = True
Do
blnUnsorted = False
For i = 0 To UBound(.List) - 1
If LCase(.List(i)) > LCase(.List(i 1)) Then
temp = .List(i)
.List(i) = .List(i 1)
.List(i 1) = temp
blnUnsorted = True
Exit For
End If
Next i
Loop While blnUnsorted = True
'Close the With structure for the ListBox control.
End With
CodePudding user response:
Approx. 15 seconds for 200k rows with 10k unique values.
The sorting is the slowest part, so if you have fewer unique values that should be quicker.
Sub Tester()
Dim data, col As Collection, r As Long, v, arr, SourceSheet As Worksheet, t
Set SourceSheet = ActiveSheet 'for example
t = Timer
With SourceSheet
data = .Range("B2:B" & .Cells(.Rows.Count, 3).End(xlUp).Row).Value
End With
Debug.Print UBound(data, 1)
Set col = New Collection
For r = 1 To UBound(data, 1)
v = CStr(data(r, 1))
If Len(v) > 0 Then
On Error Resume Next 'ignore error on duplicate key
col.Add v, v
On Error GoTo 0 'stop ignoring errors
End If
Next r
Debug.Print "collected", Timer - t
arr = CollectionToArray(col) 'convert to array
Debug.Print "in array", Timer - t
ArraySort arr 'sort the array
Debug.Print "sorted", Timer - t
ClientInput.List = arr
Debug.Print "done", Timer - t
End Sub
Function CollectionToArray(c As Collection) As Variant
Dim a() As Variant, i As Long
ReDim a(0 To c.Count - 1)
For i = 0 To c.Count - 1
a(i) = c.Item(i 1)
Next
CollectionToArray = a
End Function
Sub ArraySort(ByRef MyArray As Variant)
Dim First As Long, Last As Long, i As Long, j As Long, temp
First = LBound(MyArray)
Last = UBound(MyArray)
For i = First To Last - 1
For j = i 1 To Last
If MyArray(i) > MyArray(j) Then
temp = MyArray(j)
MyArray(j) = MyArray(i)
MyArray(i) = temp
End If
Next j
Next i
End Sub
CodePudding user response:
Populate List Box With Unique Column Values
The Method
Sub PopulateListBoxWithUniqueColumnValues()
Const WORKSHEET_NAME As String = "Sheet1"
Const FIRST_CELL_ADDRESS As String = "B2"
Const COLUMN_CONTAINS_FORMULAE As Boolean = True
Dim wb As Workbook: Set wb = ThisWorkbook ' workbook containing this code
' Set the non-empty column range ('srg').
Dim ws As Worksheet: Set ws = wb.Worksheets(WORKSHEET_NAME)
Dim fCell As Range: Set fCell = ws.Range(FIRST_CELL_ADDRESS)
Dim srg As Range: Set srg = SetColumn(fCell)
If srg Is Nothing Then Exit Sub ' no data
' Backup the column range values (formulae).
Dim sData() As Variant
If COLUMN_CONTAINS_FORMULAE Then
sData = GetColumnRangeFormulae(srg)
srg.Value = srg.Value
Else
sData = GetColumnRangeValues(srg)
End If
Application.ScreenUpdating = False
srg.Sort srg, xlAscending, , , , , , xlNo
srg.RemoveDuplicates 1, xlNo
' Write the sorted unique values from the column range
' to a 1D one-based array ('dArr').
Dim drg As Range: Set drg = SetColumn(fCell)
Dim dData() As Variant: dData = GetColumnRangeValues(drg)
Dim dArr() As Variant: dArr = ColumnToArray(dData)
' Populate the list box.
ClientInput.List = dArr
' Write back the backed-up values (formulae).
srg.Value = sData
Application.ScreenUpdating = True
' Inform.
MsgBox "List box populated.", vbInformation
End Sub
The Helper Functions
Function SetColumn( _
ByVal FirstCell As Range) _
As Range
With FirstCell.Cells(1)
Dim lCell As Range
Set lCell = .Resize(.Worksheet.Rows.Count - .Row 1) _
.Find("*", , xlFormulas, , , xlPrevious)
If lCell Is Nothing Then Exit Function
Set SetColumn = .Resize(lCell.Row - .Row 1)
End With
End Function
Function GetColumnRangeValues( _
ByVal rg As Range, _
Optional ByVal ColumnNumber As Long = 1) _
As Variant
With rg.Columns(ColumnNumber)
If rg.Rows.Count = 1 Then
Dim Data As Variant: ReDim Data(1 To 1, 1 To 1): Data(1, 1) = .Value
GetColumnRangeValues = Data
Else
GetColumnRangeValues = .Value
End If
End With
End Function
Function GetColumnRangeFormulae( _
ByVal rg As Range, _
Optional ByVal ColumnNumber As Long = 1) _
As Variant
With rg.Columns(ColumnNumber)
If rg.Rows.Count = 1 Then
Dim Data As Variant: ReDim Data(1 To 1, 1 To 1)
Data(1, 1) = .Formula: GetColumnRangeFormulae = Data
Else
GetColumnRangeFormulae = .Formula
End If
End With
End Function
Function ColumnToArray( _
Data() As Variant, _
Optional ByVal ColumnIndex As Long = 1, _
Optional ByVal ArrayLowerLimit As Long = 1) _
As Variant
Dim rDiff As Long: rDiff = LBound(Data, 1) - ArrayLowerLimit
Dim Arr() As Variant
ReDim Arr(ArrayLowerLimit To UBound(Data, 1) - rDiff)
Dim r As Long
For r = LBound(Data, 1) To UBound(Data, 1)
Arr(r - rDiff) = Data(r, ColumnIndex)
Next r
ColumnToArray = Arr
End Function
Benchmarking
Sub PopulateListBoxWithUniqueColumnValuesBenchMark()
Dim TI As Double: TI = Timer
Dim TT As Double: TT = TI
Dim TC As Double: TC = TI
Const WORKSHEET_NAME As String = "Sheet1"
Const FIRST_CELL_ADDRESS As String = "B2"
Const COLUMN_CONTAINS_FORMULAE As Boolean = True
Dim wb As Workbook: Set wb = ThisWorkbook ' workbook containing this code
' Set the non-empty column range ('srg').
Dim ws As Worksheet: Set ws = wb.Worksheets(WORKSHEET_NAME)
Dim fCell As Range: Set fCell = ws.Range(FIRST_CELL_ADDRESS)
Dim srg As Range: Set srg = SetColumn(fCell)
If srg Is Nothing Then Exit Sub ' no data
DebugPrintTimePassed "Set Range: ", TI, TT, TC
' Backup the column range values (formulae).
Dim sData() As Variant
If COLUMN_CONTAINS_FORMULAE Then
sData = GetColumnRangeFormulae(srg)
srg.Value = srg.Value
Else
sData = GetColumnRangeValues(srg)
End If
DebugPrintTimePassed "Backup column: ", TI, TT, TC
Application.ScreenUpdating = False
DebugPrintTimePassed "Turn off screen updating: ", TI, TT, TC
srg.Sort srg, xlAscending, , , , , , xlNo
DebugPrintTimePassed "Sort column: ", TI, TT, TC
srg.RemoveDuplicates 1, xlNo
DebugPrintTimePassed "Remove duplicates: ", TI, TT, TC
' Write the sorted unique values from the column range
' to a 1D one-based array ('dArr').
Dim drg As Range: Set drg = SetColumn(fCell)
Dim dData() As Variant: dData = GetColumnRangeValues(drg)
Dim dArr() As Variant: dArr = ColumnToArray(dData)
DebugPrintTimePassed "Write to array: ", TI, TT, TC
' Populate the list box.
ClientInput.List = dArr
DebugPrintTimePassed "Populate list box: ", TI, TT, TC
' Write back the backed-up values (formulae).
srg.Value = sData
DebugPrintTimePassed "Write back column: ", TI, TT, TC
Application.ScreenUpdating = True
DebugPrintTimePassed "Turn on screen updating: ", TI, TT, TC
' Inform.
MsgBox "List box populated.", vbInformation
End Sub
Sub DebugPrintTimePassed( _
ByVal Title As String, _
ByRef TI As Double, _
ByRef TT As Double, _
ByRef TC As Double)
TT = Timer
Debug.Print Title, TT - TC, TT - TI
TC = TT
End Sub
Results
- Formula:
=C2
- Data: 10k random integers, 10k unique integers, 200k rows
Set Range: 0.001953125 0.001953125
Backup column: 0.4609375 0.462890625
Turn off screen updating: 0.005859375 0.46875
Sort column: 0.435546875 0.904296875
Remove duplicates: 0.662109375 1.56640625
Write to array: 0.056640625 1.623046875
Populate list box: 0.015625 1.638671875
Write back column: 0.341796875 1.98046875
Turn on screen updating: 0.013671875 1.994140625
Results Without Screen Updating
Set Range: 0.00390625 0.00390625
Backup column: 0.501953125 0.505859375
Sort column: 0.484375 0.990234375
Remove duplicates: 0.724609375 1.71484375
Write to array: 0.060546875 1.775390625
Populate list box: 0.015625 1.791015625
Write back column: 0.337890625 2.12890625