Home > Net >  VBA Collection created from a column of 200,000 records. How to optimize this?
VBA Collection created from a column of 200,000 records. How to optimize this?

Time:10-25

I am populating a userform list using a collection which pulls from a list in a separate worksheet with over 200,000 records (non unique). However it takes an hour to populate the list because of the number of records. Is there any way to optimize this and make the userform initialize faster, possibly by reducing the 200k records into a smaller list with only unique values before then trying to load it into the collection?

    LastRow = SourceSheet.Cells(Rows.Count, 3).End(xlUp).Row
    On Error Resume Next
    Set Coll = New Collection
    'Open a With structure for the ListBox control.
    With ClientInput
        .Clear
        For Each cell In SourceSheet.Range("B2:B" & LastRow)
            'Only attempt to populate cells containing a text or value.
            If Len(cell.Value) <> 0 Then
                Err.Clear
                Coll.Add cell.Text, cell.Text
                If Err.Number = 0 Then .AddItem cell.Text
                End If
        Next cell
            blnUnsorted = True
            Do
            blnUnsorted = False
            For i = 0 To UBound(.List) - 1
                If LCase(.List(i)) > LCase(.List(i   1)) Then
                    temp = .List(i)
                    .List(i) = .List(i   1)
                    .List(i   1) = temp
                    blnUnsorted = True
                    Exit For
                End If
            Next i
        Loop While blnUnsorted = True
    'Close the With structure for the ListBox control.
    End With

CodePudding user response:

Approx. 15 seconds for 200k rows with 10k unique values.

The sorting is the slowest part, so if you have fewer unique values that should be quicker.

Sub Tester()
    
    Dim data, col As Collection, r As Long, v, arr, SourceSheet As Worksheet, t
    
    Set SourceSheet = ActiveSheet 'for example
    t = Timer
    With SourceSheet
        data = .Range("B2:B" & .Cells(.Rows.Count, 3).End(xlUp).Row).Value
    End With
    Debug.Print UBound(data, 1)
    
    Set col = New Collection
    
    For r = 1 To UBound(data, 1)
        v = CStr(data(r, 1))
        If Len(v) > 0 Then
            On Error Resume Next 'ignore error on duplicate key
            col.Add v, v
            On Error GoTo 0 'stop ignoring errors
        End If
    Next r
    Debug.Print "collected", Timer - t
    arr = CollectionToArray(col) 'convert to array
    Debug.Print "in array", Timer - t
    ArraySort arr                'sort the array
    Debug.Print "sorted", Timer - t
    
    ClientInput.List = arr
    Debug.Print "done", Timer - t
    
    
End Sub

Function CollectionToArray(c As Collection) As Variant
    Dim a() As Variant, i As Long
    ReDim a(0 To c.Count - 1)
    For i = 0 To c.Count - 1
        a(i) = c.Item(i   1)
    Next
    CollectionToArray = a
End Function

Sub ArraySort(ByRef MyArray As Variant)
    Dim First As Long, Last As Long, i As Long, j As Long, temp
    First = LBound(MyArray)
    Last = UBound(MyArray)
    For i = First To Last - 1
        For j = i   1 To Last
            If MyArray(i) > MyArray(j) Then
                temp = MyArray(j)
                MyArray(j) = MyArray(i)
                MyArray(i) = temp
            End If
        Next j
    Next i
End Sub

CodePudding user response:

Populate List Box With Unique Column Values

The Method

Sub PopulateListBoxWithUniqueColumnValues()

    Const WORKSHEET_NAME As String = "Sheet1"
    Const FIRST_CELL_ADDRESS As String = "B2"
    Const COLUMN_CONTAINS_FORMULAE As Boolean = True
    
    Dim wb As Workbook: Set wb = ThisWorkbook ' workbook containing this code
    
    ' Set the non-empty column range ('srg').
    
    Dim ws As Worksheet: Set ws = wb.Worksheets(WORKSHEET_NAME)
    Dim fCell As Range: Set fCell = ws.Range(FIRST_CELL_ADDRESS)
    Dim srg As Range: Set srg = SetColumn(fCell)
    If srg Is Nothing Then Exit Sub ' no data
    
    ' Backup the column range values (formulae).

    Dim sData() As Variant
    If COLUMN_CONTAINS_FORMULAE Then
        sData = GetColumnRangeFormulae(srg)
        srg.Value = srg.Value
    Else
        sData = GetColumnRangeValues(srg)
    End If

    Application.ScreenUpdating = False
    
    srg.Sort srg, xlAscending, , , , , , xlNo
    
    srg.RemoveDuplicates 1, xlNo
    
    ' Write the sorted unique values from the column range
    ' to a 1D one-based array ('dArr').
    
    Dim drg As Range: Set drg = SetColumn(fCell)
    Dim dData() As Variant: dData = GetColumnRangeValues(drg)
    Dim dArr() As Variant: dArr = ColumnToArray(dData)
    
    ' Populate the list box.
    
    ClientInput.List = dArr
    
    ' Write back the backed-up values (formulae).
    
    srg.Value = sData

    Application.ScreenUpdating = True
    
    ' Inform.
        
    MsgBox "List box populated.", vbInformation

End Sub

The Helper Functions

Function SetColumn( _
    ByVal FirstCell As Range) _
As Range
    
    With FirstCell.Cells(1)
        Dim lCell As Range
        Set lCell = .Resize(.Worksheet.Rows.Count - .Row   1) _
            .Find("*", , xlFormulas, , , xlPrevious)
        If lCell Is Nothing Then Exit Function
        Set SetColumn = .Resize(lCell.Row - .Row   1)
    End With

End Function

Function GetColumnRangeValues( _
    ByVal rg As Range, _
    Optional ByVal ColumnNumber As Long = 1) _
As Variant
    
    With rg.Columns(ColumnNumber)
        If rg.Rows.Count = 1 Then
            Dim Data As Variant: ReDim Data(1 To 1, 1 To 1): Data(1, 1) = .Value
            GetColumnRangeValues = Data
        Else
            GetColumnRangeValues = .Value
        End If
    End With

End Function

Function GetColumnRangeFormulae( _
    ByVal rg As Range, _
    Optional ByVal ColumnNumber As Long = 1) _
As Variant
    
    With rg.Columns(ColumnNumber)
        If rg.Rows.Count = 1 Then
            Dim Data As Variant: ReDim Data(1 To 1, 1 To 1)
            Data(1, 1) = .Formula: GetColumnRangeFormulae = Data
        Else
            GetColumnRangeFormulae = .Formula
        End If
    End With

End Function

Function ColumnToArray( _
    Data() As Variant, _
    Optional ByVal ColumnIndex As Long = 1, _
    Optional ByVal ArrayLowerLimit As Long = 1) _
As Variant
    
    Dim rDiff As Long: rDiff = LBound(Data, 1) - ArrayLowerLimit
    Dim Arr() As Variant
    ReDim Arr(ArrayLowerLimit To UBound(Data, 1) - rDiff)
    
    Dim r As Long
    
    For r = LBound(Data, 1) To UBound(Data, 1)
        Arr(r - rDiff) = Data(r, ColumnIndex)
    Next r
    
    ColumnToArray = Arr

End Function

Benchmarking

Sub PopulateListBoxWithUniqueColumnValuesBenchMark()
Dim TI As Double: TI = Timer
Dim TT As Double: TT = TI
Dim TC As Double: TC = TI

    Const WORKSHEET_NAME As String = "Sheet1"
    Const FIRST_CELL_ADDRESS As String = "B2"
    Const COLUMN_CONTAINS_FORMULAE As Boolean = True
    
    Dim wb As Workbook: Set wb = ThisWorkbook ' workbook containing this code
    
    ' Set the non-empty column range ('srg').
    
    Dim ws As Worksheet: Set ws = wb.Worksheets(WORKSHEET_NAME)
    Dim fCell As Range: Set fCell = ws.Range(FIRST_CELL_ADDRESS)
    Dim srg As Range: Set srg = SetColumn(fCell)
    If srg Is Nothing Then Exit Sub ' no data
    
DebugPrintTimePassed "Set Range:                ", TI, TT, TC
    
    ' Backup the column range values (formulae).

    Dim sData() As Variant
    If COLUMN_CONTAINS_FORMULAE Then
        sData = GetColumnRangeFormulae(srg)
        srg.Value = srg.Value
    Else
        sData = GetColumnRangeValues(srg)
    End If
DebugPrintTimePassed "Backup column:            ", TI, TT, TC

    Application.ScreenUpdating = False
DebugPrintTimePassed "Turn off screen updating: ", TI, TT, TC
    
    srg.Sort srg, xlAscending, , , , , , xlNo
DebugPrintTimePassed "Sort column:              ", TI, TT, TC
    
    srg.RemoveDuplicates 1, xlNo
DebugPrintTimePassed "Remove duplicates:        ", TI, TT, TC
    
    ' Write the sorted unique values from the column range
    ' to a 1D one-based array ('dArr').
    
    Dim drg As Range: Set drg = SetColumn(fCell)
    Dim dData() As Variant: dData = GetColumnRangeValues(drg)
    Dim dArr() As Variant: dArr = ColumnToArray(dData)
DebugPrintTimePassed "Write to array:           ", TI, TT, TC
    
    ' Populate the list box.
    
    ClientInput.List = dArr
DebugPrintTimePassed "Populate list box:        ", TI, TT, TC
    
    ' Write back the backed-up values (formulae).
    
    srg.Value = sData
DebugPrintTimePassed "Write back column:        ", TI, TT, TC

    Application.ScreenUpdating = True
DebugPrintTimePassed "Turn on screen updating:  ", TI, TT, TC
    
    ' Inform.
        
    MsgBox "List box populated.", vbInformation

End Sub

Sub DebugPrintTimePassed( _
        ByVal Title As String, _
        ByRef TI As Double, _
        ByRef TT As Double, _
        ByRef TC As Double)
    
    TT = Timer
    Debug.Print Title, TT - TC, TT - TI
    TC = TT

End Sub

Results

  • Formula: =C2
  • Data: 10k random integers, 10k unique integers, 200k rows
Set Range:                   0.001953125   0.001953125 
Backup column:               0.4609375     0.462890625 
Turn off screen updating:    0.005859375   0.46875 
Sort column:                 0.435546875   0.904296875 
Remove duplicates:           0.662109375   1.56640625 
Write to array:              0.056640625   1.623046875 
Populate list box:           0.015625      1.638671875 
Write back column:           0.341796875   1.98046875 
Turn on screen updating:     0.013671875   1.994140625 

Results Without Screen Updating

Set Range:                   0.00390625    0.00390625 
Backup column:               0.501953125   0.505859375 
Sort column:                 0.484375      0.990234375 
Remove duplicates:           0.724609375   1.71484375 
Write to array:              0.060546875   1.775390625 
Populate list box:           0.015625      1.791015625 
Write back column:           0.337890625   2.12890625 
  • Related