Home > Software engineering >  Loop through all cells, check if cell value is X then copy cells to different location
Loop through all cells, check if cell value is X then copy cells to different location

Time:10-14

I currently have a CSV file with a single column and a billion rows of text - there's a lot of filler, fluff and unnecessary text in those rows, but there's also a repeating pattern that I want to check for and copy to another sheet.

The CSV looks something like this:

Column A
Fluff
Above Value
Filter Value
Below Value
Fluff

I need to check for a specific string in the 'Filter Value' cell, and if that's a match populate a table in a different sheet with the Filter Value, Above Value and Below Value, looking like this:

Filter Values Above Values Below Values
FValue 1 AValue 1 BValue1
FValue 2 AValue 2 BValue2
... ... ...

The code I currently have looks like this, but it's not putting the Above / Below values into the proper positions:

Sub CopyRecords()

Dim FilterCol As Range
Dim Filter As Range
Dim PasteCell As Range
Dim PasteCellAbove As Range
Dim PasteCellBelow As Range

' Clear Destination table for testing
ThisWorkbook.Sheets(2).Range("A2:C999").Clear

Set FilterCol = ThisWorkbook.Sheets(1).Range("A1:A999")

For Each Filter In FilterCol

    If ThisWorkbook.Sheets(2).Range("A2") = "" Then
        Set PasteCell = ThisWorkbook.Sheets(2).Range("A2")
        Set PasteCellAbove = ThisWorkbook.Sheets(2).Range("B2")
        Set PasteCellBelow = ThisWorkbook.Sheets(2).Range("C2")
    Else
        Set PasteCell = ThisWorkbook.Sheets(2).Range("A1").End(xlDown).Offset(1, 0)
        Set PasteCellAbove = ThisWorkbook.Sheets(2).Range("B1").End(xlDown).Offset(1, 0)
        Set PasteCellBelow = ThisWorkbook.Sheets(2).Range("C1").End(xlDown).Offset(1, 0)
    End If

    If Left(Filter, 5) = "Testo" Then
        Range(Filter.End(xlToLeft), Filter.End(xlToRight)).Copy PasteCell
        Range(Filter.Offset(1, 0), Filter.Offset(0, 0)).Copy PasteCellAbove
        Range(Filter.Offset(-1, 0), Filter.Offset(0, 0)).Copy PasteCellBelow
    End If
Next Filter
        
End Sub

Anyone able to lend a hand?

CodePudding user response:

Try this - wherever possible it's much faster to work with arrays of data.

Sub CopyRecords()

    Dim data, r As Long, rwOut As Range, v
    
    'get all data as an array
    With ThisWorkbook.Sheets(1)
        data = ThisWorkbook.Sheets(1).Range("A1:A" & _
                   .Cells(.Rows.Count, "A").End(xlUp).Row).Value
    End With
    
    With ThisWorkbook.Sheets(2)        'reporting sheet
        .Range("A2:C999").Clear        'clear destination table
        Set rwOut = .Range("A2:C2")    'first row of output
    End With
    
    For r = 2 To UBound(data, 1) - 1
        v = Trim(data(r, 1))
        If v Like "*email:*" Then
            rwOut.Value = Array(v, data(r - 1, 1), data(r   1, 1)) 'write values
            Set rwOut = rwOut.Offset(1, 0)                         'next row down
        End If
    Next r
End Sub

CodePudding user response:

If you really have a billion rows in your input file, I don't think you will want to open that in an Excel sheet in order to process it.
Here is a solution that opens a TextStream object and reads the source file line by line rather than reading it all into memory.
It dumps the output into a new worksheet in the Excel file, but depending on how large your output is, I wonder if you ultimately might want to write that out to a CSV file instead.

Anyway, here is a potential solution. Note that I didn't do any parsing of the "Before" and "After" lines.

Option Explicit


Public Sub extractData()
    Const sourceName As String = "c:\apps\excel\so demo\input.csv" 'change this as necessary
    Const maxOutputRecs As Long = 10000000
    
    Dim fso As Scripting.FileSystemObject
    Dim ts As Scripting.TextStream
    Dim before, after, cLine As String
    Dim n, i As Long
    Dim xlSheet As Excel.Worksheet
    Dim rng As Excel.Range
    
    
    Dim data() As Variant
    ReDim data(1 To maxOutputRecs, 1 To 3)
    
    'Add header line to output array
    data(1, 1) = "Testo"
    data(1, 2) = "Before"
    data(1, 3) = "After"
    
    Set fso = New Scripting.FileSystemObject
    Set ts = fso.OpenTextFile(Filename:=sourceName, IOMode:=ForReading, Create:=False)

    i = 0
    n = 0
    cLine = ""
    'read through source file line by line
    Do While Not ts.AtEndOfStream
        i = i   1
        before = cLine
        cLine = ts.ReadLine
        If VBA.Left(cLine, 5) = "Testo" Then
            n = n   1
            after = ts.ReadLine
            data(n   1, 1) = cLine
            data(n   1, 2) = before
            data(n   1, 3) = after
            cLine = after
        End If
        
        If n   1 = maxOutputRecs Then
            'end loop - may want to throw an error or write to a log file or do something else
            Exit Do
        End If
    Loop
    
    ts.Close
    
    data = redim2DArrayRows(data, n   1, 3)
    
    'create a new worksheet for the output
    Set xlSheet = ThisWorkbook.Worksheets.Add
    xlSheet.Name = "output"
    'define the output range in the worksheet based on array size
    Set rng = xlSheet.Range( _
        xlSheet.Cells(1, 1), _
        xlSheet.Cells(UBound(data, 1), UBound(data, 2)) _
        )
    'Write data out to sheet
    rng.Value = data
    
    
End Sub



Public Function redim2DArrayRows(ByRef sourceArray() As Variant, ByVal rowBound As Long, ByVal colBound As Long) As Variant()
    Dim newArr() As Variant
    Dim i As Long
    Dim j As Long
    
    ReDim newArr(LBound(sourceArray, 1) To rowBound, LBound(sourceArray, 2) To colBound)
    For i = LBound(newArr, 1) To UBound(newArr, 1)
        For j = LBound(newArr, 2) To UBound(newArr, 2)
            newArr(i, j) = sourceArray(i, j)
        Next j
    Next i
    redim2DArrayRows = newArr
End Function

CodePudding user response:

Extract Data Using FindNext

Option Explicit

Sub ExtractData()
    Const ProcTitle As String = "Extract Data"
    
    Const sCriteria As String = "Testo*" ' begins with ("*Testo*" contains)
    Const cCount As Long = 3 ' don't change: it's the same for source and dest.
    
    Dim wb As Workbook: Set wb = ThisWorkbook
    
    ' Source
    
    Dim sws As Worksheet: Set sws = wb.Worksheets(1)
    Dim srg As Range
    ' Either static...
    Set srg = sws.Range("A2:A999") ' no cell above 'A1'
    ' ... or dynamic:
    'Set srg = sws.Range("A2", sws.Cells(sws.Rows.Count, "A").End(xlUp))
    
    Dim sCell As Range
    Set sCell = srg.Find(sCriteria, srg.Cells(srg.Rows.Count), xlValues, xlPart)
    If sCell Is Nothing Then Exit Sub
    
    Dim FirstAddress As String: FirstAddress = sCell.Address
    Dim sTemp As Variant: ReDim sTemp(1 To cCount)
    
    ' Destination
    
    Dim dws As Worksheet: Set dws = wb.Worksheets(2)
    Dim dCell As Range: Set dCell = dws.Range("A2")
    Dim dColl As Collection: Set dColl = New Collection
    
    
    ' Write the 3 values to the Temp array and add the array to the collection.
    Do
        ' Modify here, if you don't need the complete cell contents.
        ' Cell
        sTemp(1) = sCell.Value
        ' Above
        sTemp(2) = sCell.Offset(-1).Value
        ' Below
        sTemp(3) = sCell.Offset(1).Value
        
        dColl.Add sTemp
        Set sCell = srg.FindNext(sCell)
    
    Loop Until sCell.Address = FirstAddress
    
    Dim drCount As Long: drCount = dColl.Count
    Dim dData As Variant: ReDim dData(1 To drCount, 1 To cCount)
    
    Dim Item As Variant
    Dim r As Long
    Dim c As Long
    
    ' Loop over the arrays in the collection and write the elements
    ' of each array to a row of the Destination array.
    For Each Item In dColl
        r = r   1
        For c = 1 To cCount
            dData(r, c) = Item(c)
        Next c
    Next Item
    
    ' Write the values of the Destination array to the Destination range.
    Dim drg As Range: Set drg = dCell.Resize(drCount, cCount)
    drg.Value = dData
    
    ' Clear the range below the Destination range.
    Dim dcrg As Range: Set dcrg = drg.Resize( _
        dws.Rows.Count - drg.Row - drCount   1).Offset(drCount)
    dcrg.Clear
    'Debug.Print drg.Address(0, 0), dcrg.Address(0, 0)
    
    MsgBox "Done.", vbInformation, ProcTitle

End Sub
  • Related