I currently have a CSV file with a single column and a billion rows of text - there's a lot of filler, fluff and unnecessary text in those rows, but there's also a repeating pattern that I want to check for and copy to another sheet.
The CSV looks something like this:
Column A |
---|
Fluff |
Above Value |
Filter Value |
Below Value |
Fluff |
I need to check for a specific string in the 'Filter Value' cell, and if that's a match populate a table in a different sheet with the Filter Value, Above Value and Below Value, looking like this:
Filter Values | Above Values | Below Values |
---|---|---|
FValue 1 | AValue 1 | BValue1 |
FValue 2 | AValue 2 | BValue2 |
... | ... | ... |
The code I currently have looks like this, but it's not putting the Above / Below values into the proper positions:
Sub CopyRecords()
Dim FilterCol As Range
Dim Filter As Range
Dim PasteCell As Range
Dim PasteCellAbove As Range
Dim PasteCellBelow As Range
' Clear Destination table for testing
ThisWorkbook.Sheets(2).Range("A2:C999").Clear
Set FilterCol = ThisWorkbook.Sheets(1).Range("A1:A999")
For Each Filter In FilterCol
If ThisWorkbook.Sheets(2).Range("A2") = "" Then
Set PasteCell = ThisWorkbook.Sheets(2).Range("A2")
Set PasteCellAbove = ThisWorkbook.Sheets(2).Range("B2")
Set PasteCellBelow = ThisWorkbook.Sheets(2).Range("C2")
Else
Set PasteCell = ThisWorkbook.Sheets(2).Range("A1").End(xlDown).Offset(1, 0)
Set PasteCellAbove = ThisWorkbook.Sheets(2).Range("B1").End(xlDown).Offset(1, 0)
Set PasteCellBelow = ThisWorkbook.Sheets(2).Range("C1").End(xlDown).Offset(1, 0)
End If
If Left(Filter, 5) = "Testo" Then
Range(Filter.End(xlToLeft), Filter.End(xlToRight)).Copy PasteCell
Range(Filter.Offset(1, 0), Filter.Offset(0, 0)).Copy PasteCellAbove
Range(Filter.Offset(-1, 0), Filter.Offset(0, 0)).Copy PasteCellBelow
End If
Next Filter
End Sub
Anyone able to lend a hand?
CodePudding user response:
Try this - wherever possible it's much faster to work with arrays of data.
Sub CopyRecords()
Dim data, r As Long, rwOut As Range, v
'get all data as an array
With ThisWorkbook.Sheets(1)
data = ThisWorkbook.Sheets(1).Range("A1:A" & _
.Cells(.Rows.Count, "A").End(xlUp).Row).Value
End With
With ThisWorkbook.Sheets(2) 'reporting sheet
.Range("A2:C999").Clear 'clear destination table
Set rwOut = .Range("A2:C2") 'first row of output
End With
For r = 2 To UBound(data, 1) - 1
v = Trim(data(r, 1))
If v Like "*email:*" Then
rwOut.Value = Array(v, data(r - 1, 1), data(r 1, 1)) 'write values
Set rwOut = rwOut.Offset(1, 0) 'next row down
End If
Next r
End Sub
CodePudding user response:
If you really have a billion rows in your input file, I don't think you will want to open that in an Excel sheet in order to process it.
Here is a solution that opens a TextStream object and reads the source file line by line rather than reading it all into memory.
It dumps the output into a new worksheet in the Excel file, but depending on how large your output is, I wonder if you ultimately might want to write that out to a CSV file instead.
Anyway, here is a potential solution. Note that I didn't do any parsing of the "Before" and "After" lines.
Option Explicit
Public Sub extractData()
Const sourceName As String = "c:\apps\excel\so demo\input.csv" 'change this as necessary
Const maxOutputRecs As Long = 10000000
Dim fso As Scripting.FileSystemObject
Dim ts As Scripting.TextStream
Dim before, after, cLine As String
Dim n, i As Long
Dim xlSheet As Excel.Worksheet
Dim rng As Excel.Range
Dim data() As Variant
ReDim data(1 To maxOutputRecs, 1 To 3)
'Add header line to output array
data(1, 1) = "Testo"
data(1, 2) = "Before"
data(1, 3) = "After"
Set fso = New Scripting.FileSystemObject
Set ts = fso.OpenTextFile(Filename:=sourceName, IOMode:=ForReading, Create:=False)
i = 0
n = 0
cLine = ""
'read through source file line by line
Do While Not ts.AtEndOfStream
i = i 1
before = cLine
cLine = ts.ReadLine
If VBA.Left(cLine, 5) = "Testo" Then
n = n 1
after = ts.ReadLine
data(n 1, 1) = cLine
data(n 1, 2) = before
data(n 1, 3) = after
cLine = after
End If
If n 1 = maxOutputRecs Then
'end loop - may want to throw an error or write to a log file or do something else
Exit Do
End If
Loop
ts.Close
data = redim2DArrayRows(data, n 1, 3)
'create a new worksheet for the output
Set xlSheet = ThisWorkbook.Worksheets.Add
xlSheet.Name = "output"
'define the output range in the worksheet based on array size
Set rng = xlSheet.Range( _
xlSheet.Cells(1, 1), _
xlSheet.Cells(UBound(data, 1), UBound(data, 2)) _
)
'Write data out to sheet
rng.Value = data
End Sub
Public Function redim2DArrayRows(ByRef sourceArray() As Variant, ByVal rowBound As Long, ByVal colBound As Long) As Variant()
Dim newArr() As Variant
Dim i As Long
Dim j As Long
ReDim newArr(LBound(sourceArray, 1) To rowBound, LBound(sourceArray, 2) To colBound)
For i = LBound(newArr, 1) To UBound(newArr, 1)
For j = LBound(newArr, 2) To UBound(newArr, 2)
newArr(i, j) = sourceArray(i, j)
Next j
Next i
redim2DArrayRows = newArr
End Function
CodePudding user response:
Extract Data Using FindNext
Option Explicit
Sub ExtractData()
Const ProcTitle As String = "Extract Data"
Const sCriteria As String = "Testo*" ' begins with ("*Testo*" contains)
Const cCount As Long = 3 ' don't change: it's the same for source and dest.
Dim wb As Workbook: Set wb = ThisWorkbook
' Source
Dim sws As Worksheet: Set sws = wb.Worksheets(1)
Dim srg As Range
' Either static...
Set srg = sws.Range("A2:A999") ' no cell above 'A1'
' ... or dynamic:
'Set srg = sws.Range("A2", sws.Cells(sws.Rows.Count, "A").End(xlUp))
Dim sCell As Range
Set sCell = srg.Find(sCriteria, srg.Cells(srg.Rows.Count), xlValues, xlPart)
If sCell Is Nothing Then Exit Sub
Dim FirstAddress As String: FirstAddress = sCell.Address
Dim sTemp As Variant: ReDim sTemp(1 To cCount)
' Destination
Dim dws As Worksheet: Set dws = wb.Worksheets(2)
Dim dCell As Range: Set dCell = dws.Range("A2")
Dim dColl As Collection: Set dColl = New Collection
' Write the 3 values to the Temp array and add the array to the collection.
Do
' Modify here, if you don't need the complete cell contents.
' Cell
sTemp(1) = sCell.Value
' Above
sTemp(2) = sCell.Offset(-1).Value
' Below
sTemp(3) = sCell.Offset(1).Value
dColl.Add sTemp
Set sCell = srg.FindNext(sCell)
Loop Until sCell.Address = FirstAddress
Dim drCount As Long: drCount = dColl.Count
Dim dData As Variant: ReDim dData(1 To drCount, 1 To cCount)
Dim Item As Variant
Dim r As Long
Dim c As Long
' Loop over the arrays in the collection and write the elements
' of each array to a row of the Destination array.
For Each Item In dColl
r = r 1
For c = 1 To cCount
dData(r, c) = Item(c)
Next c
Next Item
' Write the values of the Destination array to the Destination range.
Dim drg As Range: Set drg = dCell.Resize(drCount, cCount)
drg.Value = dData
' Clear the range below the Destination range.
Dim dcrg As Range: Set dcrg = drg.Resize( _
dws.Rows.Count - drg.Row - drCount 1).Offset(drCount)
dcrg.Clear
'Debug.Print drg.Address(0, 0), dcrg.Address(0, 0)
MsgBox "Done.", vbInformation, ProcTitle
End Sub