I have two huge workbooks (old & new) of annual employee data and trying to compare. Each workbook has the same header and employees are in random order. Here is what I'm trying to accomplish:
- Use employee ID (in column D) as reference and compare if they’ve changed information, specially Physician (in column L).
- Generate report highlight the different cell with added column (Change Information “Yes/No”) if there are changes or not.
Problem: This code compare cell by cell only (took a lot of time) and not per employee id how could I insert here the looping of employee id? I am newbie in VBA. Any guidance on how I should go about this? Thanks.
Sub compare2Worksheets()
Dim ws1row As Long, ws2row As Long, ws1col As Integer, ws2col As Integer
Dim maxrow As Long, maxcol As Integer, colval1 As String, colval2 As String
Dim report As Workbook, difference As Long
Dim row As Long, col As Integer
Dim ws1 As Workbooks
Dim ws2 As Workbooks
Set report = Workbooks.Add
'range of Data1
Set ws1 = ThisWorkbook.Worksheets(“Data1”)
With ws1.UsedRange
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
'range of Data2
Set ws2 = myworkbook.Worksheets(“Data2”)
With ws2.UsedRange
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
'generate report
report.Worksheets(“Sheet1”).Activate
Cells.Clear
Range(“A1”) = “FirstName”
Range(“B1”) = “LastName”
Range(“C1”) = “DOB”
Range(“D1”) = “EmployeeID”
Range(“E1”) = “Address”
Range(“F1”) = “Emailadd”
Range(“G1”) = “Mobilenumber”
Range(“H1”) = “DeptID”
Range(“I1”) = “DeptName”
Range(“J1”) = “Position”
Range(“K1”) = “Status”
Range(“L1”) = “Physician”
Range(“M1”) = “Change InformationY / N”
erow = Data1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).row
'look for differences
difference = 0
For col = 1 To maxcol
For row = 1 To maxrow
colval1 = ws1.Cells(row, col)
colval2 = ws2.Cells(row, col)
If colval1 <> colval2 Then
difference = difference 1
'not matched display and highlight
Cells(row, col) = colval1 & “ <> ” & colval2
Cells(row, col).Interior.Color = 255
Cells(row, col).Font.ColorIndex = 2
Cells(row, col).Font.Bold = True
'to update “Change InformationY / N”
Cells(row 1, 13).Value = "Yes"
Else
Cells(row, col) = colval2
Cells(row 1, 13).Value = "No"
End If
Next row
Next col
'saving report
If difference > 0 Then
Columns("A:B").ColumnWidth = 25
myfilename = InputBox("Enter Filename")
myfilename = myfilename & “.xlsx”
ActiveWorkbook.SaveAs Filename:=myfilename
End If
End Sub
CodePudding user response:
I would do the following here:
First I would create an array for the EmployeeID and the rows I found them in both sheets.
For that I need to declare a RecordType (has to be defined at the beginning of the module, not in the procedure!) I assume, that you have less than 1024 employees to handle, if more, simply use a higher value in the Dim-Statement. I also assume, that the Employee-Id is a string, otherwise you have to use 'Long' or 'Integer' instead
Type EmpRowRec
EmpId as string
Row1 as Long
Row2 as Long
End Type
Dim EmpRowArr(1 to 1024) as EmpRowRec, EmpRowCnt as integer
Then I would go through both sheets and search for the row with the data for an employee:
Dim CurRow as long, CurEmpRow as integer,EmpRowOut as integer
…
EmpRowCnt=0
For CurRow = 2 to ws1Row
Colval1=ws1.cells(currow,4).value
EmpRowCnt=EmpRowCnt 1
EmpRowArr(EmpRowCnt).EmpId=colval1
EmpRowArr(EmpRowCnt).row1=CurRow
Next CurRow
For CurRow = 2 to ws2Row
Colval1=ws2.cells(currow,4).value
EmpRowOut=0
For CurEmpRow=1 to EmpRowCnt
If EmpRowArr(CurEmpRow).EmpId=ColVal1 then EmpRowOut=0:Exit For
Next CurEmpRow
If EmpRowOut=0 then ' Employee is only in sheet 2
EmpRowCnt=EmpRowCnt 1
EmpRowArr(EmpRowCnt).EmpId=colval1
EmpRowArr(EmpRowCnt).row2=CurRow
else
EmpRowArr(EmpRowOut).row2=CurRow
End If
Next CurRow
Now you can go through the array and create your report:
Currow =1 'You already copied the head values
For CurEmpRow=1 to EmpRowCnt
with EmpRowArr(CurEmpRow)
If (.row1>0) and (.row2>0) then 'your result will show only employees in both sheets
Currow=currow 1
For col=1 to maxcol
Colval1=ws1.cells(.row1,col).value
Colval2=ws1.cells(.row2,col).value
Report.cells(currow,col).value=colval1
If colval1<>colval2 then report.cells(currow,col).interior.color=rgb(255,200,200)
Next col
End if
End with
Next CurEmpRow
This method shall show you a generic way to solve such a problem (I have to deal very often with). For sure adaptions e.g. how to handle employees appearing only in one sheet, marking changes with low or high impact are needed, but here I can't help you since I don't know your exact requests.
Due to the fact, that I wrote this text only in word I could not test the fragments under VBA, so some small errors may occur. Please try to fix it.
CodePudding user response:
This is the code with your logic:
Type EmpRowRec
EmpId As String
Row1 As Long
Row2 As Long
End Type
Sub compare2Worksheets()
Dim ws1row As Long, ws2row As Long, ws1col As Integer, ws2col As Integer
Dim maxrow As Long, maxcol As Integer, colval1 As String, colval2 As String
Dim report As Workbook, difference As Long
Dim row As Long, col As Integer
Dim CurRow As Long, CurEmpRow As Integer, EmpRowOut As Integer
Dim wbkA As Workbook, wbkB As Workbook
Dim EmpRowArr(1 To 1024) As EmpRowRec, EmpRowCnt As Integer
'get worksheets from the workbooks
Set wbkA = Workbooks("Data1")
Set ws1 = wbkA.Worksheets("Data1")
'range of Data1
With ws1.UsedRange
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
Set wbkB = Workbooks("Data2")
Set ws2 = wbkB.Worksheets("Data2")
'range of Data2
With ws2.UsedRange
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
'generate report workbook
Set report = Workbooks.Add
report.Worksheets("Sheet1").Activate
Cells.Clear
Range(“A1”) = “FirstName”
Range(“B1”) = “LastName”
Range(“C1”) = “DOB”
Range(“D1”) = “EmployeeID”
Range(“E1”) = “Address”
Range(“F1”) = “Emailadd”
Range(“G1”) = “Mobilenumber”
Range(“H1”) = “DeptID”
Range(“I1”) = “DeptName”
Range(“J1”) = “Position”
Range(“K1”) = “Status”
Range(“L1”) = “Physician”
Range(“M1”) = “Change InformationY / N”
erow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).row
'go through both sheets and search for the row with the data for an employee
EmpRowCnt = 0
For CurRow = 2 To maxrow 'ws1row
colval1 = ws1.Cells(CurRow, 4).Value
EmpRowCnt = EmpRowCnt 1
EmpRowArr(EmpRowCnt).EmpId = colval1
EmpRowArr(EmpRowCnt).Row1 = CurRow
Next CurRow
For CurRow = 2 To maxrow 'ws2row
colval1 = ws2.Cells(CurRow, 4).Value
EmpRowOut = 0
For CurEmpRow = 1 To EmpRowCnt
If EmpRowArr(CurEmpRow).EmpId = colval1 Then EmpRowOut = 0: Exit For
Next CurEmpRow
If EmpRowOut = 0 Then ' Employee is only in sheet 2
EmpRowCnt = EmpRowCnt 1
EmpRowArr(EmpRowCnt).EmpId = colval1
EmpRowArr(EmpRowCnt).Row2 = CurRow
Else
EmpRowArr(EmpRowOut).Row2 = CurRow
End If
Next CurRow
'go through the array and create your report
CurRow = 1 'You already copied the head values
For CurEmpRow = 1 To EmpRowCnt
With EmpRowArr(CurEmpRow)
If (.Row1 > 0) And (.Row2 > 0) Then 'your result will show only employees in both sheets
CurRow = CurRow 1
For col = 1 To maxcol
colval1 = ws1.Cells(.Row1, col).Value
colval2 = ws1.Cells(.Row2, col).Value
report.Cells(CurRow, col).Value = colval1
If colval1 <> colval2 Then report.Cells(CurRow, col).Interior.Color = RGB(255, 200, 200)
Next col
End If
End With
Next CurEmpRow
If CurRow > 0 Then
Columns("A:Y").ColumnWidth = 25
myfilename = InputBox("Enter Filename")
myfilename = myfilename & “.xlsx”
ActiveWorkbook.SaveAs Filename:=myfilename
End If
End Sub
CodePudding user response:
Use a Dictionary as a lookup table for the row number of each ID on the old data sheet. Then scan down the new sheet comparing rows with the same ID. IDs that appear on the new sheet but not the old are tagged "added". Those on the old sheet but not the new are tagged "deleted".
Option Explicit
Sub compare2Worksheets()
' config
Const COL_ID = "D"
Const COLS = 12 ' header col A to L
Dim wb1 As Workbook, wb2 As Workbook, wbRep As Workbook
Dim ws1 As Worksheet, ws2 As Worksheet, wsRep As Worksheet
Dim LastRow As Long, c As Long, i As Long, r As Long, n As Long
Dim bDiff As Boolean, t0 As Single
t0 = Timer
Dim dict As Object, key As String
Set dict = CreateObject("Scripting.Dictionary")
'range of Data1
Set wb1 = ThisWorkbook
Set wb2 = ThisWorkbook ' or other
Set ws1 = wb1.Sheets("Data1") ' old data
Set ws2 = wb1.Sheets("Data2") ' new data
' build lookup from data1
With ws1
LastRow = .Cells(.Rows.Count, COL_ID).End(xlUp).row
For i = 2 To LastRow
key = Trim(.Cells(i, COL_ID))
If dict.exists(key) Then
MsgBox "Duplicate ID " & key, vbCritical, .Name & " Row " & i
Exit Sub
ElseIf Len(key) > 0 Then
dict.Add key, i
End If
Next
End With
' format report sheet
Set wbRep = Workbooks.Add(1)
Set wsRep = wbRep.Sheets(1)
wsRep.Name = "Created " & Format(Now, "YYYY-MM-DD HHMMSS")
wsRep.Cells.Clear
ws1.Range("A1").Resize(, COLS).Copy wsRep.Range("A1")
wsRep.Cells(1, COLS 1) = "Change InformationY / N"
' copare data2 new data to data1 old data
' copy diff to report
Application.ScreenUpdating = False
With ws2
LastRow = .Cells(.Rows.Count, COL_ID).End(xlUp).row
For i = 2 To LastRow
key = Trim(.Cells(i, COL_ID))
wsRep.Cells(i, COL_ID) = key
If dict.exists(key) Then
r = dict(key)
dict.Remove key ' remove
' check columns in row
bDiff = False
For c = 1 To COLS
If .Cells(i, c) <> ws1.Cells(r, c) Then
With wsRep.Cells(i, c)
.Value = ws2.Cells(i, c) & "<>" & ws1.Cells(r, c)
.Interior.Color = 255
.Font.ColorIndex = 2
.Font.Bold = True
End With
bDiff = True
End If
Next
If bDiff Then
wsRep.Cells(i, COLS 1) = "Yes"
n = n 1
Else
wsRep.Cells(i, COLS 1) = "No"
End If
Else
' copy all
.Cells(i, 1).Resize(, COLS).Copy wsRep.Cells(i, 1)
wsRep.Cells(i, COLS 1) = "Added"
n = n 1
End If
Next
End With
' keys remaining
Dim k
With ws1
For Each k In dict.keys
r = dict(k)
.Cells(r, 1).Resize(, COLS).Copy wsRep.Cells(i, 1)
wsRep.Cells(i, COL_ID) = k
wsRep.Cells(i, COLS 1) = "Deleted"
i = i 1
n = n 1
Next
End With
Application.ScreenUpdating = True
Dim s As String, yn
wsRep.Columns("A:M").AutoFit
yn = MsgBox(n & " lines differ, save report Y/N ?", vbYesNo, _
Format(Timer - t0, "0.0 secs"))
If yn = vbYes Then
s = InputBox("Enter Filename")
wbRep.SaveAs Filename:=s & ".xlsx"
End If
wbRep.Close False
End Sub