How do I find duplicate ID numbers in an imported text file? The program needs to output each duplicate ID, the lines that share that ID, and the number of duplicate IDs. Currently each ID string is only compared with itself; I need it to be compared with the other records that have the same ID string.
The string ID is the first element of each line
The text file is:
7101003,Mike,23 boinig road,2615,48000,12000,0
7201003,Jane Philips,29 boinig cresent,2616,47000,12000,0
7301003,Philip Jane,23 bong road,2615,49000,000,0
7401004,Peta,23 bong bong road,2615,148000,19000,0
7101205,Abdulla,23 Station st,2615,80000,21000,0
7201003,Mary,47 doing st,2615,76000,44000,0
Current Code:
#include <iostream>
#include <string>
#include <vector>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <sstream>
using namespace std;
// Path of the comma-separated taxpayer data file that opt_D() opens.
string ffname{ "C:\\Users\\chuboi\\Documents\\ST2TaxExample\\Taxpayerdata.txt" };
// One taxpayer record, read from a single comma-separated line of the form
//   id,name,address,postcode,salary,deductions,taxDue
struct TaxPayer {
    // Data part
    std::string taxpayerId{};
    std::string name{};
    std::string address{};
    std::string postcode{};
    int salary{};
    int deductions{};
    double taxDue{};

    // Extractor: reads the next non-blank line from `is` and splits it into the fields above.
    // Blank lines (including lines holding only a '\r' from CRLF files) are skipped, so a
    // successful extraction always means `tp` was filled from a real record. Without this,
    // a blank line left the stream good and `tp` untouched, and a `while (is >> tp)` loop
    // stored the previous record a second time.
    // When no further record exists the stream is left in a failed state by std::getline.
    // std::stoi / std::stod throw std::invalid_argument or std::out_of_range when a numeric
    // field is missing or malformed.
    friend std::istream& operator >> (std::istream& is, TaxPayer& tp) {
        std::string line{};
        while (std::getline(is, line)) {
            // Drop the carriage return left behind by Windows line endings
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            // Nothing to parse on this line; try the next one
            if (line.empty())
                continue;

            // Put the line into a stringstream for further extraction
            std::istringstream iss{ line };

            // Text fields
            std::getline(iss, tp.taxpayerId, ',');
            std::getline(iss, tp.name, ',');
            std::getline(iss, tp.address, ',');
            std::getline(iss, tp.postcode, ',');

            // Numeric fields: read as text, then convert
            std::string field{};
            std::getline(iss, field, ',');
            tp.salary = std::stoi(field);
            std::getline(iss, field, ',');
            tp.deductions = std::stoi(field);
            std::getline(iss, field, ',');
            tp.taxDue = std::stod(field);

            // Exactly one record per extraction
            break;
        }
        return is;
    }

    // Simple inserter: writes all fields separated by single spaces, followed by a newline
    friend std::ostream& operator << (std::ostream& os, const TaxPayer& tp) {
        return os << tp.taxpayerId << ' ' << tp.name << ' ' << tp.address << ' ' << tp.postcode
                  << ' ' << tp.salary << ' ' << tp.deductions << ' ' << tp.taxDue << '\n';
    }
};
struct TaxPayers {
// Data
std::vector<TaxPayer> taxPayers{};
string dupID(size_t i) {
string duID = taxPayers[i].taxpayerId;
return duID;
}
string dupallinfo(size_t i) {
string dallID = taxPayers[i].taxpayerId " " taxPayers[i].name " "
taxPayers[i].address " " taxPayers[i].postcode;
return dallID;
}
// Extractor
friend std::istream& operator >> (std::istream& is, TaxPayers& tp) {
// clar old data
tp.taxPayers.clear();
// Read all new existing data and store in vector
TaxPayer taxPayer{};
while (is >> taxPayer)
tp.taxPayers.push_back(taxPayer);
return is;
}
// Simple inserter
friend std::ostream& operator << (std::ostream& os, const TaxPayers& tp) {
for (const TaxPayer& taxPayer : tp.taxPayers)
os << taxPayer;
return os;
}
};
void opt_D()
{
std::ifstream sourceFile(ffname);
if (!sourceFile.is_open())
{
cout << "No file found, please check for files again" << endl;
return;
}
TaxPayers taxPayers{};
sourceFile >> taxPayers;
int x = 1;
for (size_t i = 0; i < taxPayers.taxPayers.size(); i)
{
if (taxPayers.dupID(i) == taxPayers.dupID(i))
{
cout << "Duplicate Report" << endl;
cout << "-------------------" << endl;
cout << "File: " << ffname << endl;
cout << "Count of Records : " << taxPayers.taxPayers.size() << endl;
cout << "Duplicate Account Found: Account number is: " << taxPayers.dupID(i) << endl;
cout << "Details 1 : " << taxPayers.dupallinfo(i) << endl;
cout << "Details 2 : " << taxPayers.dupallinfo(i) << "\n\n";
cout << "Total count of duplicate ID's found: " << (x) << endl;
x ;
}
}
}
// Program entry point: runs the duplicate-ID report and exits.
int main()
{
    opt_D();
    return 0;
}
The current output is:
Duplicate Report
-------------------
Count of Records : 6
Duplicate Account Found: Account number is: 7101003
Details 1 : 7101003 Mike 23 boinig road 2615
Details 2 : 7101003 Mike 23 boinig road 2615
Total count of duplicate ID's found: 1
Duplicate Report
-------------------
Count of Records : 6
Duplicate Account Found: Account number is: 7201003
Details 1 : 7201003 Jane Philips 29 boinig cresent 2616
Details 2 : 7201003 Jane Philips 29 boinig cresent 2616
Total count of duplicate ID's found: 2
Continues like this for the other sets of data
While the output needs to be:
Duplicate Report
Count of Records : 6
Duplicate Account Found: Account number is: 7201003
Details 1 : 7201003 Jane Philips 29 boinig cresent 2616
Details 2 : 7201003 Mary 47 doing st 2615
Total count of duplicate ID's found: 1
CodePudding user response:
First of all, in the opt_D() function you should find the duplicate IDs and insert them into a list. Then, to produce the output, traverse the duplicate list and find the corresponding values in taxPayers.
// define a list for duplicate ids.
for (size_t i = 0; i < taxPayers.taxPayers.size(); i)
{
for (size_t j = i 1; j < taxPayers.taxPayers.size(); j)
{
if (taxPayers.dupID(i) == taxPayers.dupID(j))
{
// push taxPayers.dupID(i) to the duplicate id list
}
}
}
// print duplicate list size as output
// traverse the duplicate id list and find taxPayers having the same dupID from taxPayers list and print their info.