Home > Enterprise >  Find duplicate strings and match them together from an imported data file? C
Find duplicate strings and match them together from an imported data file? C

Time:04-04

How do I find duplicate ID numbers in an imported text file? The program needs to output each duplicate ID, the lines that share that ID, and the number of duplicate IDs. Currently each string is only matched against itself; I need it to be matched against another line that has the same string ID.

The string ID is the first element of each line

The text file is:

7101003,Mike,23 boinig road,2615,48000,12000,0

7201003,Jane Philips,29 boinig cresent,2616,47000,12000,0

7301003,Philip Jane,23 bong road,2615,49000,000,0

7401004,Peta,23 bong bong road,2615,148000,19000,0

7101205,Abdulla,23 Station st,2615,80000,21000,0

7201003,Mary,47 doing st,2615,76000,44000,0

Current Code:

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Code further down in this file uses unqualified standard-library names (string, cout, endl).
using namespace std;

// Path of the taxpayer data file; opt_D() opens this file and prints the path in its report.
string ffname = "C:\\Users\\chuboi\\Documents\\ST2TaxExample\\Taxpayerdata.txt";

/// One taxpayer record, stored in the data file as a single comma-separated line:
/// `id,name,address,postcode,salary,deductions,taxDue`.
struct TaxPayer {
    // Data part
    std::string taxpayerId{};
    std::string name{};
    std::string address{};
    std::string postcode{};
    int salary{};
    int deductions{};
    double taxDue{};

    /// Extractor: reads the next non-blank line from `is` and parses it into `tp`.
    ///
    /// Blank lines (including lines holding only a trailing '\r') are skipped, so
    /// a caller looping on `while (is >> tp)` never sees a stale record repeated
    /// for an empty line. If a numeric field cannot be converted, `failbit` is set
    /// on `is` and `tp` is left unchanged instead of letting the conversion
    /// exception escape.
    ///
    /// @param is  Stream positioned at the start of a line.
    /// @param tp  Record to fill; only modified when a whole line parsed cleanly.
    /// @return    `is`, in a failed state at end of input or on a malformed line.
    friend std::istream& operator >> (std::istream& is, TaxPayer& tp) {
        // Find the next line that actually carries data.
        std::string line{};
        while (std::getline(is, line)) {
            // Tolerate Windows line endings read through a non-translating stream.
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            if (!line.empty())
                break;
        }
        if (!is)
            return is;  // Ran out of input before finding a non-blank line.

        // Put the line into a stringstream for field-by-field extraction.
        std::istringstream iss{ line };

        // Parse into a scratch record so a bad line cannot half-overwrite `tp`.
        TaxPayer parsed{};
        std::getline(iss, parsed.taxpayerId, ',');
        std::getline(iss, parsed.name, ',');
        std::getline(iss, parsed.address, ',');
        std::getline(iss, parsed.postcode, ',');

        std::string salaryText{};
        std::string deductionsText{};
        std::string taxDueText{};
        std::getline(iss, salaryText, ',');
        std::getline(iss, deductionsText, ',');
        std::getline(iss, taxDueText, ',');

        try {
            parsed.salary = std::stoi(salaryText);
            parsed.deductions = std::stoi(deductionsText);
            parsed.taxDue = std::stod(taxDueText);
        }
        catch (const std::logic_error&) {
            // std::invalid_argument and std::out_of_range both derive from
            // std::logic_error; report either as an ordinary stream failure.
            is.setstate(std::ios_base::failbit);
            return is;
        }

        tp = parsed;
        return is;
    }

    /// Simple inserter: writes all fields separated by single spaces, followed by a newline.
    friend std::ostream& operator << (std::ostream& os, const TaxPayer& tp) {
        return os << tp.taxpayerId << ' ' << tp.name << ' ' << tp.address << ' ' << tp.postcode
            << ' ' << tp.salary << ' ' << tp.deductions << ' ' << tp.taxDue << '\n';
    }

};
struct TaxPayers {
    // Data
    std::vector<TaxPayer> taxPayers{};

    string dupID(size_t i) {
        string duID = taxPayers[i].taxpayerId;
        return duID;
    }

    string dupallinfo(size_t i) {
        string dallID = taxPayers[i].taxpayerId   " "   taxPayers[i].name   " "  
            taxPayers[i].address   " "   taxPayers[i].postcode;
        return dallID;
    }



    // Extractor
    friend std::istream& operator >> (std::istream& is, TaxPayers& tp) {
        // clar old data
        tp.taxPayers.clear();


        // Read all new existing data and store in vector
        TaxPayer taxPayer{};

        while (is >> taxPayer)
            tp.taxPayers.push_back(taxPayer);

        return is;
    }
    // Simple inserter
    friend std::ostream& operator << (std::ostream& os, const TaxPayers& tp) {
        for (const TaxPayer& taxPayer : tp.taxPayers)
            os << taxPayer;
        return os;
    }


};



void opt_D()
{
    std::ifstream sourceFile(ffname);

    if (!sourceFile.is_open())
    {
        cout << "No file found, please check for files again" << endl;
        return;
    }

    TaxPayers taxPayers{};
    sourceFile >> taxPayers;
    int x = 1;

    for (size_t i = 0; i < taxPayers.taxPayers.size();   i)
    {
        if (taxPayers.dupID(i) == taxPayers.dupID(i))
        {
            cout << "Duplicate Report" << endl;
            cout << "-------------------" << endl;
            cout << "File: " << ffname << endl;
            cout << "Count of Records  : " << taxPayers.taxPayers.size() << endl;
            cout << "Duplicate Account Found: Account number is: " << taxPayers.dupID(i) << endl;
            cout << "Details 1       : " << taxPayers.dupallinfo(i) << endl;
            cout << "Details 2       : " << taxPayers.dupallinfo(i) << "\n\n";
            cout << "Total count of duplicate ID's found: " << (x) << endl;
            x  ;
        }
    }

   
}


// Program entry point: runs the duplicate-ID report and exits with status 0.
int main()
{
    opt_D();
    return 0;
}

The current output is:

Duplicate Report

-------------------
Count of Records  : 6

Duplicate Account Found: Account number is: 7101003

Details 1       : 7101003 Mike 23 boinig road 2615

Details 2       : 7101003 Mike 23 boinig road 2615

Total count of duplicate ID's found: 1

Duplicate Report

-------------------
Count of Records  : 6

Duplicate Account Found: Account number is: 7201003

Details 1       : 7201003 Jane Philips 29 boinig cresent 2616

Details 2       : 7201003 Jane Philips 29 boinig cresent 2616

Total count of duplicate ID's found: 2

Continues like this for the other sets of data


While the output needs to be:

Duplicate Report

Count of Records : 6

Duplicate Account Found: Account number is: 7201003

Details 1 : 7201003 Jane Philips 29 boinig cresent 2616

Details 2 : 7201003 Mary 47 doing st 2615

Total count of duplicate ID's found: 1

CodePudding user response:

First of all, in the opt_D() function you should find the duplicate IDs and insert them into a list. Then, to produce the output, traverse the duplicate list and find the corresponding values in taxPayers.

 // define a list for duplicate ids. 


for (size_t i = 0; i < taxPayers.taxPayers.size();   i)
    {
     for (size_t j = i 1; j < taxPayers.taxPayers.size();   j)
          {
            if (taxPayers.dupID(i) == taxPayers.dupID(j))
            {

             // push taxPayers.dupID(i) to the duplicate id list


           }
        }
    }


// print duplicate list size as output
// traverse the duplicate id list and find taxPayers having the same dupID from taxPayers list and print their info.
  •  Tags:  
  • c
  • Related