Home > front end >  Can't iterate through all the words in the file.txt
Can't iterate through all the words in the file.txt

Time:02-02

I have a text file that contains references to two other text files, i.e. main.txt lists eg1.txt and eg2.txt. I have to read the content of those files, find the occurrences of every word, and return a string with each word and the documents it was present in (0 being eg1.txt and 1 being eg2.txt). My program compiles, but I can't get past the first word I encounter. It gives the right result for that word (word: 0 1), since the word is present in both files and in the first position, but it doesn't return the other words. Could someone please help me find the error? Thank you.


// Builds an inverted index (word -> numbers of the files containing it) from
// the files that are listed, one per line, in `filename`.
// NOTE: the snippet is incomplete as posted: it ends after the outer loop,
// without a return statement and without the function's closing brace.
string func(string filename) {

map<string, set<int> > invInd;
string line, word;
int fileNum = 0;

ifstream list (filename, ifstream::in);
while (!list.eof()) {
  string fileName;
  getline(list, fileName);
  ifstream input_file(fileName, ifstream::in); //function to iterate through file

  if (input_file.is_open()) {

    while (getline(input_file, line)) {
      stringstream ss(line);

      while (ss >> word) {
        if (invInd.find(word) != invInd.end()) {
          set<int>&s_ref = invInd[word];
          s_ref.insert(fileNum);
        }
        else {
          set<int> s;
          s.insert(fileNum);
          invInd.insert(make_pair<string, set<int> >(string(word) , s));
        }
      }

    }
    input_file.close();
  }
  fileNum++; // the next listed file gets the next number
}

CodePudding user response:

Basically your function works. It is a little bit complicated, but it works.

After removing some syntax errors, the main problem is that you do not return anything from your function. There is also no output statement.

Let me show you the corrected function, which also prints some output.

#include <string>
#include <map>
#include <iostream>
#include <fstream>
#include <set>
#include <sstream>
#include <utility>

using namespace std;

// Builds an inverted index (word -> numbers of the files containing it) and
// prints it to std::cout, one line per word: "<word> : <n> <n> ...".
//
// filename: path of a text file that lists one input file per line.
//
// Files are numbered from 0 in the order they are listed. Every line of the
// list consumes a number, even when the file it names cannot be opened; such
// a file simply contributes no words. If the list itself cannot be opened,
// nothing is printed.
void func(std::string filename) {

    std::map<std::string, std::set<int>> invInd;
    int fileNum = 0;

    std::ifstream fileList(filename);
    std::string fileName;

    // Loop on the result of getline rather than on eof(): eof() is only set
    // after a read has failed, and it is never set for a stream that failed
    // to open, which would make an eof()-controlled loop spin forever.
    while (std::getline(fileList, fileName)) {
        std::ifstream input_file(fileName);

        // operator>> skips all whitespace (line breaks included), so the file
        // can be read word by word. On a stream that failed to open, the
        // first extraction fails and the loop body never runs.
        std::string word;
        while (input_file >> word) {
            // operator[] creates an empty set the first time a word is seen.
            invInd[word].insert(fileNum);
        }
        ++fileNum;
    }

    // Show the output
    for (const auto& [word, fileNumbers] : invInd) {
        std::cout << word << " : ";
        for (const int fileNumber : fileNumbers) std::cout << fileNumber << ' ';
        std::cout << '\n';
    }
}

int main() {
    // Index the files named in "files.txt" and print the result.
    func("files.txt");
    return 0;
}

This works, I tested it. But maybe you want to return the findings to your main function. Then you should write:

#include <string>
#include <map>
#include <iostream>
#include <fstream>
#include <set>
#include <sstream>
#include <utility>

using namespace std;

// Builds an inverted index: for every word, the numbers of the files that
// contain it.
//
// filename: path of a text file that lists one input file per line.
// Returns:  map from word to the set of file numbers it occurs in. The map
//           is empty when the list cannot be opened.
//
// Files are numbered from 0 in the order they are listed. Every line of the
// list consumes a number, even when the file it names cannot be opened; such
// a file simply contributes no words.
std::map<std::string, std::set<int>> func(std::string filename) {

    std::map<std::string, std::set<int>> invInd;
    int fileNum = 0;

    std::ifstream fileList(filename);
    std::string fileName;

    // Loop on the result of getline rather than on eof(): eof() is only set
    // after a read has failed, and it is never set for a stream that failed
    // to open, which would make an eof()-controlled loop spin forever.
    while (std::getline(fileList, fileName)) {
        std::ifstream input_file(fileName);

        // operator>> skips all whitespace (line breaks included), so the file
        // can be read word by word. On a stream that failed to open, the
        // first extraction fails and the loop body never runs.
        std::string word;
        while (input_file >> word) {
            // operator[] creates an empty set the first time a word is seen.
            invInd[word].insert(fileNum);
        }
        ++fileNum;
    }
    return invInd;
}

int main() {
    map<string, set<int>> data = func("files.txt");

    // Show the output
    for (const auto& [word, fileNumbers] : data) {
        std::cout << word << " : ";
        for (const int fileNumber : fileNumbers) std::cout << fileNumber << ' ';
        std::cout << '\n';
    }
}

Please enable C++17 in your compiler (the structured bindings in the output loop require it).

And please see below a brushed-up solution: a little cleaner and more compact, with comments and better variable names.

#include <string>
#include <map>
#include <iostream>
#include <fstream>
#include <set>
#include <sstream>
#include <utility>

// Maps each word to the set of numbers of the files in which it occurs.
using WordFileIndicator = std::map<std::string, std::set<int>>;

// Builds the word -> file-number index for all files named in a list file.
//
// fileNameForFileLists: path of a text file that lists one input file per
//                       line. Reading stops at the first empty line.
// Returns: the index. It is empty when the list file cannot be opened.
//
// Files are numbered from 0 in the order they are listed. A file that cannot
// be opened is reported on std::cerr, contributes no words, and still
// consumes its number, so the numbers always match the positions in the list.
WordFileIndicator getWordsWithFiles(const std::string& fileNameForFileLists) {

    // Here we will store the resulting output
    WordFileIndicator wordFileIndicator{};

    // Open the file and check, if it could be opened
    if (std::ifstream istreamForFileList{ fileNameForFileLists }; istreamForFileList) {

        // File number Reference
        int fileNumber{};

        // Read all filenames from the list of filenames
        for (std::string fileName{}; std::getline(istreamForFileList, fileName) and not fileName.empty();) {

            // Open the files to read their content. Check, if the file could be opened
            if (std::ifstream ifs{ fileName }; ifs) {

                // Add word and associated file number to set
                for (std::string word{}; ifs >> word; )
                    wordFileIndicator[word].insert(fileNumber);
            }
            else std::cerr << "\n*** Error: Could not open '" << fileName << "'\n\n";
            // Continue with next file
            ++fileNumber;
        }
    }
    else std::cerr << "\n*** Error: Could not open '" << fileNameForFileLists << "'\n\n";
    return wordFileIndicator;
}

// Some test code
int main() {

    // Get result. All words and in which file they exists
    WordFileIndicator data = getWordsWithFiles("files.txt");

    // Show the output
    for (const auto& [word, fileNumbers] : data) {
        std::cout << word << " : ";
        for (const int fileNumber : fileNumbers) std::cout << fileNumber << ' ';
        std::cout << '\n';
    }
}

There would be a much faster solution by using std::unordered_map and std::unordered_set

CodePudding user response:

Please make sure your code is composed of many small functions. This improves readability, makes it easier to reason about what the code does, and allows parts of the code to be reused in other contexts.

Here is a demo of how it can look and why it is better to have small functions:

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>

// Describes one input file: its path and the number assigned to it.
struct FileData
{
    std::filesystem::path path;
    int index;
};

// Two entries are equal when both the number and the path match.
bool operator==(const FileData& lhs, const FileData& rhs)
{
    if (lhs.index != rhs.index)
        return false;
    return lhs.path == rhs.path;
}

// Exact negation of operator==.
bool operator!=(const FileData& lhs, const FileData& rhs)
{
    return !operator==(lhs, rhs);
}

// Maps each word to the list of files in which it was found.
using WordLocations = std::unordered_map<std::string, std::vector<FileData>>;

// Records `fileData` as a location for every word in the range [b, e).
//
// loc:      index to update; words not seen before are added.
// fileData: the file the words come from.
// b, e:     iterator range whose elements are usable as keys of `loc`.
//
// Only the most recently added location of a word is compared against
// `fileData`, so a file is listed once per word as long as all words of one
// file are merged before the words of the next file.
template<typename T>
void mergeWordsFrom(WordLocations& loc, const FileData& fileData, T b, T e)
{
    for (; b != e; ++b)
    {
        auto& v = loc[*b];
        if (v.empty() || v.back() != fileData)
            v.push_back(fileData);
    }
}

// Records `fileData` as a location for every whitespace-separated word that
// can be extracted from `in`.
void mergeWordsFrom(WordLocations& loc, const FileData& fileData, std::istream& in)
{
    using WordIterator = std::istream_iterator<std::string>;
    // A default-constructed stream iterator marks the end of the input.
    mergeWordsFrom(loc, fileData, WordIterator{in}, WordIterator{});
}

// Opens the file named by `fileData.path` and merges its words into `loc`.
// No check is made that the file could be opened.
void mergeWordsFrom(WordLocations& loc, const FileData& fileData)
{
    std::ifstream source{fileData.path};
    mergeWordsFrom(loc, fileData, source);
}

// Builds the word index for every file path in the range [b, e).
//
// b, e:    iterator range whose elements are assignable to
//          std::filesystem::path.
// Returns: the locations of all words found in those files.
//
// The index is incremented before each file is processed, so the files are
// numbered 1, 2, 3, ... in the order they appear in the range.
template<typename T>
WordLocations wordLocationsFromFileList(T b, T e)
{
    WordLocations loc;
    FileData fileData{{}, 0};
    for (; b != e; ++b)
    {
        ++fileData.index;
        fileData.path = *b;
        mergeWordsFrom(loc, fileData);
    }
    return loc;
}

// Builds the word index for the file paths that can be extracted from `in`
// with operator>>.
WordLocations wordLocationsFromFileList(std::istream& in)
{
    using PathIterator = std::istream_iterator<std::filesystem::path>;
    // A default-constructed stream iterator marks the end of the input.
    return wordLocationsFromFileList(PathIterator{in}, PathIterator{});
}

// Builds the word index for the files listed in the file at path `p`.
WordLocations wordLocationsFromFileList(const std::filesystem::path& p)
{
    std::ifstream listStream{p};
    // Request an exception if the stream ever enters the badbit state.
    listStream.exceptions(std::ifstream::badbit);
    return wordLocationsFromFileList(listStream);
}

void printLocations(std::ostream& out, const WordLocations& locations)
{
    for (auto& [word, filesData] : locations)
    {
        out << std::setw(10) << word << ": ";
        for (auto& file : filesData)
        {
            out << std::setw(3) << file.index << ':' << file.path << ", ";
        }
        out << '\n';
    }
}

int main()
{
    // Index the files listed in "files.txt" and print the result.
    printLocations(std::cout, wordLocationsFromFileList("files.txt"));
}

https://wandbox.org/permlink/nBbqYV986EsqvN3t

  •  Tags:  
  • Related