Home > OS >  Metadata Like Config file parser
Metadata Like Config file parser

Time:10-04

I am trying to parse this config file:

MODEL: "modelname1" { FILEPATH = "FILEPATH1"; TEXTUREPATH = "TEXTUREPATH1"; NORMALPATH = "NORMALPATH1"; }
MODEL:"modelname2"{FILEPATH = "FILEPATH2";TEXTUREPATH = "TEXTUREPATH2";NORMALPATH = "NORMALPATH2";}

here is my attempt :

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;


// One MODEL record from the config file. All members are plain strings and
// start out empty until the parser fills them in.
struct ModelData
{
    std::string tagName;      // quoted name that follows `MODEL:`
    std::string filePath;     // value of the FILEPATH entry
    std::string texturePath;  // value of the TEXTUREPATH entry
    std::string normalPath;   // value of the NORMALPATH entry
};

// Global list of parsed MODEL records; main() appends one entry per record it reads.
vector<ModelData> g_modelData;

/**
 * Extracts the value of one `KEY=VALUE` entry when its key matches.
 *
 * Double quotes are removed from both the key and the value. Whitespace is
 * NOT trimmed here: the key must already compare equal to `_tagName`.
 *
 * @param _source    A single entry such as `FILEPATH="models/a.obj"`.
 * @param _tagName   The key to look for, e.g. `FILEPATH`.
 * @param _outStrVal Receives the unquoted value when the key matches; left
 *                   untouched when it does not match or when `_source`
 *                   contains no '='.
 */
void GetStringValue( std::string _source, const std::string& _tagName, std::string& _outStrVal )
{
    const std::size_t equalPos = _source.find('=');
    if (equalPos == std::string::npos)
    {
        // Not a KEY=VALUE entry. Without this guard a bare "FILEPATH" would
        // be reported as its own value.
        return;
    }

    std::string tagName = _source.substr(0, equalPos);
    tagName.erase(std::remove(tagName.begin(), tagName.end(), '"'), tagName.end());
    if (tagName != _tagName)
    {
        return;
    }

    // Keep only what follows the '=' and drop the surrounding quotes.
    _source.erase(0, equalPos + 1);
    _source.erase(std::remove(_source.begin(), _source.end(), '"'), _source.end());

    _outStrVal = _source;
}

/**
 * Loads every MODEL record from "modeldata.txt" into g_modelData.
 *
 * A record has the shape
 *     MODEL: "name" { KEY = "value"; KEY = "value"; ... }
 * and may sit on a single line or be spread over several lines. Records are
 * therefore read by delimiter (up to '{', then up to '}') rather than line by
 * line, so both layouts are accepted. Entries inside a block may appear in
 * any order; unknown keys and blocks whose tag is not MODEL are ignored.
 *
 * Limitations: '{', '}' and ';' always act as delimiters, even inside a
 * quoted value, and nested blocks are not supported.
 *
 * @return 0 on success, 1 when the file could not be opened.
 */
int main()
{
    // Removes blanks, tabs and line breaks that sit outside double quotes, so
    // keys compare exactly while quoted values keep their inner spaces.
    const auto stripUnquotedWhitespace = [](const std::string& text) -> std::string
    {
        std::string result;
        result.reserve(text.size());
        bool inQuotes = false;
        for (const char ch : text)
        {
            if (ch == '"')
            {
                inQuotes = !inQuotes;
            }
            const bool isSpace = (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
            if (inQuotes || !isSpace)
            {
                result.push_back(ch);
            }
        }
        return result;
    };

    int exitCode = 0;

    std::ifstream infile("modeldata.txt", std::ios::in);
    if (!infile.good())
    {
        std::cout << "Error opening file!" << std::endl;
        exitCode = 1;  // the read loop below is skipped because the stream is bad
    }

    std::string header;  // text in front of '{', e.g. `MODEL: "name"`
    std::string body;    // text between '{' and '}'
    while (std::getline(infile, header, '{') && std::getline(infile, body, '}'))
    {
        header = stripUnquotedWhitespace(header);

        const std::size_t colonPos = header.find(':');
        if (colonPos == std::string::npos || header.compare(0, colonPos, "MODEL") != 0)
        {
            continue;  // not a MODEL block
        }

        ModelData md;
        md.tagName = header.substr(colonPos + 1);
        md.tagName.erase(std::remove(md.tagName.begin(), md.tagName.end(), '"'), md.tagName.end());

        body = stripUnquotedWhitespace(body);

        // Walk the ';'-separated entries. Each entry is offered to all three
        // keys because GetStringValue only writes when the key matches.
        std::size_t entryStart = 0;
        while (entryStart < body.size())
        {
            std::size_t entryEnd = body.find(';', entryStart);
            if (entryEnd == std::string::npos)
            {
                entryEnd = body.size();
            }

            const std::string entry = body.substr(entryStart, entryEnd - entryStart);
            GetStringValue(entry, "FILEPATH", md.filePath);
            GetStringValue(entry, "TEXTUREPATH", md.texturePath);
            GetStringValue(entry, "NORMALPATH", md.normalPath);

            entryStart = entryEnd + 1;
        }

        g_modelData.push_back(md);
    }

    infile.close();

    // Kept from the original: holds the console window open (Windows shell command).
    std::system("pause");
    return exitCode;
}

but what if I format the config file to

MODEL: "modelname1" 
{ 
    FILEPATH = "FILEPATH1"; 
    TEXTUREPATH = "TEXTUREPATH1"; 
    NORMALPATH = "NORMALPATH1"; 
}

MODEL:"modelname2"
{
    FILEPATH = "FILEPATH2";
    TEXTUREPATH = "TEXTUREPATH2";
    NORMALPATH = "NORMALPATH2";
}

then reading line by line with getline won't work and I would have to parse the file character by character.

So I wanted to ask: what would be the quickest way to implement support for this format? I want to extract this data inside the program and use it to load things.

I am ok if you want to redesign this config file for better flow.

CodePudding user response:

You can use regular expressions to match strings and extract data from them. std::regex is part of the standard library, just like std::string and std::vector. Here is an example:

#include <iostream>
#include <regex>
#include <string>
#include <sstream>

// One parsed model entry; load_from_stram() fills each field from the
// matching `KEY = value` line.
struct ModelData
{
    std::string tagName;      // captured from the `MODEL = ...` line
    std::string filePath;     // captured from the `FILEPATH = ...` line
    std::string texturePath;  // captured from the `TEXTUREPATH = ...` line
    std::string normalPath;   // captured from the `NORMALPATH = ...` line
};

// Stream inserter for ModelData: writes the model name followed by its three
// indented paths, one per line, then ends the record with std::endl (which
// emits a newline and flushes the stream).
std::ostream& operator<<(std::ostream& os, const ModelData& data)
{
    os << "Model = " << data.tagName << "\n"
       << "  filePath = " << data.filePath << "\n"
       << "  texturePath = " << data.texturePath << "\n"
       << "  normalPath = " << data.normalPath << "\n";

    return os << std::endl;
}

// Reads model definitions from any input stream, so the example can be fed
// from a std::istringstream as well as from a std::ifstream.
//
// Expected input: a `MODEL = <name>` line followed by up to three lines of
// the form `FILEPATH = ...`, `TEXTUREPATH = ...` or `NORMALPATH = ...`, in
// any order. Lines that match none of the patterns are skipped.
//
// @param ifile  stream to read from; consumed until end of input.
// @return       a std::vector<ModelData> with one element per MODEL line,
//               in input order. Fields without a matching line stay empty.
auto load_from_stram(std::istream& ifile)
{
    // More on regex here: https://regexone.com/ (to try out your own regular
    // expressions go to https://regex101.com/).
    // The part between ( ) is a capture group and holds the value.
    // `.+` matches one or more of any character (up to the end of the line).
    static const std::regex model_rx{ "MODEL = (.+)" };
    static const std::regex filepath_rx{ "FILEPATH = (.+)" };
    static const std::regex texturepath_rx{ "TEXTUREPATH = (.+)" };
    static const std::regex normalpath_rx{ "NORMALPATH = (.+)" };
    std::smatch match;

    std::vector<ModelData> models;
    std::string line;

    // read until end of input
    while (std::getline(ifile, line))
    {
        // A model starts at a MODEL line; anything else is skipped.
        if (!std::regex_search(line, match, model_rx))
        {
            continue;
        }

        // Start from an empty record so values from a previous model can
        // never leak into this one.
        ModelData data;

        // match[0] is the full match, match[1] the first capture group.
        data.tagName = match[1].str();

        // The next three lines are expected to hold the paths. Stop early
        // if the input ends instead of re-testing a stale line.
        for (std::size_t n = 0; n < 3 && std::getline(ifile, line); ++n)
        {
            if (std::regex_search(line, match, filepath_rx)) data.filePath = match[1].str();
            if (std::regex_search(line, match, texturepath_rx)) data.texturePath = match[1].str();
            if (std::regex_search(line, match, normalpath_rx)) data.normalPath = match[1].str();
        }

        models.push_back(data);
    }

    return models;
}

// Demo driver: parses two models from an in-memory stream and prints them.
int main()
{
    // The trailing backslash splices the two source lines into one string
    // literal, so the second model line starts with a run of spaces.
    std::istringstream input{ "MODEL = modelname1\nFILEPATH = FILEPATH1\nTEXTUREPATH = TEXTUREPATH1\nNORMALPATH = NORMALPATH1\n \
                               MODEL = modelname2\nFILEPATH = FILEPATH2\nTEXTUREPATH = TEXTUREPATH2\nNORMALPATH = NORMALPATH2\n" };

    const auto parsed = load_from_stram(input);

    for (auto it = parsed.begin(); it != parsed.end(); ++it)
    {
        std::cout << *it;
    }
}

CodePudding user response:

Since you want to learn fundamentals, I will only answer with outlines of solutions. Note that neither will work if you want to add block nesting to your language. For that you need a proper grammar like boost::spirit or similar.

Use getline to extract one block at a time:

// Outline only: `infile` is presumably the opened input stream from the question.
std::string model_name, block_contents;
// Read everything up to the next '{' (the `MODEL: "name"` header); getline
// consumes and discards the delimiter itself.
getline(infile, model_name, '{');
// Read the block body up to the closing '}', which is likewise discarded.
getline(infile, block_contents, '}');

Use a regular expression to do the same:

// Matches one whole block: group 1 is the model name, group 2 the block body.
// A custom raw-string delimiter (rx) is required because the pattern itself
// contains `)"`, which would terminate a plain R"(...)" literal early.
// The body uses [\s\S] instead of `.` because `.` does not match line breaks
// in the default ECMAScript grammar, and blocks may span several lines.
const std::regex block_regex{ R"rx(MODEL\s*:\s*"(.*?)"\s*\{([\s\S]*?)\})rx" };

You can then apply a std::regex_iterator to get each block in sequence.

Parsing the block contents

Use std::regex_iterator with the regex (\S+)\s*=\s*(".*?"); and store each key/value pair in a std::map.

  • Related