Home > Enterprise >  How to push all the arguments into result vector when parsing with Spirit::Qi?
How to push all the arguments into result vector when parsing with Spirit::Qi?

Time:10-27

I have several commands in a script language, so I need to parse them. During parsing, I would like to check that syntax is correct and the type of commands and its arguments (there is a variable number of arguments per script command type, so I use a std::vector<std::string> to store them).

I am running into a problem: when parsing, only the first string ends up in the vector, regardless of how many strings are actually present.

Also, I had to wrap every argument in qi::as_string to get the code to compile.

A minimal working example of my project is shown next:

//#define BOOST_SPIRIT_DEBUG
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>

namespace qi = boost::spirit::qi;

// Classification the grammar assigns to each script line:
//   NONE - line holding only blanks
//   CMD1 - a CMD1(...) command
//   CMD2 - a CMD2(..., ...) command
//   FAIL - any line that matched none of the above
enum class TYPE { NONE, CMD1, CMD2, FAIL };

// One parsed script line: its classification plus the argument texts.
struct Command {
    TYPE                     type{TYPE::NONE}; // stays NONE until a rule assigns a kind
    std::vector<std::string> args;             // arguments kept as text; count varies per command
};

using Commands = std::vector<Command>;

BOOST_FUSION_ADAPT_STRUCT(Command, type, args)

// Qi grammar that turns a whole script (one command per line) into Commands.
//
// Each line is tried as a blank line, a CMD1, a CMD2 and finally as a
// catch-all failure line, in that order. Lines are separated by eol and the
// whole input must be consumed (`> eoi`).
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
  private:
    // Per-line rules; all of them skip blanks between tokens.
    qi::rule<It, Command(), qi::blank_type> none, cmd1, cmd2, fail;
    qi::rule<It, Commands()> start;

  public:
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        // Line with nothing but blanks: must be followed by end of line/input.
        none = omit[*blank] >> &(eol | eoi)
            >> attr(TYPE::NONE)
            >> attr(std::vector<std::string>{});

        // CMD1(<text>): a single argument running up to ')' or end of line.
        // The unary `+` is essential: without it only one character is
        // matched and the closing ')' can never be reached for longer text.
        cmd1 = lit("CMD1") >> '('
            >> attr(TYPE::CMD1)
            >> as_string[lexeme[+~char_(")\r\n")]] >> ')';

        // CMD2(<text>, <number>): the number is kept as its source text.
        // NOTE: this sequence exposes the type plus two separate strings,
        // while Command is adapted as (type, args); as written only the first
        // string ends up in args.
        cmd2 = lit("CMD2") >> '('
            >> attr(TYPE::CMD2)
            >> as_string[lexeme[+~char_(",)\r\n")]] >> ','
            >> as_string[raw[double_]] >> ')';


        // Catch-all: swallow the rest of the line and flag it as FAIL.
        fail = omit[*~char_("\r\n")] //
            >> attr(TYPE::FAIL);

        start = skip(blank)[(none | cmd1 | cmd2 | fail) % eol] > eoi;
    }
};

// Parses a complete script and returns one Command per line.
//
// @param text  script source; taken by value and moved into the stream.
// @return      the parsed commands, in input order.
// @throws std::runtime_error if the grammar does not match the input.
//         Previously a failed parse was ignored and whatever had been
//         collected so far was returned as if it were a valid result.
Commands parse(std::string text)
{
    std::istringstream in(std::move(text));
    using It = boost::spirit::istream_iterator;

    static const Parser<It> parser;

    Commands commands;
    // noskipws: the stream must hand over whitespace untouched, because the
    // grammar does its own blank skipping and relies on seeing line ends.
    It first(in >> std::noskipws), last;

    if (!qi::parse(first, last, parser, commands))
        throw std::runtime_error("command parse error");

    return commands;
}

// Parses a two-line sample script and reports how many arguments each
// command received.
int main()
{
    std::string test{
R"(CMD1(some ad hoc text)
CMD2(identity, 25.5))"};

    try {
        auto commands = parse(test);
        std::cout << "elements: " << commands.size() << std::endl;
        // at() rather than operator[]: if fewer than two commands come back,
        // the access throws std::out_of_range (handled below) instead of
        // reading past the end of the vector.
        std::cout << "CMD1 args: " << commands.at(0).args.size() << std::endl;
        std::cout << "CMD2 args: " << commands.at(1).args.size() << std::endl;// Error! Should be 2!!!!!

    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}

Also, here is a link to compiler explorer: https://godbolt.org/z/qM6KTcTTK

Any help fixing this? Thanks in advance

CodePudding user response:

Enabling your debugging shows: https://godbolt.org/z/o3nvjz9bG

Not clear enough for me. Let's add an argument rule:

// Parsed script line. The kind enum and the argument aliases are nested, so
// users spell them Command::CMD1, Command::Arg, Command::Args, ...
struct Command {
    enum TYPE {
        NONE,
        CMD1,
        CMD2,
        FAIL
    };
    using Arg  = std::string;
    using Args = std::vector<Arg>;

    TYPE type{NONE}; // kind of line; NONE by default
    Args args;       // argument texts in source order
};

qi::rule<It, Command::Arg()> arg;

And

// Blank line; the args attribute no longer needs to be supplied explicitly.
none = omit[*blank] >> &(eol | eoi)
    >> attr(Command::NONE)
    /*>> attr(Command::Args{})*/;

// One argument: either the source text of a number, or a run of characters
// up to the next ',' / ')' / line end. The unary `+` makes it a run; without
// it just a single character would be consumed.
arg  = raw[double_] | +~char_(",)\r\n");

cmd1 = lit("CMD1") >> attr(Command::CMD1) //
    >> '(' >> arg >> ')';

cmd2 = lit("CMD2") >> attr(Command::CMD2) //
    >> '(' >> arg >> ',' >> arg >> ')';

// Catch-all: swallow the rest of the line and flag it as FAIL.
fail = omit[*~char_("\r\n")] //
    >> attr(Command::FAIL);

Now we can see https://godbolt.org/z/3Kqr3K41v

  <cmd2>
    <try>CMD2(identity, 25.5)</try>
    <arg>
      <try>identity, 25.5)</try>
      <success>, 25.5)</success>
      <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
    </arg>
    <arg>
      <try>25.5)</try>
      <success>)</success>
      <attributes>[[2, 5, ., 5]]</attributes>
    </arg>
    <success></success>
    <attributes>[[CMD2, [[i, d, e, n, t, i, t, y]]]]</attributes>
  </cmd2>

Clearly, both arguments are parsed, but only one is assigned. The sad fact is that you're actively confusing the rule, by adapting a two-element struct and parsing a sequence of 3 elements.

You can get this to work, but you'd have to help it (e.g. with transform_attribute, attr_cast<> or a separate rule):

    // One argument: number text, or a run (`+`) of characters up to the next
    // ',' / ')' / line end.
    arg  = raw[double_] | +~char_(",)\r\n");
    // Comma-separated list of arguments, exposed as a single container so it
    // lines up with the `args` member of Command.
    args = arg % ',';

    cmd1 = lit("CMD1") >> attr(Command::CMD1) //
        >> '(' >> arg >> ')';

    cmd2 = lit("CMD2") >> attr(Command::CMD2) //
        >> '(' >> args >> ')';

Now you get:

  <cmd2>
    <try>CMD2(identity, 25.5)</try>
    <args>
      <try>identity, 25.5)</try>
      <arg>
        <try>identity, 25.5)</try>
        <success>, 25.5)</success>
        <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
      </arg>
      <arg>
        <try> 25.5)</try>
        <success>)</success>
        <attributes>[[ , 2, 5, ., 5]]</attributes>
      </arg>
      <success>)</success>
      <attributes>[[[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]</attributes>
    </args>
    <success></success>
    <attributes>[[CMD2, [[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]]</attributes>
  </cmd2>

Now this hints at an obvious improvement: improve the grammar by simplifying:

    // Blank line -> NONE with no arguments; anything unrecognised -> FAIL.
    none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
    fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);

    // One argument: number text, or a run (`+`) of characters up to the next
    // ',' / ')' / line end.
    arg   = raw[double_] | +~char_(",)\r\n");
    args  = '(' >> arg % ',' >> ')';
    // Command keyword looked up case-insensitively in the `type` symbol
    // table, followed by an optional parenthesised argument list.
    cmd   = no_case[type] >> -args;

    // `none` has to be tried before `fail`: `fail` also matches an empty
    // line, which would otherwise be reported as FAIL instead of NONE.
    start = skip(blank)[(cmd|none|fail) % eol] > eoi;

Then add validation to the commands after the fact.

Demo

Live On Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>

namespace qi = boost::spirit::qi;

// Parsed script line: a kind plus its argument texts, printable as
// KIND("arg1", "arg2", ...).
struct Command {
    using Arg  = std::string;
    using Args = std::vector<Arg>;
    enum Type { NONE, CMD1, CMD2, FAIL };

    Type type{NONE};
    Args args;

    // Streams the enumerator name, or "???" for a value outside the enum.
    friend std::ostream& operator<<(std::ostream& os, Type type) {
        char const* label = "???";
        switch (type) {
            case NONE: label = "NONE"; break;
            case CMD1: label = "CMD1"; break;
            case CMD2: label = "CMD2"; break;
            case FAIL: label = "FAIL"; break;
            default: break;
        }
        return os << label;
    }

    // Streams the kind followed by the quoted arguments, comma-separated.
    friend std::ostream& operator<<(std::ostream& os, Command const& cmd) {
        os << cmd.type << "(";
        bool first = true;
        for (auto const& text : cmd.args) {
            if (!first)
                os << ", ";
            first = false;
            os << std::quoted(text);
        }
        os << ")";
        return os;
    }
};
using Commands = std::vector<Command>;

BOOST_FUSION_ADAPT_STRUCT(Command, type, args)

// Qi grammar that parses a whole script (one command per line) into Commands.
//
// A line is either a known command keyword with an optional parenthesised
// argument list, a blank line (NONE), or anything else (FAIL). Lines are
// separated by eol and the entire input must be consumed (`> eoi`).
template <typename It> struct Parser : qi::grammar<It, Commands()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;

        // Blank line -> NONE with no arguments; anything unrecognised -> FAIL.
        none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
        fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);

        // One argument: number text, or a run of characters up to the next
        // ',' / ')' / line end. The unary `+` is essential: without it only a
        // single character is consumed and real commands fall through to FAIL.
        arg   = raw[double_] | +~char_(",)\r\n");
        args  = '(' >> arg % ',' >> ')';
        // Keyword lookup is case-insensitive; the argument list is optional.
        cmd   = no_case[type] >> -args;

        // `none` is tried before `fail` because `fail` also matches an empty line.
        start = skip(blank)[(cmd|none|fail) % eol] > eoi;

        BOOST_SPIRIT_DEBUG_NODES((start)(fail)(none)(cmd)(arg)(args))
    }

private:
    // Keyword -> command kind. Keys are lowercase; they are matched under no_case.
    struct type_sym : qi::symbols<char, Command::Type> {
        type_sym() { this->add//
            ("cmd1", Command::CMD1)
            ("cmd2", Command::CMD2);
        }
    } type;
    // arg/args declare no skipper, so blanks inside an argument are kept.
    qi::rule<It, Command::Arg()>            arg;
    qi::rule<It, Command::Args()>           args;
    qi::rule<It, Command(), qi::blank_type> cmd, none, fail;
    qi::rule<It, Commands()>                start;
};

// Runs the grammar over `text` and returns the commands it produced.
// Throws std::runtime_error("command parse error") if the grammar reports failure.
Commands parse(std::string const& text)
{
    using It = std::string::const_iterator;
    static const Parser<It> parser;

    It cursor    = text.begin();
    It const end = text.end();

    Commands result;
    bool const matched = qi::parse(cursor, end, parser, result);
    if (!matched)
        throw std::runtime_error("command parse error");

    return result;
}

// Parses a small sample script (blank line, CMD1, a bogus line, lowercase
// cmd2) and prints each resulting command on its own line.
int main()
{
    try {
        auto const commands = parse(R"(
CMD1(some ad hoc text)
this is a bogus line
cmd2(identity, 25.5))");

        for (auto const& command : commands) {
            std::cout << command << "\n";
        }
    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}

Prints

NONE()
CMD1("some ad hoc text")
FAIL()
CMD2("identity", " 25.5")
  • Related