I have several commands in a script language, so I need to parse them. During parsing, I would like to check that the syntax is correct and validate the type of each command and its arguments (the number of arguments varies per command type, so I use a std::vector<std::string> to store them).
I have run into a problem: when parsing, only the first string is stored in the vector, regardless of how many strings are actually present.
Also, I have had to wrap every argument in a qi::as_string directive in order to get the code to compile.
A minimal working example of my project is shown next:
//#define BOOST_SPIRIT_DEBUG
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>
namespace qi = boost::spirit::qi;
// Kind of script line produced by the parser rules.
enum class TYPE {
    NONE = 0,  // default value; also attached to blank lines by the `none` rule
    CMD1 = 1,  // a CMD1(...) line
    CMD2 = 2,  // a CMD2(...) line
    FAIL = 3,  // any line that no other rule accepted
};
// One parsed script line: the command kind plus its arguments as raw text.
struct Command {
    // Command kind; defaults to TYPE::NONE.
    TYPE type{TYPE::NONE};
    // Arguments kept as strings; how many there are depends on the command.
    std::vector<std::string> args{};
};
// A parsed script: one Command per input line.
using Commands = std::vector<Command>;
// Expose Command to Boost.Fusion as the sequence (type, args) so that the
// grammar rules declared with a Command() attribute can fill it member by member.
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)
// Line-oriented grammar for the script language.
//
// The input is a list of lines separated by `eol`; every line is turned into
// one Command by the first of the rules none / cmd1 / cmd2 / fail that matches.
//
// It: iterator type of the input being parsed.
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
private:
    // Per-line rules. They use the `blank` skipper, so spaces and tabs between
    // tokens are ignored while line ends stay significant.
    qi::rule<It, Command(), qi::blank_type> none, cmd1, cmd2, fail;
    // Entry rule. It has no skipper of its own and installs `blank` via skip().
    qi::rule<It, Commands()> start;

public:
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        // Blank line: only blanks, followed (without consuming it) by a line
        // end or the end of input. Yields TYPE::NONE with no arguments.
        none = omit[*blank] >> &(eol | eoi)
            >> attr(TYPE::NONE)
            >> attr(std::vector<std::string>{});

        // CMD1(<text>): a single free-text argument running up to ')'.
        // The leading `+` is essential: a bare ~char_ matches exactly one
        // character, so multi-character arguments would never parse.
        cmd1 = lit("CMD1") >> '('
            >> attr(TYPE::CMD1)
            >> as_string[lexeme[+~char_(")\r\n")]] >> ')';

        // CMD2(<text>, <number>): the number is captured as its source text.
        // NOTE(review): this sequence exposes three attributes (type, string,
        // string) while Command is adapted with only two fields; this is the
        // construct under discussion, where only the first argument ends up
        // in `args`.
        cmd2 = lit("CMD2") >> '('
            >> attr(TYPE::CMD2)
            >> as_string[lexeme[+~char_(",)\r\n")]] >> ','
            >> as_string[raw[double_]] >> ')';

        // Fallback: swallow the rest of the line and flag it as TYPE::FAIL.
        fail = omit[*~char_("\r\n")]
            >> attr(TYPE::FAIL);

        // One command per line; `> eoi` requires the whole input to be consumed.
        start = skip(blank)[(none | cmd1 | cmd2 | fail) % eol] > eoi;
    }
};
// Parses a complete script and returns the commands that were recognised.
//
// text: the script source; it is moved into an input stream and read through
//       a Spirit istream_iterator.
// Returns the commands collected by the grammar. A `false` result from
// qi::parse is intentionally not reported (the throw is left disabled), so
// the caller receives whatever has been stored in the result at that point.
Commands parse(std::string text)
{
    using It = boost::spirit::istream_iterator;

    std::istringstream stream(std::move(text));
    // Keep whitespace in the stream: the grammar does its own skipping.
    stream >> std::noskipws;

    // Constructed once on first use and shared by all later calls.
    static const Parser<It> parser;

    Commands result;
    It cursor(stream);
    It const end;

    // throw std::runtime_error("command parse error") is deliberately disabled.
    static_cast<void>(qi::parse(cursor, end, parser, result));

    return result;
}
// Parses a two-line sample script and prints how many arguments each command
// received.
int main()
{
    // The raw string spans two lines; its second line must start in column 0.
    const std::string test{
R"(CMD1(some ad hoc text)
CMD2(identity, 25.5))"};
    try {
        const auto commands = parse(test);
        std::cout << "elements: " << commands.size() << std::endl;
        // at() instead of operator[]: if fewer than two commands were parsed,
        // this throws std::out_of_range (handled below) rather than reading
        // past the end of the vector.
        std::cout << "CMD1 args: " << commands.at(0).args.size() << std::endl;
        // Reports 1 although 2 are expected: this is the problem being asked about.
        std::cout << "CMD2 args: " << commands.at(1).args.size() << std::endl;
    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}
Also, here is a link to compiler explorer: https://godbolt.org/z/qM6KTcTTK
Any help fixing this? Thanks in advance
CodePudding user response:
Enabling your debugging shows: https://godbolt.org/z/o3nvjz9bG
Not clear enough for me. Let's add an argument rule:
// One parsed script line: the command kind plus its arguments as raw text.
struct Command {
    // Kind of line; a plain (unscoped) enum so values read as Command::NONE etc.
    enum TYPE { NONE = 0, CMD1, CMD2, FAIL };

    using Arg  = std::string;       // a single argument, kept as text
    using Args = std::vector<Arg>;  // all arguments of one command

    TYPE type{NONE};
    Args args{};
};
// Rule for a single argument; its attribute is Command::Arg (a std::string).
// It is declared without a skipper type.
qi::rule<It, Command::Arg()> arg;
And
// Blank line: only blanks before a line end / end of input; yields NONE.
none = omit[*blank] >> &(eol | eoi)
    >> attr(Command::NONE)
    /*>> attr(Command::Args{})*/;
// One argument: a number kept as its source text, otherwise a run of
// characters up to the next ',' / ')' / line end. The `+` matters: a bare
// ~char_ would match a single character only.
arg = raw[double_] | +~char_(",)\r\n");
// CMD1(<arg>)
cmd1 = lit("CMD1") >> attr(Command::CMD1)
    >> '(' >> arg >> ')';
// CMD2(<arg>, <arg>): still three attributes for a two-field struct.
cmd2 = lit("CMD2") >> attr(Command::CMD2)
    >> '(' >> arg >> ',' >> arg >> ')';
// Fallback: swallow the rest of the line and flag it as FAIL.
fail = omit[*~char_("\r\n")]
    >> attr(Command::FAIL);
Now we can see https://godbolt.org/z/3Kqr3K41v
<cmd2>
<try>CMD2(identity, 25.5)</try>
<arg>
<try>identity, 25.5)</try>
<success>, 25.5)</success>
<attributes>[[i, d, e, n, t, i, t, y]]</attributes>
</arg>
<arg>
<try>25.5)</try>
<success>)</success>
<attributes>[[2, 5, ., 5]]</attributes>
</arg>
<success></success>
<attributes>[[CMD2, [[i, d, e, n, t, i, t, y]]]]</attributes>
</cmd2>
Clearly, both arguments are parsed, but only one is assigned. The sad fact is that you're actively confusing the rule, by adapting a two-element struct and parsing a sequence of 3 elements.
You can get this to work, but you'd have to help it (e.g. with transform_attribute, attr_cast<>, or a separate rule):
// One argument: a number kept as its source text, otherwise a run of
// characters up to the next ',' / ')' / line end. The `+` matters: a bare
// ~char_ would match a single character only.
arg = raw[double_] | +~char_(",)\r\n");
// Comma-separated argument list, exposed as one container attribute so that
// it maps onto the single `args` field of Command.
args = arg % ',';
// CMD1(<arg>)
cmd1 = lit("CMD1") >> attr(Command::CMD1)
    >> '(' >> arg >> ')';
// CMD2(<arg>, <arg>, ...)
cmd2 = lit("CMD2") >> attr(Command::CMD2)
    >> '(' >> args >> ')';
Now you get:
<cmd2>
<try>CMD2(identity, 25.5)</try>
<args>
<try>identity, 25.5)</try>
<arg>
<try>identity, 25.5)</try>
<success>, 25.5)</success>
<attributes>[[i, d, e, n, t, i, t, y]]</attributes>
</arg>
<arg>
<try> 25.5)</try>
<success>)</success>
<attributes>[[ , 2, 5, ., 5]]</attributes>
</arg>
<success>)</success>
<attributes>[[[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]</attributes>
</args>
<success></success>
<attributes>[[CMD2, [[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]]</attributes>
</cmd2>
Now this hints at an obvious improvement: improve the grammar by simplifying:
// Blank line: only blanks before a line end / end of input; yields NONE().
none = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
// Fallback: swallow the rest of the line and flag it as FAIL.
fail = omit[*~char_("\r\n")] >> attr(Command::FAIL);
// One argument: a number kept as its source text, otherwise a run of
// characters up to the next ',' / ')' / line end (`+` = one or more).
arg = raw[double_] | +~char_(",)\r\n");
// Parenthesised, comma-separated argument list.
args = '(' >> arg % ',' >> ')';
// Any known command name (case-insensitive symbol lookup), optional arguments.
cmd = no_case[type] >> -args;
// `none` has to be among the alternatives, otherwise blank lines would be
// picked up by `fail`.
start = skip(blank)[(cmd|none|fail) % eol] > eoi;
Then add validation to the commands after the fact.
Demo
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace qi = boost::spirit::qi;
// One parsed script line: the command kind plus its arguments as raw text.
// Both the kind and the whole command can be streamed for display.
struct Command {
    using Arg  = std::string;       // a single argument, kept as text
    using Args = std::vector<Arg>;  // all arguments of one command

    enum Type { NONE, CMD1, CMD2, FAIL };

    Type type = NONE;
    Args args;

    // Writes the enumerator's name, or "???" for a value outside the enum.
    friend std::ostream& operator<<(std::ostream& os, Type type) {
        char const* label = "???";
        switch (type) {
            case NONE: label = "NONE"; break;
            case CMD1: label = "CMD1"; break;
            case CMD2: label = "CMD2"; break;
            case FAIL: label = "FAIL"; break;
        }
        return os << label;
    }

    // Writes the command as NAME("arg1", "arg2", ...), each argument quoted
    // with std::quoted.
    friend std::ostream& operator<<(std::ostream& os, Command const& cmd) {
        os << cmd.type << "(";
        bool needs_separator = false;
        for (Arg const& value : cmd.args) {
            if (needs_separator) {
                os << ", ";
            }
            needs_separator = true;
            os << std::quoted(value);
        }
        return os << ")";
    }
};
// A parsed script: one Command per input line.
using Commands = std::vector<Command>;
// Expose Command to Boost.Fusion as the sequence (type, args) so that the
// grammar rules declared with a Command() attribute can fill it member by member.
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)
// Line-oriented grammar for the script language.
//
// The input is a list of lines separated by `eol`; every line becomes one
// Command: a known command name with optional arguments (cmd), a blank line
// (none), or anything else (fail). Checking that a command received the right
// arguments is left to the caller.
//
// It: iterator type of the input being parsed.
template <typename It> struct Parser : qi::grammar<It, Commands()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;

        // Blank line: only blanks before a line end / end of input.
        none = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});

        // Fallback: swallow the rest of the line and flag it as FAIL.
        fail = omit[*~char_("\r\n")] >> attr(Command::FAIL);

        // One argument: a number kept as its source text, otherwise a run of
        // characters up to the next ',' / ')' / line end. The `+` is
        // essential: a bare ~char_ matches exactly one character. `arg` has
        // no skipper, so blanks after a comma stay part of the argument text.
        arg = raw[double_] | +~char_(",)\r\n");

        // Parenthesised, comma-separated argument list.
        args = '(' >> arg % ',' >> ')';

        // Known command name (case-insensitive) with an optional argument list.
        cmd = no_case[type] >> -args;

        // One command per line; `> eoi` requires the whole input to be consumed.
        start = skip(blank)[(cmd|none|fail) % eol] > eoi;

        BOOST_SPIRIT_DEBUG_NODES((start)(fail)(none)(cmd)(arg)(args))
    }

  private:
    // Symbol table mapping lower-case command names to their Command::Type.
    struct type_sym : qi::symbols<char, Command::Type> {
        type_sym() {
            this->add
                ("cmd1", Command::CMD1)
                ("cmd2", Command::CMD2);
        }
    } type;

    // Argument rules are declared without a skipper.
    qi::rule<It, Command::Arg()> arg;
    qi::rule<It, Command::Args()> args;
    // Per-line rules skip blanks (spaces/tabs) but never line ends.
    qi::rule<It, Command(), qi::blank_type> cmd, none, fail;
    // Entry rule; installs the blank skipper itself via skip().
    qi::rule<It, Commands()> start;
};
// Parses a complete script held in memory.
//
// text: the script source, one command per line.
// Returns one Command per input line.
// Throws std::runtime_error("command parse error") when qi::parse reports
// failure.
Commands parse(std::string const& text)
{
    using It = std::string::const_iterator;

    // Constructed once on first use and shared by all later calls.
    static const Parser<It> parser;

    It cursor = text.cbegin();
    It const end = text.cend();

    Commands result;
    bool const ok = qi::parse(cursor, end, parser, result);
    if (!ok) {
        throw std::runtime_error("command parse error");
    }
    return result;
}
int main()
{
try {
for (auto& cmd : parse(R"(
CMD1(some ad hoc text)
this is a bogus line
cmd2(identity, 25.5))"))
std::cout << cmd << "\n";
} catch (std::exception const& e) {
std::cout << e.what() << "\n";
}
}
Prints
NONE()
CMD1("some ad hoc text")
FAIL()
CMD2("identity", " 25.5")