I'm trying to create a (pretty simple) parser using boost::spirit::qi to extract messages from a stream. Each message starts from a short marker and ends with \r\n
. The message body is ASCII text (letters and numbers) separated by a comma. For example:
!START,01,2.3,ABC\r\n
!START,456.2,890\r\n
I'm using unit tests to check the parser and everything works well when I pass only correct messages one by one. But when I try to emulate some invalid input, like:
!START,01,2.3,ABC\r\n
trash-message
!START,456.2,890\r\n
The parser doesn't see the following messages after an unexpected text.
I'm new in boost::spirit and I'd like to know how a parser based on boost::spirit::qi::grammar is supposed to work.
My question is: Should the parser slide in the input stream and try to find a beginning of a message? Or the caller should check the parsing result and in case of failure move an iterator and then recall the parser again?
Many thanks for considering my request.
CodePudding user response:
My question is: Should the parser slide in the input stream and try to find a beginning of a message?
Only when you tell it to. It's called qi::parse
, not qi::search
. But obviously you can make a grammar ignore things.
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace qi = boost::spirit::qi;
struct Command {
enum Type { START, QUIT, TRASH } type = TRASH;
std::vector<std::string> args;
};
using Commands = std::vector<Command>;
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)
template <typename It> struct CmdParser : qi::grammar<It, Commands()> {
CmdParser() : CmdParser::base_type(commands_) {
type_.add("!start", Command::START);
type_.add("!quit", Command::QUIT);
trash_ = *~qi::char_("\r\n"); // just ignore the entire line
arg_ = *~qi::char_(",\r\n");
command_ = qi::no_case[type_] >> *(',' >> arg_);
commands_ = *((command_ | trash_) >> qi::eol);
BOOST_SPIRIT_DEBUG_NODES((trash_)(arg_)(command_)(commands_))
}
private:
qi::symbols<char, Command::Type> type_;
qi::rule<It, Commands()> commands_;
qi::rule<It, Command()> command_;
qi::rule<It, std::string()> arg_;
qi::rule<It> trash_;
};
int main() {
std::string_view input = "!START,01,2.3,ABC\r\n"
"trash-message\r\n"
"!START,456.2,890\r\n";
using It = std::string_view::const_iterator;
static CmdParser<It> const parser;
Commands parsed;
auto f = input.begin(), l = input.end();
if (parse(f, l, parser, parsed)) {
std::cout << "Parsed:\n";
for(Command const& cmd : parsed) {
std::cout << cmd.type;
for (auto& arg: cmd.args)
std::cout << ", " << quoted(arg);
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: " << quoted(std::string(f, l)) << "\n";
}
Printing
Parsed:
0, "01", "2.3", "ABC"
2
0, "456.2", "890"