Home > front end >  How to use EBNF grammar for a small language?
How to use EBNF grammar for a small language?

Time:04-08

I am working on an assignment that parses a file using an EBNF grammar, and I am a little confused about how the recursive functions work.

-first three functions:

Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String) 

I know that you're supposed to call Prog first, then DeclBlock, and then DeclStmt. When you call DeclBlock, do you call it again only when you see the semicolon, or when you see another IDENT? The same goes for DeclStmt: do you call it again only when you see a comma?

//Prog ::= PROGRAM IDENT; DeclBlock ProgBody
// Top-level rule of the parser.
//
// Reads, in order: the PROGRAM keyword, the program name (IDENT), a ';',
// the declaration block (via DeclBlock), the BEGIN keyword, the program body
// (via ProgBody) and finally the END keyword. Every token fetched here is
// echoed to stdout right after it is read.
//
// in   - stream the tokens are read from
// line - line counter handed to the tokenizer and to ParseError
//
// Returns true only when every step succeeds; on the first failure the
// matching error is reported through ParseError and false is returned.
bool Prog(istream& in, int& line){
    // --- header: PROGRAM IDENT ; ---
    LexItem programTok = Parser::GetNextToken(in, line);
    cout << programTok << endl;
    if(programTok != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    LexItem nameTok = Parser::GetNextToken(in, line);
    cout << nameTok << endl;
    if(nameTok != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    LexItem semiTok = Parser::GetNextToken(in, line);
    cout << semiTok << endl;
    if(semiTok != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // --- declarations ---
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // --- body: BEGIN ProgBody END ---
    LexItem beginTok = Parser::GetNextToken(in, line);
    cout << "here at b" << beginTok << endl;
    if(beginTok != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    LexItem endTok = Parser::GetNextToken(in, line);
    cout << endTok << endl;
    if(endTok != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
//DeclBlock ::= VAR {DeclStmt;}
// Parses the declaration block: the VAR keyword followed by zero or more
// declaration statements.
//
// in   - stream the tokens are read from
// line - line counter handed to the tokenizer and to ParseError
//
// Returns true when VAR and every declaration that follows it parse
// correctly; otherwise reports the error through ParseError and returns false.
//
// Note: DeclStmt in this file also consumes the ';' that terminates each
// declaration, so this function does not look for the ';' itself.
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);

    cout << v << endl;

    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    // The braces in {DeclStmt;} mean "zero or more": keep parsing
    // declarations for as long as the next token can start one (an IDENT).
    while(true){
        LexItem next = Parser::GetNextToken(in, line);

        // The read above is only a peek. Hand the token back so that either
        // DeclStmt (when it is an IDENT) or the caller (anything else) reads
        // it again.
        // NOTE(review): assumes Parser::PushBackToken makes `next` the token
        // returned by the following GetNextToken call - confirm against the
        // Parser API.
        Parser::PushBackToken(next);

        if(next != IDENT){
            break;
        }

        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }

    return true;
}

//DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
// Parses one declaration statement and the ';' that follows it.
//
// Each identifier in the list is recorded in defVar; declaring a name that is
// already in defVar is an error. A ',' after an identifier continues the list
// through a recursive call, which then also handles the ':', the type and
// the ';'.
//
// in   - stream the tokens are read from
// line - line counter handed to the tokenizer and to ParseError
//
// Returns true when the whole statement (through the ';') is well formed;
// otherwise reports the error through ParseError and returns false.
bool DeclStmt(istream& in, int& line){
    LexItem tok = Parser::GetNextToken(in, line);
    cout << "here too " <<  tok << endl;

    // Every entry of the list has to be an identifier. Without this check any
    // token (a ':', a keyword, ...) would be stored in defVar as if it were a
    // declared variable.
    if(tok != IDENT){
        ParseError(line, "Missing identifier in declaration");
        return false;
    }

    if (defVar.find(tok.GetLexeme()) != defVar.end()) {
        cout << "Var Exists!" << endl;
        ParseError(line, "Var cant be redeclared");
        return false;
    }
    defVar.insert({tok.GetLexeme(), true});

    LexItem c = Parser::GetNextToken(in, line);

    cout << c << endl;

    if(c == COMMA){
        // More identifiers follow; the recursive call parses the rest of the
        // statement, so its result is the result of this statement.
        return DeclStmt(in, line);
    }

    if(c.GetToken() == IDENT){
        // Two identifiers in a row: the separating comma is missing.
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    LexItem t = Parser::GetNextToken(in, line);

    cout << "here t " << t.GetLexeme() << endl;

    // The type is matched on its lexeme text.
    // NOTE(review): assumes the lexer reports type keywords as the upper-case
    // strings "REAL", "INTEGER" and "STRING" - confirm against the lexer.
    if(t.GetLexeme() != "REAL" && t.GetLexeme() != "INTEGER" && t.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in,line);

    cout << semi << endl;

    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}

CodePudding user response:

DeclBlock should only be called once in this language. DeclStmt can be called multiple times. A DeclStmt is defined to be one or more Ident followed by :, followed by a type, then ending in a ;.

After you read the ; at the end of a DeclStmt, you'd then read the next token to decide what to do next. If the next token is another Ident you know you're at the start of another DeclStmt, so you'd call that again. If it's anything else, you know you're at the start of ProgBody. (I'm assuming the last bit. Normally you'd look for the token that starts a ProgBody, but that's not shown.)

CodePudding user response:

Your DeclBlock function should be something like

// DeclBlock ::= VAR {DeclStmt;}
// Sketch of the declaration-block rule using a non-consuming lookahead.
// Consumes VAR, then repeats "DeclStmt ;" until the upcoming token is BEGIN.
// Returns false as soon as VAR, a DeclStmt, or a ';' is missing or invalid;
// returns true once BEGIN is the next token (BEGIN itself is not consumed
// here, since it is only inspected through Parser::Lookahead).
bool DeclBlock(istream& in, int& line) {
    if (Parser::GetNextToken(in, line) != VAR) {
        // missing VAR
        return false;
    }

    while (Parser::Lookahead(in, line) != BEGIN) {
        // Parse one declaration first; the ';' is fetched only when the
        // declaration itself succeeded (short-circuit evaluation).
        const bool stmtOk = DeclStmt(in, line);
        if (!stmtOk || Parser::GetNextToken(in, line) != SEMICOL) {
            // error in the DeclStmt, or missing semicolon after it
            return false;
        }
    }

    return true;
}

The key thing is that you MUST have a parser lookahead function that gives you the next token WITHOUT consuming it.

  • Related