Im pretty new to compiler design. im trying to take the first step in lexical analyzer trying to run the following code.
%{
#include <stdlib.h>
#include <stdio.h>
#include "symboltable.h"
#include "tokens.h"
entry_t** symbol_table;
entry_t** constant_table;
int cmnt_strt = 0;
%}
letter [a-zA-Z]
digit [0-9]
ws [ \t\r\f\v]
identifier (_|{letter})({letter}|{digit}|_){0,31}
hex [0-9a-f]
/* Exclusive states */
%x CMNT
%x PREPROC
%%
/* Keywords*/
"int" {printf("\t%-30s : =\n",yytext,INT);}
"long" {printf("\t%-30s : =\n",yytext,LONG);}
"long long" {printf("\t%-30s : =\n",yytext,LONG_LONG);}
"short" {printf("\t%-30s : =\n",yytext,SHORT);}
"signed" {printf("\t%-30s : =\n",yytext,SIGNED);}
"unsigned" {printf("\t%-30s : =\n",yytext,UNSIGNED);}
"for" {printf("\t%-30s : =\n",yytext,FOR);}
"break" {printf("\t%-30s : =\n",yytext,BREAK);}
"continue" {printf("\t%-30s : =\n",yytext,CONTINUE);}
"if" {printf("\t%-30s : =\n",yytext,IF);}
"else" {printf("\t%-30s : =\n",yytext,ELSE);}
"return" {printf("\t%-30s : =\n",yytext,RETURN);}
{identifier} {printf("\t%-30s : =\n", yytext,IDENTIFIER);
insert( symbol_table,yytext,IDENTIFIER );}
{ws} ;
[ \-]?[0][x|X]{hex} [lLuU]? {printf("\t%-30s : =\n", yytext,HEX_CONSTANT);
insert( constant_table,yytext,HEX_CONSTANT);}
[ \-]?{digit} [lLuU]? {printf("\t%-30s : =\n", yytext,DEC_CONSTANT);
insert( constant_table,yytext,DEC_CONSTANT);}
"/*" {cmnt_strt = yylineno; BEGIN CMNT;}
<CMNT>.|{ws} ;
<CMNT>\n {yylineno ;}
<CMNT>"*/" {BEGIN INITIAL;}
<CMNT>"/*" {printf("Line =: Nested comments are not valid!\n",yylineno);}
<CMNT><<EOF>> {printf("Line =: Unterminated comment\n", cmnt_strt); yyterminate();}
^"#include" {BEGIN PREPROC;}
<PREPROC>"<"[^<>\n] ">" {printf("\t%-30s : =\n",yytext,HEADER_FILE);}
<PREPROC>{ws} ;
<PREPROC>\"[^"\n] \" {printf("\t%-30s : %3d\n",yytext,HEADER_FILE);}
<PREPROC>\n {yylineno ; BEGIN INITIAL;}
<PREPROC>. {printf("Line %3d: Illegal header file format \n",yylineno);}
"//".* ;
\"[^\"\n]*\" {
if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
{
yyless(yyleng-1); /* push the quote back if it was escaped */
yymore();
}
else
insert( constant_table,yytext,STRING);
}
\"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
{digit} ({letter}|_) {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
\n {yylineno ;}
"--" {printf("\t%-30s : %3d\n",yytext,DECREMENT);}
" " {printf("\t%-30s : %3d\n",yytext,INCREMENT);}
"->" {printf("\t%-30s : %3d\n",yytext,PTR_SELECT);}
"&&" {printf("\t%-30s : %3d\n",yytext,LOGICAL_AND);}
"||" {printf("\t%-30s : %3d\n",yytext,LOGICAL_OR);}
"<=" {printf("\t%-30s : %3d\n",yytext,LS_THAN_EQ);}
">=" {printf("\t%-30s : %3d\n",yytext,GR_THAN_EQ);}
"==" {printf("\t%-30s : %3d\n",yytext,EQ);}
"!=" {printf("\t%-30s : %3d\n",yytext,NOT_EQ);}
";" {printf("\t%-30s : %3d\n",yytext,DELIMITER);}
"{" {printf("\t%-30s : %3d\n",yytext,OPEN_BRACES);}
"}" {printf("\t%-30s : %3d\n",yytext,CLOSE_BRACES);}
"," {printf("\t%-30s : %3d\n",yytext,COMMA);}
"=" {printf("\t%-30s : %3d\n",yytext,ASSIGN);}
"(" {printf("\t%-30s : %3d\n",yytext,OPEN_PAR);}
")" {printf("\t%-30s : %3d\n",yytext,CLOSE_PAR);}
"[" {printf("\t%-30s : %3d\n",yytext,OPEN_SQ_BRKT);}
"]" {printf("\t%-30s : %3d\n",yytext,CLOSE_SQ_BRKT);}
"-" {printf("\t%-30s : %3d\n",yytext,MINUS);}
" " {printf("\t%-30s : %3d\n",yytext,PLUS);}
"*" {printf("\t%-30s : %3d\n",yytext,STAR);}
"/" {printf("\t%-30s : %3d\n",yytext,FW_SLASH);}
"%" {printf("\t%-30s : %3d\n",yytext,MODULO);}
"<" {printf("\t%-30s : %3d\n",yytext,LS_THAN);}
">" {printf("\t%-30s : %3d\n",yytext,GR_THAN);}
. {printf("Line %3d: Illegal character %s\n",yylineno,yytext);}
%%
int yywrap(){ return 1;}
int main()
{
yyin=fopen("testcases/test-case-1.c","r");
symbol_table=create_table();
constant_table=create_table();
yylex();
printf("\n\tSymbol table");
display(symbol_table);
printf("\n\tConstants Table");
display(constant_table);
printf("NOTE: Please refer tokens.h for token meanings\n");
}
I tried to run it using:
flex lexer.l
gcc lex.yy.c -o lexrun
This gives me an error as
lexer.l: In function 'yylex':
lexer.l:46:14: error: 'yylineno' undeclared (first use in this function)
"/*" {cmnt_strt = yylineno; BEGIN CMNT;}
^~~~~~~~
lexer.l:46:14: note: each undeclared identifier is reported only once for each function it appears in
I tried many things on the net such as adding
extern int yylineno;
This started giving another error as
undefined reference to `yylineno'
I have installed Flex version 2.5.4 and runs this in Vscode. Any clarification to overcome this would be much appreciated. Thanks in advance.
CodePudding user response:
If you want flex to track line numbers you need to add
%option yylineno
to your flex prologue.
I also suggest the following:
-
%option noinput nounput
which will allow you to compile the generated scanner without compiler warnings (Always compile with
-Wall
, even generated code, and fix whatever warnings are reported.) -
%option noyywrap
which avoids the need to define
yywrap
-
%option nodefualt
which will warn you if you don't have a rule for every possible input.
As a final note, extern int yylineno;
can't work, since extern
means "this variable is defined in a different translation unit", and there is no other translation unit in your code. I presume that you found that in the discussion of a different file intended to be linked together with the scanner. (If the place you found that suggests putting the extern
declaration in a .l
file, you need to discard it as a source of information.)
If this is the first time you've attempted to write a C application with more than one source file, you should probably take a few minutes to review how linking multiple files works in C. That will save you a lot of frustration later.