%{
#define FUNCT 300
#define IDENTIFIER 301
#define ASSGN 302
#define INTEGER 303
#define PRINT 304
#define TEXT 305
#define INPUT 306
#define CONTINUE 307
#define RETURN 308
#define IF 309
#define THEN 310
#define ENDIF 311
#define ELSE 312
#define WHILE 313
#define DO 314
#define ENDDO 315
#define END 316
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define MAX_SYM 200
int found;
void initialize();
void create(char *lexeme, int scope, char type, char usage);
int readsymtab(char *lexeme, int scope, char usage);
%}
%%
[\t ] {}
= {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(ASSGN) ;}
print {int found = readsymtab(yytext,0,'L'); //line 39
if(found == -1)
{
create(yytext,0,'S','L');
};
return(PRINT) ;}
input {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(INPUT) ;}
continue {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(CONTINUE) ;}
return {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(RETURN) ;}
if {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(IF) ;}
then {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(THEN) ;}
endif {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(ENDIF) ;}
else {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(ELSE) ;}
while {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(WHILE) ;}
do {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(DO) ;}
enddo {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(ENDDO) ;}
end {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(END);
exit(0); ;}
funct {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'S','L');
};
return(FUNCT) ;}
[0-9] {int found = readsymtab(yytext,0,'L');
if(found == -1)
{
create(yytext,0,'I','L');
};
return(FUNCT) ;}
[a-zA-Z] {int found = readsymtab(yytext,0,'I');
if(found == -1)
{
create(yytext,0,'S','I');
};
return(IDENTIFIER) ;}
\"[^\"\n] |[\\n] \" {int found = readsymtab(yytext,0,'L'); //line130
if(found == -1)
{
create(yytext,0,'S','L');
};
return(TEXT) ;}
. {return(yytext[0]) ;}
%%
//new variable declaration
int num;
int scope;
struct symbtab
{
char Lexeme [18];
int Scope;
char Type;
char Usage;
int Reference;
};
struct symbtab arr_symtab[200]; //data structure in which the symbol table entries are stored
void print_fn() //function which actually prints the symbol tabel in columnar form
{
int rows;
printf("Row No Lexeme Scope Type Usage Reference\n");
for (rows=0; rows<=num; rows ){
printf("m %-16s %-7d %-7c %-7c %-7d \n",rows, arr_symtab[rows].Lexeme,arr_symtab[rows].Scope,arr_symtab[rows].Type,arr_symtab[rows].Usage,arr_symtab[rows].Reference);
}
}
void initialize() //function which enteres the initial value into the symbol table
{
num = -1;
int scope = 0;
char lexeme[18]= "FRED";
char type = 'I';
char usage = 'L';
create(lexeme,scope,type,usage);
}
void create(char *lexeme, int scope, char type, char usage) //function which creates a new entry in the symbol table
{
int reference;
if(type=='I' && usage =='L')
reference = atoi(lexeme);
else
reference = -1;
num = num 1;
strcpy(arr_symtab[num].Lexeme, lexeme);
arr_symtab[num].Scope = scope;
arr_symtab[num].Type = type;
arr_symtab[num].Usage = usage;
arr_symtab[num].Reference = reference;
}
int readsymtab(char *lexeme, int scope, char usage) //function which checks if the entry is already in the table or not and the takes the required action
{
for(int i=num; i>=0; i--){
int comp = strcmp(arr_symtab[i].Lexeme, lexeme);
if(comp==0 && arr_symtab[i].Scope==scope && arr_symtab[i].Usage==usage)
{
return i;
}
else
{
return -1;
}
}
}
int main()
{
//other lines
printf("\n COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 \n");
initialize();
yylex();
print_fn();
printf("End of test.\n");
return 0;
}
int yywrap ()
{
return 1;
}
Following is the output for print"aryan banyal"
COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04
--(end of buffer or a NUL)
--accepting rule at line 39 ("print")
Row No Lexeme Scope Type Usage Reference
0 FRED 0 I L 0
1 print 0 S L -1
End of test.
As you can see it's not even going to the "aryan banyal" part just does the print thing and exits... Following is the output for "aryan banyal"
COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04
--(end of buffer or a NUL)
--accepting rule at line 130 (""aryan banyal")
Row No Lexeme Scope Type Usage Reference
0 FRED 0 I L 0
1 "aryan banyal 0 S L -1
End of test.
It shoud be aryan banyal there on Row No 1 but there a " before for some reason.
CodePudding user response:
You have (at least) three (somewhat) unrelated problems.
Using the lexical scanner
Your code stops after reading a single token because you only call yylex()
once (and ignore what it returns). yylex()
returns a single token every time you call it; if you want to scan the entire file, you need to call it in a loop. It will return 0 when it encounters the end of input.
Understanding patterns
The pattern \"[^\"\n] |[\\n] \"
has an |
in the middle; that operator matches either of the patterns which surround it. So you are matching \"[^\"\n]
or [\\n] \"
. The first one matches a single double quote, followed by any number of characters (but at least one), which cannot be a quote or a new line. So that matches "aryan banyal
without the closing quote but including the open quote. The second half of the alternative would match any number of characters (again, at least one) all of which are either a backslash or the letter n
, and then a single double quote.
(I don't understand the thinking behind this pattern, and it is almost certainly not what you intended. Had you called yylex
again after the match of "aryan banyal
, the closing quote would not have been matched, because it would be the immediate next character, and the pattern insists that it be preceded by at least one backslash or n
. (Maybe you intended that to be a newline, but there is not one of those either.)
I think you probably wanted to match the entire quoted string, and then to keep only the part between the quotes. If you had written the pattern correctly, that's what it would have matched, and then you would need to remove the double quotes. I'll leave writing the correct pattern as an exercise. You might want to read the short description of Flex patterns in the Flex manual; you probably also have some information in your class notes.
Selecting just a part of the match
It's easy to remove the quote at the beginning of the token. All that requires is adding one to yytext
. To get rid of the one at the end, you need to overwrite it with a \0
, thereby terminating the string one character earlier. That's easy to do because Flex provides you with the length of the match in the variable yyleng
. So you could set yytext[yyleng - 1] = '\0'
and then call your symbol table function with yytext 1
.
If the above paragraph did not make sense, you should review any introductory text on string processing in C. Remember that in C, a string is nothing but an array of single characters (small integers) terminated with a 0. That's makes some things very easy to do, and other things a bit painful (but never mysterious).