Here is my code. I need it to print a lexical analysis of the input file; the output I expect is shown at the bottom. I don't understand why my code is giving me this error.
/* front.c - a lexical analyzer system for simple
arithmetic expressions */
#include <stdio.h>
#include <ctype.h>
/* Global declarations */
/* Variables */
/* Class of the character held in nextChar: LETTER, DIGIT, UNKNOWN or EOF. */
int charClass;
/* Text of the token currently being built (NUL-terminated). */
char lexeme[100];
/* Most recently read input character. Declared int, not char, because
   getc() returns an int: only an int can hold every character value and
   still keep EOF distinct from all of them. */
int nextChar;
/* Number of characters currently stored in lexeme. */
int lexLen;
int token;
/* Code of the token most recently recognised by lex(). */
int nextToken;
/* Input stream. fopen() is already declared by <stdio.h>, so it must not
   be redeclared here. */
FILE *in_fp;
/* Function declarations (full prototypes so argument lists are checked) */
void addChar(void);
void getChar(void);
void getNonBlank(void);
int lex(void);
/* Character classes
   getChar() stores one of these in charClass for the character it just
   read; at end of input it stores EOF (from <stdio.h>) instead. */
/* isalpha() is true for the character */
#define LETTER 0
/* isdigit() is true for the character */
#define DIGIT 1
/* any other character; lex() passes these to lookup() */
#define UNKNOWN 99
/* Token codes
   Values that lex() and lookup() store in nextToken. End of input is
   reported with EOF rather than with one of these codes. */
#define INT_LIT 10
#define IDENT 11
/* NOTE(review): lookup() has no case for '=', so nothing in this program
   currently produces ASSIGN_OP. */
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
/******************************************************/
/* main driver
   Opens "front.in" (looked up relative to the current working directory)
   and calls lex() repeatedly until it reports EOF. lex() prints one line
   per token. If the file cannot be opened an error line is printed
   instead. Always returns 0. */
int main(int argc, char *argv[]) {
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* Open the input data file and process its contents */
    in_fp = fopen("front.in", "r");
    if (in_fp == NULL) {
        printf("ERROR - cannot open front.in \n");
    } else {
        /* Prime the scanner with the first character. */
        getChar();
        do {
            lex();
        } while (nextToken != EOF); /* compare; '=' here would assign EOF and loop forever */
        fclose(in_fp);
    }
    return 0;
}
/*****************************************************/
/* lookup - classify an operator or parenthesis.
   ch is the character to classify. The character held in nextChar is
   appended to lexeme via addChar() (lex() calls lookup(nextChar), so the
   two are the same character).
   Stores the matching token code in nextToken and returns it. Any
   character without a case below yields EOF, which also makes the driver
   loop in main() stop. */
int lookup(char ch) {
    /* Every branch records the character first, so do it once up front. */
    addChar();

    switch (ch) {
    case '(':
        nextToken = LEFT_PAREN;
        break;
    case ')':
        nextToken = RIGHT_PAREN;
        break;
    case '+': /* was ' ', which can never match: blanks are skipped before lookup() */
        nextToken = ADD_OP;
        break;
    case '-':
        nextToken = SUB_OP;
        break;
    case '*':
        nextToken = MULT_OP;
        break;
    case '/':
        nextToken = DIV_OP;
        break;
    default:
        nextToken = EOF;
        break;
    }
    return nextToken;
}
/*****************************************************/
/* addChar - append nextChar to lexeme.
   Stores the character at index lexLen, advances lexLen, and keeps the
   buffer NUL-terminated. When there is no room left for both the
   character and the terminator, nothing is stored and an error line is
   printed. */
void addChar(void) {
    /* Need one slot for the character and one for the terminating NUL. */
    if (lexLen + 1 < (int)sizeof lexeme) {
        lexeme[lexLen++] = (char)nextChar; /* the increment is essential: without it every
                                              character is overwritten by the terminator */
        lexeme[lexLen] = '\0';
    } else {
        printf("Error - lexeme is too long \n");
    }
}
/*****************************************************/
/* getChar - read the next character of input and determine its class.
   Stores the character in nextChar and sets charClass to LETTER, DIGIT
   or UNKNOWN, or to EOF when the input is exhausted. */
void getChar(void) {
    /* Keep getc()'s result in an int: comparing a char against EOF either
       never matches (unsigned char) or mistakes byte 0xFF for end of file
       (signed char). */
    int c = getc(in_fp);

    nextChar = c;
    if (c == EOF) {
        charClass = EOF;
    } else if (isalpha(c)) { /* c is in unsigned char range here, as <ctype.h> requires */
        charClass = LETTER;
    } else if (isdigit(c)) {
        charClass = DIGIT;
    } else {
        charClass = UNKNOWN;
    }
}
/*****************************************************/
/* getNonBlank - a function to call getChar until it
returns a non-whitespace character */
void getNonBlank() {
while (isspace(nextChar))
getChar();
}
/* lex - recognise the next token of an arithmetic expression.
   Skips leading whitespace, collects the token's characters in lexeme,
   stores its code in nextToken, prints both, and returns the code. */
int lex() {
    lexLen = 0;
    getNonBlank();

    if (charClass == LETTER) {
        /* Identifier: a letter followed by any mix of letters and digits. */
        do {
            addChar();
            getChar();
        } while (charClass == LETTER || charClass == DIGIT);
        nextToken = IDENT;
    } else if (charClass == DIGIT) {
        /* Integer literal: a run of digits. */
        do {
            addChar();
            getChar();
        } while (charClass == DIGIT);
        nextToken = INT_LIT;
    } else if (charClass == UNKNOWN) {
        /* Parentheses and operators: lookup() sets nextToken. */
        lookup(nextChar);
        getChar();
    } else if (charClass == EOF) {
        /* End of input: report EOF with the lexeme text "EOF". */
        static const char eof_text[] = "EOF";
        size_t i;

        nextToken = EOF;
        for (i = 0; i < sizeof eof_text; i++) {
            lexeme[i] = eof_text[i]; /* the last pass copies the NUL terminator */
        }
    }

    printf("Next token is: %d, Next lexeme is %s\n",
           nextToken, lexeme);
    return nextToken;
} /* End of function lex */
The code is giving me:
ERROR - cannot open front.in
When the output should be:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is sum
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 26 Next lexeme is )
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
Next token is: -1 Next lexeme is EOF
Thank you for the help, and please let me know if you need any more details about it.
CodePudding user response:
There are multiple problems in your code:
- You should not declare `fopen` yourself. Just rely on the declaration in `<stdio.h>`.
- The test `while (nextToken = EOF);` in the `main` function should read `do { lex(); } while (nextToken != EOF);`
- `nextChar` must be defined as an `int` to reliably detect `EOF` and to have defined behavior for `isdigit()` and `isalpha()`.
- You should set `charClass = LETTER` for `'_'` in `getChar()`.
- You should accept an optional `0x` or `0X` prefix and an optional type suffix for `INT_LIT`.
- `lookup()` should not set `nextToken = EOF` for unknown characters.
- Comments should be skipped by `getNonBlank()`.
- Many fundamental token types are not supported, such as character constants and string literals.
- Using global variables is very confusing. Try to use local variables or a context structure.
The output clearly indicates the file front.in is not in the current directory when you run the program.
Here is a modified version you can study:
/* front.c - a lexical analyzer system for simple
arithmetic expressions */
#include <ctype.h>
#include <stdio.h>
/* Global Variables */
/* Value most recently returned by getc() in getChar(); an int so that
   EOF can be stored alongside every character value. */
int nextChar;
/* Class of nextChar as set by getChar(): LETTER, DIGIT, UNDERSCORE,
   UNKNOWN, or EOF at end of input. */
int charClass;
/* Text of the token being built by addChar(). */
char lexeme[1000];
/* Position used by addChar() when storing into lexeme; lex() resets it
   to 0 at the start of every token. */
size_t lexLen;
/* Input stream; main() opens it on "front.in". */
FILE *in_fp;
/* Function declarations */
int getChar(void);
int lex(void);
/* Character classes (values stored in charClass by getChar()) */
#define LETTER 0
#define DIGIT 1
/* the character '_' */
#define UNDERSCORE 2
/* anything that is not a letter, a digit or '_' */
#define UNKNOWN 99
/* Token codes (values returned by lex() and lookup(); end of input is
   reported as EOF instead) */
#define INT_LIT 10
#define IDENT 11
#define CHAR_CONST 12
#define STRING_LIT 13
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define INCR_OP 27
#define DECR_OP 28
/* returned by lookup() for characters it has no case for; shares the
   value 99 with the character class UNKNOWN */
#define UNKNOWN_OP 99
/******************************************************/
/* main driver
   Opens "front.in" (looked up relative to the current working directory),
   then prints one line per token returned by lex(), including the final
   EOF token. If the file cannot be opened an error line is printed
   instead. Always returns 0. */
int main(int argc, char *argv[]) {
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* Open the input data file and process its contents */
    in_fp = fopen("front.in", "r");
    if (in_fp == NULL) {
        printf("ERROR - cannot open front.in \n");
        return 0;
    }

    /* Prime the scanner with the first character. */
    getChar();
    for (;;) {
        int nextToken = lex();
        printf("Next token is: %d, Next lexeme is %s\n",
               nextToken, lexeme);
        if (nextToken == EOF)
            break;
    }

    /* Release the stream; the original never closed it. */
    fclose(in_fp);
    return 0;
}
/*****************************************************/
/* getChar - read the next character of input into nextChar, record its
   class in charClass, and return the character (EOF at end of input). */
int getChar(void) {
    nextChar = getc(in_fp);

    if (nextChar == EOF) {
        charClass = EOF;
    } else if (isalpha(nextChar)) {
        charClass = LETTER;
    } else if (isdigit(nextChar)) {
        charClass = DIGIT;
    } else if (nextChar == '_') {
        charClass = UNDERSCORE;
    } else {
        charClass = UNKNOWN;
    }
    return nextChar;
}
/*****************************************************/
/* getNonBlank - advance the input until nextChar starts a token.
   Skips whitespace, "//" comments up to the end of the line, and
   "/ * ... * /"-style block comments. A '/' that does not open a comment
   is left in nextChar, with the character after it pushed back onto the
   stream. */
void getNonBlank(void) {
    while (1) {
        int peek;

        if (isspace(nextChar)) {
            /* consume all spaces */
            getChar();
            continue;
        }
        if (nextChar != '/')
            return;

        /* A '/' may open a comment: look at the character after it. */
        peek = getc(in_fp);
        if (peek == '/') {
            /* single line comment: discard through the newline (or EOF) */
            do {
                peek = getc(in_fp);
            } while (peek != EOF && peek != '\n');
            getChar();
        } else if (peek == '*') {
            /* multiline comment: discard until a '*' is directly followed by '/' */
            int prev = 0;

            for (;;) {
                peek = getc(in_fp);
                if (peek == EOF || (prev == '*' && peek == '/'))
                    break;
                prev = peek;
            }
            getChar();
        } else {
            /* plain '/': give the look-ahead character back */
            ungetc(peek, in_fp);
            return;
        }
    }
}
/*****************************************************/
/* addChar - append the character ch to lexeme.
   While there is room, stores ch at index lexLen, advances lexLen and
   keeps the buffer NUL-terminated. Once the buffer is full the character
   is dropped; lexLen still advances so that the error line is printed
   only for the first dropped character of a token. */
void addChar(int ch) {
    const size_t capacity = sizeof(lexeme) - 1; /* last slot is reserved for the NUL */

    if (lexLen < capacity) {
        lexeme[lexLen++] = (char)ch; /* the increment is essential: without it every
                                        character is overwritten by the terminator */
        lexeme[lexLen] = '\0';
        return;
    }

    if (lexLen == capacity) {
        /* output error message once per token */
        printf("Error - lexeme is too long\n");
    }
    lexLen++;
}
/*****************************************************/
/* lookup - a function to lookup operators and parentheses
and return the token */
int lookup(int ch) {
switch (ch) {
case '(':
addChar(ch);
getChar();
return LEFT_PAREN;
case ')':
addChar(ch);
getChar();
return RIGHT_PAREN;
case ' ':
addChar(ch);
if (getChar() == ' ') {
addChar(ch);
getChar();
return INCR_OP;
} else {
return ADD_OP;
}
case '-':
addChar(ch);
if (getChar() == '-') {
addChar(ch);
getChar();
return DECR_OP;
} else {
return SUB_OP;
}
case '*':
addChar(ch);
getChar();
return MULT_OP;
case '/':
addChar(ch);
getChar();
return DIV_OP;
case '=':
addChar(ch);
getChar();
return ASSIGN_OP;
default:
addChar(ch);
getChar();
return UNKNOWN_OP;
}
}
/* lex - recognise the next token and return its code.
   Skips blanks and comments, then collects the token's characters in
   lexeme. Returns IDENT, INT_LIT, CHAR_CONST, STRING_LIT, one of the
   codes produced by lookup(), or EOF at end of input. */
int lex(void) {
    lexLen = 0;
    getNonBlank();

    if (charClass == LETTER || charClass == UNDERSCORE) {
        /* Identifier: letter or '_' followed by letters, digits and '_'. */
        do {
            addChar(nextChar);
            getChar();
        } while (charClass == LETTER || charClass == DIGIT || charClass == UNDERSCORE);
        return IDENT;
    }

    if (charClass == DIGIT) {
        /* Integer literal: a run of digits. */
        do {
            addChar(nextChar);
            getChar();
        } while (charClass == DIGIT);
        return INT_LIT;
    }

    if (charClass == EOF) {
        /* end of file: make lexeme <EOF> to distinguish from EOF literal */
        static const char marker[] = "<EOF>";

        for (size_t i = 0; marker[i] != '\0'; i++)
            addChar(marker[i]);
        return EOF;
    }

    /* Everything else (UNKNOWN or any other class). */
    if (nextChar == '"' || nextChar == '\'') {
        /* Character constant or string literal: copy through the matching
           quote, or to end of input if the literal is unterminated. */
        const int quote = nextChar;

        addChar(nextChar);
        while (getChar() != EOF) {
            addChar(nextChar);
            if (nextChar == quote) {
                /* closing quote: step past it */
                getChar();
                break;
            }
            if (nextChar == '\\') {
                /* escape: the following character is taken verbatim */
                if (getChar() == EOF)
                    break;
                addChar(nextChar);
            }
        }
        return (quote == '\'') ? CHAR_CONST : STRING_LIT;
    }

    /* Parentheses and operators */
    return lookup(nextChar);
}
CodePudding user response:
At first glance, `fopen` should not be redeclared here as `*fopen()`; it is already declared in <stdio.h>. Remove that declaration by replacing this line:
FILE *in_fp, *fopen();
with
FILE *in_fp;
Also, call fclose(in_fp); once you have finished reading, to close the file and release the stream.