Home > Back-end >  lexical Analyzer to C
lexical Analyzer to C

Time:09-18

This is my code here, I need it to be output as lexical analysis. I should what the output should be on the bottom. I don't understand why my code is giving me this error.

/* front.c - a lexical analyzer system for simple
             arithmetic expressions */

#include <stdio.h>
#include <ctype.h>

/* Global declarations */
/* Variables */
int charClass;
char lexeme[100];
char nextChar;
int lexLen;
int token;
int nextToken;
FILE *in_fp, *fopen();
/* Function declarations */
void addChar();
void getChar();
void getNonBlank();
int lex();

/* Character classes */
#define LETTER 0
#define DIGIT 1
#define UNKNOWN 99

/* Token codes */
#define INT_LIT 10
#define IDENT 11
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26

/******************************************************/
/* main driver */
int main(int argc, char *argv[]) {

    /* Open the input data file and process its contents */
    if ((in_fp = fopen("front.in", "r")) == NULL)
        printf("ERROR - cannot open front.in \n");
    else {
        getChar();
        do {
            lex();
        } while (nextToken = EOF);
    }
}

/*****************************************************/
/* lookup - a function to lookup operators and parentheses
            and return the token */
int lookup(char ch) {
    switch (ch) {
      case '(':
        addChar();
        nextToken = LEFT_PAREN;
        break;

      case ')':
        addChar();
        nextToken = RIGHT_PAREN;
        break;

      case ' ':
        addChar();
        nextToken = ADD_OP;
        break;

      case '-':
        addChar();
        nextToken = SUB_OP;
        break; 

      case '*':
        addChar();
        nextToken = MULT_OP;
        break;

      case '/':
        addChar();
        nextToken = DIV_OP;
        break;

      default:
        addChar();
        nextToken = EOF;
        break;
    }
    return nextToken;
}

/*****************************************************/
/* addChar - a function to add nextChar to lexeme */
void addChar() {
    if (lexLen <= 98) {
        lexeme[lexLen  ] = nextChar;
        lexeme[lexLen] = 0;
    } else
        printf("Error - lexeme is too long \n");
}

/*****************************************************/
/* getChar - a function to get the next character of 
             input and determine its character class */
void getChar() {
    if ((nextChar = getc(in_fp)) != EOF) {
        if (isalpha(nextChar))
            charClass = LETTER;
        else 
        if (isdigit(nextChar))
            charClass = DIGIT;
        else
            charClass = UNKNOWN;
    } else
        charClass = EOF;
}

/*****************************************************/
/* getNonBlank - a function to call getChar until it
                 returns a non-whitespace character */
void getNonBlank() {
    while (isspace(nextChar))
        getChar();
}

/* lex - a simple lexical analyzer for arithmetic 
         expressions */
int lex() {
    lexLen = 0;
    getNonBlank();
    switch (charClass) {
        /* Parse identifiers */
      case LETTER:
        addChar();
        getChar();
        while (charClass == LETTER || charClass == DIGIT) {
            addChar();
            getChar();
        }
        nextToken = IDENT;
        break;

        /* Parse integer literals */
      case DIGIT:
        addChar();
        getChar();
        while (charClass == DIGIT) {
            addChar();
            getChar();
        }
        nextToken = INT_LIT;
        break;

        /* Parentheses and operators */
      case UNKNOWN:
        lookup(nextChar);
        getChar();
        break;

        /* EOF */
      case EOF:
        nextToken = EOF;
        lexeme[0] = 'E';
        lexeme[1] = 'O';
        lexeme[2] = 'F';
        lexeme[3] = 0;
        break;
    } /* End of switch */
    printf("Next token is: %d, Next lexeme is %s\n", 
           nextToken, lexeme);
    return nextToken;
} /* End of function lex */

The code is giving me:

ERROR - cannot open front.in

When the output should be:

Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is sum
Next token is: 21 Next lexeme is  
Next token is: 10 Next lexeme is 47
Next token is: 26 Next lexeme is )
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
Next token is: -1 Next lexeme is EOF

Thank you for the help, and please let me know if you need anymore details about it.

CodePudding user response:

There are multiple problems in your code:

  • you should not declare fopen yourself. Just rely on the declaration in <stdio.h>

  • the test while (nextToken = EOF); in the main function should read

      do {
          lex();
      } while (nextToken != EOF);
    
  • nextChar must be defined as an int to reliably detect EOF and have defined behavior for isdigit() and isalpha().

  • you should set charClass = LETTER for '_' in getChar().

  • you should accept an optional 0x or 0X prefix and an optional type suffix for INT_LIT.

  • lookup() should not set nextToken = EOF for unknown characters.

  • comments should be skipped by getNonBlank ().

  • many fundamental token types are not supported such as character constants and string literals.

  • using global variables is very confusing. Try and use local variables or a context structure.

The output clearly indicates the file front.in is not in the current directory when you run the program.

Here is a modified version you can study:

/* front.c - a lexical analyzer system for simple
   arithmetic expressions */

#include <ctype.h>
#include <stdio.h>

/* Global Variables */
int nextChar;
int charClass;
char lexeme[1000];
size_t lexLen;
FILE *in_fp;

/* Function declarations */
int getChar(void);
int lex(void);

/* Character classes */
#define LETTER 0
#define DIGIT 1
#define UNDERSCORE 2
#define UNKNOWN 99

/* Token codes */
#define INT_LIT 10
#define IDENT 11
#define CHAR_CONST 12
#define STRING_LIT 13
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define INCR_OP 27
#define DECR_OP 28
#define UNKNOWN_OP 99

/******************************************************/
/* main driver */
int main(int argc, char *argv[]) {
    /* Open the input data file and process its contents */
    if ((in_fp = fopen("front.in", "r")) == NULL) {
        printf("ERROR - cannot open front.in \n");
    } else {
        getChar();
        for (;;) {
            int nextToken = lex();
            printf("Next token is: %d, Next lexeme is %s\n",
                   nextToken, lexeme);
            if (nextToken == EOF)
                break;
        }
    }
    return 0;
}

/*****************************************************/
/* getChar - a function to get the next character of
   input and determine its character class */
int getChar(void) {
    if ((nextChar = getc(in_fp)) != EOF) {
        if (isalpha(nextChar))
            charClass = LETTER;
        else
        if (isdigit(nextChar))
            charClass = DIGIT;
        else
        if (nextChar == '_')
            charClass = UNDERSCORE;
        else
            charClass = UNKNOWN;
    } else {
        charClass = EOF;
    }
    return nextChar;
}

/*****************************************************/
/* getNonBlank - a function to call getChar until it
   returns a non-whitespace character */
void getNonBlank(void) {
    for (;;) {
        if (isspace(nextChar)) {
            /* consume all spaces */
            getChar();
        } else
        if (nextChar == '/') {
            /* check for a comment */
            int c = getc(in_fp);
            if (c == '/') {
                /* single line comment */
                while ((c = getc(in_fp)) != EOF && c != '\n')
                    continue;
                getChar();
            } else
            if (c == '*') {
                /* multiline comment */
                int last = 0;
                while ((c = getc(in_fp)) != EOF && !(last == '*' && c == '/'))
                    last = c;
                getChar();
            } else {
                ungetc(c, in_fp);
                return;
            }
        } else
            return;
    }
}

/*****************************************************/
/* addChar - a function to add the character to lexeme */
void addChar(int ch) {
    if (lexLen < sizeof(lexeme) - 1) {
        lexeme[lexLen  ] = (char)ch;
        lexeme[lexLen] = 0;
    } else {
        if (lexLen == sizeof(lexeme) - 1) {
            /* output error message once per token */
            printf("Error - lexeme is too long\n");
        }
        lexLen  ;
    }
}

/*****************************************************/
/* lookup - a function to lookup operators and parentheses
   and return the token */
int lookup(int ch) {
    switch (ch) {
    case '(':
        addChar(ch);
        getChar();
        return LEFT_PAREN;

    case ')':
        addChar(ch);
        getChar();
        return RIGHT_PAREN;

    case ' ':
        addChar(ch);
        if (getChar() == ' ') {
            addChar(ch);
            getChar();
            return INCR_OP;
        } else {
            return ADD_OP;
        }

    case '-':
        addChar(ch);
        if (getChar() == '-') {
            addChar(ch);
            getChar();
            return DECR_OP;
        } else {
            return SUB_OP;
        }

    case '*':
        addChar(ch);
        getChar();
        return MULT_OP;

    case '/':
        addChar(ch);
        getChar();
        return DIV_OP;

    case '=':
        addChar(ch);
        getChar();
        return ASSIGN_OP;

    default:
        addChar(ch);
        getChar();
        return UNKNOWN_OP;
    }
}

/* lex - a simple lexical analyzer for arithmetic
   expressions */
int lex(void) {
    lexLen = 0;
    getNonBlank();
    switch (charClass) {
    case LETTER:
    case UNDERSCORE:
        /* Parse identifiers */
        addChar(nextChar);
        getChar();
        while (charClass == LETTER || charClass == DIGIT || charClass == UNDERSCORE) {
            addChar(nextChar);
            getChar();
        }
        return IDENT;

    case DIGIT:
        /* Parse integer literals */
        addChar(nextChar);
        getChar();
        while (charClass == DIGIT) {
            addChar(nextChar);
            getChar();
        }
        return INT_LIT;

    case UNKNOWN:
    default:
        if (nextChar == '"' || nextChar == '\'') {
            /* Parse character constant and string literals */
            int sep = nextChar;
            addChar(nextChar);
            for (;;) {
                if (getChar() == EOF)
                    break;
                addChar(nextChar);
                if (nextChar == sep) {
                    getChar();
                    break;
                } else
                if (nextChar == '\\') {
                    if (getChar() == EOF)
                        break;
                    addChar(nextChar);
                }
            }
            if (sep == '\'')
                return CHAR_CONST;
            else
                return STRING_LIT;
        }

        /* Parentheses and operators */
        return lookup(nextChar);

    case EOF:
        /* end of file: make lexeme <EOF> to distinguish from EOF literal */
        addChar('<');
        addChar('E');
        addChar('O');
        addChar('F');
        addChar('>');
        return EOF;
    }
}

CodePudding user response:

At a first glance this should not be a pointer *fopen(); remove it and replace this line:

FILE *in_fp, *fopen();

with

FILE *in_fp;

Plus: call fclose(in_fp); at some point to release the pointer.

  • Related