Home > OS >  after assigning token to struct node type variable the token is null
after assigning token to struct node type variable the token is null

Time:12-12

i'm tackling this strange problem (for me at least) i'm using lex and yacc to create an AST while all the tokens work and the grammar works as well (i tested it with just prints) when i'm trying to assign a new node to a variable, just for testing i'm trying to print the node's token, every time i try it the token is just (null) for example this is part of my code:

ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",$3,$6);printf("token is %s\n",$$->token);} 

the mknode function works this way:

node* mknode(char* token, node* left, node* right){
    node* newnode = (node*)malloc(sizeof(node));
    char* newstr = (char*)malloc(sizeof(token) 1);
    strcpy(newstr, token);
    newnode->left = left;
    newnode->right = right;
    return newnode;
}

so for this example this was the output:

token is (null)

any idea why it stays null? i'm using VMWare with Ubuntu 20.04 if it matters

The complete lex file:

%{
#include "y.tab.h"
#include <stdio.h>
#include <string.h>
%}

%%
"\"" {printf("LEX: double quote here\n");return DQUOTE;}
"\'" {printf("LEX: single quote here\n");return SQUOTE;}
"}" {printf("LEX: } here\n");return RIGHTBLOCK;}
"{" {printf("LEX: { here\n");return LEFTBLOCK;}
";" {printf("LEX: ; here\n");return SEMICOLON;}
"," {printf("LEX: comma here\n");return COMMA;}
"(" {printf("LEX: opening bracket here\n");return LEFTBRACKET;}
")" {printf("LEX: closing bracket here\n");return RIGHTBRACKET;}


&& {printf("LEX: and here\n");return AND;}
"||" {printf("LEX: || here\n");return OR;}
"=" {printf("LEX: assign here\n");return ASSIGN;}
== {printf("LEX: == here\n");return EQ;}
">" {printf("LEX: > here\n");return GT;}
">=" {printf("LEX: >= here\n");return GTEQ;}
"<" {printf("LEX: < here\n");return LT;}
"<=" {printf("LEX: <= here\n");return LTEQ;}
"-" {printf("LEX: - here\n");return SUB;}
"!" {printf("LEX: ! here\n");return NOT;}
"!=" {printf("LEX: != here\n");return NOTEQ;}
"/" {printf("LEX: div here\n");return DIV;}
" " {printf("LEX: add here\n");return ADD;}
"*" {printf("LEX: mul here\n");return MUL;}
"&" {printf("LEX: & here\n");return ADRS;}

if {printf("LEX: if here\n");return IF;}
else {printf("LEX: else here\n");return ELSE;}

do {printf("LEX: do here\n");return DO;}
while {printf("LEX: while here\n");return WHILE;}
for {printf("LEX: for here\n");return FOR;}

var {printf("LEX: var here\n");return VAR;}
return {printf("LEX: return here\n");return RETURN;}
null {printf("LEX: nullval here\n");return NULLVAL;}

void {printf("LEX: func return type here\n");return VOID;}
"int*" {printf("LEX: int* type here\n");return INTPOINT;}
"char*" {printf("LEX: char* type here\n");return CHARPOINT;}
"real*" {printf("LEX: real* type here\n");return REALPOINT;}
int {printf("LEX: int type here\n");return INT;}
real {printf("LEX: real type here\n");return REAL;}
char {printf("LEX: char type here\n");return CHAR;}
bool {printf("LEX: bool type here\n");return BOOL;}
"true"|"false" {printf("LEX: boolval here\n");return BOOLVAL;}
[0-9]  {yylval.string = yytext;printf("LEX: int val here\n");return INTVAL;}
"-"|0|[1-9][0-9] "."[0-9] |[1-9][0-9] '.'[0-9]['E'|'e'][' '|'-'][0-9]  {printf("LEX: realval here\n");yylval.string = strdup(yytext);return REALVAL;}
[a-zA-Z][0-9]*"_"[a-zA-Z]* {printf("LEX: ID here\n");yylval.string = strdup(yytext);return ID;}
[a-zA-Z] {printf("LEX: char here\n");yylval.string = strdup(yytext);return CHARVAL;}
[a-zAZ]*[0-9]*[a-zAZ] [0-9]*[a-zAZ]* {printf("LEX: string here\n");yylval.string = strdup(yytext);return STRING;}
. ;
%%

The complete yacc file:

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>

#include "lex.yy.c"
int yyerror();
typedef struct node
{
char* token;
struct node *left;
struct node *right;
}node;

node* mknode(char* token, node* left, node* right);
void printtree(node* tree);
%}

%union
{
    struct node *node;
    char* string;
}    

%token <string> DIV ADD MUL SUB AND NOT OR RETURN ASSIGN
%token <string> EQ GT GTEQ LT LTEQ NOTEQ SEMICOLON COMMA LEFTBRACKET RIGHTBRACKET RIGHTBLOCK LEFTBLOCK
%token <string> ID CHARVAL INTVAL REALVAL BOOLVAL STRING ADRS DQUOTE SQUOTE 
%token <node> INT REAL BOOL CHAR VOID INTPOINT CHARPOINT REALPOINT VAR NULLVAL 
%token <node> IF ELSE WHILE DO FOR 

%type <string> name oper type rettype ret
%type <node> code ifelse body action args argnum math cond params block valvar

%left SEMICOLON COMMA RIGHTBRACKET RIGHTBLOCK
%right LEFTBLOCK 
%%
st: code {printf("YACC: Code done!\n");}

code: rettype name params block code {
    $$ = mknode("(FUNC",mknode($2,mknode("(ARGS",$3,NULL),mknode("(RET",mknode($1,NULL,NULL),NULL)),mknode("(BODY",$4, NULL));
    printf("YACC: func ready\n");}| {}

params: LEFTBRACKET args RIGHTBRACKET {$$ = $2;}
        

block: LEFTBLOCK code RIGHTBLOCK {$$ = $2;}| 
       LEFTBLOCK body RIGHTBLOCK {$$ = $2;}| 
       LEFTBLOCK code body RIGHTBLOCK {$$ = mknode("",$2,$3);}

ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",$3,$6);printf("token is %s\n",$$->token);printf("YACC: if ready\n");}|
        IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK ELSE LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",$3,mknode("",$6,mknode("ELSE",$10,NULL)));
            printf("YACC: if else ready\n");}

rettype:VOID {$$ = "VOID";}|
        type {$$ = $1;}

name: STRING {$$ = $1;}|
      CHARVAL {$$ = $1;}

args: type argnum {$$ = mknode($1,mknode(" ",NULL,NULL),$2);}| 
      {$$ = mknode("",NULL,NULL);printf("YACC: args ready\n");}

argnum: name argnum {$$ = mknode($1,mknode(" ",NULL,NULL),$2);}| 
        COMMA argnum {$$ = mknode(" ",$2,NULL);}| 
        SEMICOLON args {$$ = mknode(" ",$2,NULL);}|  
        {$$ = NULL;printf("YACC: args num ready\n");}

type: INT {$$ = "INT";}|
      REAL {$$ = "REAL";}|
      CHAR {$$ = "CHAR";}|
      INTPOINT {$$ = "INT*";}|
      CHARPOINT {$$ = "CHAR*";}|
      REALPOINT {$$ = "REAL*";};

body: action body {$$ = mknode(" ",$1,$2);}|
       action {$$ = mknode(" ",$1,NULL);}| 
       ifelse  {$$ = mknode("(IF-ELSE",mknode("\n",NULL,NULL),$1);printf("YACC: block ready\n");}

action: name ASSIGN math {$$ = mknode($2,mknode($1,NULL,NULL),mknode(" ",$3,NULL));printf("YACC: action ready\n");}| 
        RETURN ret SEMICOLON {$$ = mknode("(RET",mknode($2,NULL,NULL),NULL);printf("YACC: return action ready\n");}

ret: INTVAL {$$ = $1;}| 
     SQUOTE CHARVAL SQUOTE {$$ = $2;}| 
     REALVAL {$$ = $1;}| 
     DQUOTE STRING DQUOTE  {$$ = $2;}| 
     name {$$ = $1;}| 
     ADRS name {$$ = $2;}

math: valvar oper math {$$ = mknode($2, $1,$3);}| 
      valvar SEMICOLON {$$ = mknode(" ",$1,NULL);}| 
      valvar math {$$ = mknode(" ",$1,$2);}

oper: ADD {$$ = " ";}|
      DIV {$$ = "/";}|
      SUB {$$ = "-";}|
      MUL {$$ = "*";}

cond: valvar EQ valvar {$$ = mknode($2,$1,$3);}| 
      valvar GT valvar {$$ = mknode($2,$1,$3);}| 
      valvar GTEQ valvar {$$ = mknode($2,$1,$3);}| 
      valvar LT valvar {$$ = mknode($2,$1,$3);}| 
      valvar LTEQ valvar {$$ = mknode($2,$1,$3);}| 
      valvar NOTEQ valvar {$$ = mknode($2,$1,$3);}

valvar: name {$$ = mknode($1,NULL,NULL);}|
        INTVAL {$$ = mknode($1,NULL,NULL);}
%%

int main(){
    return yyparse();
}

void printtree(node* tree){
    printf("%s\n", tree->token);
    if(tree->left)
        printtree(tree->left);
    if(tree->right)
        printtree(tree->right);
}

node* mknode(char* token, node* left, node* right){
    node* newnode = (node*)malloc(sizeof(node));
    char* newstr = (char*)malloc(strlen(token) 1);
    strcpy(newstr, token);
    newnode->left = left;
    newnode->right = right;
    return newnode;
}


int yyerror(){
    printf("language error\n");
    return 0;
}

the input test code for now is :

void foo(int x){
    if (x==5){
        return 'a';
    }
}

CodePudding user response:

Amongst many other problems, your mknode function never sets newnode->token. So it is undefined -- you're lucky that it contains a null rather than an invalid pointer that would crash.

  • Related