I am implementing a compiler based on the Tiger language proposed by Andrew Appel's book "Modern Compiler Implementation in C". Link: link
I'm using the available files on the book's website, but when I run the parser, the tokens reported in the error message are wrong. For example, using this input file:
let
type rectype = {name:string, id:int}
var a:= rectype nil
in
a
end
I get the following error:
testes/teste1:1.1: syntax error, unexpected MAIOR_IGUAL
Parsing failed
However the token it should show would be "LET". I have already tested with the lexer and the tokens shown by it are correct:
Token: LET | Posicao: 1 | Valor: -
Token: TYPE | Posicao: 6 | Valor: -
Token: ID | Posicao: 11 | Valor: rectype
Token: IGUAL | Posicao: 19 | Valor: -
Token: ACHAVE | Posicao: 21 | Valor: -
Token: ID | Posicao: 22 | Valor: name
....
I've come to the conclusion that I'm doing something in the compilation, or else, my tiger.grm file (the bison input file) has something wrong, though, I do not know what. I compile the lexer and use the file "lex.yy.c" which is generated to compile with the parser afterwards. Lexer Compilation:
lextest: driver.o lex.yy.o errormsg.o util.o
cc -g -o lextest driver.o lex.yy.o errormsg.o util.o
driver.o: driver.c tokens.h errormsg.h util.h
cc -g -c driver.c
errormsg.o: errormsg.c errormsg.h util.h
cc -g -c errormsg.c
lex.yy.o: lex.yy.c tokens.h errormsg.h util.h
cc -g -c lex.yy.c
lex.yy.c: tiger.l
lex tiger.l
util.o: util.c util.h
cc -g -c util.c
clean:
rm -f a.out util.o driver.o lex.yy.o lex.yy.c errormsg.o
Compilation of the parser (using lex.yy.c generated earlier):
a.out: parsetest.o y.tab.o lex.yy.o errormsg.o util.o
cc -g parsetest.o y.tab.o lex.yy.o errormsg.o util.o
parsetest.o: parsetest.c errormsg.h util.h
cc -g -c parsetest.c
y.tab.o: y.tab.c
cc -g -c y.tab.c
y.tab.c: tiger.grm
yacc -dv tiger.grm
y.tab.h: y.tab.c
echo "y.tab.h was created at the same time as y.tab.c"
errormsg.o: errormsg.c errormsg.h util.h
cc -g -c errormsg.c
lex.yy.o: lex.yy.c y.tab.h errormsg.h util.h
cc -g -c lex.yy.c
#lex.yy.c: tiger.lex
# lex tiger.lex
util.o: util.c util.h
cc -g -c util.c
clean:
rm -f a.out util.o parsetest.o lex.yy.o errormsg.o y.tab.c y.tab.h y.tab.o
The lexer and parser are (respectively):
%{
#include <string.h>
#include "util.h"
#include "tokens.h"
#include "errormsg.h"
int charPos=1;
int yywrap(void){
charPos=1;
return 1;
}
void adjust(void){
EM_tokPos=charPos;
charPos+=yyleng;
}
%}
digit [0-9]
letter [a-zA-Z]
integer {digit}+
identifier {letter}("_"|{letter}|{digit})*
%x C_COMMENT
%%
{integer} {adjust(); yylval.ival = atoi(yytext); return INT;}
"/*" {BEGIN(C_COMMENT);}
<C_COMMENT>"*/" {BEGIN(INITIAL);}
<C_COMMENT>. { }
<INITIAL>"to" {adjust(); return TO;}
<INITIAL>"break" {adjust(); return BREAK;}
<INITIAL>"let" {adjust(); return LET;}
<INITIAL>"in" {adjust(); return IN;}
<INITIAL>"end" {adjust(); return END;}
<INITIAL>"function" {adjust(); return FUNCTION;}
<INITIAL>"var" {adjust(); return VAR;}
<INITIAL>"type" {adjust(); return TYPE;}
<INITIAL>"array" {adjust(); return ARRAY;}
<INITIAL>"if" {adjust(); return IF;}
<INITIAL>"then" {adjust(); return THEN;}
<INITIAL>"else" {adjust(); return ELSE;}
<INITIAL>"do" {adjust(); return DO;}
<INITIAL>"of" {adjust(); return OF;}
<INITIAL>"nil" {adjust(); return NIL;}
<INITIAL>"string" {adjust(); return STRING;}
<INITIAL>"," {adjust(); return VIRGULA;}
<INITIAL>":" {adjust(); return DOIS_PONTOS;}
<INITIAL>";" {adjust(); return PTO_VIRGULA;}
<INITIAL>"(" {adjust(); return APARENT;}
<INITIAL>")" {adjust(); return FPARENT;}
<INITIAL>"[" {adjust(); return ACOLCH;}
<INITIAL>"]" {adjust(); return FCOLCH;}
<INITIAL>"{" {adjust(); return ACHAVE;}
<INITIAL>"}" {adjust(); return FCHAVE;}
<INITIAL>"." {adjust(); return PONTO;}
<INITIAL>"+" {adjust(); return SOMA;}
<INITIAL>"-" {adjust(); return SUB;}
<INITIAL>"*" {adjust(); return MUL;}
<INITIAL>"/" {adjust(); return DIV;}
<INITIAL>"=" {adjust(); return IGUAL;}
<INITIAL>"<>" {adjust(); return DIF;}
<INITIAL>"<" {adjust(); return MENOR;}
<INITIAL>"<=" {adjust(); return MENOR_IGUAL;}
<INITIAL>">" {adjust(); return MAIOR;}
<INITIAL>">=" {adjust(); return MAIOR_IGUAL;}
<INITIAL>"&" {adjust(); return AND;}
<INITIAL>"|" {adjust(); return OR;}
<INITIAL>":=" {adjust(); return ATRIBUICAO;}
<INITIAL>{identifier} {adjust(); yylval.sval = String(yytext); return ID;}
<INITIAL>[0-9][a-z]* {adjust(); EM_error(EM_tokPos, "Erro: token ilegal.");}
<INITIAL>[ \t]+ {adjust(); continue;}
<INITIAL>\n {adjust(); EM_newline();}
<INITIAL>[a-z]"." {adjust(); EM_error(EM_tokPos, "Erro: token ilegal.");}
<INITIAL>. {adjust(); EM_error(EM_tokPos, "Unknown character");}
%%
Disregarding grammar, I can not test it with this error, so it probably must be all wrong. I already tested with a simple good and I saw that it does not influence the error.
%{
#include <stdio.h>
#include "util.h"
#include "errormsg.h"
int yylex(void); /* function prototype */
void yyerror(char *s){
EM_error(EM_tokPos, "%s", s);
}
%}
%union {
int pos;
int ival;
string sval;
}
%token <sval> ID STRING
%token <ival> INT
%error-verbose
%token TO BREAK LET IN END FUNCTION VAR
%token TYPE ARRAY IF THEN ELSE DO OF NIL
%token VIRGULA DOIS_PONTOS PTO_VIRGULA
%token APARENT FPARENT ACHAVE FCHAVE
%token ACOLCH FCOLCH PONTO SOMA SUB MUL
%token DIV IGUAL DIF MENOR MENOR_IGUAL
%token MAIOR MAIOR_IGUAL AND OR ATRIBUICAO
%left SOMA SUB
%left DIV MUL
%left USUB
%start program
%%
program: LET comando IN exp END
|exp
comando: comando {}
|exp {}
|ifthenelse {}
|atrib {}
|funcao {}
|record {}
exp: exp AND exp {}
|exp OR exp {}
|exp1 {}
exp1: exp2 MAIOR exp2 {}
|exp2 MAIOR_IGUAL exp2 {}
|exp2 MENOR exp2 {}
|exp2 MENOR_IGUAL exp2 {}
|exp2 IGUAL exp2 {}
|exp2 DIF exp2 {}
|exp2 {}
exp2: exp2 SOMA exp2 {}
|exp2 SUB exp2 {}
|SUB exp2 %prec USUB {}
|exp2 MUL exp2 {}
|exp2 DIV exp2 {}
|APARENT exp2 FPARENT {}
|INT {}
|ID {}
ifthenelse: IF exp THEN comando ELSE comando {}
|IF exp THEN comando {}
atrib: VAR ID ATRIBUICAO exp {}
|VAR ID DOIS_PONTOS ID ATRIBUICAO exp {}
|VAR ID ATRIBUICAO NIL {}
record: TYPE ID IGUAL STRING {}
|TYPE ID IGUAL INT {}
|TYPE ID IGUAL ACHAVE arg FCHAVE {}
arg: ID DOIS_PONTOS ID VIRGULA arg {}
|ID DOIS_PONTOS ID {}
funcao: FUNCTION ID APARENT arg FPARENT DOIS_PONTOS ID IGUAL program {}