Parsning: Hvor er vi Mellem- kode Kald til Runtimesys. Mellemk.- optimering Kode- generering Kode- optimering Leksikalsk- analyse Syntax- analyse Semantik- analyse if viggo == 3 then.... If-token Ident-token IdentX(type=Int) Gen(Comp, *Viggo,3) Gen(jump if not, adress) Compiler Fortolker Front end Back end
Top-down parsing A A B xxy Top-down Bottom-up LL(1): Fx. Predictive Recursive Descent. Top-down LR(1): Fx. YACC Bottom-up (1): Kan altid afgøre hvilken vej vha. næste token.
LL(1): Predictive Recursive Descent. En procedure pr. non-terminal. Plus: Effektiv. Simpel at skrive i hånden. Minus: Ikke venstre rekursiv. Skal kunne bestemme vej ud fra næste token.
LL(1)-Parser program -> ’program’ ’(’ idT ’)’ varlist ’begin’ stlist ’end’ varlist -> type identlist ’;’ type -> ’float’ | ’int’ identlist -> idT identelmlist identelmlist -> ’,’ idT identelmlist | e stlist -> statement stlist | e statement -> idT ’=’ exp ’;’ exp -> idT bexp | ’(’ exp ’)’ bexp bexp-> ’+’ exp bexp | ’-’ exp bexp | e
rpdlex.l %{ #include #include "symtab.h" #include "rpd.h" %} % begin {return beginT;} end {return endT;} int {return intT;} float {return floatT;} program {return programT;} [a-zA-Z][a-zA-Z0-9]* {if ((yylval.symptr= lookup_sym(yytext)) == NULL) yylval.symptr= insert_sym(yytext); return idT;} [ \t\n] ;. {return yytext[0];} %
rpd.h #define intT 256 #define floatT 257 #define idT 258 #define beginT 259 #define endT 260 #define realT 261 #define programT 262 void init_sym(); void program (); void varlist(); void type();... typedef union { char *string; double dval; struct symnote *symptr; } YYSTYPE; YYSTYPE yylval;
#include #include "rpd.h" #include "symtab.h" int nextT; void checkfor(int token) { if (token != nextT) { printf("Syntax Error \n "); exit(0); } else nextT = yylex(); }
LL(1)-Parser program -> ’program’ ’(’ idT ’)’ varlist ’begin’ stlist ’end’ varlist -> type identlist ’;’ type -> ’float’ | ’int’ identlist -> idT identelmlist identelmlist -> ’,’ idT identelmlist | e stlist -> statement stlist | e statement -> idT ’=’ exp ’;’ exp -> idT bexp | ’(’ exp ’)’ bexp bexp-> ’+’ exp bexp | ’-’ exp bexp | e
rpd.c void program() { checkfor(programT); checkfor('('); checkfor(idT); checkfor(')'); varlist(); checkfor(beginT); stlist(); checkfor(endT); printf("Compilering faerdig.Ingen fejl\n "); }
LL(1)-Parser program -> ’program’ ’(’ idT ’)’ varlist ’begin’ stlist ’end’ varlist -> type identlist ’;’ type -> ’float’ | ’int’ identlist -> idT identelmlist identelmlist -> ’,’ idT identelmlist | e stlist -> statement stlist | e statement ->idT ’=’ exp ’;’ exp ->idT bexp | ’(’ exp ’)’ bexp bexp->’+’ exp bexp | ’-’ exp bexp | e
rpd.c void type() { if (nextT == floatT) { nextT= yylex(); Ctype= floatT; } else { if (nextT == intT) { nextT= yylex(); Ctype= intT; } else { printf("Syntax Error 'int' eller 'float' type forvendtet! \n "); exit(0); }
Ifst -> ’if’ exp ’then’ statement | ’if’ exp ’then’ statement ’else’ statement Ifst -> ’if’ exp ’then’ statement Ifend Ifend -> e | ’else’ statement A -> x1 ... | x2 ... | x3 ... | ... switch (nextT) { case (x1): .... break; case (x2): .... break; case (x3): .... break; ... default: ; }
LL(1)-Parser program -> ’program’ ’(’ idT ’)’ varlist ’begin’ stlist ’end’ varlist -> type identlist ’;’ type -> ’float’ | ’int’ identlist -> idT identelmlist identelmlist -> ’,’ idT identelmlist | e stlist -> statement stlist | e statement ->idT ’=’ exp ’;’ exp ->idT bexp | ’(’ exp ’)’ bexp bexp->’+’ exp bexp | ’-’ exp bexp | e
rpd.c void identelmlist() { if (nextT == ',') { nextT= yylex(); checkfor(idT); identelmlist(); }
rpd.c int main() { init_sym(); nextT = yylex(); program(); }
Venstre rekursiv statementlist -> statementlist statement | e void statementlist() { if (nextT == idT) { statementlist(); statement(); } }
LL(1)-Parser statement-> idT '=' exp ';' exp-> term expB expB-> termopr term expB | e termopr-> '+' | '-' term-> factor termB termB-> factoropr factor termB | e factoropr-> '*' | '/' factor-> uopr exp | '(' exp ')' | idT uopr-> '-' | e
EBNF statement-> IdT '=' exp ';' exp-> term (('+' | '-') term)* term-> sexp (('*' | '/') sexp)* sexp-> '-' element | element element-> '(' exp ')' | IdT A=(b + (c - c)) + a * d; statement IdT = exp ; term + term sexp element ( exp ) term + term sexp element IdT sexp element ( exp ) term - term sexp * sexp element IdT
EBNF exp-> term (('+' | '-') term)* void exp() { term(); while ((nextT == '+')||(nextT == '-')) { nextT= yylex(); term(); } }
Bottom-up parsning A A B xxy Top-down Bottom-up LALR(1): Fx. YACC Bottom-up
Struktur lexyacc yylex()yyparse() Fo.lFo.y Source kodeTarget kode
Makefile LEX= flex YACC= bison LIBS= -L/Programmer/GnuWin32/lib -ly -lfl -lm CC= gcc PROG= Fo $(PROG): $(PROG).tab.o lex.yy.o $(CC) -o $(PROG) $(PROG).tab.o lex.yy.o $(LIBS) lex.yy.o: $(PROG).tab.h $(PROG).tab.c: $(PROG).y $(YACC) -d -v $(PROG).y lex.yy.c: $(PROG).l $(LEX) $(PROG).l
Struktur af Fo.y Erklæringer %% Grammatiske regler %% C-kode Erklæringer: %{ #include "symtab.h" #include... %} %token name1 name2...
Grammatiske regler. BNF formalisme. Både højre og venstre rekursioner er tilladt. Ikke EBNF. Dvs ikke (..)*, (..)+ osv. Mulighed for at styre precedence. Dvs: exp-> exp + exp | exp * exp |..... Er OK.
Regler Type 2 produktion {C kode der udføres når hele højresiden er fundet.}; Ifst: IFT ’(’ logikvalue ’)’ statementlist {CodeGen(Ifst,$3,$5);} $$ : Attributten til venstresiden af produktionen $n : Attributten til det n’te symbol på højresiden. Typen af attributten angives i definitionen af tokens. Default er typen integer.
Simpel heltalsregner regn.y %token TAL % regner: exp {printf("= %d\n",$1);}; exp: exp '+' TAL {$$ = $1 + $3;} | TAL {$$ = $1;} | error ’+’ {printf("bad syntaks ppm");}; % main() { yyparse(); }
Simpel heltals regner regn.l %{ #include "y.tab.h" extern int yylval; %} % [0-9]+ {yylval = atoi(yytext); return TAL;} [ \t] ; \n return 0;. return yytext[0]; %
statement_list: statement '\n' | statement_list statement '\n' ; statement: VARIABEL '=' expression {$1->value = $3;} | expression {printf("= %e\n",$1);}; expression: expression '+' expression {$$ = $1 + $3;} | expression '-' expression {$$ = $1 - $3;} | expression '*' expression {$$ = $1 * $3;} | expression '/' expression {if ($3 == 0.0) yyerror("divide by zero"); else $$ = $1 / $3;} | '-' expression %prec UMINUS {$$ = - $2;} | '(' expression ')' {$$= $2;} | TAL {$$ = $1;} | VARIABEL {$$ = $1->value;}; %%
%{ #include #include "symtab.h" #include %} %union { char *string; double dval; struct symnote *symptr; } %token VARIABEL %token TAL %token LOG EXP SQRT %left '-' '+' %left '*' '/' %right UMINUS %type expression
typedef union { char *string; double dval; struct symnote *symptr; } YYSTYPE; #define VARIABEL 257 #define TAL 258 #define LOG 259 #define EXP 260 #define SQRT 261 #define UMINUS 262 extern YYSTYPE yylval; regn.tab.h
%{ #include #include "symtab.h" #include "regn.tab.h" %} realtal ([0-9]+|([0-9]*\.[0-9]+))([eE][-+]?[0-9]+)? variabelnavn [a-zA-Z][a-zA-Z0-9]* % {realtal} {yylval.dval = atof(yytext); return TAL;} {variabelnavn} {if ((yylval.symptr= lookup_sym(yytext)) == NULL) yylval.symptr= insert_sym(yytext); return VARIABEL;} [ \t] ; '$' {return 0;} \n|. {return yytext[0];} %
/*
 * Program entry point for the yacc-based calculator.
 *
 * Calls init_sym() (presumably sets up the symbol table used by the
 * scanner - confirm against symtab.h), then runs the generated parser.
 *
 * Returns the result of yyparse(), so a failed parse is visible to the
 * caller through the process exit status instead of being discarded.
 */
int main(void)
{
    init_sym();
    return yyparse();
}