CSC 372 - Systems II: Compiler Construction
Dr. R. M. Siegfried
The Jason Programming Language Compiler: The LL(1) Parser
/*
* Jason0.c - A table-driven parser for the Jason programming
* language, dialect 0.
* Last revised 9/11/97
*/
#include <stdio.h>
#include <stdlib.h>
#include "symbol.h"
#include "scan.h"
/*
 * Table-size constants.
 * NOTE(review): NUMNONTERMS and NUMPRODUCTIONS are not referenced in the
 * code visible in this file, and their values (6 and 7) do not match the
 * 34 nonterminals listed in enum nontermtype or the production numbers
 * 1..58 that appear in prodtable - presumably left over from a smaller
 * grammar; confirm before relying on them.
 */
#define NUMNONTERMS 6
#define NUMPRODUCTIONS 7
/* Capacity of the parse stack; parsepush() reports overflow at this depth */
#define MAXPARSESTACK 1000
/*
 * The nonterminals of the Jason LL(1) grammar.
 * The numeric value of each enumerator selects the matching row of
 * prodtable and the matching entry of nontermstrings, so the order
 * here must stay in step with both.
 */
enum nontermtype {
    NTProgram,          /*  0 */
    NTHeader,           /*  1 */
    NTDeclSec,          /*  2 */
    NTVarDecls,         /*  3 */
    NTMoreVarDecls,     /*  4 */
    NTVarDecl,          /*  5 */
    NTDataType,         /*  6 */
    NTIdList,           /*  7 */
    NTMoreIdList,       /*  8 */
    NTProcDecls,        /*  9 */
    NTProcDecl,         /* 10 */
    NTProcHeader,       /* 11 */
    NTProcDeclSec,      /* 12 */
    NTParamDeclSec,     /* 13 */
    NTParamDecls,       /* 14 */
    NTMoreParamDecls,   /* 15 */
    NTParamDecl,        /* 16 */
    NTBlock,            /* 17 */
    NTStatements,       /* 18 */
    NTMoreStatements,   /* 19 */
    NTStatement,        /* 20 */
    NTExpression,       /* 21 */
    NTMoreExpression,   /* 22 */
    NTTerm,             /* 23 */
    NTMoreTerm,         /* 24 */
    NTFactor,           /* 25 */
    NTCondition,        /* 26 */
    NTAddOp,            /* 27 */
    NTMultOp,           /* 28 */
    NTRelOp,            /* 29 */
    NTArglist,          /* 30 */
    NTArguments,        /* 31 */
    NTMoreArguments,    /* 32 */
    NTElseClause        /* 33 */
};
/*
 * Printable names of the nonterminals, in the same order as the
 * enumerators of enum nontermtype, so that a nonterminal's code can
 * be used directly as an index (see printnonterm()). Used when
 * printing nonterminals for debugging or verification.
 */
char *nontermstrings[] = {
    "NTProgram",
    "NTHeader",
    "NTDeclSec",
    "NTVarDecls",
    "NTMoreVarDecls",
    "NTVarDecl",
    "NTDataType",
    "NTIdList",
    "NTMoreIdList",
    "NTProcDecls",
    "NTProcDecl",
    "NTProcHeader",
    "NTProcDeclSec",
    "NTParamDeclSec",
    "NTParamDecls",
    "NTMoreParamDecls",
    "NTParamDecl",
    "NTBlock",
    "NTStatements",
    "NTMoreStatements",
    "NTStatement",
    "NTExpression",
    "NTMoreExpression",
    "NTTerm",
    "NTMoreTerm",
    "NTFactor",
    "NTCondition",
    "NTAddOp",
    "NTMultOp",
    "NTRelOp",
    "NTArglist",
    "NTArguments",
    "NTMoreArguments",
    "NTElseClause"
};
/*
 * The production table for predictive parsing.
 * Each row corresponds to one nonterminal, in the order of the
 * enumerators of enum nontermtype; each column corresponds to a
 * lookahead token code (processnonterm() indexes the table as
 * prodtable[nonterminal][thistoken]).
 * A nonzero entry is the number of the production to apply for that
 * nonterminal/lookahead pair, and is used as an index into prodindex[].
 * A zero entry means that there is no such production
 * and it is a parsing error.
 */
const int prodtable[][NUMTOKENS+3] = {
/*Program*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*Header*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*DeclSect*/ { 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*VarDeclSect*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*MoreVarDecls*/{ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 5, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*VarDecl*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*DataType*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*IdList*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
0},
/*MoreIdList*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,12,11, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ProcDecls*/ {14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ProcDecl*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,15, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ProcHeader*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,16, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ProcDeclSect*/{17, 0,17, 0, 0, 0, 0, 0, 0, 0, 0,17, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ParamDeclSec*/{19, 0,19, 0, 0, 0, 0, 0, 0, 0, 0,18, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ParamDecls*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,20, 0, 0, 0, 0,20, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*MoreParmDcls*/{22, 0,22, 0, 0, 0, 0, 0, 0, 0,21, 0, 0, 0, 0,21, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*ParamDecl*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,23, 0, 0, 0, 0,23, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*Block*/ {24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*Statements*/ { 0,25, 0, 0, 0, 0, 0, 0, 0,25, 0, 0, 0, 0,25, 0,25, 0,25,
25,25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*MoreStatmnts*/{ 0, 0, 0, 0,27,27,27,27,27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,26, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*Statement*/ { 0,34, 0, 0,58,58,58,58,58,31, 0, 0, 0, 0,28, 0,29, 0,33,
32,30, 0, 0, 0, 0, 0,58, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*Expression*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,35,
35},
/*MoreExpr.*/ { 0, 0, 0,37,37,37,37,37,37, 0, 0, 0, 0, 0, 0, 0, 0,37, 0,
0, 0, 0,36,36, 0,37,37, 0, 0,37,37,37, 0, 0, 0, 0,
0},
/*Term*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,38,
38},
/*MoreTerm*/ { 0, 0, 0,40,40,40,40,40,40, 0, 0, 0, 0, 0, 0, 0, 0,40, 0,
0, 0,39,40,40,39,40,40, 0, 0,40,40,40, 0, 0, 0, 0,
0},
/*Factor*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,41,
42},
/*Condition*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,43,
43},
/*AddOp*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0,44,45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*MultOp*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0,46, 0, 0,47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0},
/*RelOp*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,48, 0, 0, 0,50,51,49, 0, 0, 0, 0,
0},
/*ArgList*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,52, 0, 0, 0,
0},
/*Arguments*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,53,
0},
/*MoreArgs.*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,54, 0, 0, 0, 0, 0,55, 0, 0,
0},
/*ElseClause*/ { 0, 0, 0, 0,56, 0,57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0}
};
/*
 * Two types of items appear in a production:
 * a nonterminal or a terminal.
 */
enum termtagtype {
    Nonterm,
    Term
};

/*
 * One item on the right-hand side of a production: the kind of the
 * item, plus the nonterminal code or token code that identifies it.
 */
struct prodarraytype {
    enum termtagtype PrTermOrNonterm;
    int PrParseItem;
};
/*
 * The right-hand sides of the productions of the Jason language,
 * stored back to back in production order. Only the right-hand
 * sides appear; prodindex[] records where each production starts
 * in this array and how many items it has. The epsilon-productions
 * are not listed here (they have length 0 in prodindex[]).
 * NOTE(review): the original comment described the productions as
 * "complete with their semantic actions", but no semantic-action
 * entries appear among the items visible here.
 */
const struct prodarraytype prodarray[] = {
{Nonterm, NTHeader}, {Nonterm, NTDeclSec},
{Nonterm, NTBlock}, {Term, tokperiod},
{Term, tokprogram}, {Term, tokidentifier},
{Term, toksemicolon}, {Term, tokdeclare},
{Nonterm, NTVarDecls}, {Nonterm, NTProcDecls},
{Nonterm, NTVarDecl}, {Nonterm, NTMoreVarDecls},
{Nonterm, NTVarDecl}, {Nonterm, NTMoreVarDecls},
{Nonterm, NTDataType}, {Nonterm, NTIdList},
{Term, toksemicolon}, {Term, tokreal},
{Term, tokinteger}, {Term, tokidentifier},
{Nonterm, NTMoreIdList}, {Term, tokcomma},
{Term, tokidentifier}, {Nonterm, NTMoreIdList},
{Nonterm, NTProcDecl}, {Nonterm, NTProcDecls},
{Nonterm, NTProcHeader}, {Nonterm, NTProcDeclSec},
{Nonterm, NTBlock}, {Term, toksemicolon},
{Term, tokprocedure}, {Term, tokidentifier},
{Term, toksemicolon}, {Nonterm, NTParamDeclSec},
{Nonterm, NTDeclSec}, {Term, tokparameters},
{Nonterm, NTParamDecls}, {Nonterm, NTParamDecl},
{Nonterm, NTMoreParamDecls}, {Nonterm, NTParamDecl},
{Nonterm, NTMoreParamDecls}, {Nonterm, NTDataType},
{Term, tokidentifier}, {Term, toksemicolon},
{Term, tokbegin}, {Nonterm, NTStatements},
{Term, tokend}, {Nonterm, NTStatement},
{Nonterm, NTMoreStatements}, {Term, toksemicolon},
{Nonterm, NTStatement}, {Nonterm, NTMoreStatements},
{Term, tokread}, {Term, tokidentifier},
{Term, tokset}, {Term, tokidentifier},
{Term, tokequals}, {Nonterm, NTExpression},
{Term, tokwrite}, {Term, tokidentifier},
{Term, tokif}, {Nonterm, NTCondition},
{Term, tokthen}, {Nonterm, NTStatements},
{Nonterm, NTElseClause}, {Term, tokwhile},
{Nonterm, NTCondition}, {Term, tokdo},
{Nonterm, NTStatements}, {Term, tokendwhile},
{Term, tokuntil}, {Nonterm, NTCondition},
{Term, tokdo}, {Nonterm, NTStatements},
{Term, tokenduntil}, {Term, tokcall},
{Term, tokidentifier}, {Nonterm, NTArglist},
{Nonterm, NTTerm}, {Nonterm, NTMoreExpression},
{Nonterm, NTAddOp}, {Nonterm, NTTerm},
{Nonterm, NTMoreExpression}, {Nonterm, NTFactor},
{Nonterm, NTMoreTerm}, {Nonterm, NTMultOp},
{Nonterm, NTFactor}, {Nonterm, NTMoreTerm},
{Term, tokidentifier}, {Term, tokconstant},
{Nonterm, NTExpression}, {Nonterm, NTRelOp},
{Nonterm, NTExpression}, {Term, tokplus},
{Term, tokminus}, {Term, tokstar},
{Term, tokslash}, {Term, tokequals},
{Term, toknotequal}, {Term, tokgreater},
{Term, tokless}, {Term, tokopenparen},
{Nonterm, NTArguments}, {Term, tokcloseparen},
{Term, tokidentifier}, {Nonterm, NTMoreArguments},
{Term, tokcomma}, {Term, tokidentifier},
{Nonterm, NTMoreArguments}, {Term, tokelse},
{Nonterm, NTStatements}, {Term, tokendif},
{Term, tokendif}
};
/*
 * Locates one production inside prodarray[]: the position of the
 * first right-hand-side item and the number of items.
 */
struct prodindexrec {
    int prstart;
    int prlength;
};

/*
 * The index to the productions array, one entry per production
 * number. Entry 0 is a dummy, because production number 0 means
 * "no production" in prodtable. Any other entry with a length of 0
 * is an epsilon-production. Each entry starts where the previous
 * one ends.
 */
const struct prodindexrec prodindex[] = {
    {  0, 0},   /*  0: dummy */
    {  0, 4},   /*  1 */
    {  4, 3},   /*  2 */
    {  7, 3},   /*  3 */
    { 10, 2},   /*  4 */
    { 12, 2},   /*  5 */
    { 14, 0},   /*  6: epsilon */
    { 14, 3},   /*  7 */
    { 17, 1},   /*  8 */
    { 18, 1},   /*  9 */
    { 19, 2},   /* 10 */
    { 21, 3},   /* 11 */
    { 24, 0},   /* 12: epsilon */
    { 24, 2},   /* 13 */
    { 26, 0},   /* 14: epsilon */
    { 26, 4},   /* 15 */
    { 30, 3},   /* 16 */
    { 33, 2},   /* 17 */
    { 35, 2},   /* 18 */
    { 37, 0},   /* 19: epsilon */
    { 37, 2},   /* 20 */
    { 39, 2},   /* 21 */
    { 41, 0},   /* 22: epsilon */
    { 41, 3},   /* 23 */
    { 44, 3},   /* 24 */
    { 47, 2},   /* 25 */
    { 49, 3},   /* 26 */
    { 52, 0},   /* 27: epsilon */
    { 52, 2},   /* 28 */
    { 54, 4},   /* 29 */
    { 58, 2},   /* 30 */
    { 60, 5},   /* 31 */
    { 65, 5},   /* 32 */
    { 70, 5},   /* 33 */
    { 75, 3},   /* 34 */
    { 78, 2},   /* 35 */
    { 80, 3},   /* 36 */
    { 83, 0},   /* 37: epsilon */
    { 83, 2},   /* 38 */
    { 85, 3},   /* 39 */
    { 88, 0},   /* 40: epsilon */
    { 88, 1},   /* 41 */
    { 89, 1},   /* 42 */
    { 90, 3},   /* 43 */
    { 93, 1},   /* 44 */
    { 94, 1},   /* 45 */
    { 95, 1},   /* 46 */
    { 96, 1},   /* 47 */
    { 97, 1},   /* 48 */
    { 98, 1},   /* 49 */
    { 99, 1},   /* 50 */
    {100, 1},   /* 51 */
    {101, 3},   /* 52 */
    {104, 2},   /* 53 */
    {106, 3},   /* 54 */
    {109, 0},   /* 55: epsilon */
    {109, 3},   /* 56 */
    {112, 1},   /* 57 */
    {113, 0}    /* 58: epsilon */
};
/*
 * A node that travels on the parse stack: one terminal or
 * nonterminal awaiting matching or expansion.
 */
struct parsenoderec {
    /* Nesting depth; copied from currentlevel when the node is
       created and used by parse() to indent its trace output */
    int level;
    /* Whether ParseItem holds a token code or a nonterminal code */
    enum termtagtype TermOrNonterm;
    /* The token or nonterminal code itself */
    int ParseItem;
    /* Set by parse() from tabindex when a terminal is matched */
    int symtabentry;
};
/*
 * The basic parse functions (all defined later in this file):
 * getparsenode - allocate and fill a parse node for a terminal
 *                or nonterminal
 * parse        - run the table-driven parse loop
 * error        - print a message with a line number, then terminate
 */
struct parsenoderec *getparsenode(enum termtagtype termtag, int info);
void parse(void);
void error(char message[], int linenum);
/*
 * The declarations and functions for handling the parse stack.
 * The stack is a fixed-size array of pointers to parse nodes; top is
 * the number of items currently stored, and therefore also the index
 * of the next free slot.
 */
typedef struct {
int top;
struct parsenoderec *parsptr[MAXPARSESTACK];
} parsestack;
/* The one parse stack used by the parser */
parsestack pa;
struct parsenoderec *parsepop(void);
void parsepush(struct parsenoderec *x);
enum logical parseempty(void);
void initparsestack(void);
/* NOTE(review): initparseentry and the type procstackitem are neither
   defined nor used in the code visible here - presumably they belong to
   another part of the compiler; confirm */
procstackitem initparseentry(int tabindex);
/*
 * The functions which are subsidiary to the function parse:
 * processnonterm - expand a nonterminal using the production table
 * printnonterm   - print the name of a nonterminal
 * matchtoken     - compare the lookahead token with an expected terminal
 */
void processnonterm(struct parsenoderec *thisnode);
void printnonterm(int i);
int matchtoken(int thistoken, int thattoken);
/* External variables global to the parser */
/* The source file being compiled; opened in main() */
FILE *ifp;
/* NOTE(review): not referenced in the code visible here */
char tokenstring[TOKENSTRINGLENGTH];
/* The current lookahead token, refreshed by each call to gettoken() */
enum tokentype thistoken;
/* parsetree: the node for NTProgram created at the start of parse();
   thisnode: the node most recently popped from the parse stack */
struct parsenoderec *parsetree, *thisnode;
/* currentlevel: level stamped on newly created parse nodes;
   tabindex: filled in by gettoken() through a pointer;
   numops: not referenced in the code visible here */
int currentlevel = 1, tabindex, numops;
/*
 * main() - This is a temporary main function to get the parser
 *          up and running. It initializes the symbol table, opens
 *          the source code file, reads the first lookahead token,
 *          parses, pauses for a keypress, then dumps the symbol
 *          table.
 *
 * Returns 0 on success, 1 if the source file could not be opened.
 */
int main(int argc, char *argv[])
{
    initializesymtab();

    ifp = openfile(argc, argv);
    /*
     * Guard against handing a null stream to the scanner.
     * NOTE(review): openfile() is defined elsewhere and may already
     * terminate on failure; the check is harmless in that case.
     */
    if (ifp == NULL) {
        printf("Unable to open source file\n");
        return(1);
    }

    /* Prime the lookahead before the parse loop starts */
    thistoken = gettoken(ifp, &tabindex);
    parse();

    /* Pause so the parse trace can be read before the dump */
    getchar();
    dumpsymboltable();
    /* printintcode() is not called yet */
    return(0);
}
/*
* Parse() - This procedure checks the production table to
* make certain that there is a production for
* which this nonterminal can be expanded that
* begins with this token. If there isn't, this
* is a fatal syntactic error; the compiler will
* terminate execution.
*
* Then it pushes its right sentential form on
* the stack after linking them to their next
* node.
*/
void parse(void)
{
int i, lines = 0;
initparsestack();
parsetree = getparsenode(Nonterm, NTProgram);
parsepush(parsetree);
do {
/*
* Look up the production in the production table.
* If not there, terminate with an error message.
*/
thisnode = parsepop();
for (i = 0; i < thisnode -> level; i++)
printf(" ");
printf("%d ", thisnode -> level);
if (thisnode -> TermOrNonterm == Term)
printtoken(thisnode -> ParseItem);
else
printnonterm(thisnode -> ParseItem);
putchar('\n');
if (thisnode -> TermOrNonterm == Term) {
/* If its's a terminal, match it to the
lookahead and get a new lookahead token */
if (matchtoken(thistoken, thisnode -> ParseItem))
thisnode -> symtabentry = tabindex;
else error("Parsing error - token does not required"
" terminal", linenum);
thistoken = gettoken(ifp, &tabindex);
}
/* Expand the nonterminal and push the items on
the right hand side in reverse order */
else processnonterm(thisnode);
if (++lines%10 == 0)
getchar();
} while(!parseempty());
}
/*
 * GetParseNode() - Allocate a parse node that will be pushed
 *                  on the stack and fill it with the
 *                  appropriate information.
 *
 * termtag - whether the node is a terminal or a nonterminal
 * info    - the token or nonterminal code
 *
 * The node's level is taken from the global currentlevel. Its
 * symtabentry starts at -1 ("no symbol table entry yet") so that
 * the field never holds an indeterminate value; parse() overwrites
 * it when a terminal is matched.
 *
 * Returns the new node; the caller owns it. If memory cannot be
 * allocated, a message is printed and execution terminates.
 */
struct parsenoderec *getparsenode(enum termtagtype termtag, int info)
{
    struct parsenoderec *p = malloc(sizeof *p);

    if (p == NULL) {
        printf("Out of memory allocating parse node\n");
        exit(10);
    }
    p -> level = currentlevel;
    p -> TermOrNonterm = termtag;
    p -> ParseItem = info;
    p -> symtabentry = -1;
    return(p);
}
/*
* ProcessNonterm() - The details of looking up the nonterminal in the
* production table and pushing items on the stack
*/
void processnonterm(struct parsenoderec *thisnode)
{
struct parsenoderec *p/*, *q*/;
int prodnum, i;
/*
* Look up the nonterminal in the production table.
* If the production number is 0, there is no production
* and it is a parse error.
*/
currentlevel = thisnode -> level +1;
if ((prodnum = prodtable[thisnode -> ParseItem][thistoken]) == 0){
printf("%d\t", thistoken);
printtoken(thistoken);
error("Nonterminal - token mismatch", linenum);
}
/*
* If there is a production, push the items
* on the right-hand onto the parse stack in
* reverse order.
*/
if (prodindex[prodnum].prlength != 0) {
i = prodindex[prodnum].prstart
+ prodindex[prodnum].prlength - 1;
p = getparsenode(prodarray[i].PrTermOrNonterm,
prodarray[i].PrParseItem);
parsepush(p);
for (i = prodindex[prodnum].prstart
+ prodindex[prodnum].prlength - 2;
i >= prodindex[prodnum].prstart; --i) {
p = getparsenode(prodarray[i].PrTermOrNonterm,
prodarray[i].PrParseItem);
parsepush(p);
}
}
}
/*
 * ParsePop() - Take the top node off the parse stack and hand it
 *              back to the caller.
 *              Popping an empty stack prints an underflow message
 *              and terminates with exit code 10.
 */
struct parsenoderec *parsepop(void)
{
    if (parseempty()) {
        printf("Parse stack underflow\n");
        exit(10);
    }

    /* top counts the stored items, so the top item sits at top - 1 */
    pa.top -= 1;
    return pa.parsptr[pa.top];
}
/*
 * ParsePush() - Store the given node on top of the parse stack.
 *               If the stack already holds MAXPARSESTACK items, an
 *               overflow message is printed and the program
 *               terminates with exit code 10.
 */
void parsepush(struct parsenoderec *x)
{
    if (pa.top != MAXPARSESTACK) {
        /* top is the index of the next free slot */
        pa.parsptr[pa.top] = x;
        pa.top += 1;
        return;
    }

    printf("Parse stack overflow\n");
    exit(10);
}
/*
 * ParseEmpty() - Reports whether the parse stack is empty.
 *
 * Returns the result of comparing the stack's item count with zero:
 * nonzero (1) if the stack is empty, 0 if it is not.
 * NOTE(review): enum logical is declared elsewhere; this presumably
 * lines up with its true/false enumerators - confirm.
 *
 * The parameter list is spelled (void) to agree with the prototype
 * declared earlier in this file.
 */
enum logical parseempty(void)
{
    return(pa.top == 0);
}
/*
* InitParseStack() - Initialize the Parse stack by setting top to zero.
*/
void initparsestack(void)
{
pa.top = 0;
}
/*
 * MatchToken() - Returns 1 (true) if the lookahead token matches
 *                the expected terminal, 0 (false) if not.
 *
 * thistoken - the lookahead token code
 * thattoken - the terminal code expected by the parser
 */
int matchtoken(int thistoken, int thattoken)
{
    if (thistoken != thattoken)
        return 0;
    return 1;
}
/*
 * PrintNonterm() - Write the name of a nonterminal to standard
 *                  output, without a trailing newline.
 *
 * i - the nonterminal's enumeration value, used directly as an
 *     index into nontermstrings; it is not range-checked.
 */
void printnonterm(int i)
{
    fputs(nontermstrings[i], stdout);
}
/*
 * error() - A catch-all routine for compiling errors.
 *
 * message - the text describing the error
 * linenum - the source line number to report
 *
 * Writes "<message> on line <linenum>" to standard output and then
 * terminates execution with exit code 4; it does not return.
 */
void error(char message[], int linenum)
{
    fprintf(stdout, "%s on line %d\n", message, linenum);
    exit(4);
}
[Back to the Notes Index]