#include	<stdio.h>
/*
 *scan.c -	Containing the necessary definitions for the scanner
 *		for the Jason programming language (dialect 4).
 *		Last revised 7/24/97
 */

#include	<ctype.h>
#include	<errno.h>
#include	<limits.h>
#include	<stdlib.h>
#include	<string.h>

#include	"symbol.h"
#include	"scan.h"


/*
 * Number of the source line currently being scanned.  It starts at 1,
 * is incremented by gettc() for every newline read and decremented by
 * ungettc() for every newline pushed back.
 */
int	linenum = 1;

/* Character-level helpers */
int		gettc(FILE *fp);
void		ungettc(int c, FILE *fp);
int		firstchar(FILE *ifp);

/* Token-level scanners, selected by gettoken() from the first character */
enum tokentype	scanword(char c, int *tabindex, FILE *fp);
enum tokentype	scannum(char c, int *tabindex, FILE *fp);
enum tokentype	scanop(char c, int *tabindex);


/*
 * openfile() - Determines the base name of the source file, appends
 *		the extension .jsn and opens the result for reading.
 *
 *		argc == 1:  only the executable's name was given, so the
 *			    user is prompted for the file name.
 *		argc == 2:  argv[1] is the file name.
 *		otherwise:  unrecoverable error - Jason does not compile
 *			    multi-file programs.
 *
 *		name receives the base name (without .jsn).  At most
 *		FILENAMELENGTH - 5 characters plus the null byte are
 *		written to it; the caller's buffer is assumed to be at
 *		least that large (TODO confirm against the caller).
 *
 *		Returns the open stream.  Exits with status 1 on a usage
 *		error, a missing name or a name that is too long, and
 *		with status 2 if the file cannot be opened.
 */
FILE *openfile(int argc, char *argv[], char name[])
{
	char		filename[FILENAMELENGTH];
	char		typed[FILENAMELENGTH];
	const char	*base = NULL;
	FILE		*ifp;

	/* Do we need to get the file name? */
	switch (argc)	{
		case 1:	printf("File name\t?");
			/* The prompt has no newline, so push it out
			   before blocking on input */
			fflush(stdout);
			if (fgets(typed, sizeof typed, stdin) == NULL)	{
				printf("No file name was entered\n");
				exit(1);
			}
			/* fgets keeps the newline; drop it */
			typed[strcspn(typed, "\n")] = '\0';
			base = typed;
			break;

		case 2:	base = argv[1];
			break;

		default:printf("Usage: Jason filename\n");
			exit(1);
	}

	/*
	 * The base name and the extension must both fit in filename.
	 * An input line that fgets had to truncate fills typed
	 * completely and is therefore rejected here as well.
	 */
	if (strlen(base) + strlen(".jsn") >= sizeof filename)	{
		printf("File name is too long\n");
		exit(1);
	}

	/* Hand the base name back and add the extension .jsn
	   to the input file's name */
	strcpy(name, base);
	strcpy(filename, base);
	strcat(filename, ".jsn");

	/* Open the file and quit with an error message
		if the file cannot be opened */
	if ((ifp = fopen(filename, "r")) == NULL)	{
		printf("Cannot open %s\n", filename);
		exit(2);
	}
	return(ifp);
}

/*
 * ungettc() -	Pushes the character c back onto the stream fp with
 *		ungetc.  Pushing back a newline also takes one off the
 *		line number count, undoing the increment made by gettc().
 */
void	ungettc(int c, FILE *fp)
{
	ungetc(c, fp);
	linenum -= (c == '\n') ? 1 : 0;
}


/*
 * gettc() -	Reads one character from fp with getc.  A newline bumps
 *		the line number count.  The character is returned after
 *		being passed through tolower(), so callers only ever see
 *		lower-case letters.
 */
int	gettc(FILE *fp)
{
	int	ch = getc(fp);

	if (ch == '\n')
		linenum++;

	return(tolower(ch));
}

/*
 * gettoken() -	Scans the next token of the program and returns the
 *		corresponding token class to the parser.  The first
 *		significant character decides which scanner is used:
 *
 *			end of file	->  tokeof
 *			letter		->  scanword()
 *			digit		->  scannum()
 *			anything else	->  scanop()
 */
enum tokentype	gettoken(FILE *ifp, int *tabindex)
{
	int	first = firstchar(ifp);

	/* Nothing left to scan */
	if (first == EOF)
		return(tokeof);

	/* A word: starts with a letter */
	if (isalpha(first))
		return(scanword(first, tabindex, ifp));

	/* A number: starts with a digit */
	if (isdigit(first))
		return(scannum(first, tabindex, ifp));

	/* Whatever remains is treated as an operator */
	return(scanop(first, tabindex));
}

/*
 * firstchar() -	Returns the first character that is neither
 *			white space nor part of a { ... } comment, or
 *			EOF if the input runs out first.  A comment
 *			that is still open at end of file simply ends
 *			the scan with EOF.
 */
int	firstchar(FILE *ifp)
{
	int	ch;

	for (;;)	{
		/* Step over any white space */
		do	{
			ch = gettc(ifp);
		} while (ch != EOF && isspace(ch));

		/* Anything but a comment opener (EOF included) is
		   the answer */
		if (ch != '{')
			return(ch);

		/* Step over the comment, then look again */
		do	{
			ch = gettc(ifp);
		} while (ch != EOF && ch != '}');
	}
}

/*
 * scanword() -	Scan until you encounter something other than a letter.
 */
enum tokentype	scanword(char c, int *tabindex, FILE *fp)
{
	char	tokenstring[TOKENSTRINGLENGTH];
	int	i = 0;


	/*
	 *	Build the string one character at a time.  It keeps
	 *	scanning until either the end of file or until it
	 *	encounters a non-letter
         */
	for (tokenstring[i++] = c;
		(c = gettc(fp)) != EOF && (isalpha(c) || isdigit(c));
				)   
		tokenstring[i++] = c;
	tokenstring[i] ='\0';

        /*  Push back the last character */
	ungettc(c, fp);

	/*
	 *	If the lexeme is already in the symbol table,
	 *	return its tokenclass.  If it isn't,            it must
	 *	be an identifier whose type we do not know yet.
	 */
	if (installname(tokenstring, tabindex))
		return(tokenclass(*tabindex));
	else	{
		setattrib(stunknown, tokidentifier, *tabindex);
		return(tokidentifier);
	}
}


/*
 * scannum() -	Scan for a number.
 */ 
enum tokentype	scannum(char c, int *tabindex, FILE *fp)
{
	int	i = 0, ival, isitreal = NO;
        float	rval;
        char	tokenstring[TOKENSTRINGLENGTH];

	/*Scan until you encounter something that cannot be
		part of a number or the end of file */
	for (tokenstring[i++] = c;  (c = gettc(fp)) != EOF && isdigit(c);
				)
		tokenstring[i++] = c;

        /* Is there a fractional part? */
	if (c == '.')	{
		isitreal = YES;
		for  (tokenstring[i++] = c;
				(c = gettc(fp)) != EOF && isdigit(c);
				)
			tokenstring[i++] = c;
	}
        /*  Put the null byte at the end to terminate the string */
	tokenstring[i] = '\0';

	/* Push back the last character */
	ungettc(c, fp);

	/* If there is no fractional part, it is an integer literal
          constant.  Otherwise, it is a real literal constant. */
	if (installname(tokenstring, tabindex))
		return(tokenclass(*tabindex));
	else if (isitreal)	{
		setattrib(stunknown, tokconstant, *tabindex);
		installdatatype(*tabindex, stliteral, dtreal);
		rval = atof(tokenstring);
		printf("*****rval is %f\n*******", rval);
                setrvalue(*tabindex, rval);
		return(tokconstant);
	}

	else	{
		setattrib(stunknown, tokconstant, *tabindex);
		installdatatype(*tabindex, stliteral, dtinteger);
		ival = atoi(tokenstring);
		printf("*****ival is %d\n*******", ival);
		setivalue(*tabindex, ival);
		return(tokconstant);
	}
}

/*
 * scanop() -	Handles an operator, which is a single character
 *		other than a letter or digit.  The one-character lexeme
 *		is looked up with installname(), which sets *tabindex.
 *		If the lookup succeeds, the entry's token class is
 *		returned; if not, the character cannot be a legal
 *		operator, so the error is reported on stderr and the
 *		program ends with status 3.
 */
enum tokentype	scanop(char c, int *tabindex)
{
	char	lexeme[TOKENSTRINGLENGTH];
	int	known;

	/* Turn the character into a one-character string */
	lexeme[0] = c;
	lexeme[1] = '\0';

	known = installname(lexeme, tabindex);
	if (known)
		return(tokenclass(*tabindex));

	/*  It was not in the symbol table already, so it cannot
		be a legal operator.  */
	fprintf(stderr, "%s is an illegal operator on line #%d\n",
			lexeme, linenum);
	exit(3);
}

/* [Back to the Notes Index] */