/* lexical.c */

/*********************************************
 * lexical analyzer implementation           *
 * Author:  Douglas W. Jones, Jun.  25, 2003 *
 * Revised: Douglas W. Jones, Jun.  16, 2004 *                          \*MP1*\
 * Revised: Douglas W. Jones, Jun.  30, 2004 *                          \*MP2*\
 *********************************************/

/* version number and authorship */
/* VERSION defined by Makefile */
/* AUTHOR  defined by Makefile */

#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
#include <time.h>
#include <limits.h>
#include "boolean.h"
#include "exception.h"
#include "symboltable.h"
#include "objectcode.h"

#define EXTERN
#include "lexical.h"

/*********************************************
 * Private data structures                   *
 *********************************************/

/* maximum length of an input line */
/* LINELEN provided by Makefile */

/* input and output files; all three are set by lex_init */
static FILE * infile;   /* source text, read a character at a time by lex_scan_line */
static FILE * outfile;  /* listing output, or NULL if no listing is wanted */
static FILE * errfile;  /* error message output, or NULL if none is wanted */

/* the text of the line being processed, null terminated;
   lex_scan_line stores at most LINELEN characters before the terminator */
static char line[LINELEN + 1];

/* information about this line */
static int lineno;      /* line number in infile, or zero if no lines processed */
static char * msg;      /* the first error message concerning this line,
			   or NULL if no error has been reported for it */
static char * msgpos;   /* the position of the error on this line;
			   meaningful only while msg is not NULL */

/* key indicator of progress analyzing this line */
static char * pos;      /* pointer to next un-analyzed character on line,
			   or NULL once end of file has been reached */

/*********************************************
 * Private Functions                         *
 *********************************************/

static void scan_number( unsigned int radix )
/* scan a number in the indicated radix
 * given:   radix, the radix of the number (2 through 36)
 *          *pos points to the first character of the number
 *          lex_next.pos points appropriately for error msgs
 * assures: *pos points to a character that is not a letter or digit
 *          lex_next.val holds the value of the number
 *          lex_next.len has been incremented once per character scanned
 * errors:  reported through lex_error (only the first error on a line
 *          is retained there); scanning always continues to the end of
 *          the run of letters and digits so the caller can resume cleanly
 *          - a radix outside 2..36 is reported and replaced by 36
 *          - a digit not less than the radix is reported and taken as 0
 *          - on overflow the value accumulated so far is reset to 0
 */
{
	if ((radix < 2) || (radix > 36)) {
		lex_error( &lex_next, "bad radix" );
		radix = 36;
	}
	lex_next.val = 0;

	/* the <ctype.h> functions require an unsigned char value (or EOF),
	   so every character is cast before it is classified */
	if (!isalnum( (unsigned char)*pos )) {
		lex_error( &lex_next, "digit expected" );
	}
	while (isalnum( (unsigned char)*pos )) {
		unsigned int digit;
		if (isdigit( (unsigned char)*pos )) {
			digit = (unsigned int)(*pos - '0');
		} else if (isupper( (unsigned char)*pos )) {
			digit = 10 + (unsigned int)(*pos - 'A');
		} else /* islower( *pos ) */ {
			digit = 10 + (unsigned int)(*pos - 'a');
		}

		/* now check for all possible errors
		   as we accumulate the number */
		if (digit >= radix) {
			/* valid digits run from 0 to radix-1, so a digit
			   equal to the radix must be rejected too */
			lex_error( &lex_next,
				"bad digit in number" );
			digit = 0;
		}
		if (lex_next.val > (UINT_MAX / radix)) {
			/* multiplying by the radix would overflow */
			lex_error( &lex_next,
				"number way too large" );
			lex_next.val = 0;
		}
		lex_next.val = lex_next.val * radix;
		if (lex_next.val > (UINT_MAX - digit)) {
			/* adding the digit would overflow */
			lex_error( &lex_next,
				"number too large" );
			lex_next.val = 0;
		}
		lex_next.val = lex_next.val + digit;

		/* finally move on to the next digit */
		lex_next.len++;
		pos++;
	}
}

/*********************************************
 * Implementation of the Interface           *
 *********************************************/

void lex_init( FILE * in, FILE * out, FILE * err )
/* prepare the lexical analyzer for use
   given:  in, the input stream from which lexemes are to be extracted
	   out, the output stream for the listing (may be NULL)
	   err, the output stream for error messages (may be NULL)
   notes:  no line has been read yet and no error is pending afterwards;
	   if a listing is wanted, a heading line is written to it
*/
{
	/* start with a clean slate: no lines seen, no error pending */
	lineno = 0;
	msg = NULL;

	/* remember the streams for later use */
	infile = in;
	outfile = out;
	errfile = err;

	/* without a listing stream there is no heading to write */
	if (outfile == NULL) return;

	{
		/* heading: program identification followed by the time */
		time_t now = time( NULL );
		fputs( "EAL " VERSION " by " AUTHOR "; ", outfile );
		/* ctime's text already ends in a newline, so the newline
		   written after it leaves a blank line below the heading */
		fputs( ctime( &now ), outfile );
		fputs( "\n", outfile );
	}
}

void lex_scan_line()
/* finish with the previous line and begin scanning the next one
   - lists the previous line (if there was one and a listing is wanted),
     followed by any error message recorded against it
   - reports any pending error message on the error stream (if wanted)
   - reads the next line of input into the line buffer, cleaned up
   - primes lex_this and lex_next with the first lexemes of the new line
*/
{
	/* step 1: listing of the previous line */
	if ((outfile != NULL) && (lineno > 0)) {
		fprintf( outfile, "%6d ", lineno );
		object_put( outfile );
		fputs( " |", outfile );
		fputs( line, outfile );
		putc( '\n', outfile );

		if (msg != NULL) {
			/* first a line holding a ^ under the error; the
			   prefix is one blank wider than the line number
			   field and the loop's extra blank stands in for
			   the second character of the " |" separator */
			char * cursor = line;
			fputs( "        ", outfile );
			object_put( outfile );
			while (cursor <= msgpos) {
				putc( ' ', outfile );
				cursor++;
			}
			putc( '^', outfile );
			putc( '\n', outfile );

			/* then a line holding the message itself */
			fputs( msg, outfile );
			putc( '\n', outfile );
		}
	}

	/* step 2: error message for the user */
	if ((errfile != NULL) && (msg != NULL)) {
		fprintf( errfile, "%6d ", lineno );
		fputs( msg, errfile );
		putc( '\n', errfile );
	}

	/* step 3: read the next line, a character at a time */
	{
		int used = 0;   /* characters stored in line so far */
		int ch;         /* most recent character from infile */

		while (((ch = getc( infile )) != EOF) && (ch != '\n')) {
			if (used >= LINELEN) {
				/* buffer full; discard the rest of the line */
				continue;
			}
			if (ch == '\t') {
				/* expand tab with blanks to the next multiple
				   of 8, or until the buffer is full */
				do {
					line[used++] = ' ';
				} while (((used & 7) != 0) && (used < LINELEN));
			} else if ((ch < ' ') || (ch > '~')) {
				/* control and 8-bit characters become blanks */
				line[used++] = ' ';
			} else {
				line[used++] = ch;
			}
		}
		line[used] = '\0';

		/* no pending error for the fresh line */
		msg = NULL;

		if ((ch == EOF) && (used == 0)) {
			/* nothing left to read: signal end of file */
			pos = NULL;
		} else {
			/* a real line (possibly empty) was read */
			lineno++;
			pos = line;
		}
	}

	/* step 4: start the scanner; two calls are needed so that both
	   lex_this and lex_next describe lexemes of the new line */
	lex_scan();
	lex_scan();
}

void lex_scan()
/* scan for the next lexeme
   updates lex_this and lex_next as it advances one lexeme through the text
   - lex_this receives the old lex_next
   - lex_next describes the lexeme just scanned:
       typ  endfile, endline, identifier, number or punc
       pos  pointer to the start of the lexeme within the line
       len  number of characters of the line consumed by the lexeme
	    (zero for endfile and endline)
       val  symbol handle from sym_find for an identifier, numeric value
	    for a number, character code for punctuation or a quoted
	    character
   - once endline is reached, further calls keep returning endline
   - errors are reported through lex_error against lex_next
*/
{
	lex_this = lex_next;

	/* set lexeme attributes to default values */
	lex_next.pos = line;
	lex_next.len = 0;
	lex_next.val = 0;

	if (pos == NULL) {
		/* lex_scan_line found no more input */
		lex_next.typ = endfile;
		return;
	}

	/* for blanks leading up to endline, pos will be first blank*/  /*MP2*/
	lex_next.pos = pos;                                             /*MP2*/

	/* skip blanks */
	while (*pos == ' ') pos++;

	/* end of text or a ';' ends the line; pos is deliberately not
	   advanced, so the rest of the line is never scanned */
	if ((*pos == '\0')
	||  (*pos == ';')) {
		lex_next.typ = endline;
		return;
	}

	/* process nonblank lexeme; the <ctype.h> functions require an
	   unsigned char value, hence the casts below */
	lex_next.pos = pos;
	if (isalpha( (unsigned char)*pos )) {
		/* identifier: letter followed by letters, digits or '_';
		   the hash is accumulated character by character and then
		   used to look the text up in the symbol table */
		lex_next.typ = identifier;
		lex_next.val = (unsigned int)SYM_NOHASH;
		do {
			lex_next.val = (unsigned int)sym_hash( *pos,
				(SYM_HANDLE)lex_next.val );
			lex_next.len++;
			pos++;
		} while (isalnum( (unsigned char)*pos ) || (*pos == '_'));
		lex_next.val = (unsigned int)sym_find(
			lex_next.pos, lex_next.len,
			(SYM_HANDLE)lex_next.val );
		return;
	}
	if (isdigit( (unsigned char)*pos )) {
		/* decimal number, or radix#digits where the decimal
		   number scanned first gives the radix */
		lex_next.typ = number;
		scan_number( (unsigned int)10 );
		if (*pos == '#') {
			lex_next.len++;
			pos++;
			scan_number( lex_next.val );
		}
		return;
	}
	if (*pos == '#') {
		/* #digits is a hexadecimal number */
		lex_next.typ = number;
		lex_next.len++; /* count the '#', as the radix#digits form does */
		pos++;
		scan_number( (unsigned int) 16 );
		return;
	}
	if (*pos == '\'') { /* quoted character literal */              /*MP1*/
		lex_next.typ = number;                                  /*MP1*/
		lex_next.len++; /* count the opening quote */
		pos++;                                                  /*MP1*/
		if (*pos == '\0') {                                     /*MP1*/
			lex_error( &lex_next,                           /*MP1*/
				"incomplete literal" );                 /*MP1*/
			lex_next.val = 0;                               /*MP1*/
		} else { /* get the character and end quote */          /*MP1*/
			lex_next.val = *pos;                            /*MP1*/
			lex_next.len++; /* count the quoted character */
			pos++;                                          /*MP1*/
			if (*pos != '\'') {                             /*MP1*/
				lex_error( &lex_next,                   /*MP1*/
					"missing endquote" );           /*MP1*/
			} else { /* scan over end quote */              /*MP1*/
				lex_next.len++; /* count the end quote */
				pos++;                                  /*MP1*/
			}                                               /*MP1*/
		}                                                       /*MP1*/
		return;                                                 /*MP1*/
	}                                                               /*MP1*/

	/* anything else is a single character of punctuation */
	lex_next.typ = punc;
	lex_next.len = 1;
	lex_next.val = (unsigned int) *pos;
	pos++;
	return;
}

void lex_error( struct lexeme * l, char * m )
/* record an error against the current line
   given:  l, pointer to lexeme involved
	   m, error message (null terminated string)
   notes:  only the first error recorded for a line is kept; later ones
	   are ignored until lex_scan_line clears the pending message.
	   The message text is not copied, only the pointer m is saved.
*/
{
	/* an error is already pending for this line; keep the first one */
	if (msg != NULL) return;

	msgpos = l->pos;
	msg = m;
}

BOOLEAN lex_ispunc( struct lexeme * l, char c )
/* test a lexeme against one specific punctuation mark
   given:   l, pointer to lexeme to test
	    c, the character representation of the mark
   returns: TRUE if the lexeme is punctuation and its value is the
	    character c, FALSE otherwise
*/
{
	/* the type is checked first; the value only matters for punc */
	if ((l->typ == punc) && (l->val == (unsigned int)c)) {
		return TRUE;
	}
	return FALSE;
}