/* lexical.c */ /********************************************* * lexical analyzer implementation * * Author: Douglas W. Jones, Jun. 25, 2003 * * Revised: Douglas W. Jones, Jun. 16, 2004 * \*MP1*\ * Revised: Douglas W. Jones, Jun. 30, 2004 * \*MP2*\ *********************************************/ /* version number and authorship */ /* VERSION defined by Makefile */ /* AUTHOR defined by Makefile */ #include #include #include #include #include #include "boolean.h" #include "exception.h" #include "symboltable.h" #include "objectcode.h" #define EXTERN #include "lexical.h" /********************************************* * Private data structures * *********************************************/ /* maximum length of an input line */ /* LINELEN provided by Makefile */ /* input and output files */ static FILE * infile; static FILE * outfile; static FILE * errfile; /* the text of the line being processed */ static char line[LINELEN + 1]; /* information about this line */ static int lineno; /* line number in infile, or zero if lines processed */ static char * msg; /* the first error message concerning this line */ static char * msgpos; /* the position of the error on this line */ /* key indicator of progress analyzing this line */ static char * pos; /* pointer to next un-analyzed character on line */ /********************************************* * Private Functions * *********************************************/ static scan_number( unsigned int radix ) /* scan a number in the indicated radix * given: radix, the radix of the number * *pos points to the first character of the number * lex_next.pos points appropriately for error msgs * assures: *pos points to non-digit * lex_next.val holds the value of the number */ { if ((radix < 2) || (radix > 36)) { lex_error( &lex_next, "bad radix" ); radix = 36; } lex_next.val = 0; if (!isalnum( *pos )) { lex_error( &lex_next, "digit expected" ); } while (isalnum( *pos )) { int digit; if (isdigit( *pos )) { digit = (int)*pos - (int)'0'; } else if (isupper( *pos )) { digit = 10 + (int)*pos - (int)'A'; } else /* islower( *pos ) */ { digit = 10 + (int)*pos - (int)'a'; } /* now check for all possible errors as we accumulate the number */ if (digit > radix) { lex_error( &lex_next, "bad digit in number" ); digit = 0; } if (lex_next.val > (UINT_MAX / radix)) { lex_error( &lex_next, "number way too large" ); lex_next.val = 0; } lex_next.val = lex_next.val * radix; if (lex_next.val > (UINT_MAX - digit)) { lex_error( &lex_next, "number too large" ); lex_next.val = 0; } lex_next.val = lex_next.val + digit; /* finally move on to the next digit */ lex_next.len++; pos++; } } /********************************************* * Implementation of the Interface * *********************************************/ void lex_init( FILE * in, FILE * out, FILE * err ) /* initializer given: in, the input stream from which lexemes are to be extracted out, the output stream for the listing (may be NULL) err, the output stream for error messages (may be NULL) */ { infile = in; outfile = out; errfile = err; lineno = 0; msg = 0; if (outfile != NULL) { time_t t = time( NULL ); fputs( "EAL " VERSION " by " AUTHOR "; ", outfile ); fputs( ctime( &t ), outfile ); fputs( "\n", outfile ); } } void lex_scan_line() /* initialize for scanning one more line, generate listing of previous line */ { if ((lineno > 0) && (outfile != NULL)) { /* list previous line */ fprintf( outfile, "%6d ", lineno ); object_put( outfile ); fputs( " |", outfile ); fputs( line, outfile ); putc( '\n', outfile ); if (msg != NULL) { /* report error message in listing! */ char * p; /* message begins with a ^ under the error */ fputs( " ", outfile ); object_put( outfile ); for (p = line; p <= msgpos; p++) putc( ' ', outfile ); putc( '^', outfile ); putc( '\n', outfile ); /* message concludes with the message itself */ fputs( msg, outfile ); putc( '\n', outfile ); } } if ((msg != NULL) && (errfile != NULL)) { /* report messages to user! */ fprintf( errfile, "%6d ", lineno ); fputs( msg, errfile ); putc( '\n', errfile ); } { /* read next line */ int i = 0; int c; for (;;) { /* read line a character at a time and clean it up */ c = getc( infile ); if (c == EOF) break; if (c == '\n') break; if (i >= LINELEN) continue; if (c == '\t') { /* eliminate tabs in input line */ do { line[i] = ' '; i++; } while (((i & 7) != 0) && (i < LINELEN)); } else if (c < ' ') { /* eliminate ASCII control chars in input */ line[i] = ' '; i++; } else if (c > '~') { /* eliminate 8-bit chars in input */ line[i] = ' '; i++; } else { line[i] = c; i++; } } line[i] = '\0'; if ((i == 0) && (c == EOF)) { pos = NULL; } else { pos = line; lineno++; } msg = NULL; } { /* startup scanner */ lex_scan(); lex_scan(); } } void lex_scan() /* scan for the next lexeme updates lex_this and lex_next as it advances one lexeme through the text */ { lex_this = lex_next; /* set lexeme attributes to default values */ lex_next.pos = line; lex_next.len = 0; lex_next.val = 0; if (pos == NULL) { lex_next.typ = endfile; return; } /* for blanks leading up to endline, pos will be first blank*/ /*MP2*/ lex_next.pos = pos; /*MP2*/ /* skip blanks */ while (*pos == ' ') pos++; if ((*pos == '\0') || (*pos == ';')) { lex_next.typ = endline; return; } /* process nonblank lexeme */ lex_next.pos = pos; if (isalpha( *pos )) { lex_next.typ = identifier; lex_next.val = (unsigned int)SYM_NOHASH; do { lex_next.val = (unsigned int)sym_hash( *pos, (SYM_HANDLE)lex_next.val ); lex_next.len++; pos++; } while (isalnum( *pos ) || (*pos == '_')); lex_next.val = (unsigned int)sym_find( lex_next.pos, lex_next.len, (SYM_HANDLE)lex_next.val ); return; } if (isdigit( *pos )) { lex_next.typ = number; scan_number( (unsigned int)10 ); if (*pos == '#') { lex_next.len++; pos++; scan_number( lex_next.val ); } return; } if (*pos == '#') { lex_next.typ = number; pos++; scan_number( (unsigned int) 16 ); return; } if (*pos == '\'') { /* quoted character literal */ /*MP1*/ lex_next.typ = number; /*MP1*/ pos++; /*MP1*/ if (*pos == '\0') { /*MP1*/ lex_error( &lex_next, /*MP1*/ "incomplete literal" ); /*MP1*/ lex_next.val = 0; /*MP1*/ } else { /* get the character and end quote */ /*MP1*/ lex_next.val = *pos; /*MP1*/ pos++; /*MP1*/ if (*pos != '\'') { /*MP1*/ lex_error( &lex_next, /*MP1*/ "missing endquote" ); /*MP1*/ } else { /* scan over end quote */ /*MP1*/ pos++; /*MP1*/ } /*MP1*/ } /*MP1*/ return; /*MP1*/ } /*MP1*/ lex_next.typ = punc; lex_next.len = 1; lex_next.val = (unsigned int) *pos; pos++; return; } void lex_error( struct lexeme * l, char * m ) /* report error on current line given: l, pointer to lexeme involved m, error message (null terminated string) */ { if (msg == NULL) { msg = m; msgpos = l->pos; } } BOOLEAN lex_ispunc( struct lexeme * l, char c ) /* return TRUE if lexeme is a particular punctuation mark given: l, pointer to lexeme to test c, the character representation of the mark */ { if (l->typ != punc) return FALSE; if (l->val != (unsigned int)c) return FALSE; return TRUE; }