# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#       parser.c
#       lexical.c
#       mp2.a
#
echo x - parser.c
sed 's/^X//' >parser.c << 'END-of-parser.c'
X/* parser.c */
X
X/*********************************************
X * parser implementation                     *
X * Author:  Douglas W. Jones, Jan. 23, 2003  *
X * Revised: Douglas W. Jones, Feb. 28, 2003  * \*MP2*\
X *********************************************/
X
X/* symbol table size */
X/* SYMSIZE defined by Makefile */
X
X/* NOTE(review): the two header names below were missing from the archive
X   text (bare "#include" lines).  <stdio.h> is required for FILE, fputs
X   and fprintf used in parse_dump; <stdlib.h> is presumed -- confirm
X   against the original distribution. */
X#include <stdio.h>
X#include <stdlib.h>
X#include "boolean.h"
X#include "exception.h"
X#include "stringpool.h"
X#include "symboltable.h"
X#include "objectcode.h"
X#include "lexical.h"
X
X#define EXTERN
X#include "parser.h"
X
X/*********************************************
X * Private data structures                   *
X *********************************************/
X
X/* the location counter */
Xstatic OBJECT_VALUE location;
X
X/* the value field of the symbol table */
Xstatic OBJECT_VALUE value_table[ SYMSIZE ];
X
X/* assembly pseudoops -- NOTE: if we had a few more, we'd build a table */
Xstatic SYM_HANDLE b_handle;
Xstatic SYM_HANDLE w_handle;
X
X/*********************************************
X * Private parsing functions                 *
X *********************************************/
X
Xvoid parse_punc( char c, char * m )
X/* parse one punctuation mark and gripe with message m if not right!
X   given: c, the required mark
X          m, the error message to report when the current lexeme is not c
X   note:  the lexeme is skipped only when it matches; on a mismatch the
X          error is recorded and the scanner is not advanced
X*/
X{
X        if (lex_ispunc( &lex_this, c )) {
X                lex_scan();
X        } else {
X                lex_error( &lex_this, m );
X        }
X}
X
XOBJECT_VALUE parse_operand()
X/* parse one operand of the form
X     <term> ::= <identifier> | <number> | . | ( <operand> )          \*MP2*\
X     <operand> ::= <term> { (+|-|&||) <term> }                       \*MP2*\
X   and return its value
X   note:  operators are applied strictly left to right, with no
X          precedence; the result is undefined if any term is undefined;
X          the accumulated value is masked to 16 bits after every term
X*/
X{
X        OBJECT_VALUE value;        /* new value */
X        OBJECT_VALUE accumulator;  /* value being accumulated */     /*MP2*/
X        struct lexeme operator;    /* expression operator */         /*MP2*/
X        accumulator.value = (OBJECT_TYPE)0;                          /*MP2*/
X        accumulator.defined = TRUE;                                  /*MP2*/
X
X        /* here, we create a fictional previous operator */          /*MP2*/
X        /* copying lex_this gives it a position for error reports */
X        operator = lex_this;                                         /*MP2*/
X        operator.typ = punc;                                         /*MP2*/
X        operator.val = ' ';                                          /*MP2*/
X
X        for (;;) {                                                   /*MP2*/
X                if (lex_this.typ == identifier) {
X                        value = value_table[ (SYM_HANDLE) lex_this.val ];
X
X                        if (!value.defined) {
X                                lex_error( &lex_this, "undefined" );
X                        }
X
X                        /* skip identifier */
X                        lex_scan();
X                } else if (lex_this.typ == number) {
X                        value.value = (OBJECT_TYPE) lex_this.val;
X                        value.defined = TRUE;
X
X                        /* skip number */
X                        lex_scan();
X                } else if (lex_ispunc( &lex_this, '.' )) {
X                        value = location;
X
X                        /* skip dot */
X                        lex_scan();
X                } else if (lex_ispunc( &lex_this, '(' )) {           /*MP2*/
X                        lex_scan(); /* skip begin paren */           /*MP2*/
X                        value = parse_operand();                     /*MP2*/
X                        parse_punc( ')', "end paren expected" );     /*MP2*/
X                } else {
X                        /* nothing is skipped here, so the offending
X                           lexeme is left for the caller to complain
X                           about or resynchronize on */
X                        lex_error( &lex_this, "operand expected" );
X                        value.defined = FALSE;                       /*MP2*/
X                        value.value = (OBJECT_TYPE)0;                /*MP2*/
X                }
X
X                /* work out type of result */                        /*MP2*/
X                accumulator.defined =                                /*MP2*/
X                        accumulator.defined && value.defined;        /*MP2*/
X                if (!accumulator.defined)                            /*MP2*/
X                        lex_error( &operator, "undefined operand" ); /*MP2*/
X
X                /* combine value with accumulator */                 /*MP2*/
X                if (operator.val == ' ') {                           /*MP2*/
X                        /* special case for fictional operator */    /*MP2*/
X                        accumulator = value;                         /*MP2*/
X                } else if (operator.val == '+') {                    /*MP2*/
X                        accumulator.value += value.value;            /*MP2*/
X                } else if (operator.val == '-') {                    /*MP2*/
X                        accumulator.value -= value.value;            /*MP2*/
X                } else if (operator.val == '&') {                    /*MP2*/
X                        accumulator.value &= value.value;            /*MP2*/
X                } else if (operator.val == '|') {                    /*MP2*/
X                        accumulator.value |= value.value;            /*MP2*/
X                }                                                    /*MP2*/
X                /* keep only the low 16 bits of the result */
X                accumulator.value &= 0xFFFF;                         /*MP2*/
X
X                /* exit loop if this term not followed by operator *//*MP2*/
X                if (lex_this.typ != punc) break;                     /*MP2*/
X                operator = lex_this;                                 /*MP2*/
X                if ((operator.val != '+')                            /*MP2*/
X                &&  (operator.val != '-')                            /*MP2*/
X                &&  (operator.val != '&')                            /*MP2*/
X                &&  (operator.val != '|')) break;                    /*MP2*/
X
X                lex_scan(); /* skip operator */                      /*MP2*/
X        }                                                            /*MP2*/
X        return accumulator;                                          /*MP2*/
X}
X
Xvoid parse_definition()
X/* parse one definition of the form
X     <definition> ::= ( <identifier> | . ) = <operand>
X   note:  parse_line calls this only when the second lexeme on the line
X          is an equals sign, which is why the equals sign is scanned
X          over here without being tested
X*/
X{
X        if (lex_this.typ == identifier) {
X                SYM_HANDLE handle;
X
X                /* save and scan over the identifier */
X                handle = (SYM_HANDLE)lex_this.val;
X                lex_scan();
X
X                /* scan over the equals sign */
X                lex_scan();
X
X                /* parse and save the operand value */
X                value_table[ handle ] = parse_operand();
X
X        } else if (lex_ispunc( &lex_this, '.' )) {
X                /* scan over . = */
X                lex_scan();
X                lex_scan();
X
X                /* parse and use the operand value */
X                location = parse_operand();
X
X        } else {
X                /* gripe, but still try to parse the line */
X                lex_error( &lex_this, "identifier expected" );
X                lex_scan();
X
X                /* scan over the equals sign */
X                lex_scan();
X
X                /* parse and discard the operand value */
X                (void)parse_operand();
X        }
X}
X
Xvoid parse_statement()
X/* parse one statement of the form
X     <statement> ::= [ <identifier> : ] [ ( B | W ) <operand> ]
X   note:  a label is given the current value of the location counter;
X          B emits one byte and advances the location counter by 1,
X          W emits one word and advances the location counter by 2
X*/
X{
X        /* see if statement begins with a label */
X        if ((lex_this.typ == identifier) && lex_ispunc( &lex_next, ':' )) {
X
X                value_table[ (SYM_HANDLE)lex_this.val ] = location;
X
X                /* scan over the identifier used as a label */
X                lex_scan();
X
X                /* scan over the colon */
X                lex_scan();
X        }
X        if (lex_this.typ == identifier) {
X                /* we have a symbolic opcode or pseudo-op */
X
X                if ((SYM_HANDLE)lex_this.val == b_handle) {
X
X                        /* scan over the B */
X                        lex_scan();
X
X                        /* process operand */
X                        object_byte( location, parse_operand() );
X                        location.value = location.value + 1;
X                } else if ((SYM_HANDLE)lex_this.val == w_handle) {
X
X                        /* scan over the W */
X                        lex_scan();
X
X                        /* process operand */
X                        object_word( location, parse_operand() );
X                        location.value = location.value + 2;
X                } else {
X                        lex_error( &lex_this, "illegal opcode or pseudo-op" );
X                }
X        }
X}
X
Xvoid parse_line()
X/* parse one line of assembly code where
X     <line> ::= <definition> | <statement>
X   note:  a line is taken to be a definition when its second lexeme is
X          an equals sign; whatever follows the definition or statement
X          must be the end of the line (possibly a comment)
X*/
X{
X        if (lex_ispunc( &lex_next, '=' )) {
X                parse_definition();
X        } else {
X                parse_statement();
X        }
X        if (lex_this.typ != endline) {
X                lex_error( &lex_this, "comment expected" );
X
X                /* here, we just drop the rest of the bad line on the floor,
X                   knowing that parse_program will call lex_scan_line */
X        }
X}
X
X
X/*********************************************
X * Implementation of the Interface           *
X *********************************************/
X
Xvoid parse_init()
X/* initializer
X   marks every symbol value as undefined and predefines the
X   pseudo-ops B and W in the symbol table
X*/
X{
X        /* clear out value half of symbol table */
X        SYM_HANDLE i;
X        for (i = 0; i < SYMSIZE; i++) {
X                value_table[i].value = (OBJECT_TYPE)0;
X                value_table[i].defined = FALSE;
X        }
X
X        /* predefine the opcodes */
X        /* NOTE; if too many opcodes, we could make a better mechanism here */
X        b_handle = sym_predefine( "B" );
X        w_handle = sym_predefine( "W" );
X}
X
Xvoid parse_program()
X/* top level of syntax-directed parser
X   parse one assembly language program
X     <program> ::= { <line> }
X   note:  the location counter starts at zero; lines are processed
X          until the lexical analyzer reports end of file; the pool_full
X          and sym_full exceptions are handled once per line by
X          recording an error against lex_next
X*/
X{
X        location.value = 0;
X        location.defined = TRUE;
X        for (;;) {
X                /* parse each line of the program, catching exceptions */
X                EXCEPT_CATCH( pool_full ) {
X                        EXCEPT_CATCH( sym_full ) {
X
X                                lex_scan_line();
X                                if (lex_this.typ == endfile) break;
X                                parse_line();
X
X                        } EXCEPT_HANDLER {
X
X                                lex_error( &(lex_next),
X                                           "symbol table overflow" );
X
X                        } EXCEPT_END;
X                } EXCEPT_HANDLER {
X
X                        lex_error( &(lex_next),
X                                   "string pool overflow" );
X
X                } EXCEPT_END;
X        }
X}
X
Xvoid parse_dump(FILE * f)
X/* dump the symbol table to an output stream
X   given: f, pointer to the stream
X   note:  only symbols with defined values are listed, one per line,
X          as the symbol followed by its value in 4-digit hexadecimal
X*/
X{
X        SYM_HANDLE i;
X        fputs( "\n; symbol table:\n", f );
X        for (i = 0; i < SYMSIZE; i++) {
X                if (value_table[ i ].defined) {
X                        sym_put( i, f );
X                        fprintf( f, "\t=\t#%4.4X\n",
X                                 value_table[ i ].value );
X                }
X        }
X}
END-of-parser.c
echo x - lexical.c
sed 's/^X//' >lexical.c << 'END-of-lexical.c'
X/* lexical.c */
X
X/*********************************************
X * lexical analyzer implementation           *
X * Author:  Douglas W. Jones, Jan. 23, 2003  *
X * Revised: Douglas W. Jones, Feb. 28, 2003  * \*MP2*\
X *********************************************/
X
X/* version number and authorship */
X/* VERSION defined by Makefile */
X/* AUTHOR defined by Makefile */
X
X/* NOTE(review): the four header names below were missing from the
X   archive text (bare "#include" lines).  <stdio.h> (FILE, getc, fputs),
X   <ctype.h> (isalpha, isdigit, ...) and <time.h> (time, ctime) are
X   required by calls in this file; <stdlib.h> is presumed -- confirm
X   against the original distribution. */
X#include <stdio.h>
X#include <stdlib.h>
X#include <ctype.h>
X#include <time.h>
X#include "boolean.h"
X#include "exception.h"
X#include "symboltable.h"
X#include "objectcode.h"
X
X#define EXTERN
X#include "lexical.h"
X
X/*********************************************
X * Private data structures                   *
X *********************************************/
X
X/* maximum length of an input line */
X/* LINELEN provided by Makefile */
X
X/* input and output files */
Xstatic FILE * infile;
Xstatic FILE * outfile;
Xstatic FILE * errfile;
X
X/* the text of the line being processed */
Xstatic char line[LINELEN + 1];
X
X/* information about this line */
Xstatic int lineno;    /* line number in infile, or zero if no lines processed */
Xstatic char * msg;    /* the first error message concerning this line */
Xstatic char * msgpos; /* the position of the error on this line */
X
X/* key indicator of progress analyzing this line */
Xstatic char * pos;    /* pointer to next un-analyzed character on line */
X
X/*********************************************
X * Implementation of the Interface           *
X *********************************************/
X
Xvoid lex_init( FILE * in, FILE * out, FILE * err )
X/* initializer
X   given: in, the input stream from which lexemes are to be extracted
X          out, the output stream for the listing (may be NULL)
X          err, the output stream for error messages (may be NULL)
X   note:  when a listing stream is given, a banner with the version,
X          author and current time is written to it immediately
X*/
X{
X        infile = in;
X        outfile = out;
X        errfile = err;
X        lineno = 0;
X        msg = NULL;
X        if (outfile != NULL) {
X                time_t t = time( NULL );
X                fputs( "EAL " VERSION " by " AUTHOR "; ", outfile );
X                fputs( ctime( &t ), outfile );
X                fputs( "\n", outfile );
X        }
X}
X
Xvoid lex_scan_line()
X/* initialize for scanning one more line, generate listing of previous line
X   note:  the previous line (if any) is listed along with the first error
X          recorded against it, the error is also reported to the error
X          stream, then the next line is read and cleaned up and the
X          scanner is primed so that lex_this and lex_next hold the
X          first two lexemes of the new line
X*/
X{
X        if ((lineno > 0) && (outfile != NULL)) {
X                /* list previous line */
X                fprintf( outfile, "%6d ", lineno );
X                object_put( outfile );
X                fputs( " |", outfile );
X                fputs( line, outfile );
X                putc( '\n', outfile );
X                if (msg != NULL) {
X                        /* report error message in listing! */
X                        char * p;
X                        /* message begins with a ^ under the error */
X                        /* the listing line above puts 7 columns before
X                           the object code and 2 after it, and the loop
X                           below emits one blank more than the offset of
X                           the error, so 8 blanks are needed here to put
X                           the ^ under the offending character; this
X                           assumes object_put writes the same width on
X                           both calls */
X                        fputs( "        ", outfile );
X                        object_put( outfile );
X                        for (p = line; p <= msgpos; p++) putc( ' ', outfile );
X                        putc( '^', outfile );
X                        putc( '\n', outfile );
X                        /* message concludes with the message itself */
X                        fputs( msg, outfile );
X                        putc( '\n', outfile );
X                }
X        }
X        if ((msg != NULL) && (errfile != NULL)) {
X                /* report messages to user! */
X                fprintf( errfile, "%6d ", lineno );
X                fputs( msg, errfile );
X                putc( '\n', errfile );
X        }
X        {
X                /* read next line */
X                int i = 0;
X                int c;
X                for (;;) {
X                        /* read line a character at a time and clean it up */
X                        c = getc( infile );
X                        if (c == EOF) break;
X                        if (c == '\n') break;
X                        /* silently discard whatever does not fit */
X                        if (i >= LINELEN) continue;
X                        if (c == '\t') {
X                                /* eliminate tabs in input line */
X                                /* pad with blanks to the next multiple of 8 */
X                                do {
X                                        line[i] = ' ';
X                                        i++;
X                                } while (((i & 7) != 0) && (i < LINELEN));
X                        } else if (c < ' ') {
X                                /* eliminate ASCII control chars in input */
X                                line[i] = ' ';
X                                i++;
X                        } else if (c > '~') {
X                                /* eliminate 8-bit chars in input */
X                                line[i] = ' ';
X                                i++;
X                        } else {
X                                line[i] = c;
X                                i++;
X                        }
X                }
X                line[i] = '\0';
X                if ((i == 0) && (c == EOF)) {
X                        /* nothing was read; tell lex_scan this is end of file */
X                        pos = NULL;
X                } else {
X                        pos = line;
X                        lineno++;
X                }
X                msg = NULL;
X        }
X        {
X                /* startup scanner */
X                /* two scans are needed to fill both lex_this and lex_next */
X                lex_scan();
X                lex_scan();
X        }
X}
X
Xvoid lex_scan()
X/* scan for the next lexeme
X   updates lex_this and lex_next as it advances one lexeme through the text
X   note:  the new lex_next is one of
X            endfile     when there is no current line
X            endline     at the end of the text or at a ; (comment)
X            identifier  a letter followed by letters, digits or _ ;
X                        val is the handle returned by sym_find
X            number      decimal digits, or # followed by hex digits;
X                        val is the value, and for the hex form len
X                        counts the digits but not the #
X            punc        any other single character; val is that character
X*/
X{
X        lex_this = lex_next;
X
X        /* set lexeme attributes to default values */
X        lex_next.pos = line;
X        lex_next.len = 0;
X        lex_next.val = 0;
X
X        if (pos == NULL) {
X                lex_next.typ = endfile;
X                return;
X        }
X
X        /* skip blanks */
X        while (*pos == ' ') pos++;
X
X        if ((*pos == '\0')
X        ||  (*pos == ';')) {
X                lex_next.typ = endline;
X                return;
X        }
X
X        /* process nonblank lexeme */
X        /* the ctype arguments are cast to unsigned char because the ctype
X           functions are only defined for unsigned char values and EOF */
X        lex_next.pos = pos;
X        if (isalpha( (unsigned char)*pos )) {
X                lex_next.typ = identifier;
X                lex_next.val = (unsigned int)SYM_NOHASH;
X                do {
X                        lex_next.val = (unsigned int)sym_hash( *pos,
X                                            (SYM_HANDLE)lex_next.val );
X                        lex_next.len++;
X                        pos++;
X                } while (isalnum( (unsigned char)*pos )
X                     ||  (*pos == '_'));                             /*MP2*/
X                lex_next.val = (unsigned int)sym_find(
X                                    lex_next.pos, lex_next.len,
X                                    (SYM_HANDLE)lex_next.val );
X                return;
X        }
X        if (isdigit( (unsigned char)*pos )) {
X                lex_next.typ = number;
X                do {
X                        lex_next.val = lex_next.val * 10
X                                + (unsigned int)((int)*pos - (int)'0');
X                        lex_next.len++;
X                        pos++;
X                } while (isdigit( (unsigned char)*pos ));
X                return;
X        }
X        if (*pos == '#') {
X                lex_next.typ = number;
X                pos++;
X                while (isxdigit( (unsigned char)*pos )) {
X                        lex_next.val = lex_next.val << 4;
X                        if (isdigit( (unsigned char)*pos )) {
X                                lex_next.val = lex_next.val
X                                    + (unsigned int)((int)*pos - (int)'0');
X                        } else if (isupper( (unsigned char)*pos )) {
X                                lex_next.val = lex_next.val
X                                    + (unsigned int)(10+(int)*pos-(int)'A');
X                        } else { /* islower( *pos ) */
X                                lex_next.val = lex_next.val
X                                    + (unsigned int)(10+(int)*pos-(int)'a');
X                        }
X                        lex_next.len++;
X                        pos++;
X                }
X                return;
X        }
X        lex_next.typ = punc;
X        lex_next.len = 1;
X        lex_next.val = (unsigned int) *pos;
X        pos++;
X        return;
X}
X
Xvoid lex_error( struct lexeme * l, char * m )
X/* report error on current line
X   given: l, pointer to lexeme involved
X          m, error message (null terminated string)
X   note:  only the first error on each line is kept; later ones are
X          ignored; the pointer m is saved, not the text, so the message
X          must remain valid until the line has been listed
X*/
X{
X        if (msg == NULL) {
X                msg = m;
X                msgpos = l->pos;
X        }
X}
X
XBOOLEAN lex_ispunc( struct lexeme * l, char c )
X/* return TRUE if lexeme is a particular punctuation mark
X   given: l, pointer to lexeme to test
X          c, the character representation of the mark
X*/
X{
X        if (l->typ != punc) return FALSE;
X        if (l->val != (unsigned int)c) return FALSE;
X        return TRUE;
X}
END-of-lexical.c
echo x - mp2.a
sed 's/^X//' >mp2.a << 'END-of-mp2.a'
X; mp2.a
X; test data for MP2
X
X; verify that underline works in identifiers
XA_B = 1
X        W A_B
X
X; demonstrate parenthesized expression parsing and evaluation
X        W 1
X        W 1+1
X        W (1+1) + 1
X        W (1) + ((1)+1) + 1
X
X; demonstrate other operators
X        W (1) - (2-3) - 4 ; should be -2 or FFFE
X        W (4) & (6&7) & 12 ; should be 4
X        W (1) | (2|4) | 8 ; should be 15 or 000F
X        W 1 + 2 - 3 | 4 & 5 ; should be 4
X
X; demonstrate that it works in other contexts
X; NOTE(review): the second = on the next line looks like a typo for - ;
X; as written the expression ends at that = and the rest of the line is
X; reported as an error -- confirm intent
XA_B = 1 + 2 = 3 | 4 & 5
X        W A_B
X
X; demonstrate what happens to errors
X        B (1 ; missing paren
X        B 1) ; extra paren
X        B 1 + ; missing operand
X        B 1 + QQ ; undefined operand
END-of-mp2.a
exit