/*
 EPSHeader

   File: asmtag.c
   Author: J. Kercheval
   Created: Sun, 07/14/1991  17:25:26
*/
/*
 EPSRevision History

   J. Kercheval  Sun, 07/14/1991  20:25:59  creation
   J. Kercheval  Mon, 07/15/1991  22:47:30  finish finite state machine parser
   J. Kercheval  Wed, 07/17/1991  21:35:43  add IsMember() and get_token()
   J. Kercheval  Thu, 07/18/1991  19:57:34  add flags checking
   J. Kercheval  Sun, 07/21/1991  15:58:56  add comment block support
   J. Kercheval  Sat, 07/27/1991  21:16:53  remove public post process support
   J. Kercheval  Sat, 07/27/1991  22:50:49  performance considerations (+10%)
   J. Kercheval  Sat, 08/10/1991  17:48:28  speed up IsMember()
   J. Kercheval  Sat, 08/17/1991  22:50:29  use unique function names (ASM...)
   J. Kercheval  Sun, 08/25/1991  23:52:51  fix bug in ASMSymbolWanted()
   J. Kercheval  Thu, 10/03/1991  12:27:37  fix logic outputting local labels
*/

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "asmtag.h"
#include "tagio.h"


/*
 * The finite state machine allows the following interesting paths
 *
 *    1 - Discard, Parse1, Symbol1
 *    2 - Discard, Parse1, Parse2, Symbol2
 *    3 - Discard, Parse1, Parse2, Define
 *
 * all the important cases follow one of these paths according to MASM/TASM
 * syntax.  The exit state is for finish up routine calls and some paths not
 * covered here are simple error paths and probably result from syntax errors
 */
/* the parser states named in the path summary above; ASMTags() starts in
 * Discard and returns when it reaches Exit */
typedef enum state {
    Discard,
    Parse1,
    Parse2,
    Symbol1,
    Symbol2,
    Define,
    Exit
} State;


/* character which starts a comment; ASM_get_token() reports "no token" when
 * it is the first non whitespace character it finds */
#define COMMENT_CHAR ';'

/* bytes reserved for each entry of the symbol tables below (the second
 * array dimension), terminating NUL included */
#define SYMBOL_SIZE 15


/*----------------------------------------------------------------------------
 *
 * The symbol lists represent all the symbols we are interested in either
 * obtaining or ignoring.  The order of some of these token lists is
 * important for determining if output should be performed.  If you want to
 * change these lists make sure that the flag checking is altered to match
 * the changed order.  The first element of each of these symbol lists is a
 * string containing all the first characters within the symbol list.  This
 * allows faster rejection for ASMIsMember() which is called often.
 *
 ---------------------------------------------------------------------------*/

/* symbols which are not significant for this parser; ASM_get_token() skips
 * any token found in this list and goes on to the next one */
char ASM_NOP_Sym[][SYMBOL_SIZE] =
{
    "cpbfnwo",                  /* list of starting characters of symbols
                                 * below */
    "c",                        /* C language declaration */
    "pascal",                   /* PASCAL language declaration */
    "basic",                    /* BASIC language declaration */
    "fortran",                  /* FORTRAN language declaration */
    "prolog",                   /* PROLOG language declaration */
    "nolanguage",               /* generic language declaration */
    "windows",                  /* WINDOWS exit and entry modifier */
    "oddnear",                  /* overlay modifier */
    "oddfar",                   /* overlay modifier */
    "normal",                   /* normal procedure entry/exit code */
    "\0"                        /* empty entry marks the end of the list */
};

/* symbols which begin a comment block; ASMTags() checks for these only on
 * the first token of a line */
char ASM_comment_block[][SYMBOL_SIZE] =
{
    "c",                        /* list of starting characters of symbols
                                 * below */
    "comment",                  /* begin comment block, next character is
                                 * delimiter */
    "\0"                        /* empty entry marks the end of the list */
};


/* create the function for determining if a character is a delimiter; the
 * argument is converted to unsigned char so that a plain char holding a
 * negative value (a byte above 127 where char is signed) cannot index in
 * front of the table */
#define IsDelim(c) ( _ASM_delim_boolean_table[(unsigned char) (c)] )

/* the indexed table for delimiter character lookup, one entry per character
 * value, filled in by ASMParserInit() */
BOOLEAN _ASM_delim_boolean_table[256];

/* valid delimiters for this syntax */
char ASM_delim[] = " \t;:=.,\"()<>[]*-+/";


/* create the function for determining if a character is a whitespace; the
 * argument is converted to unsigned char for the same reason as in
 * IsDelim() */
#define IsWhite(c) ( _ASM_white_boolean_table[(unsigned char) (c)] )

/* the indexed table for white space character lookup, one entry per
 * character value, filled in by ASMParserInit() */
BOOLEAN _ASM_white_boolean_table[256];

/* whitespace characters */
char ASM_white[] = " \t\v\f";


/* symbols which are both delimiters and special tokens; these are special
    tokens only when found at the beginning of a string of 1 or more
    delimiters.  ASM_get_token() returns such a character as a one
    character token when it is the first character after any whitespace */
char ASM_delim_Sym[] = "=:";

/* symbols which fit into the Define state and represent a tagged symbol */
/* state Define depends on the token ":" being at index 1 in this list: */
/* index 1 is tagged when flags->al is set, any other index when flags->ad */
/* is set */
char ASM_def[][SYMBOL_SIZE] =
{
    ":e=cd",                    /* list of starting characters of symbols
                                 * below */
    ":",                        /* local labels */
    "equ",                      /* equivalence */
    "=",                        /* equivalence */
    "catstr",                   /* concatenated and named strings */
    "db",                       /* named byte data definition */
    "dw",                       /* named word data definition */
    "dd",                       /* named double word data definition */
    "dp",                       /* named 6 byte far pointer data area
                                 * definition */
    "df",                       /* named 6 byte far pointer definition */
    "dq",                       /* named quad word data definition */
    "dt",                       /* named 10 byte data area */
    "\0"                        /* empty entry marks the end of the list */
};

/* symbols which fit into the Symbol state and represent a tagged symbol; */
/* ASMSymbolWanted() maps the index of each entry to a flag, so the order */
/* here must match the switch in that function */
char ASM_sym[][SYMBOL_SIZE] =
{
    "pmlsu",                    /* list of starting character of symbols
                                 * below */
    "proc",                     /* procedures (index 1, flags->af) */
    "macro",                    /* macros (index 2, flags->am) */
    "label",                    /* local labels (index 3, flags->al) */
    "struc",                    /* structures (index 4, flags->as) */
    "union",                    /* unions (index 5, flags->au) */
    "\0"                        /* empty entry marks the end of the list */
};


/*----------------------------------------------------------------------------
 *
 * ASMParserInit() initializes the tables required by the parser.  Each
 * table is a simple boolean index whose entry is true if the character
 * corresponding to the index is a member of the associated set.
 *
 ---------------------------------------------------------------------------*/

void ASMParserInit()
{
    int pos;                    /* walks the tables and the member strings */

    /* nothing belongs to either set until it is put there below */
    for (pos = 255; pos >= 0; pos--) {
        _ASM_white_boolean_table[pos] = FALSE;
        _ASM_delim_boolean_table[pos] = FALSE;
    }

    /* the end of string marker always acts as a delimiter */
    _ASM_delim_boolean_table['\0'] = TRUE;

    /* flag every character named in the delimiter set */
    for (pos = 0; ASM_delim[pos] != '\0'; pos++) {
        _ASM_delim_boolean_table[ASM_delim[pos]] = TRUE;
    }

    /* flag every character named in the whitespace set */
    for (pos = 0; ASM_white[pos] != '\0'; pos++) {
        _ASM_white_boolean_table[ASM_white[pos]] = TRUE;
    }
}

/*----------------------------------------------------------------------------
 *
 * ASMSymbolWanted() returns true if the index into the sym token list is one
 * of the wanted symbols according to the flags list.  The indexes belong
 * with the following symbols and flags:
 *
 *          Flag       Symbol   Index
 *          ---------  -------  -----
 *          flags->af  "proc"   1
 *          flags->am  "macro"  2
 *          flags->al  "label"  3
 *          flags->as  "struc"  4
 *          flags->au  "union"  5
 *
 ---------------------------------------------------------------------------*/

BOOLEAN ASMSymbolWanted(Flags * flags, int index)
{
    /*
     * flags - the option flags; only af, am, al, as and au are read here
     * index - position of the matched symbol in the ASM_sym list
     *
     * Each index is answered with the value of its own flag; an index
     * outside 1..5 is never wanted.
     */
    if (index == 1) {
        return flags->af;
    }
    if (index == 2) {
        return flags->am;
    }
    if (index == 3) {
        return flags->al;
    }
    if (index == 4) {
        return flags->as;
    }
    if (index == 5) {
        return flags->au;
    }

    return FALSE;
}


/*----------------------------------------------------------------------------
 *
 * ASMIsMember() takes the token passed and checks for membership in the
 * null terminated array, token_list, returning TRUE if it is a member and
 * FALSE otherwise.  On a TRUE return, index holds the index into the token
 * list of the matching symbol.
 *
 ---------------------------------------------------------------------------*/

BOOLEAN ASMIsMember(char token_list[][SYMBOL_SIZE], char *token, int *index)
{
    /*
     * token_list - symbol list; entry 0 holds the first characters of all
     *              the symbols, the symbols follow from entry 1 and an
     *              empty entry ends the list
     * token      - NUL terminated token to look up, compared without regard
     *              to case
     * index      - out: entry number of the match when TRUE is returned;
     *              not meaningful when FALSE is returned
     */

    /* look for dirty rejection; the character is converted to unsigned char
     * first because tolower() is only defined for values in that range (and
     * EOF), and a plain char may be negative.  An empty token is not
     * rejected here, strchr() matches the terminator of entry 0, but it
     * cannot match any symbol in the loop below */
    if (strchr(token_list[0], tolower((unsigned char) token[0])) == NULL) {
        return FALSE;
    }

    /* march through array until membership is determined */
    for (*index = 1; token_list[*index][0] != '\0'; (*index)++) {

        /* return true if token found */
        if (stricmp(token, token_list[*index]) == 0) {
            return TRUE;
        }
    }

    /* did not find it */
    return FALSE;
}


/*----------------------------------------------------------------------------
 *
 * ASM_get_token() will obtain the next token in the line pointed to by lptr
 * and in addition will return FALSE if EOL is reached or a comment character
 * is the first non whitespace character found
 *
 ---------------------------------------------------------------------------*/

BOOLEAN ASM_get_token(char **lptr, char *token)
{
    /*
     * lptr  - in/out: address of the scan pointer into the current line; it
     *         is moved past everything that was consumed
     * token - out: receives the NUL terminated token; the caller supplies a
     *         buffer large enough for the longest token in the line
     *
     * Returns FALSE, leaving token untouched or holding a skipped symbol,
     * when the end of the line or a comment character is reached, and TRUE
     * otherwise.  Tokens listed in ASM_NOP_Sym are skipped.  When the next
     * character is a delimiter which is neither whitespace nor one of the
     * characters in ASM_delim_Sym, TRUE is returned with an empty token and
     * the scan pointer stays on that delimiter.
     */
    char *start;                /* start location in string */
    int token_length;           /* the length of the current token */
    int dummy;                  /* receives the unused index of a NOP symbol */

    /* loop until we have a valid token or end of string */
    do {
        /* move past whitespace; the table lookups below take the character
         * as unsigned char so that a byte above 127 held in a signed char
         * cannot turn into a negative index */
        while (IsWhite((unsigned char) **lptr)) {
            (*lptr)++;
        }

        /* nothing to return at end of line or at the start of a comment;
         * the end of line test must come before the strchr() below, which
         * would otherwise match the terminator of ASM_delim_Sym */
        if (**lptr == '\0' || **lptr == COMMENT_CHAR) {
            return FALSE;
        }

        if (strchr(ASM_delim_Sym, **lptr)) {

            /* a delimiter which is a token in its own right */
            token[0] = **lptr;
            token[1] = '\0';
            (*lptr)++;
        }
        else {

            /* the token runs from here up to the next delimiter; the NUL at
             * the end of the line counts as a delimiter */
            start = *lptr;
            while (!IsDelim((unsigned char) **lptr)) {
                (*lptr)++;
            }

            /* get the token */
            token_length = (int) (*lptr - start);
            memcpy(token, start, (size_t) token_length);
            token[token_length] = '\0';
        }

    } while (ASMIsMember(ASM_NOP_Sym, token, &dummy));

    return TRUE;
}


/*----------------------------------------------------------------------------
 *
 * ASMTags() tags an input stream assuming 80x86 assembly source in
 * MASM/TASM syntax
 *
 ---------------------------------------------------------------------------*/

/* size of the line buffer and of the token buffers in ASMTags() */
#define TOKEN_LINE_LENGTH 256

/*
 * ASMTagsNextLine() reads the next input line into line, which must hold
 * TOKEN_LINE_LENGTH bytes, and brings the position counters up to date.
 * Returns non-zero if a line was read and zero when GetLine() reports that
 * no line is available; the counters are left untouched in that case.
 */
static int ASMTagsNextLine(FILE * infile, char *line, long int *line_number,
                           int *line_length, long int *char_number)
{
    if (!GetLine(infile, line, TOKEN_LINE_LENGTH)) {
        return 0;
    }

    (*line_number)++;

    /* char_number increments by the length of the previous line plus one
     * (presumably for its line terminator; depends on what GetLine()
     * leaves in the buffer) */
    *char_number += *line_length + 1;
    *line_length = (int) strlen(line);

    return 1;
}

/*
 * ASMTagsOffset() returns the character offset of the start of token, given
 * that the token ends just before token_end within line and that line
 * itself starts at offset line_offset.
 */
static long int ASMTagsOffset(long int line_offset, const char *line,
                              const char *token_end, const char *token)
{
    return line_offset + (long int) (token_end - line) -
        (long int) strlen(token);
}

void ASMTags(FILE * infile, char *infname, FILE * outfile, Flags * flags)
{
    /*
     * infile  - stream the assembler source is read from through GetLine()
     * infname - name of the input file, handed to OutputTag() unchanged
     * outfile - stream handed to OutputTag() for every tag found
     * flags   - selects the kinds of symbol to tag (af, am, al, as and au
     *           through ASMSymbolWanted(), ad and al in the Define state)
     *           and is also handed to OutputTag()
     */
    State state = Discard;      /* the current state of the parser */

    char line[TOKEN_LINE_LENGTH];       /* the current input line */
    char cur_token[TOKEN_LINE_LENGTH];  /* the current token */
    char prev_token[TOKEN_LINE_LENGTH]; /* the previous token */

    char *lptr = NULL;          /* pointer into line for token parser */
    char *prev_lptr = NULL;     /* lptr as it was just after prev_token */

    long int line_number = 0;   /* the current line in the file */
    int line_length = 0;        /* the length of the current line */
    long int char_number = -1;  /* offset of the current line in the file;
                                 * -1 so that the first line lands on 0 */

    int symbol_index = 0;       /* the index into the token list of the
                                 * symbol */
    char delim;                 /* the character closing a comment block */

    /* init the engine */
    ASMParserInit();
    cur_token[0] = '\0';
    prev_token[0] = '\0';

    for (;;) {

        switch (state) {

            case Discard:       /* current line is not valid */

                /* fetch the next line, or finish when there is none */
                if (ASMTagsNextLine(infile, line, &line_number,
                                    &line_length, &char_number)) {
                    lptr = line;
                    state = Parse1;
                }
                else {
                    state = Exit;
                }
                break;

            case Parse1:        /* parsing for first *special* token */

                /* no token left or a comment as first non white space char
                 * in remainder of line */
                if (!ASM_get_token(&lptr, cur_token)) {
                    state = Discard;
                    break;
                }

                /* remember it, Symbol2 and Define tag the first token */
                strcpy(prev_token, cur_token);

                /* check for membership in the tagging symbol club */
                if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
                    state = Symbol1;
                    break;
                }

                /* nothing special, parse the next symbol */
                if (!ASMIsMember(ASM_comment_block,
                                 cur_token, &symbol_index)) {
                    state = Parse2;
                    break;
                }

                /* comment block: get the next non white character, this
                 * makes the assumption that the delimiter character is on
                 * the same line as the comment symbol.  If it is not then
                 * parsing continues normally on the next line.  The lookup
                 * takes the character as unsigned char so that a byte above
                 * 127 cannot become a negative index */
                while (IsWhite((unsigned char) *lptr)) {
                    lptr++;
                }

                if (*lptr) {

                    /* this is the delimiter character, move lptr past it */
                    delim = *lptr;
                    lptr++;

                    /* move over the comment block up to the closing
                     * delimiter, fetching lines as needed so that the line
                     * info stays up to date */
                    while (*lptr != delim) {
                        if (*lptr) {
                            lptr++;
                        }
                        else if (ASMTagsNextLine(infile, line, &line_number,
                                                 &line_length,
                                                 &char_number)) {
                            lptr = line;
                        }
                        else {

                            /* input ended inside the comment block */
                            break;
                        }
                    }
                }

                /* whatever follows the closing delimiter on its line is
                 * dropped with the rest of that line */
                state = Discard;
                break;

            case Parse2:        /* parsing for second *special* token */

                /* save the previous position, it marks the end of
                 * prev_token */
                prev_lptr = lptr;

                if (!ASM_get_token(&lptr, cur_token)) {

                    /* no token left, reset machine */
                    state = Discard;
                }
                else if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {

                    /* found a major symbol */
                    state = Symbol2;
                }
                else if (ASMIsMember(ASM_def, cur_token, &symbol_index)) {

                    /* found a defining token */
                    state = Define;
                }
                else {
                    state = Discard;
                }
                break;

            case Symbol1:       /* next token, ignore if no token found */

                /* get the next symbol and output it if wanted; an empty
                 * token (the keyword was followed by a delimiter which is
                 * not a token) names nothing and is not tagged */
                if (ASM_get_token(&lptr, cur_token) &&
                    cur_token[0] != '\0' &&
                    ASMSymbolWanted(flags, symbol_index)) {
                    OutputTag(outfile, line, cur_token, infname,
                              line_number,
                              ASMTagsOffset(char_number, line, lptr,
                                            cur_token),
                              flags);
                }

                /* reset machine */
                state = Discard;
                break;

            case Symbol2:       /* previous token was the wanted symbol */

                /* the previous token is the symbol of interest */
                /* output if wanted */
                if (ASMSymbolWanted(flags, symbol_index)) {
                    OutputTag(outfile, line, prev_token, infname,
                              line_number,
                              ASMTagsOffset(char_number, line, prev_lptr,
                                            prev_token),
                              flags);
                }

                /* reset machine */
                state = Discard;
                break;

            case Define:        /* previous token was the wanted symbol */

                /* the previous token is the symbol of interest; index 1 of
                 * ASM_def is ":" and is governed by the label flag, all the
                 * other entries by the define flag */
                if ((flags->ad && symbol_index != 1) ||
                    (flags->al && symbol_index == 1)) {
                    OutputTag(outfile, line, prev_token, infname,
                              line_number,
                              ASMTagsOffset(char_number, line, prev_lptr,
                                            prev_token),
                              flags);
                }

                /* reset machine */
                state = Discard;
                break;

            case Exit:          /* clean it up */
                return;

            default:            /* not reached */
                break;
        }
    }
}
