/*
 * ejlex.c -- Ejscript(TM) Lexical Analyser
 *
 * Copyright (c) GoAhead Software Inc., 1995-2000. All Rights Reserved.
 *
 * See the file "license.txt" for usage and redistribution license requirements
 */

/******************************** Description *********************************/

/*
 *	Ejscript lexical analyser. This implementes a lexical analyser for a 
 *	a subset of the JavaScript language.
 */

/********************************** Includes **********************************/

#include	"ejIntrn.h"

#if UEMF
	#include "uemf.h"
#else
	#include "basic/basicInternal.h"
#endif

/********************************** Defines ***********************************/
#define		OCTAL	8
#define		HEX		16
/****************************** Forward Declarations **************************/

static int 		getLexicalToken(ej_t* ep, int state);
static int 		tokenAddChar(ej_t *ep, int c);
static int 		inputGetc(ej_t* ep);
static void		inputPutback(ej_t* ep, int c);
static int		charConvert(ej_t* ep, int base, int maxDig);

/************************************* Code ***********************************/
/*
 *	Setup the lexical analyser
 */

int ejLexOpen(ej_t* ep)
{
	return 0;
}

/******************************************************************************/
/*
 *	Close the lexicial analyser
 */

void ejLexClose(ej_t* ep)
{
}

/******************************************************************************/
/*
 *	Open a new input script
 */

int ejLexOpenScript(ej_t* ep, char_t *script)
{
	ejinput_t	*ip;

	a_assert(ep);
	a_assert(script);

	if ((ep->input = balloc(B_L, sizeof(ejinput_t))) == NULL) {
		return -1;
	}
	ip = ep->input;
	memset(ip, 0, sizeof(*ip));

	a_assert(ip);
	a_assert(ip->putBackToken == NULL);
	a_assert(ip->putBackTokenId == 0);

/*
 *	Create the parse token buffer and script buffer
 */
	if (ringqOpen(&ip->tokbuf, EJ_INC, -1) < 0) {
		return -1;
	}
	if (ringqOpen(&ip->script, EJ_SCRIPT_INC, -1) < 0) {
		return -1;
	}
/*
 *	Put the Ejscript into a ring queue for easy parsing
 */
	ringqPutStr(&ip->script, script);

	ip->lineNumber = 1;
	ip->lineLength = 0;
	ip->lineColumn = 0;
	ip->line = NULL;

	return 0;
}

/******************************************************************************/
/*
 *	Close the input script
 */

void ejLexCloseScript(ej_t* ep)
{
	ejinput_t	*ip;

	a_assert(ep);

	ip = ep->input;
	a_assert(ip);

	if (ip->putBackToken) {
		bfree(B_L, ip->putBackToken);
		ip->putBackToken = NULL;
	}
	ip->putBackTokenId = 0;

	if (ip->line) {
		bfree(B_L, ip->line);
		ip->line = NULL;
	}

	ringqClose(&ip->tokbuf);
	ringqClose(&ip->script);

	bfree(B_L, ip);
}

/******************************************************************************/
/*
 *	Save the input state
 */

void ejLexSaveInputState(ej_t* ep, ejinput_t* state)
{
	ejinput_t	*ip;

	a_assert(ep);

	ip = ep->input;
	a_assert(ip);

	*state = *ip;
	if (ip->putBackToken) {
		state->putBackToken = bstrdup(B_L, ip->putBackToken);
	}
}

/******************************************************************************/
/*
 *	Restore the input state
 */

void ejLexRestoreInputState(ej_t* ep, ejinput_t* state)
{
	ejinput_t	*ip;

	a_assert(ep);

	ip = ep->input;
	a_assert(ip);

	ip->tokbuf = state->tokbuf;
	ip->script = state->script;
	ip->putBackTokenId = state->putBackTokenId;
	if (ip->putBackToken) {
		bfree(B_L, ip->putBackToken);
	}
	if (state->putBackToken) {
		ip->putBackToken = bstrdup(B_L, state->putBackToken);
	}
}

/******************************************************************************/
/*
 *	Free a saved input state
 */

void ejLexFreeInputState(ej_t* ep, ejinput_t* state)
{
	if (state->putBackToken) {
		bfree(B_L, state->putBackToken);
		state->putBackToken = NULL;
	}
}

/******************************************************************************/
/*
 *	Get the next Ejscript token
 */

int ejLexGetToken(ej_t* ep, int state)
{
	ep->tid = getLexicalToken(ep, state);
	trace(9, T("ejGetToken: %d, \"%s\"\n"), ep->tid, ep->token);
	return ep->tid;
}

/******************************************************************************/
/*
 *	Get the next Ejscript token
 */

static int getLexicalToken(ej_t* ep, int state)
{
	ringq_t		*inq, *tokq;
	ejinput_t*	ip;
	int			done, tid, c, quote, style;

	a_assert(ep);
	ip = ep->input;
	a_assert(ip);

	inq = &ip->script;
	tokq = &ip->tokbuf;

	ep->tid = -1;
	tid = -1;
	ep->token = T("");

	ringqFlush(tokq);

	if (ip->putBackTokenId > 0) {
		ringqPutStr(tokq, ip->putBackToken);
		tid = ip->putBackTokenId;
		ip->putBackTokenId = 0;
		ep->token = (char_t*) tokq->servp;
		return tid;
	}

	if ((c = inputGetc(ep)) < 0) {
		return TOK_EOF;
	}

	for (done = 0; !done; ) {
		switch (c) {
		case -1:
			return TOK_EOF;

		case ' ':
		case '\t':
		case '\r':
			do {
				if ((c = inputGetc(ep)) < 0)
					break;
			} while (c == ' ' || c == '\t' || c == '\r');
			break;

		case '\n':
			return TOK_NEWLINE;

		case '(':
			tokenAddChar(ep, c);
			return TOK_LPAREN;

		case ')':
			tokenAddChar(ep, c);
			return TOK_RPAREN;

		case '{':
			tokenAddChar(ep, c);
			return TOK_LBRACE;

		case '}':
			tokenAddChar(ep, c);
			return TOK_RBRACE;

		case '+':
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c != '+' ) {
				inputPutback(ep, c);
				tokenAddChar(ep, EXPR_PLUS);
				return TOK_EXPR;
			}
			tokenAddChar(ep, EXPR_INC);
			return TOK_INC_DEC;

		case '-':
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c != '-' ) {
				inputPutback(ep, c);
				tokenAddChar(ep, EXPR_MINUS);
				return TOK_EXPR;
			}
			tokenAddChar(ep, EXPR_DEC);
			return TOK_INC_DEC;

		case '*':
			tokenAddChar(ep, EXPR_MUL);
			return TOK_EXPR;

		case '%':
			tokenAddChar(ep, EXPR_MOD);
			return TOK_EXPR;

		case '/':
/*
 *			Handle the division operator and comments
 */
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c != '*' && c != '/') {
				inputPutback(ep, c);
				tokenAddChar(ep, EXPR_DIV);
				return TOK_EXPR;
			}
			style = c;
/*
 *			Eat comments. Both C and C++ comment styles are supported.
 */
			while (1) {
				if ((c = inputGetc(ep)) < 0) {
					ejError(ep, T("Syntax Error"));
					return TOK_ERR;
				}
				if (c == '\n' && style == '/') {
					break;
				} else if (c == '*') {
					c = inputGetc(ep);
					if (style == '/') {
						if (c == '\n') {
							break;
						}
					} else {
						if (c == '/') {
							break;
						}
					}
				}
			}
/*
 *			Continue looking for a token, so get the next character
 */
			if ((c = inputGetc(ep)) < 0) {
				return TOK_EOF;
			}
			break;

		case '<':									/* < and <= */
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c == '<') {
				tokenAddChar(ep, EXPR_LSHIFT);
				return TOK_EXPR;
			} else if (c == '=') {
				tokenAddChar(ep, EXPR_LESSEQ);
				return TOK_EXPR;
			}
			tokenAddChar(ep, EXPR_LESS);
			inputPutback(ep, c);
			return TOK_EXPR;

		case '>':									/* > and >= */
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c == '>') {
				tokenAddChar(ep, EXPR_RSHIFT);
				return TOK_EXPR;
			} else if (c == '=') {
				tokenAddChar(ep, EXPR_GREATEREQ);
				return TOK_EXPR;
			}
			tokenAddChar(ep, EXPR_GREATER);
			inputPutback(ep, c);
			return TOK_EXPR;

		case '=':									/* "==" */
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c == '=') {
				tokenAddChar(ep, EXPR_EQ);
				return TOK_EXPR;
			}
			inputPutback(ep, c);
			return TOK_ASSIGNMENT;

		case '!':									/* "!=" or "!"*/
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			if (c == '=') {
				tokenAddChar(ep, EXPR_NOTEQ);
				return TOK_EXPR;
			}
			inputPutback(ep, c);
			tokenAddChar(ep, EXPR_BOOL_COMP);
			return TOK_EXPR;

		case ';':
			tokenAddChar(ep, c);
			return TOK_SEMI;

		case ',':
			tokenAddChar(ep, c);
			return TOK_COMMA;

		case '|':									/* "||" */
			if ((c = inputGetc(ep)) < 0 || c != '|') {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			tokenAddChar(ep, COND_OR);
			return TOK_LOGICAL;

		case '&':									/* "&&" */
			if ((c = inputGetc(ep)) < 0 || c != '&') {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}
			tokenAddChar(ep, COND_AND);
			return TOK_LOGICAL;

		case '\"':									/* String quote */
		case '\'':
			quote = c;
			if ((c = inputGetc(ep)) < 0) {
				ejError(ep, T("Syntax Error"));
				return TOK_ERR;
			}

			while (c != quote) {
/*
 *				check for escape sequence characters
 */
				if (c == '\\') {
					c = inputGetc(ep);

					if (gisdigit(c)) {
/*
 *						octal support, \101 maps to 65 = 'A'. put first char
 *						back so converter will work properly.
 */
						inputPutback(ep, c);
						c = charConvert(ep, OCTAL, 3);

					} else {
						switch (c) {
						case 'n':
							c = '\n'; break;
						case 'b':
							c = '\b'; break;
						case 'f':
							c = '\f'; break;
						case 'r':
							c = '\r'; break;
						case 't':
							c = '\t'; break;
						case 'x':
/*
 *							hex support, \x41 maps to 65 = 'A'
 */
							c = charConvert(ep, HEX, 2);
							break;
						case 'u':
/*
 *							unicode support, \x0401 maps to 65 = 'A'
 */
							c = charConvert(ep, HEX, 2);
							c = c*16 + charConvert(ep, HEX, 2);

							break;
						case '\'':
						case '\"':
						case '\\':
							break;
						default:
							ejError(ep, T("Invalid Escape Sequence"));
							return TOK_ERR;
						}
					}
					if (tokenAddChar(ep, c) < 0) {
						return TOK_ERR;
					}
				} else {
					if (tokenAddChar(ep, c) < 0) {
						return TOK_ERR;
					}
				}
				if ((c = inputGetc(ep)) < 0) {
					ejError(ep, T("Unmatched Quote"));
					return TOK_ERR;
				}
			}
			return TOK_LITERAL;

		case '0': case '1': case '2': case '3': case '4': 
		case '5': case '6': case '7': case '8': case '9':
			do {
				if (tokenAddChar(ep, c) < 0) {
					return TOK_ERR;
				}
				if ((c = inputGetc(ep)) < 0)
					break;
			} while (gisdigit(c));
			inputPutback(ep, c);
			return TOK_LITERAL;

		default:
/*
 *			Identifiers or a function names
 */
			while (1) {
				if (c == '\\') {
/*
 *					just ignore any \ characters.
 */
				} else if (tokenAddChar(ep, c) < 0) {
						break;
				}
				if ((c = inputGetc(ep)) < 0) {
					break;
				}
				if (!gisalnum(c) && c != '$' && c != '_' &&
					c != '\\') {
					break;
				}
			}
			if (! gisalpha(*tokq->servp) && *tokq->servp != '$' && 
					*tokq->servp != '_') {
				ejError(ep, T("Invalid identifier %s"), tokq->servp);
				return TOK_ERR;
			}
/*
 *			Check for reserved words (only "if", "else", "var", "for"
 *			and "return" at the moment)
 */
			if (state == STATE_STMT) {
				if (gstrcmp(ep->token, T("if")) == 0) {
					return TOK_IF;
				} else if (gstrcmp(ep->token, T("else")) == 0) {
					return TOK_ELSE;
				} else if (gstrcmp(ep->token, T("var")) == 0) {
					return TOK_VAR;
				} else if (gstrcmp(ep->token, T("for")) == 0) {
					return TOK_FOR;
				} else if (gstrcmp(ep->token, T("return")) == 0) {
					if ((c == ';') || (c == '(')) {
						inputPutback(ep, c);
					}
					return TOK_RETURN;
				}
			}

/* 
 * 			Skip white space after token to find out whether this is
 * 			a function or not.
 */ 
			while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
				if ((c = inputGetc(ep)) < 0)
					break;
			}

			tid = (c == '(') ? TOK_FUNCTION : TOK_ID;
			done++;
		}
	}

/*
 *	Putback the last extra character for next time
 */
	inputPutback(ep, c);
	return tid;
}

/******************************************************************************/
/*
 *	Putback the last token read
 */

void ejLexPutbackToken(ej_t* ep, int tid, char_t *string)
{
	ejinput_t*	ip;

	a_assert(ep);
	ip = ep->input;
	a_assert(ip);

	if (ip->putBackToken) {
		bfree(B_L, ip->putBackToken);
	}
	ip->putBackTokenId = tid;
	ip->putBackToken = bstrdup(B_L, string);
}

/******************************************************************************/
/*
 *	Add a character to the token ringq buffer
 */

static int tokenAddChar(ej_t *ep, int c)
{
	ejinput_t*	ip;

	a_assert(ep);
	ip = ep->input;
	a_assert(ip);

	if (ringqPutc(&ip->tokbuf, (char_t) c) < 0) {
		ejError(ep, T("Token too big"));
		return -1;
	}
	* ((char_t*) ip->tokbuf.endp) = '\0';
	ep->token = (char_t*) ip->tokbuf.servp;

	return 0;
}

/******************************************************************************/
/*
 *	Get another input character
 */

static int inputGetc(ej_t* ep)
{
	ejinput_t	*ip;
	int			c, len;

	a_assert(ep);
	ip = ep->input;

	if ((len = ringqLen(&ip->script)) == 0) {
		return -1;
	}

	c = ringqGetc(&ip->script);

	if (c == '\n') {
		ip->lineNumber++;
		ip->lineColumn = 0;
	} else {
		if ((ip->lineColumn + 2) >= ip->lineLength) {
			ip->lineLength += EJ_INC;
			ip->line = brealloc(B_L, ip->line, ip->lineLength * sizeof(char_t));
		}
		ip->line[ip->lineColumn++] = c;
		ip->line[ip->lineColumn] = '\0';
	}
	return c;
}

/******************************************************************************/
/*
 *	Putback a character onto the input queue
 */

static void inputPutback(ej_t* ep, int c)
{
	ejinput_t	*ip;

	a_assert(ep);

	ip = ep->input;
	ringqInsertc(&ip->script, (char_t) c);
	ip->lineColumn--;
	ip->line[ip->lineColumn] = '\0';
}

/******************************************************************************/
/*
 *	Convert a hex or octal character back to binary, return original char if 
 *	not a hex digit
 */

static int charConvert(ej_t* ep, int base, int maxDig)
{
	int		i, c, lval, convChar;

	lval = 0;
	for (i = 0; i < maxDig; i++) {
		if ((c = inputGetc(ep)) < 0) {
			break;
		}
/*
 *		Initialize to out of range value
 */
		convChar = base;
		if (gisdigit(c)) {
			convChar = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			convChar = c - 'a' + 10;
		} else if (c >= 'A' && c <= 'F') {
			convChar = c - 'A' + 10;
		}
/*
 *		if unexpected character then return it to buffer.
 */
		if (convChar >= base) {
			inputPutback(ep, c);
			break;
		}
		lval = (lval * base) + convChar;
	}
	return lval;
}

/******************************************************************************/