cosmopolitan/third_party/ctags/jscript.c

/*
 *	 $Id: jscript.c 763 2010-07-28 14:22:42Z dfishburn $
 *
 *	 Copyright (c) 2003, Darren Hiebert
 *
 *	 This source code is released for free distribution under the
 *       terms of the GNU General Public License.
 *
 *	 This module contains functions for generating tags for
 *       JavaScript language files.
 *
 *	 This is a good reference for different forms of the function statement:
 *		 http://www.permadi.com/tutorial/jsFunc/
 *       Another good reference:
 *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
 */
#include "third_party/ctags/general.h"
/* must always come first */
#include "third_party/ctags/debug.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/keyword.h"
#include "third_party/ctags/parse.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"

/*
 *	 MACROS
 */
/* TRUE when the token's lexical category (its `type` field) equals `t`. */
#define isType(token, t)    (boolean)((token)->type == (t))
/* TRUE when the token's keyword id (its `keyword` field) equals `k`.
 * readToken() resets that field to KEYWORD_NONE before every read, so
 * isKeyword(token, KEYWORD_NONE) holds for every token that is not a
 * keyword, punctuation included. */
#define isKeyword(token, k) (boolean)((token)->keyword == (k))

/*
 *	 DATA DECLARATIONS
 */

/* Values handed to longjmp() on `Exception`; readToken() raises
 * ExceptionEOF when it reads EOF. */
typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;

/*
 * Track the qualified ("scope.name") class and function names that have
 * already been tagged, so that makeClassTag() and makeFunctionTag() emit
 * each of them only once.
 */
static stringList *ClassNames;
static stringList *FunctionNames;

/*	Identifies which keyword a token is.  Each id other than KEYWORD_NONE
 *	has a matching entry in JsKeywordTable; KEYWORD_NONE marks a token that
 *	is not a keyword.
 */
typedef enum eKeywordId {
  KEYWORD_NONE = -1,
  KEYWORD_function,
  KEYWORD_capital_function,
  KEYWORD_object,
  KEYWORD_capital_object,
  KEYWORD_prototype,
  KEYWORD_var,
  KEYWORD_new,
  KEYWORD_this,
  KEYWORD_for,
  KEYWORD_while,
  KEYWORD_do,
  KEYWORD_if,
  KEYWORD_else,
  KEYWORD_switch,
  KEYWORD_try,
  KEYWORD_catch,
  KEYWORD_finally
} keywordId;

/*	One row of the keyword table: the spelling of a keyword and the id
 *	that buildJsKeywordHash() registers for it.
 */
typedef struct sKeywordDesc {
  /* keyword text exactly as it appears in source (case matters:
   * "function" and "Function" are separate entries) */
  const char *name;
  /* id stored in tokenInfo.keyword when the text is recognised */
  keywordId id;
} keywordDesc;

/*
 * Lexical categories stored in tokenInfo.type.  readToken() resets the type
 * to TOKEN_UNDEFINED before each read and leaves it there for characters
 * it does not recognise.
 */
typedef enum eTokenType {
  TOKEN_UNDEFINED,
  /* produced by readToken() for a backslash outside a string */
  TOKEN_CHARACTER,
  TOKEN_CLOSE_PAREN,
  TOKEN_SEMICOLON,
  TOKEN_COLON,
  TOKEN_COMMA,
  /* identifier text that matched an entry of the keyword table */
  TOKEN_KEYWORD,
  TOKEN_OPEN_PAREN,
  TOKEN_OPERATOR,
  TOKEN_IDENTIFIER,
  /* quoted string; the token text holds the contents without the quotes */
  TOKEN_STRING,
  TOKEN_PERIOD,
  TOKEN_OPEN_CURLY,
  TOKEN_CLOSE_CURLY,
  TOKEN_EQUAL_SIGN,
  /* a '/' that does not start a comment */
  TOKEN_FORWARD_SLASH,
  TOKEN_OPEN_SQUARE,
  TOKEN_CLOSE_SQUARE
} tokenType;

/*
 * A lexical token together with the parsing context that travels with it.
 * Instances are created by newToken() and released by deleteToken().
 */
typedef struct sTokenInfo {
  /* lexical category, assigned by readToken() */
  tokenType type;
  /* keyword id, or KEYWORD_NONE when the token is not a keyword */
  keywordId keyword;
  /* token text: the identifier spelling or the contents of a string */
  vString *string;
  /* dot-separated names of the enclosing scopes, built by addToScope() */
  vString *scope;
  /* source line number recorded by readToken() */
  unsigned long lineNumber;
  /* input file position recorded together with lineNumber */
  fpos_t filePosition;
  /* block depth: parseBlock() increments it on entry, decrements on exit */
  int nestLevel;
  /* while TRUE, makeJsTag(), makeClassTag() and makeFunctionTag() emit
   * nothing for this token */
  boolean ignoreTag;
} tokenInfo;

/*
 *	DATA DEFINITIONS
 */

/* Language handle passed to addKeyword() and analyzeToken() for this parser. */
static langType Lang_js;

/* Jump buffer that readToken() longjmp()s to when it reaches end of input. */
static jmp_buf Exception;

/*
 * Kinds of tag this parser can emit.  The values are used directly as
 * indices into JsKinds[], so both must list the kinds in the same order.
 * JSTAG_COUNT is not a kind; being last, it equals the number of kinds.
 */
typedef enum {
  JSTAG_FUNCTION,
  JSTAG_CLASS,
  JSTAG_METHOD,
  JSTAG_PROPERTY,
  JSTAG_VARIABLE,
  JSTAG_COUNT
} jsKind;

/*
 * Tag kind table, indexed by jsKind (keep the row order in step with that
 * enum).  makeJsTag() reads the `enabled`, `letter` and `name` members;
 * the fourth initializer is presumably the human-readable description --
 * confirm against the kindOption declaration.
 */
static kindOption JsKinds[] = {{TRUE, 'f', "function", "functions"},
                               {TRUE, 'c', "class", "classes"},
                               {TRUE, 'm', "method", "methods"},
                               {TRUE, 'p', "property", "properties"},
                               {TRUE, 'v', "variable", "global variables"}};

/*
 * Keyword spellings and their ids.  buildJsKeywordHash() registers every
 * row with addKeyword(); readToken() later maps identifier text back to an
 * id through analyzeToken().
 */
static const keywordDesc JsKeywordTable[] = {
    /* keyword		keyword ID */
    {"function", KEYWORD_function},
    {"Function", KEYWORD_capital_function},
    {"object", KEYWORD_object},
    {"Object", KEYWORD_capital_object},
    {"prototype", KEYWORD_prototype},
    {"var", KEYWORD_var},
    {"new", KEYWORD_new},
    {"this", KEYWORD_this},
    {"for", KEYWORD_for},
    {"while", KEYWORD_while},
    {"do", KEYWORD_do},
    {"if", KEYWORD_if},
    {"else", KEYWORD_else},
    {"switch", KEYWORD_switch},
    {"try", KEYWORD_try},
    {"catch", KEYWORD_catch},
    {"finally", KEYWORD_finally}};

/*
 *	 FUNCTION DEFINITIONS
 */

/* Recursive functions: these parsers call one another (parseBlock() calls
 * parseFunction() and parseLine(), parseFunction() calls parseBlock()), so
 * they are declared ahead of their definitions. */
static void parseFunction(tokenInfo *const token);
static boolean parseBlock(tokenInfo *const token, tokenInfo *const parent);
static boolean parseLine(tokenInfo *const token, boolean is_inside_class);

/* Reports whether `c` may appear in a token that readToken() treats as an
 * identifier: a letter, a digit, or one of '$', '@', '_' and '#'. */
static boolean isIdentChar(const int c) {
  const int is_ident_punct = (c == '$' || c == '@' || c == '_' || c == '#');

  return (boolean)(is_ident_punct || isalnum(c));
}

/* Registers every row of JsKeywordTable through addKeyword() under Lang_js. */
static void buildJsKeywordHash(void) {
  const keywordDesc *entry = JsKeywordTable;
  const keywordDesc *const table_end =
      JsKeywordTable + sizeof(JsKeywordTable) / sizeof(JsKeywordTable[0]);

  while (entry != table_end) {
    addKeyword(entry->name, Lang_js, (int)entry->id);
    ++entry;
  }
}

/*
 * Allocates a token with empty text and scope, no type or keyword, nesting
 * level zero and tagging enabled.  Its position is taken from the current
 * input position.  Release the result with deleteToken().
 */
static tokenInfo *newToken(void) {
  tokenInfo *const fresh = xMalloc(1, tokenInfo);

  /* Text buffers start out empty. */
  fresh->string = vStringNew();
  fresh->scope = vStringNew();

  /* Nothing has been read into the token yet. */
  fresh->keyword = KEYWORD_NONE;
  fresh->type = TOKEN_UNDEFINED;
  fresh->ignoreTag = FALSE;
  fresh->nestLevel = 0;

  /* Stamp it with where the reader currently is. */
  fresh->lineNumber = getSourceLineNumber();
  fresh->filePosition = getInputFilePosition();

  return fresh;
}

/* Frees a token obtained from newToken(), including both of its strings. */
static void deleteToken(tokenInfo *const token) {
  vString *const text = token->string;
  vString *const scope = token->scope;

  vStringDelete(text);
  vStringDelete(scope);
  eFree(token);
}

/*
 *	 Tag generation functions
 */

/*
static void makeConstTag (tokenInfo *const token, const jsKind kind)
{
        if (JsKinds [kind].enabled && ! token->ignoreTag )
        {
                const char *const name = vStringValue (token->string);
                tagEntryInfo e;
                initTagEntry (&e, name);

                e.lineNumber   = token->lineNumber;
                e.filePosition = token->filePosition;
                e.kindName	   = JsKinds [kind].name;
                e.kind		   = JsKinds [kind].letter;

                makeTagEntry (&e);
        }
}

static void makeJsTag (tokenInfo *const token, const jsKind kind)
{
        vString *	fulltag;

        if (JsKinds [kind].enabled && ! token->ignoreTag )
        {
                *
                 * If a scope has been added to the token, change the token
                 * string to include the scope when making the tag.
                 *
                if ( vStringLength(token->scope) > 0 )
                {
                        *
                        fulltag = vStringNew ();
                        vStringCopy(fulltag, token->scope);
                        vStringCatS (fulltag, ".");
                        vStringCatS (fulltag, vStringValue(token->string));
                        vStringTerminate(fulltag);
                        vStringCopy(token->string, fulltag);
                        vStringDelete (fulltag);
                        *
                        jsKind parent_kind = JSTAG_CLASS;

                        *
                         * if we're creating a function (and not a method),
                         * guess we're inside another function
                         *
                        if (kind == JSTAG_FUNCTION)
                                parent_kind = JSTAG_FUNCTION;

                        e.extensionFields.scope[0] = JsKinds [parent_kind].name;
                        e.extensionFields.scope[1] = vStringValue
(token->scope);
                }
                * makeConstTag (token, kind); *
                makeTagEntry (&e);
        }
}
*/

/*
 * Emits a tag of the given kind for `token`, using the token's text as the
 * tag name and its recorded line and file position.  Nothing is emitted
 * when the kind is disabled or the token is flagged ignoreTag.  A non-empty
 * token scope is attached as the tag's scope extension field.
 */
static void makeJsTag(tokenInfo *const token, const jsKind kind) {
  tagEntryInfo entry;

  if (!JsKinds[kind].enabled || token->ignoreTag) return;

  initTagEntry(&entry, vStringValue(token->string));
  entry.kind = JsKinds[kind].letter;
  entry.kindName = JsKinds[kind].name;
  entry.filePosition = token->filePosition;
  entry.lineNumber = token->lineNumber;

  if (vStringLength(token->scope) > 0) {
    /*
     * A function (as opposed to a method) with a scope is guessed to be
     * nested inside another function; anything else is assumed to sit
     * inside a class.
     */
    const jsKind parent_kind =
        (kind == JSTAG_FUNCTION) ? JSTAG_FUNCTION : JSTAG_CLASS;

    entry.extensionFields.scope[0] = JsKinds[parent_kind].name;
    entry.extensionFields.scope[1] = vStringValue(token->scope);
  }

  makeTagEntry(&entry);
}

/*
 * Emits a class tag for `token` unless the same qualified name
 * ("scope.name", or just "name" when there is no scope) was tagged as a
 * class before.  Does nothing for tokens flagged ignoreTag.
 */
static void makeClassTag(tokenInfo *const token) {
  vString *qualified;

  if (token->ignoreTag) return;

  qualified = vStringNew();
  if (vStringLength(token->scope) == 0) {
    vStringCopy(qualified, token->string);
  } else {
    vStringCopy(qualified, token->scope);
    vStringCatS(qualified, ".");
    vStringCatS(qualified, vStringValue(token->string));
  }
  vStringTerminate(qualified);

  /* The list keeps its own copy, so the scratch string is always freed. */
  if (!stringListHas(ClassNames, vStringValue(qualified))) {
    stringListAdd(ClassNames, vStringNewCopy(qualified));
    makeJsTag(token, JSTAG_CLASS);
  }

  vStringDelete(qualified);
}

/*
 * Emits a function tag for `token` unless the same qualified name
 * ("scope.name", or just "name" when there is no scope) was tagged as a
 * function before.  Does nothing for tokens flagged ignoreTag.
 */
static void makeFunctionTag(tokenInfo *const token) {
  vString *qualified;

  if (token->ignoreTag) return;

  qualified = vStringNew();
  if (vStringLength(token->scope) == 0) {
    vStringCopy(qualified, token->string);
  } else {
    vStringCopy(qualified, token->scope);
    vStringCatS(qualified, ".");
    vStringCatS(qualified, vStringValue(token->string));
  }
  vStringTerminate(qualified);

  /* The list keeps its own copy, so the scratch string is always freed. */
  if (!stringListHas(FunctionNames, vStringValue(qualified))) {
    stringListAdd(FunctionNames, vStringNewCopy(qualified));
    makeJsTag(token, JSTAG_FUNCTION);
  }

  vStringDelete(qualified);
}

/*
 *	 Parsing functions
 */

/*
 * Reads the body of a quoted string into `string`.  The caller has already
 * consumed the opening quote.  Reading stops at the first unescaped
 * `delimiter` (consumed, not stored) or at end of input.  A backslash is
 * dropped and the character after it is stored verbatim, so an escaped
 * quote does not end the string.
 */
static void parseString(vString *const string, const int delimiter) {
  boolean end = FALSE;
  while (!end) {
    int c = fileGetc();
    if (c == EOF)
      end = TRUE;
    else if (c == '\\') {
      c = fileGetc(); /* This may be a ' or ". */
      /*
       * The input may end right after the backslash; stop here rather
       * than storing EOF as if it were a character of the string.
       */
      if (c == EOF)
        end = TRUE;
      else
        vStringPut(string, c);
    } else if (c == delimiter)
      end = TRUE;
    else
      vStringPut(string, c);
  }
  vStringTerminate(string);
}

/*	Reads an identifier whose first character is "firstChar" and appends it
 *	to "string".  The character that ends the identifier is pushed back
 *	onto the input unless it is whitespace, which is discarded.
 */
static void parseIdentifier(vString *const string, const int firstChar) {
  int next;

  Assert(isIdentChar(firstChar));

  vStringPut(string, firstChar);
  for (next = fileGetc(); isIdentChar(next); next = fileGetc()) {
    vStringPut(string, next);
  }
  vStringTerminate(string);

  /* Give back whatever ended the identifier, except whitespace. */
  if (!isspace(next)) fileUngetc(next);
}

/*
 * Reads the next lexical token from the input into `token`, overwriting its
 * type, keyword, string, lineNumber and filePosition; scope, nestLevel and
 * ignoreTag are left untouched.  Spaces, tabs, newlines and both comment
 * forms are skipped.  At end of input this function does not return: it
 * longjmp()s to `Exception` with ExceptionEOF.
 */
static void readToken(tokenInfo *const token) {
  int c;

  token->type = TOKEN_UNDEFINED;
  token->keyword = KEYWORD_NONE;
  vStringClear(token->string);

  /* Re-entered after a comment has been skipped. */
getNextChar:
  /* Skip blanks, recording the position of each character read so that
   * the token ends up stamped with the position of its first character. */
  do {
    c = fileGetc();
    token->lineNumber = getSourceLineNumber();
    token->filePosition = getInputFilePosition();
  } while (c == '\t' || c == ' ' || c == '\n');

  switch (c) {
    case EOF:
      /* Unwind to wherever the jump buffer was set; never returns. */
      longjmp(Exception, (int)ExceptionEOF);
      break;
    case '(':
      token->type = TOKEN_OPEN_PAREN;
      break;
    case ')':
      token->type = TOKEN_CLOSE_PAREN;
      break;
    case ';':
      token->type = TOKEN_SEMICOLON;
      break;
    case ',':
      token->type = TOKEN_COMMA;
      break;
    case '.':
      token->type = TOKEN_PERIOD;
      break;
    case ':':
      token->type = TOKEN_COLON;
      break;
    case '{':
      token->type = TOKEN_OPEN_CURLY;
      break;
    case '}':
      token->type = TOKEN_CLOSE_CURLY;
      break;
    case '=':
      token->type = TOKEN_EQUAL_SIGN;
      break;
    case '[':
      token->type = TOKEN_OPEN_SQUARE;
      break;
    case ']':
      token->type = TOKEN_CLOSE_SQUARE;
      break;

    case '\'':
    case '"':
      /* The quote character doubles as the closing delimiter.  The
       * position is re-read afterwards, so a string token is stamped
       * with where it ends. */
      token->type = TOKEN_STRING;
      parseString(token->string, c);
      token->lineNumber = getSourceLineNumber();
      token->filePosition = getInputFilePosition();
      break;

    case '\\':
      /* A backslash outside a string: swallow the next character when it
       * is a backslash, a double quote or whitespace, otherwise push it
       * back for the next read. */
      c = fileGetc();
      if (c != '\\' && c != '"' && !isspace(c)) fileUngetc(c);
      token->type = TOKEN_CHARACTER;
      token->lineNumber = getSourceLineNumber();
      token->filePosition = getInputFilePosition();
      break;

    case '/': {
      int d = fileGetc();
      if ((d != '*') && /* is this the start of a comment? */
          (d != '/'))   /* is a one line comment? */
      {
        /* A lone slash; only comment openers are checked for here. */
        token->type = TOKEN_FORWARD_SLASH;
        fileUngetc(d);
      } else {
        if (d == '*') {
          /* Block comment: jump from '*' to '*' until one is directly
           * followed by '/'.  A character that is not '/' is pushed
           * back so that it can itself be the next '*'. */
          do {
            fileSkipToCharacter('*');
            c = fileGetc();
            if (c == '/')
              break;
            else
              fileUngetc(c);
          } while (c != EOF && c != '\0');
          goto getNextChar;
        } else if (d == '/') /* one-line comment: skip to end of line */
        {
          fileSkipToCharacter('\n');
          goto getNextChar;
        }
      }
      break;
    }

    default:
      /* Anything that cannot start an identifier stays TOKEN_UNDEFINED. */
      if (!isIdentChar(c))
        token->type = TOKEN_UNDEFINED;
      else {
        parseIdentifier(token->string, c);
        token->lineNumber = getSourceLineNumber();
        token->filePosition = getInputFilePosition();
        /* Look the text up under Lang_js to tell keywords from
         * ordinary identifiers. */
        token->keyword = analyzeToken(token->string, Lang_js);
        if (isKeyword(token, KEYWORD_NONE))
          token->type = TOKEN_IDENTIFIER;
        else
          token->type = TOKEN_KEYWORD;
      }
      break;
  }
}

/*
 * Copies the type, keyword, text, scope, position and nesting level of
 * `src` into `dest`.  The ignoreTag flag is not copied; `dest` keeps its
 * own value.
 */
static void copyToken(tokenInfo *const dest, tokenInfo *const src) {
  /* What kind of token it is. */
  dest->type = src->type;
  dest->keyword = src->keyword;

  /* Where it was found. */
  dest->lineNumber = src->lineNumber;
  dest->filePosition = src->filePosition;
  dest->nestLevel = src->nestLevel;

  /* Its text and enclosing scope. */
  vStringCopy(dest->string, src->string);
  vStringCopy(dest->scope, src->scope);
}

/*
 *	 Token parsing functions
 */

/*
 * If `token` is an opening parenthesis, reads past the matching closing
 * parenthesis, honouring nested parentheses, and leaves `token` on the
 * first token after it.  Does nothing for any other token.
 */
static void skipArgumentList(tokenInfo *const token) {
  int depth;

  if (!isType(token, TOKEN_OPEN_PAREN)) return;

  /* The parenthesis we are standing on is the first open level. */
  depth = 1;
  do {
    readToken(token);
    if (isType(token, TOKEN_OPEN_PAREN)) {
      depth++;
    } else if (isType(token, TOKEN_CLOSE_PAREN) && depth > 0) {
      depth--;
    }
  } while (!(isType(token, TOKEN_CLOSE_PAREN) && depth == 0));

  /* Step over the closing parenthesis. */
  readToken(token);
}

/*
 * If `token` is an opening square bracket (as in "name[1]"), reads past the
 * matching closing bracket, honouring nested brackets, and leaves `token`
 * on the first token after it.  Does nothing for any other token.
 */
static void skipArrayList(tokenInfo *const token) {
  int depth;

  if (!isType(token, TOKEN_OPEN_SQUARE)) return;

  /* The bracket we are standing on is the first open level. */
  depth = 1;
  do {
    readToken(token);
    if (isType(token, TOKEN_OPEN_SQUARE)) {
      depth++;
    } else if (isType(token, TOKEN_CLOSE_SQUARE) && depth > 0) {
      depth--;
    }
  } while (!(isType(token, TOKEN_CLOSE_SQUARE) && depth == 0));

  /* Step over the closing bracket. */
  readToken(token);
}

/* Appends the text of `child` to the text of `parent`, inserting a "."
 * separator when `parent` already has text ("a" + "b" gives "a.b"). */
static void addContext(tokenInfo *const parent, const tokenInfo *const child) {
  vString *const path = parent->string;

  if (vStringLength(path) != 0) vStringCatS(path, ".");
  vStringCatS(path, vStringValue(child->string));
  vStringTerminate(path);
}

/* Appends `extra` to the scope of `token`, inserting a "." separator when
 * the scope is not empty. */
static void addToScope(tokenInfo *const token, vString *const extra) {
  vString *const scope = token->scope;

  if (vStringLength(scope) != 0) vStringCatS(scope, ".");
  vStringCatS(scope, vStringValue(extra));
  vStringTerminate(scope);
}

/*
 *	 Scanning functions
 */

/*
 * Advances `token` until it is a semicolon or a closing brace.  Nested
 * blocks met on the way are handed to parseBlock() and parenthesised
 * groups to skipArgumentList().  Returns at once when `token` is already
 * one of the two terminators.
 */
static void findCmdTerm(tokenInfo *const token) {
  for (;;) {
    if (isType(token, TOKEN_SEMICOLON) || isType(token, TOKEN_CLOSE_CURLY)) {
      break;
    }

    switch (token->type) {
      case TOKEN_OPEN_CURLY:
        /* Handle nested blocks */
        parseBlock(token, token);
        break;
      case TOKEN_OPEN_PAREN:
        skipArgumentList(token);
        break;
      default:
        readToken(token);
        break;
    }
  }
}

/*
 * Parses a switch statement; `token` is the "switch" keyword on entry.
 *
 *	   switch (expression) {
 *	   case value1:
 *		   statement;
 *		   break;
 *	   default: statement;
 *	   }
 */
static void parseSwitch(tokenInfo *const token) {
  readToken(token);

  /* The parenthesised switch expression is skipped. */
  if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

  /* The body with its case labels is parsed like any other block. */
  if (isType(token, TOKEN_OPEN_CURLY)) parseBlock(token, token);
}

/*
 * Parses a loop statement; `token` is the "for", "while" or "do" keyword
 * on entry (any other token is left untouched).  Handles:
 *
 *	   for (x=0; x<3; x++)
 *		   statement;
 *
 *	   while (number<5) {
 *		   statements;
 *	   }
 *
 *	   do {
 *		   statements;
 *	   }
 *	   while (number<5);
 */
static void parseLoop(tokenInfo *const token) {
  const int is_do = isKeyword(token, KEYWORD_do);

  if (!is_do && !isKeyword(token, KEYWORD_for) &&
      !isKeyword(token, KEYWORD_while)) {
    return;
  }

  readToken(token);

  /* "for" and "while" carry their parenthesised header before the body. */
  if (!is_do && isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

  /* The body is either a braced block or a single statement. */
  if (isType(token, TOKEN_OPEN_CURLY)) {
    parseBlock(token, token);
  } else {
    parseLine(token, FALSE);
  }

  if (!is_do) return;

  /* "do" is followed by its trailing "while (condition)". */
  readToken(token);
  if (isKeyword(token, KEYWORD_while)) {
    readToken(token);
    if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);
  }
}

/*
 * Parses an if statement; `token` is the "if" keyword on entry, or the
 * "else" keyword when this function recurses for an else branch.  The
 * forms handled are:
 *
 *	   if ( ... )
 *		   one line;
 *
 *	   if ( ... )
 *		  statement;
 *	   else
 *		  statement
 *
 *	   if ( ... ) {
 *		  multiple;
 *		  statements;
 *	   }
 *
 * A statement may legally lack its terminator when the next token is the
 * closing brace of the enclosing block:
 *
 *     function a(flag){
 *         if(flag)
 *             test(1);
 *         else
 *             test(2)
 *     }
 *
 * Returns FALSE when that happened, i.e. `token` already sits on the
 * enclosing closing brace and the caller must not read another token;
 * returns TRUE otherwise.
 *
 * TODO:  Deal with statements that can optionally end without a
 *		  semicolon.  Currently this messes up the parsing of blocks.
 *		  Need to somehow detect this has happened, and either back up
 *		  a token, or skip reading the next token if that is possible
 *		  from all code locations.
 */
static boolean parseIf(tokenInfo *const token) {
  readToken(token);

  /* Arriving on "else": an "else if" still has its "if" to consume. */
  if (isKeyword(token, KEYWORD_if)) readToken(token);

  /* The parenthesised condition. */
  if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

  /* Braced body: parsed as a block; the caller reads on as usual. */
  if (isType(token, TOKEN_OPEN_CURLY)) {
    parseBlock(token, token);
    return TRUE;
  }

  /* Single-statement body: run to its terminator. */
  findCmdTerm(token);
  if (isType(token, TOKEN_CLOSE_CURLY)) {
    /* This statement did not have a line terminator. */
    return FALSE;
  }

  /* Look at what follows: the enclosing brace, an else, or more code. */
  readToken(token);
  if (isType(token, TOKEN_CLOSE_CURLY)) return FALSE;
  if (isKeyword(token, KEYWORD_else)) return parseIf(token);

  return TRUE;
}

/*
 * Parses a function declaration of the form
 *	   function validFunctionTwo(a,b) {}
 * `token` is the "function" keyword on entry.  The name may be dotted
 * ("a.b.c").  After the body has been parsed a class tag is emitted when
 * parseBlock() reports a class, a function tag otherwise.
 */
static void parseFunction(tokenInfo *const token) {
  tokenInfo *const name = newToken();

  readToken(name);
  /* Add scope in case this is an INNER function */
  addToScope(name, token->scope);

  /* Collect the remaining parts of a dotted name. */
  readToken(token);
  while (isType(token, TOKEN_PERIOD)) {
    readToken(token);
    if (isKeyword(token, KEYWORD_NONE)) {
      addContext(name, token);
      readToken(token);
    }
  }

  if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

  if (isType(token, TOKEN_OPEN_CURLY)) {
    if (parseBlock(token, name)) {
      makeClassTag(name);
    } else {
      makeFunctionTag(name);
    }
  }

  findCmdTerm(token);

  deleteToken(name);
}

/*
 * Parses a brace-delimited block.  `token` may be the opening brace (it is
 * then skipped) or the first token inside the block.  `parent` names the
 * construct that owns the block; its text is appended to the scope while
 * "this", "var" and "function" statements inside the block are parsed.
 * token->nestLevel is raised for the duration of the call.
 *
 * Returns TRUE when a statement directly inside this block began with the
 * "this" keyword, which the callers take to mean the block is a class
 * body.  The result of a nested block is not propagated.
 */
static boolean parseBlock(tokenInfo *const token, tokenInfo *const parent) {
  boolean is_class = FALSE;
  boolean read_next_token = TRUE;
  /* Holds the scope while a statement runs with `parent` appended to it. */
  vString *saveScope = vStringNew();

  token->nestLevel++;
  /*
   * Make this routine a bit more forgiving.
   * If called on an open_curly advance it
   */
  if (isType(token, TOKEN_OPEN_CURLY) && isKeyword(token, KEYWORD_NONE))
    readToken(token);

  if (!isType(token, TOKEN_CLOSE_CURLY)) {
    /*
     * Read until we find the closing brace,
     * any nested braces will be handled within
     */
    do {
      read_next_token = TRUE;
      if (isKeyword(token, KEYWORD_this)) {
        /*
         * Means we are inside a class and have found
         * a class, not a function
         */
        is_class = TRUE;
        vStringCopy(saveScope, token->scope);
        addToScope(token, parent->string);

        /*
         * Ignore the remainder of the line
         * findCmdTerm(token);
         */
        parseLine(token, is_class);

        /* Drop the parent name from the scope again. */
        vStringCopy(token->scope, saveScope);
      } else if (isKeyword(token, KEYWORD_var)) {
        /*
         * Potentially we have found an inner function.
         * Set something to indicate the scope
         */
        vStringCopy(saveScope, token->scope);
        addToScope(token, parent->string);
        parseLine(token, is_class);
        vStringCopy(token->scope, saveScope);
      } else if (isKeyword(token, KEYWORD_function)) {
        /* A nested function declaration, scoped under the parent. */
        vStringCopy(saveScope, token->scope);
        addToScope(token, parent->string);
        parseFunction(token);
        vStringCopy(token->scope, saveScope);
      } else if (isType(token, TOKEN_OPEN_CURLY)) {
        /* Handle nested blocks */
        parseBlock(token, parent);
      } else {
        /*
         * It is possible for a line to have no terminator
         * if the following line is a closing brace.
         * parseLine will detect this case and indicate
         * whether we should read an additional token.
         */
        read_next_token = parseLine(token, is_class);
      }

      /*
       * Always read a new token unless we find a statement without
       * a ending terminator
       */
      if (read_next_token) readToken(token);

      /*
       * If we find a statement without a terminator consider the
       * block finished, otherwise the stack will be off by one.
       */
    } while (!isType(token, TOKEN_CLOSE_CURLY) && read_next_token);
  }

  vStringDelete(saveScope);
  token->nestLevel--;

  return is_class;
}

/*
 * Parses the members of an object literal and tags them.  Called with
 * `token` on the opening '{'; `class` supplies the name that becomes the
 * scope of every member.  This deals with these formats
 *	   validProperty  : 2,
 *	   validMethod    : function(a,b) {}
 *	   'validMethod2' : function(a,b) {}
 *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false,
 *                           '*': false}
 *
 * "name : function(...) {...}" yields a method tag, any other
 * "name : value" a property tag.
 *
 * Returns TRUE when at least one such member was seen, FALSE when the
 * literal had none (for instance "var z = {};").
 */
static boolean parseMethods(tokenInfo *const token, tokenInfo *const class) {
  tokenInfo *const name = newToken();
  boolean has_methods = FALSE;

  do {
    readToken(token);
    if (isType(token, TOKEN_CLOSE_CURLY)) {
      /*
       * Either an empty literal or a trailing comma after the last
       * member.  has_methods is left as it is: it is still FALSE for
       * the empty literal, and members tagged before a trailing comma
       * must still be reported.
       */
      goto cleanUp;
    }

    if (isType(token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) {
      /* Remember the member name; copying also resets name's scope to
       * the scope of `token`. */
      copyToken(name, token);

      readToken(token);
      if (isType(token, TOKEN_COLON)) {
        readToken(token);
        if (isKeyword(token, KEYWORD_function)) {
          readToken(token);
          if (isType(token, TOKEN_OPEN_PAREN)) {
            skipArgumentList(token);
          }

          if (isType(token, TOKEN_OPEN_CURLY)) {
            has_methods = TRUE;
            addToScope(name, class->string);
            makeJsTag(name, JSTAG_METHOD);
            parseBlock(token, name);

            /*
             * Read to the closing curly, check next
             * token, if a comma, we must loop again
             */
            readToken(token);
          }
        } else {
          has_methods = TRUE;
          addToScope(name, class->string);
          makeJsTag(name, JSTAG_PROPERTY);

          /*
           * Read the next token, if a comma
           * we must loop again
           */
          readToken(token);
        }
      }
    }
  } while (isType(token, TOKEN_COMMA));

  findCmdTerm(token);

cleanUp:
  deleteToken(name);

  return has_methods;
}

static boolean parseStatement(tokenInfo *const token, boolean is_inside_class) {
  tokenInfo *const name = newToken();
  tokenInfo *const secondary_name = newToken();
  vString *saveScope = vStringNew();
  boolean is_class = FALSE;
  boolean is_terminated = TRUE;
  boolean is_global = FALSE;
  boolean is_prototype = FALSE;
  boolean has_methods = FALSE;
  vString *fulltag;

  vStringClear(saveScope);
  /*
   * Functions can be named or unnamed.
   * This deals with these formats:
   * Function
   *	   validFunctionOne = function(a,b) {}
   *	   testlib.validFunctionFive = function(a,b) {}
   *	   var innerThree = function(a,b) {}
   *	   var innerFour = (a,b) {}
   *	   var D2 = secondary_fcn_name(a,b) {}
   *	   var D3 = new Function("a", "b", "return a+b;");
   * Class
   *	   testlib.extras.ValidClassOne = function(a,b) {
   *		   this.a = a;
   *	   }
   * Class Methods
   *	   testlib.extras.ValidClassOne.prototype = {
   *		   'validMethodOne' : function(a,b) {},
   *		   'validMethodTwo' : function(a,b) {}
   *	   }
   *     ValidClassTwo = function ()
   *     {
   *         this.validMethodThree = function() {}
   *         // unnamed method
   *         this.validMethodFour = () {}
   *     }
   *	   Database.prototype.validMethodThree = Database_getTodaysDate;
   */

  if (is_inside_class) is_class = TRUE;
  /*
   * var can preceed an inner function
   */
  if (isKeyword(token, KEYWORD_var)) {
    /*
     * Only create variables for global scope
     */
    if (token->nestLevel == 0) {
      is_global = TRUE;
    }
    readToken(token);
  }

  if (isKeyword(token, KEYWORD_this)) {
    readToken(token);
    if (isType(token, TOKEN_PERIOD)) {
      readToken(token);
    }
  }

  copyToken(name, token);

  while (!isType(token, TOKEN_CLOSE_CURLY) && !isType(token, TOKEN_SEMICOLON) &&
         !isType(token, TOKEN_EQUAL_SIGN)) {
    /* Potentially the name of the function */
    readToken(token);
    if (isType(token, TOKEN_PERIOD)) {
      /*
       * Cannot be a global variable is it has dot references in the name
       */
      is_global = FALSE;
      do {
        readToken(token);
        if (isKeyword(token, KEYWORD_NONE)) {
          if (is_class) {
            vStringCopy(saveScope, token->scope);
            addToScope(token, name->string);
          } else
            addContext(name, token);
        } else if (isKeyword(token, KEYWORD_prototype)) {
          /*
           * When we reach the "prototype" tag, we infer:
           *     "BindAgent" is a class
           *     "build"     is a method
           *
           * function BindAgent( repeatableIdName, newParentIdName ) {
           * }
           *
           * CASE 1
           * Specified function name: "build"
           *     BindAgent.prototype.build = function( mode ) {
           *     	  ignore everything within this function
           *     }
           *
           * CASE 2
           * Prototype listing
           *     ValidClassOne.prototype = {
           *         'validMethodOne' : function(a,b) {},
           *         'validMethodTwo' : function(a,b) {}
           *     }
           *
           */
          makeClassTag(name);
          is_class = TRUE;
          is_prototype = TRUE;

          /*
           * There should a ".function_name" next.
           */
          readToken(token);
          if (isType(token, TOKEN_PERIOD)) {
            /*
             * Handle CASE 1
             */
            readToken(token);
            if (isKeyword(token, KEYWORD_NONE)) {
              vStringCopy(saveScope, token->scope);
              addToScope(token, name->string);

              makeJsTag(token, JSTAG_METHOD);
              /*
               * We can read until the end of the block / statement.
               * We need to correctly parse any nested blocks, but
               * we do NOT want to create any tags based on what is
               * within the blocks.
               */
              token->ignoreTag = TRUE;
              /*
               * Find to the end of the statement
               */
              findCmdTerm(token);
              token->ignoreTag = FALSE;
              is_terminated = TRUE;
              goto cleanUp;
            }
          } else if (isType(token, TOKEN_EQUAL_SIGN)) {
            readToken(token);
            if (isType(token, TOKEN_OPEN_CURLY)) {
              /*
               * Handle CASE 2
               *
               * Creates tags for each of these class methods
               *     ValidClassOne.prototype = {
               *         'validMethodOne' : function(a,b) {},
               *         'validMethodTwo' : function(a,b) {}
               *     }
               */
              parseMethods(token, name);
              /*
               * Find to the end of the statement
               */
              findCmdTerm(token);
              token->ignoreTag = FALSE;
              is_terminated = TRUE;
              goto cleanUp;
            }
          }
        }
        readToken(token);
      } while (isType(token, TOKEN_PERIOD));
    }

    if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

    if (isType(token, TOKEN_OPEN_SQUARE)) skipArrayList(token);

    /*
    if ( isType (token, TOKEN_OPEN_CURLY) )
    {
            is_class = parseBlock (token, name);
    }
    */
  }

  if (isType(token, TOKEN_CLOSE_CURLY)) {
    /*
     * Reaching this section without having
     * processed an open curly brace indicates
     * the statement is most likely not terminated.
     */
    is_terminated = FALSE;
    goto cleanUp;
  }

  if (isType(token, TOKEN_SEMICOLON)) {
    /*
     * Only create variables for global scope
     */
    if (token->nestLevel == 0 && is_global) {
      /*
       * Handles this syntax:
       *	   var g_var2;
       */
      if (isType(token, TOKEN_SEMICOLON)) makeJsTag(name, JSTAG_VARIABLE);
    }
    /*
     * Statement has ended.
     * This deals with calls to functions, like:
     *     alert(..);
     */
    goto cleanUp;
  }

  if (isType(token, TOKEN_EQUAL_SIGN)) {
    readToken(token);

    if (isKeyword(token, KEYWORD_function)) {
      readToken(token);

      if (isKeyword(token, KEYWORD_NONE) && !isType(token, TOKEN_OPEN_PAREN)) {
        /*
         * Functions of this format:
         *	   var D2A = function theAdd(a, b)
         *	   {
         *		  return a+b;
         *	   }
         * Are really two separate defined functions and
         * can be referenced in two ways:
         *	   alert( D2A(1,2) );			  // produces 3
         *	   alert( theAdd(1,2) );		  // also produces 3
         * So it must have two tags:
         *	   D2A
         *	   theAdd
         * Save the reference to the name for later use, once
         * we have established this is a valid function we will
         * create the secondary reference to it.
         */
        copyToken(secondary_name, token);
        readToken(token);
      }

      if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

      if (isType(token, TOKEN_OPEN_CURLY)) {
        /*
         * This will be either a function or a class.
         * We can only determine this by checking the body
         * of the function.  If we find a "this." we know
         * it is a class, otherwise it is a function.
         */
        if (is_inside_class) {
          makeJsTag(name, JSTAG_METHOD);
          if (vStringLength(secondary_name->string) > 0)
            makeFunctionTag(secondary_name);
          parseBlock(token, name);
        } else {
          is_class = parseBlock(token, name);
          if (is_class)
            makeClassTag(name);
          else
            makeFunctionTag(name);

          if (vStringLength(secondary_name->string) > 0)
            makeFunctionTag(secondary_name);

          /*
           * Find to the end of the statement
           */
          goto cleanUp;
        }
      }
    } else if (isType(token, TOKEN_OPEN_PAREN)) {
      /*
       * Handle nameless functions
       *     this.method_name = () {}
       */
      skipArgumentList(token);

      if (isType(token, TOKEN_OPEN_CURLY)) {
        /*
         * Nameless functions are only setup as methods.
         */
        makeJsTag(name, JSTAG_METHOD);
        parseBlock(token, name);
      }
    } else if (isType(token, TOKEN_OPEN_CURLY)) {
      /*
       * Creates tags for each of these class methods
       *     ValidClassOne.prototype = {
       *         'validMethodOne' : function(a,b) {},
       *         'validMethodTwo' : function(a,b) {}
       *     }
       * Or checks if this is a hash variable.
       *     var z = {};
       */
      has_methods = parseMethods(token, name);
      if (!has_methods) {
        /*
         * Only create variables for global scope
         */
        if (token->nestLevel == 0 && is_global) {
          /*
           * A pointer can be created to the function.
           * If we recognize the function/class name ignore the variable.
           * This format looks identical to a variable definition.
           * A variable defined outside of a block is considered
           * a global variable:
           *	   var g_var1 = 1;
           *	   var g_var2;
           * This is not a global variable:
           *	   var g_var = function;
           * This is a global variable:
           *	   var g_var = different_var_name;
           */
          fulltag = vStringNew();
          if (vStringLength(token->scope) > 0) {
            vStringCopy(fulltag, token->scope);
            vStringCatS(fulltag, ".");
            vStringCatS(fulltag, vStringValue(token->string));
          } else {
            vStringCopy(fulltag, token->string);
          }
          vStringTerminate(fulltag);
          if (!stringListHas(FunctionNames, vStringValue(fulltag)) &&
              !stringListHas(ClassNames, vStringValue(fulltag))) {
            readToken(token);
            if (!isType(token, TOKEN_SEMICOLON)) findCmdTerm(token);
            if (isType(token, TOKEN_SEMICOLON)) makeJsTag(name, JSTAG_VARIABLE);
          }
          vStringDelete(fulltag);
        }
      }
      if (isType(token, TOKEN_CLOSE_CURLY)) {
        /*
         * Assume the closing curly brace terminates
         * this statement.
         */
        is_terminated = TRUE;
      }
    } else if (isKeyword(token, KEYWORD_new)) {
      readToken(token);
      if (isKeyword(token, KEYWORD_function) ||
          isKeyword(token, KEYWORD_capital_function) ||
          isKeyword(token, KEYWORD_object) ||
          isKeyword(token, KEYWORD_capital_object)) {
        if (isKeyword(token, KEYWORD_object) ||
            isKeyword(token, KEYWORD_capital_object))
          is_class = TRUE;

        readToken(token);
        if (isType(token, TOKEN_OPEN_PAREN)) skipArgumentList(token);

        if (isType(token, TOKEN_SEMICOLON)) {
          if (token->nestLevel == 0) {
            if (is_class) {
              makeClassTag(name);
            } else {
              makeFunctionTag(name);
            }
          }
        }
      }
    } else if (isKeyword(token, KEYWORD_NONE)) {
      /*
       * Only create variables for global scope
       */
      if (token->nestLevel == 0 && is_global) {
        /*
         * A pointer can be created to the function.
         * If we recognize the function/class name ignore the variable.
         * This format looks identical to a variable definition.
         * A variable defined outside of a block is considered
         * a global variable:
         *	   var g_var1 = 1;
         *	   var g_var2;
         * This is not a global variable:
         *	   var g_var = function;
         * This is a global variable:
         *	   var g_var = different_var_name;
         */
        fulltag = vStringNew();
        if (vStringLength(token->scope) > 0) {
          vStringCopy(fulltag, token->scope);
          vStringCatS(fulltag, ".");
          vStringCatS(fulltag, vStringValue(token->string));
        } else {
          vStringCopy(fulltag, token->string);
        }
        vStringTerminate(fulltag);
        if (!stringListHas(FunctionNames, vStringValue(fulltag)) &&
            !stringListHas(ClassNames, vStringValue(fulltag))) {
          findCmdTerm(token);
          if (isType(token, TOKEN_SEMICOLON)) makeJsTag(name, JSTAG_VARIABLE);
        }
        vStringDelete(fulltag);
      }
    }
  }
  findCmdTerm(token);

  /*
   * Statements can be optionally terminated in the case of
   * statement prior to a close curly brace as in the
   * document.write line below:
   *
   * function checkForUpdate() {
   *	   if( 1==1 ) {
   *		   document.write("hello from checkForUpdate<br>")
   *	   }
   *	   return 1;
   * }
   */
  if (!is_terminated && isType(token, TOKEN_CLOSE_CURLY)) is_terminated = FALSE;

cleanUp:
  vStringCopy(token->scope, saveScope);
  deleteToken(name);
  deleteToken(secondary_name);
  vStringDelete(saveScope);

  return is_terminated;
}

/*
 * Parses a single statement beginning at the current token, dispatching on
 * the leading keyword when there is one.
 *
 * token            current token; advanced as the statement is consumed.
 * is_inside_class  forwarded unchanged to parseStatement().
 *
 * Returns TRUE when the statement is considered terminated, FALSE when it
 * was not.  The last statement within a block "{}" can be optionally
 * terminated, and the calling routine (parseBlock) relies on this result to
 * decide whether it may read an additional token looking for the end of the
 * statement.
 */
static boolean parseLine(tokenInfo *const token, boolean is_inside_class) {
  if (!isType(token, TOKEN_KEYWORD)) {
    /*
     * Single line statements may not be SEMICOLON terminated;
     * parseStatement reports whether this one was.
     */
    return parseStatement(token, is_inside_class);
  }

  switch (token->keyword) {
    case KEYWORD_for:
    case KEYWORD_while:
    case KEYWORD_do:
      parseLoop(token);
      return TRUE;
    case KEYWORD_if:
    case KEYWORD_else:
    case KEYWORD_try:
    case KEYWORD_catch:
    case KEYWORD_finally:
      /* Common semantics */
      return parseIf(token);
    case KEYWORD_switch:
      parseSwitch(token);
      return TRUE;
    default:
      /*
       * Any other keyword (var, this, new, ...) starts an ordinary
       * statement.  Propagate its termination state exactly as for
       * statements that do not start with a keyword; previously the
       * result was dropped and such statements were always reported
       * as terminated.
       */
      return parseStatement(token, is_inside_class);
  }
}

/*
 * Top-level driver: reads tokens one at a time and hands each one to
 * parseFunction() when it is the "function" keyword, or to parseLine()
 * (outside of any class) otherwise.
 *
 * The loop has no exit condition of its own, so this function never
 * returns normally.  NOTE(review): presumably the token reader leaves it
 * via longjmp on the Exception buffer armed in findJsTags() -- confirm
 * against readToken().
 */
static void parseJsFile(tokenInfo *const token) {
  for (;;) {
    readToken(token);

    if (isType(token, TOKEN_KEYWORD) && isKeyword(token, KEYWORD_function)) {
      parseFunction(token);
    } else {
      parseLine(token, FALSE);
    }
  }
}

/*
 * Parser initialization hook (installed as def->initialize by
 * JavaScriptParser).  Stores the language id handed in by the caller in
 * Lang_js and then calls buildJsKeywordHash().
 */
static void initialize(const langType language) {
  /* Sanity check: JsKinds must contain exactly JSTAG_COUNT entries. */
  Assert(sizeof(JsKinds) / sizeof(JsKinds[0]) == JSTAG_COUNT);
  Lang_js = language;
  /* NOTE(review): presumably registers the keyword table for Lang_js,
   * hence the call after the assignment -- confirm in buildJsKeywordHash. */
  buildJsKeywordHash();
}

static void findJsTags(void) {
  tokenInfo *const token = newToken();
  exception_t exception;

  ClassNames = stringListNew();
  FunctionNames = stringListNew();

  exception = (exception_t)(setjmp(Exception));
  while (exception == ExceptionNone) parseJsFile(token);

  stringListDelete(ClassNames);
  stringListDelete(FunctionNames);
  ClassNames = NULL;
  FunctionNames = NULL;
  deleteToken(token);
}

/*
 * Creates the parser definition structure for JavaScript: a parser named
 * "JavaScript" for the "js" extension, using the JsKinds table, with
 * findJsTags as the parse routine and initialize as the setup hook.
 */
extern parserDefinition *JavaScriptParser(void) {
  static const char *const js_extensions[] = {"js", NULL};
  parserDefinition *const parser = parserNew("JavaScript");

  parser->extensions = js_extensions;

  /* Tag kinds for the hand-written parser (used instead of regex). */
  parser->kinds = JsKinds;
  parser->kindCount = KIND_COUNT(JsKinds);

  /* Callbacks. */
  parser->initialize = initialize;
  parser->parser = findJsTags;

  return parser;
}
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */