cosmopolitan/third_party/ctags/go.c

#include "third_party/ctags/general.h"
/* must always come first */
#include "libc/calls/calls.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "third_party/ctags/debug.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/keyword.h"
#include "third_party/ctags/main.h"
#include "third_party/ctags/options.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"

/*
 *	 MACROS
 */
/* Evaluates to a boolean telling whether token's `type` field equals t. */
#define isType(token, t)    (boolean)((token)->type == (t))
/* Evaluates to a boolean telling whether token's `keyword` field equals k. */
#define isKeyword(token, k) (boolean)((token)->keyword == (k))

/*
 *	 DATA DECLARATIONS
 */

/*  Values carried through the `Exception` jump buffer.  readToken() passes
 *  ExceptionEOF to longjmp() at end of input; ExceptionNone is 0, the value
 *  setjmp() yields on its direct return.
 */
typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;

/*  Identifiers for the Go keywords this parser recognises.  Each value is
 *  registered by initialize() through GoKeywordTable; KEYWORD_NONE marks a
 *  token that is not one of these keywords.
 */
typedef enum eKeywordId {
  KEYWORD_NONE = -1,
  KEYWORD_package,
  KEYWORD_import,
  KEYWORD_const,
  KEYWORD_type,
  KEYWORD_var,
  KEYWORD_func,
  KEYWORD_struct,
  KEYWORD_interface,
  KEYWORD_map,
  KEYWORD_chan
} keywordId;

/*  One row of the keyword table: pairs the spelling of a keyword with its
 *  keywordId.  initialize() registers each row with addKeyword() so that
 *  readToken() can later map an identifier to its keyword ID.
 */
typedef struct sKeywordDesc {
  const char *name; /* keyword spelling as it appears in source */
  keywordId id;     /* ID stored in tokenInfo.keyword for that spelling */
} keywordDesc;

typedef enum eTokenType {
  TOKEN_NONE = -1,
  TOKEN_CHARACTER,
  // Don't need TOKEN_FORWARD_SLASH
  TOKEN_FORWARD_SLASH,
  TOKEN_KEYWORD,
  TOKEN_IDENTIFIER,
  TOKEN_STRING,
  TOKEN_OPEN_PAREN,
  TOKEN_CLOSE_PAREN,
  TOKEN_OPEN_CURLY,
  TOKEN_CLOSE_CURLY,
  TOKEN_OPEN_SQUARE,
  TOKEN_CLOSE_SQUARE,
  TOKEN_SEMICOLON,
  TOKEN_STAR,
  TOKEN_LEFT_ARROW,
  TOKEN_DOT,
  TOKEN_COMMA
} tokenType;

/*  A single lexical token.  Instances are created by newToken(), refilled in
 *  place by readToken() and released by deleteToken().
 */
typedef struct sTokenInfo {
  tokenType type;           /* category of the token */
  keywordId keyword;        /* keyword ID, or KEYWORD_NONE */
  vString *string;          /* the name of the token */
  unsigned long lineNumber; /* line number of tag */
  fpos_t filePosition;      /* file position of line containing name */
} tokenInfo;

/*
 *   DATA DEFINITIONS
 */

/* Language id received by initialize(); used by readToken() for keyword lookup. */
static int Lang_go;
/* Jump buffer armed in findGoTags() and taken by readToken() at end of input. */
static jmp_buf Exception;
/* Package name used by makeTag() to build qualified tag names.  Set by
 * parsePackage() only when qualified tags are enabled; reset to NULL by
 * findGoTags() after each file. */
static vString *scope;

/*  Tag kinds emitted by this parser.  The non-negative values are used by
 *  makeTag() as indices into GoKinds, so the two lists must stay in the same
 *  order.
 */
typedef enum {
  GOTAG_UNDEFINED = -1,
  GOTAG_PACKAGE,
  GOTAG_FUNCTION,
  GOTAG_CONST,
  GOTAG_TYPE,
  GOTAG_VAR,
} goKind;

/*  Kind descriptors, indexed by goKind.  makeTag() reads the `enabled`,
 *  `letter` and `name` members of the selected entry.
 */
static kindOption GoKinds[] = {{TRUE, 'p', "package", "packages"},
                               {TRUE, 'f', "func", "functions"},
                               {TRUE, 'c', "const", "constants"},
                               {TRUE, 't', "type", "types"},
                               {TRUE, 'v', "var", "variables"}};

/*  Spelling-to-ID table for the keywords the parser cares about.  Every row
 *  is registered with addKeyword() by initialize().
 */
static keywordDesc GoKeywordTable[] = {
    {"package", KEYWORD_package}, {"import", KEYWORD_import},
    {"const", KEYWORD_const},     {"type", KEYWORD_type},
    {"var", KEYWORD_var},         {"func", KEYWORD_func},
    {"struct", KEYWORD_struct},   {"interface", KEYWORD_interface},
    {"map", KEYWORD_map},         {"chan", KEYWORD_chan}};

/*
 *   FUNCTION DEFINITIONS
 */

/*
 *  Reports whether c can be part of an identifier as scanned by
 *  parseIdentifier().
 *
 *  Accepted: ASCII letters and digits, '$', '@', '_', '#', and every value
 *  from 0x80 upwards.  The last clause keeps the bytes of a multi-byte UTF-8
 *  sequence inside one identifier.  The comparison is inclusive because 0x80
 *  itself is a legal continuation byte (U+00C0, for instance, is encoded as
 *  C3 80); excluding it would split such identifiers in two.
 *
 *  XXX UTF-8: bytes are not decoded, so any non-ASCII byte is accepted.
 */
static boolean isIdentChar(const int c) {
  return (boolean)(isalpha(c) || isdigit(c) || c == '$' || c == '@' ||
                   c == '_' || c == '#' || c >= 0x80);
}

/*
 *  Parser initialisation hook (installed by GoParser()).  Remembers the
 *  language id in Lang_go and registers every row of GoKeywordTable for that
 *  language.
 */
static void initialize(const langType language) {
  const keywordDesc *entry = GoKeywordTable;
  const keywordDesc *const end =
      GoKeywordTable + sizeof(GoKeywordTable) / sizeof(GoKeywordTable[0]);

  Lang_go = language;
  while (entry != end) {
    addKeyword(entry->name, language, (int)entry->id);
    ++entry;
  }
}

/*
 *  Allocates a token in its "nothing read yet" state: type TOKEN_NONE,
 *  keyword KEYWORD_NONE, a fresh empty string, and the current source line
 *  number and input file position.  Release it with deleteToken().
 */
static tokenInfo *newToken(void) {
  tokenInfo *const fresh = xMalloc(1, tokenInfo);

  fresh->keyword = KEYWORD_NONE;
  fresh->type = TOKEN_NONE;
  fresh->string = vStringNew();
  fresh->lineNumber = getSourceLineNumber();
  fresh->filePosition = getInputFilePosition();

  return fresh;
}

/*
 *  Releases a token obtained from newToken(), including its string.
 *  A NULL token is ignored.
 */
static void deleteToken(tokenInfo *const token) {
  if (token == NULL) {
    return;
  }
  vStringDelete(token->string);
  eFree(token);
}

/*
 *   Parsing functions
 */

/*
 *  Reads the remainder of a quoted literal into `string`.
 *
 *  The opening delimiter has already been consumed by the caller.  Input is
 *  read until the matching `delimiter` or end of input; the closing
 *  delimiter is consumed but not stored.  Unless the delimiter is a
 *  backquote, a backslash causes the following character to be stored
 *  verbatim (the backslash itself is dropped), so an escaped quote does not
 *  end the literal.
 *
 *  string:    receives the literal's contents and is terminated on return.
 *  delimiter: the quote character that opened the literal.
 */
static void parseString(vString *const string, const int delimiter) {
  boolean end = FALSE;
  while (!end) {
    int c = fileGetc();
    if (c == EOF)
      end = TRUE;
    else if (c == '\\' && delimiter != '`') {
      c = fileGetc(); /* This maybe a ' or ". */
      /* Input may end right after the backslash; EOF is not a character
       * and must not be appended to the string. */
      if (c == EOF)
        end = TRUE;
      else
        vStringPut(string, c);
    } else if (c == delimiter)
      end = TRUE;
    else
      vStringPut(string, c);
  }
  vStringTerminate(string);
}

/*
 *  Collects an identifier into `string`.  `firstChar` is stored
 *  unconditionally; further characters are read and stored for as long as
 *  isIdentChar() accepts them.  The first rejected character is pushed back
 *  onto the input and the string is terminated.
 */
static void parseIdentifier(vString *const string, const int firstChar) {
  int ch = firstChar;

  for (;;) {
    vStringPut(string, ch);
    ch = fileGetc();
    if (!isIdentChar(ch)) {
      break;
    }
  }

  vStringTerminate(string);
  /* Always push the terminator back: if it is a line feed, readToken() may
   * need to turn it into a semicolon. */
  fileUngetc(ch);
}

/*
 *  Reads the next token from the input into `token`.
 *
 *  Blanks, tabs, carriage returns and comments are skipped.  A newline that
 *  follows an identifier, a string, or a closing ')', '}' or ']' is returned
 *  as TOKEN_SEMICOLON, mirroring Go's automatic semicolon insertion; an
 *  explicit ';' yields the same token.  Anything that is not punctuation
 *  known to this function is collected by parseIdentifier() and classified
 *  as TOKEN_KEYWORD or TOKEN_IDENTIFIER through the keyword table.
 *
 *  The type of the token returned is remembered in a function-local static
 *  so the next call can decide whether a newline ends a statement.
 *
 *  Does not return at end of input: it longjmp()s to `Exception` with
 *  ExceptionEOF instead.
 */
static void readToken(tokenInfo *const token) {
  int c;
  static tokenType lastTokenType = TOKEN_NONE;

  token->type = TOKEN_NONE;
  token->keyword = KEYWORD_NONE;
  vStringClear(token->string);

getNextChar:
  do {
    c = fileGetc();
    token->lineNumber = getSourceLineNumber();
    token->filePosition = getInputFilePosition();
    if (c == '\n' &&
        (lastTokenType == TOKEN_IDENTIFIER || lastTokenType == TOKEN_STRING ||
         lastTokenType == TOKEN_CLOSE_PAREN ||
         lastTokenType == TOKEN_CLOSE_CURLY ||
         lastTokenType == TOKEN_CLOSE_SQUARE)) {
      token->type = TOKEN_SEMICOLON;
      goto done;
    }
  } while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

  switch (c) {
    case EOF:
      /* Forget the previous token so that a following input file does not
       * start with a semicolon decision based on this file's last token. */
      lastTokenType = TOKEN_NONE;
      longjmp(Exception, (int)ExceptionEOF);
      break;

    case '/': {
      boolean hasNewline = FALSE;
      int d = fileGetc();
      switch (d) {
        case '/':
          fileSkipToCharacter('\n');
          /* Line comments start with the
           * character sequence // and
           * continue through the next
           * newline. A line comment acts
           * like a newline.  */
          fileUngetc('\n');
          goto getNextChar;
        case '*':
          /* Scan to the closing star-slash, remembering whether the comment
           * spans a line break. */
          do {
            int inner;
            do {
              inner = fileGetc();
              if (inner == '\n') {
                hasNewline = TRUE;
              }
            } while (inner != EOF && inner != '*');

            c = fileGetc();
            if (c == '/')
              break;
            else
              fileUngetc(c);
          } while (c != EOF && c != '\0');

          /* A block comment acts like a newline if it contains one,
           * otherwise like a space. */
          fileUngetc(hasNewline ? '\n' : ' ');
          goto getNextChar;
        default:
          token->type = TOKEN_FORWARD_SLASH;
          fileUngetc(d);
          break;
      }
    } break;

    case '"':
    case '\'':
    case '`':
      token->type = TOKEN_STRING;
      parseString(token->string, c);
      token->lineNumber = getSourceLineNumber();
      token->filePosition = getInputFilePosition();
      break;

    case '<': {
      int d = fileGetc();
      if (d == '-') {
        token->type = TOKEN_LEFT_ARROW;
        break;
      }
      /* Some other operator starting with '<': it is skipped, but the
       * look-ahead character belongs to the next token and must be put
       * back (it may be a bracket or a quote).  A newline is never turned
       * into a semicolon directly after an operator, so clear the
       * remembered token type as well. */
      fileUngetc(d);
      lastTokenType = TOKEN_NONE;
      goto getNextChar;
    }

    case '(':
      token->type = TOKEN_OPEN_PAREN;
      break;

    case ')':
      token->type = TOKEN_CLOSE_PAREN;
      break;

    case '{':
      token->type = TOKEN_OPEN_CURLY;
      break;

    case '}':
      token->type = TOKEN_CLOSE_CURLY;
      break;

    case '[':
      token->type = TOKEN_OPEN_SQUARE;
      break;

    case ']':
      token->type = TOKEN_CLOSE_SQUARE;
      break;

    case '*':
      token->type = TOKEN_STAR;
      break;

    case '.':
      token->type = TOKEN_DOT;
      break;

    case ',':
      token->type = TOKEN_COMMA;
      break;

    case ';':
      /* An explicit semicolon terminates a statement exactly like the ones
       * synthesised from newlines above. */
      token->type = TOKEN_SEMICOLON;
      break;

    default:
      /* Everything else, including operator characters not listed above,
       * is collected as an identifier-like token. */
      parseIdentifier(token->string, c);
      token->lineNumber = getSourceLineNumber();
      token->filePosition = getInputFilePosition();
      token->keyword = lookupKeyword(vStringValue(token->string), Lang_go);
      if (isKeyword(token, KEYWORD_NONE))
        token->type = TOKEN_IDENTIFIER;
      else
        token->type = TOKEN_KEYWORD;
      break;
  }

done:
  lastTokenType = token->type;
}

/*
 *  If `token` is an opening '(', '{' or '[', reads tokens up to the matching
 *  closer, honouring nesting of the same bracket kind, e.g. the inner
 *  parentheses in
 *    (  name varchar(30), text binary(10)  )
 *  and then reads one more token, so that on return `token` holds whatever
 *  follows the closer.  For any other token type nothing is read.
 */
static void skipToMatched(tokenInfo *const token) {
  tokenType opener;
  tokenType closer;
  int depth;

  if (isType(token, TOKEN_OPEN_PAREN)) {
    opener = TOKEN_OPEN_PAREN;
    closer = TOKEN_CLOSE_PAREN;
  } else if (isType(token, TOKEN_OPEN_CURLY)) {
    opener = TOKEN_OPEN_CURLY;
    closer = TOKEN_CLOSE_CURLY;
  } else if (isType(token, TOKEN_OPEN_SQUARE)) {
    opener = TOKEN_OPEN_SQUARE;
    closer = TOKEN_CLOSE_SQUARE;
  } else {
    return;
  }

  /* The token we start on is the first opener. */
  depth = 1;
  do {
    readToken(token);
    if (isType(token, opener)) {
      depth++;
    } else if (isType(token, closer) && depth > 0) {
      depth--;
    }
  } while (!(isType(token, closer) && depth == 0));

  /* Step past the matching closer. */
  readToken(token);
}

/*
 *  Skips over a type expression starting at `token`, leaving `token` on the
 *  first token after it.  If `token` does not start any of the forms handled
 *  below, nothing is read.
 *
 *  Prefix forms (array/slice brackets, '*', "chan", "<-", "map[...]",
 *  "func(...)") are consumed and the scan then continues with the element,
 *  base or result type that follows.
 */
static void skipType(tokenInfo *const token) {
  for (;;) {
    // Type      = TypeName | TypeLit | "(" Type ")" .
    if (isType(token, TOKEN_OPEN_PAREN)) {
      skipToMatched(token);
      return;
    }

    // TypeName  = QualifiedIdent.
    // QualifiedIdent = [ PackageName "." ] identifier .
    // PackageName    = identifier .
    if (isType(token, TOKEN_IDENTIFIER)) {
      readToken(token);
      if (isType(token, TOKEN_DOT)) {
        readToken(token);
        Assert(isType(token, TOKEN_IDENTIFIER));
        readToken(token);
      }
      return;
    }

    // StructType     = "struct" "{" { FieldDecl ";" } "}"
    // InterfaceType      = "interface" "{" { MethodSpec ";" } "}" .
    if (isKeyword(token, KEYWORD_struct) ||
        isKeyword(token, KEYWORD_interface)) {
      readToken(token);
      Assert(isType(token, TOKEN_OPEN_CURLY));
      skipToMatched(token);
      return;
    }

    // ArrayType   = "[" ArrayLength "]" ElementType .
    // SliceType = "[" "]" ElementType .
    // ElementType = Type .
    if (isType(token, TOKEN_OPEN_SQUARE)) {
      skipToMatched(token);
      continue;
    }

    // PointerType = "*" BaseType .
    // BaseType = Type .
    // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
    if (isType(token, TOKEN_STAR) || isKeyword(token, KEYWORD_chan) ||
        isType(token, TOKEN_LEFT_ARROW)) {
      readToken(token);
      continue;
    }

    // MapType     = "map" "[" KeyType "]" ElementType .
    // KeyType     = Type .
    if (isKeyword(token, KEYWORD_map)) {
      readToken(token);
      Assert(isType(token, TOKEN_OPEN_SQUARE));
      skipToMatched(token);
      continue;
    }

    // FunctionType   = "func" Signature .
    // Signature      = Parameters [ Result ] .
    // Result         = Parameters | Type .
    // Parameters     = "(" [ ParameterList [ "," ] ] ")" .
    if (isKeyword(token, KEYWORD_func)) {
      readToken(token);
      Assert(isType(token, TOKEN_OPEN_PAREN));
      // Parameters
      skipToMatched(token);
      // Result is parameters or type or nothing.  The next pass treats
      // anything surrounded by parentheses as a type, and does nothing if
      // what follows is not a type.
      continue;
    }

    // Not the start of a type: leave the token untouched.
    return;
  }
}

// Skip to the next semicolon, skipping over matching brackets.
//
// On return `token` is a TOKEN_SEMICOLON that is not nested inside any
// (...), {...} or [...] group opened after the call began.  If `token` is
// already a semicolon nothing is read.
//
// skipToMatched() leaves `token` on the token that follows the closing
// bracket, and that token can itself open another group, as in
//   var f = func() { ... }
// where the parameter list is directly followed by the body.  Each iteration
// therefore inspects the current token before reading a new one, so such a
// follow-on group is skipped as a unit instead of being entered.
static void skipToTopLevelSemicolon(tokenInfo *const token) {
  while (!isType(token, TOKEN_SEMICOLON)) {
    if (isType(token, TOKEN_OPEN_PAREN) || isType(token, TOKEN_OPEN_CURLY) ||
        isType(token, TOKEN_OPEN_SQUARE)) {
      skipToMatched(token);
    } else {
      readToken(token);
    }
  }
}

/*
 *  Emits a tag of the given kind named after `token`, unless that kind is
 *  disabled in GoKinds.  The tag carries the token's line number and file
 *  position.
 *
 *  When a package scope is known and qualified tags are enabled, a second
 *  tag named "<scope>.<name>" is emitted with the same attributes.
 */
static void makeTag(tokenInfo *const token, const goKind kind) {
  const kindOption *const option = &GoKinds[kind];
  tagEntryInfo entry;

  initTagEntry(&entry, vStringValue(token->string));

  if (!option->enabled) {
    return;
  }

  entry.kind = option->letter;
  entry.kindName = option->name;
  entry.filePosition = token->filePosition;
  entry.lineNumber = token->lineNumber;

  makeTagEntry(&entry);

  if (scope && Option.include.qualifiedTags) {
    vString *const qualified = vStringNew();

    vStringCopy(qualified, scope);
    vStringCatS(qualified, ".");
    vStringCat(qualified, token->string);

    entry.name = vStringValue(qualified);
    makeTagEntry(&entry);

    vStringDelete(qualified);
  }
}

/*
 *  Handles a "package" clause: reads the token after the keyword, tags it as
 *  a package and, when qualified tags are enabled and no scope has been
 *  recorded yet, keeps a copy of the name in `scope`.
 *
 *  The `token` argument is not used; the name is read into a private token.
 */
static void parsePackage(tokenInfo *const token) {
  tokenInfo *const pkg = newToken();

  readToken(pkg);
  Assert(isType(pkg, TOKEN_IDENTIFIER));
  makeTag(pkg, GOTAG_PACKAGE);

  if (Option.include.qualifiedTags && scope == NULL) {
    scope = vStringNew();
    vStringCopy(scope, pkg->string);
  }

  deleteToken(pkg);
}

/*
 *  Handles a top-level "func" declaration (the keyword has already been
 *  read): skips an optional receiver, remembers the name, skips type
 *  parameters, parameters, result and body, then emits a function tag.
 *
 *  A tag is emitted only when the token in the name position really is an
 *  identifier, so malformed input does not produce tags named after
 *  punctuation or keywords.
 *
 *  token: scratch token owned by the caller; on return it holds the token
 *         following the declaration.
 */
static void parseFunctionOrMethod(tokenInfo *const token) {
  // FunctionDecl = "func" identifier [ TypeParameters ] Signature [ Body ] .
  // Body         = Block.
  //
  // MethodDecl   = "func" Receiver MethodName Signature [ Body ] .
  // Receiver     = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
  // BaseTypeName = identifier .
  tokenInfo *const name = newToken();

  // Skip over receiver.
  readToken(name);
  if (isType(name, TOKEN_OPEN_PAREN)) skipToMatched(name);

  readToken(token);

  // Skip over type parameters, e.g. the "[T any]" in "func Map[T any](...)".
  // Without this step the bracket list would be taken for the parameter
  // list, the parameters for the result, and the body would not be skipped.
  if (isType(token, TOKEN_OPEN_SQUARE)) skipToMatched(token);

  // Skip over parameters.
  skipToMatched(token);

  // Skip over result.
  skipType(token);

  // Skip over function body.
  if (isType(token, TOKEN_OPEN_CURLY)) skipToMatched(token);

  if (isType(name, TOKEN_IDENTIFIER)) makeTag(name, GOTAG_FUNCTION);

  deleteToken(name);
}

/*
 *  Handles a "const", "type" or "var" declaration (the keyword has already
 *  been read), either a single spec or a parenthesised group of specs, and
 *  emits one tag of the given kind per declared identifier.
 *
 *  For every spec the identifier list is tagged, then the optional type and
 *  the rest of the spec are skipped up to the next top-level semicolon.
 *  Inside a group this repeats until the closing ')' is found where the next
 *  spec would start, or directly after a name.  An empty group such as
 *  "var ()" produces no tags.  Only TOKEN_IDENTIFIER tokens are tagged.
 *
 *  token: scratch token owned by the caller.
 *  kind:  tag kind to emit (GOTAG_CONST, GOTAG_TYPE or GOTAG_VAR).
 */
static void parseConstTypeVar(tokenInfo *const token, goKind kind) {
  // ConstDecl      = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
  // ConstSpec      = IdentifierList [ [ Type ] "=" ExpressionList ] .
  // IdentifierList = identifier { "," identifier } .
  // ExpressionList = Expression { "," Expression } .
  // TypeDecl     = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
  // TypeSpec     = identifier Type .
  // VarDecl     = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
  // VarSpec     = IdentifierList ( Type [ "=" ExpressionList ] | "="
  // ExpressionList ) .
  tokenInfo *const name = newToken();
  boolean usesParens = FALSE;

  readToken(name);

  if (isType(name, TOKEN_OPEN_PAREN)) {
    usesParens = TRUE;
    readToken(name);
  }

  // In a group, a ')' where a spec would start ends the declaration; this
  // also covers the empty group.
  while (!(usesParens && isType(name, TOKEN_CLOSE_PAREN))) {
    boolean groupClosed = FALSE;

    // IdentifierList: tag each name, continue only across commas.
    while (1) {
      if (isType(name, TOKEN_IDENTIFIER)) makeTag(name, kind);
      readToken(token);
      if (usesParens && isType(token, TOKEN_CLOSE_PAREN)) {
        // ")" directly after a name closes the group, e.g. "const ( a; b )".
        groupClosed = TRUE;
        break;
      }
      if (!isType(token, TOKEN_COMMA)) break;
      readToken(name);
    }
    if (groupClosed) break;

    // Skip the optional type and whatever remains of this spec.
    skipType(token);
    skipToTopLevelSemicolon(token);

    if (!usesParens) break;

    // First token of the next spec, or the ")" that ends the group.
    readToken(name);
  }

  deleteToken(name);
}

/*
 *  Top-level driver: reads tokens one after another and hands the
 *  declaration keywords (package, func, const, type, var) to their parsers.
 *  All other tokens are ignored.
 *
 *  The loop has no exit of its own; control leaves this function only
 *  through the longjmp() performed by readToken() at end of input.
 */
static void parseGoFile(tokenInfo *const token) {
  for (;;) {
    readToken(token);

    if (!isType(token, TOKEN_KEYWORD)) {
      continue;
    }

    if (isKeyword(token, KEYWORD_package)) {
      parsePackage(token);
    } else if (isKeyword(token, KEYWORD_func)) {
      parseFunctionOrMethod(token);
    } else if (isKeyword(token, KEYWORD_const)) {
      parseConstTypeVar(token, GOTAG_CONST);
    } else if (isKeyword(token, KEYWORD_type)) {
      parseConstTypeVar(token, GOTAG_TYPE);
    } else if (isKeyword(token, KEYWORD_var)) {
      parseConstTypeVar(token, GOTAG_VAR);
    }
  }
}

static void findGoTags(void) {
  tokenInfo *const token = newToken();
  exception_t exception;

  exception = (exception_t)(setjmp(Exception));
  while (exception == ExceptionNone) parseGoFile(token);

  deleteToken(token);
  vStringDelete(scope);
  scope = NULL;
}

/*
 *  Builds the parser definition for Go: name "Go", file extension "go", the
 *  kinds listed in GoKinds, findGoTags() as the parse routine and
 *  initialize() as the initialisation hook.
 */
extern parserDefinition *GoParser(void) {
  static const char *const extensions[] = {"go", NULL};
  parserDefinition *const parser = parserNew("Go");

  parser->initialize = initialize;
  parser->parser = findGoTags;
  parser->extensions = extensions;
  parser->kinds = GoKinds;
  parser->kindCount = KIND_COUNT(GoKinds);

  return parser;
}