cosmopolitan/third_party/ctags/python.c

669 lines
18 KiB
C

/*
* $Id: python.c 752 2010-02-27 17:52:46Z elliotth $
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Python language
* files.
*/
#include "third_party/ctags/general.h"
/* must always come first */
#include "libc/mem/mem.h"
#include "third_party/ctags/debug.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/main.h"
#include "third_party/ctags/options.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"
/*
* DATA DECLARATIONS
*/
typedef struct NestingLevel NestingLevel;
typedef struct NestingLevels NestingLevels;
/* One enclosing scope (a class or a function) of the line being parsed. */
struct NestingLevel {
int indentation; /* column of the first non-blank char of the opening line */
vString *name; /* name of the class/function; cleared by checkParent() */
int type; /* K_CLASS for a class, !K_CLASS otherwise (see addNestingLevel) */
};
/* Growable stack of enclosing scopes, ordered from outermost to innermost. */
struct NestingLevels {
NestingLevel *levels; /* array of 'allocated' slots, NULL until first push */
int n; /* number of levels in use */
int allocated; /* number of slots whose 'name' vString has been created */
};
/* Tag kinds. K_IMPORT is passed to makeSimpleTag() together with PythonKinds,
 * so the order here presumably has to match the PythonKinds table -- confirm
 * before reordering. K_CLASS must stay 0: addNestingLevel() relies on
 * !K_CLASS being a distinct non-class value. */
typedef enum { K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_IMPORT } pythonKind;
/*
* DATA DEFINITIONS
*/
/* Table of tag kinds registered with the parser in PythonParser().
 * NOTE(review): the fields look like {enabled, letter, name, description};
 * kindOption is declared elsewhere -- confirm. Entries are listed in the same
 * order as the pythonKind enumerators. */
static kindOption PythonKinds[] = {{TRUE, 'c', "class", "classes"},
{TRUE, 'f', "function", "functions"},
{TRUE, 'm', "member", "class members"},
{TRUE, 'v', "variable", "variables"},
{FALSE, 'i', "namespace", "imports"}};
/* The two delimiters of Python triple-quoted strings. find_triple_start()
 * stores one of these pointers in its "which" argument to record which kind
 * of long string is currently open. */
static char const *const singletriple = "'''";
static char const *const doubletriple = "\"\"\"";
/*
* FUNCTION DEFINITIONS
*/
/* Allocate an empty scope stack. The structure is zero-filled, so it starts
 * with no slots allocated and no levels in use. Release it with
 * nestingLevelsFree(). */
static NestingLevels *nestingLevelsNew(void) {
  return xCalloc(1, NestingLevels);
}
/* Release a scope stack: the name string of every allocated slot, the slot
 * array itself (if one was ever allocated) and finally the stack object. */
static void nestingLevelsFree(NestingLevels *nls) {
  int slot = 0;
  while (slot < nls->allocated) {
    vStringDelete(nls->levels[slot].name);
    slot++;
  }
  if (nls->levels != NULL) {
    eFree(nls->levels);
  }
  eFree(nls);
}
/* Append a level named ''name'' of kind ''type'' to the stack. The slot array
 * grows by one element when every allocated slot is already in use; slots
 * left over from earlier pushes are reused together with their name string.
 * The indentation of the new level is NOT set here; the caller assigns it. */
static void nestingLevelsPush(NestingLevels *nls, const vString *name,
                              int type) {
  const int slot = nls->n;
  NestingLevel *top;
  if (slot >= nls->allocated) {
    nls->allocated += 1;
    nls->levels = xRealloc(nls->levels, nls->allocated, NestingLevel);
    nls->levels[slot].name = vStringNew();
  }
  /* take the address only after the array may have been moved */
  top = nls->levels + slot;
  vStringCopy(top->name, name);
  top->type = type;
  nls->n = slot + 1;
}
/* The two helpers below are compiled out (#if 0) and unused by this parser:
 * nestingLevelsGetCurrent() returns the innermost level or NULL when the
 * stack is empty, and nestingLevelsPop() clears that level's name and drops
 * it from the in-use count. */
#if 0
static NestingLevel *nestingLevelsGetCurrent (NestingLevels *nls)
{
Assert (nls != NULL);
if (nls->n < 1)
return NULL;
return &nls->levels[nls->n - 1];
}
static void nestingLevelsPop (NestingLevels *nls)
{
const NestingLevel *nl = nestingLevelsGetCurrent(nls);
Assert (nl != NULL);
vStringClear(nl->name);
nls->n--;
}
#endif
/* Return TRUE if ''c'' can start an identifier: a letter or an underscore.
 * Callers pass plain char values, which are negative for bytes >= 0x80 where
 * char is signed; the value is narrowed to unsigned char before it reaches
 * isalpha(), whose behaviour is undefined for other negative arguments. */
static boolean isIdentifierFirstCharacter(int c) {
  return (boolean)(isalpha((unsigned char)c) || c == '_');
}
/* Return TRUE if ''c'' can appear inside an identifier: a letter, a digit or
 * an underscore. Callers pass plain char values, which are negative for bytes
 * >= 0x80 where char is signed; the value is narrowed to unsigned char before
 * it reaches isalnum(), whose behaviour is undefined for other negative
 * arguments. */
static boolean isIdentifierCharacter(int c) {
  return (boolean)(isalnum((unsigned char)c) || c == '_');
}
/* Emit the tag for a function definition.
 *
 * ''function'' is the function name and ''parent'' the dotted name of the
 * enclosing scope (empty at top level). A function whose parent is a class
 * (''is_class_parent'' non-zero) is tagged as a member with class scope; any
 * other nested function keeps the function kind and gets function scope.
 * ''arglist'' is accepted but currently not stored in the tag.
 */
static void makeFunctionTag(vString *const function, vString *const parent,
                            int is_class_parent,
                            const char *arglist __unused__) {
  const char *const fname = vStringValue(function);
  const int has_parent = vStringLength(parent) > 0;
  int dunder;
  tagEntryInfo tag;
  initTagEntry(&tag, fname);
  /* tag.extensionFields.arglist = arglist; */
  if (has_parent && is_class_parent) {
    tag.kindName = "member";
    tag.kind = 'm';
  } else {
    tag.kindName = "function";
    tag.kind = 'f';
  }
  if (has_parent) {
    tag.extensionFields.scope[0] = is_class_parent ? "class" : "function";
    tag.extensionFields.scope[1] = vStringValue(parent);
  }
  /* Names starting with "__" (other than __init__) are reported as private
   * and file scoped.
   * FIXME: What is the proper way to signal such attributes?
   * TODO: What does functions/classes starting with _ and __ mean in python?
   */
  dunder = strncmp(fname, "__", 2) == 0 && strcmp(fname, "__init__") != 0;
  if (dunder) {
    tag.extensionFields.access = "private";
    tag.isFileScope = TRUE;
  } else {
    tag.extensionFields.access = "public";
  }
  makeTagEntry(&tag);
}
/* Emit the tag for a class definition.
 *
 * ''class'' is the class name, ''inheritance'' the text collected between the
 * parentheses of the class statement (empty if there were none) and ''parent''
 * the dotted name of the enclosing scope. A non-empty parent is recorded as a
 * "class" scope when ''is_class_parent'' is non-zero, else as a "function"
 * scope.
 */
static void makeClassTag(vString *const class, vString *const inheritance,
                         vString *const parent, int is_class_parent) {
  tagEntryInfo tag;
  initTagEntry(&tag, vStringValue(class));
  tag.kind = 'c';
  tag.kindName = "class";
  if (vStringLength(parent) > 0) {
    tag.extensionFields.scope[0] = is_class_parent ? "class" : "function";
    tag.extensionFields.scope[1] = vStringValue(parent);
  }
  tag.extensionFields.inheritance = vStringValue(inheritance);
  makeTagEntry(&tag);
}
/* Emit the tag for a variable named ''var''. A non-empty ''parent'' is always
 * recorded as a "class" scope. */
static void makeVariableTag(vString *const var, vString *const parent) {
  tagEntryInfo tag;
  initTagEntry(&tag, vStringValue(var));
  tag.kind = 'v';
  tag.kindName = "variable";
  if (vStringLength(parent) != 0) {
    tag.extensionFields.scope[1] = vStringValue(parent);
    tag.extensionFields.scope[0] = "class";
  }
  makeTagEntry(&tag);
}
/* Skip a delimited string. ''cp'' must point at the opening delimiter; the
 * string ends at the next unescaped occurrence of that same character.
 * Returns a pointer just past the closing delimiter, or a pointer to the
 * terminating NUL if the string is not closed on this line. */
static const char *skipString(const char *cp) {
  const char delimiter = *cp;
  const char *p = cp + 1;
  while (*p != '\0') {
    if (*p == '\\') {
      /* a backslash hides the following character, if there is one */
      if (p[1] == '\0') return p + 1;
      p += 2;
      continue;
    }
    if (*p == delimiter) return p + 1;
    p++;
  }
  return p;
}
/* Skip everything up to an identifier start.
 *
 * Quoted strings are jumped over as a whole, and a '#' ends the scan because
 * the rest of the line is a comment. Returns a pointer to the first character
 * that can start an identifier, or a pointer to the terminating NUL when the
 * line holds no further identifier.
 */
static const char *skipEverything(const char *cp) {
  for (; *cp; cp++) {
    if (*cp == '#') {
      /* A comment runs to the end of the line; it has no closing '#'. */
      return cp + strlen(cp);
    }
    if (*cp == '"' || *cp == '\'') {
      cp = skipString(cp);
      if (!*cp) break;
      if (isIdentifierFirstCharacter((int)*cp)) return cp;
      /* Step back so that the loop increment lands on the character that
       * follows the string instead of jumping over it unexamined (it may be
       * the opening quote of another string). */
      cp--;
      continue;
    }
    if (isIdentifierFirstCharacter((int)*cp)) return cp;
  }
  return cp;
}
/* Skip an identifier: returns a pointer to the first character at or after
 * ''cp'' that is not an identifier character. */
static const char *skipIdentifier(const char *cp) {
  const char *end;
  for (end = cp; isIdentifierCharacter((int)*end); end++) {
    /* nothing to do, just advance */
  }
  return end;
}
/* Look for the first word on the line that begins with one of the keywords
 * "def", "class", "cdef" or "cpdef". Only the prefix is compared here; the
 * caller checks that the keyword is followed by a space. Returns a pointer to
 * that word, or NULL if the line has none. */
static const char *findDefinitionOrClass(const char *cp) {
  for (; *cp != '\0'; cp = skipIdentifier(cp)) {
    cp = skipEverything(cp);
    if (strncmp(cp, "def", 3) == 0) return cp;
    if (strncmp(cp, "class", 5) == 0) return cp;
    if (strncmp(cp, "cdef", 4) == 0) return cp;
    if (strncmp(cp, "cpdef", 5) == 0) return cp;
  }
  return NULL;
}
/* Return a pointer to the first non-whitespace character at or after ''cp''
 * (possibly the terminating NUL). The character is converted to unsigned char
 * before the isspace() call because plain char may be negative for bytes
 * >= 0x80, which isspace() is not required to accept. */
static const char *skipSpace(const char *cp) {
  while (isspace((unsigned char)*cp)) ++cp;
  return cp;
}
/* Starting at ''cp'', copy the run of identifier characters into
 * ''identifier'', replacing its previous contents. Returns a pointer to the
 * first character after the identifier; ''identifier'' is left empty when
 * ''cp'' does not point at an identifier character. */
static const char *parseIdentifier(const char *cp, vString *const identifier) {
  const char *p;
  vStringClear(identifier);
  for (p = cp; isIdentifierCharacter((int)*p); ++p) {
    vStringPut(identifier, (int)*p);
  }
  vStringTerminate(identifier);
  return p;
}
/* Parse a class statement starting at the class name and emit its tag.
 *
 * The class name is stored in ''class''. If a parenthesised list follows, its
 * text up to the closing ')' is collected as the inheritance string; when the
 * line ends first, further lines are read with fileReadLine() and joined with
 * a single space until the ')' is found or the input ends.
 */
static void parseClass(const char *cp, vString *const class,
                       vString *const parent, int is_class_parent) {
  vString *const bases = vStringNew();
  vStringClear(bases);
  cp = skipSpace(parseIdentifier(cp, class));
  if (*cp == '(') {
    for (++cp; *cp != ')';) {
      if (*cp != '\0') {
        vStringPut(bases, *cp);
        ++cp;
        continue;
      }
      /* Closing parenthesis can be in follow up line. */
      cp = (const char *)fileReadLine();
      if (cp == NULL) break;
      vStringPut(bases, ' ');
    }
    vStringTerminate(bases);
  }
  makeClassTag(class, bases, parent, is_class_parent);
  vStringDelete(bases);
}
/* Emit a K_IMPORT tag for every name brought in by an import statement on
 * the given line.
 *
 * The scan starts after the first occurrence of "import" that is followed by
 * whitespace, so both "import a, b" and "from m import a" are handled. For
 * "import foo as bar" only "bar" is tagged: a name is skipped when it is "as"
 * itself or when the identifier following it is "as".
 */
static void parseImports(const char *cp) {
  const char *pos;
  vString *name, *name_next;
  cp = skipEverything(cp);
  if ((pos = strstr(cp, "import")) == NULL) return;
  cp = pos + 6;
  /* continue only if there is some space between the keyword and the
   * identifier; the cast keeps the isspace() argument in the range of
   * unsigned char even for bytes >= 0x80 */
  if (!isspace((unsigned char)*cp)) return;
  cp++;
  cp = skipSpace(cp);
  name = vStringNew();
  name_next = vStringNew();
  cp = skipEverything(cp);
  while (*cp) {
    cp = parseIdentifier(cp, name);
    cp = skipEverything(cp);
    /* we parse the next possible import statement as well to be able to ignore
     * 'foo' in 'import foo as bar' */
    parseIdentifier(cp, name_next);
    /* take the current tag only if the next one is not "as" */
    if (strcmp(vStringValue(name_next), "as") != 0 &&
        strcmp(vStringValue(name), "as") != 0) {
      makeSimpleTag(name, PythonKinds, K_IMPORT);
    }
  }
  vStringDelete(name);
  vStringDelete(name_next);
}
/* modified from get.c getArglistFromStr().
 *
 * Return a newly allocated copy of the parenthesised argument list found in
 * ''buf'': the text from the first '(' up to and including its matching ')',
 * or up to the end of the string if the list is not closed. Returns NULL if
 * ''buf'' is NULL, contains no '(', or memory cannot be allocated. The caller
 * owns the result and must free it.
 *
 * The input is never modified (it is const, and callers keep parsing the same
 * line afterwards).
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf) {
  const char *start, *end;
  char *arglist;
  size_t length;
  int level;
  if (NULL == buf) return NULL;
  if (NULL == (start = strchr(buf, '('))) return NULL;
  /* 'end' stops one past the matching ')' or on the terminating NUL */
  for (level = 1, end = start + 1; level > 0 && '\0' != *end; ++end) {
    if ('(' == *end)
      ++level;
    else if (')' == *end)
      --level;
  }
  length = (size_t)(end - start);
  arglist = malloc(length + 1);
  if (NULL == arglist) return NULL;
  memcpy(arglist, start, length);
  arglist[length] = '\0';
  return arglist;
}
/* Parse a function definition starting at the function name and emit its
 * tag. The name is stored in ''def''; the argument list that follows is
 * extracted, handed to makeFunctionTag() and released again. */
static void parseFunction(const char *cp, vString *const def,
                          vString *const parent, int is_class_parent) {
  const char *const after_name = parseIdentifier(cp, def);
  char *const arglist = parseArglist(after_name);
  makeFunctionTag(def, parent, is_class_parent, arglist);
  if (arglist) eFree(arglist);
}
/* Build in ''result'' the combined name of the scopes that enclose a line
 * indented by ''indent'' columns: the names of all leading nesting levels
 * whose indentation is smaller than ''indent'', joined with ".". For example
 * this code:
 * class MyClass:
 *     def myFunction:
 *         def SubFunction:
 *             class SubClass:
 *                 def Method:
 *                     pass
 * gives Method the parent string:
 * MyClass.myFunction.SubFunction.SubClass
 * Returns TRUE if the innermost enclosing level is a class, FALSE if it is
 * not or if there is no enclosing level (''result'' is then empty).
 */
static boolean constructParentString(NestingLevels *nls, int indent,
                                     vString *result) {
  int innermost_is_class = FALSE;
  int depth = 0;
  vStringClear(result);
  while (depth < nls->n && indent > nls->levels[depth].indentation) {
    const NestingLevel *const level = &nls->levels[depth];
    if (depth > 0) {
      /* always ".", never "/": make Geany symbol list grouping work
       * properly */
      vStringCatS(result, ".");
    }
    vStringCat(result, level->name);
    innermost_is_class = (level->type == K_CLASS);
    depth++;
  }
  return innermost_is_class;
}
/* Find the first nesting level whose name equals ''parent''. If the current
 * line's ''indent'' is not deeper than that level's indentation, the level is
 * no longer an enclosing scope and is disabled by clearing its name. Nothing
 * happens when no level carries that name.
 */
static void checkParent(NestingLevels *nls, int indent, vString *parent) {
  const char *const wanted = vStringValue(parent);
  int i;
  for (i = 0; i < nls->n; i++) {
    NestingLevel *const level = &nls->levels[i];
    /* is there a better way to compare two vStrings? */
    if (strcmp(wanted, vStringValue(level->name)) != 0) continue;
    if (indent <= level->indentation) {
      /* remove this level by clearing its name */
      vStringClear(level->name);
    }
    return; /* only the first match is considered */
  }
}
/* Record a newly parsed class or function as a nesting level.
 *
 * The first level whose indentation is not smaller than ''indentation'' is
 * overwritten and becomes the innermost level (everything after it is
 * dropped); if there is no such level a new one is pushed.
 */
static void addNestingLevel(NestingLevels *nls, int indentation,
                            const vString *name, boolean is_class) {
  NestingLevel *level;
  int slot = 0;
  while (slot < nls->n && indentation > nls->levels[slot].indentation) {
    slot++;
  }
  if (slot == nls->n) {
    nestingLevelsPush(nls, name, 0);
    /* look the slot up after the push: the array may have been moved */
    level = &nls->levels[slot];
  } else { /* reuse existing slot */
    level = &nls->levels[slot];
    nls->n = slot + 1;
    vStringCopy(level->name, name);
  }
  level->indentation = indentation;
  level->type = is_class ? K_CLASS : !K_CLASS;
}
/* Return a pointer to the start of the next triple string, or NULL. Store
 * the kind of triple string in "which" if the return is not NULL.
 *
 * Ordinary single or double quoted strings are skipped, so triple quotes
 * inside them are not reported. The scan stops at a '#' outside a string,
 * because the rest of the line is then a comment.
 */
static char const *find_triple_start(char const *string, char const **which) {
  char const *cp = string;
  for (; *cp; cp++) {
    if (*cp == '#') break; /* comment: nothing behind it can open a string */
    if (*cp == '"' || *cp == '\'') {
      if (strncmp(cp, doubletriple, 3) == 0) {
        *which = doubletriple;
        return cp;
      }
      if (strncmp(cp, singletriple, 3) == 0) {
        *which = singletriple;
        return cp;
      }
      cp = skipString(cp);
      if (!*cp) break;
      /* Step back so that the loop increment lands on the character right
       * after the skipped string instead of jumping over it; that character
       * may be the first quote of a triple string. */
      cp--;
    }
  }
  return NULL;
}
/* Track triple strings through the rest of a line. On entry "which" names the
 * delimiter of the triple string that is currently open and ''string'' points
 * inside it. If the delimiter does not occur again, "which" is left unchanged
 * (the string continues on the next line). Otherwise the string is closed and
 * the remainder of the line is searched for further triple strings, so that on
 * return "which" is NULL when nothing is left open, or names the delimiter of
 * the string that is still open.
 */
static void find_triple_end(char const *string, char const **which) {
  char const *pos = string;
  for (;;) {
    /* Check if the string ends in the same line. */
    char const *closing = strstr(pos, *which);
    if (closing == NULL) return;
    *which = NULL;
    /* If yes, check if another one starts in the same line. */
    pos = find_triple_start(closing + 3, which);
    if (pos == NULL) return;
    pos += 3;
  }
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the start of the variable name inside ''line'' when
 * the line has the form "<optional blanks><identifier><optional blanks>=...",
 * and NULL otherwise. The scan back from the '=' uses indices that never drop
 * below zero, so no pointer in front of the buffer is ever formed.
 */
static const char *findVariable(const char *line) {
  const char *assign, *rest;
  size_t pos, lead;
  assign = strchr(line, '=');
  if (!assign) return NULL;
  for (rest = assign + 1; *rest; rest++) {
    if (*rest == '=')
      return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */
    if (*rest == '(' || *rest == '#')
      break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of
                line */
  }
  /* 'pos' is the index one past the character currently examined; walk it
   * back over the blanks in front of '=' and then over the identifier */
  pos = (size_t)(assign - line);
  while (pos > 0 && isspace((unsigned char)line[pos - 1])) pos--;
  while (pos > 0 && isIdentifierCharacter((unsigned char)line[pos - 1])) pos--;
  /* line[pos] is '=' itself when no identifier precedes it (e.g. 'x += 1') */
  if (!isIdentifierFirstCharacter((unsigned char)line[pos])) return NULL;
  /* only blanks may precede the identifier */
  lead = pos;
  while (lead > 0 && isspace((unsigned char)line[lead - 1])) lead--;
  if (lead != 0) /* the line isn't a simple variable assignment */
    return NULL;
  /* the line is valid, return the start of the variable name */
  return line + pos;
}
/* Skip type declaration that optionally follows a cdef/cpdef.
 *
 * ''cp'' points just behind the cdef/cpdef keyword. Returns a pointer to the
 * name being defined, or NULL when the statement is not a definition to tag:
 *  - "extern from ..." returns NULL;
 *  - "class Name" sets *is_class to TRUE and returns a pointer to Name;
 *  - otherwise at most two blank-separated words are examined and the word
 *    that runs into a '(' is returned (leading '*' of a pointer declaration
 *    skipped); hitting '=' or the end of the line first returns NULL.
 * The isspace() argument is converted to unsigned char because plain char may
 * be negative for bytes >= 0x80.
 */
static const char *skipTypeDecl(const char *cp, boolean *is_class) {
  const char *candidate = cp;
  const char *scan = skipSpace(cp);
  int pass;
  if (strncmp("extern", scan, 6) == 0) {
    scan = skipSpace(scan + 6);
    if (strncmp("from", scan, 4) == 0) return NULL;
  }
  if (strncmp("class", scan, 5) == 0) {
    *is_class = TRUE;
    return skipSpace(scan + 5);
  }
  /* limit so that we don't pick off "int item=obj()" */
  for (pass = 0; *scan != '\0' && pass < 2; pass++) {
    while (*scan != '\0' && *scan != '=' && *scan != '(' &&
           !isspace((unsigned char)*scan)) {
      scan++;
    }
    if (*scan == '\0' || *scan == '=') return NULL;
    if (*scan == '(') return candidate; /* stopped on a '(': we are done */
    scan = skipSpace(scan);
    candidate = scan;
    while (*candidate == '*') candidate++; /* cdef int *identifier */
  }
  return NULL;
}
/* Parser entry point (installed by PythonParser()): read the input line by
 * line with fileReadLine() and emit tags for classes, functions, cdef/cpdef
 * definitions, global and class variables, and imports.
 *
 * State carried across lines:
 *  - continuation: text of a logical line being joined from physical lines
 *    that end in a backslash;
 *  - longStringLiteral: delimiter of the triple-quoted string that is still
 *    open, or NULL when not inside one;
 *  - nesting_levels / parent: enclosing scopes, keyed by indentation.
 */
static void findPythonTags(void) {
vString *const continuation = vStringNew();
vString *const name = vStringNew();
vString *const parent = vStringNew();
NestingLevels *const nesting_levels = nestingLevelsNew();
const char *line;
/* non-zero while the previous physical line ended with a backslash */
int line_skip = 0;
char const *longStringLiteral = NULL;
while ((line = (const char *)fileReadLine()) != NULL) {
const char *cp = line, *candidate;
char const *longstring;
char const *keyword, *variable;
int indent;
cp = skipSpace(cp);
if (*cp == '\0') /* skip blank line */
continue;
/* Skip comment if we are not inside a multi-line string. */
if (*cp == '#' && !longStringLiteral) continue;
/* Deal with line continuation. */
if (!line_skip) vStringClear(continuation);
vStringCatS(continuation, line);
vStringStripTrailing(continuation);
if (vStringLast(continuation) == '\\') {
/* replace the trailing backslash with a space and wait for the next line */
vStringChop(continuation);
vStringCatS(continuation, " ");
line_skip = 1;
continue;
}
/* From here on 'line' is the joined logical line, not the raw input. */
cp = line = vStringValue(continuation);
cp = skipSpace(cp);
/* indentation = number of leading whitespace characters */
indent = cp - line;
line_skip = 0;
checkParent(nesting_levels, indent, parent);
/* Deal with multiline string ending. */
if (longStringLiteral) {
find_triple_end(cp, &longStringLiteral);
continue;
}
/* Deal with multiline string start. */
longstring = find_triple_start(cp, &longStringLiteral);
if (longstring) {
longstring += 3;
find_triple_end(longstring, &longStringLiteral);
/* We don't parse for any tags in the rest of the line. */
continue;
}
/* Deal with def and class keywords. */
keyword = findDefinitionOrClass(cp);
if (keyword) {
boolean found = FALSE;
boolean is_class = FALSE;
/* findDefinitionOrClass() only matched a prefix; require the trailing
 * space here and leave cp on the name that follows the keyword */
if (!strncmp(keyword, "def ", 4)) {
cp = skipSpace(keyword + 3);
found = TRUE;
} else if (!strncmp(keyword, "class ", 6)) {
cp = skipSpace(keyword + 5);
found = TRUE;
is_class = TRUE;
} else if (!strncmp(keyword, "cdef ", 5)) {
cp = skipSpace(keyword + 4);
candidate = skipTypeDecl(cp, &is_class);
if (candidate) {
found = TRUE;
cp = candidate;
}
} else if (!strncmp(keyword, "cpdef ", 6)) {
cp = skipSpace(keyword + 5);
candidate = skipTypeDecl(cp, &is_class);
if (candidate) {
found = TRUE;
cp = candidate;
}
}
if (found) {
boolean is_parent_class;
is_parent_class = constructParentString(nesting_levels, indent, parent);
if (is_class)
parseClass(cp, name, parent, is_parent_class);
else
parseFunction(cp, name, parent, is_parent_class);
/* the new class/function becomes a scope for the lines below it */
addNestingLevel(nesting_levels, indent, name, is_class);
}
}
/* Find global and class variables */
variable = findVariable(line);
if (variable) {
const char *start = variable;
boolean parent_is_class;
vStringClear(name);
while (isIdentifierCharacter((int)*start)) {
vStringPut(name, (int)*start);
++start;
}
vStringTerminate(name);
parent_is_class = constructParentString(nesting_levels, indent, parent);
/* skip variables in methods */
/* (this also skips the import scan below for such lines) */
if (!parent_is_class && vStringLength(parent) > 0) continue;
makeVariableTag(name, parent);
}
/* Find and parse imports */
parseImports(line);
}
/* Clean up all memory we allocated. */
vStringDelete(parent);
vStringDelete(name);
vStringDelete(continuation);
nestingLevelsFree(nesting_levels);
}
/* Create the parser definition for Python: registers the tag kinds, the
 * file extensions handled (py, pyx, pxd, pxi, scons) and findPythonTags() as
 * the parsing routine. */
extern parserDefinition *PythonParser(void) {
  static const char *const extensions[] = {"py", "pyx", "pxd",
                                           "pxi", "scons", NULL};
  parserDefinition *const parser = parserNew("Python");
  parser->parser = findPythonTags;
  parser->extensions = extensions;
  parser->kinds = PythonKinds;
  parser->kindCount = KIND_COUNT(PythonKinds);
  return parser;
}
/* vi:set tabstop=4 shiftwidth=4: */