cosmopolitan/third_party/ctags/perl.c

328 lines
9.2 KiB
C

/*
* $Id: perl.c 601 2007-08-02 04:45:16Z perlguy0 $
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for PERL language
* files.
*/
#include "third_party/ctags/general.h"
/* must always come first */
#include "third_party/ctags/entry.h"
#include "third_party/ctags/options.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"
#define TRACE_PERL_C 0
#define TRACE \
if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf
/*
* DATA DEFINITIONS
*/
typedef enum {
K_NONE = -1,
K_CONSTANT,
K_FORMAT,
K_LABEL,
K_PACKAGE,
K_SUBROUTINE,
K_SUBROUTINE_DECLARATION
} perlKind;
static kindOption PerlKinds[] = {
{TRUE, 'c', "constant", "constants"},
{TRUE, 'f', "format", "formats"},
{TRUE, 'l', "label", "labels"},
{TRUE, 'p', "package", "packages"},
{TRUE, 's', "subroutine", "subroutines"},
{FALSE, 'd', "subroutine declaration", "subroutine declarations"},
};
/*
* FUNCTION DEFINITIONS
*/
static boolean isIdentifier1(int c) {
return (boolean)(isalpha(c) || c == '_');
}
static boolean isIdentifier(int c) {
return (boolean)(isalnum(c) || c == '_');
}
static boolean isPodWord(const char *word) {
boolean result = FALSE;
if (isalpha(*word)) {
const char *const pods[] = {"head1", "head2", "head3", "head4",
"over", "item", "back", "pod",
"begin", "end", "for"};
const size_t count = sizeof(pods) / sizeof(pods[0]);
const char *white = strpbrk(word, " \t");
const size_t len = (white != NULL) ? (size_t)(white - word) : strlen(word);
char *const id = (char *)eMalloc(len + 1);
size_t i;
strncpy(id, word, len);
id[len] = '\0';
for (i = 0; i < count && !result; ++i) {
if (strcmp(id, pods[i]) == 0) result = TRUE;
}
eFree(id);
}
return result;
}
/*
* Perl subroutine declaration may look like one of the following:
*
* sub abc;
* sub abc :attr;
* sub abc (proto);
* sub abc (proto) :attr;
*
* Note that there may be more than one attribute. Attributes may
* have things in parentheses (they look like arguments). Anything
* inside of those parentheses goes. Prototypes may contain semi-colons.
* The matching end when we encounter (outside of any parentheses) either
* a semi-colon (that'd be a declaration) or an left curly brace
* (definition).
*
* This is pretty complicated parsing (plus we all know that only perl can
* parse Perl), so we are only promising best effort here.
*
* If we can't determine what this is (due to a file ending, for example),
* we will return FALSE.
*/
static boolean isSubroutineDeclaration(const unsigned char *cp) {
boolean attr = FALSE;
int nparens = 0;
do {
for (; *cp; ++cp) {
SUB_DECL_SWITCH:
switch (*cp) {
case ':':
if (nparens)
break;
else if (TRUE == attr)
return FALSE; /* Invalid attribute name */
else
attr = TRUE;
break;
case '(':
++nparens;
break;
case ')':
--nparens;
break;
case ' ':
case '\t':
break;
case ';':
if (!nparens) return TRUE;
case '{':
if (!nparens) return FALSE;
default:
if (attr) {
if (isIdentifier1(*cp)) {
cp++;
while (isIdentifier(*cp)) cp++;
attr = FALSE;
goto SUB_DECL_SWITCH; /* Instead of --cp; */
} else {
return FALSE;
}
} else if (nparens) {
break;
} else {
return FALSE;
}
}
}
} while (NULL != (cp = fileReadLine()));
return FALSE;
}
/* Algorithm adapted from from GNU etags.
* Perl support by Bart Robinson <lomew@cs.utah.edu>
* Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
*/
static void findPerlTags(void) {
vString *name = vStringNew();
vString *package = NULL;
boolean skipPodDoc = FALSE;
const unsigned char *line;
while ((line = fileReadLine()) != NULL) {
boolean spaceRequired = FALSE;
boolean qualified = FALSE;
const unsigned char *cp = line;
perlKind kind = K_NONE;
tagEntryInfo e;
if (skipPodDoc) {
if (strncmp((const char *)line, "=cut", (size_t)4) == 0)
skipPodDoc = FALSE;
continue;
} else if (line[0] == '=') {
skipPodDoc = isPodWord((const char *)line + 1);
continue;
} else if (strcmp((const char *)line, "__DATA__") == 0)
break;
else if (strcmp((const char *)line, "__END__") == 0)
break;
else if (line[0] == '#')
continue;
while (isspace(*cp)) cp++;
if (strncmp((const char *)cp, "sub", (size_t)3) == 0) {
TRACE("this looks like a sub\n");
cp += 3;
kind = K_SUBROUTINE;
spaceRequired = TRUE;
qualified = TRUE;
} else if (strncmp((const char *)cp, "use", (size_t)3) == 0) {
cp += 3;
if (!isspace(*cp)) continue;
while (*cp && isspace(*cp)) ++cp;
if (strncmp((const char *)cp, "constant", (size_t)8) != 0) continue;
cp += 8;
kind = K_CONSTANT;
spaceRequired = TRUE;
qualified = TRUE;
} else if (strncmp((const char *)cp, "package", (size_t)7) == 0) {
/* This will point to space after 'package' so that a tag
can be made */
const unsigned char *space = cp += 7;
if (package == NULL)
package = vStringNew();
else
vStringClear(package);
while (isspace(*cp)) cp++;
while ((int)*cp != ';' && !isspace((int)*cp)) {
vStringPut(package, (int)*cp);
cp++;
}
vStringCatS(package, "::");
cp = space; /* Rewind */
kind = K_PACKAGE;
spaceRequired = TRUE;
qualified = TRUE;
} else if (strncmp((const char *)cp, "format", (size_t)6) == 0) {
cp += 6;
kind = K_FORMAT;
spaceRequired = TRUE;
qualified = TRUE;
} else {
if (isIdentifier1(*cp)) {
const unsigned char *p = cp;
while (isIdentifier(*p)) ++p;
while (isspace(*p)) ++p;
if ((int)*p == ':' && (int)*(p + 1) != ':') kind = K_LABEL;
}
}
if (kind != K_NONE) {
TRACE("cp0: %s\n", (const char *)cp);
if (spaceRequired && *cp && !isspace(*cp)) continue;
TRACE("cp1: %s\n", (const char *)cp);
while (isspace(*cp)) cp++;
while (!*cp || '#' == *cp) { /* Gobble up empty lines
and comments */
cp = fileReadLine();
if (!cp) goto END_MAIN_WHILE;
while (isspace(*cp)) cp++;
}
while (isIdentifier(*cp) || (K_PACKAGE == kind && ':' == *cp)) {
vStringPut(name, (int)*cp);
cp++;
}
if (K_FORMAT == kind &&
vStringLength(name) == 0 && /* cp did not advance */
'=' == *cp) {
/* format's name is optional. If it's omitted, 'STDOUT'
is assumed. */
vStringCatS(name, "STDOUT");
}
vStringTerminate(name);
TRACE("name: %s\n", name->buffer);
if (0 == vStringLength(name)) {
vStringClear(name);
continue;
}
if (K_SUBROUTINE == kind) {
/*
* isSubroutineDeclaration() may consume several lines. So
* we record line positions.
*/
initTagEntry(&e, vStringValue(name));
if (TRUE == isSubroutineDeclaration(cp)) {
if (TRUE == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) {
kind = K_SUBROUTINE_DECLARATION;
} else {
vStringClear(name);
continue;
}
}
e.kind = PerlKinds[kind].letter;
e.kindName = PerlKinds[kind].name;
makeTagEntry(&e);
if (Option.include.qualifiedTags && qualified && package != NULL &&
vStringLength(package) > 0) {
vString *const qualifiedName = vStringNew();
vStringCopy(qualifiedName, package);
vStringCat(qualifiedName, name);
e.name = vStringValue(qualifiedName);
makeTagEntry(&e);
vStringDelete(qualifiedName);
}
} else if (vStringLength(name) > 0) {
makeSimpleTag(name, PerlKinds, kind);
if (Option.include.qualifiedTags && qualified && K_PACKAGE != kind &&
package != NULL && vStringLength(package) > 0) {
vString *const qualifiedName = vStringNew();
vStringCopy(qualifiedName, package);
vStringCat(qualifiedName, name);
makeSimpleTag(qualifiedName, PerlKinds, kind);
vStringDelete(qualifiedName);
}
}
vStringClear(name);
}
}
END_MAIN_WHILE:
vStringDelete(name);
if (package != NULL) vStringDelete(package);
}
extern parserDefinition *PerlParser(void) {
static const char *const extensions[] = {"pl", "pm", "plx", "perl", NULL};
parserDefinition *def = parserNew("Perl");
def->kinds = PerlKinds;
def->kindCount = KIND_COUNT(PerlKinds);
def->extensions = extensions;
def->parser = findPerlTags;
return def;
}
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */