cosmopolitan/third_party/ctags/lregex.c

617 lines
17 KiB
C

/*
* $Id: lregex.c 747 2009-11-06 02:33:37Z dhiebert $
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for applying regular expression matching.
*
* The code for utilizing the Gnu regex package with regards to processing the
* regex option and checking for regex matches was adapted from routines in
* Gnu etags.
*/
#include "third_party/ctags/general.h"
/* must always come first */
#include "third_party/ctags/debug.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/parse.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/regex/regex.h"
#ifdef HAVE_REGEX
/*
* MACROS
*/
/* Back-references \0 through \9 */
#define BACK_REFERENCE_COUNT 10
#if defined(HAVE_REGCOMP) && !defined(REGCOMP_BROKEN)
#define POSIX_REGEX
#endif
#define REGEX_NAME "Regex"
/*
* DATA DECLARATIONS
*/
#if defined(POSIX_REGEX)
/* Describes the tag kind attached to a regex tag pattern. */
struct sKind {
boolean enabled; /* makeRegexTag() emits a tag only while this is set */
char letter; /* one-letter kind identifier copied into the tag entry */
char* name; /* kind name; freed by clearPatternSet() */
char* description; /* optional description; may be NULL */
};
/* Discriminator for regexPattern: selects which member of its union is in
 * use (tag generation or a caller-supplied callback). */
enum pType { PTRN_TAG, PTRN_CALLBACK };
/* One compiled pattern together with the action taken when a line matches. */
typedef struct {
regex_t* pattern; /* compiled expression; released in clearPatternSet() */
enum pType type; /* selects the active member of `u` */
union {
struct {
char* name_pattern; /* tag name template expanded by substitute() */
struct sKind kind; /* kind reported for tags produced by this pattern */
} tag;
struct {
regexCallback function; /* invoked by matchCallbackPattern() on a match */
} callback;
} u;
} regexPattern;
#endif
/* Growable list of the patterns registered for a single language. */
typedef struct {
regexPattern* patterns; /* array of `count` entries, or NULL when empty */
unsigned int count; /* number of valid entries in `patterns` */
} patternSet;
/*
* DATA DEFINITIONS
*/
/* Set to TRUE by checkRegex() when its regcomp() probe fails; while TRUE the
 * addTagRegex(), addCallbackRegex() and addLanguageRegex() calls add nothing.
 */
static boolean regexBroken = FALSE;
/* Array of pattern sets, indexed by language */
static patternSet* Sets = NULL;
/* Highest language index for which Sets has an entry; -1 while Sets is empty.
 */
static int SetUpper = -1; /* upper language index in list */
/*
* FUNCTION DEFINITIONS
*/
/* Releases every pattern registered for `language` and leaves that
 * language's set empty. Languages above SetUpper have no set and are ignored.
 */
static void clearPatternSet(const langType language) {
  patternSet* bucket;
  unsigned int idx;
  if (language > SetUpper) return;
  bucket = &Sets[language];
  for (idx = 0; idx < bucket->count; ++idx) {
    regexPattern* const entry = bucket->patterns + idx;
#if defined(POSIX_REGEX)
    regfree(entry->pattern);
#endif
    eFree(entry->pattern);
    entry->pattern = NULL;
    /* Only tag patterns own additional heap strings. */
    if (entry->type != PTRN_TAG) continue;
    eFree(entry->u.tag.name_pattern);
    entry->u.tag.name_pattern = NULL;
    eFree(entry->u.tag.kind.name);
    entry->u.tag.kind.name = NULL;
    if (entry->u.tag.kind.description != NULL) {
      eFree(entry->u.tag.kind.description);
      entry->u.tag.kind.description = NULL;
    }
  }
  if (bucket->patterns != NULL) eFree(bucket->patterns);
  bucket->patterns = NULL;
  bucket->count = 0;
}
/*
* Regex pseudo-parser
*/
/* Emits a tag entry called `name` with the letter and name of `kind`.
 * Nothing is emitted when the kind is disabled.
 *
 * name: tag name; must be non-NULL and non-empty when the kind is enabled.
 * kind: kind descriptor; must be non-NULL.
 */
static void makeRegexTag(const vString* const name,
                         const struct sKind* const kind) {
  /* Check the pointer before the first dereference, not after it. */
  Assert(kind != NULL);
  if (kind->enabled) {
    tagEntryInfo e;
    Assert(name != NULL && vStringLength(name) > 0);
    initTagEntry(&e, vStringValue(name));
    e.kind = kind->letter;
    e.kindName = kind->name;
    makeTagEntry(&e);
  }
}
/*
* Regex pattern definition
*/
/* Take a string like "/blah/" and turn it into "blah", working in place.
 *
 * The first character of `name` is the separator. Characters are copied
 * down to the start of the buffer until the first unquoted separator (or the
 * end of the string) is reached, and the result is null terminated.
 *
 * Backslash handling:
 *   - backslash + separator  -> a literal separator character
 *   - backslash + 't'        -> a Tab character
 *   - backslash + other      -> both characters are preserved
 *   - a backslash that is the very last character is dropped
 *
 * Returns a pointer to the terminating separator, or to the string's
 * terminating null if no unquoted separator was found. An empty input is
 * returned unchanged (there is no separator to scan for).
 */
static char* scanSeparators(char* name) {
  const char sep = name[0];
  char* copyto = name;
  /* Empty input: stepping past name[0] would read beyond the terminator. */
  if (sep == '\0') return name;
  for (++name; *name != '\0'; ++name) {
    if (*name == '\\') {
      /* Look at the quoted character; a trailing backslash ends the scan. */
      ++name;
      if (*name == '\0') break;
      if (*name == sep)
        *copyto++ = sep;
      else if (*name == 't')
        *copyto++ = '\t';
      else {
        /* Something else is quoted, so preserve the quote. */
        *copyto++ = '\\';
        *copyto++ = *name;
      }
    } else if (*name == sep) {
      break;
    } else {
      *copyto++ = *name;
    }
  }
  *copyto = '\0';
  return name;
}
/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
 * character is whatever the first character of `regexp' is), by breaking it
 * up into null terminated strings, removing the separators, and expanding
 * '\t' into tabs. The buffer is modified in place.
 *
 * On success (TRUE): `regexp' holds the line matching pattern, `*name' the
 * name matching pattern, `*kinds' the kinds (or NULL if none were given) and
 * `*flags' the trailing flags.
 *
 * On failure (FALSE) a warning has been issued; `*kinds' and `*flags' are
 * not written and `*name' must not be relied upon.
 *
 * An empty or backslash-terminated name pattern only produces a warning; the
 * result is still TRUE when the final separator is present.
 */
static boolean parseTagRegex(char* const regexp, char** const name,
                             char** const kinds, char** const flags) {
  boolean result = FALSE;
  /* Keep the separator as a plain char so that comparisons against buffer
   * characters also work for bytes >= 0x80 when char is signed. */
  const char separator = regexp[0];
  if (separator == '\0') {
    /* Nothing to scan: there is not even a separator character. */
    *name = regexp;
    error(WARNING, "empty regexp");
    return result;
  }
  *name = scanSeparators(regexp);
  if (*regexp == '\0')
    error(WARNING, "empty regexp");
  else if (**name != separator)
    error(WARNING, "%s: incomplete regexp", regexp);
  else {
    char* const third = scanSeparators(*name);
    if (**name == '\0')
      error(WARNING, "%s: regexp missing name pattern", regexp);
    else if ((*name)[strlen(*name) - 1] == '\\')
      /* Only inspected for a non-empty name, so the index is never -1. */
      error(WARNING, "error in name pattern: \"%s\"", *name);
    if (*third != separator)
      error(WARNING, "%s: regexp missing final separator", regexp);
    else {
      char* const fourth = scanSeparators(third);
      if (*fourth == separator) {
        /* A further section follows, so `third' was the kinds section. */
        *kinds = third;
        scanSeparators(fourth);
        *flags = fourth;
      } else {
        *flags = third;
        *kinds = NULL;
      }
      result = TRUE;
    }
  }
  return result;
}
/* Appends a tag-generating pattern to the set for `language`, growing the
 * Sets array first when `language` is above SetUpper.
 *
 * The pointers `pattern`, `name`, `kindName` and `description` are stored as
 * given (not copied); clearPatternSet() later frees them. The new kind starts
 * out enabled.
 */
static void addCompiledTagPattern(const langType language,
                                  regex_t* const pattern, char* const name,
                                  const char kind, char* const kindName,
                                  char* const description) {
  patternSet* bucket;
  regexPattern* slot;
  if (SetUpper < language) {
    int lang;
    Sets = xRealloc(Sets, (language + 1), patternSet);
    /* Newly exposed entries start out empty. */
    for (lang = SetUpper + 1; lang <= language; ++lang) {
      Sets[lang].count = 0;
      Sets[lang].patterns = NULL;
    }
    SetUpper = language;
  }
  bucket = &Sets[language];
  bucket->patterns = xRealloc(bucket->patterns, (bucket->count + 1),
                              regexPattern);
  slot = bucket->patterns + bucket->count;
  ++bucket->count;
  slot->type = PTRN_TAG;
  slot->pattern = pattern;
  slot->u.tag.name_pattern = name;
  slot->u.tag.kind.letter = kind;
  slot->u.tag.kind.name = kindName;
  slot->u.tag.kind.description = description;
  slot->u.tag.kind.enabled = TRUE;
}
/* Appends a callback pattern to the set for `language`, growing the Sets
 * array first when `language` is above SetUpper.
 *
 * `pattern` is stored as given (not copied); clearPatternSet() later frees
 * it. `callback` is invoked for every line the pattern matches.
 */
static void addCompiledCallbackPattern(const langType language,
                                       regex_t* const pattern,
                                       const regexCallback callback) {
  patternSet* bucket;
  regexPattern* slot;
  if (SetUpper < language) {
    int lang;
    Sets = xRealloc(Sets, (language + 1), patternSet);
    /* Newly exposed entries start out empty. */
    for (lang = SetUpper + 1; lang <= language; ++lang) {
      Sets[lang].count = 0;
      Sets[lang].patterns = NULL;
    }
    SetUpper = language;
  }
  bucket = &Sets[language];
  bucket->patterns = xRealloc(bucket->patterns, (bucket->count + 1),
                              regexPattern);
  slot = bucket->patterns + bucket->count;
  ++bucket->count;
  slot->type = PTRN_CALLBACK;
  slot->pattern = pattern;
  slot->u.callback.function = callback;
}
#if defined(POSIX_REGEX)
static regex_t* compileRegex(const char* const regexp,
const char* const flags) {
int cflags = REG_EXTENDED | REG_NEWLINE;
regex_t* result = NULL;
int errcode;
int i;
for (i = 0; flags != NULL && flags[i] != '\0'; ++i) {
switch ((int)flags[i]) {
case 'b':
cflags &= ~REG_EXTENDED;
break;
case 'e':
cflags |= REG_EXTENDED;
break;
case 'i':
cflags |= REG_ICASE;
break;
default:
error(WARNING, "unknown regex flag: '%c'", *flags);
break;
}
}
result = xMalloc(1, regex_t);
errcode = regcomp(result, regexp, cflags);
if (errcode != 0) {
char errmsg[256];
regerror(errcode, result, errmsg, 256);
error(WARNING, "regcomp %s: %s", regexp, errmsg);
regfree(result);
eFree(result);
result = NULL;
}
return result;
}
#endif
/* Splits a kind specification of the form "k,name,description" into parts.
 *
 * kinds:       specification text; NULL or "" selects the defaults.
 * kind:        receives the kind letter; 'r' unless the specification starts
 *              with a single character followed by ',' or the end of string.
 * kindName:    receives a newly allocated kind name ("regex" by default).
 * description: receives a newly allocated description, or NULL when the
 *              specification has none.
 */
static void parseKinds(const char* const kinds, char* const kind,
                       char** const kindName, char** description) {
  const char* cursor;
  const char* comma;
  size_t nameLength;
  *kind = '\0';
  *kindName = NULL;
  *description = NULL;
  if (kinds == NULL || kinds[0] == '\0') {
    *kind = 'r';
    *kindName = eStrdup("regex");
    return;
  }
  cursor = kinds;
  /* A leading letter counts only when it stands alone before ',' or the end.
   */
  if (cursor[0] == ',' || (cursor[1] != ',' && cursor[1] != '\0'))
    *kind = 'r';
  else
    *kind = *cursor++;
  if (*cursor == ',') ++cursor;
  if (cursor[0] == '\0') {
    *kindName = eStrdup("regex");
    return;
  }
  comma = strchr(cursor, ',');
  if (comma == NULL) {
    *kindName = eStrdup(cursor);
    return;
  }
  /* Name runs up to the comma; anything after it is the description. */
  nameLength = (size_t)(comma - cursor);
  *kindName = (char*)eMalloc(nameLength + 1);
  strncpy(*kindName, cursor, nameLength);
  (*kindName)[nameLength] = '\0';
  if (comma[1] != '\0') *description = eStrdup(comma + 1);
}
/* Prints one line describing the kind of tag pattern `pat[i]`: its letter
 * ('?' if unset), its description (or its name when there is none) and an
 * " [off]" marker when the kind is disabled. `indent` selects a leading
 * indentation string.
 */
static void printRegexKind(const regexPattern* pat, unsigned int i,
                           boolean indent) {
  const regexPattern* const entry = pat + i;
  const struct sKind* const kind = &entry->u.tag.kind;
  const char* indentation = "";
  const char* label;
  const char* state;
  char letter;
  if (indent) indentation = " ";
  Assert(entry->type == PTRN_TAG);
  letter = kind->letter;
  if (letter == '\0') letter = '?';
  label = kind->description;
  if (label == NULL) label = kind->name;
  state = kind->enabled ? "" : " [off]";
  printf("%s%c %s %s\n", indentation, letter, label, state);
}
/* Applies the argument of a --regex-<LANG> option to `language`.
 *
 *   NULL or ""   clears every pattern of the language
 *   "@file"      adds each line of `file` as a regex definition
 *   otherwise    adds `parameter` itself as a regex definition
 *
 * A missing or unreadable file results in a warning only.
 */
static void processLanguageRegex(const langType language,
                                 const char* const parameter) {
  const char* regexfile;
  FILE* fp;
  vString* regex;
  if (parameter == NULL || parameter[0] == '\0') {
    clearPatternSet(language);
    return;
  }
  if (parameter[0] != '@') {
    addLanguageRegex(language, parameter);
    return;
  }
  regexfile = parameter + 1; /* skip the '@' */
  if (!doesFileExist(regexfile)) {
    error(WARNING, "cannot open regex file");
    return;
  }
  fp = fopen(regexfile, "r");
  if (fp == NULL) {
    error(WARNING | PERROR, "%s", regexfile);
    return;
  }
  regex = vStringNew();
  while (readLine(regex, fp)) {
    addLanguageRegex(language, vStringValue(regex));
  }
  fclose(fp);
  vStringDelete(regex);
}
/*
* Regex pattern matching
*/
#if defined(POSIX_REGEX)
/* Expands the name template `out` into a new vString.
 *
 * in:     the input line the pattern was matched against.
 * out:    template text. "\1" .. "\9" are replaced by the corresponding
 *         matched sub-expression of `in`; a reference that is 0, is not
 *         below `nmatch`, or did not participate in the match expands to
 *         nothing. A backslash before any other character is dropped and
 *         the character itself is kept. Newline and carriage return
 *         characters are skipped. A backslash that ends the template is
 *         ignored.
 * nmatch: number of entries in `pmatch`.
 * pmatch: match offsets as filled in by regexec().
 *
 * Returns the expanded string; the caller must vStringDelete() it.
 */
static vString* substitute(const char* const in, const char* out,
                           const int nmatch, const regmatch_t* const pmatch) {
  vString* result = vStringNew();
  const char* p;
  for (p = out; *p != '\0'; p++) {
    if (*p == '\\') {
      ++p;
      /* Trailing backslash: stop here so that the loop increment cannot
       * step past the terminating null. */
      if (*p == '\0') break;
      /* <ctype.h> functions require a value representable as unsigned char.
       */
      if (isdigit((unsigned char)*p)) {
        const int dig = *p - '0';
        if (0 < dig && dig < nmatch && pmatch[dig].rm_so != -1) {
          const int diglen = pmatch[dig].rm_eo - pmatch[dig].rm_so;
          vStringNCatS(result, in + pmatch[dig].rm_so, diglen);
        }
        continue;
      }
    }
    if (*p != '\n' && *p != '\r') vStringPut(result, *p);
  }
  vStringTerminate(result);
  return result;
}
/* Handles a successful match of a tag pattern: expands the pattern's name
 * template against `line`, trims surrounding whitespace and emits a tag.
 * An expansion that ends up empty is reported with a warning instead.
 */
static void matchTagPattern(const vString* const line,
                            const regexPattern* const patbuf,
                            const regmatch_t* const pmatch) {
  const char* const nameTemplate = patbuf->u.tag.name_pattern;
  vString* const tagName = substitute(vStringValue(line), nameTemplate,
                                      BACK_REFERENCE_COUNT, pmatch);
  vStringStripLeading(tagName);
  vStringStripTrailing(tagName);
  if (vStringLength(tagName) > 0) {
    makeRegexTag(tagName, &patbuf->u.tag.kind);
  } else {
    error(WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
          getInputFileName(), getInputLineNumber(), nameTemplate);
  }
  vStringDelete(tagName);
}
/* Handles a successful match of a callback pattern: converts the leading run
 * of valid entries in `pmatch` (up to BACK_REFERENCE_COUNT, stopping at the
 * first unused one) to start/length pairs and passes them to the callback.
 */
static void matchCallbackPattern(const vString* const line,
                                 const regexPattern* const patbuf,
                                 const regmatch_t* const pmatch) {
  regexMatch matches[BACK_REFERENCE_COUNT];
  unsigned int count;
  for (count = 0; count < BACK_REFERENCE_COUNT; ++count) {
    const regmatch_t* const group = pmatch + count;
    if (group->rm_so == -1) break;
    matches[count].start = group->rm_so;
    matches[count].length = group->rm_eo - group->rm_so;
  }
  patbuf->u.callback.function(vStringValue(line), matches, count);
}
/* Runs one pattern against `line` and, on a match, dispatches to the handler
 * for the pattern's type. Returns TRUE when the pattern matched and was
 * handled, FALSE otherwise.
 */
static boolean matchRegexPattern(const vString* const line,
                                 const regexPattern* const patbuf) {
  regmatch_t pmatch[BACK_REFERENCE_COUNT];
  boolean handled = TRUE;
  if (regexec(patbuf->pattern, vStringValue(line), BACK_REFERENCE_COUNT,
              pmatch, 0) != 0)
    return FALSE;
  switch (patbuf->type) {
    case PTRN_TAG:
      matchTagPattern(line, patbuf, pmatch);
      break;
    case PTRN_CALLBACK:
      matchCallbackPattern(line, patbuf, pmatch);
      break;
    default:
      Assert("invalid pattern type" == NULL);
      handled = FALSE;
      break;
  }
  return handled;
}
#endif
/* PUBLIC INTERFACE */
/* Match `line` against all patterns for the specified language. Every
 * pattern is tried, even after one has matched. Returns true if at least one
 * pattern matched.
 */
extern boolean matchRegex(const vString* const line, const langType language) {
  boolean matched = FALSE;
  const patternSet* bucket;
  unsigned int n;
  if (language == LANG_IGNORE || language > SetUpper) return matched;
  bucket = &Sets[language];
  for (n = 0; n < bucket->count; ++n) {
    if (matchRegexPattern(line, &bucket->patterns[n])) matched = TRUE;
  }
  return matched;
}
/* Reads the current input file line by line until fileReadLine() returns
 * NULL. This function does nothing else with the lines it reads.
 */
extern void findRegexTags(void) {
  for (;;) {
    if (fileReadLine() == NULL) break;
  }
}
#endif /* HAVE_REGEX */
/* Registers a tag-generating pattern for `language`.
 *
 * regex: line matching pattern (must not be NULL).
 * name:  tag name template (must not be NULL); a copy is stored.
 * kinds: optional kind specification, see parseKinds().
 * flags: optional regex flags, see compileRegex().
 *
 * Nothing is registered when regex support is unavailable or flagged as
 * broken, or when the pattern fails to compile.
 */
extern void addTagRegex(const langType language __unused__,
                        const char* const regex __unused__,
                        const char* const name __unused__,
                        const char* const kinds __unused__,
                        const char* const flags __unused__) {
#ifdef HAVE_REGEX
  regex_t* compiled;
  char letter;
  char* kindName;
  char* description;
  Assert(regex != NULL);
  Assert(name != NULL);
  if (regexBroken) return;
  compiled = compileRegex(regex, flags);
  if (compiled == NULL) return;
  parseKinds(kinds, &letter, &kindName, &description);
  addCompiledTagPattern(language, compiled, eStrdup(name), letter, kindName,
                        description);
#endif
}
/* Registers a pattern for `language` whose matches are reported to
 * `callback` instead of generating a tag.
 *
 * regex: line matching pattern (must not be NULL).
 * flags: optional regex flags, see compileRegex().
 *
 * Nothing is registered when regex support is unavailable or flagged as
 * broken, or when the pattern fails to compile.
 */
extern void addCallbackRegex(const langType language __unused__,
                             const char* const regex __unused__,
                             const char* const flags __unused__,
                             const regexCallback callback __unused__) {
#ifdef HAVE_REGEX
  regex_t* compiled;
  Assert(regex != NULL);
  if (regexBroken) return;
  compiled = compileRegex(regex, flags);
  if (compiled == NULL) return;
  addCompiledCallbackPattern(language, compiled, callback);
#endif
}
/* Registers a tag pattern for `language` from a single definition string of
 * the form "/regex/name/[k,Kind/]flags".
 *
 * `regex` itself is left untouched: parsing is done on a private copy, which
 * is released again before returning whether or not the definition was
 * valid. A malformed definition only produces the warnings issued by
 * parseTagRegex(). Nothing happens when regex support is unavailable or
 * flagged as broken.
 */
extern void addLanguageRegex(const langType language __unused__,
                             const char* const regex __unused__) {
#ifdef HAVE_REGEX
  if (!regexBroken) {
    char* const regex_pat = eStrdup(regex);
    char *name, *kinds, *flags;
    if (parseTagRegex(regex_pat, &name, &kinds, &flags))
      addTagRegex(language, regex_pat, name, kinds, flags);
    /* The working copy is ours on every path; freeing it only on success
     * leaked it for each malformed definition. */
    eFree(regex_pat);
  }
#endif
}
/*
* Regex option parsing
*/
/* Handles an option of the form "regex-<LANG>".
 *
 * option:    option name without leading dashes. The text before the first
 *            '-' must be a non-empty prefix of "regex"; the text after it
 *            names the language.
 * parameter: option argument, interpreted by processLanguageRegex().
 *
 * Returns TRUE when `option` was recognized as a regex option (even if the
 * language is unknown or regex support is missing, in which case a warning
 * is issued), FALSE when the option is not a regex option.
 */
extern boolean processRegexOption(const char* const option,
                                  const char* const parameter __unused__) {
  boolean handled = FALSE;
  const char* const dash = strchr(option, '-');
  /* Require at least one character before the dash: strncmp() with a length
   * of zero always returns 0, which would claim any option that starts with
   * '-'. */
  if (dash != NULL && dash != option &&
      strncmp(option, "regex", dash - option) == 0) {
#ifdef HAVE_REGEX
    langType language;
    language = getNamedLanguage(dash + 1);
    if (language == LANG_IGNORE)
      error(WARNING, "unknown language \"%s\" in --%s option", (dash + 1),
            option);
    else
      processLanguageRegex(language, parameter);
#else
    error(WARNING, "regex support not available; required for --%s option",
          option);
#endif
    handled = TRUE;
  }
  return handled;
}
/* Disables the kind of every tag pattern registered for `language`.
 * Callback patterns are left alone.
 */
extern void disableRegexKinds(const langType language __unused__) {
#ifdef HAVE_REGEX
  regexPattern* entry;
  regexPattern* end;
  if (language > SetUpper || Sets[language].count == 0) return;
  entry = Sets[language].patterns;
  end = entry + Sets[language].count;
  for (; entry != end; ++entry) {
    if (entry->type == PTRN_TAG) entry->u.tag.kind.enabled = FALSE;
  }
#endif
}
/* Sets the enabled state of every tag pattern of `language` whose kind
 * letter equals `kind` to `mode`.
 *
 * Returns TRUE when at least one pattern was updated, FALSE otherwise
 * (including when regex support is not compiled in).
 */
extern boolean enableRegexKind(const langType language __unused__,
                               const int kind __unused__,
                               const boolean mode __unused__) {
  boolean result = FALSE;
#ifdef HAVE_REGEX
  regexPattern* entry;
  regexPattern* end;
  if (language > SetUpper || Sets[language].count == 0) return result;
  entry = Sets[language].patterns;
  end = entry + Sets[language].count;
  for (; entry != end; ++entry) {
    if (entry->type != PTRN_TAG) continue;
    if (entry->u.tag.kind.letter != kind) continue;
    entry->u.tag.kind.enabled = mode;
    result = TRUE;
  }
#endif
  return result;
}
/* Prints a description line for the kind of each tag pattern registered for
 * `language`, in registration order. `indent` is forwarded to
 * printRegexKind().
 */
extern void printRegexKinds(const langType language __unused__,
                            boolean indent __unused__) {
#ifdef HAVE_REGEX
  const patternSet* bucket;
  unsigned int n;
  if (language > SetUpper || Sets[language].count == 0) return;
  bucket = &Sets[language];
  for (n = 0; n < bucket->count; ++n) {
    if (bucket->patterns[n].type != PTRN_TAG) continue;
    printRegexKind(bucket->patterns, n, indent);
  }
#endif
}
/* Releases the patterns of every language and the Sets array itself, and
 * resets the module to its initial empty state.
 */
extern void freeRegexResources(void) {
#ifdef HAVE_REGEX
  int lang = 0;
  while (lang <= SetUpper) {
    clearPatternSet(lang);
    ++lang;
  }
  if (Sets != NULL) {
    eFree(Sets);
  }
  SetUpper = -1;
  Sets = NULL;
#endif
}
/* Check for broken regcomp() on Cygwin.
 *
 * When both HAVE_REGEX and CHECK_REGCOMP are defined, compiles a trivial
 * probe pattern. If that fails, a warning is issued and regexBroken is set.
 * Otherwise the probe is released again. Without those macros this function
 * does nothing.
 */
extern void checkRegex(void) {
#if defined(HAVE_REGEX) && defined(CHECK_REGCOMP)
  regex_t patbuf;
  if (regcomp(&patbuf, "/hello/", 0) != 0) {
    error(WARNING, "Disabling broken regex");
    regexBroken = TRUE;
  } else {
    /* A successfully compiled pattern owns memory until regfree() is called.
     */
    regfree(&patbuf);
  }
#endif
}
/* vi:set tabstop=4 shiftwidth=4: */