615 lines
17 KiB
C
615 lines
17 KiB
C
/*
 * $Id: get.c 559 2007-06-17 03:30:09Z elliotth $
 *
 * Copyright (c) 1996-2002, Darren Hiebert
 *
 * This source code is released for free distribution under the terms of the
 * GNU General Public License.
 *
 * This module contains the high level source read functions (preprocessor
 * directives are handled within this level).
 */
|
|
#include "third_party/ctags/general.h"
|
|
/* must always come first */
|
|
#include "libc/calls/calls.h"
|
|
#include "libc/str/str.h"
|
|
#include "third_party/ctags/debug.h"
|
|
#include "third_party/ctags/entry.h"
|
|
#include "third_party/ctags/get.h"
|
|
#include "third_party/ctags/options.h"
|
|
#include "third_party/ctags/read.h"
|
|
#include "third_party/ctags/vstring.h"
|
|
|
|
/*
 * MACROS
 */
|
|
/* Evaluates to non-zero when the C strings s1 and s2 are identical. */
#define stringMatch(s1, s2) (!strcmp((s1), (s2)))
|
|
/* Non-zero when c is a blank, i.e. SPACE or TAB. Note that the argument may
 * be evaluated twice, so it must not have side effects.
 */
#define isspacetab(c) ((c) == SPACE || (c) == TAB)
|
|
|
|
/*
 * DATA DECLARATIONS
 */
|
|
/* Kind of comment, if any, introduced by a '/' character. */
typedef enum {
  COMMENT_NONE, /* the slash does not start a comment */
  COMMENT_C,    /* slash followed by '*' */
  COMMENT_CPLUS /* slash followed by another slash */
} Comment;
|
|
|
|
/* Fixed capacities used by the preprocessor state. */
enum eCppLimits {
  MaxCppNestingLevel = 20, /* number of conditional nesting slots */
  MaxDirectiveName = 10    /* buffer size for a directive name, incl. NUL */
};
|
|
|
|
/* Defines the one nesting level of a preprocessor conditional.
|
|
*/
|
|
typedef struct sConditionalInfo {
|
|
boolean ignoreAllBranches; /* ignoring parent conditional branch */
|
|
boolean singleBranch; /* choose only one branch */
|
|
boolean branchChosen; /* branch already selected */
|
|
boolean ignoring; /* current ignore state */
|
|
} conditionalInfo;
|
|
|
|
/* State of the directive currently being processed. */
enum eState {
  DRCTV_NONE,   /* no known directive - ignore to end of line */
  DRCTV_DEFINE, /* "#define" encountered */
  DRCTV_HASH,   /* initial '#' read; directive name still to be determined */
  DRCTV_IF,     /* a directive whose name starts with "if" encountered */
  DRCTV_PRAGMA, /* "#pragma" encountered */
  DRCTV_UNDEF   /* "#undef" encountered */
};
|
|
|
|
/* Defines the current state of the pre-processor.
|
|
*/
|
|
typedef struct sCppState {
|
|
int ungetch, ungetch2; /* ungotten characters, if any */
|
|
boolean resolveRequired; /* must resolve if/else/elif/endif branch */
|
|
boolean hasAtLiteralStrings; /* supports @"c:\" strings */
|
|
struct sDirective {
|
|
enum eState state; /* current directive being processed */
|
|
boolean accept; /* is a directive syntactically permitted? */
|
|
vString *name; /* macro name */
|
|
unsigned int nestLevel; /* level 0 is not used */
|
|
conditionalInfo ifdef[MaxCppNestingLevel];
|
|
} directive;
|
|
} cppState;
|
|
|
|
/*
 * DATA DEFINITIONS
 */
|
|
|
|
/* Use brace formatting to detect end of block.
|
|
*/
|
|
static boolean BraceFormat = FALSE;
|
|
|
|
static cppState Cpp = {
|
|
'\0',
|
|
'\0', /* ungetch characters */
|
|
FALSE, /* resolveRequired */
|
|
FALSE, /* hasAtLiteralStrings */
|
|
{
|
|
DRCTV_NONE, /* state */
|
|
FALSE, /* accept */
|
|
NULL, /* tag name */
|
|
0, /* nestLevel */
|
|
{{FALSE, FALSE, FALSE, FALSE}} /* ifdef array */
|
|
} /* directive */
|
|
};
|
|
|
|
/*
 * FUNCTION DEFINITIONS
 */
|
|
|
|
extern boolean isBraceFormat(void) {
|
|
return BraceFormat;
|
|
}
|
|
|
|
extern unsigned int getDirectiveNestLevel(void) {
|
|
return Cpp.directive.nestLevel;
|
|
}
|
|
|
|
extern void cppInit(const boolean state, const boolean hasAtLiteralStrings) {
|
|
BraceFormat = state;
|
|
|
|
Cpp.ungetch = '\0';
|
|
Cpp.ungetch2 = '\0';
|
|
Cpp.resolveRequired = FALSE;
|
|
Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
|
|
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
Cpp.directive.accept = TRUE;
|
|
Cpp.directive.nestLevel = 0;
|
|
|
|
Cpp.directive.ifdef[0].ignoreAllBranches = FALSE;
|
|
Cpp.directive.ifdef[0].singleBranch = FALSE;
|
|
Cpp.directive.ifdef[0].branchChosen = FALSE;
|
|
Cpp.directive.ifdef[0].ignoring = FALSE;
|
|
|
|
if (Cpp.directive.name == NULL)
|
|
Cpp.directive.name = vStringNew();
|
|
else
|
|
vStringClear(Cpp.directive.name);
|
|
}
|
|
|
|
extern void cppTerminate(void) {
|
|
if (Cpp.directive.name != NULL) {
|
|
vStringDelete(Cpp.directive.name);
|
|
Cpp.directive.name = NULL;
|
|
}
|
|
}
|
|
|
|
extern void cppBeginStatement(void) {
|
|
Cpp.resolveRequired = TRUE;
|
|
}
|
|
|
|
extern void cppEndStatement(void) {
|
|
Cpp.resolveRequired = FALSE;
|
|
}
|
|
|
|
/*
 * Scanning functions
 *
 * This section handles preprocessor directives. It strips out all
 * directives and may emit a tag for #define directives.
 */
|
|
|
|
/* This puts a character back into the input queue for the source File.
|
|
* Up to two characters may be ungotten.
|
|
*/
|
|
extern void cppUngetc(const int c) {
|
|
Assert(Cpp.ungetch2 == '\0');
|
|
Cpp.ungetch2 = Cpp.ungetch;
|
|
Cpp.ungetch = c;
|
|
}
|
|
|
|
/* Reads a directive, whose first character is given by "c", into "name".
|
|
*/
|
|
static boolean readDirective(int c, char *const name, unsigned int maxLength) {
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < maxLength - 1; ++i) {
|
|
if (i > 0) {
|
|
c = fileGetc();
|
|
if (c == EOF || !isalpha(c)) {
|
|
fileUngetc(c);
|
|
break;
|
|
}
|
|
}
|
|
name[i] = c;
|
|
}
|
|
name[i] = '\0'; /* null terminate */
|
|
|
|
return (boolean)isspacetab(c);
|
|
}
|
|
|
|
/* Reads an identifier, whose first character is given by "c", into "tag",
|
|
* together with the file location and corresponding line number.
|
|
*/
|
|
static void readIdentifier(int c, vString *const name) {
|
|
vStringClear(name);
|
|
do {
|
|
vStringPut(name, c);
|
|
} while (c = fileGetc(), (c != EOF && isident(c)));
|
|
fileUngetc(c);
|
|
vStringTerminate(name);
|
|
}
|
|
|
|
static conditionalInfo *currentConditional(void) {
|
|
return &Cpp.directive.ifdef[Cpp.directive.nestLevel];
|
|
}
|
|
|
|
static boolean isIgnore(void) {
|
|
return Cpp.directive.ifdef[Cpp.directive.nestLevel].ignoring;
|
|
}
|
|
|
|
static boolean setIgnore(const boolean ignore) {
|
|
return Cpp.directive.ifdef[Cpp.directive.nestLevel].ignoring = ignore;
|
|
}
|
|
|
|
static boolean isIgnoreBranch(void) {
|
|
conditionalInfo *const ifdef = currentConditional();
|
|
|
|
/* Force a single branch if an incomplete statement is discovered
|
|
* en route. This may have allowed earlier branches containing complete
|
|
* statements to be followed, but we must follow no further branches.
|
|
*/
|
|
if (Cpp.resolveRequired && !BraceFormat) ifdef->singleBranch = TRUE;
|
|
|
|
/* We will ignore this branch in the following cases:
|
|
*
|
|
* 1. We are ignoring all branches (conditional was within an ignored
|
|
* branch of the parent conditional)
|
|
* 2. A branch has already been chosen and either of:
|
|
* a. A statement was incomplete upon entering the conditional
|
|
* b. A statement is incomplete upon encountering a branch
|
|
*/
|
|
return (boolean)(ifdef->ignoreAllBranches ||
|
|
(ifdef->branchChosen && ifdef->singleBranch));
|
|
}
|
|
|
|
static void chooseBranch(void) {
|
|
if (!BraceFormat) {
|
|
conditionalInfo *const ifdef = currentConditional();
|
|
|
|
ifdef->branchChosen = (boolean)(ifdef->singleBranch || Cpp.resolveRequired);
|
|
}
|
|
}
|
|
|
|
/* Pushes one nesting level for an #if directive, indicating whether or not
|
|
* the branch should be ignored and whether a branch has already been chosen.
|
|
*/
|
|
static boolean pushConditional(const boolean firstBranchChosen) {
|
|
const boolean ignoreAllBranches = isIgnore(); /* current ignore */
|
|
boolean ignoreBranch = FALSE;
|
|
|
|
if (Cpp.directive.nestLevel < (unsigned int)MaxCppNestingLevel - 1) {
|
|
conditionalInfo *ifdef;
|
|
|
|
++Cpp.directive.nestLevel;
|
|
ifdef = currentConditional();
|
|
|
|
/* We take a snapshot of whether there is an incomplete statement in
|
|
* progress upon encountering the preprocessor conditional. If so,
|
|
* then we will flag that only a single branch of the conditional
|
|
* should be followed.
|
|
*/
|
|
ifdef->ignoreAllBranches = ignoreAllBranches;
|
|
ifdef->singleBranch = Cpp.resolveRequired;
|
|
ifdef->branchChosen = firstBranchChosen;
|
|
ifdef->ignoring =
|
|
(boolean)(ignoreAllBranches || (!firstBranchChosen && !BraceFormat &&
|
|
(ifdef->singleBranch || !Option.if0)));
|
|
ignoreBranch = ifdef->ignoring;
|
|
}
|
|
return ignoreBranch;
|
|
}
|
|
|
|
/* Pops one nesting level for an #endif directive.
|
|
*/
|
|
static boolean popConditional(void) {
|
|
if (Cpp.directive.nestLevel > 0) --Cpp.directive.nestLevel;
|
|
|
|
return isIgnore();
|
|
}
|
|
|
|
static void makeDefineTag(const char *const name) {
|
|
const boolean isFileScope = (boolean)(!isHeaderFile());
|
|
|
|
if (includingDefineTags() && (!isFileScope || Option.include.fileScope)) {
|
|
tagEntryInfo e;
|
|
initTagEntry(&e, name);
|
|
e.lineNumberEntry = (boolean)(Option.locate != EX_PATTERN);
|
|
e.isFileScope = isFileScope;
|
|
e.truncateLine = TRUE;
|
|
e.kindName = "macro";
|
|
e.kind = 'd';
|
|
makeTagEntry(&e);
|
|
}
|
|
}
|
|
|
|
static void directiveDefine(const int c) {
|
|
if (isident1(c)) {
|
|
readIdentifier(c, Cpp.directive.name);
|
|
if (!isIgnore()) makeDefineTag(vStringValue(Cpp.directive.name));
|
|
}
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
}
|
|
|
|
static void directivePragma(int c) {
|
|
if (isident1(c)) {
|
|
readIdentifier(c, Cpp.directive.name);
|
|
if (stringMatch(vStringValue(Cpp.directive.name), "weak")) {
|
|
/* generate macro tag for weak name */
|
|
do {
|
|
c = fileGetc();
|
|
} while (c == SPACE);
|
|
if (isident1(c)) {
|
|
readIdentifier(c, Cpp.directive.name);
|
|
makeDefineTag(vStringValue(Cpp.directive.name));
|
|
}
|
|
}
|
|
}
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
}
|
|
|
|
static boolean directiveIf(const int c) {
|
|
DebugStatement(const boolean ignore0 = isIgnore();) const boolean ignore =
|
|
pushConditional((boolean)(c != '0'));
|
|
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
DebugStatement(debugCppNest(TRUE, Cpp.directive.nestLevel);
|
|
if (ignore != ignore0) debugCppIgnore(ignore);)
|
|
|
|
return ignore;
|
|
}
|
|
|
|
static boolean directiveHash(const int c) {
|
|
boolean ignore = FALSE;
|
|
char directive[MaxDirectiveName];
|
|
DebugStatement(const boolean ignore0 = isIgnore();)
|
|
|
|
readDirective(c, directive, MaxDirectiveName);
|
|
if (stringMatch(directive, "define"))
|
|
Cpp.directive.state = DRCTV_DEFINE;
|
|
else if (stringMatch(directive, "undef"))
|
|
Cpp.directive.state = DRCTV_UNDEF;
|
|
else if (strncmp(directive, "if", (size_t)2) == 0)
|
|
Cpp.directive.state = DRCTV_IF;
|
|
else if (stringMatch(directive, "elif") || stringMatch(directive, "else")) {
|
|
ignore = setIgnore(isIgnoreBranch());
|
|
if (!ignore && stringMatch(directive, "else")) chooseBranch();
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
DebugStatement(if (ignore != ignore0) debugCppIgnore(ignore);)
|
|
} else if (stringMatch(directive, "endif")) {
|
|
DebugStatement(debugCppNest(FALSE, Cpp.directive.nestLevel);) ignore =
|
|
popConditional();
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
DebugStatement(if (ignore != ignore0) debugCppIgnore(ignore);)
|
|
} else if (stringMatch(directive, "pragma"))
|
|
Cpp.directive.state = DRCTV_PRAGMA;
|
|
else
|
|
Cpp.directive.state = DRCTV_NONE;
|
|
|
|
return ignore;
|
|
}
|
|
|
|
/* Handles a pre-processor directive whose first character is given by "c".
|
|
*/
|
|
static boolean handleDirective(const int c) {
|
|
boolean ignore = isIgnore();
|
|
|
|
switch (Cpp.directive.state) {
|
|
case DRCTV_NONE:
|
|
ignore = isIgnore();
|
|
break;
|
|
case DRCTV_DEFINE:
|
|
directiveDefine(c);
|
|
break;
|
|
case DRCTV_HASH:
|
|
ignore = directiveHash(c);
|
|
break;
|
|
case DRCTV_IF:
|
|
ignore = directiveIf(c);
|
|
break;
|
|
case DRCTV_PRAGMA:
|
|
directivePragma(c);
|
|
break;
|
|
case DRCTV_UNDEF:
|
|
directiveDefine(c);
|
|
break;
|
|
}
|
|
return ignore;
|
|
}
|
|
|
|
/* Called upon reading of a slash ('/') characters, determines whether a
|
|
* comment is encountered, and its type.
|
|
*/
|
|
static Comment isComment(void) {
|
|
Comment comment;
|
|
const int next = fileGetc();
|
|
|
|
if (next == '*')
|
|
comment = COMMENT_C;
|
|
else if (next == '/')
|
|
comment = COMMENT_CPLUS;
|
|
else {
|
|
fileUngetc(next);
|
|
comment = COMMENT_NONE;
|
|
}
|
|
return comment;
|
|
}
|
|
|
|
/* Skips over a C style comment. According to ANSI specification a comment
|
|
* is treated as white space, so we perform this substitution.
|
|
*/
|
|
int skipOverCComment(void) {
|
|
int c = fileGetc();
|
|
|
|
while (c != EOF) {
|
|
if (c != '*')
|
|
c = fileGetc();
|
|
else {
|
|
const int next = fileGetc();
|
|
|
|
if (next != '/')
|
|
c = next;
|
|
else {
|
|
c = SPACE; /* replace comment with space */
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/* Skips over a C++ style comment.
|
|
*/
|
|
static int skipOverCplusComment(void) {
|
|
int c;
|
|
|
|
while ((c = fileGetc()) != EOF) {
|
|
if (c == BACKSLASH)
|
|
fileGetc(); /* throw away next character, too */
|
|
else if (c == NEWLINE)
|
|
break;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/* Skips to the end of a string, returning a special character to
|
|
* symbolically represent a generic string.
|
|
*/
|
|
static int skipToEndOfString(boolean ignoreBackslash) {
|
|
int c;
|
|
|
|
while ((c = fileGetc()) != EOF) {
|
|
if (c == BACKSLASH && !ignoreBackslash)
|
|
fileGetc(); /* throw away next character, too */
|
|
else if (c == DOUBLE_QUOTE)
|
|
break;
|
|
}
|
|
return STRING_SYMBOL; /* symbolic representation of string */
|
|
}
|
|
|
|
/* Skips to the end of the three (possibly four) 'c' sequence, returning a
|
|
* special character to symbolically represent a generic character.
|
|
* Also detects Vera numbers that include a base specifier (ie. 'b1010).
|
|
*/
|
|
static int skipToEndOfChar(void) {
|
|
int c;
|
|
int count = 0, veraBase = '\0';
|
|
|
|
while ((c = fileGetc()) != EOF) {
|
|
++count;
|
|
if (c == BACKSLASH)
|
|
fileGetc(); /* throw away next character, too */
|
|
else if (c == SINGLE_QUOTE)
|
|
break;
|
|
else if (c == NEWLINE) {
|
|
fileUngetc(c);
|
|
break;
|
|
} else if (count == 1 && strchr("DHOB", toupper(c)) != NULL)
|
|
veraBase = c;
|
|
else if (veraBase != '\0' && !isalnum(c)) {
|
|
fileUngetc(c);
|
|
break;
|
|
}
|
|
}
|
|
return CHAR_SYMBOL; /* symbolic representation of character */
|
|
}
|
|
|
|
/* This function returns the next character, stripping out comments,
|
|
* C pre-processor directives, and the contents of single and double
|
|
* quoted strings. In short, strip anything which places a burden upon
|
|
* the tokenizer.
|
|
*/
|
|
extern int cppGetc(void) {
|
|
boolean directive = FALSE;
|
|
boolean ignore = FALSE;
|
|
int c;
|
|
|
|
if (Cpp.ungetch != '\0') {
|
|
c = Cpp.ungetch;
|
|
Cpp.ungetch = Cpp.ungetch2;
|
|
Cpp.ungetch2 = '\0';
|
|
return c; /* return here to avoid re-calling debugPutc () */
|
|
} else
|
|
do {
|
|
c = fileGetc();
|
|
process:
|
|
switch (c) {
|
|
case EOF:
|
|
ignore = FALSE;
|
|
directive = FALSE;
|
|
break;
|
|
|
|
case TAB:
|
|
case SPACE:
|
|
break; /* ignore most white space */
|
|
|
|
case NEWLINE:
|
|
if (directive && !ignore) directive = FALSE;
|
|
Cpp.directive.accept = TRUE;
|
|
break;
|
|
|
|
case DOUBLE_QUOTE:
|
|
Cpp.directive.accept = FALSE;
|
|
c = skipToEndOfString(FALSE);
|
|
break;
|
|
|
|
case '#':
|
|
if (Cpp.directive.accept) {
|
|
directive = TRUE;
|
|
Cpp.directive.state = DRCTV_HASH;
|
|
Cpp.directive.accept = FALSE;
|
|
}
|
|
break;
|
|
|
|
case SINGLE_QUOTE:
|
|
Cpp.directive.accept = FALSE;
|
|
c = skipToEndOfChar();
|
|
break;
|
|
|
|
case '/': {
|
|
const Comment comment = isComment();
|
|
|
|
if (comment == COMMENT_C)
|
|
c = skipOverCComment();
|
|
else if (comment == COMMENT_CPLUS) {
|
|
c = skipOverCplusComment();
|
|
if (c == NEWLINE) fileUngetc(c);
|
|
} else
|
|
Cpp.directive.accept = FALSE;
|
|
break;
|
|
}
|
|
|
|
case BACKSLASH: {
|
|
int next = fileGetc();
|
|
|
|
if (next == NEWLINE)
|
|
continue;
|
|
else if (next == '?')
|
|
cppUngetc(next);
|
|
else
|
|
fileUngetc(next);
|
|
break;
|
|
}
|
|
|
|
case '?': {
|
|
int next = fileGetc();
|
|
if (next != '?')
|
|
fileUngetc(next);
|
|
else {
|
|
next = fileGetc();
|
|
switch (next) {
|
|
case '(':
|
|
c = '[';
|
|
break;
|
|
case ')':
|
|
c = ']';
|
|
break;
|
|
case '<':
|
|
c = '{';
|
|
break;
|
|
case '>':
|
|
c = '}';
|
|
break;
|
|
case '/':
|
|
c = BACKSLASH;
|
|
goto process;
|
|
case '!':
|
|
c = '|';
|
|
break;
|
|
case SINGLE_QUOTE:
|
|
c = '^';
|
|
break;
|
|
case '-':
|
|
c = '~';
|
|
break;
|
|
case '=':
|
|
c = '#';
|
|
goto process;
|
|
default:
|
|
fileUngetc(next);
|
|
cppUngetc('?');
|
|
break;
|
|
}
|
|
}
|
|
} break;
|
|
|
|
default:
|
|
if (c == '@' && Cpp.hasAtLiteralStrings) {
|
|
int next = fileGetc();
|
|
if (next == DOUBLE_QUOTE) {
|
|
Cpp.directive.accept = FALSE;
|
|
c = skipToEndOfString(TRUE);
|
|
break;
|
|
}
|
|
}
|
|
Cpp.directive.accept = FALSE;
|
|
if (directive) ignore = handleDirective(c);
|
|
break;
|
|
}
|
|
} while (directive || ignore);
|
|
|
|
DebugStatement(debugPutc(DEBUG_CPP, c);)
|
|
DebugStatement(if (c == NEWLINE) debugPrintf(
|
|
DEBUG_CPP, "%6ld: ", getInputLineNumber() + 1);)
|
|
|
|
return c;
|
|
}
|
|
|
|
/* vi:set tabstop=4 shiftwidth=4: */
|