#if 0 /*─────────────────────────────────────────────────────────────────╗ │ To the extent possible under law, Justine Tunney has waived │ │ all copyright and related or neighboring rights to this file, │ │ as it is written in the following disclaimers: │ │ • http://unlicense.org/ │ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif #include "libc/alg/alg.h" #include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/fmt/conv.h" #include "libc/fmt/fmt.h" #include "libc/mem/mem.h" #include "libc/runtime/gc.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/x/x.h" #include "third_party/gdtoa/gdtoa.h" /* ** This file contains all sources (including headers) to the LEMON ** LALR(1) parser generator. The sources have been combined into a ** single file to make it easy to include LEMON in the source tree ** and Makefile of another program. ** ** The author of this program disclaims copyright. */ #define ISSPACE(X) isspace((unsigned char)(X)) #define ISDIGIT(X) isdigit((unsigned char)(X)) #define ISALNUM(X) isalnum((unsigned char)(X)) #define ISALPHA(X) isalpha((unsigned char)(X)) #define ISUPPER(X) isupper((unsigned char)(X)) #define ISLOWER(X) islower((unsigned char)(X)) #define PRIVATE static /* #define PRIVATE */ #ifdef TEST #define MAXRHS 5 /* Set low to exercise exception code */ #else #define MAXRHS 1000 #endif extern void memory_error(); static int showPrecedenceConflict = 0; static char *msort(char *, char **, int (*)(const char *, const char *)); /* ** Compilers are getting increasingly pedantic about type conversions ** as C evolves ever closer to Ada.... To work around the latest problems ** we have to define the following variant of strlen(). */ #define lemonStrlen(X) ((int)strlen(X)) /* ** Compilers are starting to complain about the use of sprintf() and strcpy(), ** saying they are unsafe. So we define our own versions of those routines too. ** ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and ** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). ** The third is a helper routine for vsnprintf() that adds texts to the end of a ** buffer, making sure the buffer is always zero-terminated. ** ** The string formatter is a minimal subset of stdlib sprintf() supporting only ** a few simply conversions: ** ** %d ** %s ** %.*s ** */ static void lemon_addtext( char *zBuf, /* The buffer to which text is added */ int *pnUsed, /* Slots of the buffer used so far */ const char *zIn, /* Text to add */ int nIn, /* Bytes of text to add. -1 to use strlen() */ int iWidth /* Field width. Negative to left justify */ ) { if (nIn < 0) for (nIn = 0; zIn[nIn]; nIn++) { } while (iWidth > nIn) { zBuf[(*pnUsed)++] = ' '; iWidth--; } if (nIn == 0) return; memcpy(&zBuf[*pnUsed], zIn, nIn); *pnUsed += nIn; while ((-iWidth) > nIn) { zBuf[(*pnUsed)++] = ' '; iWidth++; } zBuf[*pnUsed] = 0; } static int lemon_vsprintf(char *str, const char *zFormat, va_list ap) { int i, j, k, c; int nUsed = 0; const char *z; char zTemp[50]; str[0] = 0; for (i = j = 0; (c = zFormat[i]) != 0; i++) { if (c == '%') { int iWidth = 0; lemon_addtext(str, &nUsed, &zFormat[j], i - j, 0); c = zFormat[++i]; if (ISDIGIT(c) || (c == '-' && ISDIGIT(zFormat[i + 1]))) { if (c == '-') i++; while (ISDIGIT(zFormat[i])) iWidth = iWidth * 10 + zFormat[i++] - '0'; if (c == '-') iWidth = -iWidth; c = zFormat[i]; } if (c == 'd') { int v = va_arg(ap, int); if (v < 0) { lemon_addtext(str, &nUsed, "-", 1, iWidth); v = -v; } else if (v == 0) { lemon_addtext(str, &nUsed, "0", 1, iWidth); } k = 0; while (v > 0) { k++; zTemp[sizeof(zTemp) - k] = (v % 10) + '0'; v /= 10; } lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp) - k], k, iWidth); } else if (c == 's') { z = va_arg(ap, const char *); lemon_addtext(str, &nUsed, z, -1, iWidth); } else if (c == '.' && memcmp(&zFormat[i], ".*s", 3) == 0) { i += 2; k = va_arg(ap, int); z = va_arg(ap, const char *); lemon_addtext(str, &nUsed, z, k, iWidth); } else if (c == '%') { lemon_addtext(str, &nUsed, "%", 1, 0); } else { fprintf(stderr, "illegal format\n"); exit(1); } j = i + 1; } } lemon_addtext(str, &nUsed, &zFormat[j], i - j, 0); return nUsed; } static int lemon_sprintf(char *str, const char *format, ...) { va_list ap; int rc; va_start(ap, format); rc = lemon_vsprintf(str, format, ap); va_end(ap); return rc; } static void lemon_strcpy(char *dest, const char *src) { while ((*(dest++) = *(src++)) != 0) { } } static void lemon_strcat(char *dest, const char *src) { while (*dest) dest++; lemon_strcpy(dest, src); } /* a few forward declarations... */ struct rule; struct lemon; struct action; static struct action *Action_new(void); static struct action *Action_sort(struct action *); /********** From the file "build.h" ************************************/ void FindRulePrecedences(struct lemon *); void FindFirstSets(struct lemon *); void FindStates(struct lemon *); void FindLinks(struct lemon *); void FindFollowSets(struct lemon *); void FindActions(struct lemon *); /********* From the file "configlist.h" *********************************/ void Configlist_init(void); struct config *Configlist_add(struct rule *, int); struct config *Configlist_addbasis(struct rule *, int); void Configlist_closure(struct lemon *); void Configlist_sort(void); void Configlist_sortbasis(void); struct config *Configlist_return(void); struct config *Configlist_basis(void); void Configlist_eat(struct config *); void Configlist_reset(void); /********* From the file "error.h" ***************************************/ void ErrorMsg(const char *, int, const char *, ...); /****** From the file "option.h" ******************************************/ enum option_type { OPT_FLAG = 1, OPT_INT, OPT_DBL, OPT_STR, OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR }; struct s_options { enum option_type type; const char *label; char *arg; const char *message; }; int OptInit(char **, struct s_options *, FILE *); int OptNArgs(void); char *OptArg(int); void OptErr(int); void OptPrint(void); /******** From the file "parse.h" *****************************************/ void Parse(struct lemon *lemp); /********* From the file "plink.h" ***************************************/ struct plink *Plink_new(void); void Plink_add(struct plink **, struct config *); void Plink_copy(struct plink **, struct plink *); void Plink_delete(struct plink *); /********** From the file "report.h" *************************************/ void Reprint(struct lemon *); void ReportOutput(struct lemon *); void ReportTable(struct lemon *, int, int); void ReportHeader(struct lemon *); void CompressTables(struct lemon *); void ResortStates(struct lemon *); /********** From the file "set.h" ****************************************/ void SetSize(int); /* All sets will be of size N */ char *SetNew(void); /* A new set for element 0..N */ void SetFree(char *); /* Deallocate a set */ int SetAdd(char *, int); /* Add element to a set */ int SetUnion(char *, char *); /* A <- A U B, thru element N */ #define SetFind(X, Y) (X[Y]) /* True if Y is in set X */ /********** From the file "struct.h" *************************************/ /* ** Principal data structures for the LEMON parser generator. */ typedef enum { LEMON_FALSE = 0, LEMON_TRUE } Boolean; /* Symbols (terminals and nonterminals) of the grammar are stored ** in the following: */ enum symbol_type { TERMINAL, NONTERMINAL, MULTITERMINAL }; enum e_assoc { LEFT, RIGHT, NONE, UNK }; struct symbol { const char *name; /* Name of the symbol */ int index; /* Index number for this symbol */ enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ struct rule *rule; /* Linked list of rules of this (if an NT) */ struct symbol *fallback; /* fallback token in case this token doesn't parse */ int prec; /* Precedence if defined (-1 otherwise) */ enum e_assoc assoc; /* Associativity if precedence is defined */ char *firstset; /* First-set for all rules of this symbol */ Boolean lambda; /* True if NT and can generate an empty string */ int useCnt; /* Number of times used */ char *destructor; /* Code which executes whenever this symbol is ** popped from the stack during error processing */ int destLineno; /* Line number for start of destructor. Set to ** -1 for duplicate destructors. */ char *datatype; /* The data type of information held by this ** object. Only used if type==NONTERMINAL */ int dtnum; /* The data type number. In the parser, the value ** stack is a union. The .yy%d element of this ** union is the correct data type for this object */ int bContent; /* True if this symbol ever carries content - if ** it is ever more than just syntax */ /* The following fields are used by MULTITERMINALs only */ int nsubsym; /* Number of constituent symbols in the MULTI */ struct symbol **subsym; /* Array of constituent symbols */ }; /* Each production rule in the grammar is stored in the following ** structure. */ struct rule { struct symbol *lhs; /* Left-hand side of the rule */ const char *lhsalias; /* Alias for the LHS (NULL if none) */ int lhsStart; /* True if left-hand side is the start symbol */ int ruleline; /* Line number for the rule */ int nrhs; /* Number of RHS symbols */ struct symbol **rhs; /* The RHS symbols */ const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ const char *code; /* The code executed when this rule is reduced */ const char *codePrefix; /* Setup code before code[] above */ const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ int iRule; /* Rule number as used in the generated tables */ Boolean noCode; /* True if this rule has no associated C code */ Boolean codeEmitted; /* True if the code has been emitted already */ Boolean canReduce; /* True if this rule is ever reduced */ Boolean doesReduce; /* Reduce actions occur after optimization */ Boolean neverReduce; /* Reduce is theoretically possible, but prevented ** by actions or other outside implementation */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; /* A configuration is a production rule of the grammar together with ** a mark (dot) showing how much of that rule has been processed so far. ** Configurations also contain a follow-set which is a list of terminal ** symbols which are allowed to immediately follow the end of the rule. ** Every configuration is recorded as an instance of the following: */ enum cfgstatus { COMPLETE, INCOMPLETE }; struct config { struct rule *rp; /* The rule upon which the configuration is based */ int dot; /* The parse point */ char *fws; /* Follow-set for this configuration only */ struct plink *fplp; /* Follow-set forward propagation links */ struct plink *bplp; /* Follow-set backwards propagation links */ struct state *stp; /* Pointer to state which contains this */ enum cfgstatus status; /* used during followset and shift computations */ struct config *next; /* Next configuration in the state */ struct config *bp; /* The next basis configuration */ }; enum e_action { SHIFT, ACCEPT, REDUCE, ERROR, SSCONFLICT, /* A shift/shift conflict */ SRCONFLICT, /* Was a reduce, but part of a conflict */ RRCONFLICT, /* Was a reduce, but part of a conflict */ SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ NOT_USED, /* Deleted by compression */ SHIFTREDUCE /* Shift first, then reduce */ }; /* Every shift or reduce operation is stored as one of the following */ struct action { struct symbol *sp; /* The look-ahead symbol */ enum e_action type; union { struct state *stp; /* The new state, if a shift */ struct rule *rp; /* The rule, if a reduce */ } x; struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ struct action *next; /* Next action for this state */ struct action *collide; /* Next action with the same hash */ }; /* Each state of the generated parser's finite state machine ** is encoded as an instance of the following structure. */ struct state { struct config *bp; /* The basis configurations for this state */ struct config *cfp; /* All configurations in this set */ int statenum; /* Sequential number for this state */ struct action *ap; /* List of actions for this state */ int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ int iDfltReduce; /* Default action is to REDUCE by this rule */ struct rule *pDfltReduce; /* The default REDUCE rule. */ int autoReduce; /* True if this is an auto-reduce state */ }; #define NO_OFFSET (-2147483647) /* A followset propagation link indicates that the contents of one ** configuration followset should be propagated to another whenever ** the first changes. */ struct plink { struct config *cfp; /* The configuration to which linked */ struct plink *next; /* The next propagate link */ }; /* The state vector for the entire parser generator is recorded as ** follows. (LEMON uses no global variables and makes little use of ** static variables. Fields in the following structure can be thought ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ struct rule *startRule; /* First rule */ int nstate; /* Number of states */ int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nruleWithAction; /* Number of rules with actions */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ int minShiftReduce; /* Minimum shift-reduce action value */ int errAction; /* Error action value */ int accAction; /* Accept action value */ int noAction; /* No-op action value */ int minReduce; /* Minimum reduce action */ int maxAction; /* Maximum action value of any kind */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ struct symbol *wildcard; /* Token that matches anything */ char *name; /* Name of the generated parser */ char *arg; /* Declaration of the 3th argument to parser */ char *ctx; /* Declaration of 2nd argument to constructor */ char *tokentype; /* Type of terminal symbols in the parser stack */ char *vartype; /* The default type of non-terminal symbols */ char *start; /* Name of the start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ char *error; /* Code to execute when an error is seen */ char *overflow; /* Code to execute on a stack overflow */ char *failure; /* Code to execute on parser failure */ char *accept; /* Code to execute when the parser excepts */ char *extracode; /* Code appended to the generated file */ char *tokendest; /* Code to execute to destroy token data */ char *vardest; /* Code for the default non-terminal destructor */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int printPreprocessed; /* Show preprocessor output on stdout */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; #define MemoryCheck(X) \ if ((X) == 0) { \ memory_error(); \ } /**************** From the file "table.h" *********************************/ /* ** All code in this file has been automatically generated ** from a specification in the file ** "table.q" ** by the associative array code building program "aagen". ** Do not edit this file! Instead, edit the specification ** file, then rerun aagen. */ /* ** Code for processing tables in the LEMON parser generator. */ /* Routines for handling a strings */ const char *Strsafe(const char *); void Strsafe_init(void); int Strsafe_insert(const char *); const char *Strsafe_find(const char *); /* Routines for handling symbols of the grammar */ struct symbol *Symbol_new(const char *); int Symbolcmpp(const void *, const void *); void Symbol_init(void); int Symbol_insert(struct symbol *, const char *); struct symbol *Symbol_find(const char *); struct symbol *Symbol_Nth(int); int Symbol_count(void); struct symbol **Symbol_arrayof(void); /* Routines to manage the state table */ int Configcmp(const char *, const char *); struct state *State_new(void); void State_init(void); int State_insert(struct state *, struct config *); struct state *State_find(struct config *); struct state **State_arrayof(void); /* Routines used for efficiency in Configlist_add */ void Configtable_init(void); int Configtable_insert(struct config *); struct config *Configtable_find(struct config *); void Configtable_clear(int (*)(struct config *)); /****************** From the file "action.c" *******************************/ /* ** Routines processing parser actions in the LEMON parser generator. */ /* Allocate a new parser action */ static struct action *Action_new(void) { static struct action *actionfreelist = 0; struct action *newaction; if (actionfreelist == 0) { int i; int amt = 100; actionfreelist = (struct action *)calloc(amt, sizeof(struct action)); if (actionfreelist == 0) { fprintf(stderr, "Unable to allocate memory for a new parser action."); exit(1); } for (i = 0; i < amt - 1; i++) actionfreelist[i].next = &actionfreelist[i + 1]; actionfreelist[amt - 1].next = 0; } newaction = actionfreelist; actionfreelist = actionfreelist->next; return newaction; } /* Compare two actions for sorting purposes. Return negative, zero, or ** positive if the first action is less than, equal to, or greater than ** the first */ static int actioncmp(struct action *ap1, struct action *ap2) { int rc; rc = ap1->sp->index - ap2->sp->index; if (rc == 0) { rc = (int)ap1->type - (int)ap2->type; } if (rc == 0 && (ap1->type == REDUCE || ap1->type == SHIFTREDUCE)) { rc = ap1->x.rp->index - ap2->x.rp->index; } if (rc == 0) { rc = (int)(ap2 - ap1); } return rc; } /* Sort parser actions */ static struct action *Action_sort(struct action *ap) { ap = (struct action *)msort((char *)ap, (char **)&ap->next, (int (*)(const char *, const char *))actioncmp); return ap; } void Action_add(struct action **app, enum e_action type, struct symbol *sp, char *arg) { struct action *newaction; newaction = Action_new(); newaction->next = *app; *app = newaction; newaction->type = type; newaction->sp = sp; newaction->spOpt = 0; if (type == SHIFT) { newaction->x.stp = (struct state *)arg; } else { newaction->x.rp = (struct rule *)arg; } } /********************** New code to implement the "acttab" module ***********/ /* ** This module implements routines use to construct the yy_action[] table. */ /* ** The state of the yy_action table under construction is an instance of ** the following structure. ** ** The yy_action table maps the pair (state_number, lookahead) into an ** action_number. The table is an array of integers pairs. The state_number ** determines an initial offset into the yy_action array. The lookahead ** value is then added to this initial offset to get an index X into the ** yy_action array. If the aAction[X].lookahead equals the value of the ** of the lookahead input, then the value of the action_number output is ** aAction[X].action. If the lookaheads do not match then the ** default action for the state_number is returned. ** ** All actions associated with a single state_number are first entered ** into aLookahead[] using multiple calls to acttab_action(). Then the ** actions for that single state_number are placed into the aAction[] ** array with a single call to acttab_insert(). The acttab_insert() call ** also resets the aLookahead[] array in preparation for the next ** state number. */ struct lookahead_action { int lookahead; /* Value of the lookahead token */ int action; /* Action to take on the given lookahead */ }; typedef struct acttab acttab; struct acttab { int nAction; /* Number of used slots in aAction[] */ int nActionAlloc; /* Slots allocated for aAction[] */ struct lookahead_action *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ int nterminal; /* Number of terminal symbols */ int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ #define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X, N) ((X)->aAction[N].action) /* The value for the N-th entry in yy_lookahead */ #define acttab_yylookahead(X, N) ((X)->aAction[N].lookahead) /* Free all memory associated with the given acttab */ void acttab_free(acttab *p) { free(p->aAction); free(p->aLookahead); free(p); } /* Allocate a new acttab structure */ acttab *acttab_alloc(int nsymbol, int nterminal) { acttab *p = (acttab *)calloc(1, sizeof(*p)); if (p == 0) { fprintf(stderr, "Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); p->nsymbol = nsymbol; p->nterminal = nterminal; return p; } /* Add a new action to the current transaction set. ** ** This routine is called once for each lookahead for a particular ** state. */ void acttab_action(acttab *p, int lookahead, int action) { if (p->nLookahead >= p->nLookaheadAlloc) { p->nLookaheadAlloc += 25; p->aLookahead = (struct lookahead_action *)realloc( p->aLookahead, sizeof(p->aLookahead[0]) * p->nLookaheadAlloc); if (p->aLookahead == 0) { fprintf(stderr, "malloc failed\n"); exit(1); } } if (p->nLookahead == 0) { p->mxLookahead = lookahead; p->mnLookahead = lookahead; p->mnAction = action; } else { if (p->mxLookahead < lookahead) p->mxLookahead = lookahead; if (p->mnLookahead > lookahead) { p->mnLookahead = lookahead; p->mnAction = action; } } p->aLookahead[p->nLookahead].lookahead = lookahead; p->aLookahead[p->nLookahead].action = action; p->nLookahead++; } /* ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. ** ** If the makeItSafe parameter is true, then the offset is chosen so that ** it is impossible to overread the yy_lookaside[] table regardless of ** the lookaside token. This is done for the terminal symbols, as they ** come from external inputs and can contain syntax errors. When makeItSafe ** is false, there is more flexibility in selecting offsets, resulting in ** a smaller table. For non-terminal symbols, which are never syntax errors, ** makeItSafe can be false. */ int acttab_insert(acttab *p, int makeItSafe) { int i, j, k, n, end; assert(p->nLookahead > 0); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ n = p->nsymbol + 1; if (p->nAction + n >= p->nActionAlloc) { int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; p->aAction = (struct lookahead_action *)realloc( p->aAction, sizeof(p->aAction[0]) * p->nActionAlloc); if (p->aAction == 0) { fprintf(stderr, "malloc failed\n"); exit(1); } for (i = oldAlloc; i < p->nActionAlloc; i++) { p->aAction[i].lookahead = -1; p->aAction[i].action = -1; } } /* Scan the existing action table looking for an offset that is a ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ end = makeItSafe ? p->mnLookahead : 0; for (i = p->nAction - 1; i >= end; i--) { if (p->aAction[i].lookahead == p->mnLookahead) { /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ if (p->aAction[i].action != p->mnAction) continue; for (j = 0; j < p->nLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; if (k < 0 || k >= p->nAction) break; if (p->aLookahead[j].lookahead != p->aAction[k].lookahead) break; if (p->aLookahead[j].action != p->aAction[k].action) break; } if (j < p->nLookahead) continue; /* No possible lookahead value that is not in the aLookahead[] ** transaction is allowed to match aAction[i] */ n = 0; for (j = 0; j < p->nAction; j++) { if (p->aAction[j].lookahead < 0) continue; if (p->aAction[j].lookahead == j + p->mnLookahead - i) n++; } if (n == p->nLookahead) { break; /* An exact match is found at offset i */ } } } /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ if (i < end) { /* Look for holes in the aAction[] table that fit the current ** aLookahead[] transaction. Leave i set to the offset of the hole. ** If no holes are found, i is left at p->nAction, which means the ** transaction will be appended. */ i = makeItSafe ? p->mnLookahead : 0; for (; i < p->nActionAlloc - p->mxLookahead; i++) { if (p->aAction[i].lookahead < 0) { for (j = 0; j < p->nLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; if (k < 0) break; if (p->aAction[k].lookahead >= 0) break; } if (j < p->nLookahead) continue; for (j = 0; j < p->nAction; j++) { if (p->aAction[j].lookahead == j + p->mnLookahead - i) break; } if (j == p->nAction) { break; /* Fits in empty slots */ } } } } /* Insert transaction set at index i. */ #if 0 printf("Acttab:"); for(j=0; jnLookahead; j++){ printf(" %d", p->aLookahead[j].lookahead); } printf(" inserted at %d\n", i); #endif for (j = 0; j < p->nLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if (k >= p->nAction) p->nAction = k + 1; } if (makeItSafe && i + p->nterminal >= p->nAction) p->nAction = i + p->nterminal + 1; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the ** index into yy_action of the action */ return i - p->mnLookahead; } /* ** Return the size of the action table without the trailing syntax error ** entries. */ int acttab_action_size(acttab *p) { int n = p->nAction; while (n > 0 && p->aAction[n - 1].lookahead < 0) { n--; } return n; } /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON ** parser generator. */ /* Find a precedence symbol of every rule in the grammar. ** ** Those rules which have a precedence symbol coded in the input ** grammar using the "[symbol]" construct will already have the ** rp->precsym field filled. Other rules take as their precedence ** symbol the first RHS symbol with a defined precedence. If there ** are not RHS symbols with a defined precedence, the precedence ** symbol field is left blank. */ void FindRulePrecedences(struct lemon *xp) { struct rule *rp; for (rp = xp->rule; rp; rp = rp->next) { if (rp->precsym == 0) { int i, j; for (i = 0; i < rp->nrhs && rp->precsym == 0; i++) { struct symbol *sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { for (j = 0; j < sp->nsubsym; j++) { if (sp->subsym[j]->prec >= 0) { rp->precsym = sp->subsym[j]; break; } } } else if (sp->prec >= 0) { rp->precsym = rp->rhs[i]; } } } } return; } /* Find all nonterminals which will generate the empty string. ** Then go back and compute the first sets of every nonterminal. ** The first set is the set of all terminal symbols which can begin ** a string generated by that nonterminal. */ void FindFirstSets(struct lemon *lemp) { int i, j; struct rule *rp; int progress; for (i = 0; i < lemp->nsymbol; i++) { lemp->symbols[i]->lambda = LEMON_FALSE; } for (i = lemp->nterminal; i < lemp->nsymbol; i++) { lemp->symbols[i]->firstset = SetNew(); } /* First compute all lambdas */ do { progress = 0; for (rp = lemp->rule; rp; rp = rp->next) { if (rp->lhs->lambda) continue; for (i = 0; i < rp->nrhs; i++) { struct symbol *sp = rp->rhs[i]; assert(sp->type == NONTERMINAL || sp->lambda == LEMON_FALSE); if (sp->lambda == LEMON_FALSE) break; } if (i == rp->nrhs) { rp->lhs->lambda = LEMON_TRUE; progress = 1; } } } while (progress); /* Now compute all first sets */ do { struct symbol *s1, *s2; progress = 0; for (rp = lemp->rule; rp; rp = rp->next) { s1 = rp->lhs; for (i = 0; i < rp->nrhs; i++) { s2 = rp->rhs[i]; if (s2->type == TERMINAL) { progress += SetAdd(s1->firstset, s2->index); break; } else if (s2->type == MULTITERMINAL) { for (j = 0; j < s2->nsubsym; j++) { progress += SetAdd(s1->firstset, s2->subsym[j]->index); } break; } else if (s1 == s2) { if (s1->lambda == LEMON_FALSE) break; } else { progress += SetUnion(s1->firstset, s2->firstset); if (s2->lambda == LEMON_FALSE) break; } } } } while (progress); return; } /* Compute all LR(0) states for the grammar. Links ** are added to between some states so that the LR(1) follow sets ** can be computed later. */ PRIVATE struct state *getstate(struct lemon *); /* forward reference */ void FindStates(struct lemon *lemp) { struct symbol *sp; struct rule *rp; Configlist_init(); /* Find the start symbol */ if (lemp->start) { sp = Symbol_find(lemp->start); if (sp == 0) { ErrorMsg( lemp->filename, 0, "The specified start symbol \"%s\" is not " "in a nonterminal of the grammar. \"%s\" will be used as the start " "symbol instead.", lemp->start, lemp->startRule->lhs->name); lemp->errorcnt++; sp = lemp->startRule->lhs; } } else { sp = lemp->startRule->lhs; } /* Make sure the start symbol doesn't occur on the right-hand side of ** any rule. Report an error if it does. (YACC would generate a new ** start symbol in this case.) */ for (rp = lemp->rule; rp; rp = rp->next) { int i; for (i = 0; i < rp->nrhs; i++) { if (rp->rhs[i] == sp) { /* FIX ME: Deal with multiterminals */ ErrorMsg( lemp->filename, 0, "The start symbol \"%s\" occurs on the " "right-hand side of a rule. This will result in a parser which " "does not work properly.", sp->name); lemp->errorcnt++; } } } /* The basis configuration set for the first state ** is all rules which have the start symbol as their ** left-hand side */ for (rp = sp->rule; rp; rp = rp->nextlhs) { struct config *newcfp; rp->lhsStart = 1; newcfp = Configlist_addbasis(rp, 0); SetAdd(newcfp->fws, 0); } /* Compute the first state. All other states will be ** computed automatically during the computation of the first one. ** The returned pointer to the first state is not used. */ (void)getstate(lemp); return; } /* Return a pointer to a state which is described by the configuration ** list which has been built from calls to Configlist_add. */ PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ PRIVATE struct state *getstate(struct lemon *lemp) { struct config *cfp, *bp; struct state *stp; /* Extract the sorted basis of the new state. The basis was constructed ** by prior calls to "Configlist_addbasis()". */ Configlist_sortbasis(); bp = Configlist_basis(); /* Get a state with the same basis */ stp = State_find(bp); if (stp) { /* A state with the same basis already exists! Copy all the follow-set ** propagation links from the state under construction into the ** preexisting state, then return a pointer to the preexisting state */ struct config *x, *y; for (x = bp, y = stp->bp; x && y; x = x->bp, y = y->bp) { Plink_copy(&y->bplp, x->bplp); Plink_delete(x->fplp); x->fplp = x->bplp = 0; } cfp = Configlist_return(); Configlist_eat(cfp); } else { /* This really is a new state. Construct all the details */ Configlist_closure(lemp); /* Compute the configuration closure */ Configlist_sort(); /* Sort the configuration closure */ cfp = Configlist_return(); /* Get a pointer to the config list */ stp = State_new(); /* A new state structure */ MemoryCheck(stp); stp->bp = bp; /* Remember the configuration basis */ stp->cfp = cfp; /* Remember the configuration closure */ stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ stp->ap = 0; /* No actions, yet. */ State_insert(stp, stp->bp); /* Add to the state table */ buildshifts(lemp, stp); /* Recursively compute successor states */ } return stp; } /* ** Return true if two symbols are the same. */ int same_symbol(struct symbol *a, struct symbol *b) { int i; if (a == b) return 1; if (a->type != MULTITERMINAL) return 0; if (b->type != MULTITERMINAL) return 0; if (a->nsubsym != b->nsubsym) return 0; for (i = 0; i < a->nsubsym; i++) { if (a->subsym[i] != b->subsym[i]) return 0; } return 1; } /* Construct all successor states to the given state. A "successor" ** state is any state which can be reached by a shift action. */ PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) { struct config *cfp; /* For looping thru the config closure of "stp" */ struct config *bcfp; /* For the inner loop on config closure of "stp" */ struct config *newcfg; /* */ struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ struct state *newstp; /* A pointer to a successor state */ /* Each configuration becomes complete after it contibutes to a successor ** state. Initially, all configurations are incomplete */ for (cfp = stp->cfp; cfp; cfp = cfp->next) cfp->status = INCOMPLETE; /* Loop through all configurations of the state "stp" */ for (cfp = stp->cfp; cfp; cfp = cfp->next) { if (cfp->status == COMPLETE) continue; /* Already used by inner loop */ if (cfp->dot >= cfp->rp->nrhs) continue; /* Can't shift this config */ Configlist_reset(); /* Reset the new config set */ sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ /* For every configuration in the state "stp" which has the symbol "sp" ** following its dot, add the same configuration to the basis set under ** construction but with the dot shifted one symbol to the right. */ for (bcfp = cfp; bcfp; bcfp = bcfp->next) { if (bcfp->status == COMPLETE) continue; /* Already used */ if (bcfp->dot >= bcfp->rp->nrhs) continue; /* Can't shift this one */ bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ if (!same_symbol(bsp, sp)) continue; /* Must be same as for "cfp" */ bcfp->status = COMPLETE; /* Mark this config as used */ newcfg = Configlist_addbasis(bcfp->rp, bcfp->dot + 1); Plink_add(&newcfg->bplp, bcfp); } /* Get a pointer to the state described by the basis configuration set ** constructed in the preceding loop */ newstp = getstate(lemp); /* The state "newstp" is reached from the state "stp" by a shift action ** on the symbol "sp" */ if (sp->type == MULTITERMINAL) { int i; for (i = 0; i < sp->nsubsym; i++) { Action_add(&stp->ap, SHIFT, sp->subsym[i], (char *)newstp); } } else { Action_add(&stp->ap, SHIFT, sp, (char *)newstp); } } } /* ** Construct the propagation links */ void FindLinks(struct lemon *lemp) { int i; struct config *cfp, *other; struct state *stp; struct plink *plp; /* Housekeeping detail: ** Add to every propagate link a pointer back to the state to ** which the link is attached. */ for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for (cfp = stp->cfp; cfp; cfp = cfp->next) { cfp->stp = stp; } } /* Convert all backlinks into forward links. Only the forward ** links are used in the follow-set computation. */ for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for (cfp = stp->cfp; cfp; cfp = cfp->next) { for (plp = cfp->bplp; plp; plp = plp->next) { other = plp->cfp; Plink_add(&other->fplp, cfp); } } } } /* Compute all followsets. ** ** A followset is the set of all symbols which can come immediately ** after a configuration. */ void FindFollowSets(struct lemon *lemp) { int i; struct config *cfp; struct plink *plp; int progress; int change; for (i = 0; i < lemp->nstate; i++) { for (cfp = lemp->sorted[i]->cfp; cfp; cfp = cfp->next) { cfp->status = INCOMPLETE; } } do { progress = 0; for (i = 0; i < lemp->nstate; i++) { for (cfp = lemp->sorted[i]->cfp; cfp; cfp = cfp->next) { if (cfp->status == COMPLETE) continue; for (plp = cfp->fplp; plp; plp = plp->next) { change = SetUnion(plp->cfp->fws, cfp->fws); if (change) { plp->cfp->status = INCOMPLETE; progress = 1; } } cfp->status = COMPLETE; } } } while (progress); } static int resolve_conflict(struct action *, struct action *); /* Compute the reduce actions, and resolve conflicts. */ void FindActions(struct lemon *lemp) { int i, j; struct config *cfp; struct state *stp; struct symbol *sp; struct rule *rp; /* Add all of the reduce actions ** A reduce action is added for each element of the followset of ** a configuration which has its dot at the extreme right. */ for (i = 0; i < lemp->nstate; i++) { /* Loop over all states */ stp = lemp->sorted[i]; for (cfp = stp->cfp; cfp; cfp = cfp->next) { /* Loop over all configurations */ if (cfp->rp->nrhs == cfp->dot) { /* Is dot at extreme right? */ for (j = 0; j < lemp->nterminal; j++) { if (SetFind(cfp->fws, j)) { /* Add a reduce action to the state "stp" which will reduce by the ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ Action_add(&stp->ap, REDUCE, lemp->symbols[j], (char *)cfp->rp); } } } } } /* Add the accepting token */ if (lemp->start) { sp = Symbol_find(lemp->start); if (sp == 0) sp = lemp->startRule->lhs; } else { sp = lemp->startRule->lhs; } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ Action_add(&lemp->sorted[0]->ap, ACCEPT, sp, 0); /* Resolve conflicts */ for (i = 0; i < lemp->nstate; i++) { struct action *ap, *nap; stp = lemp->sorted[i]; /* assert( stp->ap ); */ stp->ap = Action_sort(stp->ap); for (ap = stp->ap; ap && ap->next; ap = ap->next) { for (nap = ap->next; nap && nap->sp == ap->sp; nap = nap->next) { /* The two actions "ap" and "nap" have the same lookahead. ** Figure out which one should be used */ lemp->nconflict += resolve_conflict(ap, nap); } } } /* Report an error for each rule that can never be reduced. */ for (rp = lemp->rule; rp; rp = rp->next) rp->canReduce = LEMON_FALSE; for (i = 0; i < lemp->nstate; i++) { struct action *ap; for (ap = lemp->sorted[i]->ap; ap; ap = ap->next) { if (ap->type == REDUCE) ap->x.rp->canReduce = LEMON_TRUE; } } for (rp = lemp->rule; rp; rp = rp->next) { if (rp->canReduce) continue; ErrorMsg(lemp->filename, rp->ruleline, "This rule can not be reduced.\n"); lemp->errorcnt++; } } /* Resolve a conflict between the two given actions. If the ** conflict can't be resolved, return non-zero. ** ** NO LONGER TRUE: ** To resolve a conflict, first look to see if either action ** is on an error rule. In that case, take the action which ** is not associated with the error rule. If neither or both ** actions are associated with an error rule, then try to ** use precedence to resolve the conflict. ** ** If either action is a SHIFT, then it must be apx. This ** function won't work if apx->type==REDUCE and apy->type==SHIFT. */ static int resolve_conflict(struct action *apx, struct action *apy) { struct symbol *spx, *spy; int errcnt = 0; assert(apx->sp == apy->sp); /* Otherwise there would be no conflict */ if (apx->type == SHIFT && apy->type == SHIFT) { apy->type = SSCONFLICT; errcnt++; } if (apx->type == SHIFT && apy->type == REDUCE) { spx = apx->sp; spy = apy->x.rp->precsym; if (spy == 0 || spx->prec < 0 || spy->prec < 0) { /* Not enough precedence information. */ apy->type = SRCONFLICT; errcnt++; } else if (spx->prec > spy->prec) { /* higher precedence wins */ apy->type = RD_RESOLVED; } else if (spx->prec < spy->prec) { apx->type = SH_RESOLVED; } else if (spx->prec == spy->prec && spx->assoc == RIGHT) { /* Use operator */ apy->type = RD_RESOLVED; /* associativity */ } else if (spx->prec == spy->prec && spx->assoc == LEFT) { /* to break tie */ apx->type = SH_RESOLVED; } else { assert(spx->prec == spy->prec && spx->assoc == NONE); apx->type = ERROR; } } else if (apx->type == REDUCE && apy->type == REDUCE) { spx = apx->x.rp->precsym; spy = apy->x.rp->precsym; if (spx == 0 || spy == 0 || spx->prec < 0 || spy->prec < 0 || spx->prec == spy->prec) { apy->type = RRCONFLICT; errcnt++; } else if (spx->prec > spy->prec) { apy->type = RD_RESOLVED; } else if (spx->prec < spy->prec) { apx->type = RD_RESOLVED; } } else { assert(apx->type == SH_RESOLVED || apx->type == RD_RESOLVED || apx->type == SSCONFLICT || apx->type == SRCONFLICT || apx->type == RRCONFLICT || apy->type == SH_RESOLVED || apy->type == RD_RESOLVED || apy->type == SSCONFLICT || apy->type == SRCONFLICT || apy->type == RRCONFLICT); /* The REDUCE/SHIFT case cannot happen because SHIFTs come before ** REDUCEs on the list. If we reach this point it must be because ** the parser conflict had already been resolved. */ } return errcnt; } /********************* From the file "configlist.c" *************************/ /* ** Routines to processing a configuration list and building a state ** in the LEMON parser generator. */ static struct config *freelist = 0; /* List of free configurations */ static struct config *current = 0; /* Top of list of configurations */ static struct config **currentend = 0; /* Last on list of configs */ static struct config *basis = 0; /* Top of list of basis configs */ static struct config **basisend = 0; /* End of list of basis configs */ /* Return a pointer to a new configuration */ PRIVATE struct config *newconfig(void) { struct config *newcfg; if (freelist == 0) { int i; int amt = 3; freelist = (struct config *)calloc(amt, sizeof(struct config)); if (freelist == 0) { fprintf(stderr, "Unable to allocate memory for a new configuration."); exit(1); } for (i = 0; i < amt - 1; i++) freelist[i].next = &freelist[i + 1]; freelist[amt - 1].next = 0; } newcfg = freelist; freelist = freelist->next; return newcfg; } /* The configuration "old" is no longer used */ PRIVATE void deleteconfig(struct config *old) { old->next = freelist; freelist = old; } /* Initialized the configuration list builder */ void Configlist_init(void) { current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_init(); return; } /* Initialized the configuration list builder */ void Configlist_reset(void) { current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_clear(0); return; } /* Add another configuration to the configuration list */ struct config *Configlist_add( struct rule *rp, /* The rule */ int dot /* Index into the RHS of the rule where the dot goes */ ) { struct config *cfp, model; assert(currentend != 0); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if (cfp == 0) { cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; Configtable_insert(cfp); } return cfp; } /* Add a basis configuration to the configuration list */ struct config *Configlist_addbasis(struct rule *rp, int dot) { struct config *cfp, model; assert(basisend != 0); assert(currentend != 0); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if (cfp == 0) { cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; *basisend = cfp; basisend = &cfp->bp; Configtable_insert(cfp); } return cfp; } /* Compute the closure of the configuration list */ void Configlist_closure(struct lemon *lemp) { struct config *cfp, *newcfp; struct rule *rp, *newrp; struct symbol *sp, *xsp; int i, dot; assert(currentend != 0); for (cfp = current; cfp; cfp = cfp->next) { rp = cfp->rp; dot = cfp->dot; if (dot >= rp->nrhs) continue; sp = rp->rhs[dot]; if (sp->type == NONTERMINAL) { if (sp->rule == 0 && sp != lemp->errsym) { ErrorMsg(lemp->filename, rp->line, "Nonterminal \"%s\" has no rules.", sp->name); lemp->errorcnt++; } for (newrp = sp->rule; newrp; newrp = newrp->nextlhs) { newcfp = Configlist_add(newrp, 0); for (i = dot + 1; i < rp->nrhs; i++) { xsp = rp->rhs[i]; if (xsp->type == TERMINAL) { SetAdd(newcfp->fws, xsp->index); break; } else if (xsp->type == MULTITERMINAL) { int k; for (k = 0; k < xsp->nsubsym; k++) { SetAdd(newcfp->fws, xsp->subsym[k]->index); } break; } else { SetUnion(newcfp->fws, xsp->firstset); if (xsp->lambda == LEMON_FALSE) break; } } if (i == rp->nrhs) Plink_add(&cfp->fplp, newcfp); } } } return; } /* Sort the configuration list */ void Configlist_sort(void) { current = (struct config *)msort((char *)current, (char **)&(current->next), Configcmp); currentend = 0; return; } /* Sort the basis configuration list */ void Configlist_sortbasis(void) { basis = (struct config *)msort((char *)current, (char **)&(current->bp), Configcmp); basisend = 0; return; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_return(void) { struct config *old; old = current; current = 0; currentend = 0; return old; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_basis(void) { struct config *old; old = basis; basis = 0; basisend = 0; return old; } /* Free all elements of the given configuration list */ void Configlist_eat(struct config *cfp) { struct config *nextcfp; for (; cfp; cfp = nextcfp) { nextcfp = cfp->next; assert(cfp->fplp == 0); assert(cfp->bplp == 0); if (cfp->fws) SetFree(cfp->fws); deleteconfig(cfp); } return; } /***************** From the file "error.c" *********************************/ /* ** Code for printing error message. */ void ErrorMsg(const char *filename, int lineno, const char *format, ...) { va_list ap; fprintf(stderr, "%s:%d: ", filename, lineno); va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); fprintf(stderr, "\n"); } /**************** From the file "main.c" ************************************/ /* ** Main program file for the LEMON parser generator. */ /* Report an out-of-memory condition and abort. This function ** is used mostly by the "MemoryCheck" macro in struct.h */ void memory_error(void) { fprintf(stderr, "Out of memory. Aborting...\n"); exit(1); } static int nDefine = 0; /* Number of -D options on the command line */ static char **azDefine = 0; /* Name of the -D macros */ /* This routine is called with the argument to each -D command-line option. ** Add the macro defined to the azDefine array. */ static void handle_D_option(char *z) { char **paz; nDefine++; azDefine = (char **)realloc(azDefine, sizeof(azDefine[0]) * nDefine); if (azDefine == 0) { fprintf(stderr, "out of memory\n"); exit(1); } paz = &azDefine[nDefine - 1]; *paz = (char *)malloc(lemonStrlen(z) + 1); if (*paz == 0) { fprintf(stderr, "out of memory\n"); exit(1); } lemon_strcpy(*paz, z); for (z = *paz; *z && *z != '='; z++) { } *z = 0; } /* Rember the name of the output directory */ static char *outputDir = NULL; static void handle_d_option(char *z) { outputDir = (char *)malloc(lemonStrlen(z) + 1); if (outputDir == 0) { fprintf(stderr, "out of memory\n"); exit(1); } lemon_strcpy(outputDir, z); } static char *user_templatename = NULL; static void handle_T_option(char *z) { user_templatename = (char *)malloc(lemonStrlen(z) + 1); if (user_templatename == 0) { memory_error(); } lemon_strcpy(user_templatename, z); } /* Merge together to lists of rules ordered by rule.iRule */ static struct rule *Rule_merge(struct rule *pA, struct rule *pB) { struct rule *pFirst = 0; struct rule **ppPrev = &pFirst; while (pA && pB) { if (pA->iRule < pB->iRule) { *ppPrev = pA; ppPrev = &pA->next; pA = pA->next; } else { *ppPrev = pB; ppPrev = &pB->next; pB = pB->next; } } if (pA) { *ppPrev = pA; } else { *ppPrev = pB; } return pFirst; } /* ** Sort a list of rules in order of increasing iRule value */ static struct rule *Rule_sort(struct rule *rp) { unsigned int i; struct rule *pNext; struct rule *x[32]; memset(x, 0, sizeof(x)); while (rp) { pNext = rp->next; rp->next = 0; for (i = 0; i < sizeof(x) / sizeof(x[0]) - 1 && x[i]; i++) { rp = Rule_merge(x[i], rp); x[i] = 0; } x[i] = rp; rp = pNext; } rp = 0; for (i = 0; i < sizeof(x) / sizeof(x[0]); i++) { rp = Rule_merge(x[i], rp); } return rp; } /* forward reference */ static const char *minimum_size_type(int lwr, int upr, int *pnByte); /* Print a single line of the "Parser Stats" output */ static void stats_line(const char *zLabel, int iValue) { int nLabel = lemonStrlen(zLabel); printf(" %s%.*s %5d\n", zLabel, 35 - nLabel, "................................", iValue); } /* The main program. Parse the command line and do it... */ int main(int argc, char **argv) { static int version = 0; static int rpflag = 0; static int basisflag = 0; static int compress = 0; static int quiet = 0; static int statistics = 0; static int mhflag = 0; static int nolinenosflag = 0; static int noResort = 0; static int sqlFlag = 0; static int printPP = 0; static const struct s_options options[] = { {OPT_FLAG, "b", (char *)&basisflag, "Print only the basis in report."}, {OPT_FLAG, "c", (char *)&compress, "Don't compress the action table."}, {OPT_FSTR, "d", (char *)&handle_d_option, "Output directory. Default '.'"}, {OPT_FSTR, "D", (char *)handle_D_option, "Define an %ifdef macro."}, {OPT_FLAG, "E", (char *)&printPP, "Print input file after preprocessing."}, {OPT_FSTR, "f", 0, "Ignored. (Placeholder for -f compiler options.)"}, {OPT_FLAG, "g", (char *)&rpflag, "Print grammar without actions."}, {OPT_FSTR, "I", 0, "Ignored. (Placeholder for '-I' compiler options.)"}, {OPT_FLAG, "m", (char *)&mhflag, "Output a makeheaders compatible file."}, {OPT_FLAG, "l", (char *)&nolinenosflag, "Do not print #line statements."}, {OPT_FSTR, "O", 0, "Ignored. (Placeholder for '-O' compiler options.)"}, {OPT_FLAG, "p", (char *)&showPrecedenceConflict, "Show conflicts resolved by precedence rules"}, {OPT_FLAG, "q", (char *)&quiet, "(Quiet) Don't print the report file."}, {OPT_FLAG, "r", (char *)&noResort, "Do not sort or renumber states"}, {OPT_FLAG, "s", (char *)&statistics, "Print parser stats to standard output."}, {OPT_FLAG, "S", (char *)&sqlFlag, "Generate the *.sql file describing the parser tables."}, {OPT_FLAG, "x", (char *)&version, "Print the version number."}, {OPT_FSTR, "T", (char *)handle_T_option, "Specify a template file."}, {OPT_FSTR, "W", 0, "Ignored. (Placeholder for '-W' compiler options.)"}, {OPT_FLAG, 0, 0, 0}}; int i; int exitcode; struct lemon lem; struct rule *rp; (void)argc; OptInit(argv, options, stderr); if (version) { printf("Lemon version 1.0\n"); exit(0); } if (OptNArgs() != 1) { fprintf(stderr, "Exactly one filename argument is required.\n"); exit(1); } memset(&lem, 0, sizeof(lem)); lem.errorcnt = 0; /* Initialize the machine */ Strsafe_init(); Symbol_init(); State_init(); lem.argv0 = argv[0]; lem.filename = OptArg(0); lem.basisflag = basisflag; lem.nolinenosflag = nolinenosflag; lem.printPreprocessed = printPP; Symbol_new("$"); /* Parse the input file */ Parse(&lem); if (lem.printPreprocessed || lem.errorcnt) exit(lem.errorcnt); if (lem.nrule == 0) { fprintf(stderr, "Empty grammar.\n"); exit(1); } lem.errsym = Symbol_find("error"); /* Count and index the symbols of the grammar */ Symbol_new("{default}"); lem.nsymbol = Symbol_count(); lem.symbols = Symbol_arrayof(); for (i = 0; i < lem.nsymbol; i++) lem.symbols[i]->index = i; qsort(lem.symbols, lem.nsymbol, sizeof(struct symbol *), Symbolcmpp); for (i = 0; i < lem.nsymbol; i++) lem.symbols[i]->index = i; while (lem.symbols[i - 1]->type == MULTITERMINAL) { i--; } assert(strcmp(lem.symbols[i - 1]->name, "{default}") == 0); lem.nsymbol = i - 1; for (i = 1; ISUPPER(lem.symbols[i]->name[0]); i++) ; lem.nterminal = i; /* Assign sequential rule numbers. Start with 0. Put rules that have no ** reduce action C-code associated with them last, so that the switch() ** statement that selects reduction actions will have a smaller jump table. */ for (i = 0, rp = lem.rule; rp; rp = rp->next) { rp->iRule = rp->code ? i++ : -1; } lem.nruleWithAction = i; for (rp = lem.rule; rp; rp = rp->next) { if (rp->iRule < 0) rp->iRule = i++; } lem.startRule = lem.rule; lem.rule = Rule_sort(lem.rule); /* Generate a reprint of the grammar, if requested on the command line */ if (rpflag) { Reprint(&lem); } else { /* Initialize the size for all follow and first sets */ SetSize(lem.nterminal + 1); /* Find the precedence for every production rule (that has one) */ FindRulePrecedences(&lem); /* Compute the lambda-nonterminals and the first-sets for every ** nonterminal */ FindFirstSets(&lem); /* Compute all LR(0) states. Also record follow-set propagation ** links so that the follow-set can be computed later */ lem.nstate = 0; FindStates(&lem); lem.sorted = State_arrayof(); /* Tie up loose ends on the propagation links */ FindLinks(&lem); /* Compute the follow set of every reducible configuration */ FindFollowSets(&lem); /* Compute the action tables */ FindActions(&lem); /* Compress the action tables */ if (compress == 0) CompressTables(&lem); /* Reorder and renumber the states so that states with fewer choices ** occur at the end. This is an optimization that helps make the ** generated parser tables smaller. */ if (noResort == 0) ResortStates(&lem); /* Generate a report of the parser generated. (the "y.output" file) */ if (!quiet) ReportOutput(&lem); /* Generate the source code for the parser */ ReportTable(&lem, mhflag, sqlFlag); /* Produce a header file for use by the scanner. (This step is ** omitted if the "-m" option is used because makeheaders will ** generate the file for us.) */ if (!mhflag) ReportHeader(&lem); } if (statistics) { printf("Parser statistics:\n"); stats_line("terminal symbols", lem.nterminal); stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if (lem.nconflict > 0) { fprintf(stderr, "%d parsing conflicts.\n", lem.nconflict); } /* return 0 on success, 1 on failure. */ exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; exit(exitcode); return (exitcode); } /******************** From the file "msort.c" *******************************/ /* ** A generic merge-sort program. ** ** USAGE: ** Let "ptr" be a pointer to some structure which is at the head of ** a null-terminated list. Then to sort the list call: ** ** ptr = msort(ptr,&(ptr->next),cmpfnc); ** ** In the above, "cmpfnc" is a pointer to a function which compares ** two instances of the structure and returns an integer, as in ** strcmp. The second argument is a pointer to the pointer to the ** second element of the linked list. This address is used to compute ** the offset to the "next" field within the structure. The offset to ** the "next" field must be constant for all structures in the list. ** ** The function returns a new pointer which is the head of the list ** after sorting. ** ** ALGORITHM: ** Merge-sort. */ /* ** Return a pointer to the next structure in the linked list. */ #define NEXT(A) (*(char **)(((char *)A) + offset)) /* ** Inputs: ** a: A sorted, null-terminated linked list. (May be null). ** b: A sorted, null-terminated linked list. (May be null). ** cmp: A pointer to the comparison function. ** offset: Offset in the structure to the "next" field. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** of both a and b. ** ** Side effects: ** The "next" pointers for elements in the lists a and b are ** changed. */ static char *merge(char *a, char *b, int (*cmp)(const char *, const char *), int offset) { char *ptr, *head; if (a == 0) { head = b; } else if (b == 0) { head = a; } else { if ((*cmp)(a, b) <= 0) { ptr = a; a = NEXT(a); } else { ptr = b; b = NEXT(b); } head = ptr; while (a && b) { if ((*cmp)(a, b) <= 0) { NEXT(ptr) = a; ptr = a; a = NEXT(a); } else { NEXT(ptr) = b; ptr = b; b = NEXT(b); } } if (a) NEXT(ptr) = a; else NEXT(ptr) = b; } return head; } /* ** Inputs: ** list: Pointer to a singly-linked list of structures. ** next: Pointer to pointer to the second element of the list. ** cmp: A comparison function. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** orginally in list. ** ** Side effects: ** The "next" pointers for elements in list are changed. */ #define LISTSIZE 30 static char *msort(char *list, char **next, int (*cmp)(const char *, const char *)) { unsigned long offset; char *ep; char *set[LISTSIZE]; int i; offset = (unsigned long)((char *)next - (char *)list); for (i = 0; i < LISTSIZE; i++) set[i] = 0; while (list) { ep = list; list = NEXT(list); NEXT(ep) = 0; for (i = 0; i < LISTSIZE - 1 && set[i] != 0; i++) { ep = merge(ep, set[i], cmp, offset); set[i] = 0; } set[i] = ep; } ep = 0; for (i = 0; i < LISTSIZE; i++) if (set[i]) ep = merge(set[i], ep, cmp, offset); return ep; } /************************ From the file "option.c" **************************/ static char **lemon_argv; static struct s_options *op; static FILE *errstream; #define ISOPT(X) ((X)[0] == '-' || (X)[0] == '+' || strchr((X), '=') != 0) /* ** Print the command line with a carrot pointing to the k-th character ** of the n-th field. */ static void errline(int n, int k, FILE *err) { int spcnt, i; if (lemon_argv[0]) fprintf(err, "%s", lemon_argv[0]); spcnt = lemonStrlen(lemon_argv[0]) + 1; for (i = 1; i < n && lemon_argv[i]; i++) { fprintf(err, " %s", lemon_argv[i]); spcnt += lemonStrlen(lemon_argv[i]) + 1; } spcnt += k; for (; lemon_argv[i]; i++) fprintf(err, " %s", lemon_argv[i]); if (spcnt < 20) { fprintf(err, "\n%*s^-- here\n", spcnt, ""); } else { fprintf(err, "\n%*shere --^\n", spcnt - 7, ""); } } /* ** Return the index of the N-th non-switch argument. Return -1 ** if N is out of range. */ static int argindex(int n) { int i; int dashdash = 0; if (lemon_argv != 0 && *lemon_argv != 0) { for (i = 1; lemon_argv[i]; i++) { if (dashdash || !ISOPT(lemon_argv[i])) { if (n == 0) return i; n--; } if (strcmp(lemon_argv[i], "--") == 0) dashdash = 1; } } return -1; } static const char emsg[] = "Command line syntax error: "; /* ** Process a flag command line argument. */ static int handleflags(int i, FILE *err) { int v; int errcnt = 0; int j; for (j = 0; op[j].label; j++) { if (strncmp(&lemon_argv[i][1], op[j].label, lemonStrlen(op[j].label)) == 0) break; } v = lemon_argv[i][0] == '-' ? 1 : 0; if (op[j].label == 0) { if (err) { fprintf(err, "%sundefined option.\n", emsg); errline(i, 1, err); } errcnt++; } else if (op[j].arg == 0) { /* Ignore this option */ } else if (op[j].type == OPT_FLAG) { *((int *)op[j].arg) = v; } else if (op[j].type == OPT_FFLAG) { (*(void (*)(int))(op[j].arg))(v); } else if (op[j].type == OPT_FSTR) { (*(void (*)(char *))(op[j].arg))(&lemon_argv[i][2]); } else { if (err) { fprintf(err, "%smissing argument on switch.\n", emsg); errline(i, 1, err); } errcnt++; } return errcnt; } /* ** Process a command line switch which has an argument. */ static int handleswitch(int i, FILE *err) { int lv = 0; double dv = 0.0; char *sv = 0, *end; char *cp; int j; int errcnt = 0; cp = strchr(lemon_argv[i], '='); assert(cp != 0); *cp = 0; for (j = 0; op[j].label; j++) { if (strcmp(lemon_argv[i], op[j].label) == 0) break; } *cp = '='; if (op[j].label == 0) { if (err) { fprintf(err, "%sundefined option.\n", emsg); errline(i, 0, err); } errcnt++; } else { cp++; switch (op[j].type) { case OPT_FLAG: case OPT_FFLAG: if (err) { fprintf(err, "%soption requires an argument.\n", emsg); errline(i, 0, err); } errcnt++; break; case OPT_DBL: case OPT_FDBL: dv = strtod(cp, &end); if (*end) { if (err) { fprintf(err, "%sillegal character in floating-point argument.\n", emsg); errline(i, (int)((char *)end - (char *)lemon_argv[i]), err); } errcnt++; } break; case OPT_INT: case OPT_FINT: lv = strtol(cp, &end, 0); if (*end) { if (err) { fprintf(err, "%sillegal character in integer argument.\n", emsg); errline(i, (int)((char *)end - (char *)lemon_argv[i]), err); } errcnt++; } break; case OPT_STR: case OPT_FSTR: sv = cp; break; } switch (op[j].type) { case OPT_FLAG: case OPT_FFLAG: break; case OPT_DBL: *(double *)(op[j].arg) = dv; break; case OPT_FDBL: (*(void (*)(double))(op[j].arg))(dv); break; case OPT_INT: *(int *)(op[j].arg) = lv; break; case OPT_FINT: (*(void (*)(int))(op[j].arg))((int)lv); break; case OPT_STR: *(char **)(op[j].arg) = sv; break; case OPT_FSTR: (*(void (*)(char *))(op[j].arg))(sv); break; } } return errcnt; } int OptInit(char **a, struct s_options *o, FILE *err) { int errcnt = 0; lemon_argv = a; op = o; errstream = err; if (lemon_argv && *lemon_argv && op) { int i; for (i = 1; lemon_argv[i]; i++) { if (lemon_argv[i][0] == '+' || lemon_argv[i][0] == '-') { errcnt += handleflags(i, err); } else if (strchr(lemon_argv[i], '=')) { errcnt += handleswitch(i, err); } } } if (errcnt > 0) { fprintf(err, "Valid command line options for \"%s\" are:\n", *a); OptPrint(); exit(1); } return 0; } int OptNArgs(void) { int cnt = 0; int dashdash = 0; int i; if (lemon_argv != 0 && lemon_argv[0] != 0) { for (i = 1; lemon_argv[i]; i++) { if (dashdash || !ISOPT(lemon_argv[i])) cnt++; if (strcmp(lemon_argv[i], "--") == 0) dashdash = 1; } } return cnt; } char *OptArg(int n) { int i; i = argindex(n); return i >= 0 ? lemon_argv[i] : 0; } void OptErr(int n) { int i; i = argindex(n); if (i >= 0) errline(i, 0, errstream); } void OptPrint(void) { int i; int max, len; max = 0; for (i = 0; op[i].label; i++) { len = lemonStrlen(op[i].label) + 1; switch (op[i].type) { case OPT_FLAG: case OPT_FFLAG: break; case OPT_INT: case OPT_FINT: len += 9; /* length of "" */ break; case OPT_DBL: case OPT_FDBL: len += 6; /* length of "" */ break; case OPT_STR: case OPT_FSTR: len += 8; /* length of "" */ break; } if (len > max) max = len; } for (i = 0; op[i].label; i++) { switch (op[i].type) { case OPT_FLAG: case OPT_FFLAG: fprintf(errstream, " -%-*s %s\n", max, op[i].label, op[i].message); break; case OPT_INT: case OPT_FINT: fprintf(errstream, " -%s%*s %s\n", op[i].label, (int)(max - lemonStrlen(op[i].label) - 9), "", op[i].message); break; case OPT_DBL: case OPT_FDBL: fprintf(errstream, " -%s%*s %s\n", op[i].label, (int)(max - lemonStrlen(op[i].label) - 6), "", op[i].message); break; case OPT_STR: case OPT_FSTR: fprintf(errstream, " -%s%*s %s\n", op[i].label, (int)(max - lemonStrlen(op[i].label) - 8), "", op[i].message); break; } } } /*********************** From the file "parse.c" ****************************/ /* ** Input file parser for the LEMON parser generator. */ /* The state of the parser */ enum e_state { INITIALIZE, WAITING_FOR_DECL_OR_RULE, WAITING_FOR_DECL_KEYWORD, WAITING_FOR_DECL_ARG, WAITING_FOR_PRECEDENCE_SYMBOL, WAITING_FOR_ARROW, IN_RHS, LHS_ALIAS_1, LHS_ALIAS_2, LHS_ALIAS_3, RHS_ALIAS_1, RHS_ALIAS_2, PRECEDENCE_MARK_1, PRECEDENCE_MARK_2, RESYNC_AFTER_RULE_ERROR, RESYNC_AFTER_DECL_ERROR, WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_FALLBACK_ID, WAITING_FOR_WILDCARD_ID, WAITING_FOR_CLASS_ID, WAITING_FOR_CLASS_TOKEN, WAITING_FOR_TOKEN_NAME }; struct pstate { char *filename; /* Name of the input file */ int tokenlineno; /* Linenumber at which current token starts */ int errorcnt; /* Number of errors so far */ char *tokenstart; /* Text of current token */ struct lemon *gp; /* Global state vector */ enum e_state state; /* The state of the parser */ struct symbol *fallback; /* The fallback token */ struct symbol *tkclass; /* Token class symbol */ struct symbol *lhs; /* Left-hand side of current rule */ const char *lhsalias; /* Alias for the LHS */ int nrhs; /* Number of right-hand side symbols seen */ struct symbol *rhs[MAXRHS]; /* RHS symbols */ const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ struct rule *prevrule; /* Previous rule parsed */ const char *declkeyword; /* Keyword of a declaration */ char **declargslot; /* Where the declaration argument should be put */ int insertLineMacro; /* Add #line before declaration insert */ int *decllinenoslot; /* Where to write declaration line number */ enum e_assoc declassoc; /* Assign this association to decl arguments */ int preccounter; /* Assign this precedence to decl arguments */ struct rule *firstrule; /* Pointer to first rule in the grammar */ struct rule *lastrule; /* Pointer to the most recently parsed rule */ }; /* Parse a single token */ static void parseonetoken(struct pstate *psp) { const char *x; x = Strsafe(psp->tokenstart); /* Save the token permanently */ #if 0 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, x,psp->state); #endif switch (psp->state) { case INITIALIZE: psp->prevrule = 0; psp->preccounter = 0; psp->firstrule = psp->lastrule = 0; psp->gp->nrule = 0; /* fall through */ case WAITING_FOR_DECL_OR_RULE: if (x[0] == '%') { psp->state = WAITING_FOR_DECL_KEYWORD; } else if (ISLOWER(x[0])) { psp->lhs = Symbol_new(x); psp->nrhs = 0; psp->lhsalias = 0; psp->state = WAITING_FOR_ARROW; } else if (x[0] == '{') { if (psp->prevrule == 0) { ErrorMsg(psp->filename, psp->tokenlineno, "There is no prior rule upon which to attach the code " "fragment which begins on this line."); psp->errorcnt++; } else if (psp->prevrule->code != 0) { ErrorMsg(psp->filename, psp->tokenlineno, "Code fragment beginning on this line is not the first " "to follow the previous rule."); psp->errorcnt++; } else if (strcmp(x, "{NEVER-REDUCE") == 0) { psp->prevrule->neverReduce = 1; } else { psp->prevrule->line = psp->tokenlineno; psp->prevrule->code = &x[1]; psp->prevrule->noCode = 0; } } else if (x[0] == '[') { psp->state = PRECEDENCE_MARK_1; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Token \"%s\" should be either \"%%\" or a nonterminal name.", x); psp->errorcnt++; } break; case PRECEDENCE_MARK_1: if (!ISUPPER(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "The precedence symbol must be a terminal."); psp->errorcnt++; } else if (psp->prevrule == 0) { ErrorMsg(psp->filename, psp->tokenlineno, "There is no prior rule to assign precedence \"[%s]\".", x); psp->errorcnt++; } else if (psp->prevrule->precsym != 0) { ErrorMsg(psp->filename, psp->tokenlineno, "Precedence mark on this line is not the first " "to follow the previous rule."); psp->errorcnt++; } else { psp->prevrule->precsym = Symbol_new(x); } psp->state = PRECEDENCE_MARK_2; break; case PRECEDENCE_MARK_2: if (x[0] != ']') { ErrorMsg(psp->filename, psp->tokenlineno, "Missing \"]\" on precedence mark."); psp->errorcnt++; } psp->state = WAITING_FOR_DECL_OR_RULE; break; case WAITING_FOR_ARROW: if (x[0] == ':' && x[1] == ':' && x[2] == '=') { psp->state = IN_RHS; } else if (x[0] == '(') { psp->state = LHS_ALIAS_1; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Expected to see a \":\" following the LHS symbol \"%s\".", psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_1: if (ISALPHA(x[0])) { psp->lhsalias = x; psp->state = LHS_ALIAS_2; } else { ErrorMsg(psp->filename, psp->tokenlineno, "\"%s\" is not a valid alias for the LHS \"%s\"\n", x, psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_2: if (x[0] == ')') { psp->state = LHS_ALIAS_3; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".", psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_3: if (x[0] == ':' && x[1] == ':' && x[2] == '=') { psp->state = IN_RHS; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Missing \"->\" following: \"%s(%s)\".", psp->lhs->name, psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case IN_RHS: if (x[0] == '.') { struct rule *rp; rp = (struct rule *)calloc(sizeof(struct rule) + sizeof(struct symbol *) * psp->nrhs + sizeof(char *) * psp->nrhs, 1); if (rp == 0) { ErrorMsg(psp->filename, psp->tokenlineno, "Can't allocate enough memory for this rule."); psp->errorcnt++; psp->prevrule = 0; } else { int i; rp->ruleline = psp->tokenlineno; rp->rhs = (struct symbol **)&rp[1]; rp->rhsalias = (const char **)&(rp->rhs[psp->nrhs]); for (i = 0; i < psp->nrhs; i++) { rp->rhs[i] = psp->rhs[i]; rp->rhsalias[i] = psp->alias[i]; if (rp->rhsalias[i] != 0) { rp->rhs[i]->bContent = 1; } } rp->lhs = psp->lhs; rp->lhsalias = psp->lhsalias; rp->nrhs = psp->nrhs; rp->code = 0; rp->noCode = 1; rp->precsym = 0; rp->index = psp->gp->nrule++; rp->nextlhs = rp->lhs->rule; rp->lhs->rule = rp; rp->next = 0; if (psp->firstrule == 0) { psp->firstrule = psp->lastrule = rp; } else { psp->lastrule->next = rp; psp->lastrule = rp; } psp->prevrule = rp; } psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISALPHA(x[0])) { if (psp->nrhs >= MAXRHS) { ErrorMsg(psp->filename, psp->tokenlineno, "Too many symbols on RHS of rule beginning at \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } else { psp->rhs[psp->nrhs] = Symbol_new(x); psp->alias[psp->nrhs] = 0; psp->nrhs++; } } else if ((x[0] == '|' || x[0] == '/') && psp->nrhs > 0 && ISUPPER(x[1])) { struct symbol *msp = psp->rhs[psp->nrhs - 1]; if (msp->type != MULTITERMINAL) { struct symbol *origsp = msp; msp = (struct symbol *)calloc(1, sizeof(*msp)); memset(msp, 0, sizeof(*msp)); msp->type = MULTITERMINAL; msp->nsubsym = 1; msp->subsym = (struct symbol **)calloc(1, sizeof(struct symbol *)); msp->subsym[0] = origsp; msp->name = origsp->name; psp->rhs[psp->nrhs - 1] = msp; } msp->nsubsym++; msp->subsym = (struct symbol **)realloc( msp->subsym, sizeof(struct symbol *) * msp->nsubsym); msp->subsym[msp->nsubsym - 1] = Symbol_new(&x[1]); if (ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "Cannot form a compound containing a non-terminal"); psp->errorcnt++; } } else if (x[0] == '(' && psp->nrhs > 0) { psp->state = RHS_ALIAS_1; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Illegal character on RHS of rule: \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_1: if (ISALPHA(x[0])) { psp->alias[psp->nrhs - 1] = x; psp->state = RHS_ALIAS_2; } else { ErrorMsg(psp->filename, psp->tokenlineno, "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", x, psp->rhs[psp->nrhs - 1]->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_2: if (x[0] == ')') { psp->state = IN_RHS; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".", psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case WAITING_FOR_DECL_KEYWORD: if (ISALPHA(x[0])) { psp->declkeyword = x; psp->declargslot = 0; psp->decllinenoslot = 0; psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; if (strcmp(x, "name") == 0) { psp->declargslot = &(psp->gp->name); psp->insertLineMacro = 0; } else if (strcmp(x, "include") == 0) { psp->declargslot = &(psp->gp->include); } else if (strcmp(x, "code") == 0) { psp->declargslot = &(psp->gp->extracode); } else if (strcmp(x, "token_destructor") == 0) { psp->declargslot = &psp->gp->tokendest; } else if (strcmp(x, "default_destructor") == 0) { psp->declargslot = &psp->gp->vardest; } else if (strcmp(x, "token_prefix") == 0) { psp->declargslot = &psp->gp->tokenprefix; psp->insertLineMacro = 0; } else if (strcmp(x, "syntax_error") == 0) { psp->declargslot = &(psp->gp->error); } else if (strcmp(x, "parse_accept") == 0) { psp->declargslot = &(psp->gp->accept); } else if (strcmp(x, "parse_failure") == 0) { psp->declargslot = &(psp->gp->failure); } else if (strcmp(x, "stack_overflow") == 0) { psp->declargslot = &(psp->gp->overflow); } else if (strcmp(x, "extra_argument") == 0) { psp->declargslot = &(psp->gp->arg); psp->insertLineMacro = 0; } else if (strcmp(x, "extra_context") == 0) { psp->declargslot = &(psp->gp->ctx); psp->insertLineMacro = 0; } else if (strcmp(x, "token_type") == 0) { psp->declargslot = &(psp->gp->tokentype); psp->insertLineMacro = 0; } else if (strcmp(x, "default_type") == 0) { psp->declargslot = &(psp->gp->vartype); psp->insertLineMacro = 0; } else if (strcmp(x, "stack_size") == 0) { psp->declargslot = &(psp->gp->stacksize); psp->insertLineMacro = 0; } else if (strcmp(x, "start_symbol") == 0) { psp->declargslot = &(psp->gp->start); psp->insertLineMacro = 0; } else if (strcmp(x, "left") == 0) { psp->preccounter++; psp->declassoc = LEFT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x, "right") == 0) { psp->preccounter++; psp->declassoc = RIGHT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x, "nonassoc") == 0) { psp->preccounter++; psp->declassoc = NONE; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x, "destructor") == 0) { psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; } else if (strcmp(x, "type") == 0) { psp->state = WAITING_FOR_DATATYPE_SYMBOL; } else if (strcmp(x, "fallback") == 0) { psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; } else if (strcmp(x, "token") == 0) { psp->state = WAITING_FOR_TOKEN_NAME; } else if (strcmp(x, "wildcard") == 0) { psp->state = WAITING_FOR_WILDCARD_ID; } else if (strcmp(x, "token_class") == 0) { psp->state = WAITING_FOR_CLASS_ID; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } } else { ErrorMsg(psp->filename, psp->tokenlineno, "Illegal declaration keyword: \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_DESTRUCTOR_SYMBOL: if (!ISALPHA(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol name missing after %%destructor keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else { struct symbol *sp = Symbol_new(x); psp->declargslot = &sp->destructor; psp->decllinenoslot = &sp->destLineno; psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; } break; case WAITING_FOR_DATATYPE_SYMBOL: if (!ISALPHA(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol name missing after %%type keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else { struct symbol *sp = Symbol_find(x); if ((sp) && (sp->datatype)) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol %%type \"%s\" already defined", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else { if (!sp) { sp = Symbol_new(x); } psp->declargslot = &sp->datatype; psp->insertLineMacro = 0; psp->state = WAITING_FOR_DECL_ARG; } } break; case WAITING_FOR_PRECEDENCE_SYMBOL: if (x[0] == '.') { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISUPPER(x[0])) { struct symbol *sp; sp = Symbol_new(x); if (sp->prec >= 0) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol \"%s\" has already be given a precedence.", x); psp->errorcnt++; } else { sp->prec = psp->preccounter; sp->assoc = psp->declassoc; } } else { ErrorMsg(psp->filename, psp->tokenlineno, "Can't assign a precedence to \"%s\".", x); psp->errorcnt++; } break; case WAITING_FOR_DECL_ARG: if (x[0] == '{' || x[0] == '\"' || ISALNUM(x[0])) { const char *zOld, *zNew; char *zBuf, *z; int nOld, n, nLine = 0, nNew, nBack; int addLineMacro; char zLine[50]; zNew = x; if (zNew[0] == '"' || zNew[0] == '{') zNew++; nNew = lemonStrlen(zNew); if (*psp->declargslot) { zOld = *psp->declargslot; } else { zOld = ""; } nOld = lemonStrlen(zOld); n = nOld + nNew + 20; addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && psp->tokenlineno > 1 && (psp->decllinenoslot == 0 || psp->decllinenoslot[0] != 0); if (addLineMacro) { for (z = psp->filename, nBack = 0; *z; z++) { if (*z == '\\') nBack++; } lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); nLine = lemonStrlen(zLine); n += nLine + lemonStrlen(psp->filename) + nBack; } *psp->declargslot = (char *)realloc(*psp->declargslot, n); zBuf = *psp->declargslot + nOld; if (addLineMacro) { if (nOld && zBuf[-1] != '\n') { *(zBuf++) = '\n'; } memcpy(zBuf, zLine, nLine); zBuf += nLine; *(zBuf++) = '"'; for (z = psp->filename; *z; z++) { if (*z == '\\') { *(zBuf++) = '\\'; } *(zBuf++) = *z; } *(zBuf++) = '"'; *(zBuf++) = '\n'; } if (psp->decllinenoslot && psp->decllinenoslot[0] == 0) { psp->decllinenoslot[0] = psp->tokenlineno; } memcpy(zBuf, zNew, nNew); zBuf += nNew; *zBuf = 0; psp->state = WAITING_FOR_DECL_OR_RULE; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Illegal argument to %%%s: %s", psp->declkeyword, x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_FALLBACK_ID: if (x[0] == '.') { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (!ISUPPER(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "%%fallback argument \"%s\" should be a token", x); psp->errorcnt++; } else { struct symbol *sp = Symbol_new(x); if (psp->fallback == 0) { psp->fallback = sp; } else if (sp->fallback) { ErrorMsg(psp->filename, psp->tokenlineno, "More than one fallback assigned to token %s", x); psp->errorcnt++; } else { sp->fallback = psp->fallback; psp->gp->has_fallback = 1; } } break; case WAITING_FOR_TOKEN_NAME: /* Tokens do not have to be declared before use. But they can be ** in order to control their assigned integer number. The number for ** each token is assigned when it is first seen. So by including ** ** %token ONE TWO THREE ** ** early in the grammar file, that assigns small consecutive values ** to each of the tokens ONE TWO and THREE. */ if (x[0] == '.') { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (!ISUPPER(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "%%token argument \"%s\" should be a token", x); psp->errorcnt++; } else { (void)Symbol_new(x); } break; case WAITING_FOR_WILDCARD_ID: if (x[0] == '.') { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (!ISUPPER(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "%%wildcard argument \"%s\" should be a token", x); psp->errorcnt++; } else { struct symbol *sp = Symbol_new(x); if (psp->gp->wildcard == 0) { psp->gp->wildcard = sp; } else { ErrorMsg(psp->filename, psp->tokenlineno, "Extra wildcard to token: %s", x); psp->errorcnt++; } } break; case WAITING_FOR_CLASS_ID: if (!ISLOWER(x[0])) { ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class must be followed by an identifier: %s", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else if (Symbol_find(x)) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol \"%s\" already used", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else { psp->tkclass = Symbol_new(x); psp->tkclass->type = MULTITERMINAL; psp->state = WAITING_FOR_CLASS_TOKEN; } break; case WAITING_FOR_CLASS_TOKEN: if (x[0] == '.') { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISUPPER(x[0]) || ((x[0] == '|' || x[0] == '/') && ISUPPER(x[1]))) { struct symbol *msp = psp->tkclass; msp->nsubsym++; msp->subsym = (struct symbol **)realloc( msp->subsym, sizeof(struct symbol *) * msp->nsubsym); if (!ISUPPER(x[0])) x++; msp->subsym[msp->nsubsym - 1] = Symbol_new(x); } else { ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class argument \"%s\" should be a token", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ case RESYNC_AFTER_DECL_ERROR: if (x[0] == '.') psp->state = WAITING_FOR_DECL_OR_RULE; if (x[0] == '%') psp->state = WAITING_FOR_DECL_KEYWORD; break; } } /* The text in the input is part of the argument to an %ifdef or %ifndef. ** Evaluate the text as a boolean expression. Return true or false. */ static int eval_preprocessor_boolean(char *z, int lineno) { int neg = 0; int res = 0; int okTerm = 1; int i; for (i = 0; z[i] != 0; i++) { if (ISSPACE(z[i])) continue; if (z[i] == '!') { if (!okTerm) goto pp_syntax_error; neg = !neg; continue; } if (z[i] == '|' && z[i + 1] == '|') { if (okTerm) goto pp_syntax_error; if (res) return 1; i++; okTerm = 1; continue; } if (z[i] == '&' && z[i + 1] == '&') { if (okTerm) goto pp_syntax_error; if (!res) return 0; i++; okTerm = 1; continue; } if (z[i] == '(') { int k; int n = 1; if (!okTerm) goto pp_syntax_error; for (k = i + 1; z[k]; k++) { if (z[k] == ')') { n--; if (n == 0) { z[k] = 0; res = eval_preprocessor_boolean(&z[i + 1], -1); z[k] = ')'; if (res < 0) { i = i - res; goto pp_syntax_error; } i = k; break; } } else if (z[k] == '(') { n++; } else if (z[k] == 0) { i = k; goto pp_syntax_error; } } if (neg) { res = !res; neg = 0; } okTerm = 0; continue; } if (ISALPHA(z[i])) { int j, k, n; if (!okTerm) goto pp_syntax_error; for (k = i + 1; ISALNUM(z[k]) || z[k] == '_'; k++) { } n = k - i; res = 0; for (j = 0; j < nDefine; j++) { if (strncmp(azDefine[j], &z[i], n) == 0 && azDefine[j][n] == 0) { res = 1; break; } } i = k - 1; if (neg) { res = !res; neg = 0; } okTerm = 0; continue; } goto pp_syntax_error; } return res; pp_syntax_error: if (lineno > 0) { fprintf(stderr, "%%if syntax error on line %d.\n", lineno); fprintf(stderr, " %.*s <-- syntax error here\n", i + 1, z); exit(1); } else { return -(i + 1); } } /* Run the preprocessor over the input file text. The global variables ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and ** comments them out. Text in between is also commented out as appropriate. */ static void preprocess_input(char *z) { int i, j, k; int exclude = 0; int start = 0; int lineno = 1; int start_lineno = 1; for (i = 0; z[i]; i++) { if (z[i] == '\n') lineno++; if (z[i] != '%' || (i > 0 && z[i - 1] != '\n')) continue; if (strncmp(&z[i], "%endif", 6) == 0 && ISSPACE(z[i + 6])) { if (exclude) { exclude--; if (exclude == 0) { for (j = start; j < i; j++) if (z[j] != '\n') z[j] = ' '; } } for (j = i; z[j] && z[j] != '\n'; j++) z[j] = ' '; } else if (strncmp(&z[i], "%else", 5) == 0 && ISSPACE(z[i + 5])) { if (exclude == 1) { exclude = 0; for (j = start; j < i; j++) if (z[j] != '\n') z[j] = ' '; } else if (exclude == 0) { exclude = 1; start = i; start_lineno = lineno; } for (j = i; z[j] && z[j] != '\n'; j++) z[j] = ' '; } else if (strncmp(&z[i], "%ifdef ", 7) == 0 || strncmp(&z[i], "%if ", 4) == 0 || strncmp(&z[i], "%ifndef ", 8) == 0) { if (exclude) { exclude++; } else { int isNot; int iBool; for (j = i; z[j] && !ISSPACE(z[j]); j++) { } iBool = j; isNot = (j == i + 7); while (z[j] && z[j] != '\n') { j++; } k = z[j]; z[j] = 0; exclude = eval_preprocessor_boolean(&z[iBool], lineno); z[j] = k; if (!isNot) exclude = !exclude; if (exclude) { start = i; start_lineno = lineno; } } for (j = i; z[j] && z[j] != '\n'; j++) z[j] = ' '; } } if (exclude) { fprintf(stderr, "unterminated %%ifdef starting on line %d\n", start_lineno); exit(1); } } /* In spite of its name, this function is really a scanner. It read ** in the entire input file (all at once) then tokenizes it. Each ** token is passed to the function "parseonetoken" which builds all ** the appropriate data structures in the global state vector "gp". */ void Parse(struct lemon *gp) { struct pstate *ps; FILE *fp; char *filebuf; unsigned int filesize; int lineno; int c; char *cp, *nextcp; int startline = 0; ps = gc(xcalloc(1, sizeof(struct pstate))); ps->gp = gp; ps->filename = gp->filename; ps->errorcnt = 0; ps->state = INITIALIZE; /* Begin by reading the input file */ fp = fopen(ps->filename, "rb"); if (fp == 0) { ErrorMsg(ps->filename, 0, "Can't open this file for reading."); gp->errorcnt++; return; } fseek(fp, 0, 2); filesize = ftell(fp); rewind(fp); filebuf = (char *)malloc(filesize + 1); if (filesize > 100000000 || filebuf == 0) { ErrorMsg(ps->filename, 0, "Input file too large."); free(filebuf); gp->errorcnt++; fclose(fp); return; } if (fread(filebuf, 1, filesize, fp) != filesize) { ErrorMsg(ps->filename, 0, "Can't read in all %d bytes of this file.", filesize); free(filebuf); gp->errorcnt++; fclose(fp); return; } fclose(fp); filebuf[filesize] = 0; /* Make an initial pass through the file to handle %ifdef and %ifndef */ preprocess_input(filebuf); if (gp->printPreprocessed) { printf("%s\n", filebuf); return; } /* Now scan the text of the input file */ lineno = 1; for (cp = filebuf; (c = *cp) != 0;) { if (c == '\n') lineno++; /* Keep track of the line number */ if (ISSPACE(c)) { cp++; continue; } /* Skip all white space */ if (c == '/' && cp[1] == '/') { /* Skip C++ style comments */ cp += 2; while ((c = *cp) != 0 && c != '\n') cp++; continue; } if (c == '/' && cp[1] == '*') { /* Skip C style comments */ cp += 2; while ((c = *cp) != 0 && (c != '/' || cp[-1] != '*')) { if (c == '\n') lineno++; cp++; } if (c) cp++; continue; } ps->tokenstart = cp; /* Mark the beginning of the token */ ps->tokenlineno = lineno; /* Linenumber on which token begins */ if (c == '\"') { /* String literals */ cp++; while ((c = *cp) != 0 && c != '\"') { if (c == '\n') lineno++; cp++; } if (c == 0) { ErrorMsg(ps->filename, startline, "String starting on this line is not terminated before " "the end of the file."); ps->errorcnt++; nextcp = cp; } else { nextcp = cp + 1; } } else if (c == '{') { /* A block of C code */ int level; cp++; for (level = 1; (c = *cp) != 0 && (level > 1 || c != '}'); cp++) { if (c == '\n') lineno++; else if (c == '{') level++; else if (c == '}') level--; else if (c == '/' && cp[1] == '*') { /* Skip comments */ int prevc; cp = &cp[2]; prevc = 0; while ((c = *cp) != 0 && (c != '/' || prevc != '*')) { if (c == '\n') lineno++; prevc = c; cp++; } } else if (c == '/' && cp[1] == '/') { /* Skip C++ style comments too */ cp = &cp[2]; while ((c = *cp) != 0 && c != '\n') cp++; if (c) lineno++; } else if (c == '\'' || c == '\"') { /* String a character literals */ int startchar, prevc; startchar = c; prevc = 0; for (cp++; (c = *cp) != 0 && (c != startchar || prevc == '\\'); cp++) { if (c == '\n') lineno++; if (prevc == '\\') prevc = 0; else prevc = c; } } } if (c == 0) { ErrorMsg(ps->filename, ps->tokenlineno, "C code starting on this line is not terminated before " "the end of the file."); ps->errorcnt++; nextcp = cp; } else { nextcp = cp + 1; } } else if (ISALNUM(c)) { /* Identifiers */ while ((c = *cp) != 0 && (ISALNUM(c) || c == '_')) cp++; nextcp = cp; } else if (c == ':' && cp[1] == ':' && cp[2] == '=') { /* The operator "::=" */ cp += 3; nextcp = cp; } else if ((c == '/' || c == '|') && ISALPHA(cp[1])) { cp += 2; while ((c = *cp) != 0 && (ISALNUM(c) || c == '_')) cp++; nextcp = cp; } else { /* All other (one character) operators */ cp++; nextcp = cp; } c = *cp; *cp = 0; /* Null terminate the token */ parseonetoken(ps); /* Parse the token */ *cp = (char)c; /* Restore the buffer */ cp = nextcp; } free(filebuf); /* Release the buffer after parsing */ gp->rule = ps->firstrule; gp->errorcnt = ps->errorcnt; } /*************************** From the file "plink.c" *********************/ /* ** Routines processing configuration follow-set propagation links ** in the LEMON parser generator. */ static struct plink *plink_freelist = 0; /* Allocate a new plink */ struct plink *Plink_new(void) { struct plink *newlink; if (plink_freelist == 0) { int i; int amt = 100; plink_freelist = (struct plink *)calloc(amt, sizeof(struct plink)); if (plink_freelist == 0) { fprintf( stderr, "Unable to allocate memory for a new follow-set propagation link.\n"); exit(1); } for (i = 0; i < amt - 1; i++) plink_freelist[i].next = &plink_freelist[i + 1]; plink_freelist[amt - 1].next = 0; } newlink = plink_freelist; plink_freelist = plink_freelist->next; return newlink; } /* Add a plink to a plink list */ void Plink_add(struct plink **plpp, struct config *cfp) { struct plink *newlink; newlink = Plink_new(); newlink->next = *plpp; *plpp = newlink; newlink->cfp = cfp; } /* Transfer every plink on the list "from" to the list "to" */ void Plink_copy(struct plink **to, struct plink *from) { struct plink *nextpl; while (from) { nextpl = from->next; from->next = *to; *to = from; from = nextpl; } } /* Delete every plink on the list */ void Plink_delete(struct plink *plp) { struct plink *nextpl; while (plp) { nextpl = plp->next; plp->next = plink_freelist; plink_freelist = plp; plp = nextpl; } } /*********************** From the file "report.c" **************************/ /* ** Procedures for generating reports and tables in the LEMON parser generator. */ /* Generate a filename with the given suffix. Space to hold the ** name comes from malloc() and must be freed by the calling ** function. */ PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) { char *name; char *cp; char *filename = lemp->filename; int sz; if (outputDir) { cp = strrchr(filename, '/'); if (cp) filename = cp + 1; } sz = lemonStrlen(filename); sz += lemonStrlen(suffix); if (outputDir) sz += lemonStrlen(outputDir) + 1; sz += 5; name = (char *)malloc(sz); if (name == 0) { fprintf(stderr, "Can't allocate space for a filename.\n"); exit(1); } name[0] = 0; if (outputDir) { lemon_strcpy(name, outputDir); lemon_strcat(name, "/"); } lemon_strcat(name, filename); cp = strrchr(name, '.'); if (cp) *cp = 0; lemon_strcat(name, suffix); return name; } /* Open a file with a name based on the name of the input file, ** but with a different (specified) suffix, and return a pointer ** to the stream */ PRIVATE FILE *file_open(struct lemon *lemp, const char *suffix, const char *mode) { FILE *fp; if (lemp->outname) free(lemp->outname); lemp->outname = file_makename(lemp, suffix); fp = fopen(lemp->outname, mode); if (fp == 0 && *mode == 'w') { fprintf(stderr, "Can't open file \"%s\".\n", lemp->outname); lemp->errorcnt++; return 0; } return fp; } /* Print the text of a rule */ void rule_print(FILE *out, struct rule *rp) { int i, j; fprintf(out, "%s", rp->lhs->name); /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ fprintf(out, " ::="); for (i = 0; i < rp->nrhs; i++) { struct symbol *sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { fprintf(out, " %s", sp->subsym[0]->name); for (j = 1; j < sp->nsubsym; j++) { fprintf(out, "|%s", sp->subsym[j]->name); } } else { fprintf(out, " %s", sp->name); } /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ } } /* Duplicate the input file without comments and without actions ** on rules */ void Reprint(struct lemon *lemp) { struct rule *rp; struct symbol *sp; int i, j, maxlen, len, ncolumns, skip; printf("// Reprint of input file \"%s\".\n// Symbols:\n", lemp->filename); maxlen = 10; for (i = 0; i < lemp->nsymbol; i++) { sp = lemp->symbols[i]; len = lemonStrlen(sp->name); if (len > maxlen) maxlen = len; } ncolumns = 76 / (maxlen + 5); if (ncolumns < 1) ncolumns = 1; skip = (lemp->nsymbol + ncolumns - 1) / ncolumns; for (i = 0; i < skip; i++) { printf("//"); for (j = i; j < lemp->nsymbol; j += skip) { sp = lemp->symbols[j]; assert(sp->index == j); printf(" %3d %-*.*s", j, maxlen, maxlen, sp->name); } printf("\n"); } for (rp = lemp->rule; rp; rp = rp->next) { rule_print(stdout, rp); printf("."); if (rp->precsym) printf(" [%s]", rp->precsym->name); /* if( rp->code ) printf("\n %s",rp->code); */ printf("\n"); } } /* Print a single rule. */ void RulePrint(FILE *fp, struct rule *rp, int iCursor) { struct symbol *sp; int i, j; fprintf(fp, "%s ::=", rp->lhs->name); for (i = 0; i <= rp->nrhs; i++) { if (i == iCursor) fprintf(fp, " *"); if (i == rp->nrhs) break; sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { fprintf(fp, " %s", sp->subsym[0]->name); for (j = 1; j < sp->nsubsym; j++) { fprintf(fp, "|%s", sp->subsym[j]->name); } } else { fprintf(fp, " %s", sp->name); } } } /* Print the rule for a configuration. */ void ConfigPrint(FILE *fp, struct config *cfp) { RulePrint(fp, cfp->rp, cfp->dot); } /* #define TEST */ #if 0 /* Print a set */ PRIVATE void SetPrint(out,set,lemp) FILE *out; char *set; struct lemon *lemp; { int i; char *spacer; spacer = ""; fprintf(out,"%12s[",""); for(i=0; interminal; i++){ if( SetFind(set,i) ){ fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); spacer = " "; } } fprintf(out,"]\n"); } /* Print a plink chain */ PRIVATE void PlinkPrint(out,plp,tag) FILE *out; struct plink *plp; char *tag; { while( plp ){ fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); ConfigPrint(out,plp->cfp); fprintf(out,"\n"); plp = plp->next; } } #endif /* Print an action to the given file descriptor. Return FALSE if ** nothing was actually printed. */ int PrintAction(struct action *ap, /* The action to print */ FILE *fp, /* Print the action here */ int indent /* Indent by this amount */ ) { int result = 1; switch (ap->type) { case SHIFT: { struct state *stp = ap->x.stp; fprintf(fp, "%*s shift %-7d", indent, ap->sp->name, stp->statenum); break; } case REDUCE: { struct rule *rp = ap->x.rp; fprintf(fp, "%*s reduce %-7d", indent, ap->sp->name, rp->iRule); RulePrint(fp, rp, -1); break; } case SHIFTREDUCE: { struct rule *rp = ap->x.rp; fprintf(fp, "%*s shift-reduce %-7d", indent, ap->sp->name, rp->iRule); RulePrint(fp, rp, -1); break; } case ACCEPT: fprintf(fp, "%*s accept", indent, ap->sp->name); break; case ERROR: fprintf(fp, "%*s error", indent, ap->sp->name); break; case SRCONFLICT: case RRCONFLICT: fprintf(fp, "%*s reduce %-7d ** Parsing conflict **", indent, ap->sp->name, ap->x.rp->iRule); break; case SSCONFLICT: fprintf(fp, "%*s shift %-7d ** Parsing conflict **", indent, ap->sp->name, ap->x.stp->statenum); break; case SH_RESOLVED: if (showPrecedenceConflict) { fprintf(fp, "%*s shift %-7d -- dropped by precedence", indent, ap->sp->name, ap->x.stp->statenum); } else { result = 0; } break; case RD_RESOLVED: if (showPrecedenceConflict) { fprintf(fp, "%*s reduce %-7d -- dropped by precedence", indent, ap->sp->name, ap->x.rp->iRule); } else { result = 0; } break; case NOT_USED: result = 0; break; } if (result && ap->spOpt) { fprintf(fp, " /* because %s==%s */", ap->sp->name, ap->spOpt->name); } return result; } /* Generate the "*.out" log file */ void ReportOutput(struct lemon *lemp) { int i, n; struct state *stp; struct config *cfp; struct action *ap; struct rule *rp; FILE *fp; fp = file_open(lemp, ".out", "wb"); if (fp == 0) return; for (i = 0; i < lemp->nxstate; i++) { stp = lemp->sorted[i]; fprintf(fp, "State %d:\n", stp->statenum); if (lemp->basisflag) cfp = stp->bp; else cfp = stp->cfp; while (cfp) { char buf[20]; if (cfp->dot == cfp->rp->nrhs) { lemon_sprintf(buf, "(%d)", cfp->rp->iRule); fprintf(fp, " %5s ", buf); } else { fprintf(fp, " "); } ConfigPrint(fp, cfp); fprintf(fp, "\n"); #if 0 SetPrint(fp,cfp->fws,lemp); PlinkPrint(fp,cfp->fplp,"To "); PlinkPrint(fp,cfp->bplp,"From"); #endif if (lemp->basisflag) cfp = cfp->bp; else cfp = cfp->next; } fprintf(fp, "\n"); for (ap = stp->ap; ap; ap = ap->next) { if (PrintAction(ap, fp, 30)) fprintf(fp, "\n"); } fprintf(fp, "\n"); } fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Symbols:\n"); fprintf(fp, "The first-set of non-terminals is shown after the name.\n\n"); for (i = 0; i < lemp->nsymbol; i++) { int j; struct symbol *sp; sp = lemp->symbols[i]; fprintf(fp, " %3d: %s", i, sp->name); if (sp->type == NONTERMINAL) { fprintf(fp, ":"); if (sp->lambda) { fprintf(fp, " "); } for (j = 0; j < lemp->nterminal; j++) { if (sp->firstset && SetFind(sp->firstset, j)) { fprintf(fp, " %s", lemp->symbols[j]->name); } } } if (sp->prec >= 0) fprintf(fp, " (precedence=%d)", sp->prec); fprintf(fp, "\n"); } fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Syntax-only Symbols:\n"); fprintf(fp, "The following symbols never carry semantic content.\n\n"); for (i = n = 0; i < lemp->nsymbol; i++) { int w; struct symbol *sp = lemp->symbols[i]; if (sp->bContent) continue; w = (int)strlen(sp->name); if (n > 0 && n + w > 75) { fprintf(fp, "\n"); n = 0; } if (n > 0) { fprintf(fp, " "); n++; } fprintf(fp, "%s", sp->name); n += w; } if (n > 0) fprintf(fp, "\n"); fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Rules:\n"); for (rp = lemp->rule; rp; rp = rp->next) { fprintf(fp, "%4d: ", rp->iRule); rule_print(fp, rp); fprintf(fp, "."); if (rp->precsym) { fprintf(fp, " [%s precedence=%d]", rp->precsym->name, rp->precsym->prec); } fprintf(fp, "\n"); } fclose(fp); return; } /* Search for the file "name" which is in the same directory as ** the exacutable */ PRIVATE char *pathsearch(char *argv0, char *name, int modemask) { const char *pathlist; char *pathbufptr = 0; char *pathbuf = 0; char *path, *cp; char c; #ifdef __WIN32__ cp = strrchr(argv0, '\\'); #else cp = strrchr(argv0, '/'); #endif if (cp) { c = *cp; *cp = 0; path = (char *)malloc(lemonStrlen(argv0) + lemonStrlen(name) + 2); if (path) lemon_sprintf(path, "%s/%s", argv0, name); *cp = c; } else { pathlist = getenv("PATH"); if (pathlist == 0) pathlist = ".:/bin:/usr/bin"; pathbuf = (char *)malloc(lemonStrlen(pathlist) + 1); path = (char *)malloc(lemonStrlen(pathlist) + lemonStrlen(name) + 2); if ((pathbuf != 0) && (path != 0)) { pathbufptr = pathbuf; lemon_strcpy(pathbuf, pathlist); while (*pathbuf) { cp = strchr(pathbuf, ':'); if (cp == 0) cp = &pathbuf[lemonStrlen(pathbuf)]; c = *cp; *cp = 0; lemon_sprintf(path, "%s/%s", pathbuf, name); *cp = c; if (c == 0) pathbuf[0] = 0; else pathbuf = &cp[1]; if (access(path, modemask) == 0) break; } } free(pathbufptr); } return path; } /* Given an action, compute the integer value for that action ** which is to be put in the action table of the generated machine. ** Return negative if no action should be generated. */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch (ap->type) { case SHIFT: act = ap->x.stp->statenum; break; case SHIFTREDUCE: { /* Since a SHIFT is inherient after a prior REDUCE, convert any ** SHIFTREDUCE action with a nonterminal on the LHS into a simple ** REDUCE action: */ if (ap->sp->index >= lemp->nterminal) { act = lemp->minReduce + ap->x.rp->iRule; } else { act = lemp->minShiftReduce + ap->x.rp->iRule; } break; } case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; case ERROR: act = lemp->errAction; break; case ACCEPT: act = lemp->accAction; break; default: act = -1; break; } return act; } #define LINESIZE 1000 /* The next cluster of routines are for reading the template file ** and writing the results to the generated parser */ /* The first function transfers data from "in" to "out" until ** a line is seen which begins with "%%". The line number is ** tracked. ** ** if name!=0, then any word that begin with "Parse" is changed to ** begin with *name instead. */ PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) { int i, iStart; char *line = gc(xmalloc(LINESIZE)); while (fgets(line, LINESIZE, in) && (line[0] != '%' || line[1] != '%')) { (*lineno)++; iStart = 0; if (name) { for (i = 0; line[i]; i++) { if (line[i] == 'P' && strncmp(&line[i], "Parse", 5) == 0 && (i == 0 || !ISALPHA(line[i - 1]))) { if (i > iStart) fprintf(out, "%.*s", i - iStart, &line[iStart]); fprintf(out, "%s", name); i += 4; iStart = i + 1; } } } fprintf(out, "%s", &line[iStart]); } } /* Skip forward past the header of the template file to the first "%%" */ PRIVATE void tplt_skip_header(FILE *in, int *lineno) { char *line = gc(xmalloc(LINESIZE)); while (fgets(line, LINESIZE, in) && (line[0] != '%' || line[1] != '%')) { (*lineno)++; } } /* The next function finds the template file and opens it, returning ** a pointer to the opened file. */ PRIVATE FILE *tplt_open(struct lemon *lemp) { static const char templatename[] = "third_party/lemon/lempar.c.txt"; char *buf; FILE *in; char *tpltname; char *toFree = 0; char *cp; buf = gc(xmalloc(1000)); /* first, see if user specified a template filename on the command line. */ if (user_templatename != 0) { if (access(user_templatename, 004) == -1) { fprintf(stderr, "Can't find the parser driver template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } in = fopen(user_templatename, "rb"); if (in == 0) { fprintf(stderr, "Can't open the template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } return in; } cp = strrchr(lemp->filename, '.'); if (cp) { lemon_sprintf(buf, "%.*s.lt", (int)(cp - lemp->filename), lemp->filename); } else { lemon_sprintf(buf, "%s.lt", lemp->filename); } if (access(buf, 004) == 0) { tpltname = buf; } else if (access(templatename, 004) == 0) { tpltname = templatename; } else { toFree = tpltname = pathsearch(lemp->argv0, templatename, 0); } if (tpltname == 0) { fprintf(stderr, "Can't find the parser driver template file \"%s\".\n", templatename); lemp->errorcnt++; return 0; } in = fopen(tpltname, "rb"); if (in == 0) { fprintf(stderr, "Can't open the template file \"%s\".\n", tpltname); lemp->errorcnt++; } free(toFree); return in; } /* Print a #line directive line to the output file. */ PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) { fprintf(out, "#line %d \"", lineno); while (*filename) { if (*filename == '\\') putc('\\', out); putc(*filename, out); filename++; } fprintf(out, "\"\n"); } /* Print a string to the file and keep the linenumber up to date */ PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) { if (str == 0) return; while (*str) { putc(*str, out); if (*str == '\n') (*lineno)++; str++; } if (str[-1] != '\n') { putc('\n', out); (*lineno)++; } if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out, *lineno, lemp->outname); } return; } /* ** The following routine emits code for the destructor for the ** symbol sp */ void emit_destructor_code(FILE *out, struct symbol *sp, struct lemon *lemp, int *lineno) { char *cp = 0; if (sp->type == TERMINAL) { cp = lemp->tokendest; if (cp == 0) return; fprintf(out, "{\n"); (*lineno)++; } else if (sp->destructor) { cp = sp->destructor; fprintf(out, "{\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out, sp->destLineno, lemp->filename); } } else if (lemp->vardest) { cp = lemp->vardest; if (cp == 0) return; fprintf(out, "{\n"); (*lineno)++; } else { assert(0); /* Cannot happen */ } for (; *cp; cp++) { if (*cp == '$' && cp[1] == '$') { fprintf(out, "(yypminor->yy%d)", sp->dtnum); cp++; continue; } if (*cp == '\n') (*lineno)++; fputc(*cp, out); } fprintf(out, "\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out, *lineno, lemp->outname); } fprintf(out, "}\n"); (*lineno)++; return; } /* ** Return TRUE (non-zero) if the given symbol has a destructor. */ int has_destructor(struct symbol *sp, struct lemon *lemp) { int ret; if (sp->type == TERMINAL) { ret = lemp->tokendest != 0; } else { ret = lemp->vardest != 0 || sp->destructor != 0; } return ret; } /* ** Append text to a dynamically allocated string. If zText is 0 then ** reset the string to be empty again. Always return the complete text ** of the string (which is overwritten with each call). ** ** n bytes of zText are stored. If n==0 then all of zText up to the first ** \000 terminator is stored. zText can contain up to two instances of ** %d. The values of p1 and p2 are written into the first and second ** %d. ** ** If n==-1, then the previous character is overwritten. */ PRIVATE char *append_str(const char *zText, int n, int p1, int p2) { static char empty[1] = {0}; static char *z = 0; static int alloced = 0; static int used = 0; int c; char zInt[40]; if (zText == 0) { if (used == 0 && z != 0) z[0] = 0; used = 0; return z; } if (n <= 0) { if (n < 0) { used += n; assert(used >= 0); } n = lemonStrlen(zText); } if ((int)(n + sizeof(zInt) * 2 + used) >= alloced) { alloced = n + sizeof(zInt) * 2 + used + 200; z = (char *)realloc(z, alloced); } if (z == 0) return empty; while (n-- > 0) { c = *(zText++); if (c == '%' && n > 0 && zText[0] == 'd') { lemon_sprintf(zInt, "%d", p1); p1 = p2; lemon_strcpy(&z[used], zInt); used += lemonStrlen(&z[used]); zText++; n--; } else { z[used++] = (char)c; } } z[used] = 0; return z; } /* ** Write and transform the rp->code string so that symbols are expanded. ** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. ** ** Return 1 if the expanded code requires that "yylhsminor" local variable ** to be defined. */ PRIVATE int translate_code(struct lemon *lemp, struct rule *rp) { char *cp, *xp; int i; int rc = 0; /* True if yylhsminor is used */ int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ char lhsused = 0; /* True if the LHS element has been used */ char lhsdirect; /* True if LHS writes directly into stack */ char used[MAXRHS]; /* True for each RHS element which is used */ char zLhs[50]; /* Convert the LHS symbol into this string */ char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ for (i = 0; i < rp->nrhs; i++) used[i] = 0; lhsused = 0; if (rp->code == 0) { static const char newlinestr[2] = {'\n', '\0'}; rp->code = newlinestr; rp->line = rp->ruleline; rp->noCode = 1; } else { rp->noCode = 0; } if (rp->nrhs == 0) { /* If there are no RHS symbols, then writing directly to the LHS is ok */ lhsdirect = 1; } else if (rp->rhsalias[0] == 0) { /* The left-most RHS symbol has no value. LHS direct is ok. But ** we have to call the distructor on the RHS symbol first. */ lhsdirect = 1; if (has_destructor(rp->rhs[0], lemp)) { append_str(0, 0, 0, 0); append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, rp->rhs[0]->index, 1 - rp->nrhs); rp->codePrefix = Strsafe(append_str(0, 0, 0, 0)); rp->noCode = 0; } } else if (rp->lhsalias == 0) { /* There is no LHS value symbol. */ lhsdirect = 1; } else if (strcmp(rp->lhsalias, rp->rhsalias[0]) == 0) { /* The LHS symbol and the left-most RHS symbol are the same, so ** direct writing is allowed */ lhsdirect = 1; lhsused = 1; used[0] = 1; if (rp->lhs->dtnum != rp->rhs[0]->dtnum) { ErrorMsg(lemp->filename, rp->ruleline, "%s(%s) and %s(%s) share the same label but have " "different datatypes.", rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); lemp->errorcnt++; } } else { lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", rp->lhsalias, rp->rhsalias[0]); zSkip = strstr(rp->code, zOvwrt); if (zSkip != 0) { /* The code contains a special comment that indicates that it is safe ** for the LHS label to overwrite left-most RHS label. */ lhsdirect = 1; } else { lhsdirect = 0; } } if (lhsdirect) { sprintf(zLhs, "yymsp[%d].minor.yy%d", 1 - rp->nrhs, rp->lhs->dtnum); } else { rc = 1; sprintf(zLhs, "yylhsminor.yy%d", rp->lhs->dtnum); } append_str(0, 0, 0, 0); /* This const cast is wrong but harmless, if we're careful. */ for (cp = (char *)rp->code; *cp; cp++) { if (cp == zSkip) { append_str(zOvwrt, 0, 0, 0); cp += lemonStrlen(zOvwrt) - 1; dontUseRhs0 = 1; continue; } if (ISALPHA(*cp) && (cp == rp->code || (!ISALNUM(cp[-1]) && cp[-1] != '_'))) { char saved; for (xp = &cp[1]; ISALNUM(*xp) || *xp == '_'; xp++) ; saved = *xp; *xp = 0; if (rp->lhsalias && strcmp(cp, rp->lhsalias) == 0) { append_str(zLhs, 0, 0, 0); cp = xp; lhsused = 1; } else { for (i = 0; i < rp->nrhs; i++) { if (rp->rhsalias[i] && strcmp(cp, rp->rhsalias[i]) == 0) { if (i == 0 && dontUseRhs0) { ErrorMsg(lemp->filename, rp->ruleline, "Label %s used after '%s'.", rp->rhsalias[0], zOvwrt); lemp->errorcnt++; } else if (cp != rp->code && cp[-1] == '@') { /* If the argument is of the form @X then substituted ** the token number of X, not the value of X */ append_str("yymsp[%d].major", -1, i - rp->nrhs + 1, 0); } else { struct symbol *sp = rp->rhs[i]; int dtnum; if (sp->type == MULTITERMINAL) { dtnum = sp->subsym[0]->dtnum; } else { dtnum = sp->dtnum; } append_str("yymsp[%d].minor.yy%d", 0, i - rp->nrhs + 1, dtnum); } cp = xp; used[i] = 1; break; } } } *xp = saved; } append_str(cp, 1, 0, 0); } /* End loop */ /* Main code generation completed */ cp = append_str(0, 0, 0, 0); if (cp && cp[0]) rp->code = Strsafe(cp); append_str(0, 0, 0, 0); /* Check to make sure the LHS has been used */ if (rp->lhsalias && !lhsused) { ErrorMsg(lemp->filename, rp->ruleline, "Label \"%s\" for \"%s(%s)\" is never used.", rp->lhsalias, rp->lhs->name, rp->lhsalias); lemp->errorcnt++; } /* Generate destructor code for RHS minor values which are not referenced. ** Generate error messages for unused labels and duplicate labels. */ for (i = 0; i < rp->nrhs; i++) { if (rp->rhsalias[i]) { if (i > 0) { int j; if (rp->lhsalias && strcmp(rp->lhsalias, rp->rhsalias[i]) == 0) { ErrorMsg( lemp->filename, rp->ruleline, "%s(%s) has the same label as the LHS but is not the left-most " "symbol on the RHS.", rp->rhs[i]->name, rp->rhsalias[i]); lemp->errorcnt++; } for (j = 0; j < i; j++) { if (rp->rhsalias[j] && strcmp(rp->rhsalias[j], rp->rhsalias[i]) == 0) { ErrorMsg(lemp->filename, rp->ruleline, "Label %s used for multiple symbols on the RHS of a rule.", rp->rhsalias[i]); lemp->errorcnt++; break; } } } if (!used[i]) { ErrorMsg(lemp->filename, rp->ruleline, "Label %s for \"%s(%s)\" is never used.", rp->rhsalias[i], rp->rhs[i]->name, rp->rhsalias[i]); lemp->errorcnt++; } } else if (i > 0 && has_destructor(rp->rhs[i], lemp)) { append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, rp->rhs[i]->index, i - rp->nrhs + 1); } } /* If unable to write LHS values directly into the stack, write the ** saved LHS value now. */ if (lhsdirect == 0) { append_str(" yymsp[%d].minor.yy%d = ", 0, 1 - rp->nrhs, rp->lhs->dtnum); append_str(zLhs, 0, 0, 0); append_str(";\n", 0, 0, 0); } /* Suffix code generation complete */ cp = append_str(0, 0, 0, 0); if (cp && cp[0]) { rp->codeSuffix = Strsafe(cp); rp->noCode = 0; } return rc; } /* ** Generate code which executes when the rule "rp" is reduced. Write ** the code to "out". Make sure lineno stays up-to-date. */ PRIVATE void emit_code(FILE *out, struct rule *rp, struct lemon *lemp, int *lineno) { const char *cp; /* Setup code prior to the #line directive */ if (rp->codePrefix && rp->codePrefix[0]) { fprintf(out, "{%s", rp->codePrefix); for (cp = rp->codePrefix; *cp; cp++) { if (*cp == '\n') (*lineno)++; } } /* Generate code to do the reduce action */ if (rp->code) { if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out, rp->line, lemp->filename); } fprintf(out, "{%s", rp->code); for (cp = rp->code; *cp; cp++) { if (*cp == '\n') (*lineno)++; } fprintf(out, "}\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out, *lineno, lemp->outname); } } /* Generate breakdown code that occurs after the #line directive */ if (rp->codeSuffix && rp->codeSuffix[0]) { fprintf(out, "%s", rp->codeSuffix); for (cp = rp->codeSuffix; *cp; cp++) { if (*cp == '\n') (*lineno)++; } } if (rp->codePrefix) { fprintf(out, "}\n"); (*lineno)++; } return; } /* ** Print the definition of the union used for the parser's data stack. ** This union contains fields for every possible data type for tokens ** and nonterminals. In the process of computing and printing this ** union, also set the ".dtnum" field of every terminal and nonterminal ** symbol. */ void print_stack_union( FILE *out, /* The output stream */ struct lemon *lemp, /* The main info structure for this parser */ int *plineno, /* Pointer to the line number */ int mhflag /* True if generating makeheaders output */ ) { int lineno = *plineno; /* The line number of the output */ char **types; /* A hash table of datatypes */ int arraysize; /* Size of the "types" array */ int maxdtlength; /* Maximum length of any ".datatype" field. */ char *stddt; /* Standardized name for a datatype */ int i, j; /* Loop counters */ unsigned hash; /* For hashing the name of a type */ const char *name; /* Name of the parser */ /* Allocate and initialize types[] and allocate stddt[] */ arraysize = lemp->nsymbol * 2; types = (char **)calloc(arraysize, sizeof(char *)); if (types == 0) { fprintf(stderr, "Out of memory.\n"); exit(1); } for (i = 0; i < arraysize; i++) types[i] = 0; maxdtlength = 0; if (lemp->vartype) { maxdtlength = lemonStrlen(lemp->vartype); } for (i = 0; i < lemp->nsymbol; i++) { int len; struct symbol *sp = lemp->symbols[i]; if (sp->datatype == 0) continue; len = lemonStrlen(sp->datatype); if (len > maxdtlength) maxdtlength = len; } stddt = (char *)malloc(maxdtlength * 2 + 1); if (stddt == 0) { fprintf(stderr, "Out of memory.\n"); exit(1); } /* Build a hash table of datatypes. The ".dtnum" field of each symbol ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is ** used for terminal symbols. If there is no %default_type defined then ** 0 is also used as the .dtnum value for nonterminals which do not specify ** a datatype using the %type directive. */ for (i = 0; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; char *cp; if (sp == lemp->errsym) { sp->dtnum = arraysize + 1; continue; } if (sp->type != NONTERMINAL || (sp->datatype == 0 && lemp->vartype == 0)) { sp->dtnum = 0; continue; } cp = sp->datatype; if (cp == 0) cp = lemp->vartype; j = 0; while (ISSPACE(*cp)) cp++; while (*cp) stddt[j++] = *cp++; while (j > 0 && ISSPACE(stddt[j - 1])) j--; stddt[j] = 0; if (lemp->tokentype && strcmp(stddt, lemp->tokentype) == 0) { sp->dtnum = 0; continue; } hash = 0; for (j = 0; stddt[j]; j++) { hash = hash * 53 + stddt[j]; } hash = (hash & 0x7fffffff) % arraysize; while (types[hash]) { if (strcmp(types[hash], stddt) == 0) { sp->dtnum = hash + 1; break; } hash++; if (hash >= (unsigned)arraysize) hash = 0; } if (types[hash] == 0) { sp->dtnum = hash + 1; types[hash] = (char *)malloc(lemonStrlen(stddt) + 1); if (types[hash] == 0) { fprintf(stderr, "Out of memory.\n"); exit(1); } lemon_strcpy(types[hash], stddt); } } /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ name = lemp->name ? lemp->name : "Parse"; lineno = *plineno; if (mhflag) { fprintf(out, "#if INTERFACE\n"); lineno++; } fprintf(out, "#define %sTOKENTYPE %s\n", name, lemp->tokentype ? lemp->tokentype : "void*"); lineno++; if (mhflag) { fprintf(out, "#endif\n"); lineno++; } fprintf(out, "typedef union {\n"); lineno++; fprintf(out, " int yyinit;\n"); lineno++; fprintf(out, " %sTOKENTYPE yy0;\n", name); lineno++; for (i = 0; i < arraysize; i++) { if (types[i] == 0) continue; fprintf(out, " %s yy%d;\n", types[i], i + 1); lineno++; free(types[i]); } if (lemp->errsym && lemp->errsym->useCnt) { fprintf(out, " int yy%d;\n", lemp->errsym->dtnum); lineno++; } free(stddt); free(types); fprintf(out, "} YYMINORTYPE;\n"); lineno++; *plineno = lineno; } /* ** Return the name of a C datatype able to represent values between ** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof ** for that type (1, 2, or 4) into *pnByte. */ static const char *minimum_size_type(int lwr, int upr, int *pnByte) { const char *zType = "int"; int nByte = 4; if (lwr >= 0) { if (upr <= 255) { zType = "unsigned char"; nByte = 1; } else if (upr < 65535) { zType = "unsigned short int"; nByte = 2; } else { zType = "unsigned int"; nByte = 4; } } else if (lwr >= -127 && upr <= 127) { zType = "signed char"; nByte = 1; } else if (lwr >= -32767 && upr < 32767) { zType = "short"; nByte = 2; } if (pnByte) *pnByte = nByte; return zType; } /* ** Each state contains a set of token transaction and a set of ** nonterminal transactions. Each of these sets makes an instance ** of the following structure. An array of these structures is used ** to order the creation of entries in the yy_action[] table. */ struct axset { struct state *stp; /* A pointer to a state */ int isTkn; /* True to use tokens. False for non-terminals */ int nAction; /* Number of actions */ int iOrder; /* Original order of action sets */ }; /* ** Compare to axset structures for sorting purposes */ static int axset_compare(const void *a, const void *b) { struct axset *p1 = (struct axset *)a; struct axset *p2 = (struct axset *)b; int c; c = p2->nAction - p1->nAction; if (c == 0) { c = p1->iOrder - p2->iOrder; } assert(c != 0 || p1 == p2); return c; } /* ** Write text on "out" that describes the rule "rp". */ static void writeRuleText(FILE *out, struct rule *rp) { int j; fprintf(out, "%s ::=", rp->lhs->name); for (j = 0; j < rp->nrhs; j++) { struct symbol *sp = rp->rhs[j]; if (sp->type != MULTITERMINAL) { fprintf(out, " %s", sp->name); } else { int k; fprintf(out, " %s", sp->subsym[0]->name); for (k = 1; k < sp->nsubsym; k++) { fprintf(out, "|%s", sp->subsym[k]->name); } } } } /* Generate C source code for the parser */ void ReportTable(struct lemon *lemp, int mhflag, /* Output in makeheaders format if true */ int sqlFlag /* Generate the *.sql file too */ ) { FILE *out, *in, *sql; char *line = gc(xmalloc(LINESIZE)); int lineno; struct state *stp; struct action *ap; struct rule *rp; struct acttab *pActtab; int i, j, n, sz; int nLookAhead; int szActionType; /* sizeof(YYACTIONTYPE) */ int szCodeType; /* sizeof(YYCODETYPE) */ const char *name; int mnTknOfst, mxTknOfst; int mnNtOfst, mxNtOfst; struct axset *ax; char *prefix; lemp->minShiftReduce = lemp->nstate; lemp->errAction = lemp->minShiftReduce + lemp->nrule; lemp->accAction = lemp->errAction + 1; lemp->noAction = lemp->accAction + 1; lemp->minReduce = lemp->noAction + 1; lemp->maxAction = lemp->minReduce + lemp->nrule; in = tplt_open(lemp); if (in == 0) return; out = file_open(lemp, ".c.inc", "wb"); if (out == 0) { fclose(in); return; } if (sqlFlag == 0) { sql = 0; } else { sql = file_open(lemp, ".sql", "wb"); if (sql == 0) { fclose(in); fclose(out); return; } fprintf(sql, "BEGIN;\n" "CREATE TABLE symbol(\n" " id INTEGER PRIMARY KEY,\n" " name TEXT NOT NULL,\n" " isTerminal BOOLEAN NOT NULL,\n" " fallback INTEGER REFERENCES symbol" " DEFERRABLE INITIALLY DEFERRED\n" ");\n"); for (i = 0; i < lemp->nsymbol; i++) { fprintf(sql, "INSERT INTO symbol(id,name,isTerminal,fallback)" "VALUES(%d,'%s',%s", i, lemp->symbols[i]->name, i < lemp->nterminal ? "TRUE" : "FALSE"); if (lemp->symbols[i]->fallback) { fprintf(sql, ",%d);\n", lemp->symbols[i]->fallback->index); } else { fprintf(sql, ",NULL);\n"); } } fprintf(sql, "CREATE TABLE rule(\n" " ruleid INTEGER PRIMARY KEY,\n" " lhs INTEGER REFERENCES symbol(id),\n" " txt TEXT\n" ");\n" "CREATE TABLE rulerhs(\n" " ruleid INTEGER REFERENCES rule(ruleid),\n" " pos INTEGER,\n" " sym INTEGER REFERENCES symbol(id)\n" ");\n"); for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) { assert(i == rp->iRule); fprintf(sql, "INSERT INTO rule(ruleid,lhs,txt)VALUES(%d,%d,'", rp->iRule, rp->lhs->index); writeRuleText(sql, rp); fprintf(sql, "');\n"); for (j = 0; j < rp->nrhs; j++) { struct symbol *sp = rp->rhs[j]; if (sp->type != MULTITERMINAL) { fprintf(sql, "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", i, j, sp->index); } else { int k; for (k = 0; k < sp->nsubsym; k++) { fprintf(sql, "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", i, j, sp->subsym[k]->index); } } } } fprintf(sql, "COMMIT;\n"); } lineno = 1; fprintf( out, "/* This file is automatically generated by Lemon from input grammar\n" "** source file \"%s\". */\n", lemp->filename); lineno += 2; /* The first %include directive begins with a C-language comment, ** then skip over the header comment of the template file */ if (lemp->include == 0) lemp->include = ""; for (i = 0; ISSPACE(lemp->include[i]); i++) { if (lemp->include[i] == '\n') { lemp->include += i + 1; i = -1; } } if (lemp->include[0] == '/') { tplt_skip_header(in, &lineno); } else { tplt_xfer(lemp->name, in, out, &lineno); } /* Generate the include code, if any */ tplt_print(out, lemp, lemp->include, &lineno); if (mhflag) { char *incName = file_makename(lemp, ".h.inc"); fprintf(out, "#include \"%s\"\n", incName); lineno++; free(incName); } tplt_xfer(lemp->name, in, out, &lineno); /* Generate #defines for all tokens */ if (lemp->tokenprefix) prefix = lemp->tokenprefix; else prefix = ""; if (mhflag) { fprintf(out, "#if INTERFACE\n"); lineno++; } else { fprintf(out, "#ifndef %s%s\n", prefix, lemp->symbols[1]->name); } for (i = 1; i < lemp->nterminal; i++) { fprintf(out, "#define %s%-30s %2d\n", prefix, lemp->symbols[i]->name, i); lineno++; } fprintf(out, "#endif\n"); lineno++; tplt_xfer(lemp->name, in, out, &lineno); /* Generate the defines */ fprintf(out, "#define YYCODETYPE %s\n", minimum_size_type(0, lemp->nsymbol, &szCodeType)); lineno++; fprintf(out, "#define YYNOCODE %d\n", lemp->nsymbol); lineno++; fprintf(out, "#define YYACTIONTYPE %s\n", minimum_size_type(0, lemp->maxAction, &szActionType)); lineno++; if (lemp->wildcard) { fprintf(out, "#define YYWILDCARD %d\n", lemp->wildcard->index); lineno++; } print_stack_union(out, lemp, &lineno, mhflag); fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; if (lemp->stacksize) { fprintf(out, "#define YYSTACKDEPTH %s\n", lemp->stacksize); lineno++; } else { fprintf(out, "#define YYSTACKDEPTH 100\n"); lineno++; } fprintf(out, "#endif\n"); lineno++; if (mhflag) { fprintf(out, "#if INTERFACE\n"); lineno++; } name = lemp->name ? lemp->name : "Parse"; if (lemp->arg && lemp->arg[0]) { i = lemonStrlen(lemp->arg); while (i >= 1 && ISSPACE(lemp->arg[i - 1])) i--; while (i >= 1 && (ISALNUM(lemp->arg[i - 1]) || lemp->arg[i - 1] == '_')) i--; fprintf(out, "#define %sARG_SDECL %s;\n", name, lemp->arg); lineno++; fprintf(out, "#define %sARG_PDECL ,%s\n", name, lemp->arg); lineno++; fprintf(out, "#define %sARG_PARAM ,%s\n", name, &lemp->arg[i]); lineno++; fprintf(out, "#define %sARG_FETCH %s=yypParser->%s;\n", name, lemp->arg, &lemp->arg[i]); lineno++; fprintf(out, "#define %sARG_STORE yypParser->%s=%s;\n", name, &lemp->arg[i], &lemp->arg[i]); lineno++; } else { fprintf(out, "#define %sARG_SDECL\n", name); lineno++; fprintf(out, "#define %sARG_PDECL\n", name); lineno++; fprintf(out, "#define %sARG_PARAM\n", name); lineno++; fprintf(out, "#define %sARG_FETCH\n", name); lineno++; fprintf(out, "#define %sARG_STORE\n", name); lineno++; } if (lemp->ctx && lemp->ctx[0]) { i = lemonStrlen(lemp->ctx); while (i >= 1 && ISSPACE(lemp->ctx[i - 1])) i--; while (i >= 1 && (ISALNUM(lemp->ctx[i - 1]) || lemp->ctx[i - 1] == '_')) i--; fprintf(out, "#define %sCTX_SDECL %s;\n", name, lemp->ctx); lineno++; fprintf(out, "#define %sCTX_PDECL ,%s\n", name, lemp->ctx); lineno++; fprintf(out, "#define %sCTX_PARAM ,%s\n", name, &lemp->ctx[i]); lineno++; fprintf(out, "#define %sCTX_FETCH %s=yypParser->%s;\n", name, lemp->ctx, &lemp->ctx[i]); lineno++; fprintf(out, "#define %sCTX_STORE yypParser->%s=%s;\n", name, &lemp->ctx[i], &lemp->ctx[i]); lineno++; } else { fprintf(out, "#define %sCTX_SDECL\n", name); lineno++; fprintf(out, "#define %sCTX_PDECL\n", name); lineno++; fprintf(out, "#define %sCTX_PARAM\n", name); lineno++; fprintf(out, "#define %sCTX_FETCH\n", name); lineno++; fprintf(out, "#define %sCTX_STORE\n", name); lineno++; } if (mhflag) { fprintf(out, "#endif\n"); lineno++; } if (lemp->errsym && lemp->errsym->useCnt) { fprintf(out, "#define YYERRORSYMBOL %d\n", lemp->errsym->index); lineno++; fprintf(out, "#define YYERRSYMDT yy%d\n", lemp->errsym->dtnum); lineno++; } if (lemp->has_fallback) { fprintf(out, "#define YYFALLBACK 1\n"); lineno++; } /* Compute the action table, but do not output it yet. The action ** table must be computed before generating the YYNSTATE macro because ** we need to know how many states can be eliminated. */ ax = (struct axset *)calloc(lemp->nxstate * 2, sizeof(ax[0])); if (ax == 0) { fprintf(stderr, "malloc failed\n"); exit(1); } for (i = 0; i < lemp->nxstate; i++) { stp = lemp->sorted[i]; ax[i * 2].stp = stp; ax[i * 2].isTkn = 1; ax[i * 2].nAction = stp->nTknAct; ax[i * 2 + 1].stp = stp; ax[i * 2 + 1].isTkn = 0; ax[i * 2 + 1].nAction = stp->nNtAct; } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for (i = 0; i < lemp->nxstate * 2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate * 2, sizeof(ax[0]), axset_compare); pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for (i = 0; i < lemp->nxstate * 2 && ax[i].nAction > 0; i++) { stp = ax[i].stp; if (ax[i].isTkn) { for (ap = stp->ap; ap; ap = ap->next) { int action; if (ap->sp->index >= lemp->nterminal) continue; action = compute_action(lemp, ap); if (action < 0) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iTknOfst = acttab_insert(pActtab, 1); if (stp->iTknOfst < mnTknOfst) mnTknOfst = stp->iTknOfst; if (stp->iTknOfst > mxTknOfst) mxTknOfst = stp->iTknOfst; } else { for (ap = stp->ap; ap; ap = ap->next) { int action; if (ap->sp->index < lemp->nterminal) continue; if (ap->sp->index == lemp->nsymbol) continue; action = compute_action(lemp, ap); if (action < 0) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iNtOfst = acttab_insert(pActtab, 0); if (stp->iNtOfst < mnNtOfst) mnNtOfst = stp->iNtOfst; if (stp->iNtOfst > mxNtOfst) mxNtOfst = stp->iNtOfst; } #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ { int jj, nn; for(jj=nn=0; jjnAction; jj++){ if( pActtab->aAction[jj].action<0 ) nn++; } printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", ax[i].nAction, pActtab->nAction, nn); } #endif } free(ax); /* Mark rules that are actually used for reduce actions after all ** optimizations have been applied */ for (rp = lemp->rule; rp; rp = rp->next) rp->doesReduce = LEMON_FALSE; for (i = 0; i < lemp->nxstate; i++) { for (ap = lemp->sorted[i]->ap; ap; ap = ap->next) { if (ap->type == REDUCE || ap->type == SHIFTREDUCE) { ap->x.rp->doesReduce = 1; } } } /* Finish rendering the constants now that the action table has ** been computed */ fprintf(out, "#define YYNSTATE %d\n", lemp->nxstate); lineno++; fprintf(out, "#define YYNRULE %d\n", lemp->nrule); lineno++; fprintf(out, "#define YYNRULE_WITH_ACTION %d\n", lemp->nruleWithAction); lineno++; fprintf(out, "#define YYNTOKEN %d\n", lemp->nterminal); lineno++; fprintf(out, "#define YY_MAX_SHIFT %d\n", lemp->nxstate - 1); lineno++; i = lemp->minShiftReduce; fprintf(out, "#define YY_MIN_SHIFTREDUCE %d\n", i); lineno++; i += lemp->nrule; fprintf(out, "#define YY_MAX_SHIFTREDUCE %d\n", i - 1); lineno++; fprintf(out, "#define YY_ERROR_ACTION %d\n", lemp->errAction); lineno++; fprintf(out, "#define YY_ACCEPT_ACTION %d\n", lemp->accAction); lineno++; fprintf(out, "#define YY_NO_ACTION %d\n", lemp->noAction); lineno++; fprintf(out, "#define YY_MIN_REDUCE %d\n", lemp->minReduce); lineno++; i = lemp->minReduce + lemp->nrule; fprintf(out, "#define YY_MAX_REDUCE %d\n", i - 1); lineno++; tplt_xfer(lemp->name, in, out, &lineno); /* Now output the action table and its associates: ** ** yy_action[] A single table containing all actions. ** yy_lookahead[] A table containing the lookahead for each entry in ** yy_action. Used to detect hash collisions. ** yy_shift_ofst[] For each state, the offset into yy_action for ** shifting terminals. ** yy_reduce_ofst[] For each state, the offset into yy_action for ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n * szActionType; fprintf(out, "#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out, "static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for (i = j = 0; i < n; i++) { int action = acttab_yyaction(pActtab, i); if (action < 0) action = lemp->noAction; if (j == 0) fprintf(out, " /* %5d */ ", i); fprintf(out, " %4d,", action); if (j == 9 || i == n - 1) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n * szCodeType; fprintf(out, "static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for (i = j = 0; i < n; i++) { int la = acttab_yylookahead(pActtab, i); if (la < 0) la = lemp->nsymbol; if (j == 0) fprintf(out, " /* %5d */ ", i); fprintf(out, " %4d,", la); if (j == 9) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } } /* Add extra entries to the end of the yy_lookahead[] table so that ** yy_shift_ofst[]+iToken will always be a valid index into the array, ** even for the largest possible value of yy_shift_ofst[] and iToken. */ nLookAhead = lemp->nterminal + lemp->nactiontab; while (i < nLookAhead) { if (j == 0) fprintf(out, " /* %5d */ ", i); fprintf(out, " %4d,", lemp->nterminal); if (j == 9) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } i++; } if (j > 0) { fprintf(out, "\n"); lineno++; } fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ n = lemp->nxstate; while (n > 0 && lemp->sorted[n - 1]->iTknOfst == NO_OFFSET) n--; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n - 1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; fprintf( out, "static const %s yy_shift_ofst[] = {\n", minimum_size_type(mnTknOfst, lemp->nterminal + lemp->nactiontab, &sz)); lineno++; lemp->tablesize += n * sz; for (i = j = 0; i < n; i++) { int ofst; stp = lemp->sorted[i]; ofst = stp->iTknOfst; if (ofst == NO_OFFSET) ofst = lemp->nactiontab; if (j == 0) fprintf(out, " /* %5d */ ", i); fprintf(out, " %4d,", ofst); if (j == 9 || i == n - 1) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ n = lemp->nxstate; while (n > 0 && lemp->sorted[n - 1]->iNtOfst == NO_OFFSET) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n - 1); lineno++; fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; fprintf(out, "static const %s yy_reduce_ofst[] = {\n", minimum_size_type(mnNtOfst - 1, mxNtOfst, &sz)); lineno++; lemp->tablesize += n * sz; for (i = j = 0; i < n; i++) { int ofst; stp = lemp->sorted[i]; ofst = stp->iNtOfst; if (ofst == NO_OFFSET) ofst = mnNtOfst - 1; if (j == 0) fprintf(out, " /* %5d */ ", i); fprintf(out, " %4d,", ofst); if (j == 9 || i == n - 1) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } } fprintf(out, "};\n"); lineno++; /* Output the default action table */ fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; n = lemp->nxstate; lemp->tablesize += n * szActionType; for (i = j = 0; i < n; i++) { stp = lemp->sorted[i]; if (j == 0) fprintf(out, " /* %5d */ ", i); if (stp->iDfltReduce < 0) { fprintf(out, " %4d,", lemp->errAction); } else { fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce); } if (j == 9 || i == n - 1) { fprintf(out, "\n"); lineno++; j = 0; } else { j++; } } fprintf(out, "};\n"); lineno++; tplt_xfer(lemp->name, in, out, &lineno); /* Generate the table of fallback tokens. */ if (lemp->has_fallback) { int mx = lemp->nterminal - 1; /* 2019-08-28: Generate fallback entries for every token to avoid ** having to do a range check on the index */ /* while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } */ lemp->tablesize += (mx + 1) * szCodeType; for (i = 0; i <= mx; i++) { struct symbol *p = lemp->symbols[i]; if (p->fallback == 0) { fprintf(out, " 0, /* %10s => nothing */\n", p->name); } else { fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, p->name, p->fallback->name); } lineno++; } } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing the symbolic name of every symbol */ for (i = 0; i < lemp->nsymbol; i++) { lemon_sprintf(line, "\"%s\",", lemp->symbols[i]->name); fprintf(out, " /* %4d */ \"%s\",\n", i, lemp->symbols[i]->name); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) { assert(rp->iRule == i); fprintf(out, " /* %3d */ \"", i); writeRuleText(out, rp); fprintf(out, "\",\n"); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes every time a symbol is popped from ** the stack while processing errors or while destroying the parser. ** (In other words, generate the %destructor actions) */ if (lemp->tokendest) { int once = 1; for (i = 0; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type != TERMINAL) continue; if (once) { fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; once = 0; } fprintf(out, " case %d: /* %s */\n", sp->index, sp->name); lineno++; } for (i = 0; i < lemp->nsymbol && lemp->symbols[i]->type != TERMINAL; i++) ; if (i < lemp->nsymbol) { emit_destructor_code(out, lemp->symbols[i], lemp, &lineno); fprintf(out, " break;\n"); lineno++; } } if (lemp->vardest) { struct symbol *dflt_sp = 0; int once = 1; for (i = 0; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type == TERMINAL || sp->index <= 0 || sp->destructor != 0) continue; if (once) { fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; once = 0; } fprintf(out, " case %d: /* %s */\n", sp->index, sp->name); lineno++; dflt_sp = sp; } if (dflt_sp != 0) { emit_destructor_code(out, dflt_sp, lemp, &lineno); } fprintf(out, " break;\n"); lineno++; } for (i = 0; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type == TERMINAL || sp->destructor == 0) continue; if (sp->destLineno < 0) continue; /* Already emitted */ fprintf(out, " case %d: /* %s */\n", sp->index, sp->name); lineno++; /* Combine duplicate destructors into a single case */ for (j = i + 1; j < lemp->nsymbol; j++) { struct symbol *sp2 = lemp->symbols[j]; if (sp2 && sp2->type != TERMINAL && sp2->destructor && sp2->dtnum == sp->dtnum && strcmp(sp->destructor, sp2->destructor) == 0) { fprintf(out, " case %d: /* %s */\n", sp2->index, sp2->name); lineno++; sp2->destLineno = -1; /* Avoid emitting this destructor again */ } } emit_destructor_code(out, lemp->symbols[i], lemp, &lineno); fprintf(out, " break;\n"); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes whenever the parser stack overflows */ tplt_print(out, lemp, lemp->overflow, &lineno); tplt_xfer(lemp->name, in, out, &lineno); /* Generate the tables of rule information. yyRuleInfoLhs[] and ** yyRuleInfoNRhs[]. ** ** Note: This code depends on the fact that rules are number ** sequentually beginning with 0. */ for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) { fprintf(out, " %4d, /* (%d) ", rp->lhs->index, i); rule_print(out, rp); fprintf(out, " */\n"); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) { fprintf(out, " %3d, /* (%d) ", -rp->nrhs, i); rule_print(out, rp); fprintf(out, " */\n"); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which execution during each REDUCE action */ i = 0; for (rp = lemp->rule; rp; rp = rp->next) { i += translate_code(lemp, rp); } if (i) { fprintf(out, " YYMINORTYPE yylhsminor;\n"); lineno++; } /* First output rules other than the default: rule */ for (rp = lemp->rule; rp; rp = rp->next) { struct rule *rp2; /* Other rules with the same action */ if (rp->codeEmitted) continue; if (rp->noCode) { /* No C code actions, so this will be part of the "default:" rule */ continue; } fprintf(out, " case %d: /* ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */\n"); lineno++; for (rp2 = rp->next; rp2; rp2 = rp2->next) { if (rp2->code == rp->code && rp2->codePrefix == rp->codePrefix && rp2->codeSuffix == rp->codeSuffix) { fprintf(out, " case %d: /* ", rp2->iRule); writeRuleText(out, rp2); fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; rp2->codeEmitted = 1; } } emit_code(out, rp, lemp, &lineno); fprintf(out, " break;\n"); lineno++; rp->codeEmitted = 1; } /* Finally, output the default: rule. We choose as the default: all ** empty actions. */ fprintf(out, " default:\n"); lineno++; for (rp = lemp->rule; rp; rp = rp->next) { if (rp->codeEmitted) continue; assert(rp->noCode); fprintf(out, " /* (%d) ", rp->iRule); writeRuleText(out, rp); if (rp->neverReduce) { fprintf(out, " (NEVER REDUCES) */ assert(yyruleno!=%d);\n", rp->iRule); lineno++; } else if (rp->doesReduce) { fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; } else { fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", rp->iRule); lineno++; } } fprintf(out, " break;\n"); lineno++; tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes if a parse fails */ tplt_print(out, lemp, lemp->failure, &lineno); tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes when a syntax error occurs */ tplt_print(out, lemp, lemp->error, &lineno); tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes when the parser accepts its input */ tplt_print(out, lemp, lemp->accept, &lineno); tplt_xfer(lemp->name, in, out, &lineno); /* Append any addition code the user desires */ tplt_print(out, lemp, lemp->extracode, &lineno); acttab_free(pActtab); fclose(in); fclose(out); if (sql) fclose(sql); return; } /* Generate a header file for the parser */ void ReportHeader(struct lemon *lemp) { FILE *out, *in; const char *prefix; char *line = gc(xmalloc(LINESIZE)); char *pattern = gc(xmalloc(LINESIZE)); int i; if (lemp->tokenprefix) prefix = lemp->tokenprefix; else prefix = ""; in = file_open(lemp, ".h.inc", "rb"); if (in) { int nextChar; for (i = 1; i < lemp->nterminal && fgets(line, LINESIZE, in); i++) { lemon_sprintf(pattern, "#define %s%-30s %3d\n", prefix, lemp->symbols[i]->name, i); if (strcmp(line, pattern)) break; } nextChar = fgetc(in); fclose(in); if (i == lemp->nterminal && nextChar == EOF) { /* No change in the file. Don't rewrite it. */ return; } } out = file_open(lemp, ".h.inc", "wb"); if (out) { for (i = 1; i < lemp->nterminal; i++) { fprintf(out, "#define %s%-30s %3d\n", prefix, lemp->symbols[i]->name, i); } fclose(out); } return; } /* Reduce the size of the action tables, if possible, by making use ** of defaults. ** ** In this version, we take the most frequent REDUCE action and make ** it the default. Except, there is no default if the wildcard token ** is a possible look-ahead. */ void CompressTables(struct lemon *lemp) { struct state *stp; struct action *ap, *ap2, *nextap; struct rule *rp, *rp2, *rbest; int nbest, n; int i; int usesWildcard; for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; nbest = 0; rbest = 0; usesWildcard = 0; for (ap = stp->ap; ap; ap = ap->next) { if (ap->type == SHIFT && ap->sp == lemp->wildcard) { usesWildcard = 1; } if (ap->type != REDUCE) continue; rp = ap->x.rp; if (rp->lhsStart) continue; if (rp == rbest) continue; n = 1; for (ap2 = ap->next; ap2; ap2 = ap2->next) { if (ap2->type != REDUCE) continue; rp2 = ap2->x.rp; if (rp2 == rbest) continue; if (rp2 == rp) n++; } if (n > nbest) { nbest = n; rbest = rp; } } /* Do not make a default if the number of rules to default ** is not at least 1 or if the wildcard token is a possible ** lookahead. */ if (nbest < 1 || usesWildcard) continue; /* Combine matching REDUCE actions into a single default */ for (ap = stp->ap; ap; ap = ap->next) { if (ap->type == REDUCE && ap->x.rp == rbest) break; } assert(ap); ap->sp = Symbol_new("{default}"); for (ap = ap->next; ap; ap = ap->next) { if (ap->type == REDUCE && ap->x.rp == rbest) ap->type = NOT_USED; } stp->ap = Action_sort(stp->ap); for (ap = stp->ap; ap; ap = ap->next) { if (ap->type == SHIFT) break; if (ap->type == REDUCE && ap->x.rp != rbest) break; } if (ap == 0) { stp->autoReduce = 1; stp->pDfltReduce = rbest; } } /* Make a second pass over all states and actions. Convert ** every action that is a SHIFT to an autoReduce state into ** a SHIFTREDUCE action. */ for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for (ap = stp->ap; ap; ap = ap->next) { struct state *pNextState; if (ap->type != SHIFT) continue; pNextState = ap->x.stp; if (pNextState->autoReduce && pNextState->pDfltReduce != 0) { ap->type = SHIFTREDUCE; ap->x.rp = pNextState->pDfltReduce; } } } /* If a SHIFTREDUCE action specifies a rule that has a single RHS term ** (meaning that the SHIFTREDUCE will land back in the state where it ** started) and if there is no C-code associated with the reduce action, ** then we can go ahead and convert the action to be the same as the ** action for the RHS of the rule. */ for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for (ap = stp->ap; ap; ap = nextap) { nextap = ap->next; if (ap->type != SHIFTREDUCE) continue; rp = ap->x.rp; if (rp->noCode == 0) continue; if (rp->nrhs != 1) continue; #if 1 /* Only apply this optimization to non-terminals. It would be OK to ** apply it to terminal symbols too, but that makes the parser tables ** larger. */ if (ap->sp->index < lemp->nterminal) continue; #endif /* If we reach this point, it means the optimization can be applied */ nextap = ap; for (ap2 = stp->ap; ap2 && (ap2 == ap || ap2->sp != rp->lhs); ap2 = ap2->next) { } assert(ap2 != 0); ap->spOpt = ap2->sp; ap->type = ap2->type; ap->x = ap2->x; } } } /* ** Compare two states for sorting purposes. The smaller state is the ** one with the most non-terminal actions. If they have the same number ** of non-terminal actions, then the smaller is the one with the most ** token actions. */ static int stateResortCompare(const void *a, const void *b) { const struct state *pA = *(const struct state **)a; const struct state *pB = *(const struct state **)b; int n; n = pB->nNtAct - pA->nNtAct; if (n == 0) { n = pB->nTknAct - pA->nTknAct; if (n == 0) { n = pB->statenum - pA->statenum; } } assert(n != 0); return n; } /* ** Renumber and resort states so that states with fewer choices ** occur at the end. Except, keep state 0 as the first state. */ void ResortStates(struct lemon *lemp) { int i; struct state *stp; struct action *ap; for (i = 0; i < lemp->nstate; i++) { stp = lemp->sorted[i]; stp->nTknAct = stp->nNtAct = 0; stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ stp->iTknOfst = NO_OFFSET; stp->iNtOfst = NO_OFFSET; for (ap = stp->ap; ap; ap = ap->next) { int iAction = compute_action(lemp, ap); if (iAction >= 0) { if (ap->sp->index < lemp->nterminal) { stp->nTknAct++; } else if (ap->sp->index < lemp->nsymbol) { stp->nNtAct++; } else { assert(stp->autoReduce == 0 || stp->pDfltReduce == ap->x.rp); stp->iDfltReduce = iAction; } } } } qsort(&lemp->sorted[1], lemp->nstate - 1, sizeof(lemp->sorted[0]), stateResortCompare); for (i = 0; i < lemp->nstate; i++) { lemp->sorted[i]->statenum = i; } lemp->nxstate = lemp->nstate; while (lemp->nxstate > 1 && lemp->sorted[lemp->nxstate - 1]->autoReduce) { lemp->nxstate--; } } /***************** From the file "set.c" ************************************/ /* ** Set manipulation routines for the LEMON parser generator. */ static int size = 0; /* Set the set size */ void SetSize(int n) { size = n + 1; } /* Allocate a new set */ char *SetNew(void) { char *s; s = (char *)calloc(size, 1); if (s == 0) { memory_error(); } return s; } /* Deallocate a set */ void SetFree(char *s) { free(s); } /* Add a new element to the set. Return TRUE if the element was added ** and FALSE if it was already there. */ int SetAdd(char *s, int e) { int rv; assert(e >= 0 && e < size); rv = s[e]; s[e] = 1; return !rv; } /* Add every element of s2 to s1. Return TRUE if s1 changes. */ int SetUnion(char *s1, char *s2) { int i, progress; progress = 0; for (i = 0; i < size; i++) { if (s2[i] == 0) continue; if (s1[i] == 0) { progress = 1; s1[i] = 1; } } return progress; } /********************** From the file "table.c" ****************************/ /* ** All code in this file has been automatically generated ** from a specification in the file ** "table.q" ** by the associative array code building program "aagen". ** Do not edit this file! Instead, edit the specification ** file, then rerun aagen. */ /* ** Code for processing tables in the LEMON parser generator. */ PRIVATE unsigned strhash(const char *x) { unsigned h = 0; while (*x) h = h * 13 + *(x++); return h; } /* Works like strdup, sort of. Save a string in malloced memory, but ** keep strings in a table so that the same string is not in more ** than one place. */ const char *Strsafe(const char *y) { const char *z; char *cpy; if (y == 0) return 0; z = Strsafe_find(y); if (z == 0 && (cpy = (char *)malloc(lemonStrlen(y) + 1)) != 0) { lemon_strcpy(cpy, y); z = cpy; Strsafe_insert(z); } MemoryCheck(z); return z; } /* There is one instance of the following structure for each ** associative array of type "x1". */ struct s_x1 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x1node *tbl; /* The data stored here */ struct s_x1node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x1". */ typedef struct s_x1node { const char *data; /* The data */ struct s_x1node *next; /* Next entry with the same hash */ struct s_x1node **from; /* Previous link */ } x1node; /* There is only one instance of the array, which is the following */ static struct s_x1 *x1a; /* Allocate a new associative array */ void Strsafe_init(void) { if (x1a) return; x1a = (struct s_x1 *)malloc(sizeof(struct s_x1)); if (x1a) { x1a->size = 1024; x1a->count = 0; x1a->tbl = (x1node *)calloc(1024, sizeof(x1node) + sizeof(x1node *)); if (x1a->tbl == 0) { free(x1a); x1a = 0; } else { int i; x1a->ht = (x1node **)&(x1a->tbl[1024]); for (i = 0; i < 1024; i++) x1a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Strsafe_insert(const char *data) { x1node *np; unsigned h; unsigned ph; if (x1a == 0) return 0; ph = strhash(data); h = ph & (x1a->size - 1); np = x1a->ht[h]; while (np) { if (strcmp(np->data, data) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x1a->count >= x1a->size) { /* Need to make the hash table bigger */ int i, arrSize; struct s_x1 array; array.size = arrSize = x1a->size * 2; array.count = x1a->count; array.tbl = (x1node *)calloc(arrSize, sizeof(x1node) + sizeof(x1node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x1node **)&(array.tbl[arrSize]); for (i = 0; i < arrSize; i++) array.ht[i] = 0; for (i = 0; i < x1a->count; i++) { x1node *oldnp, *newnp; oldnp = &(x1a->tbl[i]); h = strhash(oldnp->data) & (arrSize - 1); newnp = &(array.tbl[i]); if (array.ht[h]) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x1a->tbl); *x1a = array; } /* Insert the new data */ h = ph & (x1a->size - 1); np = &(x1a->tbl[x1a->count++]); np->data = data; if (x1a->ht[h]) x1a->ht[h]->from = &(np->next); np->next = x1a->ht[h]; x1a->ht[h] = np; np->from = &(x1a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ const char *Strsafe_find(const char *key) { unsigned h; x1node *np; if (x1a == 0) return 0; h = strhash(key) & (x1a->size - 1); np = x1a->ht[h]; while (np) { if (strcmp(np->data, key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return a pointer to the (terminal or nonterminal) symbol "x". ** Create a new symbol if this is the first time "x" has been seen. */ struct symbol *Symbol_new(const char *x) { struct symbol *sp; sp = Symbol_find(x); if (sp == 0) { sp = (struct symbol *)calloc(1, sizeof(struct symbol)); MemoryCheck(sp); sp->name = Strsafe(x); sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; sp->rule = 0; sp->fallback = 0; sp->prec = -1; sp->assoc = UNK; sp->firstset = 0; sp->lambda = LEMON_FALSE; sp->destructor = 0; sp->destLineno = 0; sp->datatype = 0; sp->useCnt = 0; Symbol_insert(sp, sp->name); } sp->useCnt++; return sp; } /* Compare two symbols for sorting purposes. Return negative, ** zero, or positive if a is less then, equal to, or greater ** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters ** (non-terminals). And MULTITERMINAL symbols (created using the ** %token_class directive) must sort at the very end. Other than ** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the ** smallest parser tables in SQLite. */ int Symbolcmpp(const void *_a, const void *_b) { const struct symbol *a = *(const struct symbol **)_a; const struct symbol *b = *(const struct symbol **)_b; int i1 = a->type == MULTITERMINAL ? 3 : a->name[0] > 'Z' ? 2 : 1; int i2 = b->type == MULTITERMINAL ? 3 : b->name[0] > 'Z' ? 2 : 1; return i1 == i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each ** associative array of type "x2". */ struct s_x2 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x2node *tbl; /* The data stored here */ struct s_x2node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x2". */ typedef struct s_x2node { struct symbol *data; /* The data */ const char *key; /* The key */ struct s_x2node *next; /* Next entry with the same hash */ struct s_x2node **from; /* Previous link */ } x2node; /* There is only one instance of the array, which is the following */ static struct s_x2 *x2a; /* Allocate a new associative array */ void Symbol_init(void) { if (x2a) return; x2a = (struct s_x2 *)malloc(sizeof(struct s_x2)); if (x2a) { x2a->size = 128; x2a->count = 0; x2a->tbl = (x2node *)calloc(128, sizeof(x2node) + sizeof(x2node *)); if (x2a->tbl == 0) { free(x2a); x2a = 0; } else { int i; x2a->ht = (x2node **)&(x2a->tbl[128]); for (i = 0; i < 128; i++) x2a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Symbol_insert(struct symbol *data, const char *key) { x2node *np; unsigned h; unsigned ph; if (x2a == 0) return 0; ph = strhash(key); h = ph & (x2a->size - 1); np = x2a->ht[h]; while (np) { if (strcmp(np->key, key) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x2a->count >= x2a->size) { /* Need to make the hash table bigger */ int i, arrSize; struct s_x2 array; array.size = arrSize = x2a->size * 2; array.count = x2a->count; array.tbl = (x2node *)calloc(arrSize, sizeof(x2node) + sizeof(x2node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x2node **)&(array.tbl[arrSize]); for (i = 0; i < arrSize; i++) array.ht[i] = 0; for (i = 0; i < x2a->count; i++) { x2node *oldnp, *newnp; oldnp = &(x2a->tbl[i]); h = strhash(oldnp->key) & (arrSize - 1); newnp = &(array.tbl[i]); if (array.ht[h]) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x2a->tbl); *x2a = array; } /* Insert the new data */ h = ph & (x2a->size - 1); np = &(x2a->tbl[x2a->count++]); np->key = key; np->data = data; if (x2a->ht[h]) x2a->ht[h]->from = &(np->next); np->next = x2a->ht[h]; x2a->ht[h] = np; np->from = &(x2a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct symbol *Symbol_find(const char *key) { unsigned h; x2node *np; if (x2a == 0) return 0; h = strhash(key) & (x2a->size - 1); np = x2a->ht[h]; while (np) { if (strcmp(np->key, key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return the n-th data. Return NULL if n is out of range. */ struct symbol *Symbol_Nth(int n) { struct symbol *data; if (x2a && n > 0 && n <= x2a->count) { data = x2a->tbl[n - 1].data; } else { data = 0; } return data; } /* Return the size of the array */ int Symbol_count() { return x2a ? x2a->count : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct symbol **Symbol_arrayof() { struct symbol **array; int i, arrSize; if (x2a == 0) return 0; arrSize = x2a->count; array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); if (array) { for (i = 0; i < arrSize; i++) array[i] = x2a->tbl[i].data; } return array; } /* Compare two configurations */ int Configcmp(const char *_a, const char *_b) { const struct config *a = (struct config *)_a; const struct config *b = (struct config *)_b; int x; x = a->rp->index - b->rp->index; if (x == 0) x = a->dot - b->dot; return x; } /* Compare two states */ PRIVATE int statecmp(struct config *a, struct config *b) { int rc; for (rc = 0; rc == 0 && a && b; a = a->bp, b = b->bp) { rc = a->rp->index - b->rp->index; if (rc == 0) rc = a->dot - b->dot; } if (rc == 0) { if (a) rc = 1; if (b) rc = -1; } return rc; } /* Hash a state */ PRIVATE unsigned statehash(struct config *a) { unsigned h = 0; while (a) { h = h * 571 + a->rp->index * 37 + a->dot; a = a->bp; } return h; } /* Allocate a new state structure */ struct state *State_new() { struct state *newstate; newstate = (struct state *)calloc(1, sizeof(struct state)); MemoryCheck(newstate); return newstate; } /* There is one instance of the following structure for each ** associative array of type "x3". */ struct s_x3 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x3node *tbl; /* The data stored here */ struct s_x3node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x3". */ typedef struct s_x3node { struct state *data; /* The data */ struct config *key; /* The key */ struct s_x3node *next; /* Next entry with the same hash */ struct s_x3node **from; /* Previous link */ } x3node; /* There is only one instance of the array, which is the following */ static struct s_x3 *x3a; /* Allocate a new associative array */ void State_init(void) { if (x3a) return; x3a = (struct s_x3 *)malloc(sizeof(struct s_x3)); if (x3a) { x3a->size = 128; x3a->count = 0; x3a->tbl = (x3node *)calloc(128, sizeof(x3node) + sizeof(x3node *)); if (x3a->tbl == 0) { free(x3a); x3a = 0; } else { int i; x3a->ht = (x3node **)&(x3a->tbl[128]); for (i = 0; i < 128; i++) x3a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int State_insert(struct state *data, struct config *key) { x3node *np; unsigned h; unsigned ph; if (x3a == 0) return 0; ph = statehash(key); h = ph & (x3a->size - 1); np = x3a->ht[h]; while (np) { if (statecmp(np->key, key) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x3a->count >= x3a->size) { /* Need to make the hash table bigger */ int i, arrSize; struct s_x3 array; array.size = arrSize = x3a->size * 2; array.count = x3a->count; array.tbl = (x3node *)calloc(arrSize, sizeof(x3node) + sizeof(x3node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x3node **)&(array.tbl[arrSize]); for (i = 0; i < arrSize; i++) array.ht[i] = 0; for (i = 0; i < x3a->count; i++) { x3node *oldnp, *newnp; oldnp = &(x3a->tbl[i]); h = statehash(oldnp->key) & (arrSize - 1); newnp = &(array.tbl[i]); if (array.ht[h]) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x3a->tbl); *x3a = array; } /* Insert the new data */ h = ph & (x3a->size - 1); np = &(x3a->tbl[x3a->count++]); np->key = key; np->data = data; if (x3a->ht[h]) x3a->ht[h]->from = &(np->next); np->next = x3a->ht[h]; x3a->ht[h] = np; np->from = &(x3a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct state *State_find(struct config *key) { unsigned h; x3node *np; if (x3a == 0) return 0; h = statehash(key) & (x3a->size - 1); np = x3a->ht[h]; while (np) { if (statecmp(np->key, key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct state **State_arrayof(void) { struct state **array; int i, arrSize; if (x3a == 0) return 0; arrSize = x3a->count; array = (struct state **)calloc(arrSize, sizeof(struct state *)); if (array) { for (i = 0; i < arrSize; i++) array[i] = x3a->tbl[i].data; } return array; } /* Hash a configuration */ PRIVATE unsigned confighash(struct config *a) { unsigned h = 0; h = h * 571 + a->rp->index * 37 + a->dot; return h; } /* There is one instance of the following structure for each ** associative array of type "x4". */ struct s_x4 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x4node *tbl; /* The data stored here */ struct s_x4node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x4". */ typedef struct s_x4node { struct config *data; /* The data */ struct s_x4node *next; /* Next entry with the same hash */ struct s_x4node **from; /* Previous link */ } x4node; /* There is only one instance of the array, which is the following */ static struct s_x4 *x4a; /* Allocate a new associative array */ void Configtable_init(void) { if (x4a) return; x4a = (struct s_x4 *)malloc(sizeof(struct s_x4)); if (x4a) { x4a->size = 64; x4a->count = 0; x4a->tbl = (x4node *)calloc(64, sizeof(x4node) + sizeof(x4node *)); if (x4a->tbl == 0) { free(x4a); x4a = 0; } else { int i; x4a->ht = (x4node **)&(x4a->tbl[64]); for (i = 0; i < 64; i++) x4a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Configtable_insert(struct config *data) { x4node *np; unsigned h; unsigned ph; if (x4a == 0) return 0; ph = confighash(data); h = ph & (x4a->size - 1); np = x4a->ht[h]; while (np) { if (Configcmp((const char *)np->data, (const char *)data) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x4a->count >= x4a->size) { /* Need to make the hash table bigger */ int i, arrSize; struct s_x4 array; array.size = arrSize = x4a->size * 2; array.count = x4a->count; array.tbl = (x4node *)calloc(arrSize, sizeof(x4node) + sizeof(x4node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x4node **)&(array.tbl[arrSize]); for (i = 0; i < arrSize; i++) array.ht[i] = 0; for (i = 0; i < x4a->count; i++) { x4node *oldnp, *newnp; oldnp = &(x4a->tbl[i]); h = confighash(oldnp->data) & (arrSize - 1); newnp = &(array.tbl[i]); if (array.ht[h]) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x4a->tbl); *x4a = array; } /* Insert the new data */ h = ph & (x4a->size - 1); np = &(x4a->tbl[x4a->count++]); np->data = data; if (x4a->ht[h]) x4a->ht[h]->from = &(np->next); np->next = x4a->ht[h]; x4a->ht[h] = np; np->from = &(x4a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct config *Configtable_find(struct config *key) { int h; x4node *np; if (x4a == 0) return 0; h = confighash(key) & (x4a->size - 1); np = x4a->ht[h]; while (np) { if (Configcmp((const char *)np->data, (const char *)key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Remove all data from the table. Pass each data to the function "f" ** as it is removed. ("f" may be null to avoid this step.) */ void Configtable_clear(int (*f)(struct config *)) { int i; if (x4a == 0 || x4a->count == 0) return; if (f) for (i = 0; i < x4a->count; i++) (*f)(x4a->tbl[i].data); for (i = 0; i < x4a->size; i++) x4a->ht[i] = 0; x4a->count = 0; return; }