Skip to content

Latest commit

 

History

History
4980 lines (4679 loc) · 146 KB

lemon.c

File metadata and controls

4980 lines (4679 loc) · 146 KB
 
Feb 28, 2009
Feb 28, 2009
1
2
3
4
5
6
/*
* My changes over the original lemon.c from SQLite are encased in
* #if __MOJOSHADER__ blocks. --ryan.
*/
#define __MOJOSHADER__ 1
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
** This file contains all sources (including headers) to the LEMON
** LALR(1) parser generator. The sources have been combined into a
** single file to make it easy to include LEMON in the source tree
** and Makefile of another program.
**
** The author of this program disclaims copyright.
*/
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <assert.h>
#ifndef __WIN32__
# if defined(_WIN32) || defined(WIN32)
# define __WIN32__
# endif
#endif
#ifdef __WIN32__
Nov 19, 2010
Nov 19, 2010
29
30
31
32
33
34
35
#ifdef __cplusplus
extern "C" {
#endif
extern int access(const char *path, int mode);
#ifdef __cplusplus
}
#endif
36
37
38
39
40
41
42
43
44
45
46
47
48
#else
#include <unistd.h>
#endif
/* #define PRIVATE static */
#define PRIVATE
#ifdef TEST
#define MAXRHS 5 /* Set low to exercise exception code */
#else
#define MAXRHS 1000
#endif
Feb 12, 2010
Feb 12, 2010
49
50
/* List of file names recorded for cleanup, and the number of entries. */
static const char **made_files = NULL;
static int made_files_count = 0;
/* Non-zero once the run is considered successful; see LemonAtExit(). */
static int successful_exit = 0;

/*
** Cleanup routine: when the run did not succeed, remove every file named
** in made_files[]; in all cases release the list itself and reset the
** bookkeeping so a second call is harmless.
**
** NOTE(review): presumably registered with atexit() elsewhere in the
** file -- the registration is not visible from here.
*/
static void LemonAtExit(void)
{
  /* if we failed, delete (most) files we made, to unconfuse build tools. */
  if (!successful_exit) {
    int i;
    for (i = 0; i < made_files_count; i++) {
      /* Best effort: a file that is already gone is not an error here. */
      remove(made_files[i]);
    }
  }
  free(made_files);
  made_files_count = 0;
  made_files = NULL;
}
66
67
68
69
70
71
72
73
74
static char *msort(char*,char**,int(*)(const char*,const char*));
/*
** Compilers are getting increasingly pedantic about type conversions
** as C evolves ever closer to Ada.... To work around the latest problems
** we have to define the following variant of strlen().
*/
#define lemonStrlen(X) ((int)strlen(X))
Feb 17, 2010
Feb 17, 2010
75
76
77
78
79
/* a few forward declarations... */
struct rule;
struct lemon;
struct action;
80
81
82
83
84
85
86
87
88
89
90
91
static struct action *Action_new(void);
static struct action *Action_sort(struct action *);
/********** From the file "build.h" ************************************/
/*
** Grammar-analysis entry points.  The prototypes spell out the parameter
** list: an empty "()" leaves arguments unchecked before C23 and means
** "(void)" from C23 on, which would conflict with the definitions below
** that take a struct lemon pointer.
**
** NOTE(review): the definitions of FindLinks, FindFollowSets and
** FindActions are outside this view; they are assumed to take the same
** struct lemon pointer as the other three -- confirm.
*/
struct lemon;
void FindRulePrecedences(struct lemon *);
void FindFirstSets(struct lemon *);
void FindStates(struct lemon *);
void FindLinks(struct lemon *);
void FindFollowSets(struct lemon *);
void FindActions(struct lemon *);
/********* From the file "configlist.h" *********************************/
Feb 17, 2010
Feb 17, 2010
92
93
94
95
96
97
98
99
100
101
void Configlist_init(void);
struct config *Configlist_add(struct rule *, int);
struct config *Configlist_addbasis(struct rule *, int);
void Configlist_closure(struct lemon *);
void Configlist_sort(void);
void Configlist_sortbasis(void);
struct config *Configlist_return(void);
struct config *Configlist_basis(void);
void Configlist_eat(struct config *);
void Configlist_reset(void);
102
103
104
105
106
/********* From the file "error.h" ***************************************/
void ErrorMsg(const char *, int,const char *, ...);
/****** From the file "option.h" ******************************************/
Feb 17, 2010
Feb 17, 2010
107
108
/* Kind of a command-line option entry; values start at 1. */
enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR,
                   OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR};

/*
** One entry of the option table handed to OptInit().
** NOTE(review): the per-field meanings below are inferred from the field
** names; the code that consumes the table is outside this view.
*/
struct s_options {
  enum option_type type;   /* Which kind of option this entry describes */
  const char *label;       /* presumably the option's name on the command line */
  char *arg;               /* presumably where the option's value is delivered */
  const char *message;     /* presumably the help text for the option */
};
Feb 17, 2010
Feb 17, 2010
115
116
117
118
119
int OptInit(char**,struct s_options*,FILE*);
int OptNArgs(void);
char *OptArg(int);
void OptErr(int);
void OptPrint(void);
120
121
/******** From the file "parse.h" *****************************************/
Feb 17, 2010
Feb 17, 2010
122
void Parse(struct lemon *lemp);
123
124
/********* From the file "plink.h" ***************************************/
Feb 17, 2010
Feb 17, 2010
125
126
127
128
struct plink *Plink_new(void);
void Plink_add(struct plink **, struct config *);
void Plink_copy(struct plink **, struct plink *);
void Plink_delete(struct plink *);
129
130
/********** From the file "report.h" *************************************/
Feb 17, 2010
Feb 17, 2010
131
132
133
134
135
136
void Reprint(struct lemon *);
void ReportOutput(struct lemon *);
void ReportTable(struct lemon *, int);
void ReportHeader(struct lemon *);
void CompressTables(struct lemon *);
void ResortStates(struct lemon *);
137
138
/********** From the file "set.h" ****************************************/
Feb 17, 2010
Feb 17, 2010
139
140
141
void SetSize(int); /* All sets will be of size N */
char *SetNew(void); /* A new set for element 0..N */
void SetFree(char*); /* Deallocate a set */
Feb 17, 2010
Feb 17, 2010
143
144
145
char *SetNew(void); /* A new set for element 0..N */
int SetAdd(char*,int); /* Add element to a set */
int SetUnion(char *,char *); /* A <- A U B, thru element N */
146
147
148
149
150
151
152
153
154
155
156
/* True if Y is in set X.  Both arguments are parenthesized so that
** expression arguments such as SetFind(s+1, i) index as intended. */
#define SetFind(X,Y) ((X)[(Y)])
/********** From the file "struct.h" *************************************/
/*
** Principal data structures for the LEMON parser generator.
*/
typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
/* Symbols (terminals and nonterminals) of the grammar are stored
** in the following: */
Feb 17, 2010
Feb 17, 2010
157
158
159
160
161
162
163
164
165
166
167
enum symbol_type {
TERMINAL,
NONTERMINAL,
MULTITERMINAL
};
/* Associativity of a symbol; stored in the "assoc" field of struct symbol
** and meaningful when a precedence is defined for that symbol. */
enum e_assoc {
LEFT,
RIGHT,
NONE,
UNK
};
168
struct symbol {
Feb 17, 2010
Feb 17, 2010
169
const char *name; /* Name of the symbol */
170
int index; /* Index number for this symbol */
Feb 17, 2010
Feb 17, 2010
171
enum symbol_type type; /* Symbols are all either TERMINALS or NTs */
172
173
174
struct rule *rule; /* Linked list of rules of this (if an NT) */
struct symbol *fallback; /* fallback token in case this token doesn't parse */
int prec; /* Precedence if defined (-1 otherwise) */
Feb 17, 2010
Feb 17, 2010
175
enum e_assoc assoc; /* Associativity if precedence is defined */
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
char *firstset; /* First-set for all rules of this symbol */
Boolean lambda; /* True if NT and can generate an empty string */
int useCnt; /* Number of times used */
char *destructor; /* Code which executes whenever this symbol is
** popped from the stack during error processing */
int destLineno; /* Line number for start of destructor */
char *datatype; /* The data type of information held by this
** object. Only used if type==NONTERMINAL */
int dtnum; /* The data type number. In the parser, the value
** stack is a union. The .yy%d element of this
** union is the correct data type for this object */
/* The following fields are used by MULTITERMINALs only */
int nsubsym; /* Number of constituent symbols in the MULTI */
struct symbol **subsym; /* Array of constituent symbols */
};
/* Each production rule in the grammar is stored in the following
** structure. */
struct rule {
struct symbol *lhs; /* Left-hand side of the rule */
Feb 17, 2010
Feb 17, 2010
196
const char *lhsalias; /* Alias for the LHS (NULL if none) */
197
198
199
200
int lhsStart; /* True if left-hand side is the start symbol */
int ruleline; /* Line number for the rule */
int nrhs; /* Number of RHS symbols */
struct symbol **rhs; /* The RHS symbols */
Feb 17, 2010
Feb 17, 2010
201
const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */
202
int line; /* Line number at which code begins */
Feb 17, 2010
Feb 17, 2010
203
const char *code; /* The code executed when this rule is reduced */
204
205
206
207
208
209
210
211
212
213
214
215
struct symbol *precsym; /* Precedence symbol for this rule */
int index; /* An index number for this rule */
Boolean canReduce; /* True if this rule is ever reduced */
struct rule *nextlhs; /* Next rule with the same LHS */
struct rule *next; /* Next rule in the global list */
};
/* A configuration is a production rule of the grammar together with
** a mark (dot) showing how much of that rule has been processed so far.
** Configurations also contain a follow-set which is a list of terminal
** symbols which are allowed to immediately follow the end of the rule.
** Every configuration is recorded as an instance of the following: */
Feb 17, 2010
Feb 17, 2010
216
217
218
219
/* Per-configuration progress marker; see the "status" field below. */
enum cfgstatus {
  COMPLETE,
  INCOMPLETE
};

/* One configuration: a rule plus the position of the dot within it. */
struct config {
  struct rule *rp;         /* The rule upon which the configuration is based */
  int dot;                 /* The parse point */
  char *fws;               /* Follow-set for this configuration only */
  struct plink *fplp;      /* Follow-set forward propagation links */
  struct plink *bplp;      /* Follow-set backwards propagation links */
  struct state *stp;       /* Pointer to state which contains this */
  enum cfgstatus status;   /* used during followset and shift computations */
  struct config *next;     /* Next configuration in the state */
  struct config *bp;       /* The next basis configuration */
};
Feb 17, 2010
Feb 17, 2010
232
233
234
235
236
237
238
239
240
241
242
243
244
/* Kind of a parser action; stored in the "type" field of struct action. */
enum e_action {
  SHIFT,
  ACCEPT,
  REDUCE,
  ERROR,
  SSCONFLICT,              /* A shift/shift conflict */
  SRCONFLICT,              /* Was a reduce, but part of a conflict */
  RRCONFLICT,              /* Was a reduce, but part of a conflict */
  SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
  RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
  NOT_USED                 /* Deleted by compression */
};

/* Every shift or reduce operation is stored as one of the following */
struct action {
  struct symbol *sp;       /* The look-ahead symbol */
  enum e_action type;      /* Which member of "x" is meaningful depends on this */
  union {
    struct state *stp;     /* The new state, if a shift */
    struct rule *rp;       /* The rule, if a reduce */
  } x;
  struct action *next;     /* Next action for this state */
  struct action *collide;  /* Next action with the same hash */
};
/* Each state of the generated parser's finite state machine
** is encoded as an instance of the following structure. */
/* getstate() fills in bp and cfp from the current configuration list,
** assigns statenum from the running lemp->nstate counter and starts ap
** at 0; the remaining fields are not written by any code in this view. */
struct state {
struct config *bp; /* The basis configurations for this state */
struct config *cfp; /* All configurations in this set */
int statenum; /* Sequential number for this state */
struct action *ap; /* Array of actions for this state */
int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */
int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */
int iDflt; /* Default action */
};
/* NOTE(review): presumably the sentinel stored in iTknOfst/iNtOfst when a
** state has no offset of that kind -- the users are outside this view. */
#define NO_OFFSET (-2147483647)
/* A followset propagation link indicates that the contents of one
** configuration followset should be propagated to another whenever
** the first changes. */
/* Node of a singly linked list of configurations; used for the fplp and
** bplp propagation lists held in struct config. */
struct plink {
struct config *cfp; /* The configuration to which linked */
struct plink *next; /* The next propagate link */
};
/* The state vector for the entire parser generator is recorded as
** follows.  (LEMON uses no global variables and makes little use of
** static variables.  Fields in the following structure can be thought
** of as being global variables in the program.) */
struct lemon {
  struct state **sorted;   /* Table of states sorted by state number */
  struct rule *rule;       /* List of all rules */
  int nstate;              /* Number of states */
  int nrule;               /* Number of rules */
  int nsymbol;             /* Number of terminal and nonterminal symbols */
  int nterminal;           /* Number of terminal symbols */
  struct symbol **symbols; /* Sorted array of pointers to symbols */
  int errorcnt;            /* Number of errors */
  struct symbol *errsym;   /* The error symbol */
  struct symbol *wildcard; /* Token that matches anything */
  char *name;              /* Name of the generated parser */
  char *arg;               /* Declaration of the 3rd argument to parser */
  char *tokentype;         /* Type of terminal symbols in the parser stack */
  char *vartype;           /* The default type of non-terminal symbols */
  char *start;             /* Name of the start symbol for the grammar */
  char *stacksize;         /* Size of the parser stack */
  char *include;           /* Code to put at the start of the C file */
  char *error;             /* Code to execute when an error is seen */
  char *overflow;          /* Code to execute on a stack overflow */
  char *failure;           /* Code to execute on parser failure */
  char *accept;            /* Code to execute when the parser accepts */
  char *extracode;         /* Code appended to the generated file */
  char *tokendest;         /* Code to execute to destroy token data */
  char *vardest;           /* Code for the default non-terminal destructor */
  char *filename;          /* Name of the input file */
  char *outname;           /* Name of the current output file */
  char *tokenprefix;       /* A prefix added to token names in the .h file */
  int nconflict;           /* Number of parsing conflicts */
#if __MOJOSHADER__
  int nexpected;           /* Number of expected parsing conflicts */
#endif
  int tablesize;           /* Size of the parse tables */
  int basisflag;           /* Print only basis configurations */
  int has_fallback;        /* True if any %fallback is seen in the grammar */
  int nolinenosflag;       /* True if #line statements should not be printed */
  char *argv0;             /* Name of the program */
};
/* Call memory_error() when X compares equal to 0.  The body is wrapped in
** do{...}while(0) so that "MemoryCheck(p);" behaves as a single statement,
** including directly before an "else".  Every use must end with a
** semicolon. */
#define MemoryCheck(X) do{ if((X)==0){ \
  extern void memory_error(void); \
  memory_error(); \
} }while(0)
/**************** From the file "table.h" *********************************/
/*
** All code in this file has been automatically generated
** from a specification in the file
** "table.q"
** by the associative array code building program "aagen".
** Do not edit this file! Instead, edit the specification
** file, then rerun aagen.
*/
/*
** Code for processing tables in the LEMON parser generator.
*/
/* Routines for handling strings */
Feb 17, 2010
Feb 17, 2010
340
const char *Strsafe(const char *);
Feb 17, 2010
Feb 17, 2010
342
343
344
void Strsafe_init(void);
int Strsafe_insert(const char *);
const char *Strsafe_find(const char *);
345
346
347
/* Routines for handling symbols of the grammar */
Feb 17, 2010
Feb 17, 2010
348
349
350
351
352
353
354
355
struct symbol *Symbol_new(const char *);
int Symbolcmpp(const void *, const void *);
void Symbol_init(void);
int Symbol_insert(struct symbol *, const char *);
struct symbol *Symbol_find(const char *);
struct symbol *Symbol_Nth(int);
int Symbol_count(void);
struct symbol **Symbol_arrayof(void);
356
357
358
/* Routines to manage the state table */
Feb 17, 2010
Feb 17, 2010
359
360
361
362
363
int Configcmp(const char *, const char *);
struct state *State_new(void);
void State_init(void);
int State_insert(struct state *, struct config *);
struct state *State_find(struct config *);
364
365
366
367
/* Declared with an explicit (void) so that calls are checked against an
** empty parameter list. */
struct state **State_arrayof(void);
/* Routines used for efficiency in Configlist_add */
Feb 17, 2010
Feb 17, 2010
368
369
370
371
372
void Configtable_init(void);
int Configtable_insert(struct config *);
struct config *Configtable_find(struct config *);
void Configtable_clear(int(*)(struct config *));
373
374
375
376
377
378
379
380
/****************** From the file "action.c" *******************************/
/*
** Routines processing parser actions in the LEMON parser generator.
*/
/* Allocate a new parser action */
static struct action *Action_new(void){
static struct action *freelist = 0;
Feb 17, 2010
Feb 17, 2010
381
struct action *newaction;
382
383
384
385
386
387
388
389
390
391
392
393
if( freelist==0 ){
int i;
int amt = 100;
freelist = (struct action *)calloc(amt, sizeof(struct action));
if( freelist==0 ){
fprintf(stderr,"Unable to allocate memory for a new parser action.");
exit(1);
}
for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
freelist[amt-1].next = 0;
}
Feb 17, 2010
Feb 17, 2010
394
newaction = freelist;
395
freelist = freelist->next;
Feb 17, 2010
Feb 17, 2010
396
return newaction;
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
}
/* Compare two actions for sorting purposes. Return negative, zero, or
** positive if the first action is less than, equal to, or greater than
** the first
*/
static int actioncmp(
struct action *ap1,
struct action *ap2
){
int rc;
rc = ap1->sp->index - ap2->sp->index;
if( rc==0 ){
rc = (int)ap1->type - (int)ap2->type;
}
if( rc==0 && ap1->type==REDUCE ){
rc = ap1->x.rp->index - ap2->x.rp->index;
}
Feb 17, 2010
Feb 17, 2010
415
if( rc==0 ){
Mar 3, 2010
Mar 3, 2010
416
rc = (int) (ap2 - ap1);
Feb 17, 2010
Feb 17, 2010
417
}
418
419
420
421
422
423
424
425
426
427
428
429
return rc;
}
/* Sort the linked list of parser actions headed by "ap" with msort(),
** ordering by actioncmp(), and return the head of the sorted list.  The
** list is threaded through the "next" field, whose address within the
** first element is handed to msort(). */
static struct action *Action_sort(
  struct action *ap
){
  char **linkField = (char **)&ap->next;
  int (*compare)(const char*,const char*) =
      (int(*)(const char*,const char*))actioncmp;
  return (struct action *)msort((char *)ap, linkField, compare);
}
Feb 17, 2010
Feb 17, 2010
430
431
432
433
434
435
436
437
438
439
440
441
void Action_add(
struct action **app,
enum e_action type,
struct symbol *sp,
char *arg
){
struct action *newaction;
newaction = Action_new();
newaction->next = *app;
*app = newaction;
newaction->type = type;
newaction->sp = sp;
442
if( type==SHIFT ){
Feb 17, 2010
Feb 17, 2010
443
newaction->x.stp = (struct state *)arg;
444
}else{
Feb 17, 2010
Feb 17, 2010
445
newaction->x.rp = (struct rule *)arg;
446
447
448
449
450
451
452
453
454
}
}
/********************** New code to implement the "acttab" module ***********/
/*
** This module implements routines used to construct the yy_action[] table.
*/
/*
** The state of the yy_action table under construction is an instance of
** the following structure.
**
** The yy_action table maps the pair (state_number, lookahead) into an
** action_number. The table is an array of integer pairs. The state_number
** determines an initial offset into the yy_action array. The lookahead
** value is then added to this initial offset to get an index X into the
** yy_action array. If the aAction[X].lookahead equals the value of the
** lookahead input, then the value of the action_number output is
** aAction[X].action. If the lookaheads do not match then the
** default action for the state_number is returned.
**
** All actions associated with a single state_number are first entered
** into aLookahead[] using multiple calls to acttab_action(). Then the
** actions for that single state_number are placed into the aAction[]
** array with a single call to acttab_insert(). The acttab_insert() call
** also resets the aLookahead[] array in preparation for the next
** state number.
*/
Feb 17, 2010
Feb 17, 2010
473
474
475
476
/* One (lookahead, action) pair; the element type of both the aAction[]
** and aLookahead[] arrays of struct acttab.  acttab_insert() marks unused
** aAction[] slots by setting both fields to -1. */
struct lookahead_action {
int lookahead; /* Value of the lookahead token */
int action; /* Action to take on the given lookahead */
};
477
478
479
480
/* Builder state for the yy_action[] table: the table built so far
** (aAction) plus the transaction set currently being collected
** (aLookahead) and its summary fields. */
typedef struct acttab acttab;
struct acttab {
  int nAction;                 /* Number of used slots in aAction[] */
  int nActionAlloc;            /* Slots allocated for aAction[] */
  struct lookahead_action
    *aAction,                  /* The yy_action[] table under construction */
    *aLookahead;               /* A single new transaction set */
  int mnLookahead;             /* Minimum aLookahead[].lookahead */
  int mnAction;                /* Action associated with mnLookahead */
  int mxLookahead;             /* Maximum aLookahead[].lookahead */
  int nLookahead;              /* Used slots in aLookahead[] */
  int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
};
/* In the accessors below X is a pointer to an acttab and N is an index
** into its aAction[] array. */
/* Return the number of entries in the yy_action table */
#define acttab_size(X) ((X)->nAction)
/* The value for the N-th entry in yy_action */
#define acttab_yyaction(X,N) ((X)->aAction[N].action)
/* The value for the N-th entry in yy_lookahead */
#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead)
/* Free all memory associated with the given acttab: both arrays and the
** structure itself.  Passing a null pointer is a no-op, mirroring
** free(). */
void acttab_free(acttab *p){
  if( p==0 ) return;
  free( p->aAction );
  free( p->aLookahead );
  free( p );
}
/* Allocate a new acttab structure */
acttab *acttab_alloc(void){
Feb 17, 2010
Feb 17, 2010
509
acttab *p = (acttab *) calloc( 1, sizeof(*p) );
510
511
512
513
514
515
516
517
if( p==0 ){
fprintf(stderr,"Unable to allocate memory for a new acttab.");
exit(1);
}
memset(p, 0, sizeof(*p));
return p;
}
Feb 17, 2010
Feb 17, 2010
518
519
520
521
/* Add a new action to the current transaction set.
**
** This routine is called once for each lookahead for a particular
** state.
**
** The (lookahead, action) pair is appended to p->aLookahead[], which is
** grown in steps of 25 slots.  The running minimum and maximum lookahead
** and the action belonging to the minimum are kept up to date for
** acttab_insert().  Exits the process with status 1 if the array cannot
** be grown.
*/
void acttab_action(acttab *p, int lookahead, int action){
  if( p->nLookahead>=p->nLookaheadAlloc ){
    p->nLookaheadAlloc += 25;
    p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead,
                             sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
    if( p->aLookahead==0 ){
      fprintf(stderr,"malloc failed\n");
      exit(1);
    }
  }
  if( p->nLookahead==0 ){
    /* First entry of a new transaction set seeds all three summaries. */
    p->mxLookahead = lookahead;
    p->mnLookahead = lookahead;
    p->mnAction = action;
  }else{
    if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
    if( p->mnLookahead>lookahead ){
      p->mnLookahead = lookahead;
      p->mnAction = action;
    }
  }
  p->aLookahead[p->nLookahead].lookahead = lookahead;
  p->aLookahead[p->nLookahead].action = action;
  p->nLookahead++;
}
/*
** Add the transaction set built up with prior calls to acttab_action()
** into the current action table.  Then reset the transaction set back
** to an empty set in preparation for a new round of acttab_action() calls.
**
** Return the offset into the action table of the new transaction.
**
** At least one acttab_action() call must precede each call (asserted).
** Exits the process with status 1 if the table cannot be grown.
*/
int acttab_insert(acttab *p){
  int i, j, k, n;
  assert( p->nLookahead>0 );

  /* Make sure we have enough space to hold the expanded action table
  ** in the worst case.  The worst case occurs if the transaction set
  ** must be appended to the current action table
  */
  n = p->mxLookahead + 1;
  if( p->nAction + n >= p->nActionAlloc ){
    int oldAlloc = p->nActionAlloc;
    p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
    p->aAction = (struct lookahead_action *) realloc( p->aAction,
                          sizeof(p->aAction[0])*p->nActionAlloc);
    if( p->aAction==0 ){
      fprintf(stderr,"malloc failed\n");
      exit(1);
    }
    /* Newly added slots are marked empty with -1 in both fields. */
    for(i=oldAlloc; i<p->nActionAlloc; i++){
      p->aAction[i].lookahead = -1;
      p->aAction[i].action = -1;
    }
  }

  /* Scan the existing action table looking for an offset that is a
  ** duplicate of the current transaction set.  Fall out of the loop
  ** if and when the duplicate is found.
  **
  ** i is the index in p->aAction[] where p->mnLookahead is inserted.
  */
  for(i=p->nAction-1; i>=0; i--){
    if( p->aAction[i].lookahead==p->mnLookahead ){
      /* All lookaheads and actions in the aLookahead[] transaction
      ** must match against the candidate aAction[i] entry. */
      if( p->aAction[i].action!=p->mnAction ) continue;
      for(j=0; j<p->nLookahead; j++){
        k = p->aLookahead[j].lookahead - p->mnLookahead + i;
        if( k<0 || k>=p->nAction ) break;
        if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
        if( p->aLookahead[j].action!=p->aAction[k].action ) break;
      }
      if( j<p->nLookahead ) continue;

      /* No possible lookahead value that is not in the aLookahead[]
      ** transaction is allowed to match aAction[i] */
      n = 0;
      for(j=0; j<p->nAction; j++){
        if( p->aAction[j].lookahead<0 ) continue;
        if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
      }
      if( n==p->nLookahead ){
        break;  /* An exact match is found at offset i */
      }
    }
  }

  /* If no existing offsets exactly match the current transaction, find an
  ** an empty offset in the aAction[] table in which we can add the
  ** aLookahead[] transaction.
  */
  if( i<0 ){
    /* Look for holes in the aAction[] table that fit the current
    ** aLookahead[] transaction.  Leave i set to the offset of the hole.
    ** If no holes are found, i is left at p->nAction, which means the
    ** transaction will be appended. */
    for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){
      if( p->aAction[i].lookahead<0 ){
        for(j=0; j<p->nLookahead; j++){
          k = p->aLookahead[j].lookahead - p->mnLookahead + i;
          if( k<0 ) break;
          if( p->aAction[k].lookahead>=0 ) break;
        }
        if( j<p->nLookahead ) continue;
        /* Reject the hole if an existing entry would be mistaken for a
        ** member of this transaction at the candidate offset. */
        for(j=0; j<p->nAction; j++){
          if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
        }
        if( j==p->nAction ){
          break;  /* Fits in empty slots */
        }
      }
    }
  }
  /* Insert transaction set at index i. */
  for(j=0; j<p->nLookahead; j++){
    k = p->aLookahead[j].lookahead - p->mnLookahead + i;
    p->aAction[k] = p->aLookahead[j];
    if( k>=p->nAction ) p->nAction = k+1;
  }
  p->nLookahead = 0;

  /* Return the offset that is added to the lookahead in order to get the
  ** index into yy_action of the action */
  return i - p->mnLookahead;
}
/********************** From the file "build.c" *****************************/
/*
** Routines to construction the finite state machine for the LEMON
** parser generator.
*/
/* Find a precedence symbol of every rule in the grammar.
**
** Those rules which have a precedence symbol coded in the input
** grammar using the "[symbol]" construct will already have the
** rp->precsym field filled. Other rules take as their precedence
** symbol the first RHS symbol with a defined precedence. If there
** are not RHS symbols with a defined precedence, the precedence
** symbol field is left blank.
*/
Feb 17, 2010
Feb 17, 2010
666
void FindRulePrecedences(struct lemon *xp)
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
{
struct rule *rp;
for(rp=xp->rule; rp; rp=rp->next){
if( rp->precsym==0 ){
int i, j;
for(i=0; i<rp->nrhs && rp->precsym==0; i++){
struct symbol *sp = rp->rhs[i];
if( sp->type==MULTITERMINAL ){
for(j=0; j<sp->nsubsym; j++){
if( sp->subsym[j]->prec>=0 ){
rp->precsym = sp->subsym[j];
break;
}
}
}else if( sp->prec>=0 ){
rp->precsym = rp->rhs[i];
}
}
}
}
return;
}
/* Find all nonterminals which will generate the empty string.
** Then go back and compute the first sets of every nonterminal.
** The first set is the set of all terminal symbols which can begin
** a string generated by that nonterminal.
*/
Feb 17, 2010
Feb 17, 2010
695
void FindFirstSets(struct lemon *lemp)
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
{
int i, j;
struct rule *rp;
int progress;
for(i=0; i<lemp->nsymbol; i++){
lemp->symbols[i]->lambda = LEMON_FALSE;
}
for(i=lemp->nterminal; i<lemp->nsymbol; i++){
lemp->symbols[i]->firstset = SetNew();
}
/* First compute all lambdas */
do{
progress = 0;
for(rp=lemp->rule; rp; rp=rp->next){
if( rp->lhs->lambda ) continue;
for(i=0; i<rp->nrhs; i++){
struct symbol *sp = rp->rhs[i];
if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break;
}
if( i==rp->nrhs ){
rp->lhs->lambda = LEMON_TRUE;
progress = 1;
}
}
}while( progress );
/* Now compute all first sets */
do{
struct symbol *s1, *s2;
progress = 0;
for(rp=lemp->rule; rp; rp=rp->next){
s1 = rp->lhs;
for(i=0; i<rp->nrhs; i++){
s2 = rp->rhs[i];
if( s2->type==TERMINAL ){
progress += SetAdd(s1->firstset,s2->index);
break;
}else if( s2->type==MULTITERMINAL ){
for(j=0; j<s2->nsubsym; j++){
progress += SetAdd(s1->firstset,s2->subsym[j]->index);
}
break;
}else if( s1==s2 ){
if( s1->lambda==LEMON_FALSE ) break;
}else{
progress += SetUnion(s1->firstset,s2->firstset);
if( s2->lambda==LEMON_FALSE ) break;
}
}
}
}while( progress );
return;
}
/* Compute all LR(0) states for the grammar. Links
** are added to between some states so that the LR(1) follow sets
** can be computed later.
*/
Feb 17, 2010
Feb 17, 2010
756
757
PRIVATE struct state *getstate(struct lemon *); /* forward reference */
void FindStates(struct lemon *lemp)
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
{
struct symbol *sp;
struct rule *rp;
Configlist_init();
/* Find the start symbol */
if( lemp->start ){
sp = Symbol_find(lemp->start);
if( sp==0 ){
ErrorMsg(lemp->filename,0,
"The specified start symbol \"%s\" is not \
in a nonterminal of the grammar. \"%s\" will be used as the start \
symbol instead.",lemp->start,lemp->rule->lhs->name);
lemp->errorcnt++;
sp = lemp->rule->lhs;
}
}else{
sp = lemp->rule->lhs;
}
/* Make sure the start symbol doesn't occur on the right-hand side of
** any rule. Report an error if it does. (YACC would generate a new
** start symbol in this case.) */
for(rp=lemp->rule; rp; rp=rp->next){
int i;
for(i=0; i<rp->nrhs; i++){
if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */
ErrorMsg(lemp->filename,0,
"The start symbol \"%s\" occurs on the \
right-hand side of a rule. This will result in a parser which \
does not work properly.",sp->name);
lemp->errorcnt++;
}
}
}
/* The basis configuration set for the first state
** is all rules which have the start symbol as their
** left-hand side */
for(rp=sp->rule; rp; rp=rp->nextlhs){
struct config *newcfp;
rp->lhsStart = 1;
newcfp = Configlist_addbasis(rp,0);
SetAdd(newcfp->fws,0);
}
/* Compute the first state. All other states will be
** computed automatically during the computation of the first one.
** The returned pointer to the first state is not used. */
(void)getstate(lemp);
return;
}
/* Return a pointer to a state which is described by the configuration
** list which has been built from calls to Configlist_add.
*/
Feb 17, 2010
Feb 17, 2010
815
816
PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */
PRIVATE struct state *getstate(struct lemon *lemp)
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
{
struct config *cfp, *bp;
struct state *stp;
/* Extract the sorted basis of the new state. The basis was constructed
** by prior calls to "Configlist_addbasis()". */
Configlist_sortbasis();
bp = Configlist_basis();
/* Get a state with the same basis */
stp = State_find(bp);
if( stp ){
/* A state with the same basis already exists! Copy all the follow-set
** propagation links from the state under construction into the
** preexisting state, then return a pointer to the preexisting state */
struct config *x, *y;
for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
Plink_copy(&y->bplp,x->bplp);
Plink_delete(x->fplp);
x->fplp = x->bplp = 0;
}
cfp = Configlist_return();
Configlist_eat(cfp);
}else{
/* This really is a new state. Construct all the details */
Configlist_closure(lemp); /* Compute the configuration closure */
Configlist_sort(); /* Sort the configuration closure */
cfp = Configlist_return(); /* Get a pointer to the config list */
stp = State_new(); /* A new state structure */
MemoryCheck(stp);
stp->bp = bp; /* Remember the configuration basis */
stp->cfp = cfp; /* Remember the configuration closure */
stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
stp->ap = 0; /* No actions, yet. */
State_insert(stp,stp->bp); /* Add to the state table */
buildshifts(lemp,stp); /* Recursively compute successor states */
}
return stp;
}
/*
** Return true if two symbols are the same.
*/
Feb 17, 2010
Feb 17, 2010
860
int same_symbol(struct symbol *a, struct symbol *b)
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
{
int i;
if( a==b ) return 1;
if( a->type!=MULTITERMINAL ) return 0;
if( b->type!=MULTITERMINAL ) return 0;
if( a->nsubsym!=b->nsubsym ) return 0;
for(i=0; i<a->nsubsym; i++){
if( a->subsym[i]!=b->subsym[i] ) return 0;
}
return 1;
}
/* Construct all successor states to the given state. A "successor"
** state is any state which can be reached by a shift action.
*/
Feb 17, 2010
Feb 17, 2010
876
PRIVATE void buildshifts(struct lemon *lemp, struct state *stp)
877
878
879
{
struct config *cfp; /* For looping thru the config closure of "stp" */
struct config *bcfp; /* For the inner loop on config closure of "stp" */
Feb 17, 2010
Feb 17, 2010
880
struct config *newcfg; /* */
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
struct state *newstp; /* A pointer to a successor state */
/* Each configuration becomes complete after it contibutes to a successor
** state. Initially, all configurations are incomplete */
for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
/* Loop through all configurations of the state "stp" */
for(cfp=stp->cfp; cfp; cfp=cfp->next){
if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */
if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */
Configlist_reset(); /* Reset the new config set */
sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
/* For every configuration in the state "stp" which has the symbol "sp"
** following its dot, add the same configuration to the basis set under
** construction but with the dot shifted one symbol to the right. */
for(bcfp=cfp; bcfp; bcfp=bcfp->next){
if( bcfp->status==COMPLETE ) continue; /* Already used */
if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */
bcfp->status = COMPLETE; /* Mark this config as used */
Feb 17, 2010
Feb 17, 2010
905
906
newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
Plink_add(&newcfg->bplp,bcfp);
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
}
/* Get a pointer to the state described by the basis configuration set
** constructed in the preceding loop */
newstp = getstate(lemp);
/* The state "newstp" is reached from the state "stp" by a shift action
** on the symbol "sp" */
if( sp->type==MULTITERMINAL ){
int i;
for(i=0; i<sp->nsubsym; i++){
Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
}
}else{
Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
}
}
}
/*
** Construct the propagation links
*/
Feb 17, 2010
Feb 17, 2010
929
void FindLinks(struct lemon *lemp)
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
{
int i;
struct config *cfp, *other;
struct state *stp;
struct plink *plp;
/* Housekeeping detail:
** Add to every propagate link a pointer back to the state to
** which the link is attached. */
for(i=0; i<lemp->nstate; i++){
stp = lemp->sorted[i];
for(cfp=stp->cfp; cfp; cfp=cfp->next){
cfp->stp = stp;
}
}
/* Convert all backlinks into forward links. Only the forward
** links are used in the follow-set computation. */
for(i=0; i<lemp->nstate; i++){
stp = lemp->sorted[i];
for(cfp=stp->cfp; cfp; cfp=cfp->next){
for(plp=cfp->bplp; plp; plp=plp->next){
other = plp->cfp;
Plink_add(&other->fplp,cfp);
}
}
}
}
/* Compute all followsets.
**
** A followset is the set of all symbols which can come immediately
** after a configuration.
*/
Feb 17, 2010
Feb 17, 2010
964
void FindFollowSets(struct lemon *lemp)
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
{
int i;
struct config *cfp;
struct plink *plp;
int progress;
int change;
for(i=0; i<lemp->nstate; i++){
for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
cfp->status = INCOMPLETE;
}
}
do{
progress = 0;
for(i=0; i<lemp->nstate; i++){
for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
if( cfp->status==COMPLETE ) continue;
for(plp=cfp->fplp; plp; plp=plp->next){
change = SetUnion(plp->cfp->fws,cfp->fws);
if( change ){
plp->cfp->status = INCOMPLETE;
progress = 1;
}
}
cfp->status = COMPLETE;
}
}
}while( progress );
}
Feb 17, 2010
Feb 17, 2010
996
/* Forward declaration so that resolve_conflict() can be referenced before
** its definition. */
static int resolve_conflict(struct action *,struct action *, struct symbol *);
997
998
999
/* Compute the reduce actions, and resolve conflicts.
*/
Feb 17, 2010
Feb 17, 2010
1000
void FindActions(struct lemon *lemp)