Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Some initial effort for a parser/lexer/preprocessor. Not even close to done! Took some of MojoShader as a template for this work.
- Loading branch information
0 parents
commit f6cff25
Showing
4 changed files
with
974 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
You have no license to use this code at this time. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
/** | ||
* MojoBASIC; a modern reimplementation of QuickBASIC. | ||
* | ||
* Please see the file LICENSE.txt in the source's root directory. | ||
* | ||
* This file written by Ryan C. Gordon. | ||
*/ | ||
|
||
// This started as a copy/paste from MojoShader, which was written by Ryan and | ||
// is also under the zlib license. https://icculus.org/mojoshader/ | ||
|
||
// This was originally based on examples/pp-c.re from re2c: http://re2c.org/ | ||
// re2c is public domain code. | ||
// | ||
// You build mojobasic_lexer.cpp from the .re file with re2c... | ||
// re2c -is -o mojobasic_lexer.cpp mojobasic_lexer.re | ||
// | ||
// Changes to the lexer are done to the .re file, not the C++ code! | ||
|
||
// Internal-build gate: mojobasic_internal.h exposes extra declarations
// (IncludeState, Token, etc.) when this is defined.
#define __MOJOBASIC_INTERNAL__ 1
#include "mojobasic_internal.h"

typedef unsigned char uchar;

// re2c replaces this marker with "#define YYMAXFILL n" -- the maximum
// lookahead any rule below needs. It must appear before sentinel[].
/*!max:re2c */

// Commit scanner state and return token t in one step; used by every
// rule action. (Token) cast lets single chars like '(' be returned too.
#define RET(t) return update_state(s, eoi, cursor, token, (Token) t)

// Interface macros the generated re2c scanner expects.
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT limit
#define YYMARKER s->lexer_marker
// No more real input: point the scanner at a zero-filled sentinel buffer
// so lookahead stays in-bounds, and flag end-of-input for the actions.
#define YYFILL(n) { if ((n) == 1) { cursor = sentinel; limit = cursor + YYMAXFILL; eoi = 1; } }

// Zero-filled overrun buffer YYFILL redirects into at end of input.
// Static storage guarantees the zero fill ("\000" rules match here).
static uchar sentinel[YYMAXFILL];
|
||
// Commit the scanner's progress back into the IncludeState and hand the
// matched token to the caller. After this returns, s->token points at the
// lexeme start, s->tokenlen is its byte length, and s->tokenval == val.
// At end-of-input the token pointer may live inside the static sentinel
// buffer; it is redirected to the end of the real source so callers never
// see sentinel addresses.
static Token update_state(IncludeState *s, int eoi, const uchar *cur,
                          const uchar *tok, const Token val)
{
    if (!eoi)
    {
        // Normal case: advance past the consumed bytes.
        s->bytes_left -= (unsigned int) (cur - ((const uchar *) s->source));
        s->source = (const char *) cur;
        s->token = (const char *) tok;
    } // if
    else
    {
        // Input exhausted: park the read position at the end of the
        // original buffer and drop any pointer into sentinel[].
        const int tok_in_sentinel =
            (tok >= sentinel) && (tok < (sentinel + YYMAXFILL));
        s->bytes_left = 0;
        s->source = (const char *) s->source_base + s->orig_length;
        s->token = tok_in_sentinel ? s->source : (const char *) tok;
    } // else

    s->tokenlen = (unsigned int) (s->source - s->token);
    s->tokenval = val;
    return val;
} // update_state
|
||
// Scan and return the next token from s's source buffer. State for the
// lexeme (start pointer, length, value) is committed through update_state
// via the RET() macro; single-character operators return their ASCII code
// as the Token value, everything else returns a TOKEN_* constant.
Token preprocessor_lexer(IncludeState *s)
{
    const uchar *cursor = (const uchar *) s->source;  // current scan position.
    const uchar *token = cursor;       // start of the lexeme being matched.
    const uchar *matchptr;             // rewind point for comment scanning.
    const uchar *limit = cursor + s->bytes_left;  // one past last valid byte.
    int eoi = 0;  // set by YYFILL once the scanner runs off the real input.

    // Named character classes shared by all rule blocks below
    // (re2c definitions section; generates no code by itself).
/*!re2c
    ANY = [\000-\377];
    ANYLEGAL = [a-zA-Z0-9_/'*=+%^&|!#<>()[{}.,~^:;? \t\v\f\r\n\-\]\\];
    O = [0-7];
    D = [0-9];
    L = [a-zA-Z_];
    H = [a-fA-F0-9];
    E = [Ee] [+-]? D+;
    FS = [fFhH];
    ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
    NEWLINE = ("\r\n" | "\r" | "\n");
    WHITESPACE = [ \t\v\f]+;
    SUFFIX = [$!@#$%&];
*/

    // Main token scanner. "'" and "REM " both start BASIC comments.
    // NOTE(review): the string-literal rule (["] ANY* ["]) accepts
    // newlines and embedded quotes inside the literal -- presumably a
    // placeholder until escapes/line limits are handled; confirm intent.
scanner_loop:
    if (YYLIMIT == YYCURSOR) YYFILL(1);
    token = cursor;

/*!re2c
    "'" { goto singlelinecomment; }
    "REM" NEWLINE { s->line++; RET('\n'); }
    "REM" WHITESPACE { goto singlelinecomment; }
    L (L|D)* SUFFIX? { RET(TOKEN_IDENTIFIER); }
    ("&H" H+) | ("&O" O+) | (D+) { RET(TOKEN_INT_LITERAL); }
    (D+ E) | (D* "." D+ E?) | (D+ "." D* E?) { RET(TOKEN_FLOAT_LITERAL); }
    (["] ANY* ["]) { RET(TOKEN_STRING_LITERAL); }
    "<=" { RET(TOKEN_LEQ); }
    ">=" { RET(TOKEN_GEQ); }
    "<>" { RET(TOKEN_NEQ); }
    "(" { RET('('); }
    ")" { RET(')'); }
    "." { RET('.'); }
    "," { RET(','); }
    "-" { RET('-'); }
    "+" { RET('+'); }
    "*" { RET('*'); }
    "/" { RET('/'); }
    "<" { RET('<'); }
    ">" { RET('>'); }
    ":" { RET(':'); }
    "=" { RET('='); }
    ";" { RET(';'); }
    "\000" { if (eoi) { RET(TOKEN_EOI); } goto bad_chars; }
    WHITESPACE { if (s->report_whitespace) RET(' '); goto scanner_loop; }
    NEWLINE { s->line++; RET('\n'); }
    ANY { goto bad_chars; }
*/

    // Start of a comment: skip leading whitespace, then check whether it
    // is a "'$..." metacommand before falling into the plain-comment loop.
singlelinecomment:
    if (YYLIMIT == YYCURSOR) YYFILL(1);
    matchptr = cursor;

/*!re2c
    WHITESPACE { goto singlelinecomment; }
    "$" { RET(TOKEN_METACOMMAND); }
    ANY { cursor = matchptr; goto singlelinecomment_loop; }
*/

    // Consume the remainder of a single-line comment up to the newline or
    // end of input, optionally reporting it as a token.
singlelinecomment_loop:
    if (YYLIMIT == YYCURSOR) YYFILL(1);
    matchptr = cursor;

/*!re2c
    NEWLINE {
        s->line++;
        if (s->report_comments)
        {
            cursor = matchptr; // so we RET('\n') next.
            RET(TOKEN_SINGLE_COMMENT);
        }
        token = matchptr;
        RET('\n');
    }
    "\000" {
        if (eoi)
        {
            if (s->report_comments)
                RET(TOKEN_SINGLE_COMMENT);
            else
                RET(TOKEN_EOI);
        }
        goto singlelinecomment_loop;
    }
    ANY { goto singlelinecomment_loop; }
*/

    // Accumulate a run of illegal characters into one TOKEN_BAD_CHARS,
    // stopping at the first legal character (pushed back via cursor--) or
    // at end of input.
bad_chars:
    if (YYLIMIT == YYCURSOR) YYFILL(1);
/*!re2c
    ANYLEGAL { cursor--; RET(TOKEN_BAD_CHARS); }
    "\000" {
        if (eoi)
        {
            assert( !((token >= sentinel) &&
                      (token < sentinel+YYMAXFILL)) );
            eoi = 0;
            cursor = (uchar *) s->source_base + s->orig_length;
            RET(TOKEN_BAD_CHARS); // next call will be EOI.
        }
        goto bad_chars;
    }
    ANY { goto bad_chars; }
*/

    // Every re2c block above ends in a RET() or a goto; control never
    // reaches here in a correctly generated scanner.
    assert(0 && "Shouldn't hit this code");
    RET(TOKEN_UNKNOWN);
} // preprocessor_lexer
|
||
// end of mojobasic_lexer.re (or .cpp) ... | ||
|
Oops, something went wrong.