From a8e24641c06590712b44324b2c198be92704e134 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 17 Feb 2009 08:40:03 -0500 Subject: [PATCH] Enormous amount of tapdancing to handle EOI better in the lexer. --- mojoshader_internal.h | 1 + mojoshader_lexer.re | 63 +++++++++++++++++++++++++++++---------- mojoshader_preprocessor.c | 1 + 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/mojoshader_internal.h b/mojoshader_internal.h index 6a7cb8c9..8c6465c0 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -395,6 +395,7 @@ typedef struct IncludeState const char *token; const unsigned char *lexer_marker; int report_whitespace; + unsigned int orig_length; unsigned int bytes_left; unsigned int line; Conditional *conditional_stack; diff --git a/mojoshader_lexer.re b/mojoshader_lexer.re index fb678397..f17c0bc6 100644 --- a/mojoshader_lexer.re +++ b/mojoshader_lexer.re @@ -24,18 +24,34 @@ typedef unsigned char uchar; -#define RET(t) do { update_state(s, cursor, token); return t; } while (0) +/*!max:re2c */ +#define RET(t) do { update_state(s, eoi, cursor, token); return t; } while (0) #define YYCTYPE uchar #define YYCURSOR cursor #define YYLIMIT limit #define YYMARKER s->lexer_marker -#define YYFILL(n) { if ((n) == 1) { RET(TOKEN_EOI); } } +#define YYFILL(n) { if ((n) == 1) { cursor = sentinel; limit = cursor + YYMAXFILL; eoi = 1; } } -static void update_state(IncludeState *s, const uchar *cur, const uchar *tok) +static uchar sentinel[YYMAXFILL]; + +static void update_state(IncludeState *s, int eoi, + const uchar *cur, const uchar *tok) { - s->bytes_left -= (unsigned int) (cur - ((const uchar *) s->source)); - s->source = (const char *) cur; - s->token = (const char *) tok; + if (eoi) + { + s->bytes_left = 0; + s->source = (const char *) s->source_base + s->orig_length; + if ( (tok >= sentinel) && (tok < (sentinel+YYMAXFILL)) ) + s->token = s->source; + else + s->token = (const char *) tok; + } // if + else + { + s->bytes_left -= (unsigned int) (cur - ((const uchar *) s->source)); + s->source = (const char *) cur; + s->token = (const char *) tok; + } // else } // update_state Token preprocessor_internal_lexer(IncludeState *s) @@ -44,14 +60,13 @@ Token preprocessor_internal_lexer(IncludeState *s) const uchar *token; const uchar *matchptr; const uchar *limit = cursor + s->bytes_left; + int eoi = 0; int saw_newline = 0; scanner_loop: + if (YYLIMIT == YYCURSOR) YYFILL(1); token = cursor; - if (YYLIMIT == YYCURSOR) - RET(TOKEN_EOI); - /*!re2c ANY = [\000-\377]; ANYLEGAL = [a-zA-Z0-9_/'*=+%^&|!#<>()[{}.,~^:;? \t\v\f\r\n\-\]\\]; @@ -132,6 +147,8 @@ scanner_loop: "=" { RET('='); } "?" { RET('?'); } + "\000" { if (eoi) { RET(TOKEN_EOI); } goto bad_chars; } + PP "include" { RET(TOKEN_PP_INCLUDE); } PP "line" { RET(TOKEN_PP_LINE); } PP "define" { RET(TOKEN_PP_DEFINE); } @@ -150,8 +167,7 @@ scanner_loop: */ multilinecomment: - if (YYLIMIT == YYCURSOR) - RET(TOKEN_INCOMPLETE_COMMENT); + if (YYLIMIT == YYCURSOR) YYFILL(1); matchptr = cursor; // The "*\/" is just to avoid screwing up text editor syntax highlighting. /*!re2c @@ -168,24 +184,39 @@ multilinecomment: saw_newline = 1; goto multilinecomment; } + "\000" { + if (eoi) + RET(TOKEN_INCOMPLETE_COMMENT); + goto multilinecomment; + } ANY { goto multilinecomment; } */ singlelinecomment: - if (YYLIMIT == YYCURSOR) - RET(TOKEN_EOI); + if (YYLIMIT == YYCURSOR) YYFILL(1); matchptr = cursor; /*!re2c NEWLINE { s->line++; token = matchptr; RET('\n'); } + "\000" { if (eoi) { RET(TOKEN_EOI); } goto singlelinecomment; } ANY { goto singlelinecomment; } */ bad_chars: - if (YYLIMIT == YYCURSOR) - RET(TOKEN_BAD_CHARS); - + if (YYLIMIT == YYCURSOR) YYFILL(1); /*!re2c ANYLEGAL { cursor--; RET(TOKEN_BAD_CHARS); } + "\000" { + if (eoi) + { + assert( !((token >= sentinel) && + (token < sentinel+YYMAXFILL)) ); + eoi = 0; + cursor = (uchar *) s->source_base + s->orig_length; + RET(TOKEN_BAD_CHARS); // next call will be EOI. + } + goto bad_chars; + } + ANY { goto bad_chars; } */ diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c index 52b458ec..bab74543 100644 --- a/mojoshader_preprocessor.c +++ b/mojoshader_preprocessor.c @@ -564,6 +564,7 @@ static int push_source(Context *ctx, const char *fname, const char *source, state->source_base = source; state->source = source; state->token = source; + state->orig_length = srclen; state->bytes_left = srclen; state->line = linenum; state->next = ctx->include_stack;