From 67966432d48c3e905be9d6e72d7e9e53a16b4557 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Feb 2009 21:28:48 -0500 Subject: [PATCH] Reworked preprocessor newline management. My thought about the semicolon and assembly comments was a brain fart; newlines will have to go through to the assembler, and we'll just mirror Direct3D's preprocessor output behaviour in MOJOSHADER_preprocess() instead. This gets rid of the nasty insert_token nonsense, as an added benefit. --- mojoshader_internal.h | 3 - mojoshader_lexer.re | 20 +++++-- mojoshader_preprocessor.c | 112 +++++++++++++++++++------------------- 3 files changed, 70 insertions(+), 65 deletions(-) diff --git a/mojoshader_internal.h b/mojoshader_internal.h index 7631e0fc..31e7439e 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -320,9 +320,6 @@ typedef struct IncludeState { char *filename; int included; - Token insert_token; - Token insert_token2; - char insert_tokchar; const char *source_base; const char *source; const char *token; diff --git a/mojoshader_lexer.re b/mojoshader_lexer.re index 9fa1e2a2..8f78f2d8 100644 --- a/mojoshader_lexer.re +++ b/mojoshader_lexer.re @@ -24,7 +24,7 @@ typedef unsigned char uchar; -#define RET(t) { update_state(s, cursor, token); return t; } +#define RET(t) do { update_state(s, cursor, token); return t; } while (0) #define YYCTYPE uchar #define YYCURSOR cursor #define YYLIMIT limit @@ -43,6 +43,7 @@ Token preprocessor_internal_lexer(IncludeState *s) const uchar *cursor = (const uchar *) s->source; const uchar *token; const uchar *limit = cursor + s->bytes_left; + int saw_newline = 0; scanner_loop: token = cursor; @@ -131,7 +132,7 @@ scanner_loop: PP "error" { RET(TOKEN_PP_ERROR); } WHITESPACE { goto scanner_loop; } - NEWLINE { s->line++; goto scanner_loop; } + NEWLINE { s->line++; RET('\n'); } any { printf("bad char\n"); goto scanner_loop; } */ @@ -140,8 +141,17 @@ multilinecomment: RET(TOKEN_PP_INCOMPLETE_COMMENT); // The "*\/" is just to avoid 
screwing up text editor syntax highlighting. /*!re2c - "*\/" { goto scanner_loop; } - NEWLINE { s->line++; goto multilinecomment; } + "*\/" { + if (saw_newline) + RET('\n'); + goto scanner_loop; + } + NEWLINE { + s->line++; + token = cursor-1; + saw_newline = 1; + goto multilinecomment; + } any { goto multilinecomment; } */ @@ -149,7 +159,7 @@ singlelinecomment: if (YYLIMIT == YYCURSOR) RET(TOKEN_EOI); /*!re2c - NEWLINE { s->line++; goto scanner_loop; } + NEWLINE { s->line++; token = cursor-1; RET('\n'); } any { goto singlelinecomment; } */ diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c index 1a697509..6687d98e 100644 --- a/mojoshader_preprocessor.c +++ b/mojoshader_preprocessor.c @@ -214,8 +214,6 @@ static int push_source(Context *ctx, const char *fname, const char *source, state->source_base = source; state->source = source; state->token = source; - state->insert_token = TOKEN_UNKNOWN; - state->insert_token2 = TOKEN_UNKNOWN; state->bytes_left = srclen; state->line = 1; state->next = ctx->include_stack; @@ -344,24 +342,6 @@ static inline const char *_preprocessor_nexttoken(Preprocessor *_ctx, return NULL; // we're done! } // if - if (state->insert_token != TOKEN_UNKNOWN) - { - state->insert_tokchar = (char) state->insert_token; - *_token = state->insert_token; - *_len = 1; - state->insert_token = TOKEN_UNKNOWN; - return &state->insert_tokchar; - } // if - - else if (state->insert_token2 != TOKEN_UNKNOWN) - { - state->insert_tokchar = (char) state->insert_token2; - *_token = state->insert_token2; - *_len = 1; - state->insert_token2 = TOKEN_UNKNOWN; - return &state->insert_tokchar; - } // if - Token token = preprocessor_internal_lexer(state); if (token == TOKEN_EOI) { @@ -370,23 +350,6 @@ static inline const char *_preprocessor_nexttoken(Preprocessor *_ctx, continue; // pick up again after parent's #include line. } // if - // Microsoft's preprocessor is weird. - // It ignores newlines, and then inserts its own around certain - // tokens. 
For example, after a semicolon. This allows HLSL code to - // be mostly readable, and lets the ';' work as a single line comment - // in the assembler. - if ( (token == ((Token) ';')) || (token == ((Token) '}')) ) - state->insert_token = (Token) '\n'; - else if (token == ((Token) '{')) - { - state->insert_token = (Token) '{'; - state->insert_token2 = (Token) '\n'; - state->insert_tokchar = '\n'; - *_token = (Token) '\n'; - *_len = 1; - return &state->insert_tokchar; - } // else if - *_token = token; *_len = (unsigned int) (state->source - state->token); return state->token; @@ -642,6 +605,12 @@ const MOJOSHADER_preprocessData *MOJOSHADER_preprocess(const char *source, MOJOSHADER_includeClose include_close, MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) { + #ifdef _WINDOWS + static const char endline[] = { '\r', '\n' }; + #else + static const char endline[] = { '\n' }; + #endif + ErrorList *errors = NULL; int error_count = 0; @@ -669,34 +638,61 @@ include_close = (MOJOSHADER_includeClose) 0x1; int nl = 1; int indent = 0; unsigned int len = 0; + int out_of_memory = 0; while ((tokstr = preprocessor_nexttoken(pp, &len, &token)) != NULL) { - #ifdef _WINDOWS - static const char endline[] = { '\r', '\n' }; - #else - static const char endline[] = { '\n' }; - #endif - - const int isnewline = (token == ((Token) '\n')); - if (isnewline) + int isnewline = 0; + + assert(token != TOKEN_EOI); + + if (!out_of_memory) + out_of_memory = preprocessor_outofmemory(pp); + + // Microsoft's preprocessor is weird. + // It ignores newlines, and then inserts its own around certain + // tokens. For example, after a semicolon. This allows HLSL code to + // be mostly readable, instead of a stream of tokens. + if (token == ((Token) '\n')) + ; // ignore. + + else if ( (token == ((Token) '}')) || (token == ((Token) ';')) ) { - tokstr = endline; // convert to platform-specific. 
- len = sizeof (endline); - } // if + if (!out_of_memory) + { + if ( (token == ((Token) '}')) && (indent > 0) ) + indent--; - if ((token == ((Token) '}')) && (indent > 0)) - indent--; + out_of_memory = + (!indent_buffer(&buffer, indent, nl, m, d)) || + (!add_to_buffer(&buffer, tokstr, len, m, d)) || + (!add_to_buffer(&buffer, endline, sizeof (endline), m, d)); - int out_of_memory = preprocessor_outofmemory(pp); + isnewline = 1; + } // if + } // if - if ((!out_of_memory) && (!isnewline)) - out_of_memory = !indent_buffer(&buffer, indent, nl, m, d); + else if (token == ((Token) '{')) + { + if (!out_of_memory) + { + out_of_memory = + (!add_to_buffer(&buffer,endline,sizeof (endline),m,d)) || + (!add_to_buffer(&buffer, "{", 1, m, d)) || + (!add_to_buffer(&buffer,endline,sizeof (endline),m,d)); + indent++; + isnewline = 1; + } // if + } // else if - if (!out_of_memory) - out_of_memory = !add_to_buffer(&buffer, tokstr, len, m, d); + else + { + if (!out_of_memory) + { + out_of_memory = (!indent_buffer(&buffer, indent, nl, m, d)) || + (!add_to_buffer(&buffer, tokstr, len, m, d)); - if (token == ((Token) '{')) - indent++; + } // if + } // else nl = isnewline; @@ -760,6 +756,8 @@ include_close = (MOJOSHADER_includeClose) 0x1; } // if } // while + assert((token == TOKEN_EOI) || (out_of_memory)); + preprocessor_end(pp); const size_t total_bytes = buffer.total_bytes;