Reworked preprocessor newline management.
My thought about the semicolon and assembly comments was a brain fart;
newlines will have to go through to the assembler, and we'll just mirror
Direct3D's preprocessor output behaviour in MOJOSHADER_preprocess() instead.
This gets rid of the nasty insert_token nonsense, as an added benefit.
--- a/mojoshader_internal.h Wed Feb 11 16:49:00 2009 -0500
+++ b/mojoshader_internal.h Wed Feb 11 21:28:48 2009 -0500
@@ -320,9 +320,6 @@
{
char *filename;
int included;
- Token insert_token;
- Token insert_token2;
- char insert_tokchar;
const char *source_base;
const char *source;
const char *token;
--- a/mojoshader_lexer.re Wed Feb 11 16:49:00 2009 -0500
+++ b/mojoshader_lexer.re Wed Feb 11 21:28:48 2009 -0500
@@ -24,7 +24,7 @@
typedef unsigned char uchar;
-#define RET(t) { update_state(s, cursor, token); return t; }
+#define RET(t) do { update_state(s, cursor, token); return t; } while (0)
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT limit
@@ -43,6 +43,7 @@
const uchar *cursor = (const uchar *) s->source;
const uchar *token;
const uchar *limit = cursor + s->bytes_left;
+ int saw_newline = 0;
scanner_loop:
token = cursor;
@@ -131,7 +132,7 @@
PP "error" { RET(TOKEN_PP_ERROR); }
WHITESPACE { goto scanner_loop; }
- NEWLINE { s->line++; goto scanner_loop; }
+ NEWLINE { s->line++; RET('\n'); }
any { printf("bad char\n"); goto scanner_loop; }
*/
@@ -140,8 +141,17 @@
RET(TOKEN_PP_INCOMPLETE_COMMENT);
// The "*\/" is just to avoid screwing up text editor syntax highlighting.
/*!re2c
- "*\/" { goto scanner_loop; }
- NEWLINE { s->line++; goto multilinecomment; }
+ "*\/" {
+ if (saw_newline)
+ RET('\n');
+ goto scanner_loop;
+ }
+ NEWLINE {
+ s->line++;
+ token = cursor-1;
+ saw_newline = 1;
+ goto multilinecomment;
+ }
any { goto multilinecomment; }
*/
@@ -149,7 +159,7 @@
if (YYLIMIT == YYCURSOR)
RET(TOKEN_EOI);
/*!re2c
- NEWLINE { s->line++; goto scanner_loop; }
+ NEWLINE { s->line++; token = cursor-1; RET('\n'); }
any { goto singlelinecomment; }
*/
--- a/mojoshader_preprocessor.c Wed Feb 11 16:49:00 2009 -0500
+++ b/mojoshader_preprocessor.c Wed Feb 11 21:28:48 2009 -0500
@@ -214,8 +214,6 @@
state->source_base = source;
state->source = source;
state->token = source;
- state->insert_token = TOKEN_UNKNOWN;
- state->insert_token2 = TOKEN_UNKNOWN;
state->bytes_left = srclen;
state->line = 1;
state->next = ctx->include_stack;
@@ -344,24 +342,6 @@
return NULL; // we're done!
} // if
- if (state->insert_token != TOKEN_UNKNOWN)
- {
- state->insert_tokchar = (char) state->insert_token;
- *_token = state->insert_token;
- *_len = 1;
- state->insert_token = TOKEN_UNKNOWN;
- return &state->insert_tokchar;
- } // if
-
- else if (state->insert_token2 != TOKEN_UNKNOWN)
- {
- state->insert_tokchar = (char) state->insert_token2;
- *_token = state->insert_token2;
- *_len = 1;
- state->insert_token2 = TOKEN_UNKNOWN;
- return &state->insert_tokchar;
- } // if
-
Token token = preprocessor_internal_lexer(state);
if (token == TOKEN_EOI)
{
@@ -370,23 +350,6 @@
continue; // pick up again after parent's #include line.
} // if
- // Microsoft's preprocessor is weird.
- // It ignores newlines, and then inserts its own around certain
- // tokens. For example, after a semicolon. This allows HLSL code to
- // be mostly readable, and lets the ';' work as a single line comment
- // in the assembler.
- if ( (token == ((Token) ';')) || (token == ((Token) '}')) )
- state->insert_token = (Token) '\n';
- else if (token == ((Token) '{'))
- {
- state->insert_token = (Token) '{';
- state->insert_token2 = (Token) '\n';
- state->insert_tokchar = '\n';
- *_token = (Token) '\n';
- *_len = 1;
- return &state->insert_tokchar;
- } // else if
-
*_token = token;
*_len = (unsigned int) (state->source - state->token);
return state->token;
@@ -642,6 +605,12 @@
MOJOSHADER_includeClose include_close,
MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
{
+ #ifdef _WINDOWS
+ static const char endline[] = { '\r', '\n' };
+ #else
+ static const char endline[] = { '\n' };
+ #endif
+
ErrorList *errors = NULL;
int error_count = 0;
@@ -669,34 +638,61 @@
int nl = 1;
int indent = 0;
unsigned int len = 0;
+ int out_of_memory = 0;
while ((tokstr = preprocessor_nexttoken(pp, &len, &token)) != NULL)
{
- #ifdef _WINDOWS
- static const char endline[] = { '\r', '\n' };
- #else
- static const char endline[] = { '\n' };
- #endif
+ int isnewline = 0;
+
+ assert(token != TOKEN_EOI);
+
+ if (!out_of_memory)
+ out_of_memory = preprocessor_outofmemory(pp);
+
+ // Microsoft's preprocessor is weird.
+ // It ignores newlines, and then inserts its own around certain
+ // tokens. For example, after a semicolon. This allows HLSL code to
+ // be mostly readable, instead of a stream of tokens.
+ if (token == ((Token) '\n'))
+ ; // ignore.
- const int isnewline = (token == ((Token) '\n'));
- if (isnewline)
+ else if ( (token == ((Token) '}')) || (token == ((Token) ';')) )
{
- tokstr = endline; // convert to platform-specific.
- len = sizeof (endline);
+ if (!out_of_memory)
+ {
+ if ( (token == ((Token) '}')) && (indent > 0) )
+ indent--;
+
+ out_of_memory =
+ (!indent_buffer(&buffer, indent, nl, m, d)) ||
+ (!add_to_buffer(&buffer, tokstr, len, m, d)) ||
+ (!add_to_buffer(&buffer, endline, sizeof (endline), m, d));
+
+ isnewline = 1;
+ } // if
} // if
- if ((token == ((Token) '}')) && (indent > 0))
- indent--;
-
- int out_of_memory = preprocessor_outofmemory(pp);
+ else if (token == ((Token) '{'))
+ {
+ if (!out_of_memory)
+ {
+ out_of_memory =
+ (!add_to_buffer(&buffer,endline,sizeof (endline),m,d)) ||
+ (!add_to_buffer(&buffer, "{", 1, m, d)) ||
+ (!add_to_buffer(&buffer,endline,sizeof (endline),m,d));
+ indent++;
+ isnewline = 1;
+ } // if
+ } // else if
- if ((!out_of_memory) && (!isnewline))
- out_of_memory = !indent_buffer(&buffer, indent, nl, m, d);
+ else
+ {
+ if (!out_of_memory)
+ {
+ out_of_memory = (!indent_buffer(&buffer, indent, nl, m, d)) ||
+ (!add_to_buffer(&buffer, tokstr, len, m, d));
- if (!out_of_memory)
- out_of_memory = !add_to_buffer(&buffer, tokstr, len, m, d);
-
- if (token == ((Token) '{'))
- indent++;
+ } // if
+ } // else
nl = isnewline;
@@ -760,6 +756,8 @@
} // if
} // while
+ assert((token == TOKEN_EOI) || (out_of_memory));
+
preprocessor_end(pp);
const size_t total_bytes = buffer.total_bytes;