From 67966432d48c3e905be9d6e72d7e9e53a16b4557 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <icculus@icculus.org>
Date: Wed, 11 Feb 2009 21:28:48 -0500
Subject: [PATCH] Reworked preprocessor newline management.

My thought about the semicolon and assembly comments was a brain fart:
 newlines will have to go through to the assembler, and we'll just mirror
 Direct3D's preprocessor output behaviour in MOJOSHADER_preprocess() instead.

This gets rid of the nasty insert_token nonsense, as an added benefit.
---
 mojoshader_internal.h     |   3 -
 mojoshader_lexer.re       |  20 +++++--
 mojoshader_preprocessor.c | 112 +++++++++++++++++++-------------------
 3 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/mojoshader_internal.h b/mojoshader_internal.h
index 7631e0fc..31e7439e 100644
--- a/mojoshader_internal.h
+++ b/mojoshader_internal.h
@@ -320,9 +320,6 @@ typedef struct IncludeState
 {
     char *filename;
     int included;
-    Token insert_token;
-    Token insert_token2;
-    char insert_tokchar;
     const char *source_base;
     const char *source;
     const char *token;
diff --git a/mojoshader_lexer.re b/mojoshader_lexer.re
index 9fa1e2a2..8f78f2d8 100644
--- a/mojoshader_lexer.re
+++ b/mojoshader_lexer.re
@@ -24,7 +24,7 @@
 
 typedef unsigned char uchar;
 
-#define RET(t) { update_state(s, cursor, token); return t; }
+#define RET(t) do { update_state(s, cursor, token); return t; } while (0)
 #define YYCTYPE uchar
 #define YYCURSOR cursor
 #define YYLIMIT limit
@@ -43,6 +43,7 @@ Token preprocessor_internal_lexer(IncludeState *s)
     const uchar *cursor = (const uchar *) s->source;
     const uchar *token;
     const uchar *limit = cursor + s->bytes_left;
+    int saw_newline = 0;
 
 scanner_loop:
     token = cursor;
@@ -131,7 +132,7 @@ scanner_loop:
     PP "error"      { RET(TOKEN_PP_ERROR); }
 
     WHITESPACE      { goto scanner_loop; }
-    NEWLINE         { s->line++; goto scanner_loop; }
+    NEWLINE         { s->line++; RET('\n'); }
     any             { printf("bad char\n"); goto scanner_loop; }
 */
 
@@ -140,8 +141,17 @@ multilinecomment:
         RET(TOKEN_PP_INCOMPLETE_COMMENT);
 // The "*\/" is just to avoid screwing up text editor syntax highlighting.
 /*!re2c
-    "*\/"           { goto scanner_loop; }
-    NEWLINE         { s->line++; goto multilinecomment; }
+    "*\/"           {
+                        if (saw_newline)
+                            RET('\n');
+                        goto scanner_loop;
+                    }
+    NEWLINE         {
+                        s->line++;
+                        token = cursor-1;
+                        saw_newline = 1;
+                        goto multilinecomment;
+                    }
     any             { goto multilinecomment; }
 */
 
@@ -149,7 +159,7 @@ singlelinecomment:
     if (YYLIMIT == YYCURSOR)
         RET(TOKEN_EOI);
 /*!re2c
-    NEWLINE         { s->line++; goto scanner_loop; }
+    NEWLINE         { s->line++; token = cursor-1; RET('\n'); }
     any             { goto singlelinecomment; }
 */
 
diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c
index 1a697509..6687d98e 100644
--- a/mojoshader_preprocessor.c
+++ b/mojoshader_preprocessor.c
@@ -214,8 +214,6 @@ static int push_source(Context *ctx, const char *fname, const char *source,
     state->source_base = source;
     state->source = source;
     state->token = source;
-    state->insert_token = TOKEN_UNKNOWN;
-    state->insert_token2 = TOKEN_UNKNOWN;
     state->bytes_left = srclen;
     state->line = 1;
     state->next = ctx->include_stack;
@@ -344,24 +342,6 @@ static inline const char *_preprocessor_nexttoken(Preprocessor *_ctx,
             return NULL;  // we're done!
         } // if
 
-        if (state->insert_token != TOKEN_UNKNOWN)
-        {
-            state->insert_tokchar = (char) state->insert_token;
-            *_token = state->insert_token;
-            *_len = 1;
-            state->insert_token = TOKEN_UNKNOWN;
-            return &state->insert_tokchar;
-        } // if
-
-        else if (state->insert_token2 != TOKEN_UNKNOWN)
-        {
-            state->insert_tokchar = (char) state->insert_token2;
-            *_token = state->insert_token2;
-            *_len = 1;
-            state->insert_token2 = TOKEN_UNKNOWN;
-            return &state->insert_tokchar;
-        } // if
-
         Token token = preprocessor_internal_lexer(state);
         if (token == TOKEN_EOI)
         {
@@ -370,23 +350,6 @@ static inline const char *_preprocessor_nexttoken(Preprocessor *_ctx,
             continue;  // pick up again after parent's #include line.
         } // if
 
-        // Microsoft's preprocessor is weird.
-        // It ignores newlines, and then inserts its own around certain
-        //  tokens. For example, after a semicolon. This allows HLSL code to
-        //  be mostly readable, and lets the ';' work as a single line comment
-        //  in the assembler.
-        if ( (token == ((Token) ';')) || (token == ((Token) '}')) )
-            state->insert_token = (Token) '\n';
-        else if (token == ((Token) '{'))
-        {
-            state->insert_token = (Token) '{';
-            state->insert_token2 = (Token) '\n';
-            state->insert_tokchar = '\n';
-            *_token = (Token) '\n';
-            *_len = 1;
-            return &state->insert_tokchar;
-        } // else if
-
         *_token = token;
         *_len = (unsigned int) (state->source - state->token);
         return state->token;
@@ -642,6 +605,12 @@ const MOJOSHADER_preprocessData *MOJOSHADER_preprocess(const char *source,
                              MOJOSHADER_includeClose include_close,
                              MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
 {
+    #ifdef _WINDOWS
+    static const char endline[] = { '\r', '\n' };
+    #else
+    static const char endline[] = { '\n' };
+    #endif
+
     ErrorList *errors = NULL;
     int error_count = 0;
 
@@ -669,34 +638,61 @@ include_close = (MOJOSHADER_includeClose) 0x1;
     int nl = 1;
     int indent = 0;
     unsigned int len = 0;
+    int out_of_memory = 0;
     while ((tokstr = preprocessor_nexttoken(pp, &len, &token)) != NULL)
     {
-        #ifdef _WINDOWS
-        static const char endline[] = { '\r', '\n' };
-        #else
-        static const char endline[] = { '\n' };
-        #endif
-
-        const int isnewline = (token == ((Token) '\n'));
-        if (isnewline)
+        int isnewline = 0;
+
+        assert(token != TOKEN_EOI);
+
+        if (!out_of_memory)
+            out_of_memory = preprocessor_outofmemory(pp);
+
+        // Microsoft's preprocessor is weird.
+        // It ignores newlines, and then inserts its own around certain
+        //  tokens. For example, after a semicolon. This allows HLSL code to
+        //  be mostly readable, instead of a stream of tokens.
+        if (token == ((Token) '\n'))
+            ; // ignore.
+
+        else if ( (token == ((Token) '}')) || (token == ((Token) ';')) )
         {
-            tokstr = endline;  // convert to platform-specific.
-            len = sizeof (endline);
-        } // if
+            if (!out_of_memory)
+            {
+                if ( (token == ((Token) '}')) && (indent > 0) )
+                    indent--;
 
-        if ((token == ((Token) '}')) && (indent > 0))
-            indent--;
+                out_of_memory =
+                    (!indent_buffer(&buffer, indent, nl, m, d)) ||
+                    (!add_to_buffer(&buffer, tokstr, len, m, d)) ||
+                    (!add_to_buffer(&buffer, endline, sizeof (endline), m, d));
 
-        int out_of_memory = preprocessor_outofmemory(pp);
+                isnewline = 1;
+            } // if
+        } // if
 
-        if ((!out_of_memory) && (!isnewline))
-            out_of_memory = !indent_buffer(&buffer, indent, nl, m, d);
+        else if (token == ((Token) '{'))
+        {
+            if (!out_of_memory)
+            {
+                out_of_memory =
+                    (!add_to_buffer(&buffer,endline,sizeof (endline),m,d)) ||
+                    (!add_to_buffer(&buffer, "{", 1, m, d)) ||
+                    (!add_to_buffer(&buffer,endline,sizeof (endline),m,d));
+                indent++;
+                isnewline = 1;
+            } // if
+        } // else if
 
-        if (!out_of_memory)
-            out_of_memory = !add_to_buffer(&buffer, tokstr, len, m, d);
+        else
+        {
+            if (!out_of_memory)
+            {
+                out_of_memory = (!indent_buffer(&buffer, indent, nl, m, d)) ||
+                                (!add_to_buffer(&buffer, tokstr, len, m, d));
 
-        if (token == ((Token) '{'))
-            indent++;
+            } // if
+        } // else
 
         nl = isnewline;
 
@@ -760,6 +756,8 @@ include_close = (MOJOSHADER_includeClose) 0x1;
         } // if
     } // while
     
+    assert((token == TOKEN_EOI) || (out_of_memory));
+
     preprocessor_end(pp);
 
     const size_t total_bytes = buffer.total_bytes;