From 1737a3c575aea8477837eae506f61059e237c844 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <icculus@icculus.org>
Date: Fri, 13 Feb 2009 01:08:50 -0500
Subject: [PATCH] Reworked and documented preprocessor tokens.

---
 mojoshader_internal.h     | 28 +++++++++++++++++++++++-----
 mojoshader_lexer.re       |  6 +++---
 mojoshader_preprocessor.c |  2 +-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/mojoshader_internal.h b/mojoshader_internal.h
index b80d62cf..63121f0f 100644
--- a/mojoshader_internal.h
+++ b/mojoshader_internal.h
@@ -289,6 +289,10 @@ typedef struct ErrorList
 typedef enum
 {
     TOKEN_UNKNOWN = 256,  // start past ASCII character values.
+
+    // These are all C-like constructs. Tokens < 256 may be single
+    //  chars (like '+' or whatever). The tokens listed below are the
+    //  multi-char sequences (like "+=" or whatever).
     TOKEN_IDENTIFIER,
     TOKEN_INT_LITERAL,
     TOKEN_FLOAT_LITERAL,
@@ -314,6 +318,24 @@ typedef enum
     TOKEN_EQL,
     TOKEN_NEQ,
     TOKEN_HASHHASH,
+
+    // This is returned at the end of input...no more to process.
+    TOKEN_EOI,
+
+    // This is returned for char sequences we think are bogus. You'll have
+    //  to judge for yourself. In most cases, you'll probably just fail with
+    //  bogus syntax without explicitly checking for this token.
+    TOKEN_BAD_CHARS,
+
+    // This is returned if there's an error condition (the error is returned
+    //  as a null-terminated string from preprocessor_nexttoken(), instead
+    //  of actual token data). You can continue getting tokens after this
+    //  is reported. It happens for things like missing #includes, etc.
+    TOKEN_PREPROCESSING_ERROR,
+
+    TOKEN_INCOMPLETE_COMMENT,  // caught, becomes TOKEN_PREPROCESSING_ERROR
+
+    // These are all caught by the preprocessor. Caller won't ever see them.
     TOKEN_PP_INCLUDE,
     TOKEN_PP_LINE,
     TOKEN_PP_DEFINE,
@@ -324,11 +346,7 @@ typedef enum
     TOKEN_PP_ELSE,
     TOKEN_PP_ELIF,
     TOKEN_PP_ENDIF,
-    TOKEN_PP_ERROR,
-    TOKEN_PP_INCOMPLETE_COMMENT,
-    TOKEN_PP_BAD_CHARS,
-    TOKEN_EOI,
-    TOKEN_PREPROCESSING_ERROR
+    TOKEN_PP_ERROR,  // caught, becomes TOKEN_PREPROCESSING_ERROR
 } Token;
 
 
diff --git a/mojoshader_lexer.re b/mojoshader_lexer.re
index 6934e20e..ea06f22f 100644
--- a/mojoshader_lexer.re
+++ b/mojoshader_lexer.re
@@ -151,7 +151,7 @@ scanner_loop:
 
 multilinecomment:
     if (YYLIMIT == YYCURSOR)
-        RET(TOKEN_PP_INCOMPLETE_COMMENT);
+        RET(TOKEN_INCOMPLETE_COMMENT);
     matchptr = cursor;
 // The "*\/" is just to avoid screwing up text editor syntax highlighting.
 /*!re2c
@@ -180,10 +180,10 @@ singlelinecomment:
 
 bad_chars:
     if (YYLIMIT == YYCURSOR)
-        RET(TOKEN_PP_BAD_CHARS);
+        RET(TOKEN_BAD_CHARS);
 
 /*!re2c
-    ANYLEGAL        { cursor--; RET(TOKEN_PP_BAD_CHARS); }
+    ANYLEGAL        { cursor--; RET(TOKEN_BAD_CHARS); }
     ANY             { goto bad_chars; }
 */
 
diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c
index e034128e..2b6fc5ba 100644
--- a/mojoshader_preprocessor.c
+++ b/mojoshader_preprocessor.c
@@ -423,7 +423,7 @@ static inline const char *_preprocessor_nexttoken(Preprocessor *_ctx,
             continue;  // pick up again after parent's #include line.
         } // if
 
-        else if (token == TOKEN_PP_INCOMPLETE_COMMENT)
+        else if (token == TOKEN_INCOMPLETE_COMMENT)
         {
             fail(ctx, "Incomplete multiline comment");
             continue;  // will return at top of loop.