mojoshader_compiler.c
branchcalculator-experiment
changeset 827 2f955ce29b7b
parent 792 e0c5fc7d71c3
child 835 743e14b386f3
--- a/mojoshader_compiler.c	Tue Feb 09 01:52:08 2010 -0500
+++ b/mojoshader_compiler.c	Tue Feb 09 02:55:38 2010 -0500
@@ -5,46 +5,327 @@
 #define LEMON_SUPPORT_TRACING 1
 #endif
 
-typedef struct TokenData
+typedef union TokenData  // semantic value handed to ParseHLSL() with each token.
 {
-    const char *token;
-    unsigned int tokenlen;
+    int64 i64;  // TOKEN_HLSL_INT_CONSTANT value; zeroed for tokens with no data.
+    double dbl;  // TOKEN_HLSL_FLOAT_CONSTANT value.
+    const char *string;  // cached string for identifier-like and string tokens.
 } TokenData;
 
+typedef struct StringBucket  // one node in a string cache hash chain.
+{
+    char *string;  // null-terminated copy of the cached text.
+    struct StringBucket *next;  // next node in the same chain, or NULL.
+} StringBucket;
+
 typedef struct Context
 {
+    int isfail;  // non-zero once compiling should stop (set on out of memory).
+    int out_of_memory;  // non-zero once an allocation has failed.
+    MOJOSHADER_malloc malloc;  // allocator callback used by Malloc().
+    MOJOSHADER_free free;  // deallocator callback used by Free().
+    void *malloc_data;  // passed as the last argument to both callbacks.
+    int error_count;
+    ErrorList *errors;
     Preprocessor *preprocessor;
-    const char *token;
-    unsigned int tokenlen;
-    Token tokenval;
-    unsigned int parse_errors;
-    TokenData usertypes[512];  // !!! FIXME: dynamic allocation
-    int usertype_count;  // !!! FIXME: dynamic allocation
+    StringBucket *string_hashtable[256];  // string cache; indexed by 8-bit hash.
+    const char *usertypes[512];  // !!! FIXME: dynamic allocation
+    int usertype_count;  // number of entries used in (usertypes).
 } Context;
 
 
-static void add_usertype(Context *ctx, const char *token, unsigned int len)
+// Convenience functions for allocators...
+
+static inline void out_of_memory(Context *ctx)  // flag (ctx) as failed: no memory.
+{
+    ctx->isfail = ctx->out_of_memory = 1;
+} // out_of_memory
+
+static inline void *Malloc(Context *ctx, const size_t len)
+{
+    void *retval = ctx->malloc((int) len, ctx->malloc_data);  // (len) is narrowed to int.
+    if (retval == NULL)
+        out_of_memory(ctx);  // record the failure; the caller still gets NULL.
+    return retval;
+} // Malloc
+
+static inline char *StrDup(Context *ctx, const char *str)  // copy (str) via Malloc().
+{
+    char *retval = (char *) Malloc(ctx, strlen(str) + 1);  // +1 for the terminator.
+    if (retval != NULL)
+        strcpy(retval, str);
+    return retval;  // NULL if the allocation failed.
+} // StrDup
+
+static inline void Free(Context *ctx, void *ptr)  // release (ptr) via ctx->free.
+{
+    if (ptr != NULL)  // check for NULL in case of dumb free() impl.
+        ctx->free(ptr, ctx->malloc_data);
+} // Free
+
+typedef enum Operator
+{
+    OP_START_RANGE_UNARY,  // marker: unary operators lie strictly after this...
+    OP_POSTINCREMENT,
+    OP_POSTDECREMENT,
+    OP_PREINCREMENT,
+    OP_PREDECREMENT,
+    OP_NEGATE,
+    OP_COMPLEMENT,
+    OP_NOT,
+    OP_END_RANGE_UNARY,  // ...and strictly before this (see operator_is_unary).
+
+    OP_START_RANGE_BINARY,  // marker pair tested by operator_is_binary().
+    OP_DEREF_ARRAY,
+    OP_CALLFUNC,
+    OP_DEREF_STRUCT,
+    OP_COMMA,
+    OP_MULTIPLY,
+    OP_DIVIDE,
+    OP_MODULO,
+    OP_ADD,
+    OP_SUBTRACT,
+    OP_LSHIFT,
+    OP_RSHIFT,
+    OP_LESSTHAN,
+    OP_GREATERTHAN,
+    OP_LESSTHANOREQUAL,
+    OP_GREATERTHANOREQUAL,
+    OP_EQUAL,
+    OP_NOTEQUAL,
+    OP_BINARYAND,
+    OP_BINARYXOR,
+    OP_BINARYOR,
+    OP_LOGICALAND,
+    OP_LOGICALOR,
+    OP_ASSIGN,
+    OP_MULASSIGN,
+    OP_DIVASSIGN,
+    OP_MODASSIGN,
+    OP_ADDASSIGN,
+    OP_SUBASSIGN,
+    OP_LSHIFTASSIGN,
+    OP_RSHIFTASSIGN,
+    OP_ANDASSIGN,
+    OP_XORASSIGN,
+    OP_ORASSIGN,
+    OP_END_RANGE_BINARY,
+
+    OP_START_RANGE_TERNARY,  // marker pair tested by operator_is_ternary().
+    OP_CONDITIONAL,
+    OP_END_RANGE_TERNARY,
+
+    OP_START_RANGE_DATA,  // identifier and literal nodes sit in this range.
+    OP_IDENTIFIER,
+    OP_INT_LITERAL,
+    OP_FLOAT_LITERAL,
+    OP_STRING_LITERAL,
+    OP_END_RANGE_DATA,
+} Operator;
+
+static inline int operator_is_unary(const Operator op)  // non-zero if (op) is unary.
+{
+    return ((op > OP_START_RANGE_UNARY) && (op < OP_END_RANGE_UNARY));  // markers excluded.
+} // operator_is_unary
+
+static inline int operator_is_binary(const Operator op)  // non-zero if (op) is binary.
+{
+    return ((op > OP_START_RANGE_BINARY) && (op < OP_END_RANGE_BINARY));  // markers excluded.
+} // operator_is_binary
+
+static inline int operator_is_ternary(const Operator op)  // non-zero if (op) is ternary.
+{
+    return ((op > OP_START_RANGE_TERNARY) && (op < OP_END_RANGE_TERNARY));  // markers excluded.
+} // operator_is_ternary
+
+
+typedef struct Expression  // common view: each Expression* struct starts with (op).
+{
+    Operator op;  // operator; tells which Expression* struct this node really is.
+} Expression;
+
+#define NEW_EXPR(cls) \
+    cls *retval = Malloc(ctx, sizeof (cls)); \
+    if (retval == NULL) { return NULL; }  // declares (retval); needs (ctx) in scope; returns from the enclosing function on failure.
+
+typedef struct ExpressionUnary  // node built by new_unary_expr().
+{
+    Operator op;  // operator
+    Expression *operand;  // freed along with this node by free_expr().
+} ExpressionUnary;
+
+typedef struct ExpressionBinary  // node built by new_binary_expr().
+{
+    Operator op;  // operator
+    Expression *left;  // both operands are freed with this node by free_expr().
+    Expression *right;
+} ExpressionBinary;
+
+typedef struct ExpressionTernary  // node built by new_ternary_expr().
+{
+    Operator op;  // operator
+    Expression *left;  // all three operands are freed with this node by free_expr().
+    Expression *center;
+    Expression *right;
+} ExpressionTernary;
+
+typedef struct ExpressionIdentifier  // node built by new_identifier_expr().
+{
+    Operator op;  // Always OP_IDENTIFIER
+    const char *identifier;  // pointer is stored as given; free_expr() does not free it.
+} ExpressionIdentifier;
+
+typedef struct ExpressionIntLiteral  // node built by new_literal_int_expr().
+{
+    Operator op;  // Always OP_INT_LITERAL
+    int64 value;  // the literal's value.
+} ExpressionIntLiteral;
+
+typedef struct ExpressionFloatLiteral  // node built by new_literal_float_expr().
+{
+    Operator op;  // Always OP_FLOAT_LITERAL
+    double value;  // the literal's value.
+} ExpressionFloatLiteral;
+
+typedef struct ExpressionStringLiteral  // node built by new_literal_string_expr().
+{
+    Operator op;  // Always OP_STRING_LITERAL
+    const char *string;  // pointer is stored as given; free_expr() does not free it.
+} ExpressionStringLiteral;
+
+static Expression *new_unary_expr(Context *ctx, const Operator op,
+                                  Expression *operand)
+{
+    NEW_EXPR(ExpressionUnary);  // declares (retval); returns NULL if allocation fails.
+    assert(operator_is_unary(op));
+    retval->op = op;
+    retval->operand = operand;  // pointer is stored as-is, not copied.
+    return (Expression *) retval;
+} // new_unary_expr
+
+static Expression *new_binary_expr(Context *ctx, const Operator op,
+                                   Expression *left, Expression *right)
+{
+    NEW_EXPR(ExpressionBinary);  // declares (retval); returns NULL if allocation fails.
+    assert(operator_is_binary(op));
+    retval->op = op;
+    retval->left = left;  // operand pointers are stored as-is, not copied.
+    retval->right = right;
+    return (Expression *) retval;
+} // new_binary_expr
+
+static Expression *new_ternary_expr(Context *ctx, const Operator op,
+                                    Expression *left, Expression *center,
+                                    Expression *right)
+{
+    NEW_EXPR(ExpressionTernary);  // declares (retval); returns NULL if allocation fails.
+    assert(operator_is_ternary(op));
+    retval->op = op;
+    retval->left = left;  // operand pointers are stored as-is, not copied.
+    retval->center = center;
+    retval->right = right;
+    return (Expression *) retval;
+} // new_ternary_expr
+
+static Expression *new_identifier_expr(Context *ctx, const char *string)
+{
+    NEW_EXPR(ExpressionIdentifier);  // declares (retval); returns NULL if allocation fails.
+    retval->op = OP_IDENTIFIER;
+    retval->identifier = string;  // cached; don't copy string.
+    return (Expression *) retval;
+} // new_identifier_expr
+
+static inline int64 strtoi64(const char *str, unsigned int len)
+{
+    // Parses optional leading spaces, an optional '-', then decimal digits
+    //  from the first (len) chars of (str). Stops at the first non-digit;
+    //  returns 0 if there are no digits. (str) need not be null-terminated.
+    //  Values that don't fit in an int64 are not detected.
+    int64 retval = 0;
+    int64 sign = 1;
+    unsigned int i;
+
+    while ((len) && (*str == ' '))
+    {
+        str++;
+        len--;
+    } // while
+
+    if ((len) && (*str == '-'))
+    {
+        sign = -1;
+        str++;
+        len--;
+    } // if
+
+    // Accumulate from the most significant digit, applying the sign to each
+    //  digit. No running power of ten is kept, so nothing overflows for
+    //  values that fit in an int64 (the most negative value included).
+    for (i = 0; i < len; i++)
+    {
+        const char ch = str[i];
+        if ((ch < '0') || (ch > '9'))
+            break;
+        retval = (retval * 10) + (((int64) (ch - '0')) * sign);
+    } // for
+
+    return retval;
+} // strtoi64
+
+static Expression *new_literal_int_expr(Context *ctx, const int64 value)
+{
+    NEW_EXPR(ExpressionIntLiteral);  // declares (retval); returns NULL if allocation fails.
+    retval->op = OP_INT_LITERAL;
+    retval->value = value;
+    return (Expression *) retval;
+} // new_literal_int_expr
+
+static inline double strtodouble(const char *_str, unsigned int len)
+{
+    // !!! FIXME: laziness prevails.
+    char *str = (char *) alloca(len+1);  // stack copy so strtod() sees a terminator.
+    memcpy(str, _str, len);
+    str[len] = '\0';
+    return strtod(str, NULL);  // no end pointer is requested, so bad input isn't reported.
+} // strtodouble
+
+static Expression *new_literal_float_expr(Context *ctx, const double dbl)
+{
+    NEW_EXPR(ExpressionFloatLiteral);  // declares (retval); returns NULL if allocation fails.
+    retval->op = OP_FLOAT_LITERAL;
+    retval->value = dbl;
+    return (Expression *) retval;
+} // new_literal_float_expr
+
+static Expression *new_literal_string_expr(Context *ctx, const char *string)
+{
+    NEW_EXPR(ExpressionStringLiteral);  // declares (retval); returns NULL if allocation fails.
+    retval->op = OP_STRING_LITERAL;
+    retval->string = string;  // cached; don't copy string.
+    return (Expression *) retval;
+} // new_literal_string_expr
+
+
+static void add_usertype(Context *ctx, const char *sym)  // (sym) is stored, not copied.
 {
     // !!! FIXME: error if this is a reserved keyword.
     // !!! FIXME: dynamic allocation
     assert(ctx->usertype_count < STATICARRAYLEN(ctx->usertypes));
-    ctx->usertypes[ctx->usertype_count].token = token;
-    ctx->usertypes[ctx->usertype_count].tokenlen = len;
+    ctx->usertypes[ctx->usertype_count] = sym;  // the count is advanced once, just below.
     ctx->usertype_count++;
 } // add_usertype
 
-static int is_usertype(const Context *ctx)
+static int is_usertype(const Context *ctx, const char *token,
+                       const unsigned int tokenlen)
 {
     // !!! FIXME: dynamic allocation
     // !!! FIXME: should probably redesign this anyhow.
     int i;
     for (i = 0; i < ctx->usertype_count; i++)
     {
-        if (ctx->usertypes[i].tokenlen == ctx->tokenlen)
-        {
-            if (memcmp(ctx->usertypes[i].token, ctx->token, ctx->tokenlen)==0)
-                return 1;
-        } // if
+        const char *type = ctx->usertypes[i];  // (token) isn't terminated; compare by length.
+        if ((strncmp(type, token, tokenlen) == 0) && (type[tokenlen] == '\0'))
+            return 1;  // exact match only; a longer type sharing the prefix keeps the search going.
     } // for
 
     return 0;
@@ -55,8 +336,112 @@
 #define __MOJOSHADER_HLSL_COMPILER__ 1
 #include "mojoshader_parser_hlsl.h"
 
+
+static void free_expr(Context *ctx, Expression *expr)  // frees (expr) and its operands.
+{
+    if (expr == NULL)  // operand lost to a failed allocation; nothing to free.
+        return;
+    if (operator_is_unary(expr->op))
+    {
+        const ExpressionUnary *unary = (const ExpressionUnary *) expr;
+        free_expr(ctx, unary->operand);
+    } // if
+    else if (operator_is_binary(expr->op))
+    {
+        const ExpressionBinary *binary = (const ExpressionBinary *) expr;
+        free_expr(ctx, binary->left);
+        free_expr(ctx, binary->right);
+    } // else if
+    else if (operator_is_ternary(expr->op))
+    {
+        const ExpressionTernary *ternary = (const ExpressionTernary *) expr;
+        free_expr(ctx, ternary->left);
+        free_expr(ctx, ternary->center);
+        free_expr(ctx, ternary->right);
+    } // else if
+    // don't need to free extra fields in other types at the moment.
+    Free(ctx, expr);
+} // free_expr
+
+// !!! FIXME: sort of cut-and-paste from the preprocessor...
+
+// this is djb's xor hashing function.
+static inline uint32 hash_string_djbxor(const char *str, unsigned int len)
+{
+    register uint32 hash = 5381;  // starting value of the hash.
+    while (len--)
+        hash = ((hash << 5) + hash) ^ *(str++);  // hash * 33, xor'd with the next char.
+    return hash;
+} // hash_string_djbxor
+
+static inline uint8 hash_string(const char *str, const unsigned int len)
+{
+    return (uint8) hash_string_djbxor(str, len);  // truncated to index the 256-entry table.
+} // hash_string
+
+static const char *cache_string(Context *ctx, const char *str,
+                                const unsigned int len)
+{
+    const uint8 hash = hash_string(str, len);  // selects the bucket chain.
+    StringBucket *bucket = ctx->string_hashtable[hash];
+    StringBucket *prev = NULL;
+    while (bucket)
+    {
+        const char *bstr = bucket->string;
+        if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0))  // same text, same length.
+        {
+            // Matched! Move this to the front of the list.
+            if (prev != NULL)
+            {
+                assert(prev->next == bucket);
+                prev->next = bucket->next;
+                bucket->next = ctx->string_hashtable[hash];
+                ctx->string_hashtable[hash] = bucket;
+            } // if
+            return bstr; // already cached
+        } // if
+        prev = bucket;
+        bucket = bucket->next;
+    } // while
+
+    // no match, add to the table.
+    bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket));
+    if (bucket == NULL)
+        return NULL;  // Malloc() has already flagged the context.
+    bucket->string = (char *) Malloc(ctx, len + 1);  // +1 for the terminator.
+    if (bucket->string == NULL)
+    {
+        Free(ctx, bucket);
+        return NULL;
+    } // if
+    memcpy(bucket->string, str, len);  // exactly (len) chars; terminator added below.
+    bucket->string[len] = '\0';
+    bucket->next = ctx->string_hashtable[hash];  // new entries go to the front of the chain.
+    ctx->string_hashtable[hash] = bucket;
+    return bucket->string;  // freed only by free_string_cache().
+} // cache_string
+
+static void free_string_cache(Context *ctx)  // frees every cached string and bucket.
+{
+    size_t i;
+    for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++)
+    {
+        StringBucket *bucket = ctx->string_hashtable[i];
+        ctx->string_hashtable[i] = NULL;  // leave the table empty as we go.
+        while (bucket)
+        {
+            StringBucket *next = bucket->next;  // read before the bucket is freed.
+            Free(ctx, bucket->string);
+            Free(ctx, bucket);
+            bucket = next;
+        } // while
+    } // for
+} // free_string_cache
+
+
 // This does not check correctness (POSITIONT993842 passes, etc).
-static int is_semantic(const Context *ctx)
+static int is_semantic(const Context *ctx, const char *token,
+                       const unsigned int tokenlen)
 {
     static const char *names[] = {
         "BINORMAL", "BLENDINDICES", "BLENDWEIGHT",
@@ -71,9 +456,9 @@
     {
         const char *name = *i;
         const size_t namelen = strlen(name);
-        if (ctx->tokenlen < namelen)
+        if (tokenlen < namelen)
             continue;
-        else if (memcmp(ctx->token, name, namelen) != 0)
+        else if (memcmp(token, name, namelen) != 0)
             continue;
 
         for (name += namelen; *name; name++)
@@ -90,9 +475,10 @@
 } // is_semantic
 
 
-static int convert_to_lemon_token(const Context *ctx)
+static int convert_to_lemon_token(const Context *ctx, const char *token,
+                                  unsigned int tokenlen, const Token tokenval)
 {
-    switch (ctx->tokenval)
+    switch (tokenval)
     {
         case ((Token) ','): return TOKEN_HLSL_COMMA;
         case ((Token) '='): return TOKEN_HLSL_ASSIGN;
@@ -143,7 +529,7 @@
         case ((Token) '}'): return TOKEN_HLSL_RBRACE;
 
         case ((Token) TOKEN_IDENTIFIER):
-            #define tokencmp(t) ((ctx->tokenlen == strlen(t)) && (memcmp(ctx->token, t, ctx->tokenlen) == 0))
+            #define tokencmp(t) ((tokenlen == strlen(t)) && (memcmp(token, t, tokenlen) == 0))
             //case ((Token) ''): return TOKEN_HLSL_TYPECAST
             //if (tokencmp("")) return TOKEN_HLSL_TYPE_NAME
             //if (tokencmp("...")) return TOKEN_HLSL_ELIPSIS
@@ -335,9 +721,9 @@
 
             #undef tokencmp
 
-            if (is_semantic(ctx))
+            if (is_semantic(ctx, token, tokenlen))
                 return TOKEN_HLSL_SEMANTIC;
-            else if (is_usertype(ctx))
+            else if (is_usertype(ctx, token, tokenlen))
                 return TOKEN_HLSL_USERTYPE;
             return TOKEN_HLSL_IDENTIFIER;
 
@@ -352,18 +738,27 @@
 
 
 void MOJOSHADER_compile(const char *filename,
-                             const char *source, unsigned int sourcelen,
-                             const MOJOSHADER_preprocessorDefine *defines,
-                             unsigned int define_count,
-                             MOJOSHADER_includeOpen include_open,
-                             MOJOSHADER_includeClose include_close,
-                             MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
+                        const char *source, unsigned int sourcelen,
+                        const MOJOSHADER_preprocessorDefine *defines,
+                        unsigned int define_count,
+                        MOJOSHADER_includeOpen include_open,
+                        MOJOSHADER_includeClose include_close,
+                        MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
 {
     Context ctx;
+    TokenData data;
+    unsigned int tokenlen;
+    Token tokenval;
+    const char *token;
+    int lemon_token;
+
     if (m == NULL) m = MOJOSHADER_internal_malloc;
     if (f == NULL) f = MOJOSHADER_internal_free;
 
     memset(&ctx, '\0', sizeof (Context));
+    ctx.malloc = m;
+    ctx.free = f;
+    ctx.malloc_data = d;
     ctx.preprocessor = preprocessor_start(filename, source, sourcelen,
                                            include_open, include_close,
                                            defines, define_count, 0, m, f, d);
@@ -375,15 +770,37 @@
     #endif
 
     do {
-        ctx.token = preprocessor_nexttoken(ctx.preprocessor,
-                                                &ctx.tokenlen,
-                                                &ctx.tokenval);
+        token = preprocessor_nexttoken(ctx.preprocessor, &tokenlen, &tokenval);
+        lemon_token = convert_to_lemon_token(&ctx, token, tokenlen, tokenval);
+        switch (lemon_token)
+        {
+            case TOKEN_HLSL_INT_CONSTANT:
+                data.i64 = strtoi64(token, tokenlen);
+                break;
+
+            case TOKEN_HLSL_FLOAT_CONSTANT:
+                data.dbl = strtodouble(token, tokenlen);
+                break;
 
-        TokenData token = { ctx.token, ctx.tokenlen };
-        ParseHLSL(pParser, convert_to_lemon_token(&ctx), token, &ctx);
-    } while (ctx.tokenval != TOKEN_EOI);
+            case TOKEN_HLSL_SEMANTIC:
+            case TOKEN_HLSL_USERTYPE:
+            case TOKEN_HLSL_STRING_LITERAL:
+            case TOKEN_HLSL_IDENTIFIER:
+                data.string = cache_string(&ctx, token, tokenlen);
+                break;
+
+            default:
+                data.i64 = 0;
+                break;
+        } // switch
+
+        ParseHLSL(pParser, lemon_token, data, &ctx);
+    } while ((!ctx.isfail) && (tokenval != TOKEN_EOI));
+
     ParseHLSLFree(pParser, f, d);
-}
+    // !!! FIXME: destruct (ctx) here.
+    free_string_cache(&ctx);
+} // MOJOSHADER_compile
 
 // end of mojoshader_compiler.c ...