Cleaned up TokenData FIXMEs, and added string cache. calculator-experiment
authorRyan C. Gordon <icculus@icculus.org>
Mon, 08 Feb 2010 23:51:32 -0500
branchcalculator-experiment
changeset 823 48757134a880
parent 822 fa78ed1fe469
child 824 f4b3ba2b435d
Cleaned up TokenData FIXMEs, and added string cache.
calculator.c
calculator.lemon
--- a/calculator.c	Mon Feb 08 04:42:51 2010 -0500
+++ b/calculator.c	Mon Feb 08 23:51:32 2010 -0500
@@ -5,12 +5,19 @@
 #define LEMON_SUPPORT_TRACING 1
 #endif
 
-typedef struct TokenData
+typedef union TokenData
 {
-    const char *token;
-    unsigned int tokenlen;
+    int64 i64;
+    double dbl;
+    const char *string;
 } TokenData;
 
+typedef struct StringBucket
+{
+    char *string;
+    struct StringBucket *next;
+} StringBucket;
+
 typedef struct Context
 {
     int isfail;
@@ -21,6 +28,8 @@
     int error_count;
     ErrorList *errors;
     Preprocessor *preprocessor;
+    StringBucket *string_hashtable[256];
+    // !!! FIXME: do these really need to be in here?
     const char *token;
     unsigned int tokenlen;
     Token tokenval;
@@ -187,18 +196,6 @@
     const char *string;
 } ExpressionStringLiteral;
 
-static const char *new_identifier(Context *ctx, const TokenData *data)
-{
-    // !!! FIXME: this needs to cache strings.
-    const unsigned int len = data->tokenlen;
-    char *retval = Malloc(ctx, len + 1);
-    if (retval == NULL)
-        return NULL;
-    memcpy(retval, data->token, len);
-    retval[len] = '\0';
-    return retval;
-} // new_identifier
-
 static Expression *new_unary_expr(Context *ctx, const Operator op,
                                   Expression *operand)
 {
@@ -233,11 +230,11 @@
     return (Expression *) retval;
 } // new_ternary_expr
 
-static Expression *new_identifier_expr(Context *ctx, const char *identifier)
+static Expression *new_identifier_expr(Context *ctx, const TokenData *data)
 {
     NEW_EXPR(ExpressionIdentifier);
     retval->op = OP_IDENTIFIER;
-    retval->identifier = identifier;
+    retval->identifier = data->string;  // cached; don't copy string.
     return (Expression *) retval;
 } // new_identifier_expr
 
@@ -282,7 +279,7 @@
 {
     NEW_EXPR(ExpressionIntLiteral);
     retval->op = OP_INT_LITERAL;
-    retval->value = strtoi64(data->token, data->tokenlen);
+    retval->value = data->i64;
     return (Expression *) retval;
 } // new_literal_int_expr
 
@@ -299,7 +296,7 @@
 {
     NEW_EXPR(ExpressionFloatLiteral);
     retval->op = OP_FLOAT_LITERAL;
-    retval->value = strtodouble(data->token, data->tokenlen);
+    retval->value = data->dbl;
     return (Expression *) retval;
 } // new_literal_float_expr
 
@@ -307,7 +304,7 @@
 {
     NEW_EXPR(ExpressionStringLiteral);
     retval->op = OP_STRING_LITERAL;
-    retval->string = new_identifier(ctx, data);
+    retval->string = data->string;  // cached; don't copy string.
     return (Expression *) retval;
 } // new_string_literal_expr
 
@@ -517,14 +514,9 @@
         free_expr(ctx, ternary->center);
         free_expr(ctx, ternary->right);
     } // else if
-    else if (expr->op == OP_STRING_LITERAL)
-    {
-        Free(ctx, (void *) ((ExpressionStringLiteral *)expr)->string);
-    } // else if
-    else if (expr->op == OP_IDENTIFIER)
-    {
-        Free(ctx, (void *) ((ExpressionIdentifier *)expr)->identifier);
-    } // else if
+
+    // don't need to free extra fields in other types at the moment.
+
     Free(ctx, expr);
 } // free_expr
 
@@ -536,6 +528,82 @@
 } // parse_complete
 
 
+// !!! FIXME: sort of cut-and-paste from the preprocessor...
+
+// this is djb's xor hashing function.
+static inline uint32 hash_string_djbxor(const char *str, unsigned int len)
+{
+    register uint32 hash = 5381;
+    while (len--)
+        hash = ((hash << 5) + hash) ^ *(str++);
+    return hash;
+} // hash_string_djbxor
+
+static inline uint8 hash_string(const char *str, const unsigned int len)
+{
+    return (uint8) hash_string_djbxor(str, len);
+} // hash_string
+
+static const char *cache_string(Context *ctx, const char *str,
+                                const unsigned int len)
+{
+    const uint8 hash = hash_string(str, len);
+    StringBucket *bucket = ctx->string_hashtable[hash];
+    StringBucket *prev = NULL;
+    while (bucket)
+    {
+        const char *bstr = bucket->string;
+        if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0))
+        {
+            // Matched! Move this to the front of the list.
+            if (prev != NULL)
+            {
+                assert(prev->next == bucket);
+                prev->next = bucket->next;
+                bucket->next = ctx->string_hashtable[hash];
+                ctx->string_hashtable[hash] = bucket;
+            } // if
+            return bstr; // already cached
+        } // if
+        prev = bucket;
+        bucket = bucket->next;
+    } // while
+
+    // no match, add to the table.
+    bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket));
+    if (bucket == NULL)
+        return NULL;
+    bucket->string = (char *) Malloc(ctx, len + 1);
+    if (bucket->string == NULL)
+    {
+        Free(ctx, bucket);
+        return NULL;
+    } // if
+    memcpy(bucket->string, str, len);
+    bucket->string[len] = '\0';
+    bucket->next = ctx->string_hashtable[hash];
+    ctx->string_hashtable[hash] = bucket;
+    return bucket->string;
+} // cache_string
+
+static void free_string_cache(Context *ctx)
+{
+    size_t i;
+    for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++)
+    {
+        StringBucket *bucket = ctx->string_hashtable[i];
+        ctx->string_hashtable[i] = NULL;
+        while (bucket)
+        {
+            StringBucket *next = bucket->next;
+            Free(ctx, bucket->string);
+            Free(ctx, bucket);
+            bucket = next;
+        } // while
+    } // for
+} // free_string_cache
+
+
 // This is where the actual parsing happens. It's Lemon-generated!
 #define __MOJOSHADER_CALC_COMPILER__ 1
 #include "calculator.h"
@@ -610,6 +678,8 @@
                              MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
 {
     Context ctx;
+    TokenData data;
+
     if (m == NULL) m = MOJOSHADER_internal_malloc;
     if (f == NULL) f = MOJOSHADER_internal_free;
 
@@ -628,17 +698,37 @@
     #endif
 
     do {
-        ctx.token = preprocessor_nexttoken(ctx.preprocessor,
-                                                &ctx.tokenlen,
-                                                &ctx.tokenval);
-        // !!! FIXME: this can't refer directly to pointers in the stream,
-        // !!! FIXME:  as they can be free()'d before we actually use them
-        // !!! FIXME:  when a rule reduces down later.
-        TokenData token = { ctx.token, ctx.tokenlen };
-        ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx);
+        ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen,
+                                           &ctx.tokenval);
+
+        const int lemon_token = convert_to_lemon_token(&ctx);
+        switch (lemon_token)
+        {
+            case TOKEN_CALC_INT_CONSTANT:
+                data.i64 = strtoi64(ctx.token, ctx.tokenlen);
+                break;
+
+            case TOKEN_CALC_FLOAT_CONSTANT:
+                data.dbl = strtodouble(ctx.token, ctx.tokenlen);
+                break;
+
+            case TOKEN_CALC_STRING_LITERAL:
+            case TOKEN_CALC_IDENTIFIER:
+                data.string = cache_string(&ctx, ctx.token, ctx.tokenlen);
+                break;
+
+            default:
+                data.i64 = 0;
+                break;
+        } // switch
+
+        ParseCalculator(pParser, lemon_token, data, &ctx);
     } while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI));
+
     ParseCalculatorFree(pParser, f, d);
-}
+    // !!! FIXME: destruct (ctx) here.
+    free_string_cache(&ctx);
+} // MOJOSHADER_compile
 
 int main(int argc, char **argv)
 {
--- a/calculator.lemon	Mon Feb 08 04:42:51 2010 -0500
+++ b/calculator.lemon	Mon Feb 08 23:51:32 2010 -0500
@@ -74,13 +74,14 @@
 
 calculator ::= expression(B). { parse_complete(ctx, B); }
 
-%type identifier { const char * }
+// !!! FIXME: why is this a non-terminal?
+%type identifier { TokenData }
 %destructor identifier { (void) ctx; }  // !!! FIXME: remove this later, it's just to shut up the compiler for now.
-identifier(A) ::= IDENTIFIER(B). { A = new_identifier(ctx, &B); }
+identifier(A) ::= IDENTIFIER(B). { A = B; }
 
 // the expression stuff is based on Jeff Lee's ANSI C grammar.
 %type primary_expr { Expression * }
-primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); }
+primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, &B); }
 primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); }
 primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); }
 primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); }
@@ -92,7 +93,7 @@
 postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); }
 postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); }
 //postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor
-postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C)); }
+postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); }
 postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); }
 postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); }