From 6f1a4c797b79cbe93286c2c384d045a014faa2b0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 23:51:32 -0500 Subject: [PATCH] Cleaned up TokenData FIXMEs, and added string cache. --HG-- branch : calculator-experiment --- calculator.c | 164 ++++++++++++++++++++++++++++++++++++----------- calculator.lemon | 9 +-- 2 files changed, 132 insertions(+), 41 deletions(-) diff --git a/calculator.c b/calculator.c index c94bd67a..7a09a6d5 100644 --- a/calculator.c +++ b/calculator.c @@ -5,12 +5,19 @@ #define LEMON_SUPPORT_TRACING 1 #endif -typedef struct TokenData +typedef union TokenData { - const char *token; - unsigned int tokenlen; + int64 i64; + double dbl; + const char *string; } TokenData; +typedef struct StringBucket +{ + char *string; + struct StringBucket *next; +} StringBucket; + typedef struct Context { int isfail; @@ -21,6 +28,8 @@ typedef struct Context int error_count; ErrorList *errors; Preprocessor *preprocessor; + StringBucket *string_hashtable[256]; + // !!! FIXME: do these really need to be in here? const char *token; unsigned int tokenlen; Token tokenval; @@ -187,18 +196,6 @@ typedef struct ExpressionStringLiteral const char *string; } ExpressionStringLiteral; -static const char *new_identifier(Context *ctx, const TokenData *data) -{ - // !!! FIXME: this needs to cache strings. - const unsigned int len = data->tokenlen; - char *retval = Malloc(ctx, len + 1); - if (retval == NULL) - return NULL; - memcpy(retval, data->token, len); - retval[len] = '\0'; - return retval; -} // new_identifier - static Expression *new_unary_expr(Context *ctx, const Operator op, Expression *operand) { @@ -233,11 +230,11 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op, return (Expression *) retval; } // new_ternary_expr -static Expression *new_identifier_expr(Context *ctx, const char *identifier) +static Expression *new_identifier_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionIdentifier); retval->op = OP_IDENTIFIER; - retval->identifier = identifier; + retval->identifier = data->string; // cached; don't copy string. return (Expression *) retval; } // new_identifier_expr @@ -282,7 +279,7 @@ static Expression *new_literal_int_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionIntLiteral); retval->op = OP_INT_LITERAL; - retval->value = strtoi64(data->token, data->tokenlen); + retval->value = data->i64; return (Expression *) retval; } // new_literal_int_expr @@ -299,7 +296,7 @@ static Expression *new_literal_float_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionFloatLiteral); retval->op = OP_FLOAT_LITERAL; - retval->value = strtodouble(data->token, data->tokenlen); + retval->value = data->dbl; return (Expression *) retval; } // new_literal_float_expr @@ -307,7 +304,7 @@ static Expression *new_literal_string_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionStringLiteral); retval->op = OP_STRING_LITERAL; - retval->string = new_identifier(ctx, data); + retval->string = data->string; // cached; don't copy string. return (Expression *) retval; } // new_string_literal_expr @@ -517,14 +514,9 @@ static void free_expr(Context *ctx, Expression *expr) free_expr(ctx, ternary->center); free_expr(ctx, ternary->right); } // else if - else if (expr->op == OP_STRING_LITERAL) - { - Free(ctx, (void *) ((ExpressionStringLiteral *)expr)->string); - } // else if - else if (expr->op == OP_IDENTIFIER) - { - Free(ctx, (void *) ((ExpressionIdentifier *)expr)->identifier); - } // else if + + // don't need to free extra fields in other types at the moment. + Free(ctx, expr); } // free_expr @@ -536,6 +528,82 @@ static void parse_complete(Context *ctx, Expression *expr) } // parse_complete +// !!! FIXME: sort of cut-and-paste from the preprocessor... + +// this is djb's xor hashing function. +static inline uint32 hash_string_djbxor(const char *str, unsigned int len) +{ + register uint32 hash = 5381; + while (len--) + hash = ((hash << 5) + hash) ^ *(str++); + return hash; +} // hash_string_djbxor + +static inline uint8 hash_string(const char *str, const unsigned int len) +{ + return (uint8) hash_string_djbxor(str, len); +} // hash_string + +static const char *cache_string(Context *ctx, const char *str, + const unsigned int len) +{ + const uint8 hash = hash_string(str, len); + StringBucket *bucket = ctx->string_hashtable[hash]; + StringBucket *prev = NULL; + while (bucket) + { + const char *bstr = bucket->string; + if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0)) + { + // Matched! Move this to the front of the list. + if (prev != NULL) + { + assert(prev->next == bucket); + prev->next = bucket->next; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + } // if + return bstr; // already cached + } // if + prev = bucket; + bucket = bucket->next; + } // while + + // no match, add to the table. + bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket)); + if (bucket == NULL) + return NULL; + bucket->string = (char *) Malloc(ctx, len + 1); + if (bucket->string == NULL) + { + Free(ctx, bucket); + return NULL; + } // if + memcpy(bucket->string, str, len); + bucket->string[len] = '\0'; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + return bucket->string; +} // cache_string + +static void free_string_cache(Context *ctx) +{ + size_t i; + for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++) + { + StringBucket *bucket = ctx->string_hashtable[i]; + ctx->string_hashtable[i] = NULL; + while (bucket) + { + StringBucket *next = bucket->next; + Free(ctx, bucket->string); + Free(ctx, bucket); + bucket = next; + } // while + } // for +} // free_string_cache + + // This is where the actual parsing happens. It's Lemon-generated! #define __MOJOSHADER_CALC_COMPILER__ 1 #include "calculator.h" @@ -610,6 +678,8 @@ static void MOJOSHADER_compile(const char *filename, MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) { Context ctx; + TokenData data; + if (m == NULL) m = MOJOSHADER_internal_malloc; if (f == NULL) f = MOJOSHADER_internal_free; @@ -628,17 +698,37 @@ static void MOJOSHADER_compile(const char *filename, #endif do { - ctx.token = preprocessor_nexttoken(ctx.preprocessor, - &ctx.tokenlen, - &ctx.tokenval); - // !!! FIXME: this can't refer directly to pointers in the stream, - // !!! FIXME: as they can be free()'d before we actually use them - // !!! FIXME: when a rule reduces down later. - TokenData token = { ctx.token, ctx.tokenlen }; - ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx); + ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen, + &ctx.tokenval); + + const int lemon_token = convert_to_lemon_token(&ctx); + switch (lemon_token) + { + case TOKEN_CALC_INT_CONSTANT: + data.i64 = strtoi64(ctx.token, ctx.tokenlen); + break; + + case TOKEN_CALC_FLOAT_CONSTANT: + data.dbl = strtodouble(ctx.token, ctx.tokenlen); + break; + + case TOKEN_CALC_STRING_LITERAL: + case TOKEN_CALC_IDENTIFIER: + data.string = cache_string(&ctx, ctx.token, ctx.tokenlen); + break; + + default: + data.i64 = 0; + break; + } // switch + + ParseCalculator(pParser, lemon_token, data, &ctx); } while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI)); + ParseCalculatorFree(pParser, f, d); -} + // !!! FIXME: destruct (ctx) here. + free_string_cache(&ctx); +} // MOJOSHADER_compile int main(int argc, char **argv) { diff --git a/calculator.lemon b/calculator.lemon index 329b3920..3c2f10f6 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -74,13 +74,14 @@ calculator ::= expression(B). { parse_complete(ctx, B); } -%type identifier { const char * } +// !!! FIXME: why is this a non-terminal? +%type identifier { TokenData } %destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. -identifier(A) ::= IDENTIFIER(B). { A = new_identifier(ctx, &B); } +identifier(A) ::= IDENTIFIER(B). { A = B; } // the expression stuff is based on Jeff Lee's ANSI C grammar. %type primary_expr { Expression * } -primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); } +primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, &B); } primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); } primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); } primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); } @@ -92,7 +93,7 @@ postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_b postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } //postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor -postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C)); } +postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); } postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); }