Skip to content

Commit

Permalink
Cleaned up TokenData FIXMEs, and added string cache.
Browse files Browse the repository at this point in the history
--HG--
branch : calculator-experiment
  • Loading branch information
icculus committed Feb 9, 2010
1 parent 8e12f4a commit 6f1a4c7
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 41 deletions.
164 changes: 127 additions & 37 deletions calculator.c
Expand Up @@ -5,12 +5,19 @@
#define LEMON_SUPPORT_TRACING 1
#endif

typedef struct TokenData
typedef union TokenData
{
const char *token;
unsigned int tokenlen;
int64 i64;
double dbl;
const char *string;
} TokenData;

typedef struct StringBucket
{
char *string;
struct StringBucket *next;
} StringBucket;

typedef struct Context
{
int isfail;
Expand All @@ -21,6 +28,8 @@ typedef struct Context
int error_count;
ErrorList *errors;
Preprocessor *preprocessor;
StringBucket *string_hashtable[256];
// !!! FIXME: do these really need to be in here?
const char *token;
unsigned int tokenlen;
Token tokenval;
Expand Down Expand Up @@ -187,18 +196,6 @@ typedef struct ExpressionStringLiteral
const char *string;
} ExpressionStringLiteral;

static const char *new_identifier(Context *ctx, const TokenData *data)
{
// !!! FIXME: this needs to cache strings.
const unsigned int len = data->tokenlen;
char *retval = Malloc(ctx, len + 1);
if (retval == NULL)
return NULL;
memcpy(retval, data->token, len);
retval[len] = '\0';
return retval;
} // new_identifier

static Expression *new_unary_expr(Context *ctx, const Operator op,
Expression *operand)
{
Expand Down Expand Up @@ -233,11 +230,11 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op,
return (Expression *) retval;
} // new_ternary_expr

static Expression *new_identifier_expr(Context *ctx, const char *identifier)
static Expression *new_identifier_expr(Context *ctx, const TokenData *data)
{
NEW_EXPR(ExpressionIdentifier);
retval->op = OP_IDENTIFIER;
retval->identifier = identifier;
retval->identifier = data->string; // cached; don't copy string.
return (Expression *) retval;
} // new_identifier_expr

Expand Down Expand Up @@ -282,7 +279,7 @@ static Expression *new_literal_int_expr(Context *ctx, const TokenData *data)
{
NEW_EXPR(ExpressionIntLiteral);
retval->op = OP_INT_LITERAL;
retval->value = strtoi64(data->token, data->tokenlen);
retval->value = data->i64;
return (Expression *) retval;
} // new_literal_int_expr

Expand All @@ -299,15 +296,15 @@ static Expression *new_literal_float_expr(Context *ctx, const TokenData *data)
{
NEW_EXPR(ExpressionFloatLiteral);
retval->op = OP_FLOAT_LITERAL;
retval->value = strtodouble(data->token, data->tokenlen);
retval->value = data->dbl;
return (Expression *) retval;
} // new_literal_float_expr

static Expression *new_literal_string_expr(Context *ctx, const TokenData *data)
{
NEW_EXPR(ExpressionStringLiteral);
retval->op = OP_STRING_LITERAL;
retval->string = new_identifier(ctx, data);
retval->string = data->string; // cached; don't copy string.
return (Expression *) retval;
} // new_string_literal_expr

Expand Down Expand Up @@ -517,14 +514,9 @@ static void free_expr(Context *ctx, Expression *expr)
free_expr(ctx, ternary->center);
free_expr(ctx, ternary->right);
} // else if
else if (expr->op == OP_STRING_LITERAL)
{
Free(ctx, (void *) ((ExpressionStringLiteral *)expr)->string);
} // else if
else if (expr->op == OP_IDENTIFIER)
{
Free(ctx, (void *) ((ExpressionIdentifier *)expr)->identifier);
} // else if

// don't need to free extra fields in other types at the moment.

Free(ctx, expr);
} // free_expr

Expand All @@ -536,6 +528,82 @@ static void parse_complete(Context *ctx, Expression *expr)
} // parse_complete


// !!! FIXME: sort of cut-and-paste from the preprocessor...

// this is djb's xor hashing function.
static inline uint32 hash_string_djbxor(const char *str, unsigned int len)
{
register uint32 hash = 5381;
while (len--)
hash = ((hash << 5) + hash) ^ *(str++);
return hash;
} // hash_string_djbxor

static inline uint8 hash_string(const char *str, const unsigned int len)
{
return (uint8) hash_string_djbxor(str, len);
} // hash_string

static const char *cache_string(Context *ctx, const char *str,
const unsigned int len)
{
const uint8 hash = hash_string(str, len);
StringBucket *bucket = ctx->string_hashtable[hash];
StringBucket *prev = NULL;
while (bucket)
{
const char *bstr = bucket->string;
if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0))
{
// Matched! Move this to the front of the list.
if (prev != NULL)
{
assert(prev->next == bucket);
prev->next = bucket->next;
bucket->next = ctx->string_hashtable[hash];
ctx->string_hashtable[hash] = bucket;
} // if
return bstr; // already cached
} // if
prev = bucket;
bucket = bucket->next;
} // while

// no match, add to the table.
bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket));
if (bucket == NULL)
return NULL;
bucket->string = (char *) Malloc(ctx, len + 1);
if (bucket->string == NULL)
{
Free(ctx, bucket);
return NULL;
} // if
memcpy(bucket->string, str, len);
bucket->string[len] = '\0';
bucket->next = ctx->string_hashtable[hash];
ctx->string_hashtable[hash] = bucket;
return bucket->string;
} // cache_string

static void free_string_cache(Context *ctx)
{
size_t i;
for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++)
{
StringBucket *bucket = ctx->string_hashtable[i];
ctx->string_hashtable[i] = NULL;
while (bucket)
{
StringBucket *next = bucket->next;
Free(ctx, bucket->string);
Free(ctx, bucket);
bucket = next;
} // while
} // for
} // free_string_cache


// This is where the actual parsing happens. It's Lemon-generated!
#define __MOJOSHADER_CALC_COMPILER__ 1
#include "calculator.h"
Expand Down Expand Up @@ -610,6 +678,8 @@ static void MOJOSHADER_compile(const char *filename,
MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
{
Context ctx;
TokenData data;

if (m == NULL) m = MOJOSHADER_internal_malloc;
if (f == NULL) f = MOJOSHADER_internal_free;

Expand All @@ -628,17 +698,37 @@ static void MOJOSHADER_compile(const char *filename,
#endif

do {
ctx.token = preprocessor_nexttoken(ctx.preprocessor,
&ctx.tokenlen,
&ctx.tokenval);
// !!! FIXME: this can't refer directly to pointers in the stream,
// !!! FIXME: as they can be free()'d before we actually use them
// !!! FIXME: when a rule reduces down later.
TokenData token = { ctx.token, ctx.tokenlen };
ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx);
ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen,
&ctx.tokenval);

const int lemon_token = convert_to_lemon_token(&ctx);
switch (lemon_token)
{
case TOKEN_CALC_INT_CONSTANT:
data.i64 = strtoi64(ctx.token, ctx.tokenlen);
break;

case TOKEN_CALC_FLOAT_CONSTANT:
data.dbl = strtodouble(ctx.token, ctx.tokenlen);
break;

case TOKEN_CALC_STRING_LITERAL:
case TOKEN_CALC_IDENTIFIER:
data.string = cache_string(&ctx, ctx.token, ctx.tokenlen);
break;

default:
data.i64 = 0;
break;
} // switch

ParseCalculator(pParser, lemon_token, data, &ctx);
} while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI));

ParseCalculatorFree(pParser, f, d);
}
// !!! FIXME: destruct (ctx) here.
free_string_cache(&ctx);
} // MOJOSHADER_compile

int main(int argc, char **argv)
{
Expand Down
9 changes: 5 additions & 4 deletions calculator.lemon
Expand Up @@ -74,13 +74,14 @@

calculator ::= expression(B). { parse_complete(ctx, B); }

%type identifier { const char * }
// !!! FIXME: why is this a non-terminal?
%type identifier { TokenData }
%destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now.
identifier(A) ::= IDENTIFIER(B). { A = new_identifier(ctx, &B); }
identifier(A) ::= IDENTIFIER(B). { A = B; }

// the expression stuff is based on Jeff Lee's ANSI C grammar.
%type primary_expr { Expression * }
primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); }
primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, &B); }
primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); }
primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); }
primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); }
Expand All @@ -92,7 +93,7 @@ postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_b
postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); }
postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); }
//postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor
postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C)); }
postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); }
postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); }
postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); }

Expand Down

0 comments on commit 6f1a4c7

Please sign in to comment.