From 7fa864c2a0536909489902ee3f04fc106349828d Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 24 Feb 2010 01:21:21 -0500 Subject: [PATCH] Formalized the compiler's string cache into a real API. Moved the compiler and preprocessor to use it, and dumped their separate implementations of the same thing. --- mojoshader_common.c | 158 ++++++++++++++++++++++++++++++++++- mojoshader_compiler.c | 132 ++++------------------------- mojoshader_internal.h | 12 +++ mojoshader_parser_hlsl.lemon | 40 ++++----- mojoshader_preprocessor.c | 80 ++++-------------- 5 files changed, 216 insertions(+), 206 deletions(-) diff --git a/mojoshader_common.c b/mojoshader_common.c index 50684bbf..3baf0d7c 100644 --- a/mojoshader_common.c +++ b/mojoshader_common.c @@ -136,13 +136,22 @@ int hash_remove(HashTable *table, const void *key) // this is djb's xor hashing function. -uint32 hash_hash_string(const void *_sym) +static inline uint32 hash_string_djbxor(const char *str, size_t len) { - register const char *sym = (const char *) _sym; register uint32 hash = 5381; - while (*sym) - hash = ((hash << 5) + hash) ^ *(sym++); + while (len--) + hash = ((hash << 5) + hash) ^ *(str++); return hash; +} // hash_string_djbxor + +static inline uint32 hash_string(const char *str, size_t len) +{ + return hash_string_djbxor(str, len); +} // hash_string + +uint32 hash_hash_string(const void *sym) +{ + return hash_string(sym, strlen(sym)); } // hash_hash_string int hash_keymatch_string(const void *a, const void *b) @@ -150,5 +159,146 @@ int hash_keymatch_string(const void *a, const void *b) return (strcmp((const char *) a, (const char *) b) == 0); } // hash_keymatch_string + +// The string cache... + +typedef struct StringBucket +{ + char *string; + struct StringBucket *next; +} StringBucket; + +struct StringCache +{ + StringBucket **hashtable; + uint32 table_size; + MOJOSHADER_malloc m; + MOJOSHADER_free f; + void *d; +}; + +const char *stringcache(StringCache *cache, const char *str) +{ + return stringcache_len(cache, str, strlen(str)); +} // stringcache + +const char *stringcache_len(StringCache *cache, const char *str, + const unsigned int len) +{ + const uint8 hash = hash_string(str, len) & (cache->table_size-1); + StringBucket *bucket = cache->hashtable[hash]; + StringBucket *prev = NULL; + while (bucket) + { + const char *bstr = bucket->string; + if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0)) + { + // Matched! Move this to the front of the list. + if (prev != NULL) + { + assert(prev->next == bucket); + prev->next = bucket->next; + bucket->next = cache->hashtable[hash]; + cache->hashtable[hash] = bucket; + } // if + return bstr; // already cached + } // if + prev = bucket; + bucket = bucket->next; + } // while + + // no match, add to the table. + bucket = (StringBucket *) cache->m(sizeof (StringBucket), cache->d); + if (bucket == NULL) + return NULL; + bucket->string = (char *) cache->m(len + 1, cache->d); + if (bucket->string == NULL) + { + cache->f(bucket, cache->d); + return NULL; + } // if + memcpy(bucket->string, str, len); + bucket->string[len] = '\0'; + bucket->next = cache->hashtable[hash]; + cache->hashtable[hash] = bucket; + return bucket->string; +} // stringcache_len + +const char *stringcache_fmt(StringCache *cache, const char *fmt, ...) +{ + char buf[128]; // use the stack if reasonable. + char *ptr = NULL; + int len = 0; // number of chars, NOT counting null-terminator! + va_list ap; + + va_start(ap, fmt); + len = vsnprintf(buf, sizeof (buf), fmt, ap); + va_end(ap); + + if (len > sizeof (buf)) + { + ptr = (char *) cache->m(len, cache->d); + if (ptr == NULL) + return NULL; + + va_start(ap, fmt); + vsnprintf(ptr, len, fmt, ap); + va_end(ap); + } // if + + const char *retval = stringcache_len(cache, ptr ? ptr : buf, len); + if (ptr != NULL) + cache->f(ptr, cache->d); + + return retval; +} // stringcache_fmt + +StringCache *stringcache_create(MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) +{ + const uint32 initial_table_size = 256; + const size_t tablelen = sizeof (StringBucket *) * initial_table_size; + StringCache *cache = (StringCache *) m(sizeof (StringCache), d); + if (!cache) + return NULL; + memset(cache, '\0', sizeof (StringCache)); + + cache->hashtable = (StringBucket **) m(tablelen, d); + if (!cache->hashtable) + { + f(cache, d); + return NULL; + } // if + memset(cache->hashtable, '\0', tablelen); + + cache->table_size = initial_table_size; + cache->m = m; + cache->f = f; + cache->d = d; + return cache; +} // stringcache_create + +void stringcache_destroy(StringCache *cache) +{ + MOJOSHADER_free f = cache->f; + void *d = cache->d; + size_t i; + + for (i = 0; i < cache->table_size; i++) + { + StringBucket *bucket = cache->hashtable[i]; + cache->hashtable[i] = NULL; + while (bucket) + { + StringBucket *next = bucket->next; + f(bucket->string, d); + f(bucket, d); + bucket = next; + } // while + } // for + + f(cache->hashtable, d); + f(cache, d); +} // stringcache_destroy + // end of mojoshader_common.c ... diff --git a/mojoshader_compiler.c b/mojoshader_compiler.c index ffa0dc7d..4224027f 100644 --- a/mojoshader_compiler.c +++ b/mojoshader_compiler.c @@ -35,12 +35,6 @@ typedef union TokenData const char *string; } TokenData; -typedef struct StringBucket -{ - char *string; - struct StringBucket *next; -} StringBucket; - // Structures that make up the parse tree... @@ -547,7 +541,7 @@ typedef struct Context void *malloc_data; int error_count; ErrorList *errors; - StringBucket *string_hashtable[256]; + StringCache *strcache; const char *sourcefile; // current source file that we're parsing. unsigned int sourceline; // current line in sourcefile that we're parsing. UserTypeMap usertypes; @@ -1430,93 +1424,6 @@ static int is_usertype(const Context *ctx, const char *token) } // is_usertype -// !!! FIXME: sort of cut-and-paste from the preprocessor... - -// this is djb's xor hashing function. -static inline uint32 hash_string_djbxor(const char *str, unsigned int len) -{ - register uint32 hash = 5381; - while (len--) - hash = ((hash << 5) + hash) ^ *(str++); - return hash; -} // hash_string_djbxor - -static inline uint8 hash_string(const char *str, const unsigned int len) -{ - return (uint8) hash_string_djbxor(str, len); -} // hash_string - -static const char *cache_string(Context *ctx, const char *str, - const unsigned int len) -{ - const uint8 hash = hash_string(str, len); - StringBucket *bucket = ctx->string_hashtable[hash]; - StringBucket *prev = NULL; - while (bucket) - { - const char *bstr = bucket->string; - if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0)) - { - // Matched! Move this to the front of the list. - if (prev != NULL) - { - assert(prev->next == bucket); - prev->next = bucket->next; - bucket->next = ctx->string_hashtable[hash]; - ctx->string_hashtable[hash] = bucket; - } // if - return bstr; // already cached - } // if - prev = bucket; - bucket = bucket->next; - } // while - - // no match, add to the table. - bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket)); - if (bucket == NULL) - return NULL; - bucket->string = (char *) Malloc(ctx, len + 1); - if (bucket->string == NULL) - { - Free(ctx, bucket); - return NULL; - } // if - memcpy(bucket->string, str, len); - bucket->string[len] = '\0'; - bucket->next = ctx->string_hashtable[hash]; - ctx->string_hashtable[hash] = bucket; - return bucket->string; -} // cache_string - -static const char *cache_string_fmt(Context *ctx, const char *fmt, ...) -{ - char buf[128]; // use the stack if reasonable. - char *ptr = NULL; - int len = 0; // number of chars, NOT counting null-terminator! - va_list ap; - - va_start(ap, fmt); - len = vsnprintf(buf, sizeof (buf), fmt, ap); - va_end(ap); - - if (len > sizeof (buf)) - { - ptr = (char *) Malloc(ctx, len); - if (ptr == NULL) - return NULL; - - va_start(ap, fmt); - vsnprintf(ptr, len, fmt, ap); - va_end(ap); - } // if - - const char *retval = cache_string(ctx, ptr ? ptr : buf, len); - if (ptr != NULL) - Free(ctx, ptr); - - return retval; -} // cache_string_fmt - // This is where the actual parsing happens. It's Lemon-generated! #define __MOJOSHADER_HLSL_COMPILER__ 1 #include "mojoshader_parser_hlsl.h" @@ -2434,7 +2341,7 @@ static int convert_to_lemon_token(Context *ctx, const char *token, #undef tokencmp // get a canonical copy of the string now, as we'll need it. - token = cache_string(ctx, token, tokenlen); + token = stringcache_len(ctx->strcache, token, tokenlen); if (is_usertype(ctx, token)) return TOKEN_HLSL_USERTYPE; return TOKEN_HLSL_IDENTIFIER; @@ -2446,24 +2353,6 @@ static int convert_to_lemon_token(Context *ctx, const char *token, return 0; } // convert_to_lemon_token -// !!! FIXME: unify this code with the string cache in the preprocessor. -static void free_string_cache(Context *ctx) -{ - size_t i; - for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++) - { - StringBucket *bucket = ctx->string_hashtable[i]; - ctx->string_hashtable[i] = NULL; - while (bucket) - { - StringBucket *next = bucket->next; - Free(ctx, bucket->string); - Free(ctx, bucket); - bucket = next; - } // while - } // for -} // free_string_cache - static void destroy_context(Context *ctx) { if (ctx != NULL) @@ -2474,7 +2363,10 @@ static void destroy_context(Context *ctx) // !!! FIXME: free ctx->errors delete_compilation_unit(ctx, ctx->ast); destroy_usertypemap(ctx); - free_string_cache(ctx); + + if (ctx->strcache) + stringcache_destroy(ctx->strcache); + f(ctx, d); } // if } // destroy_context @@ -2494,6 +2386,8 @@ static Context *build_context(MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) ctx->malloc_data = d; //ctx->parse_phase = MOJOSHADER_PARSEPHASE_NOTSTARTED; create_usertypemap(ctx); // !!! FIXME: check for failure. + ctx->strcache = stringcache_create(m, f, d); // !!! FIXME: check for failure. + return ctx; } // build_context @@ -2541,13 +2435,15 @@ static void parse_source(Context *ctx, const char *filename, int j; for (j = 1; j <= 4; j++) { + // "float2" int len = snprintf(buf, sizeof (buf), "%s%d", types[i], j); - add_usertype(ctx, cache_string(ctx, buf, len)); // "float2" + add_usertype(ctx, stringcache_len(ctx->strcache, buf, len)); int k; for (k = 1; k <= 4; k++) { + // "float2x2" len = snprintf(buf, sizeof (buf), "%s%dx%d", types[i], j, k); - add_usertype(ctx, cache_string(ctx, buf, len)); // "float2x2" + add_usertype(ctx, stringcache_len(ctx->strcache, buf, len)); } // for } // for } // for @@ -2563,7 +2459,7 @@ static void parse_source(Context *ctx, const char *filename, } // if fname = preprocessor_sourcepos(pp, &ctx->sourceline); - ctx->sourcefile = fname ? cache_string(ctx, fname, strlen(fname)) : 0; + ctx->sourcefile = fname ? stringcache(ctx->strcache, fname) : 0; if (tokenval == TOKEN_BAD_CHARS) { @@ -2591,7 +2487,7 @@ static void parse_source(Context *ctx, const char *filename, case TOKEN_HLSL_USERTYPE: case TOKEN_HLSL_STRING_LITERAL: case TOKEN_HLSL_IDENTIFIER: - data.string = cache_string(ctx, token, tokenlen); + data.string = stringcache_len(ctx->strcache, token, tokenlen); break; default: diff --git a/mojoshader_internal.h b/mojoshader_internal.h index bfd870eb..cf6fa043 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -172,6 +172,18 @@ int hash_find(const HashTable *table, const void *key, const void **_value); uint32 hash_hash_string(const void *sym); int hash_keymatch_string(const void *a, const void *b); + +// String caching... + +typedef struct StringCache StringCache; +StringCache *stringcache_create(MOJOSHADER_malloc m,MOJOSHADER_free f,void *d); +const char *stringcache(StringCache *cache, const char *str); +const char *stringcache_len(StringCache *cache, const char *str, + const unsigned int len); +const char *stringcache_fmt(StringCache *cache, const char *fmt, ...); +void stringcache_destroy(StringCache *cache); + + // This is the ID for a D3DXSHADER_CONSTANTTABLE in the bytecode comments. #define CTAB_ID 0x42415443 // 0x42415443 == 'CTAB' #define CTAB_SIZE 28 // sizeof (D3DXSHADER_CONSTANTTABLE). diff --git a/mojoshader_parser_hlsl.lemon b/mojoshader_parser_hlsl.lemon index d89bdf83..440e8541 100644 --- a/mojoshader_parser_hlsl.lemon +++ b/mojoshader_parser_hlsl.lemon @@ -310,26 +310,26 @@ datatype(A) ::= intrinsic_datatype(B). { A = B; } datatype(A) ::= USERTYPE(B). { A = B.string; } %type datatype_sampler { const char * } -datatype_sampler(A) ::= SAMPLER. { A = cache_string_fmt(ctx, "s1"); } -datatype_sampler(A) ::= SAMPLER1D. { A = cache_string_fmt(ctx, "s1"); } -datatype_sampler(A) ::= SAMPLER2D. { A = cache_string_fmt(ctx, "s2"); } -datatype_sampler(A) ::= SAMPLER3D. { A = cache_string_fmt(ctx, "s3"); } -datatype_sampler(A) ::= SAMPLERCUBE. { A = cache_string_fmt(ctx, "sc"); } -datatype_sampler(A) ::= SAMPLER_STATE. { A = cache_string_fmt(ctx, "ss"); } -datatype_sampler(A) ::= SAMPLERSTATE. { A = cache_string_fmt(ctx, "ss"); } -datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = cache_string_fmt(ctx, "sS"); } +datatype_sampler(A) ::= SAMPLER. { A = stringcache_fmt(ctx->strcache, "s1"); } +datatype_sampler(A) ::= SAMPLER1D. { A = stringcache_fmt(ctx->strcache, "s1"); } +datatype_sampler(A) ::= SAMPLER2D. { A = stringcache_fmt(ctx->strcache, "s2"); } +datatype_sampler(A) ::= SAMPLER3D. { A = stringcache_fmt(ctx->strcache, "s3"); } +datatype_sampler(A) ::= SAMPLERCUBE. { A = stringcache_fmt(ctx->strcache, "sc"); } +datatype_sampler(A) ::= SAMPLER_STATE. { A = stringcache_fmt(ctx->strcache, "ss"); } +datatype_sampler(A) ::= SAMPLERSTATE. { A = stringcache_fmt(ctx->strcache, "ss"); } +datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = stringcache_fmt(ctx->strcache, "sS"); } %type datatype_scalar { const char * } -datatype_scalar(A) ::= BOOL. { A = cache_string_fmt(ctx, "b"); } -datatype_scalar(A) ::= INT. { A = cache_string_fmt(ctx, "i"); } -datatype_scalar(A) ::= UINT. { A = cache_string_fmt(ctx, "u"); } -datatype_scalar(A) ::= HALF. { A = cache_string_fmt(ctx, "h"); } -datatype_scalar(A) ::= FLOAT. { A = cache_string_fmt(ctx, "f"); } -datatype_scalar(A) ::= DOUBLE. { A = cache_string_fmt(ctx, "d"); } -datatype_scalar(A) ::= STRING. { A = cache_string_fmt(ctx, "S"); } // this is for the effects framework, not HLSL. -datatype_scalar(A) ::= SNORM FLOAT. { A = cache_string_fmt(ctx, "Fs"); } -datatype_scalar(A) ::= UNORM FLOAT. { A = cache_string_fmt(ctx, "Fu"); } -datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = cache_string_fmt(ctx, "B%s", B); } +datatype_scalar(A) ::= BOOL. { A = stringcache_fmt(ctx->strcache, "b"); } +datatype_scalar(A) ::= INT. { A = stringcache_fmt(ctx->strcache, "i"); } +datatype_scalar(A) ::= UINT. { A = stringcache_fmt(ctx->strcache, "u"); } +datatype_scalar(A) ::= HALF. { A = stringcache_fmt(ctx->strcache, "h"); } +datatype_scalar(A) ::= FLOAT. { A = stringcache_fmt(ctx->strcache, "f"); } +datatype_scalar(A) ::= DOUBLE. { A = stringcache_fmt(ctx->strcache, "d"); } +datatype_scalar(A) ::= STRING. { A = stringcache_fmt(ctx->strcache, "S"); } // this is for the effects framework, not HLSL. +datatype_scalar(A) ::= SNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fs"); } +datatype_scalar(A) ::= UNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fu"); } +datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = stringcache_fmt(ctx->strcache, "B%s", B); } // !!! FIXME: MSDN suggests that the matrix ones are just typedefs inserted // !!! FIXME: before parsing begins, like: @@ -337,10 +337,10 @@ datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = cache_string_fmt(c // !!! FIXME: ...maybe we can rip these out of the grammar and just create // !!! FIXME: them at startup? %type datatype_vector { const char * } -datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = cache_string_fmt(ctx, "v%d%s", (int) C.i64, B); } +datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = stringcache_fmt(ctx->strcache, "v%d%s", (int) C.i64, B); } %type datatype_matrix { const char * } -datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = cache_string_fmt(ctx, "m%d%d%s", (int) C.i64, (int) D.i64, B); } +datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = stringcache_fmt(ctx->strcache, "m%d%d%s", (int) C.i64, (int) D.i64, B); } %type statement_block { Statement * } %destructor statement_block { delete_statement(ctx, $$); } diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c index ade215da..4b51a01d 100644 --- a/mojoshader_preprocessor.c +++ b/mojoshader_preprocessor.c @@ -37,16 +37,6 @@ static void print_debug_lexing_position(IncludeState *s) #define print_debug_lexing_position(s) #endif - - -// Simple linked list to cache source filenames, so we don't have to copy -// the same string over and over for each opcode. -typedef struct FilenameCache -{ - char *filename; - struct FilenameCache *next; -} FilenameCache; - typedef struct Context { int isfail; @@ -59,7 +49,7 @@ typedef struct Context IncludeState *include_pool; Define *define_hashtable[256]; Define *define_pool; - FilenameCache *filename_cache; + StringCache *filename_cache; MOJOSHADER_includeOpen open_callback; MOJOSHADER_includeClose close_callback; MOJOSHADER_malloc malloc; @@ -507,54 +497,6 @@ static void put_all_defines(Context *ctx) } // put_all_defines -// filename cache stuff... - -static const char *cache_filename(Context *ctx, const char *fname) -{ - if (fname == NULL) - return NULL; - - // !!! FIXME: this could be optimized into a hash table, but oh well. - FilenameCache *item = ctx->filename_cache; - while (item != NULL) - { - if (strcmp(item->filename, fname) == 0) - return item->filename; - item = item->next; - } // while - - // new cache item. - item = (FilenameCache *) Malloc(ctx, sizeof (FilenameCache)); - if (item == NULL) - return NULL; - - item->filename = StrDup(ctx, fname); - if (item->filename == NULL) - { - Free(ctx, item); - return NULL; - } // if - - item->next = ctx->filename_cache; - ctx->filename_cache = item; - - return item->filename; -} // cache_filename - - -static void free_filename_cache(Context *ctx) -{ - FilenameCache *item = ctx->filename_cache; - while (item != NULL) - { - FilenameCache *next = item->next; - Free(ctx, item->filename); - Free(ctx, item); - item = next; - } // while -} // free_filename_cache - - static int push_source(Context *ctx, const char *fname, const char *source, unsigned int srclen, unsigned int linenum, MOJOSHADER_includeClose close_callback, Define *defs) @@ -568,9 +510,10 @@ static int push_source(Context *ctx, const char *fname, const char *source, if (fname != NULL) { - state->filename = cache_filename(ctx, fname); + state->filename = stringcache(ctx->filename_cache, fname); if (state->filename == NULL) { + out_of_memory(ctx); put_include(ctx, state); return 0; } // if @@ -661,7 +604,7 @@ Preprocessor *preprocessor_start(const char *fname, const char *source, Context *ctx = (Context *) m(sizeof (Context), d); if (ctx == NULL) - return 0; + return NULL; memset(ctx, '\0', sizeof (Context)); ctx->malloc = m; @@ -670,6 +613,12 @@ Preprocessor *preprocessor_start(const char *fname, const char *source, ctx->open_callback = open_callback; ctx->close_callback = close_callback; ctx->asm_comments = asm_comments; + ctx->filename_cache = stringcache_create(m, f, d); + if (ctx->filename_cache == 0) + { + f(ctx, d); + return NULL; + } // if // let the usual preprocessor parser sort these out. char *define_include = NULL; @@ -728,7 +677,9 @@ void preprocessor_end(Preprocessor *_ctx) put_all_defines(ctx); - free_filename_cache(ctx); + if (ctx->filename_cache != NULL) + stringcache_destroy(ctx->filename_cache); + free_define_pool(ctx); free_conditional_pool(ctx); free_include_pool(ctx); @@ -890,8 +841,9 @@ static void handle_pp_line(Context *ctx) return; } // if - const char *cached = cache_filename(ctx, filename); - assert((cached != NULL) || (ctx->out_of_memory)); + const char *cached = stringcache(ctx->filename_cache, filename); + if (cached == NULL) + out_of_memory(ctx); state->filename = cached; state->line = linenum; } // handle_pp_line