Formalized the compiler's string cache into a real API.
authorRyan C. Gordon <icculus@icculus.org>
Wed, 24 Feb 2010 01:21:21 -0500
changeset 858 d51537335896
parent 857 905ad877371b
child 859 824d67791db0
Formalized the compiler's string cache into a real API. Moved the compiler and preprocessor to use it, and dumped their separate implementations of the same thing.
mojoshader_common.c
mojoshader_compiler.c
mojoshader_internal.h
mojoshader_parser_hlsl.lemon
mojoshader_preprocessor.c
--- a/mojoshader_common.c	Tue Feb 23 17:38:00 2010 -0500
+++ b/mojoshader_common.c	Wed Feb 24 01:21:21 2010 -0500
@@ -136,13 +136,22 @@
 
 
 // this is djb's xor hashing function.
-uint32 hash_hash_string(const void *_sym)
+static inline uint32 hash_string_djbxor(const char *str, size_t len)
 {
-    register const char *sym = (const char *) _sym;
     register uint32 hash = 5381;
-    while (*sym)
-        hash = ((hash << 5) + hash) ^ *(sym++);
+    while (len--)
+        hash = ((hash << 5) + hash) ^ *(str++);
     return hash;
+} // hash_string_djbxor
+
+static inline uint32 hash_string(const char *str, size_t len)
+{
+    return hash_string_djbxor(str, len);
+} // hash_string
+
+uint32 hash_hash_string(const void *sym)
+{
+    return hash_string(sym, strlen(sym));
 } // hash_hash_string
 
 int hash_keymatch_string(const void *a, const void *b)
@@ -150,5 +159,146 @@
     return (strcmp((const char *) a, (const char *) b) == 0);
 } // hash_keymatch_string
 
+
+// The string cache...
+
+typedef struct StringBucket
+{
+    char *string;
+    struct StringBucket *next;
+} StringBucket;
+
+struct StringCache
+{
+    StringBucket **hashtable;
+    uint32 table_size;
+    MOJOSHADER_malloc m;
+    MOJOSHADER_free f;
+    void *d;
+};
+
+const char *stringcache(StringCache *cache, const char *str)
+{
+    return stringcache_len(cache, str, strlen(str));
+} // stringcache
+
+const char *stringcache_len(StringCache *cache, const char *str,
+                             const unsigned int len)
+{
+    const uint8 hash = hash_string(str, len) & (cache->table_size-1);
+    StringBucket *bucket = cache->hashtable[hash];
+    StringBucket *prev = NULL;
+    while (bucket)
+    {
+        const char *bstr = bucket->string;
+        if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0))
+        {
+            // Matched! Move this to the front of the list.
+            if (prev != NULL)
+            {
+                assert(prev->next == bucket);
+                prev->next = bucket->next;
+                bucket->next = cache->hashtable[hash];
+                cache->hashtable[hash] = bucket;
+            } // if
+            return bstr; // already cached
+        } // if
+        prev = bucket;
+        bucket = bucket->next;
+    } // while
+
+    // no match, add to the table.
+    bucket = (StringBucket *) cache->m(sizeof (StringBucket), cache->d);
+    if (bucket == NULL)
+        return NULL;
+    bucket->string = (char *) cache->m(len + 1, cache->d);
+    if (bucket->string == NULL)
+    {
+        cache->f(bucket, cache->d);
+        return NULL;
+    } // if
+    memcpy(bucket->string, str, len);
+    bucket->string[len] = '\0';
+    bucket->next = cache->hashtable[hash];
+    cache->hashtable[hash] = bucket;
+    return bucket->string;
+} // stringcache_len
+
+const char *stringcache_fmt(StringCache *cache, const char *fmt, ...)
+{
+    char buf[128];  // use the stack if reasonable.
+    char *ptr = NULL;
+    int len = 0;  // number of chars, NOT counting null-terminator!
+    va_list ap;
+
+    va_start(ap, fmt);
+    len = vsnprintf(buf, sizeof (buf), fmt, ap);
+    va_end(ap);
+
+    if (len > sizeof (buf))
+    {
+        ptr = (char *) cache->m(len, cache->d);
+        if (ptr == NULL)
+            return NULL;
+
+        va_start(ap, fmt);
+        vsnprintf(ptr, len, fmt, ap);
+        va_end(ap);
+    } // if
+
+    const char *retval = stringcache_len(cache, ptr ? ptr : buf, len);
+    if (ptr != NULL)
+        cache->f(ptr, cache->d);
+
+    return retval;
+} // stringcache_fmt
+
+StringCache *stringcache_create(MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
+{
+    const uint32 initial_table_size = 256;
+    const size_t tablelen = sizeof (StringBucket *) * initial_table_size;
+    StringCache *cache = (StringCache *) m(sizeof (StringCache), d);
+    if (!cache)
+        return NULL;
+    memset(cache, '\0', sizeof (StringCache));
+
+    cache->hashtable = (StringBucket **) m(tablelen, d);
+    if (!cache->hashtable)
+    {
+        f(cache, d);
+        return NULL;
+    } // if
+    memset(cache->hashtable, '\0', tablelen);
+
+    cache->table_size = initial_table_size;
+    cache->m = m;
+    cache->f = f;
+    cache->d = d;
+    return cache;
+} // stringcache_create
+
+void stringcache_destroy(StringCache *cache)
+{
+    MOJOSHADER_free f = cache->f;
+    void *d = cache->d;
+    size_t i;
+
+    for (i = 0; i < cache->table_size; i++)
+    {
+        StringBucket *bucket = cache->hashtable[i];
+        cache->hashtable[i] = NULL;
+        while (bucket)
+        {
+            StringBucket *next = bucket->next;
+            f(bucket->string, d);
+            f(bucket, d);
+            bucket = next;
+        } // while
+    } // for
+
+    f(cache->hashtable, d);
+    f(cache, d);
+} // stringcache_destroy
+
 // end of mojoshader_common.c ...
 
--- a/mojoshader_compiler.c	Tue Feb 23 17:38:00 2010 -0500
+++ b/mojoshader_compiler.c	Wed Feb 24 01:21:21 2010 -0500
@@ -35,12 +35,6 @@
     const char *string;
 } TokenData;
 
-typedef struct StringBucket
-{
-    char *string;
-    struct StringBucket *next;
-} StringBucket;
-
 
 // Structures that make up the parse tree...
 
@@ -547,7 +541,7 @@
     void *malloc_data;
     int error_count;
     ErrorList *errors;
-    StringBucket *string_hashtable[256];
+    StringCache *strcache;
     const char *sourcefile;  // current source file that we're parsing.
     unsigned int sourceline; // current line in sourcefile that we're parsing.
     UserTypeMap usertypes;
@@ -1430,93 +1424,6 @@
 } // is_usertype
 
 
-// !!! FIXME: sort of cut-and-paste from the preprocessor...
-
-// this is djb's xor hashing function.
-static inline uint32 hash_string_djbxor(const char *str, unsigned int len)
-{
-    register uint32 hash = 5381;
-    while (len--)
-        hash = ((hash << 5) + hash) ^ *(str++);
-    return hash;
-} // hash_string_djbxor
-
-static inline uint8 hash_string(const char *str, const unsigned int len)
-{
-    return (uint8) hash_string_djbxor(str, len);
-} // hash_string
-
-static const char *cache_string(Context *ctx, const char *str,
-                                const unsigned int len)
-{
-    const uint8 hash = hash_string(str, len);
-    StringBucket *bucket = ctx->string_hashtable[hash];
-    StringBucket *prev = NULL;
-    while (bucket)
-    {
-        const char *bstr = bucket->string;
-        if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0))
-        {
-            // Matched! Move this to the front of the list.
-            if (prev != NULL)
-            {
-                assert(prev->next == bucket);
-                prev->next = bucket->next;
-                bucket->next = ctx->string_hashtable[hash];
-                ctx->string_hashtable[hash] = bucket;
-            } // if
-            return bstr; // already cached
-        } // if
-        prev = bucket;
-        bucket = bucket->next;
-    } // while
-
-    // no match, add to the table.
-    bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket));
-    if (bucket == NULL)
-        return NULL;
-    bucket->string = (char *) Malloc(ctx, len + 1);
-    if (bucket->string == NULL)
-    {
-        Free(ctx, bucket);
-        return NULL;
-    } // if
-    memcpy(bucket->string, str, len);
-    bucket->string[len] = '\0';
-    bucket->next = ctx->string_hashtable[hash];
-    ctx->string_hashtable[hash] = bucket;
-    return bucket->string;
-} // cache_string
-
-static const char *cache_string_fmt(Context *ctx, const char *fmt, ...)
-{
-    char buf[128];  // use the stack if reasonable.
-    char *ptr = NULL;
-    int len = 0;  // number of chars, NOT counting null-terminator!
-    va_list ap;
-
-    va_start(ap, fmt);
-    len = vsnprintf(buf, sizeof (buf), fmt, ap);
-    va_end(ap);
-
-    if (len > sizeof (buf))
-    {
-        ptr = (char *) Malloc(ctx, len);
-        if (ptr == NULL)
-            return NULL;
-
-        va_start(ap, fmt);
-        vsnprintf(ptr, len, fmt, ap);
-        va_end(ap);
-    } // if
-
-    const char *retval = cache_string(ctx, ptr ? ptr : buf, len);
-    if (ptr != NULL)
-        Free(ctx, ptr);
-
-    return retval;
-} // cache_string_fmt
-
 // This is where the actual parsing happens. It's Lemon-generated!
 #define __MOJOSHADER_HLSL_COMPILER__ 1
 #include "mojoshader_parser_hlsl.h"
@@ -2434,7 +2341,7 @@
             #undef tokencmp
 
             // get a canonical copy of the string now, as we'll need it.
-            token = cache_string(ctx, token, tokenlen);
+            token = stringcache_len(ctx->strcache, token, tokenlen);
             if (is_usertype(ctx, token))
                 return TOKEN_HLSL_USERTYPE;
             return TOKEN_HLSL_IDENTIFIER;
@@ -2446,24 +2353,6 @@
     return 0;
 } // convert_to_lemon_token
 
-// !!! FIXME: unify this code with the string cache in the preprocessor.
-static void free_string_cache(Context *ctx)
-{
-    size_t i;
-    for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++)
-    {
-        StringBucket *bucket = ctx->string_hashtable[i];
-        ctx->string_hashtable[i] = NULL;
-        while (bucket)
-        {
-            StringBucket *next = bucket->next;
-            Free(ctx, bucket->string);
-            Free(ctx, bucket);
-            bucket = next;
-        } // while
-    } // for
-} // free_string_cache
-
 static void destroy_context(Context *ctx)
 {
     if (ctx != NULL)
@@ -2474,7 +2363,10 @@
         // !!! FIXME: free ctx->errors
         delete_compilation_unit(ctx, ctx->ast);
         destroy_usertypemap(ctx);
-        free_string_cache(ctx);
+
+        if (ctx->strcache)
+            stringcache_destroy(ctx->strcache);
+
         f(ctx, d);
     } // if
 } // destroy_context
@@ -2494,6 +2386,8 @@
     ctx->malloc_data = d;
     //ctx->parse_phase = MOJOSHADER_PARSEPHASE_NOTSTARTED;
     create_usertypemap(ctx);  // !!! FIXME: check for failure.
+    ctx->strcache = stringcache_create(m, f, d);  // !!! FIXME: check for failure.
+
     return ctx;
 } // build_context
 
@@ -2541,13 +2435,15 @@
         int j;
         for (j = 1; j <= 4; j++)
         {
+            // "float2"
             int len = snprintf(buf, sizeof (buf), "%s%d", types[i], j);
-            add_usertype(ctx, cache_string(ctx, buf, len));  // "float2"
+            add_usertype(ctx, stringcache_len(ctx->strcache, buf, len));
             int k;
             for (k = 1; k <= 4; k++)
             {
+                // "float2x2"
                 len = snprintf(buf, sizeof (buf), "%s%dx%d", types[i], j, k);
-                add_usertype(ctx, cache_string(ctx, buf, len));  // "float2x2"
+                add_usertype(ctx, stringcache_len(ctx->strcache, buf, len));
             } // for
         } // for
     } // for
@@ -2563,7 +2459,7 @@
         } // if
 
         fname = preprocessor_sourcepos(pp, &ctx->sourceline);
-        ctx->sourcefile = fname ? cache_string(ctx, fname, strlen(fname)) : 0;
+        ctx->sourcefile = fname ? stringcache(ctx->strcache, fname) : 0;
 
         if (tokenval == TOKEN_BAD_CHARS)
         {
@@ -2591,7 +2487,7 @@
             case TOKEN_HLSL_USERTYPE:
             case TOKEN_HLSL_STRING_LITERAL:
             case TOKEN_HLSL_IDENTIFIER:
-                data.string = cache_string(ctx, token, tokenlen);
+                data.string = stringcache_len(ctx->strcache, token, tokenlen);
                 break;
 
             default:
--- a/mojoshader_internal.h	Tue Feb 23 17:38:00 2010 -0500
+++ b/mojoshader_internal.h	Wed Feb 24 01:21:21 2010 -0500
@@ -172,6 +172,18 @@
 uint32 hash_hash_string(const void *sym);
 int hash_keymatch_string(const void *a, const void *b);
 
+
+// String caching...
+
+typedef struct StringCache StringCache;
+StringCache *stringcache_create(MOJOSHADER_malloc m,MOJOSHADER_free f,void *d);
+const char *stringcache(StringCache *cache, const char *str);
+const char *stringcache_len(StringCache *cache, const char *str,
+                            const unsigned int len);
+const char *stringcache_fmt(StringCache *cache, const char *fmt, ...);
+void stringcache_destroy(StringCache *cache);
+
+
 // This is the ID for a D3DXSHADER_CONSTANTTABLE in the bytecode comments.
 #define CTAB_ID 0x42415443  // 0x42415443 == 'CTAB'
 #define CTAB_SIZE 28  // sizeof (D3DXSHADER_CONSTANTTABLE).
--- a/mojoshader_parser_hlsl.lemon	Tue Feb 23 17:38:00 2010 -0500
+++ b/mojoshader_parser_hlsl.lemon	Wed Feb 24 01:21:21 2010 -0500
@@ -310,26 +310,26 @@
 datatype(A) ::= USERTYPE(B). { A = B.string; }
 
 %type datatype_sampler { const char * }
-datatype_sampler(A) ::= SAMPLER. { A = cache_string_fmt(ctx, "s1"); }
-datatype_sampler(A) ::= SAMPLER1D. { A = cache_string_fmt(ctx, "s1"); }
-datatype_sampler(A) ::= SAMPLER2D. { A = cache_string_fmt(ctx, "s2"); }
-datatype_sampler(A) ::= SAMPLER3D. { A = cache_string_fmt(ctx, "s3"); }
-datatype_sampler(A) ::= SAMPLERCUBE. { A = cache_string_fmt(ctx, "sc"); }
-datatype_sampler(A) ::= SAMPLER_STATE. { A = cache_string_fmt(ctx, "ss"); }
-datatype_sampler(A) ::= SAMPLERSTATE. { A = cache_string_fmt(ctx, "ss"); }
-datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = cache_string_fmt(ctx, "sS"); }
+datatype_sampler(A) ::= SAMPLER. { A = stringcache_fmt(ctx->strcache, "s1"); }
+datatype_sampler(A) ::= SAMPLER1D. { A = stringcache_fmt(ctx->strcache, "s1"); }
+datatype_sampler(A) ::= SAMPLER2D. { A = stringcache_fmt(ctx->strcache, "s2"); }
+datatype_sampler(A) ::= SAMPLER3D. { A = stringcache_fmt(ctx->strcache, "s3"); }
+datatype_sampler(A) ::= SAMPLERCUBE. { A = stringcache_fmt(ctx->strcache, "sc"); }
+datatype_sampler(A) ::= SAMPLER_STATE. { A = stringcache_fmt(ctx->strcache, "ss"); }
+datatype_sampler(A) ::= SAMPLERSTATE. { A = stringcache_fmt(ctx->strcache, "ss"); }
+datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = stringcache_fmt(ctx->strcache, "sS"); }
 
 %type datatype_scalar { const char * }
-datatype_scalar(A) ::= BOOL. { A = cache_string_fmt(ctx, "b"); }
-datatype_scalar(A) ::= INT. { A = cache_string_fmt(ctx, "i"); }
-datatype_scalar(A) ::= UINT. { A = cache_string_fmt(ctx, "u"); }
-datatype_scalar(A) ::= HALF. { A = cache_string_fmt(ctx, "h"); }
-datatype_scalar(A) ::= FLOAT. { A = cache_string_fmt(ctx, "f"); }
-datatype_scalar(A) ::= DOUBLE. { A = cache_string_fmt(ctx, "d"); }
-datatype_scalar(A) ::= STRING. { A = cache_string_fmt(ctx, "S"); } // this is for the effects framework, not HLSL.
-datatype_scalar(A) ::= SNORM FLOAT. { A = cache_string_fmt(ctx, "Fs"); }
-datatype_scalar(A) ::= UNORM FLOAT. { A = cache_string_fmt(ctx, "Fu"); }
-datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = cache_string_fmt(ctx, "B%s", B); }
+datatype_scalar(A) ::= BOOL. { A = stringcache_fmt(ctx->strcache, "b"); }
+datatype_scalar(A) ::= INT. { A = stringcache_fmt(ctx->strcache, "i"); }
+datatype_scalar(A) ::= UINT. { A = stringcache_fmt(ctx->strcache, "u"); }
+datatype_scalar(A) ::= HALF. { A = stringcache_fmt(ctx->strcache, "h"); }
+datatype_scalar(A) ::= FLOAT. { A = stringcache_fmt(ctx->strcache, "f"); }
+datatype_scalar(A) ::= DOUBLE. { A = stringcache_fmt(ctx->strcache, "d"); }
+datatype_scalar(A) ::= STRING. { A = stringcache_fmt(ctx->strcache, "S"); } // this is for the effects framework, not HLSL.
+datatype_scalar(A) ::= SNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fs"); }
+datatype_scalar(A) ::= UNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fu"); }
+datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = stringcache_fmt(ctx->strcache, "B%s", B); }
 
 // !!! FIXME: MSDN suggests that the matrix ones are just typedefs inserted
 // !!! FIXME:  before parsing begins, like:
@@ -337,10 +337,10 @@
 // !!! FIXME:  ...maybe we can rip these out of the grammar and just create
 // !!! FIXME:  them at startup?
 %type datatype_vector { const char * }
-datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = cache_string_fmt(ctx, "v%d%s", (int) C.i64, B); }
+datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = stringcache_fmt(ctx->strcache, "v%d%s", (int) C.i64, B); }
 
 %type datatype_matrix { const char * }
-datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = cache_string_fmt(ctx, "m%d%d%s", (int) C.i64, (int) D.i64, B); }
+datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = stringcache_fmt(ctx->strcache, "m%d%d%s", (int) C.i64, (int) D.i64, B); }
 
 %type statement_block { Statement * }
 %destructor statement_block { delete_statement(ctx, $$); }
--- a/mojoshader_preprocessor.c	Tue Feb 23 17:38:00 2010 -0500
+++ b/mojoshader_preprocessor.c	Wed Feb 24 01:21:21 2010 -0500
@@ -37,16 +37,6 @@
 #define print_debug_lexing_position(s)
 #endif
 
-
-
-// Simple linked list to cache source filenames, so we don't have to copy
-//  the same string over and over for each opcode.
-typedef struct FilenameCache
-{
-    char *filename;
-    struct FilenameCache *next;
-} FilenameCache;
-
 typedef struct Context
 {
     int isfail;
@@ -59,7 +49,7 @@
     IncludeState *include_pool;
     Define *define_hashtable[256];
     Define *define_pool;
-    FilenameCache *filename_cache;
+    StringCache *filename_cache;
     MOJOSHADER_includeOpen open_callback;
     MOJOSHADER_includeClose close_callback;
     MOJOSHADER_malloc malloc;
@@ -507,54 +497,6 @@
 } // put_all_defines
 
 
-// filename cache stuff...
-
-static const char *cache_filename(Context *ctx, const char *fname)
-{
-    if (fname == NULL)
-        return NULL;
-
-    // !!! FIXME: this could be optimized into a hash table, but oh well.
-    FilenameCache *item = ctx->filename_cache;
-    while (item != NULL)
-    {
-        if (strcmp(item->filename, fname) == 0)
-            return item->filename;
-        item = item->next;
-    } // while
-
-    // new cache item.
-    item = (FilenameCache *) Malloc(ctx, sizeof (FilenameCache));
-    if (item == NULL)
-        return NULL;
-
-    item->filename = StrDup(ctx, fname);
-    if (item->filename == NULL)
-    {
-        Free(ctx, item);
-        return NULL;
-    } // if
-
-    item->next = ctx->filename_cache;
-    ctx->filename_cache = item;
-
-    return item->filename;
-} // cache_filename
-
-
-static void free_filename_cache(Context *ctx)
-{
-    FilenameCache *item = ctx->filename_cache;
-    while (item != NULL)
-    {
-        FilenameCache *next = item->next;
-        Free(ctx, item->filename);
-        Free(ctx, item);
-        item = next;
-    } // while
-} // free_filename_cache
-
-
 static int push_source(Context *ctx, const char *fname, const char *source,
                        unsigned int srclen, unsigned int linenum,
                        MOJOSHADER_includeClose close_callback, Define *defs)
@@ -568,9 +510,10 @@
 
     if (fname != NULL)
     {
-        state->filename = cache_filename(ctx, fname);
+        state->filename = stringcache(ctx->filename_cache, fname);
         if (state->filename == NULL)
         {
+            out_of_memory(ctx);
             put_include(ctx, state);
             return 0;
         } // if
@@ -661,7 +604,7 @@
 
     Context *ctx = (Context *) m(sizeof (Context), d);
     if (ctx == NULL)
-        return 0;
+        return NULL;
 
     memset(ctx, '\0', sizeof (Context));
     ctx->malloc = m;
@@ -670,6 +613,12 @@
     ctx->open_callback = open_callback;
     ctx->close_callback = close_callback;
     ctx->asm_comments = asm_comments;
+    ctx->filename_cache = stringcache_create(m, f, d);
+    if (ctx->filename_cache == 0)
+    {
+        f(ctx, d);
+        return NULL;
+    } // if
 
     // let the usual preprocessor parser sort these out.
     char *define_include = NULL;
@@ -728,7 +677,9 @@
 
     put_all_defines(ctx);
 
-    free_filename_cache(ctx);
+    if (ctx->filename_cache != NULL)
+        stringcache_destroy(ctx->filename_cache);
+
     free_define_pool(ctx);
     free_conditional_pool(ctx);
     free_include_pool(ctx);
@@ -890,8 +841,9 @@
         return;
     } // if
 
-    const char *cached = cache_filename(ctx, filename);
-    assert((cached != NULL) || (ctx->out_of_memory));
+    const char *cached = stringcache(ctx->filename_cache, filename);
+    if (cached == NULL)
+        out_of_memory(ctx);
     state->filename = cached;
     state->line = linenum;
 } // handle_pp_line