From cc2882f9efbd99f88628df3c792ca268420de248 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <icculus@icculus.org>
Date: Tue, 26 Oct 2010 02:06:23 -0400
Subject: [PATCH] First shot at semantic analysis (take the AST and type check,
 make sane, etc).

---
 mojoshader_compiler.c        | 833 ++++++++++++++++++++++++++++++++++-
 mojoshader_parser_hlsl.lemon |  40 +-
 2 files changed, 847 insertions(+), 26 deletions(-)

diff --git a/mojoshader_compiler.c b/mojoshader_compiler.c
index 54de5bff..e52f04bf 100644
--- a/mojoshader_compiler.c
+++ b/mojoshader_compiler.c
@@ -596,6 +596,7 @@ typedef struct Context
     //  with the rest of the context. This makes it so we can compare common
     //  strings by pointer without having to hash them every time, so long as
     //  we're comparing a string pointer we know came from this string cache.
+    // The first batch are simplifed datatype strings ("b" == bool, etc).
     const char *str_b;  // "b"
     const char *str_f;  // "f"
     const char *str_i;  // "i"
@@ -603,6 +604,17 @@ typedef struct Context
     const char *str_h;  // "h"
     const char *str_d;  // "d"
     const char *str_s;  // "s"
+    const char *str_S;  // "S"
+    const char *str_s1;  // "s1"
+    const char *str_s2;  // "s2"
+    const char *str_s3;  // "s3"
+    const char *str_sc;  // "sc"
+    const char *str_ss;  // "ss"
+    const char *str_sS;  // "sS"
+    const char *str_Fs;  // "Fs"
+    const char *str_Fu;  // "Fu"
+    const char *str_ns;  // "ns"
+    const char *str_nu;  // "nu"
 } Context;
 
 
@@ -666,6 +678,9 @@ static int create_symbolmap(Context *ctx, SymbolMap *map)
 static void push_symbol(Context *ctx, SymbolMap *map,
                         const char *sym, const char *datatype)
 {
+    // !!! FIXME: decide if this symbol is defined, and if so, if it's in
+    // !!! FIXME:  the current scope.
+
     SymbolScope *item = (SymbolScope *) Malloc(ctx, sizeof (SymbolScope));
     if (item == NULL)
         return;
@@ -676,7 +691,7 @@ static void push_symbol(Context *ctx, SymbolMap *map,
         {
             Free(ctx, item);
             return;
-        }
+        } // if
     } // if
 
     item->symbol = sym;  // cached strings, don't copy.
@@ -729,6 +744,23 @@ static inline void pop_scope(Context *ctx)
     pop_symbol_scope(ctx, &ctx->variables);
 } // push_scope
 
+static const char *find_symbol(Context *ctx, SymbolMap *map, const char *sym)
+{
+    const void *value = NULL;
+    hash_find(map->hash, sym, &value);
+    return (const char *) value;
+} // find_symbol
+
+static inline const char *find_usertype(Context *ctx, const char *sym)
+{
+    return find_symbol(ctx, &ctx->usertypes, sym);
+} // find_usertype
+
+static inline const char *find_variable(Context *ctx, const char *sym)
+{
+    return find_symbol(ctx, &ctx->variables, sym);
+} // find_variable
+
 static void destroy_symbolmap(Context *ctx, SymbolMap *map)
 {
     while (map->scope)
@@ -1010,6 +1042,7 @@ static void delete_scalar_or_array(Context *ctx, ScalarOrArray *soa)
 static Typedef *new_typedef(Context *ctx, int isconst, const char *datatype,
                             ScalarOrArray *soa)
 {
+    // we correct this datatype to the final string during semantic analysis.
     NEW_AST_NODE(retval, Typedef, AST_TYPEDEF);
     retval->isconst = isconst;
     retval->datatype = datatype;
@@ -1550,11 +1583,13 @@ static void delete_statement(Context *ctx, Statement *stmt)
     // don't free (stmt) here, the class-specific functions do it.
 } // delete_statement
 
-static int is_usertype(const Context *ctx, const char *token)
+static const char *get_usertype(const Context *ctx, const char *token)
 {
     const void *value;  // search all scopes.
-    return hash_find(ctx->usertypes.hash, token, &value);
-} // is_usertype
+    if (!hash_find(ctx->usertypes.hash, token, &value))
+        return NULL;
+    return (const char *) value;
+} // get_usertype
 
 
 // This is where the actual parsing happens. It's Lemon-generated!
@@ -2312,6 +2347,780 @@ static void print_ast(const int substmt, void *ast)
 } // print_ast
 
 
+/*
+ * datatype strings...
+ *
+ * "v" == void
+ * "b" == bool
+ * "i" == int
+ * "u" == uint
+ * "h" == half
+ * "f" == float
+ * "ns" == snorm float
+ * "nu" == unorm float
+ * "B{*}" == buffer
+ * "V{#,*}" == vector
+ * "M{#,#,*}" == matrix
+ * "d" == double
+ * "a{#,*}" == array
+ * "S" == string
+ * "X{*}" == struct
+ * "s1" == sampler1D
+ * "s2" == sampler2D
+ * "s3" == sampler3D
+ * "sc" == samplerCUBE
+ * "ss" == sampler_state
+ * "sS" == SamplerComparisonState
+ * "U{*}" == user type.
+ * "F{*,*}" == function
+ */
+
+
+static void require_numeric_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype == ctx->str_f) return;  // float
+    if (datatype == ctx->str_i) return;  // int
+    if (datatype == ctx->str_b) return;  // bool
+    if (datatype == ctx->str_u) return;  // uint
+    if (datatype == ctx->str_h) return;  // half
+    if (datatype == ctx->str_d) return;  // double
+    fail(ctx, "Expected numeric type");  // !!! FIXME: fmt.
+    // !!! FIXME: replace AST node with an AST_OP_INT_LITERAL zero, keep going.
+} // require_numeric_datatype
+
+static void require_integer_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype == ctx->str_i) return;  // int
+    if (datatype == ctx->str_u) return;  // uint
+    fail(ctx, "Expected integer type");  // !!! FIXME: fmt.
+    // !!! FIXME: replace AST node with an AST_OP_INT_LITERAL zero, keep going.
+} // require_integer_datatype
+
+static void require_boolean_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype == ctx->str_b) return;  // bool
+    if (datatype == ctx->str_i) return;  // int
+    if (datatype == ctx->str_u) return;  // uint
+    fail(ctx, "Expected boolean type");  // !!! FIXME: fmt.
+    // !!! FIXME: replace AST node with an AST_OP_BOOLEAN_LITERAL false, keep going.
+} // require_numeric_datatype
+
+
+static void require_array_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype[0] != 'a')
+        fail(ctx, "expected array");
+        // !!! FIXME: delete array dereference for further processing.
+} // require_array_datatype
+
+
+static void require_struct_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype[0] != 'X')
+        fail(ctx, "expected struct");
+        // !!! FIXME: delete struct dereference for further processing.
+} // require_array_datatype
+
+
+static const char *require_function_datatype(Context *ctx, const char *datatype)
+{
+    if (datatype[0] != 'F')
+    {
+        fail(ctx, "expected function");
+        // !!! FIXME: delete function call for further processing.
+        return ctx->str_i;
+    } // if
+
+    assert(datatype[1] == '{');
+    datatype += 2;
+    const char *ptr = strchr(datatype, ',');
+    assert(ptr != NULL);
+    return stringcache_len(ctx->strcache, datatype, (unsigned int) (ptr-datatype));
+} // require_function_datatype
+
+
+// Extract the individual element type from an array datatype.
+static const char *array_element_datatype(Context *ctx, const char *datatype)
+{
+    const char *ptr;
+    const char *ptr2;
+    unsigned int depth = 1;
+    assert(datatype[0] == 'a');
+    assert(datatype[1] == '{');
+    ptr = strchr(datatype+2, ',');
+    assert(ptr != NULL);
+    ptr++;
+    for (ptr2 = ptr; depth > 0; ptr2++)
+    {
+        const char ch = *ptr2;
+        assert(ch != '\0');
+        if (ch == '{')
+            depth++;
+        else if (ch == '}')
+            depth--;
+    } // for
+
+    return stringcache_len(ctx->strcache, ptr, (unsigned int) (ptr2 - ptr));
+} // array_element_datatype
+
+
+// This tests two datatypes to see if they are compatible, and adds cast
+//  operator nodes to the AST if the program was relying on implicit
+//  casts between then. Will fail() if the datatypes can't be coerced
+//  with a cast at all. (left) can be NULL to say that its datatype is
+//  set in stone (an lvalue, for example). No other NULLs are allowed.
+// Returns final datatype used once implicit casting is complete.
+// The datatypes must be pointers from the string cache.
+static const char *add_type_coercion(Context *ctx,
+                                     Expression **left, const char *ldatatype,
+                                     Expression **right, const char *rdatatype)
+{
+    // !!! FIXME: this whole function is probably naive at best.
+
+    if (ldatatype == rdatatype)
+        return ldatatype;   // they already match, so we're done.
+
+    struct {
+        const char *datatype;
+        const int bits;
+        const int is_unsigned;
+        const int floating;
+    } typeinf[] = {
+        { ctx->str_b,  1, 1, 0 },
+        { ctx->str_h, 16, 0, 1 },
+        { ctx->str_i, 32, 0, 0 },
+        { ctx->str_u, 32, 1, 0 },
+        { ctx->str_f, 32, 0, 1 },
+        { ctx->str_d, 64, 0, 1 },
+    };
+
+    int l, r;
+    for (l = 0; l < STATICARRAYLEN(typeinf); l++)
+    {
+        if (typeinf[l].datatype == ldatatype)
+            break;
+    } // for
+    for (r = 0; r < STATICARRAYLEN(typeinf); r++)
+    {
+        if (typeinf[r].datatype == rdatatype)
+            break;
+    } // for
+
+    enum { CHOOSE_NEITHER, CHOOSE_LEFT, CHOOSE_RIGHT } choice = CHOOSE_NEITHER;
+    if ((l < STATICARRAYLEN(typeinf)) && (r < STATICARRAYLEN(typeinf)))
+    {
+        if (left == NULL)
+            choice = CHOOSE_LEFT;  // we need to force to the lvalue.
+        else if (typeinf[l].bits > typeinf[r].bits)
+            choice = CHOOSE_LEFT;
+        else if (typeinf[l].bits < typeinf[r].bits)
+            choice = CHOOSE_RIGHT;
+        else if (typeinf[l].floating && !typeinf[r].floating)
+            choice = CHOOSE_LEFT;
+        else if (!typeinf[l].floating && typeinf[r].floating)
+            choice = CHOOSE_RIGHT;
+        else if (typeinf[l].is_unsigned && !typeinf[r].is_unsigned)
+            choice = CHOOSE_LEFT;
+        else if (!typeinf[l].is_unsigned && typeinf[r].is_unsigned)
+            choice = CHOOSE_RIGHT;
+    } // if
+
+    if (choice == CHOOSE_LEFT)
+    {
+        *right = new_cast_expr(ctx, ldatatype, *right);
+        return ldatatype;
+    } // if
+    else if (choice == CHOOSE_RIGHT)
+    {
+        *left = new_cast_expr(ctx, rdatatype, *left);
+        return rdatatype;
+    } // else if
+
+    assert(choice == CHOOSE_NEITHER);
+    fail(ctx, "incompatible data types");
+    // Ditch original (*right), force a literal value that matches
+    //  ldatatype, so further processing is normalized.
+    // !!! FIXME: force (right) to match (left).
+    delete_expr(ctx, *right);
+    *right = new_cast_expr(ctx, ldatatype, new_literal_int_expr(ctx, 0));
+    return ldatatype;
+} // add_type_coercion
+
+
+// Go through the AST and make sure all datatypes check out okay. For datatypes
+//  that are compatible but are relying on an implicit cast, we add explicit
+//  casts to the AST here, so further processing doesn't have to worry about
+//  type coercion.
+// For things that are incompatible, we generate errors and
+//  then replace them with reasonable defaults so further processing can
+//  continue (but code generation will be skipped due to errors).
+// This means further processing can assume the AST is sane and not have to
+//  spend effort verifying it again.
+static const char *type_check_ast(Context *ctx, void *ast)
+{
+    if (!ast)
+        return NULL;
+
+    // upkeep so we report correct error locations...
+    ctx->sourcefile = ((ASTGeneric *) ast)->ast.filename;
+    ctx->sourceline = ((ASTGeneric *) ast)->ast.line;
+
+    switch ( ((ASTGeneric *) ast)->ast.type )
+    {
+        case AST_OP_POSTINCREMENT:
+        case AST_OP_POSTDECREMENT:
+        case AST_OP_PREINCREMENT:
+        case AST_OP_PREDECREMENT:
+        case AST_OP_COMPLEMENT:
+        case AST_OP_NEGATE:
+        {
+            ExpressionUnary *expr = (ExpressionUnary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->operand);
+            require_numeric_datatype(ctx, datatype);
+            return datatype;
+        } // case
+
+        case AST_OP_NOT:
+        {
+            ExpressionUnary *expr = (ExpressionUnary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->operand);
+            require_boolean_datatype(ctx, datatype);
+            return datatype;
+        } // case
+
+        case AST_OP_DEREF_ARRAY:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            require_array_datatype(ctx, datatype);
+            require_numeric_datatype(ctx, datatype2);
+            add_type_coercion(ctx, NULL, ctx->str_i, &expr->right, datatype2);
+            return array_element_datatype(ctx, datatype);
+        } // case
+
+        case AST_OP_DEREF_STRUCT:
+        {
+            ExpressionDerefStruct *expr = (ExpressionDerefStruct *) ast;
+            const char *datatype = type_check_ast(ctx, expr->identifier);
+            require_struct_datatype(ctx, datatype);
+// !!! FIXME: map member to datatype
+datatype = "!!! FIXME";
+            return datatype;
+        } // case
+
+        case AST_OP_COMMA:
+        {
+            // evaluate and throw away left, return right.
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            type_check_ast(ctx, expr->left);
+            return type_check_ast(ctx, expr->right);
+        } // case
+
+        case AST_OP_MULTIPLY:
+        case AST_OP_DIVIDE:
+        case AST_OP_MODULO:
+        case AST_OP_ADD:
+        case AST_OP_SUBTRACT:
+        case AST_OP_LSHIFT:
+        case AST_OP_RSHIFT:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            require_numeric_datatype(ctx, datatype);
+            require_numeric_datatype(ctx, datatype2);
+            return add_type_coercion(ctx, &expr->left, datatype,
+                                     &expr->right, datatype2);
+        } // case
+
+        case AST_OP_LESSTHAN:
+        case AST_OP_GREATERTHAN:
+        case AST_OP_LESSTHANOREQUAL:
+        case AST_OP_GREATERTHANOREQUAL:
+        case AST_OP_NOTEQUAL:
+        case AST_OP_EQUAL:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            add_type_coercion(ctx, &expr->left, datatype,
+                              &expr->right, datatype2);
+            return ctx->str_b;
+        } // case
+
+        case AST_OP_BINARYAND:
+        case AST_OP_BINARYXOR:
+        case AST_OP_BINARYOR:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            require_integer_datatype(ctx, datatype);
+            require_integer_datatype(ctx, datatype2);
+            return add_type_coercion(ctx, &expr->left, datatype,
+                                     &expr->right, datatype2);
+        } // case
+
+        case AST_OP_LOGICALAND:
+        case AST_OP_LOGICALOR:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            require_boolean_datatype(ctx, datatype);
+            require_boolean_datatype(ctx, datatype2);
+            add_type_coercion(ctx, &expr->left, datatype,
+                              &expr->right, datatype2);
+            return ctx->str_b;
+        } // case
+
+        case AST_OP_ASSIGN:
+        case AST_OP_MULASSIGN:
+        case AST_OP_DIVASSIGN:
+        case AST_OP_MODASSIGN:
+        case AST_OP_ADDASSIGN:
+        case AST_OP_SUBASSIGN:
+        case AST_OP_LSHIFTASSIGN:
+        case AST_OP_RSHIFTASSIGN:
+        case AST_OP_ANDASSIGN:
+        case AST_OP_XORASSIGN:
+        case AST_OP_ORASSIGN:
+        {
+            ExpressionBinary *expr = (ExpressionBinary *) ast;
+            const char *datatype = type_check_ast(ctx, expr->left);
+            const char *datatype2 = type_check_ast(ctx, expr->right);
+            add_type_coercion(ctx, NULL, datatype, &expr->right, datatype2);
+            return datatype;
+        } // case
+
+        case AST_OP_CONDITIONAL:
+        {
+            ExpressionTernary *tern = (ExpressionTernary *) ast;
+            const char *datatype = type_check_ast(ctx, tern->left);
+            const char *datatype2 = type_check_ast(ctx, tern->center);
+            const char *datatype3 = type_check_ast(ctx, tern->right);
+            require_boolean_datatype(ctx, datatype);
+            return add_type_coercion(ctx, &tern->center, datatype2,
+                                     &tern->right, datatype3);
+        } // case
+
+        case AST_OP_IDENTIFIER:
+        {
+            ExpressionIdentifier *expr = (ExpressionIdentifier *) ast;
+            const char *datatype = find_variable(ctx, expr->identifier);
+            if (datatype == NULL)
+            {
+                fail(ctx, "Unknown identifier");
+                // !!! FIXME: replace with a sane default, move on.
+                datatype = ctx->str_i;
+            } // if
+            return datatype;
+        } // case
+
+        case AST_OP_INT_LITERAL:
+            return ctx->str_i;
+
+        case AST_OP_FLOAT_LITERAL:
+            return ctx->str_f;
+
+        case AST_OP_STRING_LITERAL:
+            return ctx->str_S;
+
+        case AST_OP_BOOLEAN_LITERAL:
+            return ctx->str_b;
+
+        case AST_ARGUMENTS:
+        {
+            Arguments *arguments = (Arguments *) ast;
+            const char *datatype = type_check_ast(ctx, arguments->argument);
+            if (arguments->next != NULL)
+            {
+                const char *datatype2 = type_check_ast(ctx, arguments->next);
+                datatype = stringcache_fmt(ctx->strcache, "%s%s",
+                                           datatype, datatype2);
+            } // if
+            return datatype;
+        } // case
+
+        case AST_OP_CALLFUNC:
+        {
+            ExpressionCallFunction *expr = (ExpressionCallFunction *) ast;
+            const char *datatype = type_check_ast(ctx, expr->identifier);
+            /*const char *datatype2 =*/ type_check_ast(ctx, expr->args);
+            const char *retval = require_function_datatype(ctx, datatype);
+// !!! FIXME: test each arg against function datatype.
+            return retval;  // this is the datatype of the func's return value.
+        } // case
+
+        case AST_OP_CONSTRUCTOR:
+        {
+            ExpressionConstructor *expr = (ExpressionConstructor *) ast;
+// !!! FIXME: test each arg against constructor datatype.
+            type_check_ast(ctx, expr->args);
+            return expr->datatype;
+        } // case
+
+        case AST_OP_CAST:
+        {
+            ExpressionCast *expr = (ExpressionCast *) ast;
+            const char *datatype = expr->datatype;
+            const char *datatype2 = type_check_ast(ctx, expr->operand);
+            // you still need type coercion, since you could do a wrong cast,
+            //  like "int x = (short) mychar;"
+            add_type_coercion(ctx, NULL, datatype, &expr->operand, datatype2);
+            return datatype;
+        } // case
+
+        case AST_STATEMENT_BREAK:
+        case AST_STATEMENT_CONTINUE:
+        case AST_STATEMENT_DISCARD:
+        case AST_STATEMENT_EMPTY:
+        {
+            type_check_ast(ctx, ((Statement *) ast)->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_EXPRESSION:
+        {
+            ExpressionStatement *stmt = (ExpressionStatement *) ast;
+            // !!! FIXME: warn about expressions without a side-effect here?
+            type_check_ast(ctx, stmt->expr);  // !!! FIXME: This is named badly...
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_IF:
+        {
+            IfStatement *stmt = (IfStatement *) ast;
+            push_scope(ctx);  // new scope for "if ((int x = blah()) != 0)"
+            type_check_ast(ctx, stmt->expr);
+            type_check_ast(ctx, stmt->statement);
+            pop_scope(ctx);
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_TYPEDEF:
+        {
+            TypedefStatement *stmt = (TypedefStatement *) ast;
+            type_check_ast(ctx, stmt->type_info);
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_SWITCH:
+        {
+            SwitchStatement *stmt = (SwitchStatement *) ast;
+            SwitchCases *cases = stmt->cases;
+            const char *datatype = type_check_ast(ctx, stmt->expr);
+            while (cases)
+            {
+                const char *datatype2 = type_check_ast(ctx, cases->expr);
+                add_type_coercion(ctx, NULL, datatype,
+                                  &cases->expr, datatype2);
+                type_check_ast(ctx, cases->statement);
+                cases = cases->next;
+            } // while
+            return NULL;
+        } // case
+
+        case AST_SWITCH_CASE:
+        {
+            assert(0 && "Should have been handled by AST_STATEMENT_SWITCH.");
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_STRUCT:
+        {
+            StructStatement *stmt = (StructStatement *) ast;
+            type_check_ast(ctx, stmt->struct_info);
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_VARDECL:
+        {
+            VarDeclStatement *stmt = (VarDeclStatement *) ast;
+            type_check_ast(ctx, stmt->declaration);
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_BLOCK:
+        {
+            BlockStatement *bs = (BlockStatement *) ast;
+            push_scope(ctx);  // new vars declared here live until '}'.
+            type_check_ast(ctx, bs->statements);
+            pop_scope(ctx);
+            type_check_ast(ctx, bs->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_FOR:
+        {
+            ForStatement *fs = (ForStatement *) ast;
+            push_scope(ctx);  // new scope for "for (int x = 0; ...)"
+            type_check_ast(ctx, fs->var_decl);
+            type_check_ast(ctx, fs->initializer);
+            type_check_ast(ctx, fs->looptest);
+            type_check_ast(ctx, fs->counter);
+            type_check_ast(ctx, fs->statement);
+            pop_scope(ctx);
+            type_check_ast(ctx, fs->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_DO:
+        {
+            DoStatement *ds = (DoStatement *) ast;
+            type_check_ast(ctx, ds->statement);
+            push_scope(ctx);  // new scope for "while ((int x = blah()) != 0)"
+            type_check_ast(ctx, ds->expr);
+            pop_scope(ctx);
+            type_check_ast(ctx, ds->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_WHILE:
+        {
+            WhileStatement *ws = (WhileStatement *) ast;
+            push_scope(ctx);  // new scope for "while ((int x = blah()) != 0)"
+            type_check_ast(ctx, ws->expr);
+            type_check_ast(ctx, ws->statement);
+            pop_scope(ctx);
+            type_check_ast(ctx, ws->next);
+            return NULL;
+        } // case
+
+        case AST_STATEMENT_RETURN:
+        {
+            ReturnStatement *stmt = (ReturnStatement *) ast;
+            type_check_ast(ctx, stmt->expr);
+            type_check_ast(ctx, stmt->next);
+            return NULL;
+        } // case
+
+        case AST_COMPUNIT_FUNCTION:
+        {
+            CompilationUnitFunction *unit = (CompilationUnitFunction *) ast;
+            const char *sig = get_usertype(ctx, unit->declaration->identifier);
+            if (sig == NULL)
+            {
+                // add function declaration if we've not seen it.
+                sig = unit->declaration->datatype;
+                push_usertype(ctx, unit->declaration->identifier, sig);
+            } // if
+
+            // declarations can be done multiple times if they match.
+            else if (sig != unit->declaration->datatype)
+            {
+                fail(ctx, "function sigs don't match");
+            } // else
+
+            push_scope(ctx);  // so function params are in function scope.
+            type_check_ast(ctx, unit->declaration);
+            if (unit->definition == NULL)
+                pop_scope(ctx);
+            else
+            {
+                type_check_ast(ctx, unit->definition);
+                pop_scope(ctx);
+                push_variable(ctx, unit->declaration->identifier, sig);
+            } // else
+
+            type_check_ast(ctx, unit->next);
+            return NULL;
+        } // case
+
+        case AST_COMPUNIT_TYPEDEF:
+        {
+            CompilationUnitTypedef *unit = (CompilationUnitTypedef *) ast;
+            type_check_ast(ctx, unit->type_info);
+            type_check_ast(ctx, unit->next);
+            return NULL;
+        } // case
+
+        case AST_COMPUNIT_STRUCT:
+        {
+            CompilationUnitStruct *unit = (CompilationUnitStruct *) ast;
+            type_check_ast(ctx, unit->struct_info);
+            type_check_ast(ctx, unit->next);
+            return NULL;
+        } // case
+
+        case AST_COMPUNIT_VARIABLE:
+        {
+            CompilationUnitVariable *unit = (CompilationUnitVariable *) ast;
+            type_check_ast(ctx, unit->declaration);
+            type_check_ast(ctx, unit->next);
+            return NULL;
+        } // case
+
+        case AST_SCALAR_OR_ARRAY:
+        {
+            ScalarOrArray *soa = (ScalarOrArray *) ast;
+            const char *datatype = type_check_ast(ctx, soa->dimension);
+            require_integer_datatype(ctx, datatype);
+            assert(0); // !!! FIXME: figure out datatype of identifier.
+            return NULL;
+        } // case
+
+        case AST_TYPEDEF:
+        {
+            ScalarOrArray *soa = ((Typedef *) ast)->details;
+            const char *datatype = get_usertype(ctx, soa->identifier);
+            if (datatype != NULL)
+            {
+                fail(ctx, "typedef already defined");
+                return datatype;
+            } // if
+
+            datatype = ((Typedef *) ast)->datatype;
+
+            // don't walk into AST_SCALAR_OR_ARRAY here, since it can't resolve the identifier.
+            // !!! FIXME: SCALAR_OR_ARRAY is sort of a mess.
+            // !!! FIXME: this part is cut and paste.
+            assert( (soa->isarray && soa->dimension) ||
+                    (!soa->isarray && !soa->dimension) );
+
+            if (soa->isarray)
+            {
+                if (soa->dimension->ast.type != AST_OP_INT_LITERAL)
+                {
+                    fail(ctx, "Expected integer");
+                    delete_expr(ctx, soa->dimension);  // make sane.
+                    soa->dimension = new_literal_int_expr(ctx, 1);
+                } // if
+
+                int64 dim = ((ExpressionIntLiteral *) soa->dimension)->value;
+                datatype = stringcache_fmt(ctx->strcache, "a{%lld,%s}",
+                                           (long long) dim, datatype);
+            } // if
+
+            ((Typedef *) ast)->datatype = datatype;  // make sane.
+            push_usertype(ctx, soa->identifier, datatype);
+            return datatype;
+        } // case
+
+        case AST_FUNCTION_PARAMS:
+        {
+            FunctionParameters *params = (FunctionParameters *) ast;
+            push_variable(ctx, params->identifier, params->datatype);
+            type_check_ast(ctx, params->initializer);
+            type_check_ast(ctx, params->next);
+            return NULL;
+        } // case
+
+        case AST_FUNCTION_SIGNATURE:
+        {
+            FunctionSignature *sig = (FunctionSignature *) ast;
+            type_check_ast(ctx, sig->params);
+            return sig->datatype;
+        } // case
+
+        case AST_STRUCT_DECLARATION:
+        {
+            StructDeclaration *decl = (StructDeclaration *) ast;
+            const char *datatype = type_check_ast(ctx, decl->members);
+            datatype = stringcache_fmt(ctx->strcache, "X{%s}", datatype);
+            push_usertype(ctx, decl->name, datatype);
+            return stringcache_fmt(ctx->strcache, "U{%s}", decl->name);
+        } // case
+
+        case AST_STRUCT_MEMBER:
+        {
+            StructMembers *members = (StructMembers *) ast;
+            const char *dtype = type_check_ast(ctx, members->details);
+            const char *dtype2 = type_check_ast(ctx, members->next);
+            if (dtype2)
+                return stringcache_fmt(ctx->strcache, "%s%s", dtype, dtype2);
+            return dtype;
+        } // case
+
+        case AST_VARIABLE_DECLARATION:
+        {
+            VariableDeclaration *decl = (VariableDeclaration *) ast;
+            ScalarOrArray *soa = decl->details;
+            const char *datatype;
+            const char *datatype2;
+
+            // this is true now, but we'll fill in ->datatype no matter what.
+            assert( (decl->datatype && !decl->anonymous_datatype) ||
+                    (!decl->datatype && decl->anonymous_datatype) );
+
+            // fix up if necessary.
+            if (decl->anonymous_datatype != NULL)
+                decl->datatype = type_check_ast(ctx, decl->anonymous_datatype);
+            datatype = decl->datatype;
+
+            // don't walk into AST_SCALAR_OR_ARRAY here, since it can't resolve the identifier.
+            // !!! FIXME: SCALAR_OR_ARRAY is sort of a mess.
+            // !!! FIXME: this part is cut and paste.
+            assert( (soa->isarray && soa->dimension) ||
+                    (!soa->isarray && !soa->dimension) );
+
+            if (soa->isarray)
+            {
+                if (soa->dimension->ast.type != AST_OP_INT_LITERAL)
+                {
+                    fail(ctx, "Expected integer");
+                    delete_expr(ctx, soa->dimension);  // make sane.
+                    soa->dimension = new_literal_int_expr(ctx, 1);
+                } // if
+
+                int64 dim = ((ExpressionIntLiteral *) soa->dimension)->value;
+                datatype = stringcache_fmt(ctx->strcache, "a{%lld,%s}",
+                                           (long long) dim, datatype);
+            } // if
+
+            decl->datatype = datatype;  // make sane.
+            push_variable(ctx, soa->identifier, datatype);
+            datatype2 = type_check_ast(ctx, decl->initializer);
+            add_type_coercion(ctx, NULL, datatype, &decl->initializer, datatype2);
+
+            type_check_ast(ctx, decl->annotations);
+            type_check_ast(ctx, decl->lowlevel);
+
+            datatype2 = type_check_ast(ctx, decl->next);
+            assert(datatype == datatype2);
+            return datatype;
+        } // case
+
+        case AST_ANNOTATION:
+        {
+            Annotations *anno = (Annotations *) ast;
+            while (anno)
+            {
+                type_check_ast(ctx, anno->initializer);
+                anno = anno->next;
+            } // while
+            return NULL;
+        } // case
+
+        case AST_PACK_OFFSET:
+        case AST_VARIABLE_LOWLEVEL:
+            return NULL;  // no-op (for now, at least).
+
+        default:
+            assert(0 && "unexpected type");
+    } // switch
+
+    return NULL;
+} // type_check_ast
+
+
+
+static inline void semantic_analysis(Context *ctx)
+{
+    type_check_ast(ctx, ctx->ast);
+    // !!! FIXME: build an IR here.
+    delete_compilation_unit(ctx, ctx->ast);  // done with the AST, nuke it.
+    ctx->ast = NULL;
+    // !!! FIXME: do everything else.  :)
+} // semantic_analysis
+
+
 static inline int64 strtoi64(const char *str, unsigned int len)
 {
     int64 retval = 0;
@@ -2522,12 +3331,11 @@ static int convert_to_lemon_token(Context *ctx, const char *token,
             if (tokencmp("noExpressionOptimizations")) return TOKEN_HLSL_NOEXPRESSIONOPTIMIZATIONS;
             if (tokencmp("unused")) return TOKEN_HLSL_UNUSED;
             if (tokencmp("xps")) return TOKEN_HLSL_XPS;
-
             #undef tokencmp
 
             // get a canonical copy of the string now, as we'll need it.
             token = stringcache_len(ctx->strcache, token, tokenlen);
-            if (is_usertype(ctx, token))
+            if (get_usertype(ctx, token) != NULL)
                 return TOKEN_HLSL_USERTYPE;
             return TOKEN_HLSL_IDENTIFIER;
 
@@ -2583,6 +3391,17 @@ static Context *build_context(MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
     ctx->str_h = stringcache(ctx->strcache, "h");
     ctx->str_d = stringcache(ctx->strcache, "d");
     ctx->str_s = stringcache(ctx->strcache, "s");
+    ctx->str_S = stringcache(ctx->strcache, "S");
+    ctx->str_s1 = stringcache(ctx->strcache, "s1");
+    ctx->str_s2 = stringcache(ctx->strcache, "s2");
+    ctx->str_s3 = stringcache(ctx->strcache, "s3");
+    ctx->str_sc = stringcache(ctx->strcache, "sc");
+    ctx->str_ss = stringcache(ctx->strcache, "ss");
+    ctx->str_sS = stringcache(ctx->strcache, "sS");
+    ctx->str_Fs = stringcache(ctx->strcache, "Fs");
+    ctx->str_Fu = stringcache(ctx->strcache, "Fu");
+    ctx->str_ns = stringcache(ctx->strcache, "ns");
+    ctx->str_nu = stringcache(ctx->strcache, "nu");
 
     return ctx;
 } // build_context
@@ -2751,6 +3570,8 @@ void MOJOSHADER_compile(const char *filename,
 
     print_ast(0, ctx->ast);
 
+    semantic_analysis(ctx);
+
     destroy_context(ctx);
 
     // !!! FIXME: report success/error.
diff --git a/mojoshader_parser_hlsl.lemon b/mojoshader_parser_hlsl.lemon
index ff66e0b7..5e23b848 100644
--- a/mojoshader_parser_hlsl.lemon
+++ b/mojoshader_parser_hlsl.lemon
@@ -311,26 +311,26 @@ datatype(A) ::= intrinsic_datatype(B). { A = B; }
 datatype(A) ::= USERTYPE(B). { A = B.string; }
 
 %type datatype_sampler { const char * }
-datatype_sampler(A) ::= SAMPLER. { A = stringcache_fmt(ctx->strcache, "s1"); }
-datatype_sampler(A) ::= SAMPLER1D. { A = stringcache_fmt(ctx->strcache, "s1"); }
-datatype_sampler(A) ::= SAMPLER2D. { A = stringcache_fmt(ctx->strcache, "s2"); }
-datatype_sampler(A) ::= SAMPLER3D. { A = stringcache_fmt(ctx->strcache, "s3"); }
-datatype_sampler(A) ::= SAMPLERCUBE. { A = stringcache_fmt(ctx->strcache, "sc"); }
-datatype_sampler(A) ::= SAMPLER_STATE. { A = stringcache_fmt(ctx->strcache, "ss"); }
-datatype_sampler(A) ::= SAMPLERSTATE. { A = stringcache_fmt(ctx->strcache, "ss"); }
-datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = stringcache_fmt(ctx->strcache, "sS"); }
+datatype_sampler(A) ::= SAMPLER. { A = ctx->str_s1; }
+datatype_sampler(A) ::= SAMPLER1D. { A = ctx->str_s1; }
+datatype_sampler(A) ::= SAMPLER2D. { A = ctx->str_s2; }
+datatype_sampler(A) ::= SAMPLER3D. { A = ctx->str_s3; }
+datatype_sampler(A) ::= SAMPLERCUBE. { A = ctx->str_sc; }
+datatype_sampler(A) ::= SAMPLER_STATE. { A = ctx->str_ss; }
+datatype_sampler(A) ::= SAMPLERSTATE. { A = ctx->str_ss; }
+datatype_sampler(A) ::= SAMPLERCOMPARISONSTATE. { A = ctx->str_sS; }
 
 %type datatype_scalar { const char * }
-datatype_scalar(A) ::= BOOL. { A = stringcache_fmt(ctx->strcache, "b"); }
-datatype_scalar(A) ::= INT. { A = stringcache_fmt(ctx->strcache, "i"); }
-datatype_scalar(A) ::= UINT. { A = stringcache_fmt(ctx->strcache, "u"); }
-datatype_scalar(A) ::= HALF. { A = stringcache_fmt(ctx->strcache, "h"); }
-datatype_scalar(A) ::= FLOAT. { A = stringcache_fmt(ctx->strcache, "f"); }
-datatype_scalar(A) ::= DOUBLE. { A = stringcache_fmt(ctx->strcache, "d"); }
-datatype_scalar(A) ::= STRING. { A = stringcache_fmt(ctx->strcache, "S"); } // this is for the effects framework, not HLSL.
-datatype_scalar(A) ::= SNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fs"); }
-datatype_scalar(A) ::= UNORM FLOAT. { A = stringcache_fmt(ctx->strcache, "Fu"); }
-datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = stringcache_fmt(ctx->strcache, "B%s", B); }
+datatype_scalar(A) ::= BOOL. { A = ctx->str_b; }
+datatype_scalar(A) ::= INT. { A = ctx->str_i; }
+datatype_scalar(A) ::= UINT. { A = ctx->str_u; }
+datatype_scalar(A) ::= HALF. { A = ctx->str_h; }
+datatype_scalar(A) ::= FLOAT. { A = ctx->str_f; }
+datatype_scalar(A) ::= DOUBLE. { A = ctx->str_d; }
+datatype_scalar(A) ::= STRING. { A = ctx->str_S; } // this is for the effects framework, not HLSL.
+datatype_scalar(A) ::= SNORM FLOAT. { A = ctx->str_ns; }
+datatype_scalar(A) ::= UNORM FLOAT. { A = ctx->str_nu; }
+datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = stringcache_fmt(ctx->strcache, "B{%s}", B); }
 
 // !!! FIXME: MSDN suggests that the matrix ones are just typedefs inserted
 // !!! FIXME:  before parsing begins, like:
@@ -338,10 +338,10 @@ datatype_scalar(A) ::= BUFFER LT datatype_scalar(B) GT. { A = stringcache_fmt(ct
 // !!! FIXME:  ...maybe we can rip these out of the grammar and just create
 // !!! FIXME:  them at startup?
 %type datatype_vector { const char * }
-datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = stringcache_fmt(ctx->strcache, "v%d%s", (int) C.i64, B); }
+datatype_vector(A) ::= VECTOR LT datatype_scalar(B) COMMA INT_CONSTANT(C) GT. { A = stringcache_fmt(ctx->strcache, "V{%d,%s}", (int) C.i64, B); }
 
 %type datatype_matrix { const char * }
-datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = stringcache_fmt(ctx->strcache, "m%d%d%s", (int) C.i64, (int) D.i64, B); }
+datatype_matrix(A) ::= MATRIX LT datatype_scalar(B) COMMA INT_CONSTANT(C) COMMA INT_CONSTANT(D) GT. { A = stringcache_fmt(ctx->strcache, "M{%d,%d,%s}", (int) C.i64, (int) D.i64, B); }
 
 %type statement_block { Statement * }
 %destructor statement_block { delete_statement(ctx, $$); }