Initial work on assembler. Not even close to done.
authorRyan C. Gordon <icculus@icculus.org>
Thu, 04 Dec 2008 21:36:11 -0500
changeset 465 0a75f98f785b
parent 464 eba4cf79437f
child 466 c2aa844013f1
Initial work on assembler. Not even close to done.
CMakeLists.txt
mojoshader.c
mojoshader.h
mojoshader_assembler.c
mojoshader_internal.h
mojoshader_opengl.c
--- a/CMakeLists.txt	Tue Dec 02 06:24:35 2008 -0500
+++ b/CMakeLists.txt	Thu Dec 04 21:36:11 2008 -0500
@@ -40,7 +40,11 @@
     ADD_DEFINITIONS(-TP)  # force .c files to compile as C++.
 ENDIF(MSVC)
 
-ADD_LIBRARY(mojoshader STATIC mojoshader.c mojoshader_opengl.c)
+ADD_LIBRARY(mojoshader STATIC
+    mojoshader.c
+    mojoshader_assembler.c
+    mojoshader_opengl.c
+)
 
 FIND_PACKAGE(SDL)
 IF(SDL_FOUND)
--- a/mojoshader.c	Tue Dec 02 06:24:35 2008 -0500
+++ b/mojoshader.c	Thu Dec 04 21:36:11 2008 -0500
@@ -14,42 +14,6 @@
 #define __MOJOSHADER_INTERNAL__ 1
 #include "mojoshader_internal.h"
 
-// we need to reference this by explicit value occasionally.
-#define OPCODE_RET 28
-
-typedef enum
-{
-    REG_TYPE_TEMP = 0,
-    REG_TYPE_INPUT = 1,
-    REG_TYPE_CONST = 2,
-    REG_TYPE_ADDRESS = 3,
-    REG_TYPE_TEXTURE = 3,  // ALSO 3!
-    REG_TYPE_RASTOUT = 4,
-    REG_TYPE_ATTROUT = 5,
-    REG_TYPE_TEXCRDOUT = 6,
-    REG_TYPE_OUTPUT = 6,  // ALSO 6!
-    REG_TYPE_CONSTINT = 7,
-    REG_TYPE_COLOROUT = 8,
-    REG_TYPE_DEPTHOUT = 9,
-    REG_TYPE_SAMPLER = 10,
-    REG_TYPE_CONST2 = 11,
-    REG_TYPE_CONST3 = 12,
-    REG_TYPE_CONST4 = 13,
-    REG_TYPE_CONSTBOOL = 14,
-    REG_TYPE_LOOP = 15,
-    REG_TYPE_TEMPFLOAT16 = 16,
-    REG_TYPE_MISCTYPE = 17,
-    REG_TYPE_LABEL = 18,
-    REG_TYPE_PREDICATE = 19,
-    REG_TYPE_MAX = 19
-} RegisterType;
-
-typedef enum
-{
-    TEXTURE_TYPE_2D = 2,
-    TEXTURE_TYPE_CUBE = 3,
-    TEXTURE_TYPE_VOLUME = 4,
-} TextureType;
 
 // predeclare.
 typedef struct Context Context;
@@ -122,21 +86,6 @@
     const_array_varname_function get_const_array_varname;
 } Profile;
 
-typedef enum
-{
-    RASTOUT_TYPE_POSITION = 0,
-    RASTOUT_TYPE_FOG = 1,
-    RASTOUT_TYPE_POINT_SIZE = 2,
-    RASTOUT_TYPE_MAX = 2
-} RastOutType;
-
-typedef enum
-{
-    MISCTYPE_TYPE_POSITION = 0,
-    MISCTYPE_TYPE_FACE = 1,
-    MISCTYPE_TYPE_MAX = 1
-} MiscTypeType;
-
 
 // A simple linked list of strings, so we can build the final output without
 //  realloc()ing for each new line, and easily insert lines into the middle
@@ -181,68 +130,6 @@
     struct RegisterList *next;
 } RegisterList;
 
-// result modifiers.
-// !!! FIXME: why isn't this an enum?
-#define MOD_SATURATE 0x01
-#define MOD_PP 0x02
-#define MOD_CENTROID 0x04
-
-// source modifiers.
-typedef enum
-{
-    SRCMOD_NONE,
-    SRCMOD_NEGATE,
-    SRCMOD_BIAS,
-    SRCMOD_BIASNEGATE,
-    SRCMOD_SIGN,
-    SRCMOD_SIGNNEGATE,
-    SRCMOD_COMPLEMENT,
-    SRCMOD_X2,
-    SRCMOD_X2NEGATE,
-    SRCMOD_DZ,
-    SRCMOD_DW,
-    SRCMOD_ABS,
-    SRCMOD_ABSNEGATE,
-    SRCMOD_NOT,
-    SRCMOD_TOTAL
-} SourceMod;
-
-
-typedef struct
-{
-    const uint32 *token;   // this is the unmolested token in the stream.
-    int regnum;
-    int relative;
-    int writemask;   // xyzw or rgba (all four, not split out).
-    int writemask0;  // x or red
-    int writemask1;  // y or green
-    int writemask2;  // z or blue
-    int writemask3;  // w or alpha
-    int orig_writemask;   // writemask before mojoshader tweaks it.
-    int result_mod;
-    int result_shift;
-    RegisterType regtype;
-} DestArgInfo;
-
-typedef struct
-{
-    const uint32 *token;   // this is the unmolested token in the stream.
-    int regnum;
-    int swizzle;  // xyzw (all four, not split out).
-    int swizzle_x;
-    int swizzle_y;
-    int swizzle_z;
-    int swizzle_w;
-    SourceMod src_mod;
-    RegisterType regtype;
-    int relative;
-    RegisterType relative_regtype;
-    int relative_regnum;
-    int relative_component;
-    const VariableList *relative_array;
-} SourceArgInfo;
-
-
 
 #define SCRATCH_BUFFER_SIZE 128
 #define SCRATCH_BUFFERS 32
@@ -738,28 +625,6 @@
 } // replicate_swizzle
 
 
-static inline int scalar_register(const RegisterType regtype, const int regnum)
-{
-    switch (regtype)
-    {
-        case REG_TYPE_DEPTHOUT:
-        case REG_TYPE_CONSTBOOL:
-        case REG_TYPE_PREDICATE:
-        case REG_TYPE_LOOP:
-            return 1;
-
-        case REG_TYPE_MISCTYPE:
-            if ( ((const MiscTypeType) regnum) == MISCTYPE_TYPE_FACE )
-                return 1;
-            return 0;
-
-        default: break;
-    } // switch
-
-    return 0;
-} // scalar_register
-
-
 static inline int no_swizzle(const int swizzle)
 {
     return (swizzle == 0xE4);  // 0xE4 == 11100100 ... 0 1 2 3. No swizzle.
@@ -6365,125 +6230,23 @@
 //  of the instruction token.
 static const Instruction instructions[] =
 {
-    // INSTRUCTION_STATE means this opcode has to update the state machine
-    //  (we're entering an ELSE block, etc). INSTRUCTION means there's no
-    //  state, just go straight to the emitters.
-    #define INSTRUCTION_STATE(op, slots, argsseq, t) { \
-        #op, slots, t, parse_args_##argsseq, state_##op, PROFILE_EMITTERS(op) \
-    }
-    #define INSTRUCTION(op, slots, argsseq, t) { \
-        #op, slots, t, parse_args_##argsseq, 0, PROFILE_EMITTERS(op) \
-    }
-
-    // !!! FIXME: Some of these MOJOSHADER_TYPE_ANYs need to have their scope
-    // !!! FIXME:  reduced to just PIXEL or VERTEX.
-
-    INSTRUCTION(NOP, 1, NULL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(MOV, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(ADD, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(SUB, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(MAD, 1, DSSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(MUL, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(RCP, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(RSQ, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(DP3, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(DP4, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(MIN, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(MAX, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(SLT, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(SGE, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(EXP, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(LOG, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(LIT, 3, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(DST, 1, DSS, MOJOSHADER_TYPE_VERTEX),
-    INSTRUCTION(LRP, 2, DSSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(FRC, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(M4X4, 4, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(M4X3, 3, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(M3X4, 4, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(M3X3, 3, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(M3X2, 2, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(CALL, 2, S, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(CALLNZ, 3, SS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(LOOP, 3, SS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(RET, 1, NULL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(ENDLOOP, 2, NULL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(LABEL, 0, S, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(DCL, 0, DCL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(POW, 3, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(CRS, 2, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(SGN, 3, DSSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(ABS, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(NRM, 3, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(SINCOS, 8, SINCOS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(REP, 3, S, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(ENDREP, 2, NULL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(IF, 3, S, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(IFC, 3, SS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(ELSE, 1, NULL, MOJOSHADER_TYPE_ANY),  // !!! FIXME: state!
-    INSTRUCTION(ENDIF, 1, NULL, MOJOSHADER_TYPE_ANY), // !!! FIXME: state!
-    INSTRUCTION_STATE(BREAK, 1, NULL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(BREAKC, 3, SS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(MOVA, 1, DS, MOJOSHADER_TYPE_VERTEX),
-    INSTRUCTION_STATE(DEFB, 0, DEFB, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(DEFI, 0, DEF, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION_STATE(TEXCRD, 1, TEXCRD, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(TEXKILL, 2, D, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(TEXLD, 1, TEXLD, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXBEM, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXBEML, 2, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXREG2AR, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXREG2GB, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X2PAD, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X2TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X3PAD, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X3TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
-    INSTRUCTION(TEXM3X3SPEC, 1, DSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X3VSPEC, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(EXPP, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(LOGP, 1, DS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(CND, 1, DSSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(DEF, 0, DEF, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(TEXREG2RGB, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXDP3TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X2DEPTH, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXDP3, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXM3X3, 1, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXDEPTH, 1, D, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(CMP, 1, DSSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(BEM, 2, DSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(DP2ADD, 2, DSSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(DSX, 2, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(DSY, 2, DS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION(TEXLDD, 3, DSSSS, MOJOSHADER_TYPE_PIXEL),
-    INSTRUCTION_STATE(SETP, 1, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(TEXLDL, 2, DSS, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION_STATE(BREAKP, 3, S, MOJOSHADER_TYPE_ANY),
-
-    // !!! FIXME: TEXLDB?
+    #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
+        opstr, slots, t, parse_args_##a, state_##op, PROFILE_EMITTERS(op) \
+    },
+
+    #define INSTRUCTION(op, opstr, slots, a, t) { \
+        opstr, slots, t, parse_args_##a, 0, PROFILE_EMITTERS(op) \
+    },
+
+    #define MOJOSHADER_DO_INSTRUCTION_TABLE 1
+    #include "mojoshader_internal.h"
+    #undef MOJOSHADER_DO_INSTRUCTION_TABLE
 
     #undef INSTRUCTION
     #undef INSTRUCTION_STATE
 };
 
 
-
 // parse various token types...
 
 static int parse_instruction_token(Context *ctx)
@@ -6506,6 +6269,8 @@
 
     if ((token & 0x80000000) != 0)
         return fail(ctx, "instruction token high bit must be zero.");  // so says msdn.
+    else if (instruction->opcode_string == NULL)
+        return fail(ctx, "Unknown opcode.");
 
     if (coissue)
     {
@@ -6743,6 +6508,7 @@
 
 static int parse_phase_token(Context *ctx)
 {
+    // !!! FIXME: needs state; allow only one phase token per shader, I think?
     if (SWAP32(*(ctx->tokens)) != 0x0000FFFD) // phase token always 0x0000FFFD.
         return 0;  // not us, eat no tokens.
     else if ( (!shader_is_pixel(ctx)) || (!shader_version_exactly(ctx, 1, 4)) )
@@ -6781,16 +6547,6 @@
 } // parse_token
 
 
-// #define this to force app to supply an allocator, so there's no reference
-//  to the C runtime's malloc() and free()...
-#if MOJOSHADER_FORCE_ALLOCATOR
-#define internal_malloc NULL
-#define internal_free NULL
-#else
-static void *internal_malloc(int bytes, void *d) { return malloc(bytes); }
-static void internal_free(void *ptr, void *d) { free(ptr); }
-#endif
-
 static int find_profile_id(const char *profile)
 {
     int i;
--- a/mojoshader.h	Tue Dec 02 06:24:35 2008 -0500
+++ b/mojoshader.h	Thu Dec 04 21:36:11 2008 -0500
@@ -485,6 +485,109 @@
 
 
 
+/* Assembler interface... */
+
+/*
+ * Structure used to return data from assembling of a shader...
+ */
+typedef struct MOJOSHADER_assembleData
+{
+    /*
+     * Human-readable error, if there is one. Will be NULL if there was no
+     *  error. The string will be UTF-8 encoded, and English only. Most of
+     *  these shouldn't be shown to the end-user anyhow.
+     */
+    const char *error;
+
+    /*
+     * Line number of error, if there is one. Will be zero if there isn't.
+     */
+    unsigned int error_line;
+
+    /*
+     * Bytes of output from assembling. Binary data. Will be NULL on error.
+     */
+    const char *output;
+
+    /*
+     * Byte count for output. Will be 0 on error.
+     */
+    int output_len;
+
+    /*
+     * Count of Direct3D instruction slots used. As with Microsoft's own
+     *  assembler, this value is just a rough estimate, as unpredicable
+     *  real-world factors make the actual value vary at least a little from
+     *  this count. Still, it can give you a rough idea of the size of your
+     *  shader. Will be zero on error.
+     */
+    int instruction_count;
+
+    /*
+     * The type of shader we parsed. Will be MOJOSHADER_TYPE_UNKNOWN on error.
+     */
+    MOJOSHADER_shaderType shader_type;
+
+    /*
+     * The shader's major version. If this was a "vs_3_0", this would be 3.
+     */
+    int major_ver;
+
+    /*
+     * The shader's minor version. If this was a "ps_1_4", this would be 4.
+     *  Two notes: for "vs_2_x", this is 1, and for "vs_3_sw", this is 255.
+     */
+    int minor_ver;
+
+    /*
+     * This is the malloc implementation you passed to MOJOSHADER_parse().
+     */
+    MOJOSHADER_malloc malloc;
+
+    /*
+     * This is the free implementation you passed to MOJOSHADER_parse().
+     */
+    MOJOSHADER_free free;
+
+    /*
+     * This is the pointer you passed as opaque data for your allocator.
+     */
+    void *malloc_data;
+} MOJOSHADER_assembleData;
+
+/*
+ * This function is optional. Use this to convert Direct3D shader assembly
+ *  language into bytecode, which can be handled by MOJOSHADER_parse().
+ *
+ * (source) is an ASCII, NULL-terminated string of valid Direct3D shader
+ *  assembly source code.
+ *
+ * As assembling requires some memory to be allocated, you may provide a
+ *  custom allocator to this function, which will be used to allocate/free
+ *  memory. They function just like malloc() and free(). We do not use
+ *  realloc(). If you don't care, pass NULL in for the allocator functions.
+ *  If your allocator needs instance-specific data, you may supply it with the
+ *  (d) parameter. This pointer is passed as-is to your (m) and (f) functions.
+ *
+ * This function is thread safe, so long as (m) and (f) are too, and that
+ *  (source) remains intact for the duration of the call. This allows you
+ *  to assemble several shaders on separate CPU cores at the same time.
+ */
+const MOJOSHADER_assembleData *MOJOSHADER_assemble(const char *source,
+                            MOJOSHADER_malloc m, MOJOSHADER_free f, void *d);
+
+/*
+ * Call this to dispose of assembling results when you are done with them.
+ *  This will call the MOJOSHADER_free function you provided to
+ *  MOJOSHADER_assemble() multiple times, if you provided one.
+ *  Passing a NULL here is a safe no-op.
+ *
+ * This function is thread safe, so long as any allocator you passed into
+ *  MOJOSHADER_assemble() is, too.
+ */
+void MOJOSHADER_freeAssembleData(const MOJOSHADER_assembleData *data);
+
+
 
 /* OpenGL interface... */
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mojoshader_assembler.c	Thu Dec 04 21:36:11 2008 -0500
@@ -0,0 +1,1424 @@
+/**
+ * MojoShader; generate shader programs from bytecode of compiled
+ *  Direct3D shaders.
+ *
+ * Please see the file LICENSE.txt in the source's root directory.
+ *
+ *  This file written by Ryan C. Gordon.
+ */
+
+#define __MOJOSHADER_INTERNAL__ 1
+#include "mojoshader_internal.h"
+
+#define DEBUG_TOKENIZER 1
+
+
+typedef struct Context Context;
+
+// Context...this is state that changes as we assemble a shader...
+struct Context
+{
+    MOJOSHADER_malloc malloc;
+    MOJOSHADER_free free;
+    void *malloc_data;
+    const char *failstr;
+    const char *source;
+    MOJOSHADER_shaderType shader_type;
+    uint8 major_ver;
+    uint8 minor_ver;
+    unsigned int instruction_count;
+    unsigned int linenum;
+    char prevchar;
+    char token[64];
+    char pushedback;
+    char pushback_token[64];
+    uint32 tokenbuf[16];
+    int tokenbufpos;
+    int centroid_allowed;
+    DestArgInfo dest_arg;
+};
+
+
+// Convenience functions for allocators...
+
+static MOJOSHADER_assembleData out_of_mem_data = {
+    "Out of memory", 0, 0, 0, MOJOSHADER_TYPE_UNKNOWN, 0, 0, 0, 0, 0
+};
+
+
+static const char *out_of_mem_str = "Out of memory";
+static inline int out_of_memory(Context *ctx)
+{
+    if (ctx->failstr == NULL)
+        ctx->failstr = out_of_mem_str;  // fail() would call malloc().
+    return FAIL;
+} // out_of_memory
+
+static inline void *Malloc(Context *ctx, const size_t len)
+{
+    void *retval = ctx->malloc((int) len, ctx->malloc_data);
+    if (retval == NULL)
+        out_of_memory(ctx);
+    return retval;
+} // Malloc
+
+static inline void Free(Context *ctx, void *ptr)
+{
+    if (ptr != NULL)  // check for NULL in case of dumb free() impl.
+        ctx->free(ptr, ctx->malloc_data);
+} // Free
+
+static int failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
+static int failf(Context *ctx, const char *fmt, ...)
+{
+    if (ctx->failstr == NULL)  // don't change existing error.
+    {
+        char scratch = 0;
+        va_list ap;
+        va_start(ap, fmt);
+        const int len = vsnprintf(&scratch, sizeof (scratch), fmt, ap);
+        va_end(ap);
+
+        char *failstr = (char *) Malloc(ctx, len + 1);
+        if (failstr != NULL)
+        {
+            va_start(ap, fmt);
+            vsnprintf(failstr, len + 1, fmt, ap);  // rebuild it.
+            va_end(ap);
+            ctx->failstr = failstr;
+        } // if
+    } // if
+
+    return FAIL;
+} // failf
+
+static inline int fail(Context *ctx, const char *reason)
+{
+    return failf(ctx, "%s", reason);
+} // fail
+
+static inline int isfail(const Context *ctx)
+{
+    return (ctx->failstr != NULL);
+} // isfail
+
+
+// Shader model version magic...
+
+static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
+{
+    return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 0 : (minor)) );
+} // version_ui32
+
+static inline int shader_version_supported(const uint8 maj, const uint8 min)
+{
+    return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
+} // shader_version_supported
+
+static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
+                                         const uint8 min)
+{
+    return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
+} // shader_version_atleast
+
+static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
+                                         const uint8 min)
+{
+    return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
+} // shader_version_exactly
+
+static inline int shader_is_pixel(const Context *ctx)
+{
+    return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
+} // shader_is_pixel
+
+static inline int shader_is_vertex(const Context *ctx)
+{
+    return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
+} // shader_is_vertex
+
+
+extern void writeme(void);
+
+static void output_token_noswap(Context *ctx, const uint32 token)
+{
+    if (isfail(ctx))
+        return;
+
+    writeme();
+} // output_token_noswap
+
+
+static inline void output_token(Context *ctx, const uint32 token)
+{
+    output_token_noswap(ctx, SWAP32(token));
+} // output_token
+
+
+static void output_comment_bytes(Context *ctx, const uint8 *buf, size_t len)
+{
+    if (len > (0xFFFF * 4))  // length is stored as token count, in 16 bits.
+        fail(ctx, "Comment field is too big");
+    else if (!isfail(ctx))
+    {
+        const uint32 tokencount = (len / 4) + ((len % 4) ? 1 : 0);
+        output_token(ctx, 0xFFFE | (tokencount << 16));
+        while (len >= 4)
+        {
+            output_token_noswap(ctx, *((const uint32 *) buf));
+            len -= 4;
+            buf += 4;
+        } // while
+
+        if (len > 0)  // handle spillover...
+        {
+            union { uint8 ui8[4]; uint32 ui32; } overflow;
+            overflow.ui32 = 0;
+            memcpy(overflow.ui8, buf, len);
+            output_token_noswap(ctx, overflow.ui32);
+        } // if
+    } // else if
+} // output_comment_bytes
+
+
+static inline void output_comment_string(Context *ctx, const char *str)
+{
+    output_comment_bytes(ctx, (const uint8 *) str, strlen(str));
+} // output_comment_string
+
+
+static int _tokenize(Context *ctx)
+{
+    int idx = 0;
+
+    if (isfail(ctx))
+        return FAIL;
+
+    if (ctx->pushedback)
+    {
+        ctx->pushedback = 0;
+        return NOFAIL;
+    } // if
+
+    while (1)
+    {
+        if (idx >= sizeof (ctx->token))
+            return fail(ctx, "buffer overflow");
+
+        char ch = *ctx->source;
+        if (ch == '\t')
+            ch = ' ';  // collapse tabs into single spaces.
+        else if (ch == '\r')
+        {
+            if (ctx->source[1] == '\n')
+               continue;  // ignore '\r' if this is "\r\n" ...
+            ch = '\n';
+        } // else if
+
+        if ((ch > '0') && (ch < '9'))
+        {
+            // starting a number, but rest of current token was not number.
+            if ((idx > 0) && ((ctx->prevchar < '0') || (ctx->prevchar > '9')))
+            {
+                ctx->token[idx++] = '\0';
+                return NOFAIL;
+            } // if
+        } // if
+        else
+        {
+            // starting a non-number, but rest of current token was numbers.
+            if ((idx > 0) && ((ctx->prevchar >= '0') || (ctx->prevchar <= '9')))
+            {
+                ctx->token[idx++] = '\0';
+                return NOFAIL;
+            } // if
+        } // else
+
+        switch (ch)
+        {
+            case '/':
+            case ';':  // !!! FIXME: comment, right?
+                if (idx != 0)  // finish off existing token.
+                    ctx->token[idx] = '\0';
+                else
+                {
+                    ctx->token[idx++] = ch;
+                    ctx->source++;
+                    if ((ch == '/') && (ctx->source[1] == '/'))
+                    {
+                        ctx->token[idx++] = '/';
+                        ctx->source++;
+                    } // if
+                    ctx->token[idx++] = '\0';
+                } // else
+                return NOFAIL;
+
+            case ' ':
+                if (ctx->prevch == ' ')
+                    break;   // multiple whitespace collapses into one.
+                // intentional fall-through...
+
+            case '_':
+            case '[':
+            case ']':
+            case '(':
+            case ')':
+            case '!':
+            case '+':
+            case '-':
+            case ',':
+            case '.':
+            case '\n':
+                if (idx != 0)  // finish off existing token.
+                    ctx->token[idx] = '\0';
+                else  // this is a token in itself.
+                {
+                    if (ch == '\n')
+                        ctx->linenum++;
+                    ctx->source++;
+                    ctx->token[idx++] = ch;
+                    ctx->token[idx++] = '\0';
+                } // else
+                return NOFAIL;
+
+            case '\0':
+                ctx->token[idx] = '\0';
+                if (idx != 0)  // had any chars? It's a token.
+                    return NOFAIL;
+                return END_OF_STREAM;
+
+            default:
+                ctx->source++;
+                ctx->token[idx++] = ch;
+                break;
+        } // switch
+
+        ctx->prevchar = ch;
+    } // while
+
+    return fail(ctx, "???");  // shouldn't hit this.
+} // _tokenize
+
+
+static inline int tokenize(Context *ctx)
+{
+    const int rc = _tokenize(ctx);
+    #if DEBUG_TOKENIZER
+    printf("TOKENIZE: %s '%s'\n",
+           (rc == END_OF_STREAM) ? "END_OF_STREAM" :
+           (rc == FAIL) ? "FAIL" :
+           (rc == NOFAIL) ? "NOFAIL" : "???",
+           ctx->token);
+    #endif
+    return rc;
+} // tokenize
+
+
+static inline void pushback(Context *ctx)
+{
+    #if DEBUG_TOKENIZER
+    printf("PUSHBACK\n");
+    #endif
+
+    if (ctx->pushedback)
+        fail(ctx, "BUG: Double pushback in parser");
+    else
+        ctx->pushedback = 1;
+} // pushback
+
+
+static int nexttoken(Context *ctx, const int ignoreeol,
+                     const int ignorewhitespace, const int eolok,
+                     const int eosok)
+{
+    int rc = NOFAIL;
+
+    while ((rc = tokenize(ctx)) == NOFAIL)
+    {
+        if (strcmp(ctx->token, "\n") == 0)
+        {
+            if (ignoreeol)
+                continue;
+            else if (!eolok)
+                return fail(ctx, "Unexpected EOL");
+        } // if
+
+        else if (strcmp(ctx->token, " ") == 0)
+        {
+            if (ignorewhitespace)
+                continue;
+        } // else if
+
+        // skip comments...
+        else if ((strcmp(ctx->token, "//") == 0) || (strcmp(ctx->token, ";") == 0))
+        {
+            while ((rc = tokenize(ctx)) == NOFAIL)
+            {
+                if (strcmp(ctx->token, "\n") == 0)
+                    break;
+            } // while
+        } // if
+
+        break;
+    } // while
+
+    #if DEBUG_TOKENIZER
+    printf("NEXTTOKEN: %s '%s'\n",
+           (rc == END_OF_STREAM) ? "END_OF_STREAM" :
+           (rc == FAIL) ? "FAIL" :
+           (rc == NOFAIL) ? "NOFAIL" : "???",
+           ctx->token);
+    #endif
+
+    if ((rc == END_OF_STREAM) && (!eosok))
+        return fail(ctx, "Unexpected EOF");
+
+    return rc;
+} // nexttoken
+
+
+static int require_endline(Context *ctx)
+{
+    const int rc = nexttoken(ctx, 0, 1, 1, 1);
+    if (rc == FAIL)
+        return FAIL;
+    else if (rc == END_OF_STREAM)
+        return NOFAIL;  // we'll call this an EOL.
+    else if (strcmp(ctx->token, "\n") != 0)
+        return fail(ctx, "Endline expected");
+    return NOFAIL;
+} // require_endline
+
+
+
+// !!! FIXME: merge parse_* into mojoshader.c to reduce cut-and-paste.
+// !!! FIXME:  we need to merge Context, which is the nastiest part.
+
+static int set_result_shift(Context *ctx, DestArgInfo *info, const int val)
+{
+    if (info->result_shift != 0)
+        return fail(ctx, "Multiple result shift modifiers");
+    info->result_shift = val;
+    return NOFAIL;
+} // set_result_shift
+
+
+static int parse_register_name(Context *ctx, RegisterType *rtype, int *rnum)
+{
+    if (nexttoken(ctx, 0, 1, 0, 0) == FAIL)
+        return FAIL;
+
+    // !!! FIXME: some of these registers are only valid for some shader types.
+    int neednum = 1;
+    int regnum = 0;
+    const char *t = ctx->token;
+    RegisterType regtype = REG_TYPE_TEMP;
+    if (strcasecmp(t, "r") == 0)
+        regtype = REG_TYPE_TEMP;
+    else if (strcasecmp(t, "v") == 0)
+        regtype = REG_TYPE_INPUT;
+    else if (strcasecmp(t, "c") == 0)
+        regtype = REG_TYPE_CONST;
+    else if (strcasecmp(t, "i") == 0)
+        regtype = REG_TYPE_CONSTINT;
+    else if (strcasecmp(t, "b") == 0)
+        regtype = REG_TYPE_CONSTBOOL;
+    else if (strcasecmp(t, "oC") == 0)
+        regtype = REG_TYPE_COLOROUT;
+    else if (strcasecmp(t, "oDepth") == 0)
+        regtype = REG_TYPE_DEPTHOUT;
+    else if (strcasecmp(t, "s") == 0)
+        regtype = REG_TYPE_SAMPLER;
+    else if (strcasecmp(t, "oD") == 0)
+        regtype = REG_TYPE_ATTROUT;
+    else if (strcasecmp(t, "l") == 0)
+        regtype = REG_TYPE_LABEL;
+    else if (strcasecmp(t, "p") == 0)
+        regtype = REG_TYPE_PREDICATE;
+    else if (strcasecmp(t, "aL") == 0)
+    {
+        regtype = REG_TYPE_LOOP;
+        neednum = 0;
+    } // else if
+    else if (strcasecmp(t, "o") == 0)
+    {
+        if (!shader_is_vertex(ctx) || !shader_version_atleast(ctx, 3, 0))
+            return fail(ctx, "Output register not valid in this shader type");
+        regtype = REG_TYPE_OUTPUT;
+    } // else if
+    else if (strcasecmp(t, "oT") == 0)
+    {
+        if (shader_is_vertex(ctx) || shader_version_atleast(ctx, 3, 0))
+            return fail(ctx, "Output register not valid in this shader type");
+        regtype = REG_TYPE_OUTPUT;
+    } // else if
+    else if (strcasecmp(t, "a") == 0)
+    {
+        if (!shader_is_vertex(ctx))
+            return fail(ctx, "Address register only valid in vertex shaders.");
+        regtype = REG_TYPE_ADDRESS;
+    } // else if
+    else if (strcasecmp(t, "t") == 0)
+    {
+        if (!shader_is_pixel(ctx))
+            return fail(ctx, "Address register only valid in pixel shaders.");
+        regtype = REG_TYPE_ADDRESS;
+    } // else if
+    else if (strcasecmp(t, "vPos") == 0)
+    {
+        regtype = REG_TYPE_MISCTYPE;
+        regnum = (int) MISCTYPE_TYPE_POSITION;
+        neednum = 0;
+    } // else if
+    else if (strcasecmp(t, "vFace") == 0)
+    {
+        regtype = REG_TYPE_MISCTYPE;
+        regnum = (int) MISCTYPE_TYPE_FACE;
+        neednum = 0;
+    } // else if
+    else if (strcasecmp(t, "oPos") == 0)
+    {
+        regtype = REG_TYPE_RASTOUT;
+        regnum = (int) RASTOUT_TYPE_POSITION;
+        neednum = 0;
+    } // else if
+    else if (strcasecmp(t, "oFog") == 0)
+    {
+        regtype = REG_TYPE_RASTOUT;
+        regnum = (int) RASTOUT_TYPE_FOG;
+        neednum = 0;
+    } // else if
+    else if (strcasecmp(t, "oPts") == 0)
+    {
+        regtype = REG_TYPE_RASTOUT;
+        regnum = (int) RASTOUT_TYPE_POINT_SIZE;
+        neednum = 0;
+    } // else if
+        
+    //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
+
+    else
+    {
+        return fail(ctx, "expected register type");
+    } // else
+
+    if (neednum)
+    {
+        if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+            return FAIL;
+
+        //minor = atoi(ctx->token);
+        char *endptr = NULL;
+        const long val = strtol(ctx->token, &endptr, 10);
+        regnum = (int) val;
+        if ((*ctx->token == '\0') || (*endptr != '\0'))
+            return fail(ctx, "Invalid version string");
+    } // if
+
+    // split up REG_TYPE_CONST
+    if (regtype == REG_TYPE_CONST)
+    {
+        if (regnum < 2048)
+        {
+            regtype = REG_TYPE_CONST;
+            regnum -= 0;
+        } // if
+        else if (regnum < 4096)
+        {
+            regtype = REG_TYPE_CONST2;
+            regnum -= 2048;
+        } // if
+        else if (regnum < 6144)
+        {
+            regtype = REG_TYPE_CONST3;
+            regnum -= 4096;
+        } // if
+        else if (regnum < 8192)
+        {
+            regtype = REG_TYPE_CONST4;
+            regnum -= 6144;
+        } // if
+        else
+        {
+            return fail(ctx, "Invalid const register index");
+        } // else
+    } // if
+
+    *rtype = regtype;
+    *rnum = regnum;
+
+    return NOFAIL;
+} // parse_register_name
+
+
+static int parse_destination_token(Context *ctx, DestArgInfo *info)
+{
+    // !!! FIXME: recheck against the spec for ranges (like RASTOUT values, etc).
+
+    memset(info, '\0', sizeof (info));
+    info->token = token;
+
+    // See if there are destination modifiers on the instruction itself...
+    while (1)
+    {
+        if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+            return FAIL;
+        else if (strcmp(ctx->token, " ") == 0)
+            break;  // done with modifiers.
+        else if (strcmp(ctx->token, "_") != 0)
+            return fail(ctx, "Expected modifier or whitespace");
+        else if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+            return FAIL;
+        else if (strcasecmp(ctx->token, "x2") == 0)
+            set_result_shift(ctx, info, 0x1);
+        else if (strcasecmp(ctx->token, "x4") == 0)
+            set_result_shift(ctx, info, 0x2);
+        else if (strcasecmp(ctx->token, "x8") == 0)
+            set_result_shift(ctx, info, 0x3);
+        else if (strcasecmp(ctx->token, "d8") == 0)
+            set_result_shift(ctx, info, 0xD);
+        else if (strcasecmp(ctx->token, "d4") == 0)
+            set_result_shift(ctx, info, 0xE);
+        else if (strcasecmp(ctx->token, "d2") == 0)
+            set_result_shift(ctx, info, 0xF);
+        else if (strcasecmp(ctx->token, "sat") == 0)
+            info->result_mod |= MOD_SATURATE;
+        else if (strcasecmp(ctx->token, "pp") == 0)
+            info->result_mod |= MOD_PP;
+        else if (strcasecmp(ctx->token, "centroid") == 0)
+            info->result_mod |= MOD_CENTROID;
+        else
+            return fail(ctx, "Expected modifier");
+    } // while
+
+    if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return FAIL;
+
+    // !!! FIXME: predicates.
+    if (strcmp(ctx->token, "(") == 0)
+        return fail(ctx, "Predicates unsupported at this time");
+    pushback(ctx);  // parse_register_name calls nexttoken().
+
+    if (parse_register_name(ctx, &info->regtype, &info->regnum) == FAIL)
+        return FAIL;
+
+    if (nexttoken(ctx, 0, 0, 1, 1) == FAIL)
+        return FAIL;
+
+    // !!! FIXME: can dest registers do relative addressing?
+
+    if (strcmp(ctx->token, ".") != 0)
+    {
+        info->writemask = 0xF;
+        info->writemask0 = info->writemask1 = info->writemask2 = info->writemask3 = 1;
+        pushback(ctx);  // no explicit writemask; do full mask.
+    } // if
+    else if (scalar_register(info->regtype, info->regnum))
+        return fail(ctx, "Writemask specified for scalar register");
+    else if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return FAIL;
+    else if (ctx->token[0] == '\0')
+        return fail(ctx, "Invalid writemask");
+    else
+    {
+        char *ptr = ctx->token;
+        info->writemask0 = info->writemask1 = info->writemask2 = info->writemask3 = 0;
+        if (*ptr = 'x') { info->writemask0 = 1; ptr++; }
+        if (*ptr = 'y') { info->writemask1 = 1; ptr++; }
+        if (*ptr = 'z') { info->writemask2 = 1; ptr++; }
+        if (*ptr = 'w') { info->writemask3 = 1; ptr++; }
+        if ((ptr == ctx->token) && (is_pixel_shader(ctx))
+        {
+            if (*ptr = 'r') { info->writemask0 = 1; ptr++; }
+            if (*ptr = 'g') { info->writemask1 = 1; ptr++; }
+            if (*ptr = 'b') { info->writemask2 = 1; ptr++; }
+            if (*ptr = 'a') { info->writemask3 = 1; ptr++; }
+        } // if
+
+        if (*ptr != '\0')
+            return fail(ctx, "Invalid writemask");
+
+        info->writemask = ( ((info->writemask0 & 0x1) << 0) |
+                            ((info->writemask1 & 0x1) << 1) |
+                            ((info->writemask2 & 0x1) << 2) |
+                            ((info->writemask3 & 0x1) << 3) );
+    } // else
+
+    info->orig_writemask = info->writemask;
+
+    // !!! FIXME: cut and paste from mojoshader.c ...
+    if (info->relative)
+    {
+        if (!shader_is_vertex(ctx))
+            return fail(ctx, "Relative addressing in non-vertex shader");
+        else if (!shader_version_atleast(ctx, 3, 0))
+            return fail(ctx, "Relative addressing in vertex shader version < 3.0");
+        else if (!ctx->have_ctab)  // it's hard to do this efficiently without!
+            return fail(ctx, "relative addressing unsupported without a CTAB");
+        // !!! FIXME: I don't have a shader that has a relative dest currently.
+        return fail(ctx, "Relative addressing of dest tokens is unsupported");
+    } // if
+
+    const int s = info->result_shift;
+    if (s != 0)
+    {
+        if (!shader_is_pixel(ctx))
+            return fail(ctx, "Result shift scale in non-pixel shader");
+        else if (shader_version_atleast(ctx, 2, 0))
+            return fail(ctx, "Result shift scale in pixel shader version >= 2.0");
+        else if ( ! (((s >= 1) && (s <= 3)) || ((s >= 0xD) && (s <= 0xF))) )
+            return fail(ctx, "Result shift scale isn't 1 to 3, or 13 to 15.");
+    } // if
+
+    if (info->result_mod & MOD_PP)  // Partial precision (pixel shaders only)
+    {
+        if (!shader_is_pixel(ctx))
+            return fail(ctx, "Partial precision result mod in non-pixel shader");
+    } // if
+
+    if (info->result_mod & MOD_CENTROID)  // Centroid (pixel shaders only)
+    {
+        if (!shader_is_pixel(ctx))
+            return fail(ctx, "Centroid result mod in non-pixel shader");
+        else if (!ctx->centroid_allowed)  // only on DCL opcodes!
+            return fail(ctx, "Centroid modifier not allowed here");
+    } // if
+
+    // !!! FIXME: from msdn:
+    //  "_sat cannot be used with instructions writing to output o# registers."
+    // !!! FIXME: actually, just go over this page:
+    //  http://msdn.microsoft.com/archive/default.asp?url=/archive/en-us/directx9_c/directx/graphics/reference/shaders/ps_instructionmodifiers.asp
+
+    if (ctx->tokenbufpos >= STATICARRAYLEN(ctx->tokenbuf))
+        return fail(ctx, "Too many tokens");
+
+    ctx->tokenbuf[ctx->tokenbufpos++] =
+            ( ((((uint32) 0x80000000)) << 0) |
+              ((((uint32) info->regnum) & 0x7ff) << 0) |
+              ((((uint32) info->relative) & 0x1) << 13) |
+              ((((uint32) info->result_mod) & 0xF) << 20) |
+              ((((uint32) info->result_shift) & 0xF) << 24) |
+              ((((uint32) info->regtype) & 0x7) << 28) |
+              ((((uint32) info->regtype) & 0x18) << 8) );
+
+    return 1;
+} // parse_destination_token
+
+
+static int parse_args_NULL(Context *ctx)
+{
+    return (isfail(ctx) ? FAIL : 1);
+} // parse_args_NULL
+
+
+static int parse_args_DEF(Context *ctx)
+{
+    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
+        return FAIL;
+
+    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
+        return fail(ctx, "relative addressing in DEFB");
+
+sdfsdf
+// !!! FIXME: parse out def.
+    ctx->dwords[0] = SWAP32(ctx->tokens[0]);
+    ctx->dwords[1] = SWAP32(ctx->tokens[1]);
+    ctx->dwords[2] = SWAP32(ctx->tokens[2]);
+    ctx->dwords[3] = SWAP32(ctx->tokens[3]);
+
+    return 6;
+} // parse_args_DEF
+
+
+static int parse_args_DEFB(Context *ctx)
+{
+    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
+        return FAIL;
+
+    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
+        return fail(ctx, "relative addressing in DEFB");
+
+    ctx->dwords[0] = *(ctx->tokens) ? 1 : 0;
+
+    return 3;
+} // parse_args_DEFB
+
+
+static int valid_texture_type(const uint32 ttype)
+{
+    switch ((const TextureType) ttype)
+    {
+        case TEXTURE_TYPE_2D:
+        case TEXTURE_TYPE_CUBE:
+        case TEXTURE_TYPE_VOLUME:
+            return 1;  // it's okay.
+    } // switch
+
+    return 0;
+} // valid_texture_type
+
+
+// !!! FIXME: this function is kind of a mess.
+// !!! FIXME: cut-and-paste from mojoshader.c ...
+static int parse_args_DCL(Context *ctx)
+{
+    int unsupported = 0;
+    char usage[sizeof (ctx->token)];
+
+static int nexttoken(Context *ctx, const int ignoreeol,
+                     const int ignorewhitespace, const int eolok,
+                     const int eosok)
+
+    char usagestr[sizeof (ctx->token)];
+    static const char *usagestrs[] = {
+        "position", "blendweight", "blendindices", "normal", "psize",
+        "texcoord", "tangent", "binormal", "tessfactor", "positiont",
+        "color", "fog", "depth", "sample"
+    };
+
+    if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return FAIL;
+    else if (strcmp(ctx->token, " ") == 0)
+    {
+        pushback(ctx);
+        usagestr[0] = '\0';
+    } // else if
+    else if (strcmp(ctx->token, "_") != 0)
+        return fail(ctx, "Expected register or usage");
+    else if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return FAIL;
+    else
+        strcpy(usagestr, ctx->token);
+
+    if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return FAIL;
+    else if (strcmp(ctx->token, " ") == 0)
+        return fail(ctx, "Expected whitespace");
+    
+    ctx->centroid_allowed = 1;
+    ctx->tokenbufpos++;
+    const int parse_dest_rc = parse_destination_token(ctx, &ctx->dest_arg);
+    ctx->centroid_allowed = 0;
+    if (parse_dest_rc == FAIL)
+        return FAIL;
+
+    if (ctx->dest_arg.result_shift != 0)  // I'm pretty sure this is illegal...?
+        return fail(ctx, "shift scale in DCL");
+    else if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
+        return fail(ctx, "relative addressing in DCL");
+
+    const RegisterType regtype = ctx->dest_arg.regtype;
+    const int regnum = ctx->dest_arg.regnum;
+    if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
+    {
+        if (regtype == REG_TYPE_INPUT)
+        {
+            token |=
+            const uint32 usage = (token & 0xF);
+            const uint32 index = ((token >> 16) & 0xF);
+            reserved_mask = 0x7FF0FFE0;
+            ctx->dwords[0] = usage;
+            ctx->dwords[1] = index;
+        } // if
+
+        else if (regtype == REG_TYPE_MISCTYPE)
+        {
+            const MiscTypeType mt = (MiscTypeType) regnum;
+            if (mt == MISCTYPE_TYPE_POSITION)
+                reserved_mask = 0x7FFFFFFF;
+            else if (mt == MISCTYPE_TYPE_FACE)
+            {
+                reserved_mask = 0x7FFFFFFF;
+                if (!writemask_xyzw(ctx->dest_arg.orig_writemask))
+                    return fail(ctx, "DCL face writemask must be full");
+                else if (ctx->dest_arg.result_mod != 0)
+                    return fail(ctx, "DCL face result modifier must be zero");
+                else if (ctx->dest_arg.result_shift != 0)
+                    return fail(ctx, "DCL face shift scale must be zero");
+            } // else if
+            else
+            {
+                unsupported = 1;
+            } // else
+
+            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_UNKNOWN;
+            ctx->dwords[1] = 0;
+        } // else if
+
+        else if (regtype == REG_TYPE_TEXTURE)
+        {
+            const uint32 usage = (token & 0xF);
+            const uint32 index = ((token >> 16) & 0xF);
+            if (usage == MOJOSHADER_USAGE_TEXCOORD)
+            {
+                if (index > 7)
+                    return fail(ctx, "DCL texcoord usage must have 0-7 index");
+            } // if
+            else if (usage == MOJOSHADER_USAGE_COLOR)
+            {
+                if (index != 0)
+                    return fail(ctx, "DCL color usage must have 0 index");
+            } // else if
+            else
+            {
+                return fail(ctx, "Invalid DCL texture usage");
+            } // else
+
+            reserved_mask = 0x7FF0FFE0;
+            ctx->dwords[0] = usage;
+            ctx->dwords[1] = index;
+        } // else if
+
+        else if (regtype == REG_TYPE_SAMPLER)
+        {
+            const uint32 ttype = ((token >> 27) & 0xF);
+            if (!valid_texture_type(ttype))
+                return fail(ctx, "unknown sampler texture type");
+            reserved_mask = 0x7FFFFFF;
+            ctx->dwords[0] = ttype;
+        } // else if
+
+        else
+        {
+            unsupported = 1;
+        } // else
+    } // if
+
+    else if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
+    {
+        if (regtype == REG_TYPE_INPUT)
+        {
+            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_COLOR;
+            ctx->dwords[1] = regnum;
+            reserved_mask = 0x7FFFFFFF;
+        } // if
+        else if (regtype == REG_TYPE_TEXTURE)
+        {
+            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_TEXCOORD;
+            ctx->dwords[1] = regnum;
+            reserved_mask = 0x7FFFFFFF;
+        } // else if
+        else if (regtype == REG_TYPE_SAMPLER)
+        {
+            const uint32 ttype = ((token >> 27) & 0xF);
+            if (!valid_texture_type(ttype))
+                return fail(ctx, "unknown sampler texture type");
+            reserved_mask = 0x7FFFFFF;
+            ctx->dwords[0] = ttype;
+        } // else if
+        else
+        {
+            unsupported = 1;
+        } // else
+    } // if
+
+    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
+    {
+        if ((regtype == REG_TYPE_INPUT) || (regtype == REG_TYPE_OUTPUT))
+        {
+            const uint32 usage = (token & 0xF);
+            const uint32 index = ((token >> 16) & 0xF);
+            reserved_mask = 0x7FF0FFE0;
+            ctx->dwords[0] = usage;
+            ctx->dwords[1] = index;
+        } // if
+        else
+        {
+            unsupported = 1;
+        } // else
+    } // else if
+
+    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
+    {
+        if (regtype == REG_TYPE_INPUT)
+        {
+            const uint32 usage = (token & 0xF);
+            const uint32 index = ((token >> 16) & 0xF);
+            reserved_mask = 0x7FF0FFE0;
+            ctx->dwords[0] = usage;
+            ctx->dwords[1] = index;
+        } // if
+        else
+        {
+            unsupported = 1;
+        } // else
+    } // else if
+
+    else
+    {
+        unsupported = 1;
+    } // else
+
+    if (unsupported)
+        return fail(ctx, "invalid DCL register type for this shader model");
+
+    if ((token & reserved_mask) != 0)
+        return fail(ctx, "reserved bits in DCL dword aren't zero");
+
+    return 3;
+} // parse_args_DCL
+
+
+static int parse_args_D(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_destination_token(ctx, &ctx->dest_arg);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_D
+
+
+static int parse_args_S(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_S
+
+
+static int parse_args_SS(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    retval += parse_source_token(ctx, &ctx->source_args[1]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_SS
+
+
+static int parse_args_DS(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_destination_token(ctx, &ctx->dest_arg);
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_DS
+
+
+static int parse_args_DSS(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_destination_token(ctx, &ctx->dest_arg);
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    retval += parse_source_token(ctx, &ctx->source_args[1]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_DSS
+
+
+static int parse_args_DSSS(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_destination_token(ctx, &ctx->dest_arg);
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    retval += parse_source_token(ctx, &ctx->source_args[1]);
+    retval += parse_source_token(ctx, &ctx->source_args[2]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_DSSS
+
+
+static int parse_args_DSSSS(Context *ctx)
+{
+    int retval = 1;
+    retval += parse_destination_token(ctx, &ctx->dest_arg);
+    retval += parse_source_token(ctx, &ctx->source_args[0]);
+    retval += parse_source_token(ctx, &ctx->source_args[1]);
+    retval += parse_source_token(ctx, &ctx->source_args[2]);
+    retval += parse_source_token(ctx, &ctx->source_args[3]);
+    return isfail(ctx) ? FAIL : retval;
+} // parse_args_DSSSS
+
+
+static int parse_args_SINCOS(Context *ctx)
+{
+    // this opcode needs extra registers for sm2 and lower.
+    if (!shader_version_atleast(ctx, 3, 0))
+        return parse_args_DSSS(ctx);
+    return parse_args_DS(ctx);
+} // parse_args_SINCOS
+
+
+static int parse_args_TEXCRD(Context *ctx)
+{
+    // added extra register in ps_1_4.
+    if (shader_version_atleast(ctx, 1, 4))
+        return parse_args_DS(ctx);
+    return parse_args_D(ctx);
+} // parse_args_TEXCRD
+
+
+static int parse_args_TEXLD(Context *ctx)
+{
+    // different registers in px_1_3, ps_1_4, and ps_2_0!
+    if (shader_version_atleast(ctx, 2, 0))
+        return parse_args_DSS(ctx);
+    else if (shader_version_atleast(ctx, 1, 4))
+        return parse_args_DS(ctx);
+    return parse_args_D(ctx);
+} // parse_args_TEXLD
+
+
+
+
+
+
+
+// one args function for each possible sequence of opcode arguments.
+typedef int (*args_function)(Context *ctx);
+
+// one state function for each opcode where we have state machine updates.
+typedef void (*state_function)(Context *ctx);
+
+// Lookup table for instruction opcodes...
+typedef struct
+{
+    const char *opcode_string;
+    int slots;  // number of instruction slots this opcode eats.
+    MOJOSHADER_shaderType shader_types;  // mask of types that can use opcode.
+    args_function parse_args;
+    state_function state;
+} Instruction;
+
+
+static const Instruction instructions[] =
+{
+/*  !!! FIXME: push this through mojoshader.c's state machine.
+    #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
+        opstr, slots, t, parse_args_##a, state_##op \
+    },
+*/
+    #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
+        opstr, slots, t, parse_args_##a, 0 \
+    },
+
+    #define INSTRUCTION(op, opstr, slots, a, t) { \
+        opstr, slots, t, parse_args_##a, 0 \
+    },
+
+    #define MOJOSHADER_DO_INSTRUCTION_TABLE 1
+    #include "mojoshader_internal.h"
+    #undef MOJOSHADER_DO_INSTRUCTION_TABLE
+
+    #undef INSTRUCTION
+    #undef INSTRUCTION_STATE
+};
+
+static int parse_condition(Context *ctx, uint32 *controls)
+{
+    if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return 0;
+    else if (strcmp(ctx->token, "_") != 0)
+    {
+        pushback(ctx);
+        return 0;
+    } // else if
+
+    if (nexttoken(ctx, 0, 0, 0, 0) == FAIL)
+        return 0;
+    else
+    {
+        int i;
+        static const char *comps[] = {"", "gt", "eq", "ge", "lt", "ne", "le"};
+        for (i = 1; i < STATICARRAYLEN(comps); i++)
+        {
+            if (strcasecmp(ctx->token, comps[i]) == 0)
+            {
+                *controls = i;
+                return 1;
+            }
+        } // for
+
+        fail(ctx, "Expected comparison token");
+        return 0;
+    } // else if
+
+    return 0;
+} // parse_condition
+
+
+static int parse_instruction_token(Context *ctx)
+{
+    int coissue = 0;
+    int predicated = 0;
+
+    if (strcmp(ctx->token, "+") == 0)
+    {
+        if (nexttoken(ctx, 0, 1, 0, 0) == FAIL)
+            return FAIL;
+        coissue = 1;
+    } // if
+
+    if (coissue)
+    {
+        if (!shader_is_pixel(ctx))
+            return fail(ctx, "coissue instruction on non-pixel shader");
+        else if (shader_version_atleast(ctx, 2, 0))
+            return fail(ctx, "coissue instruction in Shader Model >= 2.0");
+    } // if
+
+    int i;
+    int valid_opcode = 0;
+    const Instruction *instruction = NULL;
+    for (i = 0; i < STATICARRAYLEN(instructions); i++)
+    {
+        instruction = &instructions[i];
+        if (instruction->opcode_string == NULL)
+            continue;  // skip this.
+        else if (strcasecmp(ctx->token, instruction->opcode_string) != 0)
+            continue;  // not us.
+        valid_opcode = 1;
+        break;
+    } // for
+
+    uint32 opcode = (uint32) i;
+    uint32 controls = 0;
+
+    if (!valid_opcode)
+        return failf(ctx, "Unknown instruction '%s'", ctx->token);
+
+    // This might need to be IFC instead of IF.
+    if (strcmp(instruction->opcode_string, "IF") == 0)
+    {
+        if (parse_condition(ctx, &controls))
+            opcode = OPCODE_IFC;
+    } // if
+
+    // This might need to be BREAKC instead of BREAK.
+    else if (strcmp(instruction->opcode_string, "BREAK") == 0)
+    {
+        if (parse_condition(ctx, &controls))
+            opcode = OPCODE_BREAKC;
+    } // else if
+
+    // SETP has a conditional code, always.
+    else if (strcmp(instruction->opcode_string, "SETP") == 0)
+    {
+        if (!parse_condition(ctx, &controls))
+            return fail(ctx, "SETP requires a condition");
+    } // else if
+
+    instruction = &instructions[opcode];  // ...in case this changed.
+
+    if ((ctx->shader_type & instruction->shader_types) == 0)
+    {
+        return failf(ctx, "opcode '%s' not available in this shader type.",
+                     instruction->opcode_string);
+    } // if
+
+    // !!! FIXME: predicated instructions
+
+    ctx->tokenbufpos = 0;
+
+    const int tokcount = instruction->parse_args(ctx);
+    if (isfail(ctx))
+        return FAIL;
+
+    if (instruction->state != NULL)
+        instruction->state(ctx);
+
+    if (isfail(ctx))
+        return FAIL;
+
+    ctx->instruction_count += instruction->slots;
+    const uint32 insttoks = shader_version_atleast(ctx, 2, 0) ? tokcount : 0;
+
+    // write out the instruction token.
+    output_token(ctx, ((opcode & 0xFFFF) << 0) |
+                      ((controls & 0xFF) << 16) |
+                      ((insttoks & 0xF) << 24) |
+                      ((coissue) ? 0x40000000 : 0x00000000) |
+                      ((predicated) ? 0x10000000 : 0x00000000) );
+
+    // write out the argument tokens.
+    for (i = 0; i < (tokcount-1); i++)
+        output_token(ctx, ctx->tokenbuf[i]);
+
+    return NOFAIL;
+} // parse_instruction_token
+
+
+static int parse_version_token(Context *ctx)
+{
+    if (nexttoken(ctx, 1, 1, 0, 0) == FAIL)
+        return FAIL;
+
+    uint32 shader_type = 0;
+
+    const char *t = ctx->token;
+    switch (t[0])
+    {
+        case 'v':
+            shader_type = 0xFFFE;
+            ctx->shader_type = MOJOSHADER_TYPE_VERTEX;
+            break;
+        case 'p':
+            shader_type = 0xFFFF;
+            ctx->shader_type = MOJOSHADER_TYPE_PIXEL;
+            break;
+        // !!! FIXME: geometry shaders?
+        default: return fail(ctx, "Expected version string");
+    } // switch
+
+    if ((t[1] != 's') || (t[2] != '_') || (t[4] != '_'))
+        return fail(ctx, "Expected version string");
+
+    ctx->major_ver = t[3] - '0';
+
+    const char *minstr = &t[5];
+    if (strcasecmp(minstr, "x") == 0)
+        ctx->minor_ver = 1;
+    else if (strcasecmp(minstr, "sw") == 0)
+        ctx->minor_ver = 255;
+    else if (strcmp(minstr, "0") == 0)
+        ctx->minor_ver = 0;
+    else
+    {
+        //minor = atoi(minstr);
+        char *endptr = NULL;
+        const long val = strtol(minstr, &endptr, 10);
+        ctx->minor_ver = (uint8) val;
+        if ((*minstr == '\0') || (*endptr != '\0') || (val < 0) || (val > 255))
+            return fail(ctx, "Invalid version string");
+    } // else
+
+    // !!! FIXME: 1.x and 4.x?
+    if ((ctx->major_ver < 2) || (ctx->major_ver > MAX_SHADER_MAJOR))
+        return fail(ctx, "Unsupported shader model");
+
+    if (require_endline(ctx) == FAIL)
+        return FAIL;
+
+    output_token(ctx, (((uint32) shader_type) << 16) |
+                      (((uint32) ctx->major_ver) << 8) |
+                      (((uint32) ctx->minor_ver) << 0) );
+    return NOFAIL;
+} // parse_version_token
+
+
+static int parse_phase_token(Context *ctx)
+{
+    if (require_endline(ctx) == FAIL)
+        return FAIL;
+
+    // !!! FIXME: needs state; allow only one phase token per shader, I think?
+    if ( (!shader_is_pixel(ctx)) || (!shader_version_exactly(ctx, 1, 4)) )
+        return fail(ctx, "phase token only available in 1.4 pixel shaders");
+    output_token(ctx, 0x0000FFFD); // phase token always 0x0000FFFD.
+    return NOFAIL;
+} // parse_phase_token
+
+
+static int parse_end_token(Context *ctx)
+{
+    if (require_endline(ctx) == FAIL)
+        return FAIL;
+
+    // We don't emit the end token bits here, since it's valid for a shader
+    //  to not specify an "end" string at all; it's implicit, in that case.
+    // Instead, we make sure if we see "end" that it's the last thing we see.
+    if (nexttoken(ctx, 1, 1, 0, 1) != END_OF_STREAM)
+        return fail(ctx, "Content after END token");
+    return NOFAIL;
+} // parse_end_token
+
+
+static int parse_token(Context *ctx)
+{
+    const char *t = ctx->token;
+    if (strcasecmp(t, "end") == 0)
+        return parse_end_token(ctx);
+    else if (strcasecmp(t, "phase") == 0)
+        return parse_phase_token(ctx);
+    return parse_instruction_token(ctx);
+} // parse_token
+
+
+static Context *build_context(const char *source, MOJOSHADER_malloc m,
+                              MOJOSHADER_free f, void *d)
+{
+    if (m == NULL) m = internal_malloc;
+    if (f == NULL) f = internal_free;
+
+    Context *ctx = (Context *) m(sizeof (Context), d);
+    if (ctx == NULL)
+        return NULL;
+
+    memset(ctx, '\0', sizeof (Context));
+    ctx->malloc = m;
+    ctx->free = f;
+    ctx->malloc_data = d;
+    ctx->source = source;
+    ctx->linenum = 1;
+
+    return ctx;
+} // build_context
+
+
+static void destroy_context(Context *ctx)
+{
+    if (ctx != NULL)
+    {
+        MOJOSHADER_free f = ((ctx->free != NULL) ? ctx->free : internal_free);
+        void *d = ctx->malloc_data;
+        if ((ctx->failstr != NULL) && (ctx->failstr != out_of_mem_str))
+            f((void *) ctx->failstr, d);
+        f(ctx, d);
+    } // if
+} // destroy_context
+
+
+
+// API entry point...
+
+const MOJOSHADER_assembleData *MOJOSHADER_assemble(const char *source,
+                            MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
+{
+    MOJOSHADER_assembleData *retval = NULL;
+    Context *ctx = NULL;
+
+    if ( ((m == NULL) && (f != NULL)) || ((m != NULL) && (f == NULL)) )
+        return &out_of_mem_data;  // supply both or neither.
+
+    ctx = build_context(source, m, f, d);
+    if (ctx == NULL)
+        return &out_of_mem_data;
+
+    // Version token always comes first.
+    parse_version_token(ctx);
+
+    const char *credit = "Generated by MojoShader assembler revision "
+                         MOJOSHADER_CHANGESET
+                         ", http://icculus.org/mojoshader/";
+    output_comment_string(ctx, credit);
+
+    // !!! FIXME: insert CTAB here.
+
+    // parse out the rest of the tokens after the version token...
+    while (nexttoken(ctx, 1, 1, 0, 1) == NOFAIL)
+        parse_token(ctx);
+
+    output_token(ctx, 0x0000FFFF);   // end token always 0x0000FFFF.
+
+    retval = build_assembledata(ctx);
+    destroy_context(ctx);
+    return retval;
+} // MOJOSHADER_assemble
+
+
+void MOJOSHADER_freeAssembleData(const MOJOSHADER_assembleData *_data)
+{
+    MOJOSHADER_assembleData *data = (MOJOSHADER_assembleData *) _data;
+    if ((data == NULL) || (data == &out_of_mem_data))
+        return;  // no-op.
+
+    MOJOSHADER_free f = (data->free == NULL) ? internal_free : data->free;
+    void *d = data->malloc_data;
+
+    if (data->output != NULL)  // check for NULL in case of dumb free() impl.
+        f((void *) data->output, d);
+
+    if ((data->error != NULL) && (data->error != out_of_mem_str))
+        f((void *) data->error, d);
+
+    f(data, d);
+} // MOJOSHADER_freeParseData
+
+// end of mojoshader_assembler.c ...
+
--- a/mojoshader_internal.h	Tue Dec 02 06:24:35 2008 -0500
+++ b/mojoshader_internal.h	Thu Dec 04 21:36:11 2008 -0500
@@ -59,6 +59,7 @@
 #ifdef _MSC_VER
 #include <malloc.h>
 #define snprintf _snprintf
+#define strcasecmp stricmp
 typedef unsigned __int8 uint8;
 typedef unsigned __int16 uint16;
 typedef unsigned __int32 uint32;
@@ -116,7 +117,267 @@
 #   define SWAP32(x) (x)
 #endif
 
+
+// we need to reference these by explicit value occasionally...
+#define OPCODE_RET 28
+#define OPCODE_IFC 41
+#define OPCODE_BREAKC 45
+
+// #define this to force app to supply an allocator, so there's no reference
+//  to the C runtime's malloc() and free()...
+#if MOJOSHADER_FORCE_ALLOCATOR
+#define internal_malloc NULL
+#define internal_free NULL
+#else
+static void *internal_malloc(int bytes, void *d) { return malloc(bytes); }
+static void internal_free(void *ptr, void *d) { free(ptr); }
+#endif
+
+
+// result modifiers.
+// !!! FIXME: why isn't this an enum?
+#define MOD_SATURATE 0x01
+#define MOD_PP 0x02
+#define MOD_CENTROID 0x04
+
+typedef enum
+{
+    REG_TYPE_TEMP = 0,
+    REG_TYPE_INPUT = 1,
+    REG_TYPE_CONST = 2,
+    REG_TYPE_ADDRESS = 3,
+    REG_TYPE_TEXTURE = 3,  // ALSO 3!
+    REG_TYPE_RASTOUT = 4,
+    REG_TYPE_ATTROUT = 5,
+    REG_TYPE_TEXCRDOUT = 6,
+    REG_TYPE_OUTPUT = 6,  // ALSO 6!
+    REG_TYPE_CONSTINT = 7,
+    REG_TYPE_COLOROUT = 8,
+    REG_TYPE_DEPTHOUT = 9,
+    REG_TYPE_SAMPLER = 10,
+    REG_TYPE_CONST2 = 11,
+    REG_TYPE_CONST3 = 12,
+    REG_TYPE_CONST4 = 13,
+    REG_TYPE_CONSTBOOL = 14,
+    REG_TYPE_LOOP = 15,
+    REG_TYPE_TEMPFLOAT16 = 16,
+    REG_TYPE_MISCTYPE = 17,
+    REG_TYPE_LABEL = 18,
+    REG_TYPE_PREDICATE = 19,
+    REG_TYPE_MAX = 19
+} RegisterType;
+
+typedef enum
+{
+    TEXTURE_TYPE_2D = 2,
+    TEXTURE_TYPE_CUBE = 3,
+    TEXTURE_TYPE_VOLUME = 4,
+} TextureType;
+
+typedef enum
+{
+    RASTOUT_TYPE_POSITION = 0,
+    RASTOUT_TYPE_FOG = 1,
+    RASTOUT_TYPE_POINT_SIZE = 2,
+    RASTOUT_TYPE_MAX = 2
+} RastOutType;
+
+typedef enum
+{
+    MISCTYPE_TYPE_POSITION = 0,
+    MISCTYPE_TYPE_FACE = 1,
+    MISCTYPE_TYPE_MAX = 1
+} MiscTypeType;
+
+// source modifiers.
+typedef enum
+{
+    SRCMOD_NONE,
+    SRCMOD_NEGATE,
+    SRCMOD_BIAS,
+    SRCMOD_BIASNEGATE,
+    SRCMOD_SIGN,
+    SRCMOD_SIGNNEGATE,
+    SRCMOD_COMPLEMENT,
+    SRCMOD_X2,
+    SRCMOD_X2NEGATE,
+    SRCMOD_DZ,
+    SRCMOD_DW,
+    SRCMOD_ABS,
+    SRCMOD_ABSNEGATE,
+    SRCMOD_NOT,
+    SRCMOD_TOTAL
+} SourceMod;
+
+
+typedef struct
+{
+    const uint32 *token;   // this is the unmolested token in the stream.
+    int regnum;
+    int relative;
+    int writemask;   // xyzw or rgba (all four, not split out).
+    int writemask0;  // x or red
+    int writemask1;  // y or green
+    int writemask2;  // z or blue
+    int writemask3;  // w or alpha
+    int orig_writemask;   // writemask before mojoshader tweaks it.
+    int result_mod;
+    int result_shift;
+    RegisterType regtype;
+} DestArgInfo;
+
+typedef struct
+{
+    const uint32 *token;   // this is the unmolested token in the stream.
+    int regnum;
+    int swizzle;  // xyzw (all four, not split out).
+    int swizzle_x;
+    int swizzle_y;
+    int swizzle_z;
+    int swizzle_w;
+    SourceMod src_mod;
+    RegisterType regtype;
+    int relative;
+    RegisterType relative_regtype;
+    int relative_regnum;
+    int relative_component;
+    const VariableList *relative_array;
+} SourceArgInfo;
+
+static inline int scalar_register(const RegisterType regtype, const int regnum)
+{
+    switch (regtype)
+    {
+        case REG_TYPE_DEPTHOUT:
+        case REG_TYPE_CONSTBOOL:
+        case REG_TYPE_PREDICATE:
+        case REG_TYPE_LOOP:
+            return 1;
+
+        case REG_TYPE_MISCTYPE:
+            if ( ((const MiscTypeType) regnum) == MISCTYPE_TYPE_FACE )
+                return 1;
+            return 0;
+
+        default: break;
+    } // switch
+
+    return 0;
+} // scalar_register
+
 #endif  // _INCLUDE_MOJOSHADER_INTERNAL_H_
 
+
+#if MOJOSHADER_DO_INSTRUCTION_TABLE
+// These have to be in the right order! Arrays are indexed by the value
+//  of the instruction token.
+
+// INSTRUCTION_STATE means this opcode has to update the state machine
+//  (we're entering an ELSE block, etc). INSTRUCTION means there's no
+//  state, just go straight to the emitters.
+
+// !!! FIXME: Some of these MOJOSHADER_TYPE_ANYs need to have their scope
+// !!! FIXME:  reduced to just PIXEL or VERTEX.
+
+INSTRUCTION(NOP, "NOP", 1, NULL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(MOV, "MOV", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(ADD, "ADD", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(SUB, "SUB", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(MAD, "MAD", 1, DSSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(MUL, "MUL", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(RCP, "RCP", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(RSQ, "RSQ", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(DP3, "DP3", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(DP4, "DP4", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(MIN, "MIN", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(MAX, "MAX", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(SLT, "SLT", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(SGE, "SGE", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(EXP, "EXP", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(LOG, "LOG", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(LIT, "LIT", 3, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(DST, "DST", 1, DSS, MOJOSHADER_TYPE_VERTEX)
+INSTRUCTION(LRP, "LRP", 2, DSSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(FRC, "FRC", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(M4X4, "M4X4", 4, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(M4X3, "M4X3", 3, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(M3X4, "M3X4", 4, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(M3X3, "M3X3", 3, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(M3X2, "M3X2", 2, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(CALL, "CALL", 2, S, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(CALLNZ, "CALLNZ", 3, SS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(LOOP, "LOOP", 3, SS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(RET, "RET", 1, NULL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(ENDLOOP, "ENDLOOP", 2, NULL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(LABEL, "LABEL", 0, S, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(DCL, "DCL", 0, DCL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(POW, "POW", 3, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(CRS, "CRS", 2, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(SGN, "SGN", 3, DSSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(ABS, "ABS", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(NRM, "NRM", 3, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(SINCOS, "SINCOS", 8, SINCOS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(REP, "REP", 3, S, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(ENDREP, "ENDREP", 2, NULL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(IF, "IF", 3, S, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(IFC, "IF", 3, SS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(ELSE, "ELSE", 1, NULL, MOJOSHADER_TYPE_ANY)  // !!! FIXME: state!
+INSTRUCTION(ENDIF, "ENDIF", 1, NULL, MOJOSHADER_TYPE_ANY) // !!! FIXME: state!
+INSTRUCTION_STATE(BREAK, "BREAK", 1, NULL, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(BREAKC, "BREAK", 3, SS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(MOVA, "MOVA", 1, DS, MOJOSHADER_TYPE_VERTEX)
+INSTRUCTION_STATE(DEFB, "DEFB", 0, DEFB, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(DEFI, "DEFI", 0, DEF, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION_STATE(TEXCRD, "TEXCRD", 1, TEXCRD, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(TEXKILL, "TEXKILL", 2, D, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(TEXLD, "TEXLD", 1, TEXLD, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXBEM, "TEXBEM", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXBEML, "TEXBEML", 2, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXREG2AR, "TEXREG2AR", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXREG2GB, "TEXREG2GB", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X2PAD, "TEXM3X2PAD", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X2TEX, "TEXM3X2TEX", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X3PAD, "TEXM3X3PAD", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X3TEX, "TEXM3X3TEX", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(RESERVED, 0, 0, NULL, MOJOSHADER_TYPE_UNKNOWN)
+INSTRUCTION(TEXM3X3SPEC, "TEXM3X3SPEC", 1, DSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X3VSPEC, "TEXM3X3VSPEC", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(EXPP, "EXPP", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(LOGP, "LOGP", 1, DS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(CND, "CND", 1, DSSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(DEF, "DEF", 0, DEF, MOJOSHADER_TYPE_ANY)
+INSTRUCTION(TEXREG2RGB, "TEXREG2RGB", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXDP3TEX, "TEXDP3TEX", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X2DEPTH, "TEXM3X2DEPTH", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXDP3, "TEXDP3", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXM3X3, "TEXM3X3", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXDEPTH, "TEXDEPTH", 1, D, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(CMP, "CMP", 1, DSSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(BEM, "BEM", 2, DSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(DP2ADD, "DP2ADD", 2, DSSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(DSX, "DSX", 2, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(DSY, "DSY", 2, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION(TEXLDD, "TEXLDD", 3, DSSSS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(SETP, "SETP", 1, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(TEXLDL, "TEXLDL", 2, DSS, MOJOSHADER_TYPE_ANY)
+INSTRUCTION_STATE(BREAKP, "BREAKP", 3, S, MOJOSHADER_TYPE_ANY)
+// !!! FIXME: TEXLDB?
+#endif
+
 // end of mojoshader_internal.h ...
 
--- a/mojoshader_opengl.c	Tue Dec 02 06:24:35 2008 -0500
+++ b/mojoshader_opengl.c	Thu Dec 04 21:36:11 2008 -0500
@@ -247,16 +247,6 @@
 #endif
 
 
-// #define this to force app to supply an allocator, so there's no reference
-//  to the C runtime's malloc() and free()...
-#if MOJOSHADER_FORCE_ALLOCATOR
-#define internal_malloc NULL
-#define internal_free NULL
-#else
-static void *internal_malloc(int bytes, void *d) { return malloc(bytes); }
-static void internal_free(void *ptr, void *d) { free(ptr); }
-#endif
-
 static inline void *Malloc(const size_t len)
 {
     void *retval = ctx->malloc_fn((int) len, ctx->malloc_data);