Reorganize profiles into their own files
author Caleb Cornett <caleb.cornett@outlook.com>
Tue, 23 Apr 2019 14:43:10 -0400
changeset 1199 b8ece252a201
parent 1198 84ae1b18fdfe
child 1200 eb1e5280a5a9
Reorganize profiles into their own files
CMakeLists.txt
mojoshader.c
profiles/mojoshader_profile.h
profiles/mojoshader_profile_arb1.c
profiles/mojoshader_profile_bytecode.c
profiles/mojoshader_profile_common.c
profiles/mojoshader_profile_d3d.c
profiles/mojoshader_profile_glsl.c
profiles/mojoshader_profile_metal.c
--- a/CMakeLists.txt	Tue Apr 23 13:28:17 2019 -0400
+++ b/CMakeLists.txt	Tue Apr 23 14:43:10 2019 -0400
@@ -154,6 +154,12 @@
     mojoshader.c
     mojoshader_common.c
     mojoshader_opengl.c
+    profiles/mojoshader_profile_arb1.c
+    profiles/mojoshader_profile_bytecode.c
+    profiles/mojoshader_profile_d3d.c
+    profiles/mojoshader_profile_glsl.c
+    profiles/mojoshader_profile_metal.c
+    profiles/mojoshader_profile_common.c
 )
 IF(EFFECT_SUPPORT)
     TARGET_SOURCES(mojoshader PRIVATE
--- a/mojoshader.c	Tue Apr 23 13:28:17 2019 -0400
+++ b/mojoshader.c	Tue Apr 23 14:43:10 2019 -0400
@@ -16,502 +16,7 @@
 // - A maximum of three temp registers can be used in a single instruction.
 
 #define __MOJOSHADER_INTERNAL__ 1
-#include "mojoshader_internal.h"
-
-typedef struct ConstantsList
-{
-    MOJOSHADER_constant constant;
-    struct ConstantsList *next;
-} ConstantsList;
-
-typedef struct VariableList
-{
-    MOJOSHADER_uniformType type;
-    int index;
-    int count;
-    ConstantsList *constant;
-    int used;
-    int emit_position;  // used in some profiles.
-    struct VariableList *next;
-} VariableList;
-
-typedef struct RegisterList
-{
-    RegisterType regtype;
-    int regnum;
-    MOJOSHADER_usage usage;
-    unsigned int index;
-    int writemask;
-    int misc;
-    int written;
-    const VariableList *array;
-    struct RegisterList *next;
-} RegisterList;
-
-typedef struct
-{
-    const uint32 *token;   // this is the unmolested token in the stream.
-    int regnum;
-    int swizzle;  // xyzw (all four, not split out).
-    int swizzle_x;
-    int swizzle_y;
-    int swizzle_z;
-    int swizzle_w;
-    SourceMod src_mod;
-    RegisterType regtype;
-    int relative;
-    RegisterType relative_regtype;
-    int relative_regnum;
-    int relative_component;
-    const VariableList *relative_array;
-} SourceArgInfo;
-
-struct Profile;  // predeclare.
-
-typedef struct CtabData
-{
-    int have_ctab;
-    int symbol_count;
-    MOJOSHADER_symbol *symbols;
-} CtabData;
-
-// Context...this is state that changes as we parse through a shader...
-typedef struct Context
-{
-    int isfail;
-    int out_of_memory;
-    MOJOSHADER_malloc malloc;
-    MOJOSHADER_free free;
-    void *malloc_data;
-    int current_position;
-    const uint32 *orig_tokens;
-    const uint32 *tokens;
-    uint32 tokencount;
-    int know_shader_size;
-    const MOJOSHADER_swizzle *swizzles;
-    unsigned int swizzles_count;
-    const MOJOSHADER_samplerMap *samplermap;
-    unsigned int samplermap_count;
-    Buffer *output;
-    Buffer *preflight;
-    Buffer *globals;
-    Buffer *inputs;
-    Buffer *outputs;
-    Buffer *helpers;
-    Buffer *subroutines;
-    Buffer *mainline_intro;
-    Buffer *mainline_arguments;
-    Buffer *mainline_top;
-    Buffer *mainline;
-    Buffer *postflight;
-    Buffer *ignore;
-    Buffer *output_stack[3];
-    int indent_stack[3];
-    int output_stack_len;
-    int indent;
-    const char *shader_type_str;
-    const char *endline;
-    const char *mainfn;
-    int endline_len;
-    int profileid;
-    const struct Profile *profile;
-    MOJOSHADER_shaderType shader_type;
-    uint8 major_ver;
-    uint8 minor_ver;
-    DestArgInfo dest_arg;
-    SourceArgInfo source_args[5];
-    SourceArgInfo predicate_arg;  // for predicated instructions.
-    uint32 dwords[4];
-    uint32 version_token;
-    int instruction_count;
-    uint32 instruction_controls;
-    uint32 previous_opcode;
-    int coissue;
-    int loops;
-    int reps;
-    int max_reps;
-    int cmps;
-    int scratch_registers;
-    int max_scratch_registers;
-    int branch_labels_stack_index;
-    int branch_labels_stack[32];
-    int assigned_branch_labels;
-    int assigned_vertex_attributes;
-    int last_address_reg_component;
-    RegisterList used_registers;
-    RegisterList defined_registers;
-    ErrorList *errors;
-    int constant_count;
-    ConstantsList *constants;
-    int uniform_count;
-    int uniform_float4_count;
-    int uniform_int4_count;
-    int uniform_bool_count;
-    RegisterList uniforms;
-    int attribute_count;
-    RegisterList attributes;
-    int sampler_count;
-    RegisterList samplers;
-    VariableList *variables;  // variables to register mapping.
-    int centroid_allowed;
-    CtabData ctab;
-    int have_relative_input_registers;
-    int have_multi_color_outputs;
-    int determined_constants_arrays;
-    int predicated;
-    int uses_pointsize;
-    int uses_fog;
-
-    // !!! FIXME: move these into SUPPORT_PROFILE sections.
-    int glsl_generated_lit_helper;
-    int glsl_generated_texldd_setup;
-    int glsl_generated_texm3x3spec_helper;
-    int arb1_wrote_position;
-    // !!! FIXME: move these into SUPPORT_PROFILE sections.
-
-    int have_preshader;
-    int ignores_ctab;
-    int reset_texmpad;
-    int texm3x2pad_dst0;
-    int texm3x2pad_src0;
-    int texm3x3pad_dst0;
-    int texm3x3pad_src0;
-    int texm3x3pad_dst1;
-    int texm3x3pad_src1;
-    MOJOSHADER_preshader *preshader;
-
-#if SUPPORT_PROFILE_ARB1_NV
-    int profile_supports_nv2;
-    int profile_supports_nv3;
-    int profile_supports_nv4;
-#endif
-#if SUPPORT_PROFILE_GLSL120
-    int profile_supports_glsl120;
-#endif
-#if SUPPORT_PROFILE_GLSLES
-    int profile_supports_glsles;
-#endif
-
-#if SUPPORT_PROFILE_METAL
-    int metal_need_header_common;
-    int metal_need_header_math;
-    int metal_need_header_relational;
-    int metal_need_header_geometric;
-    int metal_need_header_graphics;
-    int metal_need_header_texture;
-#endif
-} Context;
-
-
-// Use these macros so we can remove all bits of these profiles from the build.
-#if SUPPORT_PROFILE_ARB1_NV
-#define support_nv2(ctx) ((ctx)->profile_supports_nv2)
-#define support_nv3(ctx) ((ctx)->profile_supports_nv3)
-#define support_nv4(ctx) ((ctx)->profile_supports_nv4)
-#else
-#define support_nv2(ctx) (0)
-#define support_nv3(ctx) (0)
-#define support_nv4(ctx) (0)
-#endif
-
-#if SUPPORT_PROFILE_GLSL120
-#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
-#else
-#define support_glsl120(ctx) (0)
-#endif
-
-#if SUPPORT_PROFILE_GLSLES
-#define support_glsles(ctx) ((ctx)->profile_supports_glsles)
-#else
-#define support_glsles(ctx) (0)
-#endif
-
-
-// Profile entry points...
-
-// one emit function for each opcode in each profile.
-typedef void (*emit_function)(Context *ctx);
-
-// one emit function for starting output in each profile.
-typedef void (*emit_start)(Context *ctx, const char *profilestr);
-
-// one emit function for ending output in each profile.
-typedef void (*emit_end)(Context *ctx);
-
-// one emit function for phase opcode output in each profile.
-typedef void (*emit_phase)(Context *ctx);
-
-// one emit function for finalizing output in each profile.
-typedef void (*emit_finalize)(Context *ctx);
-
-// one emit function for global definitions in each profile.
-typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
-
-// one emit function for relative uniform arrays in each profile.
-typedef void (*emit_array)(Context *ctx, VariableList *var);
-
-// one emit function for relative constants arrays in each profile.
-typedef void (*emit_const_array)(Context *ctx,
-                                 const struct ConstantsList *constslist,
-                                 int base, int size);
-
-// one emit function for uniforms in each profile.
-typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
-                             const VariableList *var);
-
-// one emit function for samplers in each profile.
-typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
-                             int texbem);
-
-// one emit function for attributes in each profile.
-typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
-                               MOJOSHADER_usage usage, int index, int wmask,
-                               int flags);
-
-// one args function for each possible sequence of opcode arguments.
-typedef int (*args_function)(Context *ctx);
-
-// one state function for each opcode where we have state machine updates.
-typedef void (*state_function)(Context *ctx);
-
-// one function for varnames in each profile.
-typedef const char *(*varname_function)(Context *c, RegisterType t, int num);
-
-// one function for const var array in each profile.
-typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
-
-typedef struct Profile
-{
-    const char *name;
-    emit_start start_emitter;
-    emit_end end_emitter;
-    emit_phase phase_emitter;
-    emit_global global_emitter;
-    emit_array array_emitter;
-    emit_const_array const_array_emitter;
-    emit_uniform uniform_emitter;
-    emit_sampler sampler_emitter;
-    emit_attribute attribute_emitter;
-    emit_finalize finalize_emitter;
-    varname_function get_varname;
-    const_array_varname_function get_const_array_varname;
-} Profile;
-
-
-// !!! FIXME: cut and paste between every damned source file follows...
-// !!! FIXME: We need to make some sort of ContextBase that applies to all
-// !!! FIXME:  files and move this stuff to mojoshader_common.c ...
-
-static inline void out_of_memory(Context *ctx)
-{
-    ctx->isfail = ctx->out_of_memory = 1;
-} // out_of_memory
-
-static inline void *Malloc(Context *ctx, const size_t len)
-{
-    void *retval = ctx->malloc((int) len, ctx->malloc_data);
-    if (retval == NULL)
-        out_of_memory(ctx);
-    return retval;
-} // Malloc
-
-static inline char *StrDup(Context *ctx, const char *str)
-{
-    char *retval = (char *) Malloc(ctx, strlen(str) + 1);
-    if (retval != NULL)
-        strcpy(retval, str);
-    return retval;
-} // StrDup
-
-static inline void Free(Context *ctx, void *ptr)
-{
-    ctx->free(ptr, ctx->malloc_data);
-} // Free
-
-static void * MOJOSHADERCALL MallocBridge(int bytes, void *data)
-{
-    return Malloc((Context *) data, (size_t) bytes);
-} // MallocBridge
-
-static void MOJOSHADERCALL FreeBridge(void *ptr, void *data)
-{
-    Free((Context *) data, ptr);
-} // FreeBridge
-
-
-// jump between output sections in the context...
-
-static int set_output(Context *ctx, Buffer **section)
-{
-    // only create output sections on first use.
-    if (*section == NULL)
-    {
-        *section = buffer_create(256, MallocBridge, FreeBridge, ctx);
-        if (*section == NULL)
-            return 0;
-    } // if
-
-    ctx->output = *section;
-    return 1;
-} // set_output
-
-static void push_output(Context *ctx, Buffer **section)
-{
-    assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack)));
-    ctx->output_stack[ctx->output_stack_len] = ctx->output;
-    ctx->indent_stack[ctx->output_stack_len] = ctx->indent;
-    ctx->output_stack_len++;
-    if (!set_output(ctx, section))
-        return;
-    ctx->indent = 0;
-} // push_output
-
-static inline void pop_output(Context *ctx)
-{
-    assert(ctx->output_stack_len > 0);
-    ctx->output_stack_len--;
-    ctx->output = ctx->output_stack[ctx->output_stack_len];
-    ctx->indent = ctx->indent_stack[ctx->output_stack_len];
-} // pop_output
-
-
-
-// Shader model version magic...
-
-static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
-{
-    return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) );
-} // version_ui32
-
-static inline int shader_version_supported(const uint8 maj, const uint8 min)
-{
-    return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
-} // shader_version_supported
-
-static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
-                                         const uint8 min)
-{
-    return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
-} // shader_version_atleast
-
-static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
-                                         const uint8 min)
-{
-    return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
-} // shader_version_exactly
-
-static inline int shader_is_pixel(const Context *ctx)
-{
-    return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
-} // shader_is_pixel
-
-static inline int shader_is_vertex(const Context *ctx)
-{
-    return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
-} // shader_is_vertex
-
-
-static inline int isfail(const Context *ctx)
-{
-    return ctx->isfail;
-} // isfail
-
-
-static void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
-static void failf(Context *ctx, const char *fmt, ...)
-{
-    ctx->isfail = 1;
-    if (ctx->out_of_memory)
-        return;
-
-    // no filename at this level (we pass a NULL to errorlist_add_va()...)
-    va_list ap;
-    va_start(ap, fmt);
-    errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap);
-    va_end(ap);
-} // failf
-
-
-static inline void fail(Context *ctx, const char *reason)
-{
-    failf(ctx, "%s", reason);
-} // fail
-
-
-static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
-static void output_line(Context *ctx, const char *fmt, ...)
-{
-    assert(ctx->output != NULL);
-    if (isfail(ctx))
-        return;  // we failed previously, don't go on...
-
-    const int indent = ctx->indent;
-    if (indent > 0)
-    {
-        char *indentbuf = (char *) alloca(indent);
-        memset(indentbuf, '\t', indent);
-        buffer_append(ctx->output, indentbuf, indent);
-    } // if
-
-    va_list ap;
-    va_start(ap, fmt);
-    buffer_append_va(ctx->output, fmt, ap);
-    va_end(ap);
-
-    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
-} // output_line
-
-
-static inline void output_blank_line(Context *ctx)
-{
-    assert(ctx->output != NULL);
-    if (!isfail(ctx))
-        buffer_append(ctx->output, ctx->endline, ctx->endline_len);
-} // output_blank_line
-
-
-// !!! FIXME: this is sort of nasty.
-static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
-                     int leavedecimal)
-{
-    const size_t len = MOJOSHADER_printFloat(buf, bufsize, f);
-    if ((len+2) >= bufsize)
-        fail(ctx, "BUG: internal buffer is too small");
-    else
-    {
-        char *end = buf + len;
-        char *ptr = strchr(buf, '.');
-        if (ptr == NULL)
-        {
-            if (leavedecimal)
-                strcat(buf, ".0");
-            return;  // done.
-        } // if
-
-        while (--end != ptr)
-        {
-            if (*end != '0')
-            {
-                end++;
-                break;
-            } // if
-        } // while
-        if ((leavedecimal) && (end == ptr))
-            end += 2;
-        *end = '\0';  // chop extra '0' or all decimal places off.
-    } // else
-} // floatstr
-
-static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
-{
-    return (TextureType) (((int) type) + 2);
-} // cvtMojoToD3DSamplerType
-
-static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
-{
-    return (MOJOSHADER_samplerType) (((int) type) - 2);
-} // cvtD3DToMojoSamplerType
-
+#include "profiles/mojoshader_profile.h"
 
 // Deal with register lists...  !!! FIXME: I sort of hate this.
 
@@ -525,71 +30,6 @@
     } // while
 } // free_reglist
 
-static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
-{
-    return ( ((uint32) regnum) | (((uint32) regtype) << 16) );
-} // reg_to_uint32
-
-// !!! FIXME: ditch this for a hash table.
-static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
-                                    const RegisterType regtype,
-                                    const int regnum)
-{
-    const uint32 newval = reg_to_ui32(regtype, regnum);
-    RegisterList *item = prev->next;
-    while (item != NULL)
-    {
-        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
-        if (newval == val)
-            return item;  // already set, so we're done.
-        else if (newval < val)  // insert it here.
-            break;
-        else // if (newval > val)
-        {
-            // keep going, we're not to the insertion point yet.
-            prev = item;
-            item = item->next;
-        } // else
-    } // while
-
-    // we need to insert an entry after (prev).
-    item = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
-    if (item != NULL)
-    {
-        item->regtype = regtype;
-        item->regnum = regnum;
-        item->usage = MOJOSHADER_USAGE_UNKNOWN;
-        item->index = 0;
-        item->writemask = 0;
-        item->misc = 0;
-        item->written = 0;
-        item->array = NULL;
-        item->next = prev->next;
-        prev->next = item;
-    } // if
-
-    return item;
-} // reglist_insert
-
-static RegisterList *reglist_find(const RegisterList *prev,
-                                  const RegisterType rtype, const int regnum)
-{
-    const uint32 newval = reg_to_ui32(rtype, regnum);
-    RegisterList *item = prev->next;
-    while (item != NULL)
-    {
-        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
-        if (newval == val)
-            return item;  // here it is.
-        else if (newval < val)  // should have been here if it existed.
-            return NULL;
-        else // if (newval > val)
-            item = item->next;
-    } // while
-
-    return NULL;  // wasn't in the list.
-} // reglist_find
-
 static inline const RegisterList *reglist_exists(RegisterList *prev,
                                                  const RegisterType regtype,
                                                  const int regnum)
@@ -604,33 +44,6 @@
     return (reg && reg->written);
 } // register_was_written
 
-static inline RegisterList *set_used_register(Context *ctx,
-                                              const RegisterType regtype,
-                                              const int regnum,
-                                              const int written)
-{
-    RegisterList *reg = NULL;
-    if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0))
-        ctx->have_multi_color_outputs = 1;
-
-    reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum);
-    if (reg && written)
-        reg->written = 1;
-    return reg;
-} // set_used_register
-
-static inline int get_used_register(Context *ctx, const RegisterType regtype,
-                                    const int regnum)
-{
-    return (reglist_exists(&ctx->used_registers, regtype, regnum) != NULL);
-} // get_used_register
-
-static inline void set_defined_register(Context *ctx, const RegisterType rtype,
-                                        const int regnum)
-{
-    reglist_insert(ctx, &ctx->defined_registers, rtype, regnum);
-} // set_defined_register
-
 static inline int get_defined_register(Context *ctx, const RegisterType rtype,
                                        const int regnum)
 {
@@ -653,6 +66,16 @@
         ctx->uses_fog = 1;  // note that we have to check this later.
 } // add_attribute_register
 
+static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
+{
+    return (TextureType) (((int) type) + 2);
+} // cvtMojoToD3DSamplerType
+
+static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
+{
+    return (MOJOSHADER_samplerType) (((int) type) - 2);
+} // cvtD3DToMojoSamplerType
+
 static inline void add_sampler(Context *ctx, const int regnum,
                                TextureType ttype, const int texbem)
 {
@@ -679,80 +102,6 @@
     item->misc |= texbem;
 } // add_sampler
 
-
-static inline int writemask_xyzw(const int writemask)
-{
-    return (writemask == 0xF);  // 0xF == 1111. No explicit mask (full!).
-} // writemask_xyzw
-
-
-static inline int writemask_xyz(const int writemask)
-{
-    return (writemask == 0x7);  // 0x7 == 0111. (that is: xyz)
-} // writemask_xyz
-
-
-static inline int writemask_xy(const int writemask)
-{
-    return (writemask == 0x3);  // 0x3 == 0011. (that is: xy)
-} // writemask_xy
-
-
-static inline int writemask_x(const int writemask)
-{
-    return (writemask == 0x1);  // 0x1 == 0001. (that is: x)
-} // writemask_x
-
-
-static inline int writemask_y(const int writemask)
-{
-    return (writemask == 0x2);  // 0x1 == 0010. (that is: y)
-} // writemask_y
-
-
-static inline int replicate_swizzle(const int swizzle)
-{
-    return ( (((swizzle >> 0) & 0x3) == ((swizzle >> 2) & 0x3)) &&
-             (((swizzle >> 2) & 0x3) == ((swizzle >> 4) & 0x3)) &&
-             (((swizzle >> 4) & 0x3) == ((swizzle >> 6) & 0x3)) );
-} // replicate_swizzle
-
-
-static inline int no_swizzle(const int swizzle)
-{
-    return (swizzle == 0xE4);  // 0xE4 == 11100100 ... 0 1 2 3. No swizzle.
-} // no_swizzle
-
-
-static inline int vecsize_from_writemask(const int m)
-{
-    return (m & 1) + ((m >> 1) & 1) + ((m >> 2) & 1) + ((m >> 3) & 1);
-} // vecsize_from_writemask
-
-
-static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask)
-{
-    dst->writemask = mask;
-    dst->writemask0 = ((mask >> 0) & 1);
-    dst->writemask1 = ((mask >> 1) & 1);
-    dst->writemask2 = ((mask >> 2) & 1);
-    dst->writemask3 = ((mask >> 3) & 1);
-} // set_dstarg_writemask
-
-
-static int allocate_scratch_register(Context *ctx)
-{
-    const int retval = ctx->scratch_registers++;
-    if (retval >= ctx->max_scratch_registers)
-        ctx->max_scratch_registers = retval + 1;
-    return retval;
-} // allocate_scratch_register
-
-static int allocate_branch_label(Context *ctx)
-{
-    return ctx->assigned_branch_labels++;
-} // allocate_branch_label
-
 static inline void adjust_token_position(Context *ctx, const int incr)
 {
     ctx->tokens += incr;
@@ -760,991 +109,132 @@
     ctx->current_position += incr * sizeof (uint32);
 } // adjust_token_position
 
-
-// D3D stuff that's used in more than just the d3d profile...
-
-static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
-                    const RegisterType rtype, const int rnum)
-{
-    const int uses_psize = ctx->uses_pointsize;
-    const int uses_fog = ctx->uses_fog;
-    if ( (rtype == REG_TYPE_OUTPUT) && ((uses_psize) || (uses_fog)) )
-    {
-        const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum);
-        if (reg != NULL)
-        {
-            const MOJOSHADER_usage usage = reg->usage;
-            return ( (uses_psize && (usage == MOJOSHADER_USAGE_POINTSIZE)) ||
-                     (uses_fog && (usage == MOJOSHADER_USAGE_FOG)) );
-        } // if
-    } // if
-
-    return scalar_register(shader_type, rtype, rnum);
-} // isscalar
-
-static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };
-
-
-static const char *usagestrs[] = {
-    "_position", "_blendweight", "_blendindices", "_normal", "_psize",
-    "_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont",
-    "_color", "_fog", "_depth", "_sample"
-};
-
-static const char *get_D3D_register_string(Context *ctx,
-                                           RegisterType regtype,
-                                           int regnum, char *regnum_str,
-                                           size_t regnum_size)
-{
-    const char *retval = NULL;
-    int has_number = 1;
-
-    switch (regtype)
-    {
-        case REG_TYPE_TEMP:
-            retval = "r";
-            break;
-
-        case REG_TYPE_INPUT:
-            retval = "v";
-            break;
-
-        case REG_TYPE_CONST:
-            retval = "c";
-            break;
-
-        case REG_TYPE_ADDRESS:  // (or REG_TYPE_TEXTURE, same value.)
-            retval = shader_is_vertex(ctx) ? "a" : "t";
-            break;
-
-        case REG_TYPE_RASTOUT:
-            switch ((RastOutType) regnum)
-            {
-                case RASTOUT_TYPE_POSITION: retval = "oPos"; break;
-                case RASTOUT_TYPE_FOG: retval = "oFog"; break;
-                case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break;
-            } // switch
-            has_number = 0;
-            break;
-
-        case REG_TYPE_ATTROUT:
-            retval = "oD";
-            break;
-
-        case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.)
-            if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
-                retval = "o";
-            else
-                retval = "oT";
-            break;
-
-        case REG_TYPE_CONSTINT:
-            retval = "i";
-            break;
-
-        case REG_TYPE_COLOROUT:
-            retval = "oC";
-            break;
-
-        case REG_TYPE_DEPTHOUT:
-            retval = "oDepth";
-            has_number = 0;
-            break;
-
-        case REG_TYPE_SAMPLER:
-            retval = "s";
-            break;
-
-        case REG_TYPE_CONSTBOOL:
-            retval = "b";
-            break;
-
-        case REG_TYPE_LOOP:
-            retval = "aL";
-            has_number = 0;
-            break;
-
-        case REG_TYPE_MISCTYPE:
-            switch ((const MiscTypeType) regnum)
-            {
-                case MISCTYPE_TYPE_POSITION: retval = "vPos"; break;
-                case MISCTYPE_TYPE_FACE: retval = "vFace"; break;
-            } // switch
-            has_number = 0;
-            break;
-
-        case REG_TYPE_LABEL:
-            retval = "l";
-            break;
-
-        case REG_TYPE_PREDICATE:
-            retval = "p";
-            break;
-
-        //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
-        default:
-            fail(ctx, "unknown register type");
-            retval = "???";
-            has_number = 0;
-            break;
-    } // switch
-
-    if (has_number)
-        snprintf(regnum_str, regnum_size, "%u", (uint) regnum);
-    else
-        regnum_str[0] = '\0';
-
-    return retval;
-} // get_D3D_register_string
-
-
-// !!! FIXME: can we split the profile code out to separate source files?
+// Generate emitter declarations for each profile with this macro...
+
+#define PREDECLARE_PROFILE(prof) \
+    void emit_##prof##_start(Context *ctx, const char *profilestr); \
+    void emit_##prof##_end(Context *ctx); \
+    void emit_##prof##_phase(Context *ctx); \
+    void emit_##prof##_finalize(Context *ctx); \
+    void emit_##prof##_global(Context *ctx, RegisterType regtype, int regnum);\
+    void emit_##prof##_array(Context *ctx, VariableList *var); \
+    void emit_##prof##_const_array(Context *ctx, const ConstantsList *clist, \
+                                   int base, int size); \
+    void emit_##prof##_uniform(Context *ctx, RegisterType regtype, int regnum,\
+                               const VariableList *var); \
+    void emit_##prof##_sampler(Context *ctx, int stage, TextureType ttype, \
+                               int tb); \
+    void emit_##prof##_attribute(Context *ctx, RegisterType regtype, \
+                                 int regnum, MOJOSHADER_usage usage, \
+                                 int index, int wmask, int flags); \
+    void emit_##prof##_NOP(Context *ctx); \
+    void emit_##prof##_MOV(Context *ctx); \
+    void emit_##prof##_ADD(Context *ctx); \
+    void emit_##prof##_SUB(Context *ctx); \
+    void emit_##prof##_MAD(Context *ctx); \
+    void emit_##prof##_MUL(Context *ctx); \
+    void emit_##prof##_RCP(Context *ctx); \
+    void emit_##prof##_RSQ(Context *ctx); \
+    void emit_##prof##_DP3(Context *ctx); \
+    void emit_##prof##_DP4(Context *ctx); \
+    void emit_##prof##_MIN(Context *ctx); \
+    void emit_##prof##_MAX(Context *ctx); \
+    void emit_##prof##_SLT(Context *ctx); \
+    void emit_##prof##_SGE(Context *ctx); \
+    void emit_##prof##_EXP(Context *ctx); \
+    void emit_##prof##_LOG(Context *ctx); \
+    void emit_##prof##_LIT(Context *ctx); \
+    void emit_##prof##_DST(Context *ctx); \
+    void emit_##prof##_LRP(Context *ctx); \
+    void emit_##prof##_FRC(Context *ctx); \
+    void emit_##prof##_M4X4(Context *ctx); \
+    void emit_##prof##_M4X3(Context *ctx); \
+    void emit_##prof##_M3X4(Context *ctx); \
+    void emit_##prof##_M3X3(Context *ctx); \
+    void emit_##prof##_M3X2(Context *ctx); \
+    void emit_##prof##_CALL(Context *ctx); \
+    void emit_##prof##_CALLNZ(Context *ctx); \
+    void emit_##prof##_LOOP(Context *ctx); \
+    void emit_##prof##_ENDLOOP(Context *ctx); \
+    void emit_##prof##_LABEL(Context *ctx); \
+    void emit_##prof##_DCL(Context *ctx); \
+    void emit_##prof##_POW(Context *ctx); \
+    void emit_##prof##_CRS(Context *ctx); \
+    void emit_##prof##_SGN(Context *ctx); \
+    void emit_##prof##_ABS(Context *ctx); \
+    void emit_##prof##_NRM(Context *ctx); \
+    void emit_##prof##_SINCOS(Context *ctx); \
+    void emit_##prof##_REP(Context *ctx); \
+    void emit_##prof##_ENDREP(Context *ctx); \
+    void emit_##prof##_IF(Context *ctx); \
+    void emit_##prof##_IFC(Context *ctx); \
+    void emit_##prof##_ELSE(Context *ctx); \
+    void emit_##prof##_ENDIF(Context *ctx); \
+    void emit_##prof##_BREAK(Context *ctx); \
+    void emit_##prof##_BREAKC(Context *ctx); \
+    void emit_##prof##_MOVA(Context *ctx); \
+    void emit_##prof##_DEFB(Context *ctx); \
+    void emit_##prof##_DEFI(Context *ctx); \
+    void emit_##prof##_TEXCRD(Context *ctx); \
+    void emit_##prof##_TEXKILL(Context *ctx); \
+    void emit_##prof##_TEXLD(Context *ctx); \
+    void emit_##prof##_TEXBEM(Context *ctx); \
+    void emit_##prof##_TEXBEML(Context *ctx); \
+    void emit_##prof##_TEXREG2AR(Context *ctx); \
+    void emit_##prof##_TEXREG2GB(Context *ctx); \
+    void emit_##prof##_TEXM3X2PAD(Context *ctx); \
+    void emit_##prof##_TEXM3X2TEX(Context *ctx); \
+    void emit_##prof##_TEXM3X3PAD(Context *ctx); \
+    void emit_##prof##_TEXM3X3TEX(Context *ctx); \
+    void emit_##prof##_TEXM3X3SPEC(Context *ctx); \
+    void emit_##prof##_TEXM3X3VSPEC(Context *ctx); \
+    void emit_##prof##_EXPP(Context *ctx); \
+    void emit_##prof##_LOGP(Context *ctx); \
+    void emit_##prof##_CND(Context *ctx); \
+    void emit_##prof##_DEF(Context *ctx); \
+    void emit_##prof##_TEXREG2RGB(Context *ctx); \
+    void emit_##prof##_TEXDP3TEX(Context *ctx); \
+    void emit_##prof##_TEXM3X2DEPTH(Context *ctx); \
+    void emit_##prof##_TEXDP3(Context *ctx); \
+    void emit_##prof##_TEXM3X3(Context *ctx); \
+    void emit_##prof##_TEXDEPTH(Context *ctx); \
+    void emit_##prof##_CMP(Context *ctx); \
+    void emit_##prof##_BEM(Context *ctx); \
+    void emit_##prof##_DP2ADD(Context *ctx); \
+    void emit_##prof##_DSX(Context *ctx); \
+    void emit_##prof##_DSY(Context *ctx); \
+    void emit_##prof##_TEXLDD(Context *ctx); \
+    void emit_##prof##_SETP(Context *ctx); \
+    void emit_##prof##_TEXLDL(Context *ctx); \
+    void emit_##prof##_BREAKP(Context *ctx); \
+    void emit_##prof##_RESERVED(Context *ctx); \
+    void emit_##prof##_RET(Context *ctx); \
+    const char *get_##prof##_varname(Context *ctx, RegisterType rt, \
+                                     int regnum); \
+    const char *get_##prof##_const_array_varname(Context *ctx, \
+                                                 int base, int size);
+
+// Check for profile support...
 
 #define AT_LEAST_ONE_PROFILE 0
 
+#if !SUPPORT_PROFILE_BYTECODE
+#define PROFILE_EMITTER_BYTECODE(op)
+#else
+#undef AT_LEAST_ONE_PROFILE
+#define AT_LEAST_ONE_PROFILE 1
+#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
+PREDECLARE_PROFILE(BYTECODE)
+#endif
+
 #if !SUPPORT_PROFILE_D3D
 #define PROFILE_EMITTER_D3D(op)
 #else
 #undef AT_LEAST_ONE_PROFILE
 #define AT_LEAST_ONE_PROFILE 1
 #define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
-
-static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
-                                                 const SourceArgInfo *arg,
-                                                 char *buf, size_t buflen)
-{
-    const char *premod_str = "";
-    const char *postmod_str = "";
-    switch (arg->src_mod)
-    {
-        case SRCMOD_NEGATE:
-            premod_str = "-";
-            break;
-
-        case SRCMOD_BIASNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_BIAS:
-            postmod_str = "_bias";
-            break;
-
-        case SRCMOD_SIGNNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_SIGN:
-            postmod_str = "_bx2";
-            break;
-
-        case SRCMOD_COMPLEMENT:
-            premod_str = "1-";
-            break;
-
-        case SRCMOD_X2NEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_X2:
-            postmod_str = "_x2";
-            break;
-
-        case SRCMOD_DZ:
-            postmod_str = "_dz";
-            break;
-
-        case SRCMOD_DW:
-            postmod_str = "_dw";
-            break;
-
-        case SRCMOD_ABSNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_ABS:
-            postmod_str = "_abs";
-            break;
-
-        case SRCMOD_NOT:
-            premod_str = "!";
-            break;
-
-        case SRCMOD_NONE:
-        case SRCMOD_TOTAL:
-             break;  // stop compiler whining.
-    } // switch
-
-
-    char regnum_str[16];
-    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
-                                                      arg->regnum, regnum_str,
-                                                      sizeof (regnum_str));
-
-    if (regtype_str == NULL)
-    {
-        fail(ctx, "Unknown source register type.");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const char *rel_lbracket = "";
-    const char *rel_rbracket = "";
-    char rel_swizzle[4] = { '\0' };
-    char rel_regnum_str[16] = { '\0' };
-    const char *rel_regtype_str = "";
-    if (arg->relative)
-    {
-        if (arg->relative_regtype == REG_TYPE_LOOP)
-        {
-            rel_swizzle[0] = '\0';
-            rel_swizzle[1] = '\0';
-            rel_swizzle[2] = '\0';
-        } // if
-        else
-        {
-            rel_swizzle[0] = '.';
-            rel_swizzle[1] = swizzle_channels[arg->relative_component];
-            rel_swizzle[2] = '\0';
-        } // else
-
-        rel_lbracket = "[";
-        rel_rbracket = "]";
-        rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
-                                                  arg->relative_regnum,
-                                                  rel_regnum_str,
-                                                  sizeof (rel_regnum_str));
-
-        if (regtype_str == NULL)
-        {
-            fail(ctx, "Unknown relative source register type.");
-            *buf = '\0';
-            return buf;
-        } // if
-    } // if
-
-    char swizzle_str[6];
-    size_t i = 0;
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !no_swizzle(arg->swizzle))
-    {
-        swizzle_str[i++] = '.';
-        swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
-        swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
-        swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
-        swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
-
-        // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
-        while (swizzle_str[i-1] == swizzle_str[i-2])
-            i--;
-    } // if
-    swizzle_str[i] = '\0';
-    assert(i < sizeof (swizzle_str));
-
-    // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12]
-    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
-             premod_str, regtype_str, regnum_str, postmod_str,
-             rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
-             rel_rbracket, swizzle_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_D3D_srcarg_string_in_buf
-
-
-static const char *make_D3D_destarg_string(Context *ctx, char *buf,
-                                           const size_t buflen)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-
-    const char *result_shift_str = "";
-    switch (arg->result_shift)
-    {
-        case 0x1: result_shift_str = "_x2"; break;
-        case 0x2: result_shift_str = "_x4"; break;
-        case 0x3: result_shift_str = "_x8"; break;
-        case 0xD: result_shift_str = "_d8"; break;
-        case 0xE: result_shift_str = "_d4"; break;
-        case 0xF: result_shift_str = "_d2"; break;
-    } // switch
-
-    const char *sat_str = (arg->result_mod & MOD_SATURATE) ? "_sat" : "";
-    const char *pp_str = (arg->result_mod & MOD_PP) ? "_pp" : "";
-    const char *cent_str = (arg->result_mod & MOD_CENTROID) ? "_centroid" : "";
-
-    char regnum_str[16];
-    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
-                                                      arg->regnum, regnum_str,
-                                                      sizeof (regnum_str));
-    if (regtype_str == NULL)
-    {
-        fail(ctx, "Unknown destination register type.");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    char writemask_str[6];
-    size_t i = 0;
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !writemask_xyzw(arg->writemask))
-    {
-        writemask_str[i++] = '.';
-        if (arg->writemask0) writemask_str[i++] = 'x';
-        if (arg->writemask1) writemask_str[i++] = 'y';
-        if (arg->writemask2) writemask_str[i++] = 'z';
-        if (arg->writemask3) writemask_str[i++] = 'w';
-    } // if
-    writemask_str[i] = '\0';
-    assert(i < sizeof (writemask_str));
-
-    const char *pred_left = "";
-    const char *pred_right = "";
-    char pred[32] = { '\0' };
-    if (ctx->predicated)
-    {
-        pred_left = "(";
-        pred_right = ") ";
-        make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
-                                      pred, sizeof (pred));
-    } // if
-
-    // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
-    snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s",
-             result_shift_str, sat_str, pp_str, cent_str,
-             pred_left, pred, pred_right,
-             regtype_str, regnum_str, writemask_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_D3D_destarg_string
-
-
-static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx,
-                                          char *buf, size_t buflen)
-{
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-    return make_D3D_srcarg_string_in_buf(ctx, arg, buf, buflen);
-} // make_D3D_srcarg_string
-
-static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
-                                           int regnum, char *buf,
-                                           const size_t len)
-{
-    char regnum_str[16];
-    const char *regtype_str = get_D3D_register_string(ctx, rt, regnum,
-                                              regnum_str, sizeof (regnum_str));
-    snprintf(buf,len,"%s%s", regtype_str, regnum_str);
-    return buf;
-} // get_D3D_varname_in_buf
-
-
-static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
-{
-    char buf[64];
-    get_D3D_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_D3D_varname
-
-
-static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
-{
-    char buf[64];
-    snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size);
-    return StrDup(ctx, buf);
-} // get_D3D_const_array_varname
-
-
-static void emit_D3D_start(Context *ctx, const char *profilestr)
-{
-    const uint major = (uint) ctx->major_ver;
-    const uint minor = (uint) ctx->minor_ver;
-    char minor_str[16];
-
-    ctx->ignores_ctab = 1;
-
-    if (minor == 0xFF)
-        strcpy(minor_str, "sw");
-    else if ((major > 1) && (minor == 1))
-        strcpy(minor_str, "x");  // for >= SM2, apparently this is "x". Weird.
-    else
-        snprintf(minor_str, sizeof (minor_str), "%u", (uint) minor);
-
-    output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str);
-} // emit_D3D_start
-
-
-static void emit_D3D_end(Context *ctx)
-{
-    output_line(ctx, "end");
-} // emit_D3D_end
-
-
-static void emit_D3D_phase(Context *ctx)
-{
-    output_line(ctx, "phase");
-} // emit_D3D_phase
-
-
-static void emit_D3D_finalize(Context *ctx)
-{
-    // no-op.
-} // emit_D3D_finalize
-
-
-static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
-{
-    // no-op.
-} // emit_D3D_global
-
-
-static void emit_D3D_array(Context *ctx, VariableList *var)
-{
-    // no-op.
-} // emit_D3D_array
-
-
-static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
-                                 int base, int size)
-{
-    // no-op.
-} // emit_D3D_const_array
-
-
-static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
-                             const VariableList *var)
-{
-    // no-op.
-} // emit_D3D_uniform
-
-
-static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
-{
-    // no-op.
-} // emit_D3D_sampler
-
-
-static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
-                               MOJOSHADER_usage usage, int index, int wmask,
-                               int flags)
-{
-    // no-op.
-} // emit_D3D_attribute
-
-
-static void emit_D3D_RESERVED(Context *ctx)
-{
-    // do nothing; fails in the state machine.
-} // emit_D3D_RESERVED
-
-
-// Generic D3D opcode emitters. A list of macros generate all the entry points
-//  that call into these...
-
-static char *lowercase(char *dst, const char *src)
-{
-    int i = 0;
-    do
-    {
-        const char ch = src[i];
-        dst[i] = (((ch >= 'A') && (ch <= 'Z')) ? (ch - ('A' - 'a')) : ch);
-    } while (src[i++]);
-    return dst;
-} // lowercase
-
-
-static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s%s", ctx->coissue ? "+" : "", opcode, dst);
-} // emit_D3D_opcode_d
-
-
-static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
-{
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s %s", ctx->coissue ? "+" : "", opcode, src0);
-} // emit_D3D_opcode_s
-
-
-static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
-{
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s %s, %s", ctx->coissue ? "+" : "", opcode, src0, src1);
-} // emit_D3D_opcode_ss
-
-
-static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s%s, %s", ctx->coissue ? "+" : "", opcode, dst, src0);
-} // emit_D3D_opcode_ds
-
-
-static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s%s, %s, %s", ctx->coissue ? "+" : "",
-                opcode, dst, src0, src1);
-} // emit_D3D_opcode_dss
-
-
-static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s%s, %s, %s, %s", ctx->coissue ? "+" : "", 
-                opcode, dst, src0, src1, src2);
-} // emit_D3D_opcode_dsss
-
-
-static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
-    char src3[64]; make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3));
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx,"%s%s%s, %s, %s, %s, %s", ctx->coissue ? "+" : "",
-                opcode, dst, src0, src1, src2, src3);
-} // emit_D3D_opcode_dssss
-
-
-static void emit_D3D_opcode(Context *ctx, const char *opcode)
-{
-    opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
-    output_line(ctx, "%s%s", ctx->coissue ? "+" : "", opcode);
-} // emit_D3D_opcode
-
-
-#define EMIT_D3D_OPCODE_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_D_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_d(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_S_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_s(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_SS_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_ss(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_DS_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_ds(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_DSS_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_dss(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_dsss(ctx, #op); \
-    }
-#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
-    static void emit_D3D_##op(Context *ctx) { \
-        emit_D3D_opcode_dssss(ctx, #op); \
-    }
-
-EMIT_D3D_OPCODE_FUNC(NOP)
-EMIT_D3D_OPCODE_DS_FUNC(MOV)
-EMIT_D3D_OPCODE_DSS_FUNC(ADD)
-EMIT_D3D_OPCODE_DSS_FUNC(SUB)
-EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
-EMIT_D3D_OPCODE_DSS_FUNC(MUL)
-EMIT_D3D_OPCODE_DS_FUNC(RCP)
-EMIT_D3D_OPCODE_DS_FUNC(RSQ)
-EMIT_D3D_OPCODE_DSS_FUNC(DP3)
-EMIT_D3D_OPCODE_DSS_FUNC(DP4)
-EMIT_D3D_OPCODE_DSS_FUNC(MIN)
-EMIT_D3D_OPCODE_DSS_FUNC(MAX)
-EMIT_D3D_OPCODE_DSS_FUNC(SLT)
-EMIT_D3D_OPCODE_DSS_FUNC(SGE)
-EMIT_D3D_OPCODE_DS_FUNC(EXP)
-EMIT_D3D_OPCODE_DS_FUNC(LOG)
-EMIT_D3D_OPCODE_DS_FUNC(LIT)
-EMIT_D3D_OPCODE_DSS_FUNC(DST)
-EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
-EMIT_D3D_OPCODE_DS_FUNC(FRC)
-EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
-EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
-EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
-EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
-EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
-EMIT_D3D_OPCODE_S_FUNC(CALL)
-EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
-EMIT_D3D_OPCODE_SS_FUNC(LOOP)
-EMIT_D3D_OPCODE_FUNC(RET)
-EMIT_D3D_OPCODE_FUNC(ENDLOOP)
-EMIT_D3D_OPCODE_S_FUNC(LABEL)
-EMIT_D3D_OPCODE_DSS_FUNC(POW)
-EMIT_D3D_OPCODE_DSS_FUNC(CRS)
-EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
-EMIT_D3D_OPCODE_DS_FUNC(ABS)
-EMIT_D3D_OPCODE_DS_FUNC(NRM)
-EMIT_D3D_OPCODE_S_FUNC(REP)
-EMIT_D3D_OPCODE_FUNC(ENDREP)
-EMIT_D3D_OPCODE_S_FUNC(IF)
-EMIT_D3D_OPCODE_FUNC(ELSE)
-EMIT_D3D_OPCODE_FUNC(ENDIF)
-EMIT_D3D_OPCODE_FUNC(BREAK)
-EMIT_D3D_OPCODE_DS_FUNC(MOVA)
-EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
-EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
-EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
-EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
-EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
-EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
-EMIT_D3D_OPCODE_DS_FUNC(EXPP)
-EMIT_D3D_OPCODE_DS_FUNC(LOGP)
-EMIT_D3D_OPCODE_DSSS_FUNC(CND)
-EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
-EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
-EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
-EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
-EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
-EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
-EMIT_D3D_OPCODE_DSS_FUNC(BEM)
-EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
-EMIT_D3D_OPCODE_DS_FUNC(DSX)
-EMIT_D3D_OPCODE_DS_FUNC(DSY)
-EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
-EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
-EMIT_D3D_OPCODE_S_FUNC(BREAKP)
-
-// special cases for comparison opcodes...
-static const char *get_D3D_comparison_string(Context *ctx)
-{
-    static const char *comps[] = {
-        "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
-    };
-
-    if (ctx->instruction_controls >= STATICARRAYLEN(comps))
-    {
-        fail(ctx, "unknown comparison control");
-        return "";
-    } // if
-
-    return comps[ctx->instruction_controls];
-} // get_D3D_comparison_string
-
-static void emit_D3D_BREAKC(Context *ctx)
-{
-    char op[16];
-    snprintf(op, sizeof (op), "break%s", get_D3D_comparison_string(ctx));
-    emit_D3D_opcode_ss(ctx, op);
-} // emit_D3D_BREAKC
-
-static void emit_D3D_IFC(Context *ctx)
-{
-    char op[16];
-    snprintf(op, sizeof (op), "if%s", get_D3D_comparison_string(ctx));
-    emit_D3D_opcode_ss(ctx, op);
-} // emit_D3D_IFC
-
-static void emit_D3D_SETP(Context *ctx)
-{
-    char op[16];
-    snprintf(op, sizeof (op), "setp%s", get_D3D_comparison_string(ctx));
-    emit_D3D_opcode_dss(ctx, op);
-} // emit_D3D_SETP
-
-static void emit_D3D_DEF(Context *ctx)
-{
-    char dst[64];
-    make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
-    char val0[32];
-    char val1[32];
-    char val2[32];
-    char val3[32];
-    floatstr(ctx, val0, sizeof (val0), val[0], 0);
-    floatstr(ctx, val1, sizeof (val1), val[1], 0);
-    floatstr(ctx, val2, sizeof (val2), val[2], 0);
-    floatstr(ctx, val3, sizeof (val3), val[3], 0);
-    output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3);
-} // emit_D3D_DEF
-
-static void emit_D3D_DEFI(Context *ctx)
-{
-    char dst[64];
-    make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    const int32 *x = (const int32 *) ctx->dwords;
-    output_line(ctx, "defi%s, %d, %d, %d, %d", dst,
-                (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
-} // emit_D3D_DEFI
-
-static void emit_D3D_DEFB(Context *ctx)
-{
-    char dst[64];
-    make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    output_line(ctx, "defb%s, %s", dst, ctx->dwords[0] ? "true" : "false");
-} // emit_D3D_DEFB
-
-
-static void emit_D3D_DCL(Context *ctx)
-{
-    char dst[64];
-    make_D3D_destarg_string(ctx, dst, sizeof (dst));
-    const DestArgInfo *arg = &ctx->dest_arg;
-    const char *usage_str = "";
-    char index_str[16] = { '\0' };
-
-    if (arg->regtype == REG_TYPE_SAMPLER)
-    {
-        switch ((const TextureType) ctx->dwords[0])
-        {
-            case TEXTURE_TYPE_2D: usage_str = "_2d"; break;
-            case TEXTURE_TYPE_CUBE: usage_str = "_cube"; break;
-            case TEXTURE_TYPE_VOLUME: usage_str = "_volume"; break;
-            default: fail(ctx, "unknown sampler texture type"); return;
-        } // switch
-    } // if
-
-    else if (arg->regtype == REG_TYPE_MISCTYPE)
-    {
-        switch ((const MiscTypeType) arg->regnum)
-        {
-            case MISCTYPE_TYPE_POSITION:
-            case MISCTYPE_TYPE_FACE:
-                usage_str = "";  // just become "dcl vFace" or whatever.
-                break;
-            default: fail(ctx, "unknown misc register type"); return;
-        } // switch
-    } // else if
-
-    else
-    {
-        const uint32 usage = ctx->dwords[0];
-        const uint32 index = ctx->dwords[1];
-        usage_str = usagestrs[usage];
-        if (index != 0)
-            snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-    } // else
-
-    output_line(ctx, "dcl%s%s%s", usage_str, index_str, dst);
-} // emit_D3D_DCL
-
-
-static void emit_D3D_TEXCRD(Context *ctx)
-{
-    // this opcode looks and acts differently depending on the shader model.
-    if (shader_version_atleast(ctx, 1, 4))
-        emit_D3D_opcode_ds(ctx, "texcrd");
-    else
-        emit_D3D_opcode_d(ctx, "texcoord");
-} // emit_D3D_TEXCOORD
-
-static void emit_D3D_TEXLD(Context *ctx)
-{
-    // this opcode looks and acts differently depending on the shader model.
-    if (shader_version_atleast(ctx, 2, 0))
-    {
-        if (ctx->instruction_controls == CONTROL_TEXLD)
-           emit_D3D_opcode_dss(ctx, "texld");
-        else if (ctx->instruction_controls == CONTROL_TEXLDP)
-           emit_D3D_opcode_dss(ctx, "texldp");
-        else if (ctx->instruction_controls == CONTROL_TEXLDB)
-           emit_D3D_opcode_dss(ctx, "texldb");
-    } // if
-
-    else if (shader_version_atleast(ctx, 1, 4))
-    {
-        emit_D3D_opcode_ds(ctx, "texld");
-    } // else if
-
-    else
-    {
-        emit_D3D_opcode_d(ctx, "tex");
-    } // else
-} // emit_D3D_TEXLD
-
-static void emit_D3D_SINCOS(Context *ctx)
-{
-    // this opcode needs extra registers for sm2 and lower.
-    if (!shader_version_atleast(ctx, 3, 0))
-        emit_D3D_opcode_dsss(ctx, "sincos");
-    else
-        emit_D3D_opcode_ds(ctx, "sincos");
-} // emit_D3D_SINCOS
-
-
-#undef EMIT_D3D_OPCODE_FUNC
-#undef EMIT_D3D_OPCODE_D_FUNC
-#undef EMIT_D3D_OPCODE_S_FUNC
-#undef EMIT_D3D_OPCODE_SS_FUNC
-#undef EMIT_D3D_OPCODE_DS_FUNC
-#undef EMIT_D3D_OPCODE_DSS_FUNC
-#undef EMIT_D3D_OPCODE_DSSS_FUNC
-#undef EMIT_D3D_OPCODE_DSSSS_FUNC
-
-#endif  // SUPPORT_PROFILE_D3D
-
-
-#if !SUPPORT_PROFILE_BYTECODE
-#define PROFILE_EMITTER_BYTECODE(op)
-#else
-#undef AT_LEAST_ONE_PROFILE
-#define AT_LEAST_ONE_PROFILE 1
-#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
-
-static void emit_BYTECODE_start(Context *ctx, const char *profilestr)
-{
-    ctx->ignores_ctab = 1;
-} // emit_BYTECODE_start
-
-static void emit_BYTECODE_finalize(Context *ctx)
-{
-    // just copy the whole token stream and make all other emitters no-ops.
-    if (set_output(ctx, &ctx->mainline))
-    {
-        const size_t len = ((size_t) (ctx->tokens - ctx->orig_tokens)) * sizeof (uint32);
-        buffer_append(ctx->mainline, (const char *) ctx->orig_tokens, len);
-    } // if
-} // emit_BYTECODE_finalize
-
-static void emit_BYTECODE_end(Context *ctx) {}
-static void emit_BYTECODE_phase(Context *ctx) {}
-static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
-static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
-static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
-static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
-                                         int base, int size) {}
-static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
-                                  const VariableList *var) {}
-static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n,
-                                       MOJOSHADER_usage u, int i, int w,
-                                       int f) {}
-
-static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum)
-{
-    char regnum_str[16];
-    const char *regtype_str = get_D3D_register_string(ctx, rt, regnum,
-                                              regnum_str, sizeof (regnum_str));
-    char buf[64];
-    snprintf(buf, sizeof (buf), "%s%s", regtype_str, regnum_str);
-    return StrDup(ctx, buf);
-} // get_BYTECODE_varname
-
-static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size)
-{
-    char buf[64];
-    snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size);
-    return StrDup(ctx, buf);
-} // get_BYTECODE_const_array_varname
-
-#define EMIT_BYTECODE_OPCODE_FUNC(op) \
-    static void emit_BYTECODE_##op(Context *ctx) {}
-
-EMIT_BYTECODE_OPCODE_FUNC(RESERVED)
-EMIT_BYTECODE_OPCODE_FUNC(NOP)
-EMIT_BYTECODE_OPCODE_FUNC(MOV)
-EMIT_BYTECODE_OPCODE_FUNC(ADD)
-EMIT_BYTECODE_OPCODE_FUNC(SUB)
-EMIT_BYTECODE_OPCODE_FUNC(MAD)
-EMIT_BYTECODE_OPCODE_FUNC(MUL)
-EMIT_BYTECODE_OPCODE_FUNC(RCP)
-EMIT_BYTECODE_OPCODE_FUNC(RSQ)
-EMIT_BYTECODE_OPCODE_FUNC(DP3)
-EMIT_BYTECODE_OPCODE_FUNC(DP4)
-EMIT_BYTECODE_OPCODE_FUNC(MIN)
-EMIT_BYTECODE_OPCODE_FUNC(MAX)
-EMIT_BYTECODE_OPCODE_FUNC(SLT)
-EMIT_BYTECODE_OPCODE_FUNC(SGE)
-EMIT_BYTECODE_OPCODE_FUNC(EXP)
-EMIT_BYTECODE_OPCODE_FUNC(LOG)
-EMIT_BYTECODE_OPCODE_FUNC(LIT)
-EMIT_BYTECODE_OPCODE_FUNC(DST)
-EMIT_BYTECODE_OPCODE_FUNC(LRP)
-EMIT_BYTECODE_OPCODE_FUNC(FRC)
-EMIT_BYTECODE_OPCODE_FUNC(M4X4)
-EMIT_BYTECODE_OPCODE_FUNC(M4X3)
-EMIT_BYTECODE_OPCODE_FUNC(M3X4)
-EMIT_BYTECODE_OPCODE_FUNC(M3X3)
-EMIT_BYTECODE_OPCODE_FUNC(M3X2)
-EMIT_BYTECODE_OPCODE_FUNC(CALL)
-EMIT_BYTECODE_OPCODE_FUNC(CALLNZ)
-EMIT_BYTECODE_OPCODE_FUNC(LOOP)
-EMIT_BYTECODE_OPCODE_FUNC(RET)
-EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP)
-EMIT_BYTECODE_OPCODE_FUNC(LABEL)
-EMIT_BYTECODE_OPCODE_FUNC(POW)
-EMIT_BYTECODE_OPCODE_FUNC(CRS)
-EMIT_BYTECODE_OPCODE_FUNC(SGN)
-EMIT_BYTECODE_OPCODE_FUNC(ABS)
-EMIT_BYTECODE_OPCODE_FUNC(NRM)
-EMIT_BYTECODE_OPCODE_FUNC(SINCOS)
-EMIT_BYTECODE_OPCODE_FUNC(REP)
-EMIT_BYTECODE_OPCODE_FUNC(ENDREP)
-EMIT_BYTECODE_OPCODE_FUNC(IF)
-EMIT_BYTECODE_OPCODE_FUNC(ELSE)
-EMIT_BYTECODE_OPCODE_FUNC(ENDIF)
-EMIT_BYTECODE_OPCODE_FUNC(BREAK)
-EMIT_BYTECODE_OPCODE_FUNC(MOVA)
-EMIT_BYTECODE_OPCODE_FUNC(TEXKILL)
-EMIT_BYTECODE_OPCODE_FUNC(TEXBEM)
-EMIT_BYTECODE_OPCODE_FUNC(TEXBEML)
-EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR)
-EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC)
-EMIT_BYTECODE_OPCODE_FUNC(EXPP)
-EMIT_BYTECODE_OPCODE_FUNC(LOGP)
-EMIT_BYTECODE_OPCODE_FUNC(CND)
-EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB)
-EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH)
-EMIT_BYTECODE_OPCODE_FUNC(TEXDP3)
-EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3)
-EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH)
-EMIT_BYTECODE_OPCODE_FUNC(CMP)
-EMIT_BYTECODE_OPCODE_FUNC(BEM)
-EMIT_BYTECODE_OPCODE_FUNC(DP2ADD)
-EMIT_BYTECODE_OPCODE_FUNC(DSX)
-EMIT_BYTECODE_OPCODE_FUNC(DSY)
-EMIT_BYTECODE_OPCODE_FUNC(TEXLDD)
-EMIT_BYTECODE_OPCODE_FUNC(TEXLDL)
-EMIT_BYTECODE_OPCODE_FUNC(BREAKP)
-EMIT_BYTECODE_OPCODE_FUNC(BREAKC)
-EMIT_BYTECODE_OPCODE_FUNC(IFC)
-EMIT_BYTECODE_OPCODE_FUNC(SETP)
-EMIT_BYTECODE_OPCODE_FUNC(DEF)
-EMIT_BYTECODE_OPCODE_FUNC(DEFI)
-EMIT_BYTECODE_OPCODE_FUNC(DEFB)
-EMIT_BYTECODE_OPCODE_FUNC(DCL)
-EMIT_BYTECODE_OPCODE_FUNC(TEXCRD)
-EMIT_BYTECODE_OPCODE_FUNC(TEXLD)
-
-#undef EMIT_BYTECODE_OPCODE_FUNC
-
-#endif  // SUPPORT_PROFILE_BYTECODE
-
+PREDECLARE_PROFILE(D3D)  // prototypes for emit_D3D_*, get_D3D_*
+#endif  // SUPPORT_PROFILE_D3D
 
 #if !SUPPORT_PROFILE_GLSL
 #define PROFILE_EMITTER_GLSL(op)
@@ -1752,4593 +242,17 @@
 #undef AT_LEAST_ONE_PROFILE
 #define AT_LEAST_ONE_PROFILE 1
 #define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,
-
-#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
-    static void emit_GLSL_##op(Context *ctx) { \
-        fail(ctx, #op " unimplemented in glsl profile"); \
-    }
-
-static inline const char *get_GLSL_register_string(Context *ctx,
-                        const RegisterType regtype, const int regnum,
-                        char *regnum_str, const size_t regnum_size)
-{
-    // turns out these are identical at the moment.
-    return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
-} // get_GLSL_register_string
-
-static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype)
-{
-    switch (rtype)
-    {
-        case REG_TYPE_CONST: return "vec4";
-        case REG_TYPE_CONSTINT: return "ivec4";
-        case REG_TYPE_CONSTBOOL: return "bool";
-        default: fail(ctx, "BUG: used a uniform we don't know how to define.");
-    } // switch
-
-    return NULL;
-} // get_GLSL_uniform_type
-
-static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt,
-                                           int regnum, char *buf,
-                                           const size_t len)
-{
-    char regnum_str[16];
-    const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum,
-                                              regnum_str, sizeof (regnum_str));
-    snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str);
-    return buf;
-} // get_GLSL_varname_in_buf
-
-
-static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
-{
-    char buf[64];
-    get_GLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_GLSL_varname
-
-
-static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx,
-                                                const int base, const int size,
-                                                char *buf, const size_t buflen)
-{
-    const char *type = ctx->shader_type_str;
-    snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size);
-    return buf;
-} // get_GLSL_const_array_varname_in_buf
-
-static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
-{
-    char buf[64];
-    get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_GLSL_const_array_varname
-
-
-static inline const char *get_GLSL_input_array_varname(Context *ctx,
-                                                char *buf, const size_t buflen)
-{
-    snprintf(buf, buflen, "%s", "vertex_input_array");
-    return buf;
-} // get_GLSL_input_array_varname
-
-
-static const char *get_GLSL_uniform_array_varname(Context *ctx,
-                                                  const RegisterType regtype,
-                                                  char *buf, const size_t len)
-{
-    const char *shadertype = ctx->shader_type_str;
-    const char *type = get_GLSL_uniform_type(ctx, regtype);
-    snprintf(buf, len, "%s_uniforms_%s", shadertype, type);
-    return buf;
-} // get_GLSL_uniform_array_varname
-
-static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-    return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
-} // get_GLSL_destarg_varname
-
-static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx,
-                                           char *buf, size_t len)
-{
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-    return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
-} // get_GLSL_srcarg_varname
-
-
-static const char *make_GLSL_destarg_assign(Context *, char *, const size_t,
-                                            const char *, ...) ISPRINTF(4,5);
-
-static const char *make_GLSL_destarg_assign(Context *ctx, char *buf,
-                                            const size_t buflen,
-                                            const char *fmt, ...)
-{
-    int need_parens = 0;
-    const DestArgInfo *arg = &ctx->dest_arg;
-
-    if (arg->writemask == 0)
-    {
-        *buf = '\0';
-        return buf;  // no writemask? It's a no-op.
-    } // if
-
-    char clampbuf[32] = { '\0' };
-    const char *clampleft = "";
-    const char *clampright = "";
-    if (arg->result_mod & MOD_SATURATE)
-    {
-        const int vecsize = vecsize_from_writemask(arg->writemask);
-        clampleft = "clamp(";
-        if (vecsize == 1)
-            clampright = ", 0.0, 1.0)";
-        else
-        {
-            snprintf(clampbuf, sizeof (clampbuf),
-                     ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
-            clampright = clampbuf;
-        } // else
-    } // if
-
-    // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
-
-    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
-    assert((arg->result_mod & MOD_CENTROID) == 0);
-
-    if (ctx->predicated)
-    {
-        fail(ctx, "predicated destinations unsupported");  // !!! FIXME
-        *buf = '\0';
-        return buf;
-    } // if
-
-    char operation[256];
-    va_list ap;
-    va_start(ap, fmt);
-    const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
-    va_end(ap);
-    if (len >= sizeof (operation))
-    {
-        fail(ctx, "operation string too large");  // I'm lazy.  :P
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const char *result_shift_str = "";
-    switch (arg->result_shift)
-    {
-        case 0x1: result_shift_str = " * 2.0"; break;
-        case 0x2: result_shift_str = " * 4.0"; break;
-        case 0x3: result_shift_str = " * 8.0"; break;
-        case 0xD: result_shift_str = " / 8.0"; break;
-        case 0xE: result_shift_str = " / 4.0"; break;
-        case 0xF: result_shift_str = " / 2.0"; break;
-    } // switch
-    need_parens |= (result_shift_str[0] != '\0');
-
-    char regnum_str[16];
-    const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
-                                                       arg->regnum, regnum_str,
-                                                       sizeof (regnum_str));
-    char writemask_str[6];
-    size_t i = 0;
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !writemask_xyzw(arg->writemask))
-    {
-        writemask_str[i++] = '.';
-        if (arg->writemask0) writemask_str[i++] = 'x';
-        if (arg->writemask1) writemask_str[i++] = 'y';
-        if (arg->writemask2) writemask_str[i++] = 'z';
-        if (arg->writemask3) writemask_str[i++] = 'w';
-    } // if
-    writemask_str[i] = '\0';
-    assert(i < sizeof (writemask_str));
-
-    const char *leftparen = (need_parens) ? "(" : "";
-    const char *rightparen = (need_parens) ? ")" : "";
-
-    snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;",
-             ctx->shader_type_str, regtype_str, regnum_str, writemask_str,
-             clampleft, leftparen, operation, rightparen, result_shift_str,
-             clampright);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_GLSL_destarg_assign
-
-
-static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
-                                      const int swizzle, const int writemask)
-{
-    size_t i = 0;
-    if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
-    {
-        const int writemask0 = (writemask >> 0) & 0x1;
-        const int writemask1 = (writemask >> 1) & 0x1;
-        const int writemask2 = (writemask >> 2) & 0x1;
-        const int writemask3 = (writemask >> 3) & 0x1;
-
-        const int swizzle_x = (swizzle >> 0) & 0x3;
-        const int swizzle_y = (swizzle >> 2) & 0x3;
-        const int swizzle_z = (swizzle >> 4) & 0x3;
-        const int swizzle_w = (swizzle >> 6) & 0x3;
-
-        swiz_str[i++] = '.';
-        if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
-        if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
-        if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
-        if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
-    } // if
-    assert(i < strsize);
-    swiz_str[i] = '\0';
-    return swiz_str;
-} // make_GLSL_swizzle_string
-
-
-static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx,
-                                           const int writemask, char *buf,
-                                           const size_t buflen)
-{
-    *buf = '\0';
-
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-
-    const char *premod_str = "";
-    const char *postmod_str = "";
-    switch (arg->src_mod)
-    {
-        case SRCMOD_NEGATE:
-            premod_str = "-";
-            break;
-
-        case SRCMOD_BIASNEGATE:
-            premod_str = "-(";
-            postmod_str = " - 0.5)";
-            break;
-
-        case SRCMOD_BIAS:
-            premod_str = "(";
-            postmod_str = " - 0.5)";
-            break;
-
-        case SRCMOD_SIGNNEGATE:
-            premod_str = "-((";
-            postmod_str = " - 0.5) * 2.0)";
-            break;
-
-        case SRCMOD_SIGN:
-            premod_str = "((";
-            postmod_str = " - 0.5) * 2.0)";
-            break;
-
-        case SRCMOD_COMPLEMENT:
-            premod_str = "(1.0 - ";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_X2NEGATE:
-            premod_str = "-(";
-            postmod_str = " * 2.0)";
-            break;
-
-        case SRCMOD_X2:
-            premod_str = "(";
-            postmod_str = " * 2.0)";
-            break;
-
-        case SRCMOD_DZ:
-            fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
-            postmod_str = "_dz";
-            break;
-
-        case SRCMOD_DW:
-            fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
-            postmod_str = "_dw";
-            break;
-
-        case SRCMOD_ABSNEGATE:
-            premod_str = "-abs(";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_ABS:
-            premod_str = "abs(";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_NOT:
-            premod_str = "!";
-            break;
-
-        case SRCMOD_NONE:
-        case SRCMOD_TOTAL:
-             break;  // stop compiler whining.
-    } // switch
-
-    const char *regtype_str = NULL;
-
-    if (!arg->relative)
-    {
-        regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum,
-                                              (char *) alloca(64), 64);
-    } // if
-
-    const char *rel_lbracket = "";
-    char rel_offset[32] = { '\0' };
-    const char *rel_rbracket = "";
-    char rel_swizzle[4] = { '\0' };
-    const char *rel_regtype_str = "";
-    if (arg->relative)
-    {
-        if (arg->regtype == REG_TYPE_INPUT)
-            regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64);
-        else
-        {
-            assert(arg->regtype == REG_TYPE_CONST);
-            const int arrayidx = arg->relative_array->index;
-            const int offset = arg->regnum - arrayidx;
-            assert(offset >= 0);
-            if (arg->relative_array->constant)
-            {
-                const int arraysize = arg->relative_array->count;
-                regtype_str = get_GLSL_const_array_varname_in_buf(ctx,
-                                arrayidx, arraysize, (char *) alloca(64), 64);
-                if (offset != 0)
-                    snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
-            } // if
-            else
-            {
-                regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype,
-                                                      (char *) alloca(64), 64);
-                if (offset == 0)
-                {
-                    snprintf(rel_offset, sizeof (rel_offset),
-                             "ARRAYBASE_%d + ", arrayidx);
-                } // if
-                else
-                {
-                    snprintf(rel_offset, sizeof (rel_offset),
-                             "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
-                } // else
-            } // else
-        } // else
-
-        rel_lbracket = "[";
-
-        if (arg->relative_regtype == REG_TYPE_LOOP)
-        {
-            rel_regtype_str = "aL";
-            rel_swizzle[0] = '\0';
-            rel_swizzle[1] = '\0';
-            rel_swizzle[2] = '\0';
-        } // if
-        else
-        {
-            rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype,
-                                                      arg->relative_regnum,
-                                                      (char *) alloca(64), 64);
-            rel_swizzle[0] = '.';
-            rel_swizzle[1] = swizzle_channels[arg->relative_component];
-            rel_swizzle[2] = '\0';
-        } // else
-        rel_rbracket = "]";
-    } // if
-
-    char swiz_str[6] = { '\0' };
-    if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
-    {
-        make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
-                                 arg->swizzle, writemask);
-    } // if
-
-    if (regtype_str == NULL)
-    {
-        fail(ctx, "Unknown source register type.");
-        return buf;
-    } // if
-
-    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
-             premod_str, regtype_str, rel_lbracket, rel_offset,
-             rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str,
-             postmod_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_GLSL_srcarg_string
-
-// generate some convenience functions.
-#define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \
-    static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \
-                                                const size_t idx, char *buf, \
-                                                const size_t buflen) { \
-        return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
-    }
-MAKE_GLSL_SRCARG_STRING_(x, (1 << 0))
-MAKE_GLSL_SRCARG_STRING_(y, (1 << 1))
-MAKE_GLSL_SRCARG_STRING_(z, (1 << 2))
-MAKE_GLSL_SRCARG_STRING_(w, (1 << 3))
-MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0))
-MAKE_GLSL_SRCARG_STRING_(full, 0xF)
-MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
-MAKE_GLSL_SRCARG_STRING_(vec3, 0x7)
-MAKE_GLSL_SRCARG_STRING_(vec2, 0x3)
-#undef MAKE_GLSL_SRCARG_STRING_
-
-// special cases for comparison opcodes...
-
-static const char *get_GLSL_comparison_string_scalar(Context *ctx)
-{
-    static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };
-    if (ctx->instruction_controls >= STATICARRAYLEN(comps))
-    {
-        fail(ctx, "unknown comparison control");
-        return "";
-    } // if
-
-    return comps[ctx->instruction_controls];
-} // get_GLSL_comparison_string_scalar
-
-static const char *get_GLSL_comparison_string_vector(Context *ctx)
-{
-    static const char *comps[] = {
-        "", "greaterThan", "equal", "greaterThanEqual", "lessThan",
-        "notEqual", "lessThanEqual"
-    };
-
-    if (ctx->instruction_controls >= STATICARRAYLEN(comps))
-    {
-        fail(ctx, "unknown comparison control");
-        return "";
-    } // if
-
-    return comps[ctx->instruction_controls];
-} // get_GLSL_comparison_string_vector
-
-
-static void emit_GLSL_start(Context *ctx, const char *profilestr)
-{
-    if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
-    {
-        failf(ctx, "Shader type %u unsupported in this profile.",
-              (uint) ctx->shader_type);
-        return;
-    } // if
-
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0)
-    {
-        // No gl_FragData[] before GLSL 1.10, so we have to force the version.
-        push_output(ctx, &ctx->preflight);
-        output_line(ctx, "#version 110");
-        pop_output(ctx);
-    } // else if
-
-    #if SUPPORT_PROFILE_GLSL120
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0)
-    {
-        ctx->profile_supports_glsl120 = 1;
-        push_output(ctx, &ctx->preflight);
-        output_line(ctx, "#version 120");
-        pop_output(ctx);
-    } // else if
-    #endif
-
-    #if SUPPORT_PROFILE_GLSLES
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSLES) == 0)
-    {
-        ctx->profile_supports_glsles = 1;
-        push_output(ctx, &ctx->preflight);
-        output_line(ctx, "#version 100");
-        if (shader_is_vertex(ctx))
-            output_line(ctx, "precision highp float;");
-        else
-            output_line(ctx, "precision mediump float;");
-        output_line(ctx, "precision mediump int;");
-        pop_output(ctx);
-    } // else if
-    #endif
-
-    else
-    {
-        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
-        return;
-    } // else
-
-    push_output(ctx, &ctx->mainline_intro);
-    output_line(ctx, "void main()");
-    output_line(ctx, "{");
-    pop_output(ctx);
-
-    set_output(ctx, &ctx->mainline);
-    ctx->indent++;
-} // emit_GLSL_start
-
-static void emit_GLSL_RET(Context *ctx);
-static void emit_GLSL_end(Context *ctx)
-{
-    // ps_1_* writes color to r0 instead oC0. We move it to the right place.
-    // We don't have to worry about a RET opcode messing this up, since
-    //  RET isn't available before ps_2_0.
-    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
-    {
-        const char *shstr = ctx->shader_type_str;
-        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
-        output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr);
-    } // if
-    else if (shader_is_vertex(ctx))
-    {
-#ifdef MOJOSHADER_FLIP_RENDERTARGET
-        output_line(ctx, "gl_Position.y = gl_Position.y * vpFlip;");
-#endif
-#ifdef MOJOSHADER_DEPTH_CLIPPING
-        output_line(ctx, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;");
-#endif
-    } // else if
-
-    // force a RET opcode if we're at the end of the stream without one.
-    if (ctx->previous_opcode != OPCODE_RET)
-        emit_GLSL_RET(ctx);
-} // emit_GLSL_end
-
-static void emit_GLSL_phase(Context *ctx)
-{
-    // no-op in GLSL.
-} // emit_GLSL_phase
-
-static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype,
-                                      const int size)
-{
-    if (size > 0)
-    {
-        char buf[64];
-        get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf));
-        const char *typ;
-        switch (regtype)
-        {
-            case REG_TYPE_CONST: typ = "vec4"; break;
-            case REG_TYPE_CONSTINT: typ ="ivec4"; break;
-            case REG_TYPE_CONSTBOOL: typ = "bool"; break;
-            default:
-            {
-                fail(ctx, "BUG: used a uniform we don't know how to define.");
-                return;
-            } // default
-        } // switch
-        output_line(ctx, "uniform %s %s[%d];", typ, buf, size);
-    } // if
-} // output_GLSL_uniform_array
-
-static void emit_GLSL_finalize(Context *ctx)
-{
-    // throw some blank lines around to make source more readable.
-    push_output(ctx, &ctx->globals);
-    output_blank_line(ctx);
-    pop_output(ctx);
-
-    // If we had a relative addressing of REG_TYPE_INPUT, we need to build
-    //  an array for it at the start of main(). GLSL doesn't let you specify
-    //  arrays of attributes.
-    //vec4 blah_array[BIGGEST_ARRAY];
-    if (ctx->have_relative_input_registers) // !!! FIXME
-        fail(ctx, "Relative addressing of input registers not supported.");
-
-    push_output(ctx, &ctx->preflight);
-    output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
-    output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
-    output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);
-#ifdef MOJOSHADER_FLIP_RENDERTARGET
-    if (shader_is_vertex(ctx))
-        output_line(ctx, "uniform float vpFlip;");
-#endif
-    pop_output(ctx);
-} // emit_GLSL_finalize
-
-static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
-{
-    char varname[64];
-    get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    push_output(ctx, &ctx->globals);
-    switch (regtype)
-    {
-        case REG_TYPE_ADDRESS:
-            if (shader_is_vertex(ctx))
-                output_line(ctx, "ivec4 %s;", varname);
-            else if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
-            {
-                // We have to map texture registers to temps for ps_1_1, since
-                //  they work like temps, initialize with tex coords, and the
-                //  ps_1_1 TEX opcode expects to overwrite it.
-                if (!shader_version_atleast(ctx, 1, 4))
-                {
-#if SUPPORT_PROFILE_GLSLES
-                    // GLSL ES does not have gl_TexCoord
-                    if (support_glsles(ctx))
-                        output_line(ctx, "vec4 %s = io_%i_%i;",
-                                    varname, MOJOSHADER_USAGE_TEXCOORD, regnum);
-                    else
-#endif
-                    output_line(ctx, "vec4 %s = gl_TexCoord[%d];",
-                                varname, regnum);
-                } // if
-            } // else if
-            break;
-        case REG_TYPE_PREDICATE:
-            output_line(ctx, "bvec4 %s;", varname);
-            break;
-        case REG_TYPE_TEMP:
-            output_line(ctx, "vec4 %s;", varname);
-            break;
-        case REG_TYPE_LOOP:
-            break; // no-op. We declare these in for loops at the moment.
-        case REG_TYPE_LABEL:
-            break; // no-op. If we see it here, it means we optimized it out.
-        default:
-            fail(ctx, "BUG: we used a register we don't know how to define.");
-            break;
-    } // switch
-    pop_output(ctx);
-} // emit_GLSL_global
-
-static void emit_GLSL_array(Context *ctx, VariableList *var)
-{
-    // All uniforms (except constant arrays, which only get pushed once at
-    //  compile time) are now packed into a single array, so we can batch
-    //  the uniform transfers. So this doesn't actually define an array
-    //  here; the one, big array is emitted during finalization instead.
-    // However, we need to #define the offset into the one, big array here,
-    //  and let dereferences use that #define.
-    const int base = var->index;
-    const int glslbase = ctx->uniform_float4_count;
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase);
-    pop_output(ctx);
-    var->emit_position = glslbase;
-} // emit_GLSL_array
-
-static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
-                                  int base, int size)
-{
-    char varname[64];
-    get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname));
-
-#if 0
-    // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
-    // !!! FIXME:  (the 1.20 spec says it should work, though, I think...)
-    if (support_glsl120(ctx))
-    {
-        // GLSL 1.20 can do constant arrays.
-        const char *cstr = NULL;
-        push_output(ctx, &ctx->globals);
-        output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size);
-        ctx->indent++;
-
-        int i;
-        for (i = 0; i < size; i++)
-        {
-            while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
-                clist = clist->next;
-            assert(clist->constant.index == (base + i));
-
-            char val0[32];
-            char val1[32];
-            char val2[32];
-            char val3[32];
-            floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
-            floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
-            floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
-            floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
-
-            output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
-                        (i < (size-1)) ? "," : "");
-
-            clist = clist->next;
-        } // for
-
-        ctx->indent--;
-        output_line(ctx, ");");
-        pop_output(ctx);
-    } // if
-
-    else
-#endif
-    {
-        // stock GLSL 1.0 can't do constant arrays, so make a uniform array
-        //  and have the OpenGL glue assign it at link time. Lame!
-        push_output(ctx, &ctx->globals);
-        output_line(ctx, "uniform vec4 %s[%d];", varname, size);
-        pop_output(ctx);
-    } // else
-} // emit_GLSL_const_array
-
-static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
-                              const VariableList *var)
-{
-    // Now that we're pushing all the uniforms as one big array, pack these
-    //  down, so if we only use register c439, it'll actually map to
-    //  glsl_uniforms_vec4[0]. As we push one big array, this will prevent
-    //  uploading unused data.
-
-    char varname[64];
-    char name[64];
-    int index = 0;
-
-    get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    push_output(ctx, &ctx->globals);
-
-    if (var == NULL)
-    {
-        get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
-
-        if (regtype == REG_TYPE_CONST)
-            index = ctx->uniform_float4_count;
-        else if (regtype == REG_TYPE_CONSTINT)
-            index = ctx->uniform_int4_count;
-        else if (regtype == REG_TYPE_CONSTBOOL)
-            index = ctx->uniform_bool_count;
-        else  // get_GLSL_uniform_array_varname() would have called fail().
-            assert(isfail(ctx));
-
-        output_line(ctx, "#define %s %s[%d]", varname, name, index);
-    } // if
-
-    else
-    {
-        const int arraybase = var->index;
-        if (var->constant)
-        {
-            get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count,
-                                                name, sizeof (name));
-            index = (regnum - arraybase);
-        } // if
-        else
-        {
-            assert(var->emit_position != -1);
-            get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
-            index = (regnum - arraybase) + var->emit_position;
-        } // else
-
-        output_line(ctx, "#define %s %s[%d]", varname, name, index);
-    } // else
-
-    pop_output(ctx);
-} // emit_GLSL_uniform
-
-static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
-{
-    const char *type = "";
-    switch (ttype)
-    {
-        case TEXTURE_TYPE_2D: type = "sampler2D"; break;
-        case TEXTURE_TYPE_CUBE: type = "samplerCube"; break;
-        case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break;
-        default: fail(ctx, "BUG: used a sampler we don't know how to define.");
-    } // switch
-
-    char var[64];
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var));
-
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "uniform %s %s;", type, var);
-    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
-    {
-        char name[64];
-        const int index = ctx->uniform_float4_count;
-        ctx->uniform_float4_count += 2;
-        get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
-        output_line(ctx, "#define %s_texbem %s[%d]", var, name, index);
-        output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1);
-    } // if
-    pop_output(ctx);
-} // emit_GLSL_sampler
-
-static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
-                                MOJOSHADER_usage usage, int index, int wmask,
-                                int flags)
-{
-    // !!! FIXME: this function doesn't deal with write masks at all yet!
-    const char *usage_str = NULL;
-    const char *arrayleft = "";
-    const char *arrayright = "";
-    char index_str[16] = { '\0' };
-    char var[64];
-
-    get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
-
-    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
-
-    if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
-        snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-
-    if (shader_is_vertex(ctx))
-    {
-        // pre-vs3 output registers.
-        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
-        //  output registers.
-        if (!shader_version_atleast(ctx, 3, 0))
-        {
-            if (regtype == REG_TYPE_RASTOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                index = regnum;
-                switch ((const RastOutType) regnum)
-                {
-                    case RASTOUT_TYPE_POSITION:
-                        usage = MOJOSHADER_USAGE_POSITION;
-                        break;
-                    case RASTOUT_TYPE_FOG:
-                        usage = MOJOSHADER_USAGE_FOG;
-                        break;
-                    case RASTOUT_TYPE_POINT_SIZE:
-                        usage = MOJOSHADER_USAGE_POINTSIZE;
-                        break;
-                } // switch
-            } // if
-
-            else if (regtype == REG_TYPE_ATTROUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_COLOR;
-                index = regnum;
-            } // else if
-
-            else if (regtype == REG_TYPE_TEXCRDOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_TEXCOORD;
-                index = regnum;
-            } // else if
-        } // if
-
-        // to avoid limitations of various GL entry points for input
-        // attributes (glSecondaryColorPointer() can only take 3 component
-        // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
-        // issues), we set up all inputs as generic vertex attributes, so we
-        // can pass data in just about any form, and ignore the built-in GLSL
-        // attributes like gl_SecondaryColor. Output needs to use the the
-        // built-ins, though, but we don't have to worry about the GL entry
-        // point limitations there.
-
-        if (regtype == REG_TYPE_INPUT)
-        {
-            push_output(ctx, &ctx->globals);
-            output_line(ctx, "attribute vec4 %s;", var);
-            pop_output(ctx);
-        } // if
-
-        else if (regtype == REG_TYPE_OUTPUT)
-        {
-            switch (usage)
-            {
-                case MOJOSHADER_USAGE_POSITION:
-                    if (index == 0)
-                    {
-                        usage_str = "gl_Position";
-                    } // if
-                    break;
-                case MOJOSHADER_USAGE_POINTSIZE:
-                    usage_str = "gl_PointSize";
-                    break;
-                case MOJOSHADER_USAGE_COLOR:
-#if SUPPORT_PROFILE_GLSLES
-                    if (support_glsles(ctx))
-                        break; // GLSL ES does not have gl_FrontColor
-#endif
-                    index_str[0] = '\0';  // no explicit number.
-                    if (index == 0)
-                    {
-                        usage_str = "gl_FrontColor";
-                    } // if
-                    else if (index == 1)
-                    {
-                        usage_str = "gl_FrontSecondaryColor";
-                    } // else if
-                    break;
-                case MOJOSHADER_USAGE_FOG:
-                    usage_str = "gl_FogFragCoord";
-                    break;
-                case MOJOSHADER_USAGE_TEXCOORD:
-#if SUPPORT_PROFILE_GLSLES
-                    if (support_glsles(ctx))
-                        break; // GLSL ES does not have gl_TexCoord
-#endif
-                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-                    usage_str = "gl_TexCoord";
-                    arrayleft = "[";
-                    arrayright = "]";
-                    break;
-                default:
-                    // !!! FIXME: we need to deal with some more built-in varyings here.
-                    break;
-            } // switch
-
-            // !!! FIXME: the #define is a little hacky, but it means we don't
-            // !!! FIXME:  have to track these separately if this works.
-            push_output(ctx, &ctx->globals);
-            // no mapping to built-in var? Just make it a regular global, pray.
-            if (usage_str == NULL)
-            {
-#if SUPPORT_PROFILE_GLSLES
-                if (support_glsles(ctx))
-                    output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index);
-                else
-#endif
-                output_line(ctx, "varying vec4 io_%i_%i;", usage, index);
-                output_line(ctx, "#define %s io_%i_%i", var, usage, index);
-            } // if
-            else
-            {
-                output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
-                            arrayleft, index_str, arrayright);
-            } // else
-            pop_output(ctx);
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown vertex shader attribute register");
-        } // else
-    } // if
-
-    else if (shader_is_pixel(ctx))
-    {
-        // samplers DCLs get handled in emit_GLSL_sampler().
-
-        if (flags & MOD_CENTROID)  // !!! FIXME
-        {
-            failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
-            return;
-        } // if
-
-        if (regtype == REG_TYPE_COLOROUT)
-        {
-            if (!ctx->have_multi_color_outputs)
-                usage_str = "gl_FragColor";  // maybe faster?
-            else
-            {
-                snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
-                usage_str = "gl_FragData";
-                arrayleft = "[";
-                arrayright = "]";
-            } // else
-        } // if
-
-        else if (regtype == REG_TYPE_DEPTHOUT)
-            usage_str = "gl_FragDepth";
-
-        // !!! FIXME: can you actualy have a texture register with COLOR usage?
-        else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
-        {
-#if SUPPORT_PROFILE_GLSLES
-            if (!support_glsles(ctx))
-            {
-#endif
-            if (usage == MOJOSHADER_USAGE_TEXCOORD)
-            {
-                // ps_1_1 does a different hack for this attribute.
-                //  Refer to emit_GLSL_global()'s REG_TYPE_ADDRESS code.
-                if (shader_version_atleast(ctx, 1, 4))
-                {
-                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-                    usage_str = "gl_TexCoord";
-                    arrayleft = "[";
-                    arrayright = "]";
-                } // if
-            } // if
-
-            else if (usage == MOJOSHADER_USAGE_COLOR)
-            {
-                index_str[0] = '\0';  // no explicit number.
-                if (index == 0)
-                {
-                    usage_str = "gl_Color";
-                } // if
-                else if (index == 1)
-                {
-                    usage_str = "gl_SecondaryColor";
-                } // else if
-                // FIXME: Does this even matter when we have varyings? -flibit
-                // else
-                //    fail(ctx, "unsupported color index");
-            } // else if
-#if SUPPORT_PROFILE_GLSLES
-            } // if
-#endif
-        } // else if
-
-        else if (regtype == REG_TYPE_MISCTYPE)
-        {
-            const MiscTypeType mt = (MiscTypeType) regnum;
-            if (mt == MISCTYPE_TYPE_FACE)
-            {
-                push_output(ctx, &ctx->globals);
-                output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var);
-                pop_output(ctx);
-            } // if
-            else if (mt == MISCTYPE_TYPE_POSITION)
-            {
-                index_str[0] = '\0';  // no explicit number.
-                usage_str = "gl_FragCoord";  // !!! FIXME: is this the same coord space as D3D?
-            } // else if
-            else
-            {
-                fail(ctx, "BUG: unhandled misc register");
-            } // else
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown pixel shader attribute register");
-        } // else
-
-        push_output(ctx, &ctx->globals);
-        // no mapping to built-in var? Just make it a regular global, pray.
-        if (usage_str == NULL)
-        {
-#if SUPPORT_PROFILE_GLSLES
-            if (support_glsles(ctx))
-                output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index);
-            else
+PREDECLARE_PROFILE(GLSL)
 #endif
-            output_line(ctx, "varying vec4 io_%i_%i;", usage, index);
-            output_line(ctx, "#define %s io_%i_%i", var, usage, index);
-        } // if
-        else
-        {
-            output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
-                        arrayleft, index_str, arrayright);
-        } // else
-        pop_output(ctx);
-    } // else if
-
-    else
-    {
-        fail(ctx, "Unknown shader type");  // state machine should catch this.
-    } // else
-} // emit_GLSL_attribute
-
-static void emit_GLSL_NOP(Context *ctx)
-{
-    // no-op is a no-op.  :)
-} // emit_GLSL_NOP
-
-static void emit_GLSL_MOV(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MOV
-
-static void emit_GLSL_ADD(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_ADD
-
-static void emit_GLSL_SUB(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SUB
-
-static void emit_GLSL_MAD(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MAD
-
-static void emit_GLSL_MUL(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MUL
-
-static void emit_GLSL_RCP(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_RCP
-
-static void emit_GLSL_RSQ(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_RSQ
-
-static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1,
-                              const char *extra)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char castleft[16] = { '\0' };
-    const char *castright = "";
-    if (vecsize != 1)
-    {
-        snprintf(castleft, sizeof (castleft), "vec%d(", vecsize);
-        castright = ")";
-    } // if
-
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s",
-                             castleft, src0, src1, extra, castright);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_dotprod
-
-static void emit_GLSL_DP3(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
-    emit_GLSL_dotprod(ctx, src0, src1, "");
-} // emit_GLSL_DP3
-
-static void emit_GLSL_DP4(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1));
-    emit_GLSL_dotprod(ctx, src0, src1, "");
-} // emit_GLSL_DP4
-
-static void emit_GLSL_MIN(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MIN
-
-static void emit_GLSL_MAX(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MAX
-
-static void emit_GLSL_SLT(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants.
-    if (vecsize == 1)
-        make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1);
-    else
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "vec%d(lessThan(%s, %s))",
-                                 vecsize, src0, src1);
-    } // else
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SLT
-
-static void emit_GLSL_SGE(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants.
-    if (vecsize == 1)
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "float(%s >= %s)", src0, src1);
-    } // if
-    else
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "vec%d(greaterThanEqual(%s, %s))",
-                                 vecsize, src0, src1);
-    } // else
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SGE
-
-static void emit_GLSL_EXP(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_EXP
-
-static void emit_GLSL_LOG(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_LOG
-
-static void emit_GLSL_LIT_helper(Context *ctx)
-{
-    const char *maxp = "127.9961"; // value from the dx9 reference.
-
-    if (ctx->glsl_generated_lit_helper)
-        return;
-
-    ctx->glsl_generated_lit_helper = 1;
-
-    push_output(ctx, &ctx->helpers);
-    output_line(ctx, "vec4 LIT(const vec4 src)");
-    output_line(ctx, "{"); ctx->indent++;
-    output_line(ctx,   "float power = clamp(src.w, -%s, %s);",maxp,maxp);
-    output_line(ctx,   "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);");
-    output_line(ctx,   "if (src.x > 0.0) {"); ctx->indent++;
-    output_line(ctx,     "retval.y = src.x;");
-    output_line(ctx,     "if (src.y > 0.0) {"); ctx->indent++;
-    output_line(ctx,       "retval.z = pow(src.y, power);"); ctx->indent--;
-    output_line(ctx,     "}"); ctx->indent--;
-    output_line(ctx,   "}");
-    output_line(ctx,   "return retval;"); ctx->indent--;
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    pop_output(ctx);
-} // emit_GLSL_LIT_helper
-
-static void emit_GLSL_LIT(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    emit_GLSL_LIT_helper(ctx);
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_LIT
-
-static void emit_GLSL_DST(Context *ctx)
-{
-    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
-    char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y));
-    char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y));
-    char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z));
-    char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w));
-
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                             "vec4(1.0, %s * %s, %s, %s)",
-                             src0_y, src1_y, src0_z, src1_w);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_DST
-
-static void emit_GLSL_LRP(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)",
-                             src2, src1, src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_LRP
-
-static void emit_GLSL_FRC(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_FRC
-
-static void emit_GLSL_M4X4(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
-    char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3));
-    char code[256];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                    "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                    src0, row0, src0, row1, src0, row2, src0, row3);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_M4X4
-
-static void emit_GLSL_M4X3(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
-    char code[256];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1, src0, row2);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_M4X3
-
-static void emit_GLSL_M3X4(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
-    char row3[64]; make_GLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3));
-
-    char code[256];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                "vec4(dot(%s, %s), dot(%s, %s), "
-                                     "dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1,
-                                src0, row2, src0, row3);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_M3X4
-
-static void emit_GLSL_M3X3(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
-    char code[256];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1, src0, row2);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_M3X3
-
-static void emit_GLSL_M3X2(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-
-    char code[256];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                "vec2(dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_M3X2
-
-static void emit_GLSL_CALL(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    if (ctx->loops > 0)
-        output_line(ctx, "%s(aL);", src0);
-    else
-        output_line(ctx, "%s();", src0);
-} // emit_GLSL_CALL
-
-static void emit_GLSL_CALLNZ(Context *ctx)
-{
-    // !!! FIXME: if src1 is a constbool that's true, we can remove the
-    // !!! FIXME:  if. If it's false, we can make this a no-op.
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-
-    if (ctx->loops > 0)
-        output_line(ctx, "if (%s) { %s(aL); }", src1, src0);
-    else
-        output_line(ctx, "if (%s) { %s(); }", src1, src0);
-} // emit_GLSL_CALLNZ
-
-static void emit_GLSL_LOOP(Context *ctx)
-{
-    // !!! FIXME: swizzle?
-    char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var));
-    assert(ctx->source_args[0].regnum == 0);  // in case they add aL1 someday.
-    output_line(ctx, "{");
-    ctx->indent++;
-    output_line(ctx, "const int aLend = %s.x + %s.y;", var, var);
-    output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var);
-    ctx->indent++;
-} // emit_GLSL_LOOP
-
-static void emit_GLSL_RET(Context *ctx)
-{
-    // thankfully, the MSDN specs say a RET _has_ to end a function...no
-    //  early returns. So if you hit one, you know you can safely close
-    //  a high-level function.
-    ctx->indent--;
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    set_output(ctx, &ctx->subroutines);  // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily.
-} // emit_GLSL_RET
-
-static void emit_GLSL_ENDLOOP(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_GLSL_ENDLOOP
-
-static void emit_GLSL_LABEL(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    const int label = ctx->source_args[0].regnum;
-    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
-    assert(ctx->output == ctx->subroutines);  // not mainline, etc.
-    assert(ctx->indent == 0);  // we shouldn't be in the middle of a function.
-
-    // MSDN specs say CALL* has to come before the LABEL, so we know if we
-    //  can ditch the entire function here as unused.
-    if (reg == NULL)
-        set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.
-
-    // !!! FIXME: it would be nice if we could determine if a function is
-    // !!! FIXME:  only called once and, if so, forcibly inline it.
-
-    const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
-    output_line(ctx, "void %s(%s)", src0, uses_loopreg);
-    output_line(ctx, "{");
-    ctx->indent++;
-} // emit_GLSL_LABEL
-
-static void emit_GLSL_DCL(Context *ctx)
-{
-    // no-op. We do this in our emit_attribute() and emit_uniform().
-} // emit_GLSL_DCL
-
-static void emit_GLSL_POW(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                             "pow(abs(%s), %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_POW
-
-static void emit_GLSL_CRS(Context *ctx)
-{
-    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
-    char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                             "cross(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_CRS
-
-static void emit_GLSL_SGN(Context *ctx)
-{
-    // (we don't need the temporary registers specified for the D3D opcode.)
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SGN
-
-static void emit_GLSL_ABS(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_ABS
-
-static void emit_GLSL_NRM(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_NRM
-
-static void emit_GLSL_SINCOS(Context *ctx)
-{
-    // we don't care about the temp registers that <= sm2 demands; ignore them.
-    //  sm2 also talks about what components are left untouched vs. undefined,
-    //  but we just leave those all untouched with GLSL write masks (which
-    //  would fulfill the "undefined" requirement, too).
-    const int mask = ctx->dest_arg.writemask;
-    char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char code[128] = { '\0' };
-
-    if (writemask_x(mask))
-        make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0);
-    else if (writemask_y(mask))
-        make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0);
-    else if (writemask_xy(mask))
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "vec2(cos(%s), sin(%s))", src0, src0);
-    } // else if
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SINCOS
-
-static void emit_GLSL_REP(Context *ctx)
-{
-    // !!! FIXME:
-    // msdn docs say legal loop values are 0 to 255. We can check DEFI values
-    //  at parse time, but if they are pulling a value from a uniform, do
-    //  we clamp here?
-    // !!! FIXME: swizzle is legal here, right?
-    char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0));
-    const uint rep = (uint) ctx->reps;
-    output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
-                rep, rep, src0, rep);
-    ctx->indent++;
-} // emit_GLSL_REP
-
-static void emit_GLSL_ENDREP(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_GLSL_ENDREP
-
-static void emit_GLSL_IF(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    output_line(ctx, "if (%s) {", src0);
-    ctx->indent++;
-} // emit_GLSL_IF
-
-static void emit_GLSL_IFC(Context *ctx)
-{
-    const char *comp = get_GLSL_comparison_string_scalar(ctx);
-    char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
-    output_line(ctx, "if (%s %s %s) {", src0, comp, src1);
-    ctx->indent++;
-} // emit_GLSL_IFC
-
-static void emit_GLSL_ELSE(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "} else {");
-    ctx->indent++;
-} // emit_GLSL_ELSE
-
-static void emit_GLSL_ENDIF(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_GLSL_ENDIF
-
-static void emit_GLSL_BREAK(Context *ctx)
-{
-    output_line(ctx, "break;");
-} // emit_GLSL_BREAK
-
-static void emit_GLSL_BREAKC(Context *ctx)
-{
-    const char *comp = get_GLSL_comparison_string_scalar(ctx);
-    char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
-    output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1);
-} // emit_GLSL_BREAKC
-
-static void emit_GLSL_MOVA(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-
-    if (vecsize == 1)
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "int(floor(abs(%s) + 0.5) * sign(%s))",
-                                 src0, src0);
-    } // if
-
-    else
-    {
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                            "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))",
-                            vecsize, src0, vecsize, src0);
-    } // else
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_MOVA
-
-static void emit_GLSL_DEFB(Context *ctx)
-{
-    char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "const bool %s = %s;",
-                varname, ctx->dwords[0] ? "true" : "false");
-    pop_output(ctx);
-} // emit_GLSL_DEFB
-
-static void emit_GLSL_DEFI(Context *ctx)
-{
-    char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
-    const int32 *x = (const int32 *) ctx->dwords;
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);",
-                varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
-    pop_output(ctx);
-} // emit_GLSL_DEFI
-
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
-
-static void emit_GLSL_TEXKILL(Context *ctx)
-{
-    char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-    output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst);
-} // emit_GLSL_TEXKILL
-
-static void glsl_texld(Context *ctx, const int texldd)
-{
-    if (!shader_version_atleast(ctx, 1, 4))
-    {
-        DestArgInfo *info = &ctx->dest_arg;
-        char dst[64];
-        char sampler[64];
-        char code[128] = {0};
-
-        assert(!texldd);
-
-        RegisterList *sreg;
-        sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum);
-        const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-
-        // !!! FIXME: this code counts on the register not having swizzles, etc.
-        get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-        get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                                sampler, sizeof (sampler));
-
-        if (ttype == TEXTURE_TYPE_2D)
-        {
-            make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                     "texture2D(%s, %s.xy)",
-                                     sampler, dst);
-        }
-        else if (ttype == TEXTURE_TYPE_CUBE)
-        {
-            make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                     "textureCube(%s, %s.xyz)",
-                                     sampler, dst);
-        }
-        else if (ttype == TEXTURE_TYPE_VOLUME)
-        {
-            make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                     "texture3D(%s, %s.xyz)",
-                                     sampler, dst);
-        }
-        else
-        {
-            fail(ctx, "unexpected texture type");
-        } // else
-        output_line(ctx, "%s", code);
-    } // if
-
-    else if (!shader_version_atleast(ctx, 2, 0))
-    {
-        // ps_1_4 is different, too!
-        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
-        return;
-    } // else if
-
-    else
-    {
-        const SourceArgInfo *samp_arg = &ctx->source_args[1];
-        RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                          samp_arg->regnum);
-        const char *funcname = NULL;
-        char src0[64] = { '\0' };
-        char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
-        char src2[64] = { '\0' };
-        char src3[64] = { '\0' };
-
-        if (sreg == NULL)
-        {
-            fail(ctx, "TEXLD using undeclared sampler");
-            return;
-        } // if
-
-        if (texldd)
-        {
-            if (sreg->index == TEXTURE_TYPE_2D)
-            {
-                make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2));
-                make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3));
-            } // if
-            else
-            {
-                assert((sreg->index == TEXTURE_TYPE_CUBE) || (sreg->index == TEXTURE_TYPE_VOLUME));
-                make_GLSL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2));
-                make_GLSL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3));
-            } // else
-        } // if
-
-        // !!! FIXME: can TEXLDD set instruction_controls?
-        // !!! FIXME: does the d3d bias value map directly to GLSL?
-        const char *biassep = "";
-        char bias[64] = { '\0' };
-        if (ctx->instruction_controls == CONTROL_TEXLDB)
-        {
-            biassep = ", ";
-            make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias));
-        } // if
-
-        switch ((const TextureType) sreg->index)
-        {
-            case TEXTURE_TYPE_2D:
-                if (ctx->instruction_controls == CONTROL_TEXLDP)
-                {
-                    funcname = "texture2DProj";
-                    make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-                } // if
-                else  // texld/texldb
-                {
-                    funcname = "texture2D";
-                    make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
-                } // else
-                break;
-            case TEXTURE_TYPE_CUBE:
-                if (ctx->instruction_controls == CONTROL_TEXLDP)
-                    fail(ctx, "TEXLDP on a cubemap");  // !!! FIXME: is this legal?
-                funcname = "textureCube";
-                make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-                break;
-            case TEXTURE_TYPE_VOLUME:
-                if (ctx->instruction_controls == CONTROL_TEXLDP)
-                {
-                    funcname = "texture3DProj";
-                    make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-                } // if
-                else  // texld/texldb
-                {
-                    funcname = "texture3D";
-                    make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-                } // else
-                break;
-            default:
-                fail(ctx, "unknown texture type");
-                return;
-        } // switch
-
-        assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
-        char swiz_str[6] = { '\0' };
-        make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
-                                 samp_arg->swizzle, ctx->dest_arg.writemask);
-
-        char code[128];
-        if (texldd)
-        {
-            make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                     "%sGrad(%s, %s, %s, %s)%s", funcname,
-                                     src1, src0, src2, src3, swiz_str);
-        } // if
-        else
-        {
-            make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                     "%s(%s, %s%s%s)%s", funcname,
-                                     src1, src0, biassep, bias, swiz_str);
-        } // else
-
-        output_line(ctx, "%s", code);
-    } // else
-} // glsl_texld
-
-static void emit_GLSL_TEXLD(Context *ctx)
-{
-    glsl_texld(ctx, 0);
-} // emit_GLSL_TEXLD
-    
-
-static void emit_GLSL_TEXBEM(Context *ctx)
-{
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-    char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
-    char sampler[64];
-    char code[512];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
-        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
-        sampler,
-        dst, sampler, src, sampler, src,
-        dst, sampler, src, sampler, src);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXBEM
-
-
-static void emit_GLSL_TEXBEML(Context *ctx)
-{
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-    char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
-    char sampler[64];
-    char code[512];
-
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
-        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
-        " ((%s.z * %s_texbeml.x) + %s_texbem.y)",
-        sampler,
-        dst, sampler, src, sampler, src,
-        dst, sampler, src, sampler, src,
-        src, sampler, sampler);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXBEML
-
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
-
-
-static void emit_GLSL_TEXM3X2PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_GLSL_TEXM3X2TEX().
-} // emit_GLSL_TEXM3X2PAD
-
-static void emit_GLSL_TEXM3X2TEX(Context *ctx)
-{
-    if (ctx->texm3x2pad_src0 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char sampler[64];
-    char code[512];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
-                            src0, sizeof (src0));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
-                            src1, sizeof (src1));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src2, sizeof (src2));
-    get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "texture2D(%s, vec2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))",
-        sampler, src0, src1, src2, dst);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXM3X2TEX
-
-static void emit_GLSL_TEXM3X3PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_GLSL_TEXM3X3*().
-} // emit_GLSL_TEXM3X3PAD
-
-static void emit_GLSL_TEXM3X3TEX(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char sampler[64];
-    char code[512];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s,"
-            " vec3(dot(%s.xyz, %s.xyz),"
-            " dot(%s.xyz, %s.xyz),"
-            " dot(%s.xyz, %s.xyz)))",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXM3X3TEX
-
-static void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx)
-{
-    if (ctx->glsl_generated_texm3x3spec_helper)
-        return;
-
-    ctx->glsl_generated_texm3x3spec_helper = 1;
-
-    push_output(ctx, &ctx->helpers);
-    output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)");
-    output_line(ctx, "{"); ctx->indent++;
-    output_line(ctx,   "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--;
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    pop_output(ctx);
-} // emit_GLSL_TEXM3X3SPEC_helper
-
-static void emit_GLSL_TEXM3X3SPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char src5[64];
-    char sampler[64];
-    char code[512];
-
-    emit_GLSL_TEXM3X3SPEC_helper(ctx);
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
-                            src5, sizeof (src5));
-    get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s, "
-            "TEXM3X3SPEC_reflection("
-                "vec3("
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz)"
-                "),"
-                "%s.xyz,"
-            ")"
-        ")",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXM3X3SPEC
-
-static void emit_GLSL_TEXM3X3VSPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char sampler[64];
-    char code[512];
-
-    emit_GLSL_TEXM3X3SPEC_helper(ctx);
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s, "
-            "TEXM3X3SPEC_reflection("
-                "vec3("
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz)"
-                "), "
-                "vec3(%s.w, %s.w, %s.w)"
-            ")"
-        ")",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXM3X3VSPEC
-
-static void emit_GLSL_EXPP(Context *ctx)
-{
-    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
-    emit_GLSL_EXP(ctx);  // I guess this is just partial precision EXP?
-} // emit_GLSL_EXPP
-
-static void emit_GLSL_LOGP(Context *ctx)
-{
-    // LOGP is just low-precision LOG, but we'll take the higher precision.
-    emit_GLSL_LOG(ctx);
-} // emit_GLSL_LOGP
-
-// common code between CMP and CND.
-static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
-{
-    int i, j;
-    DestArgInfo *dst = &ctx->dest_arg;
-    const SourceArgInfo *srcarg0 = &ctx->source_args[0];
-    const int origmask = dst->writemask;
-    int used_swiz[4] = { 0, 0, 0, 0 };
-    const int writemask[4] = { dst->writemask0, dst->writemask1,
-                               dst->writemask2, dst->writemask3 };
-    const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
-                              srcarg0->swizzle_z, srcarg0->swizzle_w };
-
-    for (i = 0; i < 4; i++)
-    {
-        int mask = (1 << i);
-
-        if (!writemask[i]) continue;
-        if (used_swiz[i]) continue;
-
-        // This is a swizzle we haven't checked yet.
-        used_swiz[i] = 1;
-
-        // see if there are any other elements swizzled to match (.yyyy)
-        for (j = i + 1; j < 4; j++)
-        {
-            if (!writemask[j]) continue;
-            if (src0swiz[i] != src0swiz[j]) continue;
-            mask |= (1 << j);
-            used_swiz[j] = 1;
-        } // for
-
-        // okay, (mask) should be the writemask of swizzles we like.
-
-        //return make_GLSL_srcarg_string(ctx, idx, (1 << 0));
-
-        char src0[64];
-        char src1[64];
-        char src2[64];
-        make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
-        make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
-        make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
-
-        set_dstarg_writemask(dst, mask);
-
-        char code[128];
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "((%s %s) ? %s : %s)",
-                                 src0, cmp, src1, src2);
-        output_line(ctx, "%s", code);
-    } // for
-
-    set_dstarg_writemask(dst, origmask);
-} // emit_GLSL_comparison_operations
-
-static void emit_GLSL_CND(Context *ctx)
-{
-    emit_GLSL_comparison_operations(ctx, "> 0.5");
-} // emit_GLSL_CND
-
-static void emit_GLSL_DEF(Context *ctx)
-{
-    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
-    char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
-    char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
-    char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
-    char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
-    char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
-
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);",
-                varname, val0, val1, val2, val3);
-    pop_output(ctx);
-} // emit_GLSL_DEF
-
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
-
-static void emit_GLSL_TEXM3X3(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char code[512];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
-
-    make_GLSL_destarg_assign(ctx, code, sizeof (code),
-        "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
-        src0, src1, src2, src3, dst, src4);
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_TEXM3X3
-
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
-
-static void emit_GLSL_CMP(Context *ctx)
-{
-    emit_GLSL_comparison_operations(ctx, ">= 0.0");
-} // emit_GLSL_CMP
-
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
-
-static void emit_GLSL_DP2ADD(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
-    char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
-    emit_GLSL_dotprod(ctx, src0, src1, extra);
-} // emit_GLSL_DP2ADD
-
-static void emit_GLSL_DSX(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_DSX
-
-static void emit_GLSL_DSY(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_GLSL_DSY
-
-static void emit_GLSL_TEXLDD(Context *ctx)
-{
-    // !!! FIXME:
-    // GLSL 1.30 introduced textureGrad() for this, but it looks like the
-    //  functions are overloaded instead of texture2DGrad() (etc).
-
-    // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(),
-    //  so we'll use them if available. Failing that, we'll just fallback
-    //  to a regular texture2D call and hope the mipmap it chooses is close
-    //  enough.
-    if (!ctx->glsl_generated_texldd_setup)
-    {
-        ctx->glsl_generated_texldd_setup = 1;
-        push_output(ctx, &ctx->preflight);
-        output_line(ctx, "#if GL_ARB_shader_texture_lod");
-        output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable");
-        output_line(ctx, "#define texture2DGrad texture2DGradARB");
-        output_line(ctx, "#define texture2DProjGrad texture2DProjARB");
-        output_line(ctx, "#elif GL_EXT_gpu_shader4");
-        output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable");
-        output_line(ctx, "#else");
-        output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)");
-        output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)");
-        output_line(ctx, "#endif");
-        output_blank_line(ctx);
-        pop_output(ctx);
-    } // if
-
-    glsl_texld(ctx, 1);
-} // emit_GLSL_TEXLDD
-
-static void emit_GLSL_SETP(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // destination is always predicate register (which is type bvec4).
-    if (vecsize == 1)
-    {
-        const char *comp = get_GLSL_comparison_string_scalar(ctx);
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "(%s %s %s)", src0, comp, src1);
-    } // if
-    else
-    {
-        const char *comp = get_GLSL_comparison_string_vector(ctx);
-        make_GLSL_destarg_assign(ctx, code, sizeof (code),
-                                 "%s(%s, %s)", comp, src0, src1);
-    } // else
-
-    output_line(ctx, "%s", code);
-} // emit_GLSL_SETP
-
-static void emit_GLSL_TEXLDL(Context *ctx)
-{
-    // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
-    // !!! FIXME:  from fragment shaders for some inexplicable reason.
-    // !!! FIXME:  For now, you'll just have to suffer with the potentially
-    // !!! FIXME:  wrong mipmap until I can figure something out.
-    emit_GLSL_TEXLD(ctx);
-} // emit_GLSL_TEXLDL
-
-static void emit_GLSL_BREAKP(Context *ctx)
-{
-    char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    output_line(ctx, "if (%s) { break; }", src0);
-} // emit_GLSL_BREAKP
-
-static void emit_GLSL_RESERVED(Context *ctx)
-{
-    // do nothing; fails in the state machine.
-} // emit_GLSL_RESERVED
-
-#endif  // SUPPORT_PROFILE_GLSL
-
-
-// !!! FIXME: A lot of this is cut-and-paste from the GLSL version.
+
 #if !SUPPORT_PROFILE_METAL
 #define PROFILE_EMITTER_METAL(op)
 #else
 #undef AT_LEAST_ONE_PROFILE
 #define AT_LEAST_ONE_PROFILE 1
 #define PROFILE_EMITTER_METAL(op) emit_METAL_##op,
-
-#define EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(op) \
-    static void emit_METAL_##op(Context *ctx) { \
-        fail(ctx, #op " unimplemented in Metal profile"); \
-    }
-
-static inline const char *get_METAL_register_string(Context *ctx,
-                        const RegisterType regtype, const int regnum,
-                        char *regnum_str, const size_t regnum_size)
-{
-    // turns out these are identical at the moment.
-    return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
-} // get_METAL_register_string
-
-static const char *get_METAL_uniform_type(Context *ctx, const RegisterType rtype)
-{
-    switch (rtype)
-    {
-        case REG_TYPE_CONST: return "float4";
-        case REG_TYPE_CONSTINT: return "int4";
-        case REG_TYPE_CONSTBOOL: return "bool";
-        default: fail(ctx, "BUG: used a uniform we don't know how to define.");
-    } // switch
-
-    return NULL;
-} // get_METAL_uniform_type
-
-static const char *get_METAL_varname_in_buf(Context *ctx, RegisterType rt,
-                                           int regnum, char *buf,
-                                           const size_t len)
-{
-    char regnum_str[16];
-    const char *regtype_str = get_METAL_register_string(ctx, rt, regnum,
-                                              regnum_str, sizeof (regnum_str));
-
-    // We don't separate vars with vs_ or ps_ here, because, for the most part,
-    //  there are only local vars in Metal shaders.
-    snprintf(buf, len, "%s%s", regtype_str, regnum_str);
-    return buf;
-} // get_METAL_varname_in_buf
-
-
-static const char *get_METAL_varname(Context *ctx, RegisterType rt, int regnum)
-{
-    char buf[64];
-    get_METAL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_METAL_varname
-
-
-static inline const char *get_METAL_const_array_varname_in_buf(Context *ctx,
-                                                const int base, const int size,
-                                                char *buf, const size_t buflen)
-{
-    snprintf(buf, buflen, "const_array_%d_%d", base, size);
-    return buf;
-} // get_METAL_const_array_varname_in_buf
-
-static const char *get_METAL_const_array_varname(Context *ctx, int base, int size)
-{
-    char buf[64];
-    get_METAL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_METAL_const_array_varname
-
-
-static inline const char *get_METAL_input_array_varname(Context *ctx,
-                                                char *buf, const size_t buflen)
-{
-    snprintf(buf, buflen, "%s", "vertex_input_array");
-    return buf;
-} // get_METAL_input_array_varname
-
-
-static const char *get_METAL_uniform_array_varname(Context *ctx,
-                                                  const RegisterType regtype,
-                                                  char *buf, const size_t len)
-{
-    const char *shadertype = ctx->shader_type_str;
-    const char *type = get_METAL_uniform_type(ctx, regtype);
-    snprintf(buf, len, "uniforms.uniforms_%s", type);
-    return buf;
-} // get_METAL_uniform_array_varname
-
-static const char *get_METAL_destarg_varname(Context *ctx, char *buf, size_t len)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-    return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
-} // get_METAL_destarg_varname
-
-static const char *get_METAL_srcarg_varname(Context *ctx, const size_t idx,
-                                           char *buf, size_t len)
-{
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-    return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
-} // get_METAL_srcarg_varname
-
-
-static const char *make_METAL_destarg_assign(Context *, char *, const size_t,
-                                            const char *, ...) ISPRINTF(4,5);
-
-static const char *make_METAL_destarg_assign(Context *ctx, char *buf,
-                                            const size_t buflen,
-                                            const char *fmt, ...)
-{
-    int need_parens = 0;
-    const DestArgInfo *arg = &ctx->dest_arg;
-
-    if (arg->writemask == 0)
-    {
-        *buf = '\0';
-        return buf;  // no writemask? It's a no-op.
-    } // if
-
-    char clampbuf[32] = { '\0' };
-    const char *clampleft = "";
-    const char *clampright = "";
-    if (arg->result_mod & MOD_SATURATE)
-    {
-        ctx->metal_need_header_common = 1;
-        const int vecsize = vecsize_from_writemask(arg->writemask);
-        clampleft = "clamp(";
-        if (vecsize == 1)
-            clampright = ", 0.0, 1.0)";
-        else
-        {
-            snprintf(clampbuf, sizeof (clampbuf),
-                     ", float%d(0.0), float%d(1.0))", vecsize, vecsize);
-            clampright = clampbuf;
-        } // else
-    } // if
-
-    // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
-
-    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
-    assert((arg->result_mod & MOD_CENTROID) == 0);
-
-    if (ctx->predicated)
-    {
-        fail(ctx, "predicated destinations unsupported");  // !!! FIXME
-        *buf = '\0';
-        return buf;
-    } // if
-
-    char operation[256];
-    va_list ap;
-    va_start(ap, fmt);
-    const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
-    va_end(ap);
-    if (len >= sizeof (operation))
-    {
-        fail(ctx, "operation string too large");  // I'm lazy.  :P
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const char *result_shift_str = "";
-    switch (arg->result_shift)
-    {
-        case 0x1: result_shift_str = " * 2.0"; break;
-        case 0x2: result_shift_str = " * 4.0"; break;
-        case 0x3: result_shift_str = " * 8.0"; break;
-        case 0xD: result_shift_str = " / 8.0"; break;
-        case 0xE: result_shift_str = " / 4.0"; break;
-        case 0xF: result_shift_str = " / 2.0"; break;
-    } // switch
-    need_parens |= (result_shift_str[0] != '\0');
-
-    char regnum_str[16];
-    const char *regtype_str = get_METAL_register_string(ctx, arg->regtype,
-                                                       arg->regnum, regnum_str,
-                                                       sizeof (regnum_str));
-    char writemask_str[6];
-    size_t i = 0;
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !writemask_xyzw(arg->writemask))
-    {
-        writemask_str[i++] = '.';
-        if (arg->writemask0) writemask_str[i++] = 'x';
-        if (arg->writemask1) writemask_str[i++] = 'y';
-        if (arg->writemask2) writemask_str[i++] = 'z';
-        if (arg->writemask3) writemask_str[i++] = 'w';
-    } // if
-    writemask_str[i] = '\0';
-    assert(i < sizeof (writemask_str));
-
-    const char *leftparen = (need_parens) ? "(" : "";
-    const char *rightparen = (need_parens) ? ")" : "";
-
-    snprintf(buf, buflen, "%s%s%s = %s%s%s%s%s%s;",
-             regtype_str, regnum_str, writemask_str,
-             clampleft, leftparen, operation, rightparen, result_shift_str,
-             clampright);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_METAL_destarg_assign
-
-
-static char *make_METAL_swizzle_string(char *swiz_str, const size_t strsize,
-                                      const int swizzle, const int writemask)
-{
-    size_t i = 0;
-    if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
-    {
-        const int writemask0 = (writemask >> 0) & 0x1;
-        const int writemask1 = (writemask >> 1) & 0x1;
-        const int writemask2 = (writemask >> 2) & 0x1;
-        const int writemask3 = (writemask >> 3) & 0x1;
-
-        const int swizzle_x = (swizzle >> 0) & 0x3;
-        const int swizzle_y = (swizzle >> 2) & 0x3;
-        const int swizzle_z = (swizzle >> 4) & 0x3;
-        const int swizzle_w = (swizzle >> 6) & 0x3;
-
-        swiz_str[i++] = '.';
-        if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
-        if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
-        if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
-        if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
-    } // if
-    assert(i < strsize);
-    swiz_str[i] = '\0';
-    return swiz_str;
-} // make_METAL_swizzle_string
-
-
-static const char *make_METAL_srcarg_string(Context *ctx, const size_t idx,
-                                           const int writemask, char *buf,
-                                           const size_t buflen)
-{
-    *buf = '\0';
-
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-
-    const char *premod_str = "";
-    const char *postmod_str = "";
-    switch (arg->src_mod)
-    {
-        case SRCMOD_NEGATE:
-            premod_str = "-";
-            break;
-
-        case SRCMOD_BIASNEGATE:
-            premod_str = "-(";
-            postmod_str = " - 0.5)";
-            break;
-
-        case SRCMOD_BIAS:
-            premod_str = "(";
-            postmod_str = " - 0.5)";
-            break;
-
-        case SRCMOD_SIGNNEGATE:
-            premod_str = "-((";
-            postmod_str = " - 0.5) * 2.0)";
-            break;
-
-        case SRCMOD_SIGN:
-            premod_str = "((";
-            postmod_str = " - 0.5) * 2.0)";
-            break;
-
-        case SRCMOD_COMPLEMENT:
-            premod_str = "(1.0 - ";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_X2NEGATE:
-            premod_str = "-(";
-            postmod_str = " * 2.0)";
-            break;
-
-        case SRCMOD_X2:
-            premod_str = "(";
-            postmod_str = " * 2.0)";
-            break;
-
-        case SRCMOD_DZ:
-            fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
-            postmod_str = "_dz";
-            break;
-
-        case SRCMOD_DW:
-            fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
-            postmod_str = "_dw";
-            break;
-
-        case SRCMOD_ABSNEGATE:
-            ctx->metal_need_header_math = 1;
-            premod_str = "-abs(";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_ABS:
-            ctx->metal_need_header_math = 1;
-            premod_str = "abs(";
-            postmod_str = ")";
-            break;
-
-        case SRCMOD_NOT:
-            premod_str = "!";
-            break;
-
-        case SRCMOD_NONE:
-        case SRCMOD_TOTAL:
-             break;  // stop compiler whining.
-    } // switch
-
-    const char *regtype_str = NULL;
-
-    if (!arg->relative)
-    {
-        regtype_str = get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum,
-                                              (char *) alloca(64), 64);
-    } // if
-
-    const char *rel_lbracket = "";
-    char rel_offset[32] = { '\0' };
-    const char *rel_rbracket = "";
-    char rel_swizzle[4] = { '\0' };
-    const char *rel_regtype_str = "";
-    if (arg->relative)
-    {
-        if (arg->regtype == REG_TYPE_INPUT)
-            regtype_str=get_METAL_input_array_varname(ctx,(char*)alloca(64),64);
-        else
-        {
-            assert(arg->regtype == REG_TYPE_CONST);
-            const int arrayidx = arg->relative_array->index;
-            const int offset = arg->regnum - arrayidx;
-            assert(offset >= 0);
-            if (arg->relative_array->constant)
-            {
-                const int arraysize = arg->relative_array->count;
-                regtype_str = get_METAL_const_array_varname_in_buf(ctx,
-                                arrayidx, arraysize, (char *) alloca(64), 64);
-                if (offset != 0)
-                    snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
-            } // if
-            else
-            {
-                regtype_str = get_METAL_uniform_array_varname(ctx, arg->regtype,
-                                                      (char *) alloca(64), 64);
-                if (offset == 0)
-                {
-                    snprintf(rel_offset, sizeof (rel_offset),
-                             "ARRAYBASE_%d + ", arrayidx);
-                } // if
-                else
-                {
-                    snprintf(rel_offset, sizeof (rel_offset),
-                             "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
-                } // else
-            } // else
-        } // else
-
-        rel_lbracket = "[";
-
-        rel_regtype_str = get_METAL_varname_in_buf(ctx, arg->relative_regtype,
-                                                  arg->relative_regnum,
-                                                  (char *) alloca(64), 64);
-        rel_swizzle[0] = '.';
-        rel_swizzle[1] = swizzle_channels[arg->relative_component];
-        rel_swizzle[2] = '\0';
-        rel_rbracket = "]";
-    } // if
-
-    char swiz_str[6] = { '\0' };
-    if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
-    {
-        make_METAL_swizzle_string(swiz_str, sizeof (swiz_str),
-                                 arg->swizzle, writemask);
-    } // if
-
-    if (regtype_str == NULL)
-    {
-        fail(ctx, "Unknown source register type.");
-        return buf;
-    } // if
-
-    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
-             premod_str, regtype_str, rel_lbracket, rel_offset,
-             rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str,
-             postmod_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_METAL_srcarg_string
-
-// generate some convenience functions.
-#define MAKE_METAL_SRCARG_STRING_(mask, bitmask) \
-    static inline const char *make_METAL_srcarg_string_##mask(Context *ctx, \
-                                                const size_t idx, char *buf, \
-                                                const size_t buflen) { \
-        return make_METAL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
-    }
-MAKE_METAL_SRCARG_STRING_(x, (1 << 0))
-MAKE_METAL_SRCARG_STRING_(y, (1 << 1))
-MAKE_METAL_SRCARG_STRING_(z, (1 << 2))
-MAKE_METAL_SRCARG_STRING_(w, (1 << 3))
-MAKE_METAL_SRCARG_STRING_(scalar, (1 << 0))
-MAKE_METAL_SRCARG_STRING_(full, 0xF)
-MAKE_METAL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
-MAKE_METAL_SRCARG_STRING_(vec3, 0x7)
-MAKE_METAL_SRCARG_STRING_(vec2, 0x3)
-#undef MAKE_METAL_SRCARG_STRING_
-
-// special cases for comparison opcodes...
-
-static const char *get_METAL_comparison_string_scalar(Context *ctx)
-{
-    static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };
-    if (ctx->instruction_controls >= STATICARRAYLEN(comps))
-    {
-        fail(ctx, "unknown comparison control");
-        return "";
-    } // if
-
-    return comps[ctx->instruction_controls];
-} // get_METAL_comparison_string_scalar
-
-static const char *get_METAL_comparison_string_vector(Context *ctx)
-{
-    return get_METAL_comparison_string_scalar(ctx);  // standard C operators work for vectors in Metal.
-} // get_METAL_comparison_string_vector
-
-
-static void emit_METAL_start(Context *ctx, const char *profilestr)
-{
-    if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
-    {
-        failf(ctx, "Shader type %u unsupported in this profile.",
-              (uint) ctx->shader_type);
-        return;
-    } // if
-
-    if (!ctx->mainfn)
-    {
-        if (shader_is_vertex(ctx))
-            ctx->mainfn = StrDup(ctx, "VertexShader");
-        else if (shader_is_pixel(ctx))
-            ctx->mainfn = StrDup(ctx, "FragmentShader");
-    } // if
-
-    set_output(ctx, &ctx->mainline);
-    ctx->indent++;
-} // emit_METAL_start
-
-static void emit_METAL_RET(Context *ctx);
-static void emit_METAL_end(Context *ctx)
-{
-    // !!! FIXME: maybe handle this at a higher level?
-    // ps_1_* writes color to r0 instead oC0. We move it to the right place.
-    // We don't have to worry about a RET opcode messing this up, since
-        //  RET isn't available before ps_2_0.
-    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
-    {
-        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
-        output_line(ctx, "oC0 = r0;");
-    } // if
-
-    // !!! FIXME: maybe handle this at a higher level?
-    // force a RET opcode if we're at the end of the stream without one.
-    if (ctx->previous_opcode != OPCODE_RET)
-        emit_METAL_RET(ctx);
-} // emit_METAL_end
-
-static void emit_METAL_phase(Context *ctx)
-{
-    // no-op in Metal.
-} // emit_METAL_phase
-
-static void emit_METAL_finalize(Context *ctx)
-{
-    // If we had a relative addressing of REG_TYPE_INPUT, we need to build
-    //  an array for it at the start of main(). GLSL doesn't let you specify
-    //  arrays of attributes.
-    //float4 blah_array[BIGGEST_ARRAY];
-    if (ctx->have_relative_input_registers) // !!! FIXME
-        fail(ctx, "Relative addressing of input registers not supported.");
-
-    // Insert header includes we need...
-    push_output(ctx, &ctx->preflight);
-    #define INC_METAL_HEADER(name) \
-        if (ctx->metal_need_header_##name) { \
-            output_line(ctx, "#include <metal_" #name ">"); \
-        }
-    INC_METAL_HEADER(common);
-    INC_METAL_HEADER(math);
-    INC_METAL_HEADER(relational);
-    INC_METAL_HEADER(geometric);
-    INC_METAL_HEADER(graphics);
-    INC_METAL_HEADER(texture);
-    #undef INC_METAL_HEADER
-    output_blank_line(ctx);
-    output_line(ctx, "using namespace metal;");
-    output_blank_line(ctx);
-    pop_output(ctx);
-
-    // Fill in the shader's mainline function signature.
-    push_output(ctx, &ctx->mainline_intro);
-    output_line(ctx, "%s %s%s %s (",
-                shader_is_vertex(ctx) ? "vertex" : "fragment",
-                ctx->outputs ? ctx->mainfn : "void",
-                ctx->outputs ? "_Output" : "", ctx->mainfn);
-    pop_output(ctx);
-
-    push_output(ctx, &ctx->mainline_arguments);
-    ctx->indent++;
-
-    const int uniform_count = ctx->uniform_float4_count + ctx->uniform_int4_count + ctx->uniform_bool_count;
-    int commas = 0;
-    if (uniform_count) commas++;
-    if (ctx->inputs) commas++;
-    if (commas) commas--;
-
-    if (uniform_count > 0)
-    {
-        push_output(ctx, &ctx->globals);
-        output_line(ctx, "struct %s_Uniforms", ctx->mainfn);
-        output_line(ctx, "{");
-        ctx->indent++;
-        if (ctx->uniform_float4_count > 0)
-            output_line(ctx, "float4 uniforms_float4[%d];", ctx->uniform_float4_count);
-        if (ctx->uniform_int4_count > 0)
-            output_line(ctx, "int4 uniforms_int4[%d];", ctx->uniform_int4_count);
-        if (ctx->uniform_bool_count > 0)
-            output_line(ctx, "bool uniforms_bool[%d];", ctx->uniform_bool_count);
-        ctx->indent--;
-        output_line(ctx, "};");
-        pop_output(ctx);
-
-        output_line(ctx, "constant %s_Uniforms &uniforms [[buffer(16)]]%s", ctx->mainfn, commas ? "," : "");
-        commas--;
-    } // if
-
-    if (ctx->inputs)
-    {
-        output_line(ctx, "%s_Input input [[stage_in]]%s", ctx->mainfn, commas ? "," : "");
-        commas--;
-    } // if
-
-    ctx->indent--;
-    output_line(ctx, ") {");
-    if (ctx->outputs)
-    {
-        ctx->indent++;
-        output_line(ctx, "%s_Output output;", ctx->mainfn);
-
-        push_output(ctx, &ctx->mainline);
-        ctx->indent++;
-        output_line(ctx, "return output;");
-        pop_output(ctx);
-    } // if
-    pop_output(ctx);
-
-    if (ctx->inputs)
-    {
-        push_output(ctx, &ctx->inputs);
-        output_line(ctx, "};");
-        output_blank_line(ctx);
-        pop_output(ctx);
-    } // if
-
-    if (ctx->outputs)
-    {
-        push_output(ctx, &ctx->outputs);
-        output_line(ctx, "};");
-        output_blank_line(ctx);
-        pop_output(ctx);
-    } // if
-
-    // throw some blank lines around to make source more readable.
-    if (ctx->globals)  // don't add a blank line if the section is empty.
-    {
-        push_output(ctx, &ctx->globals);
-        output_blank_line(ctx);
-        pop_output(ctx);
-    } // if
-} // emit_METAL_finalize
-
-static void emit_METAL_global(Context *ctx, RegisterType regtype, int regnum)
-{
-    char varname[64];
-    get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    // These aren't actually global in metal, set them up at top of mainline.
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-
-    switch (regtype)
-    {
-        case REG_TYPE_ADDRESS:
-            if (shader_is_vertex(ctx))
-                output_line(ctx, "int4 %s;", varname);
-            else if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
-            {
-                // We have to map texture registers to temps for ps_1_1, since
-                //  they work like temps, initialize with tex coords, and the
-                //  ps_1_1 TEX opcode expects to overwrite it.
-                if (!shader_version_atleast(ctx, 1, 4))
-                    output_line(ctx, "float4 %s = input.%s;",varname,varname);
-            } // else if
-            break;
-        case REG_TYPE_PREDICATE:
-            output_line(ctx, "bool4 %s;", varname);
-            break;
-        case REG_TYPE_TEMP:
-            output_line(ctx, "float4 %s;", varname);
-            break;
-        case REG_TYPE_LOOP:
-            break; // no-op. We declare these in for loops at the moment.
-        case REG_TYPE_LABEL:
-            break; // no-op. If we see it here, it means we optimized it out.
-        default:
-            fail(ctx, "BUG: we used a register we don't know how to define.");
-            break;
-    } // switch
-
-    pop_output(ctx);
-} // emit_METAL_global
-
-static void emit_METAL_array(Context *ctx, VariableList *var)
-{
-    // All uniforms (except constant arrays, which are literally constant
-    //  data embedded in Metal shaders) are now packed into a single array,
-    //  so we can batch the uniform transfers. So this doesn't actually
-    //  define an array here; the one, big array is emitted during
-    //  finalization instead.
-    // However, we need to #define the offset into the one, big array here,
-    //  and let dereferences use that #define.
-    const int base = var->index;
-    const int metalbase = ctx->uniform_float4_count;
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-    output_line(ctx, "const int ARRAYBASE_%d = %d;", base, metalbase);
-    pop_output(ctx);
-    var->emit_position = metalbase;
-} // emit_METAL_array
-
-static void emit_METAL_const_array(Context *ctx, const ConstantsList *clist,
-                                   int base, int size)
-{
-    char varname[64];
-    get_METAL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname));
-
-    const char *cstr = NULL;
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-    output_line(ctx, "const float4 %s[%d] = {", varname, size);
-    ctx->indent++;
-
-    int i;
-    for (i = 0; i < size; i++)
-    {
-        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
-            clist = clist->next;
-        assert(clist->constant.index == (base + i));
-
-        char val0[32];
-        char val1[32];
-        char val2[32];
-        char val3[32];
-        floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
-        floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
-        floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
-        floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
-
-        output_line(ctx, "float4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
-                        (i < (size-1)) ? "," : "");
-
-        clist = clist->next;
-    } // for
-
-    ctx->indent--;
-    output_line(ctx, "};");
-    output_line(ctx, "(void) %s[0];", varname);  // stop compiler warnings.
-    pop_output(ctx);
-} // emit_METAL_const_array
-
-static void emit_METAL_uniform(Context *ctx, RegisterType regtype, int regnum,
-                              const VariableList *var)
-{
-    // Now that we're pushing all the uniforms as one struct, pack these
-    //  down, so if we only use register c439, it'll actually map to
-    //  uniforms.uniforms_float4[0]. As we push one big struct, this will
-    //  prevent uploading unused data.
-
-    const char *utype = get_METAL_uniform_type(ctx, regtype);
-    char varname[64];
-    char name[64];
-    int index = 0;
-
-    get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-
-    if (var == NULL)
-    {
-        get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name));
-
-        if (regtype == REG_TYPE_CONST)
-            index = ctx->uniform_float4_count;
-        else if (regtype == REG_TYPE_CONSTINT)
-            index = ctx->uniform_int4_count;
-        else if (regtype == REG_TYPE_CONSTBOOL)
-            index = ctx->uniform_bool_count;
-        else  // get_METAL_uniform_array_varname() would have called fail().
-            assert(isfail(ctx));
-
-        // !!! FIXME: can cause unused var warnings in Clang...
-        //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index);
-        output_line(ctx, "#define %s %s[%d]", varname, name, index);
-        push_output(ctx, &ctx->mainline);
-        ctx->indent++;
-        output_line(ctx, "#undef %s", varname);  // !!! FIXME: gross.
-        pop_output(ctx);
-    } // if
-
-    else
-    {
-        const int arraybase = var->index;
-        if (var->constant)
-        {
-            get_METAL_const_array_varname_in_buf(ctx, arraybase, var->count,
-                                                name, sizeof (name));
-            index = (regnum - arraybase);
-        } // if
-        else
-        {
-            assert(var->emit_position != -1);
-            get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name));
-            index = (regnum - arraybase) + var->emit_position;
-        } // else
-
-        // !!! FIXME: might trigger unused var warnings in Clang.
-        //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index);
-        output_line(ctx, "#define %s %s[%d];", varname, name, index);
-        push_output(ctx, &ctx->mainline);
-        ctx->indent++;
-        output_line(ctx, "#undef %s", varname);  // !!! FIXME: gross.
-        pop_output(ctx);
-    } // else
-
-    pop_output(ctx);
-} // emit_METAL_uniform
-
-static void emit_METAL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
-{
-    char var[64];
-    const char *texsuffix = NULL;
-    switch (ttype)
-    {
-        case TEXTURE_TYPE_2D: texsuffix = "2d"; break;
-        case TEXTURE_TYPE_CUBE: texsuffix = "cube"; break;
-        case TEXTURE_TYPE_VOLUME: texsuffix = "3d"; break;
-        default: assert(!"unexpected texture type"); return;
-    } // switch
-
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var));
-
-    push_output(ctx, &ctx->mainline_arguments);
-    ctx->indent++;
-    output_line(ctx, "texture%s<float> %s_texture [[texture(%d)]],",
-                texsuffix, var, stage);
-    output_line(ctx, "sampler %s [[sampler(%d)]],", var, stage);
-    pop_output(ctx);
-
-    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
-    {
-        push_output(ctx, &ctx->mainline_top);
-        ctx->indent++;
-        char name[64];
-        const int index = ctx->uniform_float4_count;
-        ctx->uniform_float4_count += 2;
-        get_METAL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
-        output_line(ctx, "constant float4 &%s_texbem = %s[%d];", var, name, index);
-        output_line(ctx, "constant float4 &%s_texbeml = %s[%d];", var, name, index+1);
-        pop_output(ctx);
-    } // if
-} // emit_METAL_sampler
-
-static void emit_METAL_attribute(Context *ctx, RegisterType regtype, int regnum,
-                                MOJOSHADER_usage usage, int index, int wmask,
-                                int flags)
-{
-    // !!! FIXME: this function doesn't deal with write masks at all yet!
-    const char *usage_str = NULL;
-    char index_str[16] = { '\0' };
-    char var[64];
-
-    get_METAL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
-
-    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
-
-    if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
-        snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-
-    if (shader_is_vertex(ctx))
-    {
-        // pre-vs3 output registers.
-        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
-        //  output registers.
-        if (!shader_version_atleast(ctx, 3, 0))
-        {
-            if (regtype == REG_TYPE_RASTOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                index = regnum;
-                switch ((const RastOutType) regnum)
-                {
-                    case RASTOUT_TYPE_POSITION:
-                        usage = MOJOSHADER_USAGE_POSITION;
-                        break;
-                    case RASTOUT_TYPE_FOG:
-                        usage = MOJOSHADER_USAGE_FOG;
-                        break;
-                    case RASTOUT_TYPE_POINT_SIZE:
-                        usage = MOJOSHADER_USAGE_POINTSIZE;
-                        break;
-                } // switch
-            } // if
-
-            else if (regtype == REG_TYPE_ATTROUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_COLOR;
-                index = regnum;
-            } // else if
-
-            else if (regtype == REG_TYPE_TEXCRDOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_TEXCOORD;
-                index = regnum;
-            } // else if
-        } // if
-
-        if (regtype == REG_TYPE_INPUT)
-        {
-            push_output(ctx, &ctx->inputs);
-            if (buffer_size(ctx->inputs) == 0)
-            {
-                output_line(ctx, "struct %s_Input", ctx->mainfn);
-                output_line(ctx, "{");
-            } // if
-
-            ctx->indent++;
-            output_line(ctx, "float4 %s [[attribute(%d)]];", var, regnum);
-            pop_output(ctx);
-
-            push_output(ctx, &ctx->mainline_top);
-            ctx->indent++;
-            // !!! FIXME: might trigger unused var warnings in Clang.
-            //output_line(ctx, "constant float4 &%s = input.%s;", var, var);
-            output_line(ctx, "#define %s input.%s", var, var);
-            pop_output(ctx);
-            push_output(ctx, &ctx->mainline);
-            ctx->indent++;
-            output_line(ctx, "#undef %s", var);  // !!! FIXME: gross.
-            pop_output(ctx);
-        } // if
-
-        else if (regtype == REG_TYPE_OUTPUT)
-        {
-            push_output(ctx, &ctx->outputs);
-            if (buffer_size(ctx->outputs) == 0)
-            {
-                output_line(ctx, "struct %s_Output", ctx->mainfn);
-                output_line(ctx, "{");
-            } // if
-
-            ctx->indent++;
-
-            switch (usage)
-            {
-                case MOJOSHADER_USAGE_POSITION:
-                    output_line(ctx, "float4 %s [[position]];", var);
-                    break;
-                case MOJOSHADER_USAGE_POINTSIZE:
-                    output_line(ctx, "float4 %s [[point_size]];", var);
-                    break;
-                case MOJOSHADER_USAGE_COLOR:
-                    output_line(ctx, "float4 %s [[user(color%d)]];", var, index);
-                    break;
-                case MOJOSHADER_USAGE_FOG:
-                    output_line(ctx, "float4 %s [[user(fog)]];", var);
-                    break;
-                case MOJOSHADER_USAGE_TEXCOORD:
-                    output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index);
-                    break;
-                default:
-                    // !!! FIXME: we need to deal with some more built-in varyings here.
-                    break;
-            } // switch
-
-            pop_output(ctx);
-
-            push_output(ctx, &ctx->mainline_top);
-            ctx->indent++;
-            // !!! FIXME: this doesn't work.
-            //output_line(ctx, "float4 &%s = output.%s;", var, var);
-            output_line(ctx, "#define %s output.%s", var, var);
-            pop_output(ctx);
-            push_output(ctx, &ctx->mainline);
-            ctx->indent++;
-            output_line(ctx, "#undef %s", var);  // !!! FIXME: gross.
-            pop_output(ctx);
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown vertex shader attribute register");
-        } // else
-    } // if
-
-    else if (shader_is_pixel(ctx))
-    {
-        // samplers DCLs get handled in emit_METAL_sampler().
-
-        if (flags & MOD_CENTROID)  // !!! FIXME
-        {
-            failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
-            return;
-        } // if
-
-        if ((regtype == REG_TYPE_COLOROUT) || (regtype == REG_TYPE_DEPTHOUT))
-        {
-            push_output(ctx, &ctx->outputs);
-            if (buffer_size(ctx->outputs) == 0)
-            {
-                output_line(ctx, "struct %s_Output", ctx->mainfn);
-                output_line(ctx, "{");
-            } // if
-            ctx->indent++;
-
-            if (regtype == REG_TYPE_COLOROUT)
-                output_line(ctx, "float4 %s [[color(%d)]];", var, regnum);
-            else if (regtype == REG_TYPE_DEPTHOUT)
-                output_line(ctx, "float %s [[depth(any)]];", var);
-
-            pop_output(ctx);
-
-            push_output(ctx, &ctx->mainline_top);
-            ctx->indent++;
-            // !!! FIXME: this doesn't work.
-            //output_line(ctx, "float%s &%s = output.%s;", (regtype == REG_TYPE_DEPTHOUT) ? "" : "4", var, var);
-            output_line(ctx, "#define %s output.%s", var, var);
-            pop_output(ctx);
-            push_output(ctx, &ctx->mainline);
-            ctx->indent++;
-            output_line(ctx, "#undef %s", var);  // !!! FIXME: gross.
-            pop_output(ctx);
-        } // if
-
-        // !!! FIXME: can you actualy have a texture register with COLOR usage?
-        else if ((regtype == REG_TYPE_TEXTURE) ||
-                 (regtype == REG_TYPE_INPUT) ||
-                 (regtype == REG_TYPE_MISCTYPE))
-        {
-            int skipreference = 0;
-            push_output(ctx, &ctx->inputs);
-            if (buffer_size(ctx->inputs) == 0)
-            {
-                output_line(ctx, "struct %s_Input", ctx->mainfn);
-                output_line(ctx, "{");
-            } // if
-            ctx->indent++;
-
-            if (regtype == REG_TYPE_MISCTYPE)
-            {
-                const MiscTypeType mt = (MiscTypeType) regnum;
-                if (mt == MISCTYPE_TYPE_FACE)
-                    output_line(ctx, "bool %s [[front_facing]];", var);
-                else if (mt == MISCTYPE_TYPE_POSITION)
-                    output_line(ctx, "float4 %s [[position]];", var);
-                else
-                    fail(ctx, "BUG: unhandled misc register");
-            } // else if
-
-            else
-            {
-                if (usage == MOJOSHADER_USAGE_TEXCOORD)
-                {
-                    // ps_1_1 does a different hack for this attribute.
-                    //  Refer to emit_METAL_global()'s REG_TYPE_ADDRESS code.
-                    if (!shader_version_atleast(ctx, 1, 4))
-                        skipreference = 1;
-                    output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index);
-                } // if
-
-                else if (usage == MOJOSHADER_USAGE_COLOR)
-                    output_line(ctx, "float4 %s [[user(color%d)]];", var, index);
-
-                else if (usage == MOJOSHADER_USAGE_FOG)
-                    output_line(ctx, "float4 %s [[user(fog)]];", var);
-            } // else
-
-            pop_output(ctx);
-
-            // !!! FIXME: can cause unused var warnings in Clang...
-            #if 0
-            push_output(ctx, &ctx->mainline_top);
-            ctx->indent++;
-            if ((regtype == REG_TYPE_MISCTYPE)&&(regnum == MISCTYPE_TYPE_FACE))
-                output_line(ctx, "constant bool &%s = input.%s;", var, var);
-            else if (!skipreference)
-                output_line(ctx, "constant float4 &%s = input.%s;", var, var);
-            pop_output(ctx);
-            #endif
-
-            if (!skipreference)
-            {
-                push_output(ctx, &ctx->mainline_top);
-                ctx->indent++;
-                output_line(ctx, "#define %s input.%s", var, var);
-                pop_output(ctx);
-                push_output(ctx, &ctx->mainline);
-                ctx->indent++;
-                output_line(ctx, "#undef %s", var);  // !!! FIXME: gross.
-                pop_output(ctx);
-            } // if
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown pixel shader attribute register");
-        } // else
-    } // else if
-
-    else
-    {
-        fail(ctx, "Unknown shader type");  // state machine should catch this.
-    } // else
-} // emit_METAL_attribute
-
-static void emit_METAL_NOP(Context *ctx)
-{
-    // no-op is a no-op.  :)
-} // emit_METAL_NOP
-
-static void emit_METAL_MOV(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "%s", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_MOV
-
-static void emit_METAL_ADD(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_ADD
-
-static void emit_METAL_SUB(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_SUB
-
-static void emit_METAL_MAD(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2);
-    output_line(ctx, "%s", code);
-} // emit_METAL_MAD
-
-static void emit_METAL_MUL(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_MUL
-
-static void emit_METAL_RCP(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_RCP
-
-static void emit_METAL_RSQ(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "rsqrt(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_RSQ
-
-static void emit_METAL_dotprod(Context *ctx, const char *src0, const char *src1,
-                              const char *extra)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char castleft[16] = { '\0' };
-    const char *castright = "";
-    if (vecsize != 1)
-    {
-        snprintf(castleft, sizeof (castleft), "float%d(", vecsize);
-        castright = ")";
-    } // if
-
-    char code[128];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s",
-                             castleft, src0, src1, extra, castright);
-    output_line(ctx, "%s", code);
-} // emit_METAL_dotprod
-
-static void emit_METAL_DP3(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
-    emit_METAL_dotprod(ctx, src0, src1, "");
-} // emit_METAL_DP3
-
-static void emit_METAL_DP4(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_full(ctx, 1, src1, sizeof (src1));
-    emit_METAL_dotprod(ctx, src0, src1, "");
-} // emit_METAL_DP4
-
-static void emit_METAL_MIN(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_MIN
-
-static void emit_METAL_MAX(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_MAX
-
-static void emit_METAL_SLT(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants.
-    if (vecsize == 1)
-        make_METAL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1);
-    else
-    {
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                  "float%d(%s < %s)", vecsize, src0, src1);
-    } // else
-    output_line(ctx, "%s", code);
-} // emit_METAL_SLT
-
-static void emit_METAL_SGE(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants.
-    if (vecsize == 1)
-    {
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                 "float(%s >= %s)", src0, src1);
-    } // if
-    else
-    {
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                  "float%d(%s >= %s)", vecsize, src0, src1);
-    } // else
-    output_line(ctx, "%s", code);
-} // emit_METAL_SGE
-
-static void emit_METAL_EXP(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_EXP
-
-static void emit_METAL_LOG(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_LOG
-
-static void emit_METAL_LIT_helper(Context *ctx)
-{
-    const char *maxp = "127.9961"; // value from the dx9 reference.
-
-    if (ctx->glsl_generated_lit_helper)
-        return;
-
-    ctx->glsl_generated_lit_helper = 1;
-    ctx->metal_need_header_common = 1;
-    ctx->metal_need_header_math = 1;
-
-    push_output(ctx, &ctx->helpers);
-    output_line(ctx, "static float4 LIT(const float4 src)");
-    output_line(ctx, "{"); ctx->indent++;
-    output_line(ctx,   "const float power = clamp(src.w, -%s, %s);",maxp,maxp);
-    output_line(ctx,   "float4 retval = float4(1.0, 0.0, 0.0, 1.0);");
-    output_line(ctx,   "if (src.x > 0.0) {"); ctx->indent++;
-    output_line(ctx,     "retval.y = src.x;");
-    output_line(ctx,     "if (src.y > 0.0) {"); ctx->indent++;
-    output_line(ctx,       "retval.z = pow(src.y, power);"); ctx->indent--;
-    output_line(ctx,     "}"); ctx->indent--;
-    output_line(ctx,   "}");
-    output_line(ctx,   "return retval;"); ctx->indent--;
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    pop_output(ctx);
-} // emit_METAL_LIT_helper
-
-static void emit_METAL_LIT(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    emit_METAL_LIT_helper(ctx);
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_LIT
-
-static void emit_METAL_DST(Context *ctx)
-{
-    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
-    char src0_y[64]; make_METAL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y));
-    char src1_y[64]; make_METAL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y));
-    char src0_z[64]; make_METAL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z));
-    char src1_w[64]; make_METAL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w));
-
-    char code[128];
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                             "float4(1.0, %s * %s, %s, %s)",
-                             src0_y, src1_y, src0_z, src1_w);
-    output_line(ctx, "%s", code);
-} // emit_METAL_DST
-
-static void emit_METAL_LRP(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
-    char code[128];
-    ctx->metal_need_header_common = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)",
-                             src2, src1, src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_LRP
-
-static void emit_METAL_FRC(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_FRC
-
-static void emit_METAL_M4X4(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
-    char row3[64]; make_METAL_srcarg_string_full(ctx, 4, row3, sizeof (row3));
-    char code[256];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                    "float4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                    src0, row0, src0, row1, src0, row2, src0, row3);
-    output_line(ctx, "%s", code);
-} // emit_METAL_M4X4
-
-static void emit_METAL_M4X3(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
-    char code[256];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1, src0, row2);
-    output_line(ctx, "%s", code);
-} // emit_METAL_M4X3
-
-static void emit_METAL_M3X4(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
-    char row3[64]; make_METAL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3));
-    char code[256];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                "float4(dot(%s, %s), dot(%s, %s), "
-                                     "dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1,
-                                src0, row2, src0, row3);
-    output_line(ctx, "%s", code);
-} // emit_METAL_M3X4
-
-static void emit_METAL_M3X3(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-    char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
-    char code[256];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1, src0, row2);
-    output_line(ctx, "%s", code);
-} // emit_METAL_M3X3
-
-static void emit_METAL_M3X2(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
-    char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
-    char code[256];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                "float2(dot(%s, %s), dot(%s, %s))",
-                                src0, row0, src0, row1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_M3X2
-
-static void emit_METAL_CALL(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    if (ctx->loops > 0)
-        output_line(ctx, "%s(aL);", src0);
-    else
-        output_line(ctx, "%s();", src0);
-} // emit_METAL_CALL
-
-static void emit_METAL_CALLNZ(Context *ctx)
-{
-    // !!! FIXME: if src1 is a constbool that's true, we can remove the
-    // !!! FIXME:  if. If it's false, we can make this a no-op.
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-
-    if (ctx->loops > 0)
-        output_line(ctx, "if (%s) { %s(aL); }", src1, src0);
-    else
-        output_line(ctx, "if (%s) { %s(); }", src1, src0);
-} // emit_METAL_CALLNZ
-
-static void emit_METAL_LOOP(Context *ctx)
-{
-    // !!! FIXME: swizzle?
-    char var[64]; get_METAL_srcarg_varname(ctx, 1, var, sizeof (var));
-    assert(ctx->source_args[0].regnum == 0);  // in case they add aL1 someday.
-    output_line(ctx, "{");
-    ctx->indent++;
-    output_line(ctx, "const int aLend = %s.x + %s.y;", var, var);
-    output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var);
-    ctx->indent++;
-} // emit_METAL_LOOP
-
-static void emit_METAL_RET(Context *ctx)
-{
-    // thankfully, the MSDN specs say a RET _has_ to end a function...no
-    //  early returns. So if you hit one, you know you can safely close
-    //  a high-level function.
-    push_output(ctx, &ctx->postflight);
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    set_output(ctx, &ctx->subroutines);  // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily.
-} // emit_METAL_RET
-
-static void emit_METAL_ENDLOOP(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_METAL_ENDLOOP
-
-static void emit_METAL_LABEL(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    const int label = ctx->source_args[0].regnum;
-    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
-    assert(ctx->output == ctx->subroutines);  // not mainline, etc.
-    assert(ctx->indent == 0);  // we shouldn't be in the middle of a function.
-
-    // MSDN specs say CALL* has to come before the LABEL, so we know if we
-    //  can ditch the entire function here as unused.
-    if (reg == NULL)
-        set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.
-
-    // !!! FIXME: it would be nice if we could determine if a function is
-    // !!! FIXME:  only called once and, if so, forcibly inline it.
-
-    // !!! FIXME: this worked in GLSL because all our state is global to the shader,
-    // !!! FIXME:  but in metal we kept it local to the shader mainline.
-    // !!! FIXME:  Can we do C++11 lambdas in Metal to have nested functions?  :)
-
-    const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
-    output_line(ctx, "static void %s(%s)", src0, uses_loopreg);
-    output_line(ctx, "{");
-    ctx->indent++;
-} // emit_METAL_LABEL
-
-static void emit_METAL_DCL(Context *ctx)
-{
-    // no-op. We do this in our emit_attribute() and emit_uniform().
-} // emit_METAL_DCL
-
-static void emit_METAL_POW(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                             "pow(abs(%s), %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_POW
-
-static void emit_METAL_CRS(Context *ctx)
-{
-    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
-    char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
-    char code[128];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                             "cross(%s, %s)", src0, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_CRS
-
-static void emit_METAL_SGN(Context *ctx)
-{
-    // (we don't need the temporary registers specified for the D3D opcode.)
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_common = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_SGN
-
-static void emit_METAL_ABS(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_math = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_ABS
-
-static void emit_METAL_NRM(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_geometric = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_NRM
-
-static void emit_METAL_SINCOS(Context *ctx)
-{
-    // we don't care about the temp registers that <= sm2 demands; ignore them.
-    //  sm2 also talks about what components are left untouched vs. undefined,
-    //  but we just leave those all untouched with Metal write masks (which
-    //  would fulfill the "undefined" requirement, too).
-    const int mask = ctx->dest_arg.writemask;
-    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char code[128] = { '\0' };
-
-    ctx->metal_need_header_math = 1;
-    if (writemask_x(mask))
-        make_METAL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0);
-    else if (writemask_y(mask))
-        make_METAL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0);
-    else if (writemask_xy(mask))
-    {
-        // !!! FIXME: can use sincos(), but need to assign cos to a temp, since it needs a reference.
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                 "float2(cos(%s), sin(%s))", src0, src0);
-    } // else if
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_SINCOS
-
-static void emit_METAL_REP(Context *ctx)
-{
-    // !!! FIXME:
-    // msdn docs say legal loop values are 0 to 255. We can check DEFI values
-    //  at parse time, but if they are pulling a value from a uniform, do
-    //  we clamp here?
-    // !!! FIXME: swizzle is legal here, right?
-    char src0[64]; make_METAL_srcarg_string_x(ctx, 0, src0, sizeof (src0));
-    const uint rep = (uint) ctx->reps;
-    output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
-                rep, rep, src0, rep);
-    ctx->indent++;
-} // emit_METAL_REP
-
-static void emit_METAL_ENDREP(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_METAL_ENDREP
-
-static void emit_METAL_IF(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    output_line(ctx, "if (%s) {", src0);
-    ctx->indent++;
-} // emit_METAL_IF
-
-static void emit_METAL_IFC(Context *ctx)
-{
-    const char *comp = get_METAL_comparison_string_scalar(ctx);
-    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
-    output_line(ctx, "if (%s %s %s) {", src0, comp, src1);
-    ctx->indent++;
-} // emit_METAL_IFC
-
-static void emit_METAL_ELSE(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "} else {");
-    ctx->indent++;
-} // emit_METAL_ELSE
-
-static void emit_METAL_ENDIF(Context *ctx)
-{
-    ctx->indent--;
-    output_line(ctx, "}");
-} // emit_METAL_ENDIF
-
-static void emit_METAL_BREAK(Context *ctx)
-{
-    output_line(ctx, "break;");
-} // emit_METAL_BREAK
-
-static void emit_METAL_BREAKC(Context *ctx)
-{
-    const char *comp = get_METAL_comparison_string_scalar(ctx);
-    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
-    output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1);
-} // emit_METAL_BREAKC
-
-static void emit_METAL_MOVA(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-
-    ctx->metal_need_header_math = 1;
-    ctx->metal_need_header_common = 1;
-
-    if (vecsize == 1)
-    {
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                 "int(floor(abs(%s) + 0.5) * sign(%s))",
-                                 src0, src0);
-    } // if
-
-    else
-    {
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                            "int%d(floor(abs(%s) + float%d(0.5)) * sign(%s))",
-                            vecsize, src0, vecsize, src0);
-    } // else
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_MOVA
-
-static void emit_METAL_DEFB(Context *ctx)
-{
-    char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname));
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-    output_line(ctx, "const bool %s = %s;",
-                varname, ctx->dwords[0] ? "true" : "false");
-    pop_output(ctx);
-} // emit_METAL_DEFB
-
-static void emit_METAL_DEFI(Context *ctx)
-{
-    char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname));
-    const int32 *x = (const int32 *) ctx->dwords;
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-    output_line(ctx, "const int4 %s = int4(%d, %d, %d, %d);",
-                varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
-    pop_output(ctx);
-} // emit_METAL_DEFI
-
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
-
-static void emit_METAL_TEXKILL(Context *ctx)
-{
-    char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-    ctx->metal_need_header_relational = 1;
-    ctx->metal_need_header_graphics = 1;
-    output_line(ctx, "if (any(%s.xyz < float3(0.0))) discard_fragment();", dst);
-} // emit_METAL_TEXKILL
-
-static void metal_texld(Context *ctx, const int texldd)
-{
-    ctx->metal_need_header_texture = 1;
-    if (!shader_version_atleast(ctx, 1, 4))
-    {
-        DestArgInfo *info = &ctx->dest_arg;
-        char dst[64];
-        char sampler[64];
-        char code[128] = {0};
-
-        assert(!texldd);
-
-        RegisterList *sreg;
-        sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum);
-        const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-
-        char swizzle[4] = { 'x', 'y', 'z', '\0' };
-        if (ttype == TEXTURE_TYPE_2D)
-            swizzle[2] = '\0';  // "xy" instead of "xyz".
-
-        // !!! FIXME: this code counts on the register not having swizzles, etc.
-        get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-        get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                                 sampler, sizeof (sampler));
-
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                  "%s_texture.sample(%s, %s.%s)",
-                                  sampler, sampler, dst, swizzle);
-        output_line(ctx, "%s", code);
-    } // if
-
-    else if (!shader_version_atleast(ctx, 2, 0))
-    {
-        // ps_1_4 is different, too!
-        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
-        return;
-    } // else if
-
-    else
-    {
-        const SourceArgInfo *samp_arg = &ctx->source_args[1];
-        RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                          samp_arg->regnum);
-        const char *funcname = NULL;
-        char src0[64] = { '\0' };
-        char src1[64]; get_METAL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
-        char src2[64] = { '\0' };
-        char src3[64] = { '\0' };
-
-        if (sreg == NULL)
-        {
-            fail(ctx, "TEXLD using undeclared sampler");
-            return;
-        } // if
-
-        const char *grad = "";
-        if (texldd)
-        {
-            switch ((const TextureType) sreg->index)
-            {
-                case TEXTURE_TYPE_2D:
-                    grad = "2d";
-                    make_METAL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2));
-                    make_METAL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3));
-                    break;
-                case TEXTURE_TYPE_VOLUME:
-                    grad = "3d";
-                    make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2));
-                    make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3));
-                    break;
-                case TEXTURE_TYPE_CUBE:
-                    grad = "cube";
-                    make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2));
-                    make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3));
-                    break;
-            } // switch
-        } // if
-
-        // !!! FIXME: can TEXLDD set instruction_controls?
-        // !!! FIXME: does the d3d bias value map directly to Metal?
-        const char *biasleft = "";
-        const char *biasright = "";
-        char bias[64] = { '\0' };
-        if (ctx->instruction_controls == CONTROL_TEXLDB)
-        {
-            biasleft = ", bias(";
-            make_METAL_srcarg_string_w(ctx, 0, bias, sizeof (bias));
-            biasright = ")";
-        } // if
-
-        // Metal doesn't have a texture2DProj() function, but you just divide
-        // your texcoords by texcoords.w to achieve it anyhow, so DIY.
-        const char *projop = "";
-        char proj[64] = { '\0' };
-        if (ctx->instruction_controls == CONTROL_TEXLDP)
-        {
-            if (sreg->index == TEXTURE_TYPE_CUBE)
-                fail(ctx, "TEXLDP on a cubemap");  // !!! FIXME: is this legal?
-            projop = " / ";
-            make_METAL_srcarg_string_w(ctx, 0, proj, sizeof (proj));
-        } // if
-
-        switch ((const TextureType) sreg->index)
-        {
-            case TEXTURE_TYPE_2D:
-                make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
-                break;
-
-            case TEXTURE_TYPE_CUBE:
-            case TEXTURE_TYPE_VOLUME:
-                make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
-                break;
-
-            default:
-                fail(ctx, "unknown texture type");
-                return;
-        } // switch
-
-        assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
-        char swiz_str[6] = { '\0' };
-        make_METAL_swizzle_string(swiz_str, sizeof (swiz_str),
-                                 samp_arg->swizzle, ctx->dest_arg.writemask);
-
-        char code[128];
-        if (texldd)
-        {
-            make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                     "%s_texture.sample(%s, %s, gradient%s(%s, %s))%s",
-                                     src1, src1, src0, grad, src2, src3, swiz_str);
-        } // if
-        else
-        {
-            make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                     "%s_texture.sample(%s, %s%s%s%s%s%s)%s",
-                                     src1, src1, src0, projop, proj,
-                                     biasleft, bias, biasright, swiz_str);
-        } // else
-
-        output_line(ctx, "%s", code);
-    } // else
-} // metal_texld
-
-static void emit_METAL_TEXLD(Context *ctx)
-{
-    metal_texld(ctx, 0);
-} // emit_METAL_TEXLD
-    
-
-static void emit_METAL_TEXBEM(Context *ctx)
-{
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-    char src[64]; get_METAL_srcarg_varname(ctx, 0, src, sizeof (src));
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
-        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
-        sampler, sampler,
-        dst, sampler, src, sampler, src,
-        dst, sampler, src, sampler, src);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXBEM
-
-
-static void emit_METAL_TEXBEML(Context *ctx)
-{
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-    char src[64]; get_METAL_srcarg_varname(ctx, 0, src, sizeof (src));
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "(%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
-        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
-        " ((%s.z * %s_texbeml.x) + %s_texbem.y)",
-        sampler, sampler,
-        dst, sampler, src, sampler, src,
-        dst, sampler, src, sampler, src,
-        src, sampler, sampler);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXBEML
-
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
-
-
-static void emit_METAL_TEXM3X2PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_METAL_TEXM3X2TEX().
-} // emit_METAL_TEXM3X2PAD
-
-static void emit_METAL_TEXM3X2TEX(Context *ctx)
-{
-    if (ctx->texm3x2pad_src0 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-    ctx->metal_need_header_geometric = 1;
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
-                            src0, sizeof (src0));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
-                            src1, sizeof (src1));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src2, sizeof (src2));
-    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "%s_texture.sample(%s, float2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))",
-        sampler, sampler, src0, src1, src2, dst);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXM3X2TEX
-
-static void emit_METAL_TEXM3X3PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_METAL_TEXM3X3*().
-} // emit_METAL_TEXM3X3PAD
-
-static void emit_METAL_TEXM3X3TEX(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-    ctx->metal_need_header_geometric = 1;
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s,"
-            " float3(dot(%s.xyz, %s.xyz),"
-            " dot(%s.xyz, %s.xyz),"
-            " dot(%s.xyz, %s.xyz)))",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXM3X3TEX
-
-static void emit_METAL_TEXM3X3SPEC_helper(Context *ctx)
-{
-    if (ctx->glsl_generated_texm3x3spec_helper)
-        return;
-
-    ctx->glsl_generated_texm3x3spec_helper = 1;
-
-    push_output(ctx, &ctx->helpers);
-    output_line(ctx, "float3 TEXM3X3SPEC_reflection(const float3 normal, const float3 eyeray)");
-    output_line(ctx, "{"); ctx->indent++;
-    output_line(ctx,   "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--;
-    output_line(ctx, "}");
-    output_blank_line(ctx);
-    pop_output(ctx);
-} // emit_METAL_TEXM3X3SPEC_helper
-
-static void emit_METAL_TEXM3X3SPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char src5[64];
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-    ctx->metal_need_header_geometric = 1;
-
-    emit_METAL_TEXM3X3SPEC_helper(ctx);
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
-                            src5, sizeof (src5));
-    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s, "
-            "TEXM3X3SPEC_reflection("
-                "float3("
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz)"
-                "),"
-                "%s.xyz,"
-            ")"
-        ")",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXM3X3SPEC
-
-static void emit_METAL_TEXM3X3VSPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    DestArgInfo *info = &ctx->dest_arg;
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char sampler[64];
-    char code[512];
-
-    ctx->metal_need_header_texture = 1;
-    ctx->metal_need_header_geometric = 1;
-
-    emit_METAL_TEXM3X3SPEC_helper(ctx);
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
-                            sampler, sizeof (sampler));
-
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
-                                      info->regnum);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "texture%s(%s, "
-            "TEXM3X3SPEC_reflection("
-                "float3("
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz), "
-                    "dot(%s.xyz, %s.xyz)"
-                "), "
-                "float3(%s.w, %s.w, %s.w)"
-            ")"
-        ")",
-        ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXM3X3VSPEC
-
-static void emit_METAL_EXPP(Context *ctx)
-{
-    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
-    emit_METAL_EXP(ctx);  // I guess this is just partial precision EXP?
-} // emit_METAL_EXPP
-
-static void emit_METAL_LOGP(Context *ctx)
-{
-    // LOGP is just low-precision LOG, but we'll take the higher precision.
-    emit_METAL_LOG(ctx);
-} // emit_METAL_LOGP
-
-// common code between CMP and CND.
-static void emit_METAL_comparison_operations(Context *ctx, const char *cmp)
-{
-    int i, j;
-    DestArgInfo *dst = &ctx->dest_arg;
-    const SourceArgInfo *srcarg0 = &ctx->source_args[0];
-    const int origmask = dst->writemask;
-    int used_swiz[4] = { 0, 0, 0, 0 };
-    const int writemask[4] = { dst->writemask0, dst->writemask1,
-                               dst->writemask2, dst->writemask3 };
-    const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
-                              srcarg0->swizzle_z, srcarg0->swizzle_w };
-
-    for (i = 0; i < 4; i++)
-    {
-        int mask = (1 << i);
-
-        if (!writemask[i]) continue;
-        if (used_swiz[i]) continue;
-
-        // This is a swizzle we haven't checked yet.
-        used_swiz[i] = 1;
-
-        // see if there are any other elements swizzled to match (.yyyy)
-        for (j = i + 1; j < 4; j++)
-        {
-            if (!writemask[j]) continue;
-            if (src0swiz[i] != src0swiz[j]) continue;
-            mask |= (1 << j);
-            used_swiz[j] = 1;
-        } // for
-
-        // okay, (mask) should be the writemask of swizzles we like.
-
-        //return make_METAL_srcarg_string(ctx, idx, (1 << 0));
-
-        char src0[64];
-        char src1[64];
-        char src2[64];
-        make_METAL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
-        make_METAL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
-        make_METAL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
-
-        set_dstarg_writemask(dst, mask);
-
-        char code[128];
-        make_METAL_destarg_assign(ctx, code, sizeof (code),
-                                 "((%s %s) ? %s : %s)",
-                                 src0, cmp, src1, src2);
-        output_line(ctx, "%s", code);
-    } // for
-
-    set_dstarg_writemask(dst, origmask);
-} // emit_METAL_comparison_operations
-
-static void emit_METAL_CND(Context *ctx)
-{
-    emit_METAL_comparison_operations(ctx, "> 0.5");
-} // emit_METAL_CND
-
-static void emit_METAL_DEF(Context *ctx)
-{
-    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
-    char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname));
-    char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
-    char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
-    char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
-    char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
-
-    push_output(ctx, &ctx->mainline_top);
-    ctx->indent++;
-    // The "(void) %s;" is to make the compiler not warn if this isn't used.
-    output_line(ctx, "const float4 %s = float4(%s, %s, %s, %s); (void) %s;",
-                varname, val0, val1, val2, val3, varname);
-    pop_output(ctx);
-} // emit_METAL_DEF
-
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
-
-static void emit_METAL_TEXM3X3(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char code[512];
-
-    ctx->metal_need_header_geometric = 1;
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-        "float4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
-        src0, src1, src2, src3, dst, src4);
-
-    output_line(ctx, "%s", code);
-} // emit_METAL_TEXM3X3
-
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
-
-static void emit_METAL_CMP(Context *ctx)
-{
-    emit_METAL_comparison_operations(ctx, ">= 0.0");
-} // emit_METAL_CMP
-
-EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
-
-static void emit_METAL_DP2ADD(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_METAL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
-    char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
-    emit_METAL_dotprod(ctx, src0, src1, extra);
-} // emit_METAL_DP2ADD
-
-static void emit_METAL_DSX(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_graphics = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdx(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_DSX
-
-static void emit_METAL_DSY(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char code[128];
-    ctx->metal_need_header_graphics = 1;
-    make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdy(%s)", src0);
-    output_line(ctx, "%s", code);
-} // emit_METAL_DSY
-
-static void emit_METAL_TEXLDD(Context *ctx)
-{
-    metal_texld(ctx, 1);
-} // emit_METAL_TEXLDD
-
-static void emit_METAL_SETP(Context *ctx)
-{
-    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
-    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
-    char code[128];
-
-    // destination is always predicate register (which is type bvec4).
-    const char *comp = (vecsize == 1) ?
-            get_METAL_comparison_string_scalar(ctx) :
-            get_METAL_comparison_string_vector(ctx);
-
-    make_METAL_destarg_assign(ctx, code, sizeof (code),
-                              "(%s %s %s)", src0, comp, src1);
-    output_line(ctx, "%s", code);
-} // emit_METAL_SETP
-
-static void emit_METAL_TEXLDL(Context *ctx)
-{
-    // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
-    // !!! FIXME:  from fragment shaders for some inexplicable reason.
-    // !!! FIXME:  Maybe Metal can do it, but I haven't looked into it yet.
-    emit_METAL_TEXLD(ctx);
-} // emit_METAL_TEXLDL
-
-static void emit_METAL_BREAKP(Context *ctx)
-{
-    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
-    output_line(ctx, "if (%s) { break; }", src0);
-} // emit_METAL_BREAKP
-
-static void emit_METAL_RESERVED(Context *ctx)
-{
-    // do nothing; fails in the state machine.
-} // emit_METAL_RESERVED
-
-#endif  // SUPPORT_PROFILE_METAL
-
+PREDECLARE_PROFILE(METAL)
+#endif
 
 #if !SUPPORT_PROFILE_ARB1
 #define PROFILE_EMITTER_ARB1(op)
@@ -6346,2219 +260,8 @@
 #undef AT_LEAST_ONE_PROFILE
 #define AT_LEAST_ONE_PROFILE 1
 #define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op,
-
-static inline const char *get_ARB1_register_string(Context *ctx,
-                        const RegisterType regtype, const int regnum,
-                        char *regnum_str, const size_t regnum_size)
-{
-    // turns out these are identical at the moment.
-    return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
-} // get_ARB1_register_string
-
-static const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
-                                                  const size_t buflen)
-{
-    const int scratch = allocate_scratch_register(ctx);
-    snprintf(buf, buflen, "scratch%d", scratch);
-    return buf;
-} // allocate_ARB1_scratch_reg_name
-
-static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
-                                                char *buf, const size_t buflen)
-{
-    snprintf(buf, buflen, "branch_label%d", id);
-    return buf;
-} // get_ARB1_branch_label_name
-
-static const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
-                                           const int regnum, char *buf,
-                                           const size_t buflen)
-{
-    // turns out these are identical at the moment.
-    return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
-} // get_ARB1_varname_in_buf
-
-static const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
-                                    const int regnum)
-{
-    // turns out these are identical at the moment.
-    return get_D3D_varname(ctx, rt, regnum);
-} // get_ARB1_varname
-
-
-static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
-                                                const int base, const int size,
-                                                char *buf, const size_t buflen)
-{
-    snprintf(buf, buflen, "c_array_%d_%d", base, size);
-    return buf;
-} // get_ARB1_const_array_varname_in_buf
-
-
-static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
-{
-    char buf[64];
-    get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
-    return StrDup(ctx, buf);
-} // get_ARB1_const_array_varname
-
-
-static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
-                                                  const SourceArgInfo *arg,
-                                                  char *buf, size_t buflen)
-{
-    // !!! FIXME: this can hit pathological cases where we look like this...
-    //
-    //    dp3 r1.xyz, t0_bx2, t0_bx2
-    //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
-    //
-    // ...which do a lot of duplicate work in arb1...
-    //
-    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
-    //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
-    //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
-    //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
-    //    DP3 r1.xyz, scratch0, scratch1;
-    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
-    //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
-    //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
-    //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
-    //    MAD r1.xyz, scratch0, scratch1, scratch2;
-    //
-    // ...notice that the dp3 calculates the same value into two scratch
-    //  registers. This case is easier to handle; just see if multiple
-    //  source args are identical, build it up once, and use the same
-    //  scratch register for multiple arguments in that opcode.
-    //  Even better still, only calculate things once across instructions,
-    //  and be smart about letting it linger in a scratch register until we
-    //  definitely don't need the calculation anymore. That's harder to
-    //  write, though.
-
-    char regnum_str[16] = { '\0' };
-
-    // !!! FIXME: use get_ARB1_varname_in_buf() instead?
-    const char *regtype_str = NULL;
-    if (!arg->relative)
-    {
-        regtype_str = get_ARB1_register_string(ctx, arg->regtype,
-                                               arg->regnum, regnum_str,
-                                               sizeof (regnum_str));
-    } // if
-
-    const char *rel_lbracket = "";
-    char rel_offset[32] = { '\0' };
-    const char *rel_rbracket = "";
-    char rel_swizzle[4] = { '\0' };
-    const char *rel_regtype_str = "";
-    if (arg->relative)
-    {
-        rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
-                                                  arg->relative_regnum,
-                                                  (char *) alloca(64), 64);
-
-        rel_swizzle[0] = '.';
-        rel_swizzle[1] = swizzle_channels[arg->relative_component];
-        rel_swizzle[2] = '\0';
-
-        if (!support_nv2(ctx))
-        {
-            // The address register in ARB1 only allows the '.x' component, so
-            //  we need to load the component we need from a temp vector
-            //  register into .x as needed.
-            assert(arg->relative_regtype == REG_TYPE_ADDRESS);
-            assert(arg->relative_regnum == 0);
-            if (ctx->last_address_reg_component != arg->relative_component)
-            {
-                output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
-                            arg->relative_regnum,
-                            swizzle_channels[arg->relative_component]);
-                ctx->last_address_reg_component = arg->relative_component;
-            } // if
-
-            rel_swizzle[1] = 'x';
-        } // if
-
-        if (arg->regtype == REG_TYPE_INPUT)
-            regtype_str = "vertex.attrib";
-        else
-        {
-            assert(arg->regtype == REG_TYPE_CONST);
-            const int arrayidx = arg->relative_array->index;
-            const int arraysize = arg->relative_array->count;
-            const int offset = arg->regnum - arrayidx;
-            assert(offset >= 0);
-            regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
-                                           arraysize, (char *) alloca(64), 64);
-            if (offset != 0)
-                snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
-        } // else
-
-        rel_lbracket = "[";
-        rel_rbracket = "]";
-    } // if
-
-    // This is the source register with everything but swizzle and source mods.
-    snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
-             rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
-             rel_rbracket);
-
-    // Some of the source mods need to generate instructions to a temp
-    //  register, in which case we'll replace the register name.
-    const SourceMod mod = arg->src_mod;
-    const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
-                          ((mod == SRCMOD_ABS) && support_nv2(ctx)) );
-
-    if (!inplace)
-    {
-        const size_t len = 64;
-        char *stackbuf = (char *) alloca(len);
-        regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
-        regnum_str[0] = '\0'; // move value to scratch register.
-        rel_lbracket = "";   // scratch register won't use array.
-        rel_rbracket = "";
-        rel_offset[0] = '\0';
-        rel_swizzle[0] = '\0';
-        rel_regtype_str = "";
-    } // if
-
-    const char *premod_str = "";
-    const char *postmod_str = "";
-    switch (mod)
-    {
-        case SRCMOD_NEGATE:
-            premod_str = "-";
-            break;
-
-        case SRCMOD_BIASNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_BIAS:
-            output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
-                        regtype_str, buf);
-            break;
-
-        case SRCMOD_SIGNNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_SIGN:
-            output_line(ctx,
-                "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
-                regtype_str, buf);
-            break;
-
-        case SRCMOD_COMPLEMENT:
-            output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
-                        regtype_str, buf);
-            break;
-
-        case SRCMOD_X2NEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_X2:
-            output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
-                        regtype_str, buf);
-            break;
-
-        case SRCMOD_DZ:
-            fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
-            postmod_str = "_dz";
-            break;
-
-        case SRCMOD_DW:
-            fail(ctx, "SRCMOD_DW currently unsupported in arb1");
-            postmod_str = "_dw";
-            break;
-
-        case SRCMOD_ABSNEGATE:
-            premod_str = "-";
-            // fall through.
-        case SRCMOD_ABS:
-            if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
-                output_line(ctx, "ABS %s, %s;", regtype_str, buf);
-            else
-            {
-                premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
-                postmod_str = "|";
-            } // else
-            break;
-
-        case SRCMOD_NOT:
-            fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
-            premod_str = "!";
-            break;
-
-        case SRCMOD_NONE:
-        case SRCMOD_TOTAL:
-             break;  // stop compiler whining.
-    } // switch
-
-    char swizzle_str[6];
-    size_t i = 0;
-
-    if (support_nv4(ctx))  // vFace must be output as "vFace.x" in nv4.
-    {
-        if (arg->regtype == REG_TYPE_MISCTYPE)
-        {
-            if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
-            {
-                swizzle_str[i++] = '.';
-                swizzle_str[i++] = 'x';
-            } // if
-        } // if
-    } // if
-
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !no_swizzle(arg->swizzle))
-    {
-        swizzle_str[i++] = '.';
-
-        // .xxxx is the same as .x, but .xx is illegal...scalar or full!
-        if (replicate_swizzle(arg->swizzle))
-            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
-        else
-        {
-            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
-            swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
-            swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
-            swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
-        } // else
-    } // if
-    swizzle_str[i] = '\0';
-    assert(i < sizeof (swizzle_str));
-
-    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
-             regtype_str, regnum_str, rel_lbracket,
-             rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
-             swizzle_str, postmod_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_ARB1_srcarg_string_in_buf
-
-static const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
-                                            const size_t buflen)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-    return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
-} // get_ARB1_destarg_varname
-
-static const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
-                                           char *buf, const size_t buflen)
-{
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-    return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
-} // get_ARB1_srcarg_varname
-
-
-static const char *make_ARB1_destarg_string(Context *ctx, char *buf,
-                                            const size_t buflen)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-
-    *buf = '\0';
-
-    const char *sat_str = "";
-    if (arg->result_mod & MOD_SATURATE)
-    {
-        // nv4 can use ".SAT" in all program types.
-        // For less than nv4, the "_SAT" modifier is only available in
-        //  fragment shaders. Every thing else will fake it later in
-        //  emit_ARB1_dest_modifiers() ...
-        if (support_nv4(ctx))
-            sat_str = ".SAT";
-        else if (shader_is_pixel(ctx))
-            sat_str = "_SAT";
-    } // if
-
-    const char *pp_str = "";
-    if (arg->result_mod & MOD_PP)
-    {
-        // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
-        //  okay. The spec says lots of Direct3D implementations ignore the
-        //  flag anyhow.
-        if (support_nv4(ctx))
-            pp_str = "H";
-    } // if
-
-    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
-    assert((arg->result_mod & MOD_CENTROID) == 0);
-
-    char regnum_str[16];
-    const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
-                                                       arg->regnum, regnum_str,
-                                                       sizeof (regnum_str));
-    if (regtype_str == NULL)
-    {
-        fail(ctx, "Unknown destination register type.");
-        return buf;
-    } // if
-
-    char writemask_str[6];
-    size_t i = 0;
-    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
-    if (!scalar && !writemask_xyzw(arg->writemask))
-    {
-        writemask_str[i++] = '.';
-        if (arg->writemask0) writemask_str[i++] = 'x';
-        if (arg->writemask1) writemask_str[i++] = 'y';
-        if (arg->writemask2) writemask_str[i++] = 'z';
-        if (arg->writemask3) writemask_str[i++] = 'w';
-    } // if
-    writemask_str[i] = '\0';
-    assert(i < sizeof (writemask_str));
-
-    //const char *pred_left = "";
-    //const char *pred_right = "";
-    char pred[32] = { '\0' };
-    if (ctx->predicated)
-    {
-        fail(ctx, "dest register predication currently unsupported in arb1");
-        return buf;
-        //pred_left = "(";
-        //pred_right = ") ";
-        make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
-                                       pred, sizeof (pred));
-    } // if
-
-    snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
-             regtype_str, regnum_str, writemask_str);
-    // !!! FIXME: make sure the scratch buffer was large enough.
-    return buf;
-} // make_ARB1_destarg_string
-
-
-static void emit_ARB1_dest_modifiers(Context *ctx)
-{
-    const DestArgInfo *arg = &ctx->dest_arg;
-
-    if (arg->result_shift != 0x0)
-    {
-        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        const char *multiplier = NULL;
-
-        switch (arg->result_shift)
-        {
-            case 0x1: multiplier = "2.0"; break;
-            case 0x2: multiplier = "4.0"; break;
-            case 0x3: multiplier = "8.0"; break;
-            case 0xD: multiplier = "0.125"; break;
-            case 0xE: multiplier = "0.25"; break;
-            case 0xF: multiplier = "0.5"; break;
-        } // switch
-
-        if (multiplier != NULL)
-        {
-            char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
-            output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
-        } // if
-    } // if
-
-    if (arg->result_mod & MOD_SATURATE)
-    {
-        // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
-        if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
-        {
-            char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
-            char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-            output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
-            output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
-        } // if
-    } // if
-} // emit_ARB1_dest_modifiers
-
-
-static const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
-                                           char *buf, const size_t buflen)
-{
-    if (idx >= STATICARRAYLEN(ctx->source_args))
-    {
-        fail(ctx, "Too many source args");
-        *buf = '\0';
-        return buf;
-    } // if
-
-    const SourceArgInfo *arg = &ctx->source_args[idx];
-    return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen);
-} // make_ARB1_srcarg_string
-
-static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    output_line(ctx, "%s%s, %s;", opcode, dst, src0);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_opcode_ds
-
-static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-    output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_opcode_dss
-
-static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
-{
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_opcode_dsss
-
-
-#define EMIT_ARB1_OPCODE_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_D_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_d(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_S_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_s(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_ss(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_ds(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_dss(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_dsss(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        emit_ARB1_opcode_dssss(ctx, #op); \
-    }
-#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
-    static void emit_ARB1_##op(Context *ctx) { \
-        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
-    }
-
-
-static void emit_ARB1_start(Context *ctx, const char *profilestr)
-{
-    const char *shader_str = NULL;
-    const char *shader_full_str = NULL;
-    if (shader_is_vertex(ctx))
-    {
-        shader_str = "vp";
-        shader_full_str = "vertex";
-    } // if
-    else if (shader_is_pixel(ctx))
-    {
-        shader_str = "fp";
-        shader_full_str = "fragment";
-    } // else if
-    else
-    {
-        failf(ctx, "Shader type %u unsupported in this profile.",
-              (uint) ctx->shader_type);
-        return;
-    } // if
-
-    set_output(ctx, &ctx->preflight);
-
-    if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
-        output_line(ctx, "!!ARB%s1.0", shader_str);
-
-    #if SUPPORT_PROFILE_ARB1_NV
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
-    {
-        ctx->profile_supports_nv2 = 1;
-        output_line(ctx, "!!ARB%s1.0", shader_str);
-        output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
-    } // else if
-
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
-    {
-        // there's no NV_fragment_program3, so just use 2.
-        const int ver = shader_is_pixel(ctx) ? 2 : 3;
-        ctx->profile_supports_nv2 = 1;
-        ctx->profile_supports_nv3 = 1;
-        output_line(ctx, "!!ARB%s1.0", shader_str);
-        output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
-    } // else if
-
-    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
-    {
-        ctx->profile_supports_nv2 = 1;
-        ctx->profile_supports_nv3 = 1;
-        ctx->profile_supports_nv4 = 1;
-        output_line(ctx, "!!NV%s4.0", shader_str);
-    } // else if
-    #endif
-
-    else
-    {
-        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
-    } // else
-
-    set_output(ctx, &ctx->mainline);
-} // emit_ARB1_start
-
-static void emit_ARB1_end(Context *ctx)
-{
-    // ps_1_* writes color to r0 instead oC0. We move it to the right place.
-    // We don't have to worry about a RET opcode messing this up, since
-    //  RET isn't available before ps_2_0.
-    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
-    {
-        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
-        output_line(ctx, "MOV oC0, r0;");
-    } // if
-
-    output_line(ctx, "END");
-} // emit_ARB1_end
-
-static void emit_ARB1_phase(Context *ctx)
-{
-    // no-op in arb1.
-} // emit_ARB1_phase
-
-static inline const char *arb1_float_temp(const Context *ctx)
-{
-    // nv4 lets you specify data type.
-    return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP";
-} // arb1_float_temp
-
-static void emit_ARB1_finalize(Context *ctx)
-{
-    push_output(ctx, &ctx->preflight);
-
-    if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
-        output_line(ctx, "OPTION ARB_position_invariant;");
-
-    if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
-        output_line(ctx, "OPTION ARB_draw_buffers;");
-
-    pop_output(ctx);
-
-    const char *tmpstr = arb1_float_temp(ctx);
-    int i;
-    push_output(ctx, &ctx->globals);
-    for (i = 0; i < ctx->max_scratch_registers; i++)
-    {
-        char buf[64];
-        allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        output_line(ctx, "%s %s;", tmpstr, buf);
-    } // for
-
-    // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
-    if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
-    {
-        // set up temps for nv2 REP/ENDREP emulation through branching.
-        for (i = 0; i < ctx->max_reps; i++)
-            output_line(ctx, "TEMP rep%d;", i);
-    } // if
-
-    pop_output(ctx);
-    assert(ctx->scratch_registers == ctx->max_scratch_registers);
-} // emit_ARB1_finalize
-
-static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
-{
-    // !!! FIXME: dependency on ARB1 profile.  // !!! FIXME about FIXME: huh?
-    char varname[64];
-    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    push_output(ctx, &ctx->globals);
-    switch (regtype)
-    {
-        case REG_TYPE_ADDRESS:
-            if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
-            {
-                // We have to map texture registers to temps for ps_1_1, since
-                //  they work like temps, initialize with tex coords, and the
-                //  ps_1_1 TEX opcode expects to overwrite it.
-                if (!shader_version_atleast(ctx, 1, 4))
-                {
-                    output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
-                    push_output(ctx, &ctx->mainline_top);
-                    output_line(ctx, "MOV %s, fragment.texcoord[%d];",
-                                varname, regnum);
-                    pop_output(ctx);
-                } // if
-                break;
-            } // if
-
-            // nv4 replaced address registers with generic int registers.
-            if (support_nv4(ctx))
-                output_line(ctx, "INT TEMP %s;", varname);
-            else
-            {
-                // nv2 has four-component address already, but stock arb1 has
-                //  to emulate it in a temporary, and move components to the
-                //  scalar ADDRESS register on demand.
-                output_line(ctx, "ADDRESS %s;", varname);
-                if (!support_nv2(ctx))
-                    output_line(ctx, "TEMP addr%d;", regnum);
-            } // else
-            break;
-
-        //case REG_TYPE_PREDICATE:
-        //    output_line(ctx, "bvec4 %s;", varname);
-        //    break;
-        case REG_TYPE_TEMP:
-            output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
-            break;
-        //case REG_TYPE_LOOP:
-        //    break; // no-op. We declare these in for loops at the moment.
-        //case REG_TYPE_LABEL:
-        //    break; // no-op. If we see it here, it means we optimized it out.
-        default:
-            fail(ctx, "BUG: we used a register we don't know how to define.");
-            break;
-    } // switch
-    pop_output(ctx);
-} // emit_ARB1_global
-
-static void emit_ARB1_array(Context *ctx, VariableList *var)
-{
-    // All uniforms are now packed tightly into the program.local array,
-    //  instead of trying to map them to the d3d registers. So this needs to
-    //  map to the next piece of the array we haven't used yet. Thankfully,
-    //  arb1 lets you make a PARAM array that maps to a subset of another
-    //  array; we don't need to do offsets, since myarray[0] can map to
-    //  program.local[5] without any extra math from us.
-    const int base = var->index;
-    const int size = var->count;
-    const int arb1base = ctx->uniform_float4_count +
-                         ctx->uniform_int4_count +
-                         ctx->uniform_bool_count;
-    char varname[64];
-    get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname,
-                size, arb1base, (arb1base + size) - 1);
-    pop_output(ctx);
-    var->emit_position = arb1base;
-} // emit_ARB1_array
-
-static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
-                                  int base, int size)
-{
-    char varname[64];
-    get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
-    int i;
-
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "PARAM %s[%d] = {", varname, size);
-    ctx->indent++;
-
-    for (i = 0; i < size; i++)
-    {
-        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
-            clist = clist->next;
-        assert(clist->constant.index == (base + i));
-
-        char val0[32];
-        char val1[32];
-        char val2[32];
-        char val3[32];
-        floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
-        floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
-        floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
-        floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
-
-        output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3,
-                    (i < (size-1)) ? "," : "");
-
-        clist = clist->next;
-    } // for
-
-    ctx->indent--;
-    output_line(ctx, "};");
-    pop_output(ctx);
-} // emit_ARB1_const_array
-
-static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
-                              const VariableList *var)
-{
-    // We pack these down into the program.local array, so if we only use
-    //  register c439, it'll actually map to program.local[0]. This will
-    //  prevent overflows when we actually have enough resources to run.
-
-    const char *arrayname = "program.local";
-    int index = 0;
-
-    char varname[64];
-    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    push_output(ctx, &ctx->globals);
-
-    if (var == NULL)
-    {
-        // all types share one array (rather, all types convert to float4).
-        index = ctx->uniform_float4_count + ctx->uniform_int4_count +
-                ctx->uniform_bool_count;
-    } // if
-
-    else
-    {
-        const int arraybase = var->index;
-        if (var->constant)
-        {
-            const int arraysize = var->count;
-            arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase,
-                                        arraysize, (char *) alloca(64), 64);
-            index = (regnum - arraybase);
-        } // if
-        else
-        {
-            assert(var->emit_position != -1);
-            index = (regnum - arraybase) + var->emit_position;
-        } // else
-    } // else
-
-    output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index);
-    pop_output(ctx);
-} // emit_ARB1_uniform
-
-static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
-{
-    // this is mostly a no-op...you don't predeclare samplers in arb1.
-
-    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
-    {
-        const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
-                          ctx->uniform_bool_count;
-        char var[64];
-        get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
-        push_output(ctx, &ctx->globals);
-        output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
-        output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
-        pop_output(ctx);
-        ctx->uniform_float4_count += 2;
-    } // if
-} // emit_ARB1_sampler
-
-// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
-static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
-                                MOJOSHADER_usage usage, int index, int wmask,
-                                int flags)
-{
-    // !!! FIXME: this function doesn't deal with write masks at all yet!
-    const char *usage_str = NULL;
-    const char *arrayleft = "";
-    const char *arrayright = "";
-    char index_str[16] = { '\0' };
-
-    char varname[64];
-    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
-
-    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
-
-    if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
-        snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-
-    if (shader_is_vertex(ctx))
-    {
-        // pre-vs3 output registers.
-        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
-        //  output registers.
-        if (!shader_version_atleast(ctx, 3, 0))
-        {
-            if (regtype == REG_TYPE_RASTOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                index = regnum;
-                switch ((const RastOutType) regnum)
-                {
-                    case RASTOUT_TYPE_POSITION:
-                        usage = MOJOSHADER_USAGE_POSITION;
-                        break;
-                    case RASTOUT_TYPE_FOG:
-                        usage = MOJOSHADER_USAGE_FOG;
-                        break;
-                    case RASTOUT_TYPE_POINT_SIZE:
-                        usage = MOJOSHADER_USAGE_POINTSIZE;
-                        break;
-                } // switch
-            } // if
-
-            else if (regtype == REG_TYPE_ATTROUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_COLOR;
-                index = regnum;
-            } // else if
-
-            else if (regtype == REG_TYPE_TEXCRDOUT)
-            {
-                regtype = REG_TYPE_OUTPUT;
-                usage = MOJOSHADER_USAGE_TEXCOORD;
-                index = regnum;
-            } // else if
-        } // if
-
-        // to avoid limitations of various GL entry points for input
-        // attributes (glSecondaryColorPointer() can only take 3 component
-        // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
-        // issues), we set up all inputs as generic vertex attributes, so we
-        // can pass data in just about any form, and ignore the built-in GLSL
-        // attributes like gl_SecondaryColor. Output needs to use the the
-        // built-ins, though, but we don't have to worry about the GL entry
-        // point limitations there.
-
-        if (regtype == REG_TYPE_INPUT)
-        {
-            const int attr = ctx->assigned_vertex_attributes++;
-            push_output(ctx, &ctx->globals);
-            output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
-            pop_output(ctx);
-        } // if
-
-        else if (regtype == REG_TYPE_OUTPUT)
-        {
-            switch (usage)
-            {
-                case MOJOSHADER_USAGE_POSITION:
-                    ctx->arb1_wrote_position = 1;
-                    usage_str = "result.position";
-                    break;
-                case MOJOSHADER_USAGE_POINTSIZE:
-                    usage_str = "result.pointsize";
-                    break;
-                case MOJOSHADER_USAGE_COLOR:
-                    index_str[0] = '\0';  // no explicit number.
-                    if (index == 0)
-                        usage_str = "result.color.primary";
-                    else if (index == 1)
-                        usage_str = "result.color.secondary";
-                    break;
-                case MOJOSHADER_USAGE_FOG:
-                    usage_str = "result.fogcoord";
-                    break;
-                case MOJOSHADER_USAGE_TEXCOORD:
-                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-                    usage_str = "result.texcoord";
-                    arrayleft = "[";
-                    arrayright = "]";
-                    break;
-                default:
-                    // !!! FIXME: we need to deal with some more built-in varyings here.
-                    break;
-            } // switch
-
-            // !!! FIXME: the #define is a little hacky, but it means we don't
-            // !!! FIXME:  have to track these separately if this works.
-            push_output(ctx, &ctx->globals);
-            // no mapping to built-in var? Just make it a regular global, pray.
-            if (usage_str == NULL)
-                output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
-            else
-            {
-                output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
-                            arrayleft, index_str, arrayright);
-            } // else
-            pop_output(ctx);
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown vertex shader attribute register");
-        } // else
-    } // if
-
-    else if (shader_is_pixel(ctx))
-    {
-        const char *paramtype_str = "ATTRIB";
-
-        // samplers DCLs get handled in emit_ARB1_sampler().
-
-        if (flags & MOD_CENTROID)
-        {
-            if (!support_nv4(ctx))  // GL_NV_fragment_program4 adds centroid.
-            {
-                // !!! FIXME: should we just wing it without centroid here?
-                failf(ctx, "centroid unsupported in %s profile",
-                      ctx->profile->name);
-                return;
-            } // if
-
-            paramtype_str = "CENTROID ATTRIB";
-        } // if
-
-        if (regtype == REG_TYPE_COLOROUT)
-        {
-            paramtype_str = "OUTPUT";
-            usage_str = "result.color";
-            if (ctx->have_multi_color_outputs)
-            {
-                // We have to gamble that you have GL_ARB_draw_buffers.
-                // You probably do at this point if you have a sane setup.
-                snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
-                arrayleft = "[";
-                arrayright = "]";
-            } // if
-        } // if
-
-        else if (regtype == REG_TYPE_DEPTHOUT)
-        {
-            paramtype_str = "OUTPUT";
-            usage_str = "result.depth";
-        } // else if
-
-        // !!! FIXME: can you actualy have a texture register with COLOR usage?
-        else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
-        {
-            if (usage == MOJOSHADER_USAGE_TEXCOORD)
-            {
-                // ps_1_1 does a different hack for this attribute.
-                //  Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
-                if (shader_version_atleast(ctx, 1, 4))
-                {
-                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
-                    usage_str = "fragment.texcoord";
-                    arrayleft = "[";
-                    arrayright = "]";
-                } // if
-            } // if
-
-            else if (usage == MOJOSHADER_USAGE_COLOR)
-            {
-                index_str[0] = '\0';  // no explicit number.
-                if (index == 0)
-                    usage_str = "fragment.color.primary";
-                else if (index == 1)
-                    usage_str = "fragment.color.secondary";
-                else
-                    fail(ctx, "unsupported color index");
-            } // else if
-        } // else if
-
-        else if (regtype == REG_TYPE_MISCTYPE)
-        {
-            const MiscTypeType mt = (MiscTypeType) regnum;
-            if (mt == MISCTYPE_TYPE_FACE)
-            {
-                if (support_nv4(ctx))  // FINALLY, a vFace equivalent in nv4!
-                {
-                    index_str[0] = '\0';  // no explicit number.
-                    usage_str = "fragment.facing";
-                } // if
-                else
-                {
-                    failf(ctx, "vFace unsupported in %s profile",
-                          ctx->profile->name);
-                } // else
-            } // if
-            else if (mt == MISCTYPE_TYPE_POSITION)
-            {
-                index_str[0] = '\0';  // no explicit number.
-                usage_str = "fragment.position";  // !!! FIXME: is this the same coord space as D3D?
-            } // else if
-            else
-            {
-                fail(ctx, "BUG: unhandled misc register");
-            } // else
-        } // else if
-
-        else
-        {
-            fail(ctx, "unknown pixel shader attribute register");
-        } // else
-
-        if (usage_str != NULL)
-        {
-            push_output(ctx, &ctx->globals);
-            output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
-                        usage_str, arrayleft, index_str, arrayright);
-            pop_output(ctx);
-        } // if
-    } // else if
-
-    else
-    {
-        fail(ctx, "Unknown shader type");  // state machine should catch this.
-    } // else
-} // emit_ARB1_attribute
-
-static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }
-
-static void emit_ARB1_NOP(Context *ctx)
-{
-    // There is no NOP in arb1. Just don't output anything here.
-} // emit_ARB1_NOP
-
-EMIT_ARB1_OPCODE_DS_FUNC(MOV)
-EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
-EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
-EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
-EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
-EMIT_ARB1_OPCODE_DS_FUNC(RCP)
-
-static void emit_ARB1_RSQ(Context *ctx)
-{
-    // nv4 doesn't force abs() on this, so negative values will generate NaN.
-    // The spec says you should force the abs() yourself.
-    if (!support_nv4(ctx))
-    {
-        emit_ARB1_opcode_ds(ctx, "RSQ");  // pre-nv4 implies ABS.
-        return;
-    } // if
-
-    // we can optimize this to use nv2's |abs| construct in some cases.
-    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
-         (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
-         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
-        ctx->source_args[0].src_mod = SRCMOD_ABS;
-
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-
-    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
-        output_line(ctx, "RSQ%s, %s;", dst, src0);
-    else
-    {
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        output_line(ctx, "ABS %s, %s;", buf, src0);
-        output_line(ctx, "RSQ%s, %s.x;", dst, buf);
-    } // else
-
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_RSQ
-
-EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
-EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
-EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
-EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
-EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
-EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
-
-static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
-
-static void arb1_log(Context *ctx, const char *opcode)
-{
-    // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too
-    // we can optimize this to use nv2's |abs| construct in some cases.
-    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
-         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
-        ctx->source_args[0].src_mod = SRCMOD_ABS;
-
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-
-    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
-        output_line(ctx, "%s%s, %s;", opcode, dst, src0);
-    else
-    {
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        output_line(ctx, "ABS %s, %s;", buf, src0);
-        output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
-    } // else
-
-    emit_ARB1_dest_modifiers(ctx);
-} // arb1_log
-
-
-static void emit_ARB1_LOG(Context *ctx)
-{
-    arb1_log(ctx, "LG2");
-} // emit_ARB1_LOG
-
-
-EMIT_ARB1_OPCODE_DS_FUNC(LIT)
-EMIT_ARB1_OPCODE_DSS_FUNC(DST)
-
-static void emit_ARB1_LRP(Context *ctx)
-{
-    if (shader_is_pixel(ctx))  // fragment shaders have a matching LRP opcode.
-        emit_ARB1_opcode_dsss(ctx, "LRP");
-    else
-    {
-        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-        char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-
-        // LRP is: dest = src2 + src0 * (src1 - src2)
-        output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2);
-        output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2);
-        emit_ARB1_dest_modifiers(ctx);
-    } // else
-} // emit_ARB1_LRP
-
-EMIT_ARB1_OPCODE_DS_FUNC(FRC)
-
-static void arb1_MxXy(Context *ctx, const int x, const int y)
-{
-    DestArgInfo *dstarg = &ctx->dest_arg;
-    const int origmask = dstarg->writemask;
-    char src0[64];
-    int i;
-
-    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-
-    for (i = 0; i < y; i++)
-    {
-        char dst[64];
-        char row[64];
-        make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row));
-        set_dstarg_writemask(dstarg, 1 << i);
-        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row);
-    } // for
-
-    set_dstarg_writemask(dstarg, origmask);
-    emit_ARB1_dest_modifiers(ctx);
-} // arb1_MxXy
-
-static void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); }
-static void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); }
-static void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); }
-static void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); }
-static void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); }
-
-static void emit_ARB1_CALL(Context *ctx)
-{
-    if (!support_nv2(ctx))  // no branching in stock ARB1.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-        return;
-    } // if
-
-    char labelstr[64];
-    get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
-    output_line(ctx, "CAL %s;", labelstr);
-} // emit_ARB1_CALL
-
-static void emit_ARB1_CALLNZ(Context *ctx)
-{
-    // !!! FIXME: if src1 is a constbool that's true, we can remove the
-    // !!! FIXME:  if. If it's false, we can make this a no-op.
-
-    if (!support_nv2(ctx))  // no branching in stock ARB1.
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    else
-    {
-        // !!! FIXME: double-check this.
-        char labelstr[64];
-        char scratch[64];
-        char src1[64];
-        get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
-        get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
-        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
-        output_line(ctx, "MOVC %s, %s;", scratch, src1);
-        output_line(ctx, "CAL %s (NE.x);", labelstr);
-    } // else
-} // emit_ARB1_CALLNZ
-
-// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
-
-static void emit_ARB1_RET(Context *ctx)
-{
-    // don't fail() if no nv2...maybe we're just ending the mainline?
-    //  if we're ending a LABEL that had no CALL, this would all be written
-    //  to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will
-    //  just end up throwing all this code out.
-    if (support_nv2(ctx))  // no branching in stock ARB1.
-        output_line(ctx, "RET;");
-    set_output(ctx, &ctx->mainline); // in case we were ignoring this function.
-} // emit_ARB1_RET
-
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
-
-static void emit_ARB1_LABEL(Context *ctx)
-{
-    if (!support_nv2(ctx))  // no branching in stock ARB1.
-        return;  // don't fail()...maybe we never use it, but do fail in CALL.
-
-    const int label = ctx->source_args[0].regnum;
-    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
-
-    // MSDN specs say CALL* has to come before the LABEL, so we know if we
-    //  can ditch the entire function here as unused.
-    if (reg == NULL)
-        set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.
-
-    // !!! FIXME: it would be nice if we could determine if a function is
-    // !!! FIXME:  only called once and, if so, forcibly inline it.
-
-    //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
-    char labelstr[64];
-    get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
-    output_line(ctx, "%s:", labelstr);
-} // emit_ARB1_LABEL
-
-
-static void emit_ARB1_POW(Context *ctx)
-{
-    // we can optimize this to use nv2's |abs| construct in some cases.
-    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
-         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
-        ctx->source_args[0].src_mod = SRCMOD_ABS;
-
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-
-    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
-        output_line(ctx, "POW%s, %s, %s;", dst, src0, src1);
-    else
-    {
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        output_line(ctx, "ABS %s, %s;", buf, src0);
-        output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1);
-    } // else
-
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_POW
-
-static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
-
-static void emit_ARB1_SGN(Context *ctx)
-{
-    if (support_nv2(ctx))
-        emit_ARB1_opcode_ds(ctx, "SSG");
-    else
-    {
-        char dst[64];
-        char src0[64];
-        char scratch1[64];
-        char scratch2[64];
-        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
-        allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));
-        output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
-        output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
-        output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2);
-        emit_ARB1_dest_modifiers(ctx);
-    } // else
-} // emit_ARB1_SGN
-
-EMIT_ARB1_OPCODE_DS_FUNC(ABS)
-
-static void emit_ARB1_NRM(Context *ctx)
-{
-    // nv2 fragment programs (and anything nv4) have a real NRM.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        emit_ARB1_opcode_ds(ctx, "NRM");
-    else
-    {
-        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0);
-        output_line(ctx, "RSQ %s.w, %s.w;", buf, buf);
-        output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0);
-        emit_ARB1_dest_modifiers(ctx);
-    } // else
-} // emit_ARB1_NRM
-
-
-static void emit_ARB1_SINCOS(Context *ctx)
-{
-    // we don't care about the temp registers that <= sm2 demands; ignore them.
-    const int mask = ctx->dest_arg.writemask;
-
-    // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
-    if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
-    {
-        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        if (writemask_x(mask))
-            output_line(ctx, "COS%s, %s;", dst, src0);
-        else if (writemask_y(mask))
-            output_line(ctx, "SIN%s, %s;", dst, src0);
-        else if (writemask_xy(mask))
-            output_line(ctx, "SCS%s, %s;", dst, src0);
-    } // if
-
-    // nv2+ profiles have sin and cos opcodes.
-    else if (support_nv2(ctx))
-    {
-        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        if (writemask_x(mask))
-            output_line(ctx, "COS %s.x, %s;", dst, src0);
-        else if (writemask_y(mask))
-            output_line(ctx, "SIN %s.y, %s;", dst, src0);
-        else if (writemask_xy(mask))
-        {
-            output_line(ctx, "SIN %s.x, %s;", dst, src0);
-            output_line(ctx, "COS %s.y, %s;", dst, src0);
-        } // else if
-    } // if
-
-    else  // big nasty.
-    {
-        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-        char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
-        const int need_sin = (writemask_x(mask) || writemask_xy(mask));
-        const int need_cos = (writemask_y(mask) || writemask_xy(mask));
-        char scratch[64];
-
-        if (need_sin || need_cos)
-            allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
-
-        // These sin() and cos() approximations originally found here:
-        //    http://www.devmaster.net/forums/showthread.php?t=5784
-        //
-        // const float B = 4.0f / M_PI;
-        // const float C = -4.0f / (M_PI * M_PI);
-        // float y = B * x + C * x * fabs(x);
-        //
-        // // optional better precision...
-        // const float P = 0.225f;
-        // y = P * (y * fabs(y) - y) + y;
-        //
-        //
-        // That first thing can be reduced to:
-        // const float y = ((1.2732395447351626861510701069801f * x) +
-        //             ((-0.40528473456935108577551785283891f * x) * fabs(x)));
-
-        if (need_sin)
-        {
-            // !!! FIXME: use SRCMOD_ABS here?
-            output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
-            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
-            output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
-            output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
-        } // if
-
-        // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
-        //  if (x+(M_PI/2) > M_PI)
-        //      x -= 2 * M_PI;
-        //
-        // which is...
-        //  if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
-        //      x += -6.283185482025146484375;
-
-        if (need_cos)
-        {
-            output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
-            output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
-            output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
-            output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
-            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
-            output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
-            output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
-        } // if
-    } // else
-
-    // !!! FIXME: might not have done anything. Don't emit if we didn't.
-    if (!isfail(ctx))
-        emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_SINCOS
-
-
-static void emit_ARB1_REP(Context *ctx)
-{
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-
-    // nv2 fragment programs (and everything nv4) have a real REP.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        output_line(ctx, "REP %s;", src0);
-
-    else if (support_nv2(ctx))
-    {
-        // no REP, but we can use branches.
-        char failbranch[32];
-        char topbranch[32];
-        const int toplabel = allocate_branch_label(ctx);
-        const int faillabel = allocate_branch_label(ctx);
-        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
-        get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
-
-        assert(((size_t) ctx->branch_labels_stack_index) <
-                STATICARRAYLEN(ctx->branch_labels_stack)-1);
-
-        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
-        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel;
-
-        char scratch[32];
-        snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
-        output_line(ctx, "MOVC %s.x, %s;", scratch, src0);
-        output_line(ctx, "BRA %s (LE.x);", failbranch);
-        output_line(ctx, "%s:", topbranch);
-    } // else if
-
-    else  // stock ARB1 has no branching.
-    {
-        fail(ctx, "branching unsupported in this profile");
-    } // else
-} // emit_ARB1_REP
-
-
-static void emit_ARB1_ENDREP(Context *ctx)
-{
-    // nv2 fragment programs (and everything nv4) have a real ENDREP.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        output_line(ctx, "ENDREP;");
-
-    else if (support_nv2(ctx))
-    {
-        // no ENDREP, but we can use branches.
-        assert(ctx->branch_labels_stack_index >= 2);
-
-        char failbranch[32];
-        char topbranch[32];
-        const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
-        const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
-        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
-        get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
-
-        char scratch[32];
-        snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
-        output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch);
-        output_line(ctx, "BRA %s (GT.x);", topbranch);
-        output_line(ctx, "%s:", failbranch);
-    } // else if
-
-    else  // stock ARB1 has no branching.
-    {
-        fail(ctx, "branching unsupported in this profile");
-    } // else
-} // emit_ARB1_ENDREP
-
-
-static void nv2_if(Context *ctx)
-{
-    // The condition code register MUST be set up before this!
-    // nv2 fragment programs (and everything nv4) have a real IF.
-    if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
-        output_line(ctx, "IF EQ.x;");
-    else
-    {
-        // there's no IF construct, but we can use a branch to a label.
-        char failbranch[32];
-        const int label = allocate_branch_label(ctx);
-        get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch));
-
-        assert(((size_t) ctx->branch_labels_stack_index)
-                 < STATICARRAYLEN(ctx->branch_labels_stack));
-
-        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label;
-
-        // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
-        output_line(ctx, "BRA %s (EQ.x);", failbranch);
-    } // else
-} // nv2_if
-
-
-static void emit_ARB1_IF(Context *ctx)
-{
-    if (support_nv2(ctx))
-    {
-        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
-        char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
-        output_line(ctx, "MOVC %s.x, %s;", buf, src0);
-        nv2_if(ctx);
-    } // if
-
-    else  // stock ARB1 has no branching.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    } // else
-} // emit_ARB1_IF
-
-
-static void emit_ARB1_ELSE(Context *ctx)
-{
-    // nv2 fragment programs (and everything nv4) have a real ELSE.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        output_line(ctx, "ELSE;");
-
-    else if (support_nv2(ctx))
-    {
-        // there's no ELSE construct, but we can use a branch to a label.
-        assert(ctx->branch_labels_stack_index > 0);
-
-        // At the end of the IF block, unconditionally jump to the ENDIF.
-        const int endlabel = allocate_branch_label(ctx);
-        char endbranch[32];
-        get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
-        output_line(ctx, "BRA %s;", endbranch);
-
-        // Now mark the ELSE section with a lable.
-        const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
-        char elsebranch[32];
-        get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch));
-        output_line(ctx, "%s:", elsebranch);
-
-        // Replace the ELSE label with the ENDIF on the label stack.
-        ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel;
-    } // else if
-
-    else  // stock ARB1 has no branching.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    } // else
-} // emit_ARB1_ELSE
-
-
-static void emit_ARB1_ENDIF(Context *ctx)
-{
-    // nv2 fragment programs (and everything nv4) have a real ENDIF.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        output_line(ctx, "ENDIF;");
-
-    else if (support_nv2(ctx))
-    {
-        // there's no ENDIF construct, but we can use a branch to a label.
-        assert(ctx->branch_labels_stack_index > 0);
-        const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
-        char endbranch[32];
-        get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
-        output_line(ctx, "%s:", endbranch);
-    } // if
-
-    else  // stock ARB1 has no branching.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    } // else
-} // emit_ARB1_ENDIF
-
-
-static void emit_ARB1_BREAK(Context *ctx)
-{
-    // nv2 fragment programs (and everything nv4) have a real BREAK.
-    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
-        output_line(ctx, "BRK;");
-
-    else if (support_nv2(ctx))
-    {
-        // no BREAK, but we can use branches.
-        assert(ctx->branch_labels_stack_index >= 2);
-        const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
-        char failbranch[32];
-        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
-        output_line(ctx, "BRA %s;", failbranch);
-    } // else if
-
-    else  // stock ARB1 has no branching.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    } // else
-} // emit_ARB1_BREAK
-
-
-static void emit_ARB1_MOVA(Context *ctx)
-{
-    // nv2 and nv3 can use the ARR opcode.
-    // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
-    if (support_nv4(ctx))
-        emit_ARB1_opcode_ds(ctx, "ROUND.S");  // !!! FIXME: don't use a modifier here.
-    else if ((support_nv2(ctx)) || (support_nv3(ctx)))
-        emit_ARB1_opcode_ds(ctx, "ARR");
-    else
-    {
-        char src0[64];
-        char scratch[64];
-        char addr[32];
-
-        make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
-        snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum);
-
-        // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.
-
-        // ARL uses floor(), but D3D expects round-to-nearest.
-        // There is probably a more efficient way to do this.
-        if (shader_is_pixel(ctx))  // CMP only exists in fragment programs.  :/
-            output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0);
-        else
-        {
-            output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0);
-            output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch);
-        } // else
-
-        output_line(ctx, "ABS %s, %s;", addr, src0);
-        output_line(ctx, "ADD %s, %s, 0.5;", addr, addr);
-        output_line(ctx, "FLR %s, %s;", addr, addr);
-        output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch);
-
-        // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
-        //  wants to look at dest_arg, not our temp register.
-        assert(ctx->dest_arg.result_mod == 0);
-        assert(ctx->dest_arg.result_shift == 0);
-
-        // we assign to the actual address register as needed.
-        ctx->last_address_reg_component = -1;
-    } // else
-} // emit_ARB1_MOVA
-
-
-static void emit_ARB1_TEXKILL(Context *ctx)
-{
-    // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle.
-    //  We just map the x component to w. If it's negative, the fragment
-    //  would discard anyhow, otherwise, it'll pass through okay. This saves
-    //  us a temp register.
-    char dst[64];
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-    output_line(ctx, "KIL %s.xyzx;", dst);
-} // emit_ARB1_TEXKILL
-
-static void arb1_texbem(Context *ctx, const int luminance)
-{
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    const int stage = ctx->dest_arg.regnum;
-    char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-    char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src));
-    char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
-    char sampler[64];
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
-                            sampler, sizeof (sampler));
-
-    output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src);
-    output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp);
-    output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst);
-    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage);
-
-    if (luminance)  // TEXBEML, not just TEXBEM?
-    {
-        output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
-                    tmp, src, sampler, sampler);
-        output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp);
-    } // if
-
-    emit_ARB1_dest_modifiers(ctx);
-} // arb1_texbem
-
-static void emit_ARB1_TEXBEM(Context *ctx)
-{
-    arb1_texbem(ctx, 0);
-} // emit_ARB1_TEXBEM
-
-static void emit_ARB1_TEXBEML(Context *ctx)
-{
-    arb1_texbem(ctx, 1);
-} // emit_ARB1_TEXBEML
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
-
-
-static void emit_ARB1_TEXM3X2PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_ARB1_TEXM3X2TEX().
-} // emit_ARB1_TEXM3X2PAD
-
-static void emit_ARB1_TEXM3X2TEX(Context *ctx)
-{
-    if (ctx->texm3x2pad_src0 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    const int stage = ctx->dest_arg.regnum;
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
-                            src0, sizeof (src0));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
-                            src1, sizeof (src1));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src2, sizeof (src2));
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-
-    output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst);
-    output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
-    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_TEXM3X2TEX
-
-
-static void emit_ARB1_TEXM3X3PAD(Context *ctx)
-{
-    // no-op ... work happens in emit_ARB1_TEXM3X3*().
-} // emit_ARB1_TEXM3X3PAD
-
-
-static void emit_ARB1_TEXM3X3TEX(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    const int stage = ctx->dest_arg.regnum;
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
-
-    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
-    output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
-    output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
-    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_TEXM3X3TEX
-
-static void emit_ARB1_TEXM3X3SPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char src5[64];
-    char tmp[64];
-    char tmp2[64];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    const int stage = ctx->dest_arg.regnum;
-    allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
-    allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
-                            src5, sizeof (src5));
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
-
-    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
-    output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
-    output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
-    output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst);    // normal * normal
-    output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5);  // normal * eyeray
-
-    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
-    // !!! FIXME:  divides or reciprocals...right?
-    output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
-    output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
-
-    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
-    output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5);
-    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_TEXM3X3SPEC
-
-static void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-    char tmp[64];
-    char tmp2[64];
-    char tmp3[64];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    const int stage = ctx->dest_arg.regnum;
-    allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
-    allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
-    allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
-    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
-    const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
-
-    output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0);
-    output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2);
-    output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst);
-    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
-    output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
-    output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
-    output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst);    // normal * normal
-    output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3);  // normal * eyeray
-
-    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
-    // !!! FIXME:  divides or reciprocals...right?
-    output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
-    output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
-    output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
-
-    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
-    output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3);
-    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_TEXM3X3VSPEC
-
-static void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
-static void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); }
-
-static void emit_ARB1_CND(Context *ctx)
-{
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-    char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
-
-    // CND compares against 0.5, but we need to compare against 0.0...
-    //  ...subtract to make up the difference.
-    output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0);
-    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
-    //  switch src1 and src2 to get the same results.
-    output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_CND
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
-
-static void emit_ARB1_TEXM3X3(Context *ctx)
-{
-    if (ctx->texm3x3pad_src1 == -1)
-        return;
-
-    char dst[64];
-    char src0[64];
-    char src1[64];
-    char src2[64];
-    char src3[64];
-    char src4[64];
-
-    // !!! FIXME: this code counts on the register not having swizzles, etc.
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
-                            src0, sizeof (src0));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
-                            src1, sizeof (src1));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
-                            src2, sizeof (src2));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
-                            src3, sizeof (src3));
-    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
-                            src4, sizeof (src4));
-    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-
-    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
-    output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
-    output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
-    output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_TEXM3X3
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
-
-static void emit_ARB1_CMP(Context *ctx)
-{
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-    char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
-    //  switch src1 and src2 to get the same results.
-    output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1);
-    emit_ARB1_dest_modifiers(ctx);
-} // emit_ARB1_CMP
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
-
-
-static void emit_ARB1_DP2ADD(Context *ctx)
-{
-    if (support_nv4(ctx))  // nv4 has a built-in equivalent to DP2ADD.
-        emit_ARB1_opcode_dsss(ctx, "DP2A");
-    else
-    {
-        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
-        char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
-        char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-        char scratch[64];
-
-        // DP2ADD is:
-        //  dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
-        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
-        output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
-        output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
-        output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
-        emit_ARB1_dest_modifiers(ctx);
-    } // else
-} // emit_ARB1_DP2ADD
-
-
-static void emit_ARB1_DSX(Context *ctx)
-{
-    if (support_nv2(ctx))  // nv2 has a built-in equivalent to DSX.
-        emit_ARB1_opcode_ds(ctx, "DDX");
-    else
-        failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
-} // emit_ARB1_DSX
-
-
-static void emit_ARB1_DSY(Context *ctx)
-{
-    if (support_nv2(ctx))  // nv2 has a built-in equivalent to DSY.
-        emit_ARB1_opcode_ds(ctx, "DDY");
-    else
-        failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
-} // emit_ARB1_DSY
-
-static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
-{
-    // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
-    if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
-        ctx->dest_arg.result_mod &= ~MOD_PP;
-
-    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
-
-    const int sm1 = !shader_version_atleast(ctx, 1, 4);
-    const int regnum = sm1 ? ctx->dest_arg.regnum : ctx->source_args[1].regnum;
-    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum);
-
-    const char *ttype = NULL;
-    char src0[64];
-    if (sm1)
-        get_ARB1_destarg_varname(ctx, src0, sizeof (src0));
-    else
-        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
-    //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));  // !!! FIXME: SRC_MOD?
-
-    char src2[64] = { 0 };
-    char src3[64] = { 0 };
-
-    if (texldd)
-    {
-        make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
-        make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3));
-    } // if
-
-    // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
-    if (sreg == NULL)
-    {
-        fail(ctx, "TEXLD using undeclared sampler");
-        return;
-    } // if
-
-    // SM1 only specifies dst, so don't check swizzle there.
-    if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) )
-    {
-        // !!! FIXME: does this ever actually happen?
-        fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
-    } // if
-
-    switch ((const TextureType) sreg->index)
-    {
-        case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"?
-        case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break;
-        case TEXTURE_TYPE_VOLUME: ttype = "3D"; break;
-        default: fail(ctx, "unknown texture type"); return;
-    } // switch
-
-    if (texldd)
-    {
-        output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst,
-                    src0, src2, src3, regnum, ttype);
-    } // if
-    else
-    {
-        output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0,
-                    regnum, ttype);
-    } // else
-} // arb1_texld
-
-
-static void emit_ARB1_TEXLDD(Context *ctx)
-{
-    // With GL_NV_fragment_program2, we can use the TXD opcode.
-    //  In stock arb1, we can settle for a standard texld, which isn't
-    //  perfect, but oh well.
-    if (support_nv2(ctx))
-        arb1_texld(ctx, "TXD", 1);
-    else
-        arb1_texld(ctx, "TEX", 0);
-} // emit_ARB1_TEXLDD
-
-
-static void emit_ARB1_TEXLDL(Context *ctx)
-{
-    if ((shader_is_vertex(ctx)) && (!support_nv3(ctx)))
-    {
-        failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
-              ctx->profile->name);
-        return;
-    } // if
-
-    else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx)))
-    {
-        failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
-              ctx->profile->name);
-        return;
-    } // if
-
-    // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
-    arb1_texld(ctx, "TXL", 0);
-} // emit_ARB1_TEXLDL
-
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
-
-static void emit_ARB1_IFC(Context *ctx)
-{
-    if (support_nv2(ctx))
-    {
-        static const char *comps[] = {
-            "", "SGTC", "SEQC", "SGEC", "SGTC", "SNEC", "SLEC"
-        };
-
-        if (ctx->instruction_controls >= STATICARRAYLEN(comps))
-        {
-            fail(ctx, "unknown comparison control");
-            return;
-        } // if
-
-        char src0[64];
-        char src1[64];
-        char scratch[64];
-
-        const char *comp = comps[ctx->instruction_controls];
-        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
-        get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
-        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
-        output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
-        nv2_if(ctx);
-    } // if
-
-    else  // stock ARB1 has no branching.
-    {
-        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
-    } // else
-} // emit_ARB1_IFC
-
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
-
-static void emit_ARB1_DEF(Context *ctx)
-{
-    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
-    char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-    char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
-    char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
-    char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
-    char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
-
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
-                dst, val0, val1, val2, val3);
-    pop_output(ctx);
-} // emit_ARB1_DEF
-
-static void emit_ARB1_DEFI(Context *ctx)
-{
-    char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-    const int32 *x = (const int32 *) ctx->dwords;
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "PARAM %s = { %d, %d, %d, %d };",
-                dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
-    pop_output(ctx);
-} // emit_ARB1_DEFI
-
-static void emit_ARB1_DEFB(Context *ctx)
-{
-    char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
-    push_output(ctx, &ctx->globals);
-    output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0);
-    pop_output(ctx);
-} // emit_ARB1_DEFB
-
-static void emit_ARB1_DCL(Context *ctx)
-{
-    // no-op. We do this in our emit_attribute() and emit_uniform().
-} // emit_ARB1_DCL
-
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
-
-static void emit_ARB1_TEXLD(Context *ctx)
-{
-    if (!shader_version_atleast(ctx, 1, 4))
-    {
-        arb1_texld(ctx, "TEX", 0);
-        return;
-    } // if
-
-    else if (!shader_version_atleast(ctx, 2, 0))
-    {
-        // ps_1_4 is different, too!
-        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
-        return;
-    } // if
-
-    // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
-    if (ctx->instruction_controls == CONTROL_TEXLD)
-        arb1_texld(ctx, "TEX", 0);
-    else if (ctx->instruction_controls == CONTROL_TEXLDP)
-        arb1_texld(ctx, "TXP", 0);
-    else if (ctx->instruction_controls == CONTROL_TEXLDB)
-        arb1_texld(ctx, "TXB", 0);
-} // emit_ARB1_TEXLD
-
-#endif  // SUPPORT_PROFILE_ARB1
-
+PREDECLARE_PROFILE(ARB1)
+#endif
 
 #if !AT_LEAST_ONE_PROFILE
 #error No profiles are supported. Fix your build.
@@ -8611,7 +314,6 @@
     { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 },
 };
 
-
 // The PROFILE_EMITTER_* items MUST be in the same order as profiles[]!
 #define PROFILE_EMITTERS(op) { \
      PROFILE_EMITTER_D3D(op) \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/profiles/mojoshader_profile.h	Tue Apr 23 14:43:10 2019 -0400
@@ -0,0 +1,365 @@
+/**
+ * MojoShader; generate shader programs from bytecode of compiled
+ *  Direct3D shaders.
+ *
+ * Please see the file LICENSE.txt in the source's root directory.
+ *
+ *  This file written by Ryan C. Gordon.
+ */
+
+#ifndef MOJOSHADER_PROFILE_H
+#define MOJOSHADER_PROFILE_H
+
+#include "../mojoshader_internal.h"
+
+typedef struct ConstantsList  // singly linked list node holding one MOJOSHADER_constant.
+{
+    MOJOSHADER_constant constant;
+    struct ConstantsList *next;  // following node in the list.
+} ConstantsList;
+
+typedef struct VariableList  // singly linked list node describing one variable (register range).
+{
+    MOJOSHADER_uniformType type;
+    int index;  // ARB1 relative addressing treats this as the array's base register number.
+    int count;  // ARB1 relative addressing treats this as the array's size.
+    ConstantsList *constant;
+    int used;
+    int emit_position;  // used in some profiles.
+    struct VariableList *next;  // following node in the list.
+} VariableList;
+
+typedef struct RegisterList  // singly linked list node recording one register plus its metadata.
+{
+    RegisterType regtype;
+    int regnum;
+    MOJOSHADER_usage usage;
+    unsigned int index;
+    int writemask;
+    int misc;
+    int written;  // NOTE(review): presumably mirrors the 'written' argument of set_used_register() - confirm.
+    const VariableList *array;
+    struct RegisterList *next;  // following node in the list.
+} RegisterList;
+
+typedef struct  // one decoded source operand of the current instruction.
+{
+    const uint32 *token;   // this is the unmolested token in the stream.
+    int regnum;
+    int swizzle;  // xyzw (all four, not split out).
+    int swizzle_x;  // swizzle_x..swizzle_w: per-channel indices; ARB1 uses them to index swizzle_channels[].
+    int swizzle_y;
+    int swizzle_z;
+    int swizzle_w;
+    SourceMod src_mod;  // SRCMOD_* value; ARB1 may emit helper instructions for it.
+    RegisterType regtype;
+    int relative;  // nonzero when this operand uses relative addressing.
+    RegisterType relative_regtype;  // ARB1 (non-nv2) asserts this is REG_TYPE_ADDRESS.
+    int relative_regnum;  // ARB1 (non-nv2) asserts this is 0.
+    int relative_component;  // indexes swizzle_channels[] to pick the address component.
+    const VariableList *relative_array;  // array being indexed; ARB1 reads ->index and ->count.
+} SourceArgInfo;
+
+struct Profile;  // predeclare.
+
+typedef struct CtabData  // embedded in Context as the 'ctab' member.
+{
+    int have_ctab;
+    int symbol_count;  // NOTE(review): presumably the number of entries in 'symbols' - confirm.
+    MOJOSHADER_symbol *symbols;
+} CtabData;
+
+// Context...this is state that changes as we parse through a shader...
+typedef struct Context  // shared by every profile source file through this header.
+{
+    int isfail;
+    int out_of_memory;
+    MOJOSHADER_malloc malloc;
+    MOJOSHADER_free free;
+    void *malloc_data;
+    int current_position;
+    const uint32 *orig_tokens;
+    const uint32 *tokens;
+    uint32 tokencount;
+    int know_shader_size;
+    const MOJOSHADER_swizzle *swizzles;
+    unsigned int swizzles_count;
+    const MOJOSHADER_samplerMap *samplermap;
+    unsigned int samplermap_count;
+    Buffer *output;
+    Buffer *preflight;  // emit_ARB1_start() selects this section before writing the "!!ARB..." header.
+    Buffer *globals;  // ARB1 DEFI/DEFB emitters push this section to write PARAM declarations.
+    Buffer *inputs;
+    Buffer *outputs;
+    Buffer *helpers;
+    Buffer *subroutines;
+    Buffer *mainline_intro;
+    Buffer *mainline_arguments;
+    Buffer *mainline_top;
+    Buffer *mainline;
+    Buffer *postflight;
+    Buffer *ignore;
+    Buffer *output_stack[3];  // NOTE(review): presumably the save stack behind push_output()/pop_output() - confirm.
+    int indent_stack[3];
+    int output_stack_len;
+    int indent;
+    const char *shader_type_str;
+    const char *endline;
+    const char *mainfn;
+    int endline_len;
+    int profileid;
+    const struct Profile *profile;  // profile->name appears in "unimplemented" failure messages.
+    MOJOSHADER_shaderType shader_type;  // passed to isscalar(); printed by emit_ARB1_start() when unsupported.
+    uint8 major_ver;
+    uint8 minor_ver;
+    DestArgInfo dest_arg;  // destination operand of the current instruction.
+    SourceArgInfo source_args[5];  // ARB1 helpers bounds-check their index against this array's length.
+    SourceArgInfo predicate_arg;  // for predicated instructions.
+    uint32 dwords[4];  // ARB1 DEFI reads these as four int32 values; DEFB tests dwords[0].
+    uint32 version_token;
+    int instruction_count;
+    uint32 instruction_controls;  // ARB1 TEXLD compares this to CONTROL_TEXLD/TEXLDP/TEXLDB.
+    uint32 previous_opcode;
+    int coissue;
+    int loops;
+    int reps;
+    int max_reps;
+    int cmps;
+    int scratch_registers;  // next index handed out by allocate_scratch_register().
+    int max_scratch_registers;  // high-water mark maintained by allocate_scratch_register().
+    int branch_labels_stack_index;
+    int branch_labels_stack[32];
+    int assigned_branch_labels;  // next id handed out by allocate_branch_label().
+    int assigned_vertex_attributes;
+    int last_address_reg_component;  // component last copied into the address register's .x by an emitted ARL.
+    RegisterList used_registers;
+    RegisterList defined_registers;
+    ErrorList *errors;
+    int constant_count;
+    ConstantsList *constants;
+    int uniform_count;
+    int uniform_float4_count;
+    int uniform_int4_count;
+    int uniform_bool_count;
+    RegisterList uniforms;
+    int attribute_count;
+    RegisterList attributes;
+    int sampler_count;
+    RegisterList samplers;
+    VariableList *variables;  // variables to register mapping.
+    int centroid_allowed;
+    CtabData ctab;
+    int have_relative_input_registers;
+    int have_multi_color_outputs;
+    int determined_constants_arrays;
+    int predicated;  // nonzero makes make_ARB1_destarg_string() fail: predication is unsupported in arb1.
+    int uses_pointsize;
+    int uses_fog;
+
+    // !!! FIXME: move these into SUPPORT_PROFILE sections.
+    int glsl_generated_lit_helper;
+    int glsl_generated_texldd_setup;
+    int glsl_generated_texm3x3spec_helper;
+    int arb1_wrote_position;
+    // !!! FIXME: move these into SUPPORT_PROFILE sections.
+
+    int have_preshader;
+    int ignores_ctab;
+    int reset_texmpad;
+    int texm3x2pad_dst0;
+    int texm3x2pad_src0;
+    int texm3x3pad_dst0;
+    int texm3x3pad_src0;
+    int texm3x3pad_dst1;
+    int texm3x3pad_src1;
+    MOJOSHADER_preshader *preshader;
+
+#if SUPPORT_PROFILE_ARB1_NV
+    int profile_supports_nv2;  // set by emit_ARB1_start() for the NV2 and NV3 profile strings; read via support_nv2().
+    int profile_supports_nv3;  // set by emit_ARB1_start() for the NV3 profile string; read via support_nv3().
+    int profile_supports_nv4;  // read via support_nv4().
+#endif
+#if SUPPORT_PROFILE_GLSL120
+    int profile_supports_glsl120;  // read via support_glsl120().
+#endif
+#if SUPPORT_PROFILE_GLSLES
+    int profile_supports_glsles;  // read via support_glsles().
+#endif
+
+#if SUPPORT_PROFILE_METAL
+    int metal_need_header_common;
+    int metal_need_header_math;
+    int metal_need_header_relational;
+    int metal_need_header_geometric;
+    int metal_need_header_graphics;
+    int metal_need_header_texture;
+#endif
+} Context;
+
+// Use these macros so we can remove all bits of these profiles from the build.
+#if SUPPORT_PROFILE_ARB1_NV  // NV variants compiled in: read the per-context flags.
+#define support_nv2(ctx) ((ctx)->profile_supports_nv2)
+#define support_nv3(ctx) ((ctx)->profile_supports_nv3)
+#define support_nv4(ctx) ((ctx)->profile_supports_nv4)
+#else  // NV variants compiled out: the flags don't exist in Context, so answer constant 0.
+#define support_nv2(ctx) (0)
+#define support_nv3(ctx) (0)
+#define support_nv4(ctx) (0)
+#endif
+
+#if SUPPORT_PROFILE_GLSL120  // compiled in: read the per-context flag.
+#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
+#else  // compiled out: constant 0.
+#define support_glsl120(ctx) (0)
+#endif
+
+#if SUPPORT_PROFILE_GLSLES  // compiled in: read the per-context flag.
+#define support_glsles(ctx) ((ctx)->profile_supports_glsles)
+#else  // compiled out: constant 0.
+#define support_glsles(ctx) (0)
+#endif
+
+// Profile entry points...
+
+// one emit function for each opcode in each profile.
+typedef void (*emit_function)(Context *ctx);  // e.g. the emit_ARB1_<op>() functions have this shape.
+
+// one emit function for starting output in each profile.
+typedef void (*emit_start)(Context *ctx, const char *profilestr);  // emit_ARB1_start() strcmp()s profilestr against MOJOSHADER_PROFILE_* names.
+
+// one emit function for ending output in each profile.
+typedef void (*emit_end)(Context *ctx);
+
+// one emit function for phase opcode output in each profile.
+typedef void (*emit_phase)(Context *ctx);
+
+// one emit function for finalizing output in each profile.
+typedef void (*emit_finalize)(Context *ctx);
+
+// one emit function for global definitions in each profile.
+typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
+
+// one emit function for relative uniform arrays in each profile.
+typedef void (*emit_array)(Context *ctx, VariableList *var);
+
+// one emit function for relative constants arrays in each profile.
+typedef void (*emit_const_array)(Context *ctx,
+                                 const struct ConstantsList *constslist,
+                                 int base, int size);
+
+// one emit function for uniforms in each profile.
+typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
+                             const VariableList *var);
+
+// one emit function for samplers in each profile.
+typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
+                             int texbem);
+
+// one emit function for attributes in each profile.
+typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
+                               MOJOSHADER_usage usage, int index, int wmask,
+                               int flags);
+
+// one args function for each possible sequence of opcode arguments.
+typedef int (*args_function)(Context *ctx);
+
+// one state function for each opcode where we have state machine updates.
+typedef void (*state_function)(Context *ctx);
+
+// one function for varnames in each profile.
+typedef const char *(*varname_function)(Context *c, RegisterType t, int num);  // get_ARB1_varname() has this shape.
+
+// one function for const var array in each profile.
+typedef const char *(*const_array_varname_function)(Context *c, int base, int size);  // get_ARB1_const_array_varname() has this shape and returns a StrDup()'d string.
+
+typedef struct Profile  // per-profile table: a name plus one callback of each emitter/varname type.
+{
+    const char *name;  // printed by the "<op> unimplemented in %s profile" failure.
+    emit_start start_emitter;
+    emit_end end_emitter;
+    emit_phase phase_emitter;
+    emit_global global_emitter;
+    emit_array array_emitter;
+    emit_const_array const_array_emitter;
+    emit_uniform uniform_emitter;
+    emit_sampler sampler_emitter;
+    emit_attribute attribute_emitter;
+    emit_finalize finalize_emitter;
+    varname_function get_varname;
+    const_array_varname_function get_const_array_varname;
+} Profile;
+
+// Common utilities...
+
+void out_of_memory(Context *ctx);
+void *Malloc(Context *ctx, const size_t len);
+char *StrDup(Context *ctx, const char *str);  // get_ARB1_const_array_varname() hands this result straight to its caller.
+void Free(Context *ctx, void *ptr);
+void * MOJOSHADERCALL MallocBridge(int bytes, void *data);
+void MOJOSHADERCALL FreeBridge(void *ptr, void *data);
+
+int set_output(Context *ctx, Buffer **section);  // e.g. set_output(ctx, &ctx->preflight) in emit_ARB1_start().
+void push_output(Context *ctx, Buffer **section);  // paired with pop_output() around output_line() calls aimed at another section.
+void pop_output(Context *ctx);
+
+uint32 ver_ui32(const uint8 major, const uint8 minor);
+int shader_version_supported(const uint8 maj, const uint8 min);
+int shader_version_atleast(const Context *ctx, const uint8 maj,
+                           const uint8 min);
+int shader_version_exactly(const Context *ctx, const uint8 maj,
+                           const uint8 min);
+int shader_is_pixel(const Context *ctx);
+int shader_is_vertex(const Context *ctx);
+
+int isfail(const Context *ctx);
+void failf(Context *ctx, const char *fmt, ...);  // printf-style format string.
+void fail(Context *ctx, const char *reason);  // callers pass a plain message string.
+
+void output_line(Context *ctx, const char *fmt, ...);  // printf-style; callers pass whole statements such as "MUL%s, %s, %s;".
+void output_blank_line(Context *ctx);
+
+void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
+              int leavedecimal);
+
+RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
+                             const RegisterType regtype,
+                             const int regnum);
+RegisterList *reglist_find(const RegisterList *prev,
+                           const RegisterType rtype,
+                           const int regnum);
+RegisterList *set_used_register(Context *ctx,
+                                const RegisterType regtype,
+                                const int regnum,
+                                const int written);
+void set_defined_register(Context *ctx, const RegisterType rtype,
+                          const int regnum);
+
+int writemask_xyzw(const int writemask);  // ARB1 omits the writemask suffix when this is nonzero.
+int writemask_xyz(const int writemask);
+int writemask_xy(const int writemask);
+int writemask_x(const int writemask);
+int writemask_y(const int writemask);
+int replicate_swizzle(const int swizzle);  // ARB1 prints a single channel when this is nonzero.
+int no_swizzle(const int swizzle);  // ARB1 omits the swizzle suffix when this is nonzero.
+int vecsize_from_writemask(const int m);
+void set_dstarg_writemask(DestArgInfo *dst, const int mask);
+
+int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
+             const RegisterType rtype, const int rnum);  // nonzero suppresses swizzle/writemask text in the ARB1 operand builders.
+
+static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };  // component index 0..3 -> letter; static, so each including .c file gets its own copy.
+
+const char *get_D3D_register_string(Context *ctx,
+                                    RegisterType regtype,
+                                    int regnum, char *regnum_str,
+                                    size_t regnum_size);  // ARB1 forwards to this unchanged via get_ARB1_register_string().
+
+// !!! FIXME: These should stay in the mojoshader_profile_d3d file
+// !!! FIXME: but ARB1 relies on them, so we have to move them here.
+// !!! FIXME: If/when we kill off ARB1, we can move these back.
+const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
+                                   int regnum, char *buf,
+                                   const size_t len);
+const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum);
+
+#endif
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/profiles/mojoshader_profile_arb1.c	Tue Apr 23 14:43:10 2019 -0400
@@ -0,0 +1,2252 @@
+/**
+ * MojoShader; generate shader programs from bytecode of compiled
+ *  Direct3D shaders.
+ *
+ * Please see the file LICENSE.txt in the source's root directory.
+ *
+ *  This file written by Ryan C. Gordon.
+ */
+
+#pragma GCC visibility push(hidden)
+
+#define __MOJOSHADER_INTERNAL__ 1
+#include "mojoshader_profile.h"
+
+#if SUPPORT_PROFILE_ARB1
+
+// ARB1 register naming currently matches the D3D profile, so this just
+//  forwards every argument to get_D3D_register_string() and returns
+//  whatever it returns.
+static inline const char *get_ARB1_register_string(Context *ctx,
+                        const RegisterType regtype, const int regnum,
+                        char *regnum_str, const size_t regnum_size)
+{
+    const char *retval = get_D3D_register_string(ctx, regtype, regnum,
+                                                 regnum_str, regnum_size);
+    return retval;
+} // get_ARB1_register_string
+
+// Hands out the next scratch register index. Also raises
+//  ctx->max_scratch_registers when the new index reaches or passes it, so
+//  that field tracks the largest number of scratch registers in use.
+int allocate_scratch_register(Context *ctx)
+{
+    const int idx = ctx->scratch_registers;
+    ctx->scratch_registers = idx + 1;
+
+    if (ctx->max_scratch_registers <= idx)
+        ctx->max_scratch_registers = idx + 1;
+
+    return idx;
+} // allocate_scratch_register
+
+// Returns the current branch label id and advances the counter in ctx.
+int allocate_branch_label(Context *ctx)
+{
+    const int label = ctx->assigned_branch_labels;
+    ctx->assigned_branch_labels = label + 1;
+    return label;
+} // allocate_branch_label
+
+// Allocates a scratch register and writes its name ("scratch<N>") into buf.
+//  Returns buf.
+const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
+                                           const size_t buflen)
+{
+    snprintf(buf, buflen, "scratch%d", allocate_scratch_register(ctx));
+    return buf;
+} // allocate_ARB1_scratch_reg_name
+
+// Writes the label name for branch id 'id' ("branch_label<id>") into buf
+//  and returns buf. ctx is not consulted.
+static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
+                                                char *buf, const size_t buflen)
+{
+    (void) ctx;  // unused here.
+    snprintf(buf, buflen, "branch_label%d", id);
+    return buf;
+} // get_ARB1_branch_label_name
+
+// ARB1 variable names currently match the D3D profile, so this defers to
+//  get_D3D_varname_in_buf() with the same arguments.
+const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
+                                    const int regnum, char *buf,
+                                    const size_t buflen)
+{
+    const char *retval = get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
+    return retval;
+} // get_ARB1_varname_in_buf
+
+// ARB1 variable names currently match the D3D profile, so this defers to
+//  get_D3D_varname() with the same arguments.
+const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
+                             const int regnum)
+{
+    const char *retval = get_D3D_varname(ctx, rt, regnum);
+    return retval;
+} // get_ARB1_varname
+
+
+// Writes the name of a constants array ("c_array_<base>_<size>") into buf
+//  and returns buf. ctx is not consulted.
+static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
+                                                const int base, const int size,
+                                                char *buf, const size_t buflen)
+{
+    (void) ctx;  // unused here.
+    snprintf(buf, buflen, "c_array_%d_%d", base, size);
+    return buf;
+} // get_ARB1_const_array_varname_in_buf
+
+
+// Builds the constants-array name for (base, size) in a local buffer and
+//  returns a StrDup()'d copy of it.
+const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
+{
+    char name[64];
+    const char *built = get_ARB1_const_array_varname_in_buf(ctx, base, size,
+                                                            name, sizeof (name));
+    return StrDup(ctx, built);
+} // get_ARB1_const_array_varname
+
+
+// Render one source operand as ARB1 assembly text.
+//
+// Writes the finished operand (negation prefix, register name, optional
+//  relative-addressing brackets, swizzle) into buf and returns buf.
+//
+// Side effects: this may call output_line() to emit helper instructions
+//  ahead of the instruction being built: an ARL that moves the wanted
+//  address component into .x on non-nv2 profiles, and SUB/MAD/MUL/ABS into
+//  a freshly allocated scratch register for source modifiers that can't be
+//  expressed in place. Unsupported modifiers (_dz, _dw, !) are reported
+//  through fail().
+const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
+                                           const SourceArgInfo *arg,
+                                           char *buf, size_t buflen)
+{
+    // !!! FIXME: this can hit pathological cases where we look like this...
+    //
+    //    dp3 r1.xyz, t0_bx2, t0_bx2
+    //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
+    //
+    // ...which do a lot of duplicate work in arb1...
+    //
+    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
+    //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
+    //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
+    //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
+    //    DP3 r1.xyz, scratch0, scratch1;
+    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
+    //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
+    //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
+    //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
+    //    MAD r1.xyz, scratch0, scratch1, scratch2;
+    //
+    // ...notice that the dp3 calculates the same value into two scratch
+    //  registers. This case is easier to handle; just see if multiple
+    //  source args are identical, build it up once, and use the same
+    //  scratch register for multiple arguments in that opcode.
+    //  Even better still, only calculate things once across instructions,
+    //  and be smart about letting it linger in a scratch register until we
+    //  definitely don't need the calculation anymore. That's harder to
+    //  write, though.
+
+    char regnum_str[16] = { '\0' };
+
+    // !!! FIXME: use get_ARB1_varname_in_buf() instead?
+    const char *regtype_str = NULL;
+    if (!arg->relative)
+    {
+        regtype_str = get_ARB1_register_string(ctx, arg->regtype,
+                                               arg->regnum, regnum_str,
+                                               sizeof (regnum_str));
+
+        // Same guard as make_ARB1_destarg_string(): never hand a NULL
+        //  string to snprintf()'s "%s".
+        if (regtype_str == NULL)
+        {
+            fail(ctx, "Unknown source register type.");
+            *buf = '\0';
+            return buf;
+        } // if
+    } // if
+
+    const char *rel_lbracket = "";
+    char rel_offset[32] = { '\0' };
+    const char *rel_rbracket = "";
+    char rel_swizzle[4] = { '\0' };
+    const char *rel_regtype_str = "";
+    if (arg->relative)
+    {
+        rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
+                                                  arg->relative_regnum,
+                                                  (char *) alloca(64), 64);
+
+        rel_swizzle[0] = '.';
+        rel_swizzle[1] = swizzle_channels[arg->relative_component];
+        rel_swizzle[2] = '\0';
+
+        if (!support_nv2(ctx))
+        {
+            // The address register in ARB1 only allows the '.x' component, so
+            //  we need to load the component we need from a temp vector
+            //  register into .x as needed.
+            assert(arg->relative_regtype == REG_TYPE_ADDRESS);
+            assert(arg->relative_regnum == 0);
+            if (ctx->last_address_reg_component != arg->relative_component)
+            {
+                output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
+                            arg->relative_regnum,
+                            swizzle_channels[arg->relative_component]);
+                ctx->last_address_reg_component = arg->relative_component;
+            } // if
+
+            rel_swizzle[1] = 'x';
+        } // if
+
+        if (arg->regtype == REG_TYPE_INPUT)
+            regtype_str = "vertex.attrib";
+        else
+        {
+            assert(arg->regtype == REG_TYPE_CONST);
+            const int arrayidx = arg->relative_array->index;
+            const int arraysize = arg->relative_array->count;
+            const int offset = arg->regnum - arrayidx;
+            assert(offset >= 0);
+            regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
+                                           arraysize, (char *) alloca(64), 64);
+            if (offset != 0)
+                snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
+        } // else
+
+        rel_lbracket = "[";
+        rel_rbracket = "]";
+    } // if
+
+    // This is the source register with everything but swizzle and source mods.
+    snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
+             rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
+             rel_rbracket);
+
+    // Some of the source mods need to generate instructions to a temp
+    //  register, in which case we'll replace the register name.
+    const SourceMod mod = arg->src_mod;
+    const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
+                          ((mod == SRCMOD_ABS) && support_nv2(ctx)) );
+
+    if (!inplace)
+    {
+        const size_t len = 64;
+        char *stackbuf = (char *) alloca(len);
+        regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
+        regnum_str[0] = '\0'; // move value to scratch register.
+        rel_lbracket = "";   // scratch register won't use array.
+        rel_rbracket = "";
+        rel_offset[0] = '\0';
+        rel_swizzle[0] = '\0';
+        rel_regtype_str = "";
+    } // if
+
+    const char *premod_str = "";
+    const char *postmod_str = "";
+    switch (mod)
+    {
+        case SRCMOD_NEGATE:
+            premod_str = "-";
+            break;
+
+        case SRCMOD_BIASNEGATE:
+            premod_str = "-";
+            // fall through.
+        case SRCMOD_BIAS:
+            output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
+                        regtype_str, buf);
+            break;
+
+        case SRCMOD_SIGNNEGATE:
+            premod_str = "-";
+            // fall through.
+        case SRCMOD_SIGN:
+            output_line(ctx,
+                "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
+                regtype_str, buf);
+            break;
+
+        case SRCMOD_COMPLEMENT:
+            output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
+                        regtype_str, buf);
+            break;
+
+        case SRCMOD_X2NEGATE:
+            premod_str = "-";
+            // fall through.
+        case SRCMOD_X2:
+            output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
+                        regtype_str, buf);
+            break;
+
+        case SRCMOD_DZ:
+            fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
+            postmod_str = "_dz";
+            break;
+
+        case SRCMOD_DW:
+            fail(ctx, "SRCMOD_DW currently unsupported in arb1");
+            postmod_str = "_dw";
+            break;
+
+        case SRCMOD_ABSNEGATE:
+            premod_str = "-";
+            // fall through.
+        case SRCMOD_ABS:
+            if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
+                output_line(ctx, "ABS %s, %s;", regtype_str, buf);
+            else
+            {
+                premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
+                postmod_str = "|";
+            } // else
+            break;
+
+        case SRCMOD_NOT:
+            fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
+            premod_str = "!";
+            break;
+
+        case SRCMOD_NONE:
+        case SRCMOD_TOTAL:
+             break;  // stop compiler whining.
+    } // switch
+
+    // Worst case is the nv4 vFace ".x" followed by a full ".xyzw" swizzle:
+    //  seven characters plus the terminator.
+    char swizzle_str[8];
+    size_t i = 0;
+
+    if (support_nv4(ctx))  // vFace must be output as "vFace.x" in nv4.
+    {
+        if (arg->regtype == REG_TYPE_MISCTYPE)
+        {
+            if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
+            {
+                swizzle_str[i++] = '.';
+                swizzle_str[i++] = 'x';
+            } // if
+        } // if
+    } // if
+
+    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
+    if (!scalar && !no_swizzle(arg->swizzle))
+    {
+        swizzle_str[i++] = '.';
+
+        // .xxxx is the same as .x, but .xx is illegal...scalar or full!
+        if (replicate_swizzle(arg->swizzle))
+            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
+        else
+        {
+            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
+            swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
+            swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
+            swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
+        } // else
+    } // if
+    assert(i < sizeof (swizzle_str));  // check before writing the terminator.
+    swizzle_str[i] = '\0';
+
+    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
+             regtype_str, regnum_str, rel_lbracket,
+             rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
+             swizzle_str, postmod_str);
+    // !!! FIXME: make sure the scratch buffer was large enough.
+    return buf;
+} // make_ARB1_srcarg_string_in_buf
+
+// Writes the variable name of the current instruction's destination
+//  register (ctx->dest_arg) into buf and returns the result of
+//  get_ARB1_varname_in_buf().
+const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
+                                     const size_t buflen)
+{
+    return get_ARB1_varname_in_buf(ctx, ctx->dest_arg.regtype,
+                                   ctx->dest_arg.regnum, buf, buflen);
+} // get_ARB1_destarg_varname
+
+// Writes the variable name of source argument 'idx' into buf.
+//  An out-of-range idx is reported through fail() and yields an empty
+//  string in buf.
+const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
+                                    char *buf, const size_t buflen)
+{
+    if (idx < STATICARRAYLEN(ctx->source_args))
+    {
+        const SourceArgInfo *src = &ctx->source_args[idx];
+        return get_ARB1_varname_in_buf(ctx, src->regtype, src->regnum,
+                                       buf, buflen);
+    } // if
+
+    fail(ctx, "Too many source args");
+    buf[0] = '\0';
+    return buf;
+} // get_ARB1_srcarg_varname
+
+
+// Render the current instruction's destination operand (ctx->dest_arg) as
+//  ARB1 text: optional precision/saturate suffixes that attach directly to
+//  the opcode, a space, the register name and an optional writemask.
+//  Callers therefore print it as "OPCODE%s".
+//
+// Writes into buf and returns buf. On failure (unknown register type, or a
+//  predicated instruction, which arb1 doesn't support yet) fail() is called
+//  and buf is left as an empty string.
+const char *make_ARB1_destarg_string(Context *ctx, char *buf,
+                                     const size_t buflen)
+{
+    const DestArgInfo *arg = &ctx->dest_arg;
+
+    *buf = '\0';
+
+    const char *sat_str = "";
+    if (arg->result_mod & MOD_SATURATE)
+    {
+        // nv4 can use ".SAT" in all program types.
+        // For less than nv4, the "_SAT" modifier is only available in
+        //  fragment shaders. Every thing else will fake it later in
+        //  emit_ARB1_dest_modifiers() ...
+        if (support_nv4(ctx))
+            sat_str = ".SAT";
+        else if (shader_is_pixel(ctx))
+            sat_str = "_SAT";
+    } // if
+
+    const char *pp_str = "";
+    if (arg->result_mod & MOD_PP)
+    {
+        // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
+        //  okay. The spec says lots of Direct3D implementations ignore the
+        //  flag anyhow.
+        if (support_nv4(ctx))
+            pp_str = "H";
+    } // if
+
+    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
+    assert((arg->result_mod & MOD_CENTROID) == 0);
+
+    char regnum_str[16];
+    const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
+                                                       arg->regnum, regnum_str,
+                                                       sizeof (regnum_str));
+    if (regtype_str == NULL)
+    {
+        fail(ctx, "Unknown destination register type.");
+        return buf;
+    } // if
+
+    char writemask_str[6];
+    size_t i = 0;
+    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
+    if (!scalar && !writemask_xyzw(arg->writemask))
+    {
+        writemask_str[i++] = '.';
+        if (arg->writemask0) writemask_str[i++] = 'x';
+        if (arg->writemask1) writemask_str[i++] = 'y';
+        if (arg->writemask2) writemask_str[i++] = 'z';
+        if (arg->writemask3) writemask_str[i++] = 'w';
+    } // if
+    writemask_str[i] = '\0';
+    assert(i < sizeof (writemask_str));
+
+    if (ctx->predicated)
+    {
+        // !!! FIXME: ctx->predicate_arg is never rendered; predicated
+        // !!! FIXME:  destinations simply fail for now.
+        fail(ctx, "dest register predication currently unsupported in arb1");
+        return buf;
+    } // if
+
+    snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
+             regtype_str, regnum_str, writemask_str);
+    // !!! FIXME: make sure the scratch buffer was large enough.
+    return buf;
+} // make_ARB1_destarg_string
+
+
+// Emits the follow-up instructions a destination modifier needs after the
+//  main instruction has been written:
+//   - a result shift becomes a MUL of the destination by a constant;
+//   - saturation outside nv4 and outside pixel shaders becomes a MIN/MAX
+//     clamp to [0.0, 1.0] (the other cases used a SAT opcode suffix).
+void emit_ARB1_dest_modifiers(Context *ctx)
+{
+    const DestArgInfo *arg = &ctx->dest_arg;
+    char dst[64];
+    char var[64];
+
+    if (arg->result_shift != 0x0)
+    {
+        const char *multiplier;
+
+        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
+
+        switch (arg->result_shift)
+        {
+            case 0x1: multiplier = "2.0"; break;
+            case 0x2: multiplier = "4.0"; break;
+            case 0x3: multiplier = "8.0"; break;
+            case 0xD: multiplier = "0.125"; break;
+            case 0xE: multiplier = "0.25"; break;
+            case 0xF: multiplier = "0.5"; break;
+            default: multiplier = NULL; break;  // other shifts emit nothing.
+        } // switch
+
+        if (multiplier != NULL)
+        {
+            get_ARB1_destarg_varname(ctx, var, sizeof (var));
+            output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
+        } // if
+    } // if
+
+    // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
+    if ( (arg->result_mod & MOD_SATURATE) &&
+         (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
+    {
+        get_ARB1_destarg_varname(ctx, var, sizeof (var));
+        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
+        output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
+        output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
+    } // if
+} // emit_ARB1_dest_modifiers
+
+
+// Renders source argument 'idx' of the current instruction into buf via
+//  make_ARB1_srcarg_string_in_buf(). An out-of-range idx is reported
+//  through fail() and yields an empty string in buf.
+const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
+                                    char *buf, const size_t buflen)
+{
+    if (idx < STATICARRAYLEN(ctx->source_args))
+        return make_ARB1_srcarg_string_in_buf(ctx, &ctx->source_args[idx],
+                                              buf, buflen);
+
+    fail(ctx, "Too many source args");
+    buf[0] = '\0';
+    return buf;
+} // make_ARB1_srcarg_string
+
+// Emits "<opcode><dst>, <src0>;" followed by any destination-modifier
+//  fixups. Operands are rendered in order (dst, then src0) because
+//  rendering a source may itself emit helper instructions.
+void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
+{
+    char dst[64];
+    char src0[64];
+
+    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
+    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
+
+    output_line(ctx, "%s%s, %s;", opcode, dst, src0);
+    emit_ARB1_dest_modifiers(ctx);
+} // emit_ARB1_opcode_ds
+
+// Emits "<opcode><dst>, <src0>, <src1>;" followed by any
+//  destination-modifier fixups. Operands are rendered in order (dst, src0,
+//  src1) because rendering a source may itself emit helper instructions.
+void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
+{
+    char dst[64];
+    char src0[64];
+    char src1[64];
+
+    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
+    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
+    make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
+
+    output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
+    emit_ARB1_dest_modifiers(ctx);
+} // emit_ARB1_opcode_dss
+
+// Emits "<opcode><dst>, <src0>, <src1>, <src2>;" followed by any
+//  destination-modifier fixups. Operands are rendered in order (dst, src0,
+//  src1, src2) because rendering a source may itself emit helper
+//  instructions.
+void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
+{
+    char dst[64];
+    char src0[64];
+    char src1[64];
+    char src2[64];
+
+    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
+    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
+    make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
+    make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
+
+    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
+    emit_ARB1_dest_modifiers(ctx);
+} // emit_ARB1_opcode_dsss
+
+
+#define EMIT_ARB1_OPCODE_FUNC(op) /* defines emit_ARB1_<op>(), forwarding "<op>" to emit_ARB1_opcode(); NOTE(review): that helper is not in the visible part of this file - confirm it exists. */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_D_FUNC(op) /* as above, forwarding to emit_ARB1_opcode_d() (helper not visible here). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_d(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_S_FUNC(op) /* as above, forwarding to emit_ARB1_opcode_s() (helper not visible here). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_s(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_SS_FUNC(op) /* as above, forwarding to emit_ARB1_opcode_ss() (helper not visible here). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_ss(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_DS_FUNC(op) /* defines emit_ARB1_<op>(): one dest, one source, via emit_ARB1_opcode_ds(). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_ds(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_DSS_FUNC(op) /* defines emit_ARB1_<op>(): one dest, two sources, via emit_ARB1_opcode_dss(). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_dss(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) /* defines emit_ARB1_<op>(): one dest, three sources, via emit_ARB1_opcode_dsss(). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_dsss(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) /* as above, forwarding to emit_ARB1_opcode_dssss() (helper not visible here). */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        emit_ARB1_opcode_dssss(ctx, #op); \
+    }
+#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) /* defines emit_ARB1_<op>() as a stub that reports failure naming the opcode and the active profile. */ \
+    void emit_ARB1_##op(Context *ctx) { \
+        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
+    }
+
+
+void emit_ARB1_start(Context *ctx, const char *profilestr)
+{
+    const char *shader_str = NULL;
+    const char *shader_full_str = NULL;
+    if (shader_is_vertex(ctx))
+    {
+        shader_str = "vp";
+        shader_full_str = "vertex";
+    } // if
+    else if (shader_is_pixel(ctx))
+    {
+        shader_str = "fp";
+        shader_full_str = "fragment";
+    } // else if
+    else
+    {
+        failf(ctx, "Shader type %u unsupported in this profile.",
+              (uint) ctx->shader_type);
+        return;
+    } // if
+
+    set_output(ctx, &ctx->preflight);
+
+    if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
+        output_line(ctx, "!!ARB%s1.0", shader_str);
+
+    #if SUPPORT_PROFILE_ARB1_NV
+    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
+    {
+        ctx->profile_supports_nv2 = 1;
+        output_line(ctx, "!!ARB%s1.0", shader_str);
+        output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
+    } // else if
+
+    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
+    {
+        // there's no NV_fragment_program3, so just use 2.
+        const int ver = shader_is_pixel(ctx) ? 2 : 3;
+        ctx->profile_supports_nv2 = 1;
+        ctx->profile_supports_nv3 = 1;
+        output_line(ctx, "!!AR