First shot at GLSL/ARB1 support for TEXBEM and TEXBEML opcodes.
author Ryan C. Gordon <icculus@icculus.org>
Tue, 17 Apr 2012 00:07:33 -0400
changeset 1090 636ffcd3f14a
parent 1089 b965d0942dff
child 1091 4edfe78c14f8
First shot at GLSL/ARB1 support for TEXBEM and TEXBEML opcodes.
mojoshader.c
mojoshader.h
mojoshader_internal.h
mojoshader_opengl.c
utils/testparse.c
--- a/mojoshader.c	Mon Apr 16 23:46:42 2012 -0400
+++ b/mojoshader.c	Tue Apr 17 00:07:33 2012 -0400
@@ -220,7 +220,8 @@
                              const VariableList *var);
 
 // one emit function for samplers in each profile.
-typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype);
+typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
+                             int texbem);
 
 // one emit function for attributes in each profile.
 typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
@@ -618,12 +619,14 @@
 } // add_attribute_register
 
 static inline void add_sampler(Context *ctx, const RegisterType rtype,
-                               const int regnum, const TextureType ttype)
+                               const int regnum, const TextureType ttype,
+                               const int texbem)
 {
     // !!! FIXME: make sure it doesn't exist?
     // !!! FIXME:  (ps_1_1 assume we can add it multiple times...)
     RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
     item->index = (int) ttype;
+    item->misc |= texbem;  // OR'd in: passing 0 never clears a set flag.
 } // add_sampler
 
 
@@ -1154,7 +1157,7 @@
 } // emit_D3D_uniform
 
 
-static void emit_D3D_sampler(Context *ctx, int stage, TextureType ttype)
+static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
 {
     // no-op.
 } // emit_D3D_sampler
@@ -1561,7 +1564,7 @@
 static void emit_BYTECODE_finalize(Context *ctx) {}
 static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
 static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
-static void emit_BYTECODE_sampler(Context *ctx, int s, TextureType ttype) {}
+static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
 static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
                                          int base, int size) {}
 static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
@@ -2391,7 +2394,7 @@
     pop_output(ctx);
 } // emit_GLSL_uniform
 
-static void emit_GLSL_sampler(Context *ctx, int stage, TextureType ttype)
+static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
 {
     const char *type = "";
     switch (ttype)
@@ -2407,6 +2410,15 @@
 
     push_output(ctx, &ctx->globals);
     output_line(ctx, "uniform %s %s;", type, var);
+    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
+    {
+        char name[64];
+        const int index = ctx->uniform_float4_count;
+        ctx->uniform_float4_count += 2;
+        get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
+        output_line(ctx, "#define %s_texbem %s[%d]", var, name, index);
+        output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1);
+    } // if
     pop_output(ctx);
 } // emit_GLSL_sampler
 
@@ -3329,8 +3341,53 @@
 } // emit_GLSL_TEXLD
     
 
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXBEM)  // !!! FIXME
-EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXBEML) // !!! FIXME
+static void emit_GLSL_TEXBEM(Context *ctx)
+{
+    DestArgInfo *info = &ctx->dest_arg;
+    char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
+    char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
+    char sampler[64];
+    char code[512];
+
+    // Note that this code counts on the register not having swizzles, etc.
+    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
+                            sampler, sizeof (sampler));
+    // dst = texture2D(sampler, dst.xy + (sampler_texbem 2x2 matrix * src.xy))
+    make_GLSL_destarg_assign(ctx, code, sizeof (code),
+        "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
+        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
+        sampler,
+        dst, sampler, src, sampler, src,
+        dst, sampler, src, sampler, src);
+
+    output_line(ctx, "%s", code);
+} // emit_GLSL_TEXBEM
+
+
+static void emit_GLSL_TEXBEML(Context *ctx)
+{
+    // Note that this code counts on the register not having swizzles, etc.
+    DestArgInfo *info = &ctx->dest_arg;
+    char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
+    char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
+    char sampler[64];
+    char code[512];
+
+    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
+                            sampler, sizeof (sampler));
+    // Bump lookup times luminance: (src.z * _texbeml.x) + _texbeml.y.
+    make_GLSL_destarg_assign(ctx, code, sizeof (code),
+        "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
+        " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
+        " ((%s.z * %s_texbeml.x) + %s_texbeml.y)",
+        sampler,
+        dst, sampler, src, sampler, src,
+        dst, sampler, src, sampler, src,
+        src, sampler, sampler);
+
+    output_line(ctx, "%s", code);
+} // emit_GLSL_TEXBEML
+
 EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
 EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
 EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2PAD) // !!! FIXME
@@ -4303,9 +4360,22 @@
     pop_output(ctx);
 } // emit_ARB1_uniform
 
-static void emit_ARB1_sampler(Context *ctx, int stage, TextureType ttype)
-{
-    // this is a no-op...you don't predeclare samplers in arb1.
+static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
+{
+    // this is mostly a no-op...you don't predeclare samplers in arb1.
+    // A TEXBEM sampler gets two PARAMs bound just past the uniforms so far.
+    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
+    {
+        const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
+                          ctx->uniform_bool_count;
+        char var[64];
+        get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
+        push_output(ctx, &ctx->globals);
+        output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
+        output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
+        pop_output(ctx);
+        ctx->uniform_float4_count += 2;  // claim both slots.
+    } // if
+} // emit_ARB1_sampler
 
 // !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
@@ -5171,8 +5241,40 @@
     output_line(ctx, "KIL %s.xyzx;", dst);
 } // emit_ARB1_TEXKILL
 
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXBEM)
-EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXBEML)
+static void arb1_texbem(Context *ctx, const int luminance)
+{
+    // Note that this code counts on the register not having swizzles, etc.
+    const int stage = ctx->dest_arg.regnum;
+    char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
+    char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src));
+    char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
+    char sampler[64];
+    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
+                            sampler, sizeof (sampler));
+    // tmp.xy = dst.xy + (bump matrix * src.xy), then sample there into dst.
+    output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src);
+    output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp);
+    output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst);
+    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage);
+
+    if (luminance)  // TEXBEML, not just TEXBEM?
+    {
+        output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
+                    tmp, src, sampler, sampler);
+        output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp);
+    } // if
+} // arb1_texbem
+
+static void emit_ARB1_TEXBEM(Context *ctx)
+{
+    arb1_texbem(ctx, 0);  // 0 == skip the luminance multiply.
+} // emit_ARB1_TEXBEM
+
+static void emit_ARB1_TEXBEML(Context *ctx)
+{
+    arb1_texbem(ctx, 1);  // 1 == also apply the luminance multiply.
+} // emit_ARB1_TEXBEML
+
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2PAD)
@@ -6411,7 +6513,7 @@
     else if (shader_is_pixel(ctx))
     {
         if (regtype == REG_TYPE_SAMPLER)
-            add_sampler(ctx, regtype, regnum, (TextureType) ctx->dwords[0]);
+            add_sampler(ctx, regtype, regnum, (TextureType) ctx->dwords[0], 0);
         else
         {
             const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
@@ -6822,6 +6924,65 @@
     // !!! FIXME: there are further limitations in ps_1_3 and earlier.
 } // state_TEXKILL
 
+static void state_texbem(Context *ctx, const char *opcode)
+{
+    // Shared validation for TEXBEM/TEXBEML. The equation, according to MSDN:
+    //u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R
+    //         + D3DTSS_BUMPENVMAT10(stage m)*t(n)G
+    //v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R
+    //         + D3DTSS_BUMPENVMAT11(stage m)*t(n)G
+    //t(m)RGBA = TextureSample(stage m)
+    //
+    // ...TEXBEML adds this at the end:
+    //t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) +
+    //           D3DTSS_BUMPENVLOFFSET(stage m)]
+
+    if (shader_version_atleast(ctx, 1, 4))
+        failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
+
+    if (!shader_version_atleast(ctx, 1, 2))
+    {
+        if (ctx->source_args[0].src_mod == SRCMOD_SIGN)
+            failf(ctx, "%s forbids _bx2 on source reg before ps_1_2", opcode);
+    } // if
+
+    // !!! FIXME: MSDN:
+    // !!! FIXME: Register data that has been read by a texbem
+    // !!! FIXME:  or texbeml instruction cannot be read later,
+    // !!! FIXME:  except by another texbem or texbeml.
+
+    const DestArgInfo *dst = &ctx->dest_arg;
+    const SourceArgInfo *src = &ctx->source_args[0];
+    if (dst->regtype != REG_TYPE_TEXTURE)
+        failf(ctx, "%s destination must be a texture register", opcode);
+    if (src->regtype != REG_TYPE_TEXTURE)
+        failf(ctx, "%s source must be a texture register", opcode);
+    if (src->regnum >= dst->regnum)  // so says MSDN.
+        failf(ctx, "%s dest must be a higher register than source", opcode);
+
+    add_sampler(ctx, REG_TYPE_SAMPLER, dst->regnum, TEXTURE_TYPE_2D, 1);
+    add_attribute_register(ctx, REG_TYPE_TEXTURE, dst->regnum,
+                           MOJOSHADER_USAGE_TEXCOORD, dst->regnum, 0xF, 0);
+
+    // Strictly speaking, there should be a TEX opcode prior to this call that
+    //  should fill in this metadata, but I'm not sure that's required for the
+    //  shader to assemble in D3D, so we'll do this so we don't fail with a
+    //  cryptic error message even if the developer didn't do the TEX.
+    add_sampler(ctx, REG_TYPE_SAMPLER, src->regnum, TEXTURE_TYPE_2D, 0);
+    add_attribute_register(ctx, REG_TYPE_TEXTURE, src->regnum,
+                           MOJOSHADER_USAGE_TEXCOORD, src->regnum, 0xF, 0);
+} // state_texbem
+
+static void state_TEXBEM(Context *ctx)
+{
+    state_texbem(ctx, "TEXBEM");  // name is only used in error messages.
+} // state_TEXBEM
+
+static void state_TEXBEML(Context *ctx)
+{
+    state_texbem(ctx, "TEXBEML");  // name is only used in error messages.
+} // state_TEXBEML
+
 static void state_TEXLD(Context *ctx)
 {
     if (shader_version_atleast(ctx, 2, 0))
@@ -6885,7 +7046,7 @@
         const int sampler = info->regnum;
         if (info->regtype != REG_TYPE_TEXTURE)
             fail(ctx, "TEX param must be a texture register");
-        add_sampler(ctx, REG_TYPE_SAMPLER, sampler, TEXTURE_TYPE_2D);
+        add_sampler(ctx, REG_TYPE_SAMPLER, sampler, TEXTURE_TYPE_2D, 0);
         add_attribute_register(ctx, REG_TYPE_TEXTURE, sampler,
                                MOJOSHADER_USAGE_TEXCOORD, sampler, 0xF, 0);
     } // else
@@ -8053,6 +8214,7 @@
             retval[i].type = type;
             retval[i].index = item->regnum;
             retval[i].name = alloc_varname(ctx, item);
+            retval[i].texbem = (item->misc != 0) ? 1 : 0;
             item = item->next;
         } // for
     } // if
@@ -8469,7 +8631,8 @@
     {
         ctx->sampler_count++;
         ctx->profile->sampler_emitter(ctx, item->regnum,
-                                      (TextureType) item->index);
+                                      (TextureType) item->index,
+                                      item->misc != 0);
     } // for
 
     // ...and attributes...
--- a/mojoshader.h	Mon Apr 16 23:46:42 2012 -0400
+++ b/mojoshader.h	Tue Apr 17 00:07:33 2012 -0400
@@ -176,12 +176,18 @@
  *  before drawing with the shader.
  * (name) is a profile-specific variable name; it may be NULL if it isn't
  *  applicable to the requested profile.
+ * (texbem) will be non-zero if a TEXBEM opcode references this sampler. This
+ *  is only used in legacy shaders (ps_1_1 through ps_1_3), but it needs some
+ *  special support to work, as we have to load a magic uniform behind the
+ *  scenes to support it. Most code can ignore this field in general, and no
+ *  one has to touch it unless they really know what they're doing.
  */
 typedef struct MOJOSHADER_sampler
 {
     MOJOSHADER_samplerType type;
     int index;
     const char *name;
+    int texbem;
 } MOJOSHADER_sampler;
 
 /*
@@ -3014,6 +3020,45 @@
                                          unsigned int bcount);
 
 /*
+ * Set up the vector for the TEXBEM opcode. Most apps can ignore this API.
+ *
+ * Shader Model 1.1 through 1.3 had an instruction for "fake bump mapping"
+ *  called TEXBEM. To use it, you had to set some sampler states,
+ *  D3DTSS_BUMPENVMATxx, which would be referenced by the opcode.
+ *
+ * This functionality was removed from Shader Model 1.4 and later, because
+ *  it was special-purpose and limited. The functionality could be built on
+ *  more general opcodes, and the sampler state could be supplied in a more
+ *  general uniform.
+ *
+ * However, to support this opcode, we supply a way to specify that sampler
+ *  state, and the OpenGL glue code does the right thing to pass that
+ *  information to the shader.
+ *
+ * This call maps to IDirect3DDevice::SetTextureStageState() with the
+ *  D3DTSS_BUMPENVMAT00, D3DTSS_BUMPENVMAT01, D3DTSS_BUMPENVMAT10,
+ *  D3DTSS_BUMPENVMAT11, D3DTSS_BUMPENVLSCALE, and D3DTSS_BUMPENVLOFFSET
+ *  targets. This is only useful for Shader Model < 1.4 pixel shaders, if
+ *  they use the TEXBEM or TEXBEML opcode. If you aren't sure, you don't need
+ *  this function.
+ *
+ * Like the rest of your uniforms, you must call MOJOSHADER_glProgramReady()
+ *  between setting new values and drawing with them.
+ *
+ * This call is NOT thread safe! As most OpenGL implementations are not thread
+ *  safe, you should probably only call this from the same thread that created
+ *  the GL context.
+ *
+ * This call requires a valid MOJOSHADER_glContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_glMakeContextCurrent().
+ *
+ * These values are not shared between contexts.
+ */
+void MOJOSHADER_glSetLegacyBumpMapEnv(unsigned int sampler, float mat00,
+                                      float mat01, float mat10, float mat11,
+                                      float lscale, float loffset);
+
+/*
  * Connect a client-side array to the currently-bound program.
  *
  * (usage) and (index) map to Direct3D vertex declaration values: COLOR1 would
--- a/mojoshader_internal.h	Mon Apr 16 23:46:42 2012 -0400
+++ b/mojoshader_internal.h	Tue Apr 17 00:07:33 2012 -0400
@@ -655,8 +655,8 @@
 INSTRUCTION_STATE(TEXCRD, "TEXCRD", 1, TEXCRD, MOJOSHADER_TYPE_PIXEL)
 INSTRUCTION_STATE(TEXKILL, "TEXKILL", 2, D, MOJOSHADER_TYPE_PIXEL)
 INSTRUCTION_STATE(TEXLD, "TEXLD", 1, TEXLD, MOJOSHADER_TYPE_PIXEL)
-INSTRUCTION(TEXBEM, "TEXBEM", 1, DS, MOJOSHADER_TYPE_PIXEL)
-INSTRUCTION(TEXBEML, "TEXBEML", 2, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(TEXBEM, "TEXBEM", 1, DS, MOJOSHADER_TYPE_PIXEL)
+INSTRUCTION_STATE(TEXBEML, "TEXBEML", 2, DS, MOJOSHADER_TYPE_PIXEL)
 INSTRUCTION(TEXREG2AR, "TEXREG2AR", 1, DS, MOJOSHADER_TYPE_PIXEL)
 INSTRUCTION(TEXREG2GB, "TEXREG2GB", 1, DS, MOJOSHADER_TYPE_PIXEL)
 INSTRUCTION(TEXM3X2PAD, "TEXM3X2PAD", 1, DS, MOJOSHADER_TYPE_PIXEL)
--- a/mojoshader_opengl.c	Mon Apr 16 23:46:42 2012 -0400
+++ b/mojoshader_opengl.c	Tue Apr 17 00:07:33 2012 -0400
@@ -81,6 +81,7 @@
     GLuint handle;
     uint32 generation;
     uint32 uniform_count;
+    uint32 texbem_count;
     UniformMap *uniforms;
     uint32 attribute_count;
     AttributeMap *attributes;
@@ -130,6 +131,7 @@
 #define MAX_REG_FILE_F 8192
 #define MAX_REG_FILE_I 2047
 #define MAX_REG_FILE_B 2047
+#define MAX_TEXBEMS 3  // ps_1_1 allows 4 texture stages, texbem can't use t0.
 
 struct MOJOSHADER_glContext
 {
@@ -148,6 +150,7 @@
     GLint ps_reg_file_i[MAX_REG_FILE_I * 4];
     uint8 ps_reg_file_b[MAX_REG_FILE_B];
     GLuint sampler_reg_file[16];
+    GLfloat texbem_state[MAX_TEXBEMS * 6];
 
     // This increments every time we change the register files.
     uint32 generation;
@@ -769,6 +772,7 @@
     const GLint *srci = program->vs_uniforms_int4;
     const GLint *srcb = program->vs_uniforms_bool;
     GLint loc = 0;
+    GLint texbem_loc = 0;
     uint32 i;
 
     assert(count > 0);  // shouldn't call this with nothing to do!
@@ -786,6 +790,9 @@
         // Did we switch from vertex to pixel (to geometry, etc)?
         if (shader_type != uniform_shader_type)
         {
+            if (shader_type == MOJOSHADER_TYPE_PIXEL)
+                texbem_loc = loc;
+
             // we start with vertex, move to pixel, then to geometry, etc.
             //  The array should always be sorted as such.
             if (uniform_shader_type == MOJOSHADER_TYPE_PIXEL)
@@ -857,6 +864,20 @@
             } // else
         } // else if
     } // for
+
+    if (program->texbem_count)
+    {
+        const GLenum target = GL_FRAGMENT_PROGRAM_ARB;
+        GLfloat *srcf = program->ps_uniforms_float4;
+        srcf += (program->ps_uniforms_float4_count * 4) -
+                (program->texbem_count * 8);
+        loc = texbem_loc;
+        for (i = 0; i < program->texbem_count; i++, srcf += 8)
+        {
+            ctx->glProgramLocalParameter4fvARB(target, loc++, srcf);
+            ctx->glProgramLocalParameter4fvARB(target, loc++, srcf + 4);
+        } // for
+    } // if
 } // impl_ARB1_PushUniforms
 
 static void impl_ARB1_PushSampler(GLint loc, GLuint sampler)
@@ -1539,6 +1560,18 @@
         } // else
     } // for
 
+    if (shader_type == MOJOSHADER_TYPE_PIXEL)
+    {
+        for (i = 0; i < pd->sampler_count; i++)
+        {
+            if (pd->samplers[i].texbem)
+            {
+                float4_count += 2;
+                program->texbem_count++;
+            } // if
+        } // for
+    } // if
+
     #define MAKE_ARRAY(typ, gltyp, siz, count) \
         if (count) { \
             const size_t buflen = sizeof (gltyp) * siz * count; \
@@ -2270,6 +2303,24 @@
 } // MOJOSHADER_glGetPixelPreshaderUniformF
 
 
+void MOJOSHADER_glSetLegacyBumpMapEnv(unsigned int sampler, float mat00,
+                                      float mat01, float mat10, float mat11,
+                                      float lscale, float loffset)
+{
+    if ((sampler == 0) || (sampler > MAX_TEXBEMS))  // only t1..t3 have slots.
+        return;
+
+    GLfloat *dstf = ctx->texbem_state + (6 * (sampler-1));  // 6 floats/stage.
+    *(dstf++) = (GLfloat) mat00;
+    *(dstf++) = (GLfloat) mat01;
+    *(dstf++) = (GLfloat) mat10;
+    *(dstf++) = (GLfloat) mat11;
+    *(dstf++) = (GLfloat) lscale;
+    *(dstf++) = (GLfloat) loffset;
+    ctx->generation++;  // forces a uniform re-push at next ProgramReady.
+} // MOJOSHADER_glSetLegacyBumpMapEnv
+
+
 void MOJOSHADER_glProgramReady(void)
 {
     MOJOSHADER_glProgram *program = ctx->bound_program;
@@ -2290,7 +2341,8 @@
     } // if
 
     // push Uniforms to the program from our register files...
-    if ((program->uniform_count) && (program->generation != ctx->generation))
+    if ( ((program->uniform_count) || (program->texbem_count)) &&
+         (program->generation != ctx->generation))
     {
         // vertex shader uniforms come first in program->uniforms array.
         const uint32 count = program->uniform_count;
@@ -2304,6 +2356,7 @@
         const MOJOSHADER_preshader *preshader = NULL;
         uint32 i;
 
+        // !!! FIXME: shouldn't this run even if the generation hasn't changed?
         #if SUPPORT_PRESHADERS
         int ran_preshader = 0;
         if (program->vertex)
@@ -2394,6 +2447,36 @@
             // !!! FIXME: set constants that overlap the array.
         } // for
 
+        if (program->texbem_count)
+        {
+            const MOJOSHADER_parseData *pd = program->fragment->parseData;
+            const int samp_count = pd->sampler_count;
+            const MOJOSHADER_sampler *samps = pd->samplers;
+            GLfloat *dstf = program->ps_uniforms_float4;
+            int texbem_count = 0;
+
+            dstf += (program->ps_uniforms_float4_count * 4) -
+                     (program->texbem_count * 8);
+
+            assert(program->texbem_count <= MAX_TEXBEMS);
+            for (i = 0; i < samp_count; i++)
+            {
+                if (samps[i].texbem)
+                {
+                    assert(samps[i].index > 0);
+                    assert(samps[i].index <= MAX_TEXBEMS);
+                    memcpy(dstf, &ctx->texbem_state[6 * (samps[i].index-1)],
+                           sizeof (GLfloat) * 6);
+                    dstf[6] = 0.0f;
+                    dstf[7] = 0.0f;
+                    dstf += 8;
+                    texbem_count++;
+                } // if
+            } // for
+
+            assert(texbem_count == program->texbem_count);
+        } // if
+
         program->generation = ctx->generation;
 
         ctx->profilePushUniforms();
--- a/utils/testparse.c	Mon Apr 16 23:46:42 2012 -0400
+++ b/utils/testparse.c	Tue Apr 17 00:07:33 2012 -0400
@@ -370,6 +370,8 @@
                 printf("    * %d: %s", s->index, typenames[(int) s->type]);
                 if (s->name != NULL)
                     printf(" (\"%s\")", s->name);
+                if (s->texbem)
+                    printf(" [TEXBEM]");
                 printf("\n");
             } // for
         } // else