More arb1 profile work. trunk
author Ryan C. Gordon <icculus@icculus.org>
Sun, 25 May 2008 19:59:41 -0400
branch trunk
changeset 326 348ca33afeb0
parent 325 f8f3c27644b9
child 327 394d89b5acce
More arb1 profile work.
mojoshader.c
--- a/mojoshader.c	Sun May 25 10:21:09 2008 -0400
+++ b/mojoshader.c	Sun May 25 19:59:41 2008 -0400
@@ -310,6 +310,10 @@
 // !!! FIXME: get rid of this. use a bitfield instead.
 typedef enum
 {
+    // Specific to ARB1 profile...
+    CTX_FLAGS_ARB1_USES_SCRATCH1 = (1 << 0),
+    CTX_FLAGS_ARB1_USES_SCRATCH2 = (1 << 1),
+
     // Specific to GLSL profile...
     CTX_FLAGS_GLSL_LIT_OPCODE = (1 << 0),
     CTX_FLAGS_MASK = 0xFFFFFFFF
@@ -3685,72 +3689,29 @@
 EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
 EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
 EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
-
-static void emit_ARB1_SLT(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_SLT
-
-static void emit_ARB1_SGE(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_SGE
-
-static void emit_ARB1_EXP(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_EXP
-
-static void emit_ARB1_LOG(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_LOG
-
-static void emit_ARB1_LIT(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_LIT
-
-static void emit_ARB1_DST(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_DST
-
-static void emit_ARB1_LRP(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_LRP
-
-static void emit_ARB1_FRC(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_FRC
-
-static void emit_ARB1_M4X4(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_M4X4
-
-static void emit_ARB1_M4X3(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_M4X3
-
-static void emit_ARB1_M3X4(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_M3X4
-
-static void emit_ARB1_M3X3(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_M3X3
-
-static void emit_ARB1_M3X2(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_M3X2
-
+EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
+EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
+
+static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
+
+// !!! FIXME: LG2 needs to abs() the value, and deal with zero values.
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOG)
+
+EMIT_ARB1_OPCODE_DS_FUNC(LIT)
+EMIT_ARB1_OPCODE_DSS_FUNC(DST)
+EMIT_ARB1_OPCODE_DSSS_FUNC(LRP)
+EMIT_ARB1_OPCODE_DS_FUNC(FRC)
+
+// !!! FIXME: these could be implemented with vector opcodes, but it looks
+// !!! FIXME:  like the Microsoft HLSL compiler never generates matrix
+// !!! FIXME:  operations for some reason.
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M4X4)
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M4X3)
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X4)
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X3)
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X2)
+
+// !!! FIXME: these are available in nvidia's post-arb1 extensions
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(CALL)
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(CALLNZ)
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
@@ -3758,34 +3719,54 @@
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LABEL)
 
-static void emit_ARB1_POW(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_POW
-
-static void emit_ARB1_CRS(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_CRS
+// !!! FIXME: POW needs to abs() the value, and deal with zero values.
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(POW)
+
+static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
 
 static void emit_ARB1_SGN(Context *ctx)
 {
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
+    const char *dst0 = make_ARB1_destarg_string(ctx);
+    const char *src0 = make_ARB1_srcarg_string(ctx, 0);
+    output_line(ctx, "SLT scratch, %s, { 0.0 }", src0);  // 1.0 if src < 0
+    output_line(ctx, "SLT scratch2, -%s, { 0.0 }", src0);  // 1.0 if src > 0
+    output_line(ctx, "ADD%s, -scratch, scratch2", dst0);  // dst = (>0) - (<0)
+    ctx->flags = (ContextFlags) (ctx->flags | CTX_FLAGS_ARB1_USES_SCRATCH1);
+    ctx->flags = (ContextFlags) (ctx->flags | CTX_FLAGS_ARB1_USES_SCRATCH2);
 } // emit_ARB1_SGN
 
-static void emit_ARB1_ABS(Context *ctx)
-{
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
-} // emit_ARB1_ABS
+EMIT_ARB1_OPCODE_DS_FUNC(ABS)
 
 static void emit_ARB1_NRM(Context *ctx)
 {
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
+    const char *dst0 = make_ARB1_destarg_string(ctx);
+    const char *src0 = make_ARB1_srcarg_string(ctx, 0);
+    // normalize: dst = src * (1 / sqrt(dot3(src, src))). RSQ alone only
+    //  yields the scale factor, so it lands in scratch.x and MUL applies it.
+    output_line(ctx, "DP3 scratch.x, %s, %s", src0, src0);
+    output_line(ctx, "RSQ scratch.x, scratch.x");
+    output_line(ctx, "MUL%s, scratch.x, %s", dst0, src0);
+    ctx->flags = (ContextFlags) (ctx->flags | CTX_FLAGS_ARB1_USES_SCRATCH1);
 } // emit_ARB1_NRM
 
 static void emit_ARB1_SINCOS(Context *ctx)
 {
-    failf(ctx, "%s unimplemented in arb1 profile", __FUNCTION__);
+    // we don't care about the temp registers that <= sm2 demands; ignore them.
+    //  sm2 also talks about what components are left untouched vs. undefined;
+    //  we emit one opcode chosen by the dest writemask (x: COS, y: SIN,
+    //  xy: SCS) and fail on any other writemask.
+    const int mask = ctx->dest_arg.writemask;
+    const char *dst0 = make_ARB1_destarg_string(ctx);
+    const char *src0 = make_ARB1_srcarg_string(ctx, 0);
+
+    if (writemask_x(mask))
+        output_line(ctx, "COS%s, %s", dst0, src0);
+    else if (writemask_y(mask))
+        output_line(ctx, "SIN%s, %s", dst0, src0);
+    else if (writemask_xy(mask))
+        output_line(ctx, "SCS%s, %s", dst0, src0);
+    else
+        fail(ctx, "unhandled SINCOS writemask in arb1 profile");
 } // emit_ARB1_SINCOS
 
 EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(REP)
@@ -5000,6 +4981,14 @@
     } // if
 } // state_CND
 
+static void state_POW(Context *ctx)
+{   // validation only: both source args must pass replicate_swizzle().
+    if (!replicate_swizzle(ctx->source_args[0].swizzle))
+        fail(ctx, "POW src0 must have replicate swizzle");
+    else if (!replicate_swizzle(ctx->source_args[1].swizzle))
+        fail(ctx, "POW src1 must have replicate swizzle");
+} // state_POW
+
 static void state_SINCOS(Context *ctx)
 {
     const DestArgInfo *dst = &ctx->dest_arg;
@@ -5186,7 +5175,7 @@
     INSTRUCTION_STATE(ENDLOOP, NULL, MOJOSHADER_TYPE_ANY),
     INSTRUCTION_STATE(LABEL, S, MOJOSHADER_TYPE_ANY),
     INSTRUCTION_STATE(DCL, DCL, MOJOSHADER_TYPE_ANY),
-    INSTRUCTION(POW, DSS, MOJOSHADER_TYPE_ANY),
+    INSTRUCTION_STATE(POW, DSS, MOJOSHADER_TYPE_ANY),
     INSTRUCTION(CRS, DSS, MOJOSHADER_TYPE_ANY),
     INSTRUCTION(SGN, DSSS, MOJOSHADER_TYPE_ANY),
     INSTRUCTION(ABS, DS, MOJOSHADER_TYPE_ANY),