mojoshader.c
changeset 1097 cc3b4d123312
parent 1095 bc3d2c6e06cf
child 1098 ef8be3b15633
equal deleted inserted replaced
1096:b04fca1befb8 1097:cc3b4d123312
  3671 
  3671 
  3672 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
  3672 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
  3673                                                   const SourceArgInfo *arg,
  3673                                                   const SourceArgInfo *arg,
  3674                                                   char *buf, size_t buflen)
  3674                                                   char *buf, size_t buflen)
  3675 {
  3675 {
       
  3676     // !!! FIXME: this can hit pathological cases where we look like this...
       
  3677     //
       
  3678     //    dp3 r1.xyz, t0_bx2, t0_bx2
       
  3679     //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
       
  3680     //
       
  3681     // ...which do a lot of duplicate work in arb1...
       
  3682     //
       
  3683     //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
       
  3684     //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
       
  3685     //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
       
  3686     //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
       
  3687     //    DP3 r1.xyz, scratch0, scratch1;
       
  3688     //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
       
  3689     //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
       
  3690     //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
       
  3691     //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
       
  3692     //    MAD r1.xyz, scratch0, scratch1, scratch2;
       
  3693     //
       
  3694     // ...notice that the dp3 calculates the same value into two scratch
       
  3695     //  registers. This case is easier to handle; just see if multiple
       
  3696     //  source args are identical, build it up once, and use the same
       
  3697     //  scratch register for multiple arguments in that opcode.
       
  3698     //  Even better still, only calculate things once across instructions,
       
  3699     //  and be smart about letting it linger in a scratch register until we
       
  3700     //  definitely don't need the calculation anymore. That's harder to
       
  3701     //  write, though.
       
  3702 
  3676     char regnum_str[16] = { '\0' };
  3703     char regnum_str[16] = { '\0' };
  3677 
  3704 
  3678     // !!! FIXME: use get_ARB1_varname_in_buf() instead?
  3705     // !!! FIXME: use get_ARB1_varname_in_buf() instead?
  3679     const char *regtype_str = NULL;
  3706     const char *regtype_str = NULL;
  3680     if (!arg->relative)
  3707     if (!arg->relative)
  3741              rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
  3768              rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
  3742              rel_rbracket);
  3769              rel_rbracket);
  3743 
  3770 
  3744     // Some of the source mods need to generate instructions to a temp
  3771     // Some of the source mods need to generate instructions to a temp
  3745     //  register, in which case we'll replace the register name.
  3772     //  register, in which case we'll replace the register name.
       
  3773     const SourceMod mod = arg->src_mod;
       
  3774     const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
       
  3775                           ((mod == SRCMOD_ABS) && support_nv2(ctx)) );
       
  3776 
       
  3777     if (!inplace)
       
  3778     {
       
  3779         const size_t len = 64;
       
  3780         char *stackbuf = (char *) alloca(len);
       
  3781         regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
       
  3782         regnum_str[0] = '\0'; // move value to scratch register.
       
  3783         rel_lbracket = "";   // scratch register won't use array.
       
  3784         rel_rbracket = "";
       
  3785         rel_offset[0] = '\0';
       
  3786         rel_swizzle[0] = '\0';
       
  3787         rel_regtype_str = "";
       
  3788     } // if
  3746 
  3789 
  3747     const char *premod_str = "";
  3790     const char *premod_str = "";
  3748     const char *postmod_str = "";
  3791     const char *postmod_str = "";
  3749     switch (arg->src_mod)
  3792     switch (mod)
  3750     {
  3793     {
  3751         case SRCMOD_NEGATE:
  3794         case SRCMOD_NEGATE:
  3752             premod_str = "-";
  3795             premod_str = "-";
  3753             break;
  3796             break;
  3754 
  3797 
  3755         case SRCMOD_BIASNEGATE:
  3798         case SRCMOD_BIASNEGATE:
  3756             premod_str = "-";
  3799             premod_str = "-";
  3757             // fall through.
  3800             // fall through.
  3758         case SRCMOD_BIAS:
  3801         case SRCMOD_BIAS:
  3759             fail(ctx, "SRCMOD_BIAS currently unsupported in arb1");
  3802             output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
  3760             postmod_str = "_bias";
  3803                         regtype_str, buf);
  3761             break;
  3804             break;
  3762 
  3805 
  3763         case SRCMOD_SIGNNEGATE:
  3806         case SRCMOD_SIGNNEGATE:
  3764             premod_str = "-";
  3807             premod_str = "-";
  3765             // fall through.
  3808             // fall through.
  3766         case SRCMOD_SIGN:
  3809         case SRCMOD_SIGN:
  3767             fail(ctx, "SRCMOD_SIGN currently unsupported in arb1");
  3810             output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
  3768             postmod_str = "_bx2";
  3811                         regtype_str, buf);
       
  3812             output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
       
  3813                         regtype_str, regtype_str);
  3769             break;
  3814             break;
  3770 
  3815 
  3771         case SRCMOD_COMPLEMENT:
  3816         case SRCMOD_COMPLEMENT:
  3772             fail(ctx, "SRCMOD_COMPLEMENT currently unsupported in arb1");
  3817             output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
  3773             premod_str = "1-";
  3818                         regtype_str, buf);
  3774             break;
  3819             break;
  3775 
  3820 
  3776         case SRCMOD_X2NEGATE:
  3821         case SRCMOD_X2NEGATE:
  3777             premod_str = "-";
  3822             premod_str = "-";
  3778             // fall through.
  3823             // fall through.
  3779         case SRCMOD_X2:
  3824         case SRCMOD_X2:
  3780             fail(ctx, "SRCMOD_X2 currently unsupported in arb1");
  3825             output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
  3781             postmod_str = "_x2";
  3826                         regtype_str, buf);
  3782             break;
  3827             break;
  3783 
  3828 
  3784         case SRCMOD_DZ:
  3829         case SRCMOD_DZ:
  3785             fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
  3830             fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
  3786             postmod_str = "_dz";
  3831             postmod_str = "_dz";
  3793 
  3838 
  3794         case SRCMOD_ABSNEGATE:
  3839         case SRCMOD_ABSNEGATE:
  3795             premod_str = "-";
  3840             premod_str = "-";
  3796             // fall through.
  3841             // fall through.
  3797         case SRCMOD_ABS:
  3842         case SRCMOD_ABS:
  3798             if (support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
  3843             if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
  3799             {
  3844                 output_line(ctx, "ABS %s, %s;", regtype_str, buf);
  3800                 premod_str = (arg->src_mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
       
  3801                 postmod_str = "|";
       
  3802             } // if
       
  3803             else
  3845             else
  3804             {
  3846             {
  3805                 regtype_str = allocate_ARB1_scratch_reg_name(ctx,
  3847                 premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
  3806                                                     (char *) alloca(64), 64);
  3848                 postmod_str = "|";
  3807                 regnum_str[0] = '\0'; // move value to scratch register.
       
  3808                 rel_lbracket = "";   // scratch register won't use array.
       
  3809                 rel_rbracket = "";
       
  3810                 rel_offset[0] = '\0';
       
  3811                 rel_swizzle[0] = '\0';
       
  3812                 rel_regtype_str = "";
       
  3813                 output_line(ctx, "ABS %s, %s;", regtype_str, buf);
       
  3814             } // else
  3849             } // else
  3815             break;
  3850             break;
  3816 
  3851 
  3817         case SRCMOD_NOT:
  3852         case SRCMOD_NOT:
  3818             fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
  3853             fail(ctx, "SRCMOD_NOT currently unsupported in arb1");