3671 |
3671 |
3672 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx, |
3672 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx, |
3673 const SourceArgInfo *arg, |
3673 const SourceArgInfo *arg, |
3674 char *buf, size_t buflen) |
3674 char *buf, size_t buflen) |
3675 { |
3675 { |
|
3676 // !!! FIXME: this can hit pathological cases where we look like this... |
|
3677 // |
|
3678 // dp3 r1.xyz, t0_bx2, t0_bx2 |
|
3679 // mad r1.xyz, t0_bias, 1-r1, t0_bx2 |
|
3680 // |
|
3681 // ...which do a lot of duplicate work in arb1... |
|
3682 // |
|
3683 // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; |
|
3684 // MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 }; |
|
3685 // SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 }; |
|
3686 // MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 }; |
|
3687 // DP3 r1.xyz, scratch0, scratch1; |
|
3688 // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; |
|
3689 // SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1; |
|
3690 // SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 }; |
|
3691 // MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 }; |
|
3692 // MAD r1.xyz, scratch0, scratch1, scratch2; |
|
3693 // |
|
3694 // ...notice that the dp3 calculates the same value into two scratch |
|
3695 // registers. This case is easier to handle; just see if multiple |
|
3696 // source args are identical, build it up once, and use the same |
|
3697 // scratch register for multiple arguments in that opcode. |
|
3698 // Even better still, only calculate things once across instructions, |
|
3699 // and be smart about letting it linger in a scratch register until we |
|
3700 // definitely don't need the calculation anymore. That's harder to |
|
3701 // write, though. |
|
3702 |
3676 char regnum_str[16] = { '\0' }; |
3703 char regnum_str[16] = { '\0' }; |
3677 |
3704 |
3678 // !!! FIXME: use get_ARB1_varname_in_buf() instead? |
3705 // !!! FIXME: use get_ARB1_varname_in_buf() instead? |
3679 const char *regtype_str = NULL; |
3706 const char *regtype_str = NULL; |
3680 if (!arg->relative) |
3707 if (!arg->relative) |
3741 rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset, |
3768 rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset, |
3742 rel_rbracket); |
3769 rel_rbracket); |
3743 |
3770 |
3744 // Some of the source mods need to generate instructions to a temp |
3771 // Some of the source mods need to generate instructions to a temp |
3745 // register, in which case we'll replace the register name. |
3772 // register, in which case we'll replace the register name. |
|
3773 const SourceMod mod = arg->src_mod; |
|
3774 const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) || |
|
3775 ((mod == SRCMOD_ABS) && support_nv2(ctx)) ); |
|
3776 |
|
3777 if (!inplace) |
|
3778 { |
|
3779 const size_t len = 64; |
|
3780 char *stackbuf = (char *) alloca(len); |
|
3781 regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len); |
|
3782 regnum_str[0] = '\0'; // move value to scratch register. |
|
3783 rel_lbracket = ""; // scratch register won't use array. |
|
3784 rel_rbracket = ""; |
|
3785 rel_offset[0] = '\0'; |
|
3786 rel_swizzle[0] = '\0'; |
|
3787 rel_regtype_str = ""; |
|
3788 } // if |
3746 |
3789 |
3747 const char *premod_str = ""; |
3790 const char *premod_str = ""; |
3748 const char *postmod_str = ""; |
3791 const char *postmod_str = ""; |
3749 switch (arg->src_mod) |
3792 switch (mod) |
3750 { |
3793 { |
3751 case SRCMOD_NEGATE: |
3794 case SRCMOD_NEGATE: |
3752 premod_str = "-"; |
3795 premod_str = "-"; |
3753 break; |
3796 break; |
3754 |
3797 |
3755 case SRCMOD_BIASNEGATE: |
3798 case SRCMOD_BIASNEGATE: |
3756 premod_str = "-"; |
3799 premod_str = "-"; |
3757 // fall through. |
3800 // fall through. |
3758 case SRCMOD_BIAS: |
3801 case SRCMOD_BIAS: |
3759 fail(ctx, "SRCMOD_BIAS currently unsupported in arb1"); |
3802 output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", |
3760 postmod_str = "_bias"; |
3803 regtype_str, buf); |
3761 break; |
3804 break; |
3762 |
3805 |
3763 case SRCMOD_SIGNNEGATE: |
3806 case SRCMOD_SIGNNEGATE: |
3764 premod_str = "-"; |
3807 premod_str = "-"; |
3765 // fall through. |
3808 // fall through. |
3766 case SRCMOD_SIGN: |
3809 case SRCMOD_SIGN: |
3767 fail(ctx, "SRCMOD_SIGN currently unsupported in arb1"); |
3810 output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", |
3768 postmod_str = "_bx2"; |
3811 regtype_str, buf); |
|
3812 output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", |
|
3813 regtype_str, regtype_str); |
3769 break; |
3814 break; |
3770 |
3815 |
3771 case SRCMOD_COMPLEMENT: |
3816 case SRCMOD_COMPLEMENT: |
3772 fail(ctx, "SRCMOD_COMPLEMENT currently unsupported in arb1"); |
3817 output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;", |
3773 premod_str = "1-"; |
3818 regtype_str, buf); |
3774 break; |
3819 break; |
3775 |
3820 |
3776 case SRCMOD_X2NEGATE: |
3821 case SRCMOD_X2NEGATE: |
3777 premod_str = "-"; |
3822 premod_str = "-"; |
3778 // fall through. |
3823 // fall through. |
3779 case SRCMOD_X2: |
3824 case SRCMOD_X2: |
3780 fail(ctx, "SRCMOD_X2 currently unsupported in arb1"); |
3825 output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", |
3781 postmod_str = "_x2"; |
3826 regtype_str, buf); |
3782 break; |
3827 break; |
3783 |
3828 |
3784 case SRCMOD_DZ: |
3829 case SRCMOD_DZ: |
3785 fail(ctx, "SRCMOD_DZ currently unsupported in arb1"); |
3830 fail(ctx, "SRCMOD_DZ currently unsupported in arb1"); |
3786 postmod_str = "_dz"; |
3831 postmod_str = "_dz"; |
3793 |
3838 |
3794 case SRCMOD_ABSNEGATE: |
3839 case SRCMOD_ABSNEGATE: |
3795 premod_str = "-"; |
3840 premod_str = "-"; |
3796 // fall through. |
3841 // fall through. |
3797 case SRCMOD_ABS: |
3842 case SRCMOD_ABS: |
3798 if (support_nv2(ctx)) // GL_NV_vertex_program2_option adds this. |
3843 if (!support_nv2(ctx)) // GL_NV_vertex_program2_option adds this. |
3799 { |
3844 output_line(ctx, "ABS %s, %s;", regtype_str, buf); |
3800 premod_str = (arg->src_mod == SRCMOD_ABSNEGATE) ? "-|" : "|"; |
|
3801 postmod_str = "|"; |
|
3802 } // if |
|
3803 else |
3845 else |
3804 { |
3846 { |
3805 regtype_str = allocate_ARB1_scratch_reg_name(ctx, |
3847 premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|"; |
3806 (char *) alloca(64), 64); |
3848 postmod_str = "|"; |
3807 regnum_str[0] = '\0'; // move value to scratch register. |
|
3808 rel_lbracket = ""; // scratch register won't use array. |
|
3809 rel_rbracket = ""; |
|
3810 rel_offset[0] = '\0'; |
|
3811 rel_swizzle[0] = '\0'; |
|
3812 rel_regtype_str = ""; |
|
3813 output_line(ctx, "ABS %s, %s;", regtype_str, buf); |
|
3814 } // else |
3849 } // else |
3815 break; |
3850 break; |
3816 |
3851 |
3817 case SRCMOD_NOT: |
3852 case SRCMOD_NOT: |
3818 fail(ctx, "SRCMOD_NOT currently unsupported in arb1"); |
3853 fail(ctx, "SRCMOD_NOT currently unsupported in arb1"); |