From 8d9f7ead2e1c158c01e9cd6c326a09751f693ee3 Mon Sep 17 00:00:00 2001 From: Caleb Cornett Date: Tue, 23 Apr 2019 14:43:10 -0400 Subject: [PATCH] Reorganize profiles into their own files --- CMakeLists.txt | 6 + mojoshader.c | 8614 +----------------------- profiles/mojoshader_profile.h | 365 + profiles/mojoshader_profile_arb1.c | 2252 +++++++ profiles/mojoshader_profile_bytecode.c | 152 + profiles/mojoshader_profile_common.c | 504 ++ profiles/mojoshader_profile_d3d.c | 686 ++ profiles/mojoshader_profile_glsl.c | 2307 +++++++ profiles/mojoshader_profile_metal.c | 2305 +++++++ 9 files changed, 8735 insertions(+), 8456 deletions(-) create mode 100644 profiles/mojoshader_profile.h create mode 100644 profiles/mojoshader_profile_arb1.c create mode 100644 profiles/mojoshader_profile_bytecode.c create mode 100644 profiles/mojoshader_profile_common.c create mode 100644 profiles/mojoshader_profile_d3d.c create mode 100644 profiles/mojoshader_profile_glsl.c create mode 100644 profiles/mojoshader_profile_metal.c diff --git a/CMakeLists.txt b/CMakeLists.txt index eb27199d..704ae9ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,12 @@ ADD_LIBRARY(mojoshader ${LIBRARY_FORMAT} mojoshader.c mojoshader_common.c mojoshader_opengl.c + profiles/mojoshader_profile_arb1.c + profiles/mojoshader_profile_bytecode.c + profiles/mojoshader_profile_d3d.c + profiles/mojoshader_profile_glsl.c + profiles/mojoshader_profile_metal.c + profiles/mojoshader_profile_common.c ) IF(EFFECT_SUPPORT) TARGET_SOURCES(mojoshader PRIVATE diff --git a/mojoshader.c b/mojoshader.c index a5064f2e..c3e60cbb 100644 --- a/mojoshader.c +++ b/mojoshader.c @@ -16,502 +16,7 @@ // - A maximum of three temp registers can be used in a single instruction. 
#define __MOJOSHADER_INTERNAL__ 1 -#include "mojoshader_internal.h" - -typedef struct ConstantsList -{ - MOJOSHADER_constant constant; - struct ConstantsList *next; -} ConstantsList; - -typedef struct VariableList -{ - MOJOSHADER_uniformType type; - int index; - int count; - ConstantsList *constant; - int used; - int emit_position; // used in some profiles. - struct VariableList *next; -} VariableList; - -typedef struct RegisterList -{ - RegisterType regtype; - int regnum; - MOJOSHADER_usage usage; - unsigned int index; - int writemask; - int misc; - int written; - const VariableList *array; - struct RegisterList *next; -} RegisterList; - -typedef struct -{ - const uint32 *token; // this is the unmolested token in the stream. - int regnum; - int swizzle; // xyzw (all four, not split out). - int swizzle_x; - int swizzle_y; - int swizzle_z; - int swizzle_w; - SourceMod src_mod; - RegisterType regtype; - int relative; - RegisterType relative_regtype; - int relative_regnum; - int relative_component; - const VariableList *relative_array; -} SourceArgInfo; - -struct Profile; // predeclare. - -typedef struct CtabData -{ - int have_ctab; - int symbol_count; - MOJOSHADER_symbol *symbols; -} CtabData; - -// Context...this is state that changes as we parse through a shader... 
-typedef struct Context -{ - int isfail; - int out_of_memory; - MOJOSHADER_malloc malloc; - MOJOSHADER_free free; - void *malloc_data; - int current_position; - const uint32 *orig_tokens; - const uint32 *tokens; - uint32 tokencount; - int know_shader_size; - const MOJOSHADER_swizzle *swizzles; - unsigned int swizzles_count; - const MOJOSHADER_samplerMap *samplermap; - unsigned int samplermap_count; - Buffer *output; - Buffer *preflight; - Buffer *globals; - Buffer *inputs; - Buffer *outputs; - Buffer *helpers; - Buffer *subroutines; - Buffer *mainline_intro; - Buffer *mainline_arguments; - Buffer *mainline_top; - Buffer *mainline; - Buffer *postflight; - Buffer *ignore; - Buffer *output_stack[3]; - int indent_stack[3]; - int output_stack_len; - int indent; - const char *shader_type_str; - const char *endline; - const char *mainfn; - int endline_len; - int profileid; - const struct Profile *profile; - MOJOSHADER_shaderType shader_type; - uint8 major_ver; - uint8 minor_ver; - DestArgInfo dest_arg; - SourceArgInfo source_args[5]; - SourceArgInfo predicate_arg; // for predicated instructions. - uint32 dwords[4]; - uint32 version_token; - int instruction_count; - uint32 instruction_controls; - uint32 previous_opcode; - int coissue; - int loops; - int reps; - int max_reps; - int cmps; - int scratch_registers; - int max_scratch_registers; - int branch_labels_stack_index; - int branch_labels_stack[32]; - int assigned_branch_labels; - int assigned_vertex_attributes; - int last_address_reg_component; - RegisterList used_registers; - RegisterList defined_registers; - ErrorList *errors; - int constant_count; - ConstantsList *constants; - int uniform_count; - int uniform_float4_count; - int uniform_int4_count; - int uniform_bool_count; - RegisterList uniforms; - int attribute_count; - RegisterList attributes; - int sampler_count; - RegisterList samplers; - VariableList *variables; // variables to register mapping. 
- int centroid_allowed; - CtabData ctab; - int have_relative_input_registers; - int have_multi_color_outputs; - int determined_constants_arrays; - int predicated; - int uses_pointsize; - int uses_fog; - - // !!! FIXME: move these into SUPPORT_PROFILE sections. - int glsl_generated_lit_helper; - int glsl_generated_texldd_setup; - int glsl_generated_texm3x3spec_helper; - int arb1_wrote_position; - // !!! FIXME: move these into SUPPORT_PROFILE sections. - - int have_preshader; - int ignores_ctab; - int reset_texmpad; - int texm3x2pad_dst0; - int texm3x2pad_src0; - int texm3x3pad_dst0; - int texm3x3pad_src0; - int texm3x3pad_dst1; - int texm3x3pad_src1; - MOJOSHADER_preshader *preshader; - -#if SUPPORT_PROFILE_ARB1_NV - int profile_supports_nv2; - int profile_supports_nv3; - int profile_supports_nv4; -#endif -#if SUPPORT_PROFILE_GLSL120 - int profile_supports_glsl120; -#endif -#if SUPPORT_PROFILE_GLSLES - int profile_supports_glsles; -#endif - -#if SUPPORT_PROFILE_METAL - int metal_need_header_common; - int metal_need_header_math; - int metal_need_header_relational; - int metal_need_header_geometric; - int metal_need_header_graphics; - int metal_need_header_texture; -#endif -} Context; - - -// Use these macros so we can remove all bits of these profiles from the build. -#if SUPPORT_PROFILE_ARB1_NV -#define support_nv2(ctx) ((ctx)->profile_supports_nv2) -#define support_nv3(ctx) ((ctx)->profile_supports_nv3) -#define support_nv4(ctx) ((ctx)->profile_supports_nv4) -#else -#define support_nv2(ctx) (0) -#define support_nv3(ctx) (0) -#define support_nv4(ctx) (0) -#endif - -#if SUPPORT_PROFILE_GLSL120 -#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120) -#else -#define support_glsl120(ctx) (0) -#endif - -#if SUPPORT_PROFILE_GLSLES -#define support_glsles(ctx) ((ctx)->profile_supports_glsles) -#else -#define support_glsles(ctx) (0) -#endif - - -// Profile entry points... - -// one emit function for each opcode in each profile. 
-typedef void (*emit_function)(Context *ctx); - -// one emit function for starting output in each profile. -typedef void (*emit_start)(Context *ctx, const char *profilestr); - -// one emit function for ending output in each profile. -typedef void (*emit_end)(Context *ctx); - -// one emit function for phase opcode output in each profile. -typedef void (*emit_phase)(Context *ctx); - -// one emit function for finalizing output in each profile. -typedef void (*emit_finalize)(Context *ctx); - -// one emit function for global definitions in each profile. -typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum); - -// one emit function for relative uniform arrays in each profile. -typedef void (*emit_array)(Context *ctx, VariableList *var); - -// one emit function for relative constants arrays in each profile. -typedef void (*emit_const_array)(Context *ctx, - const struct ConstantsList *constslist, - int base, int size); - -// one emit function for uniforms in each profile. -typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum, - const VariableList *var); - -// one emit function for samplers in each profile. -typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype, - int texbem); - -// one emit function for attributes in each profile. -typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum, - MOJOSHADER_usage usage, int index, int wmask, - int flags); - -// one args function for each possible sequence of opcode arguments. -typedef int (*args_function)(Context *ctx); - -// one state function for each opcode where we have state machine updates. -typedef void (*state_function)(Context *ctx); - -// one function for varnames in each profile. -typedef const char *(*varname_function)(Context *c, RegisterType t, int num); - -// one function for const var array in each profile. 
-typedef const char *(*const_array_varname_function)(Context *c, int base, int size); - -typedef struct Profile -{ - const char *name; - emit_start start_emitter; - emit_end end_emitter; - emit_phase phase_emitter; - emit_global global_emitter; - emit_array array_emitter; - emit_const_array const_array_emitter; - emit_uniform uniform_emitter; - emit_sampler sampler_emitter; - emit_attribute attribute_emitter; - emit_finalize finalize_emitter; - varname_function get_varname; - const_array_varname_function get_const_array_varname; -} Profile; - - -// !!! FIXME: cut and paste between every damned source file follows... -// !!! FIXME: We need to make some sort of ContextBase that applies to all -// !!! FIXME: files and move this stuff to mojoshader_common.c ... - -static inline void out_of_memory(Context *ctx) -{ - ctx->isfail = ctx->out_of_memory = 1; -} // out_of_memory - -static inline void *Malloc(Context *ctx, const size_t len) -{ - void *retval = ctx->malloc((int) len, ctx->malloc_data); - if (retval == NULL) - out_of_memory(ctx); - return retval; -} // Malloc - -static inline char *StrDup(Context *ctx, const char *str) -{ - char *retval = (char *) Malloc(ctx, strlen(str) + 1); - if (retval != NULL) - strcpy(retval, str); - return retval; -} // StrDup - -static inline void Free(Context *ctx, void *ptr) -{ - ctx->free(ptr, ctx->malloc_data); -} // Free - -static void * MOJOSHADERCALL MallocBridge(int bytes, void *data) -{ - return Malloc((Context *) data, (size_t) bytes); -} // MallocBridge - -static void MOJOSHADERCALL FreeBridge(void *ptr, void *data) -{ - Free((Context *) data, ptr); -} // FreeBridge - - -// jump between output sections in the context... - -static int set_output(Context *ctx, Buffer **section) -{ - // only create output sections on first use. 
- if (*section == NULL) - { - *section = buffer_create(256, MallocBridge, FreeBridge, ctx); - if (*section == NULL) - return 0; - } // if - - ctx->output = *section; - return 1; -} // set_output - -static void push_output(Context *ctx, Buffer **section) -{ - assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack))); - ctx->output_stack[ctx->output_stack_len] = ctx->output; - ctx->indent_stack[ctx->output_stack_len] = ctx->indent; - ctx->output_stack_len++; - if (!set_output(ctx, section)) - return; - ctx->indent = 0; -} // push_output - -static inline void pop_output(Context *ctx) -{ - assert(ctx->output_stack_len > 0); - ctx->output_stack_len--; - ctx->output = ctx->output_stack[ctx->output_stack_len]; - ctx->indent = ctx->indent_stack[ctx->output_stack_len]; -} // pop_output - - - -// Shader model version magic... - -static inline uint32 ver_ui32(const uint8 major, const uint8 minor) -{ - return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) ); -} // version_ui32 - -static inline int shader_version_supported(const uint8 maj, const uint8 min) -{ - return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR)); -} // shader_version_supported - -static inline int shader_version_atleast(const Context *ctx, const uint8 maj, - const uint8 min) -{ - return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min)); -} // shader_version_atleast - -static inline int shader_version_exactly(const Context *ctx, const uint8 maj, - const uint8 min) -{ - return ((ctx->major_ver == maj) && (ctx->minor_ver == min)); -} // shader_version_exactly - -static inline int shader_is_pixel(const Context *ctx) -{ - return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL); -} // shader_is_pixel - -static inline int shader_is_vertex(const Context *ctx) -{ - return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX); -} // shader_is_vertex - - -static inline int isfail(const Context *ctx) -{ - return ctx->isfail; -} // isfail - - -static void failf(Context 
*ctx, const char *fmt, ...) ISPRINTF(2,3); -static void failf(Context *ctx, const char *fmt, ...) -{ - ctx->isfail = 1; - if (ctx->out_of_memory) - return; - - // no filename at this level (we pass a NULL to errorlist_add_va()...) - va_list ap; - va_start(ap, fmt); - errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap); - va_end(ap); -} // failf - - -static inline void fail(Context *ctx, const char *reason) -{ - failf(ctx, "%s", reason); -} // fail - - -static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3); -static void output_line(Context *ctx, const char *fmt, ...) -{ - assert(ctx->output != NULL); - if (isfail(ctx)) - return; // we failed previously, don't go on... - - const int indent = ctx->indent; - if (indent > 0) - { - char *indentbuf = (char *) alloca(indent); - memset(indentbuf, '\t', indent); - buffer_append(ctx->output, indentbuf, indent); - } // if - - va_list ap; - va_start(ap, fmt); - buffer_append_va(ctx->output, fmt, ap); - va_end(ap); - - buffer_append(ctx->output, ctx->endline, ctx->endline_len); -} // output_line - - -static inline void output_blank_line(Context *ctx) -{ - assert(ctx->output != NULL); - if (!isfail(ctx)) - buffer_append(ctx->output, ctx->endline, ctx->endline_len); -} // output_blank_line - - -// !!! FIXME: this is sort of nasty. -static void floatstr(Context *ctx, char *buf, size_t bufsize, float f, - int leavedecimal) -{ - const size_t len = MOJOSHADER_printFloat(buf, bufsize, f); - if ((len+2) >= bufsize) - fail(ctx, "BUG: internal buffer is too small"); - else - { - char *end = buf + len; - char *ptr = strchr(buf, '.'); - if (ptr == NULL) - { - if (leavedecimal) - strcat(buf, ".0"); - return; // done. - } // if - - while (--end != ptr) - { - if (*end != '0') - { - end++; - break; - } // if - } // while - if ((leavedecimal) && (end == ptr)) - end += 2; - *end = '\0'; // chop extra '0' or all decimal places off. 
- } // else -} // floatstr - -static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type) -{ - return (TextureType) (((int) type) + 2); -} // cvtMojoToD3DSamplerType - -static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type) -{ - return (MOJOSHADER_samplerType) (((int) type) - 2); -} // cvtD3DToMojoSamplerType - +#include "profiles/mojoshader_profile.h" // Deal with register lists... !!! FIXME: I sort of hate this. @@ -525,71 +30,6 @@ static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item) } // while } // free_reglist -static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum) -{ - return ( ((uint32) regnum) | (((uint32) regtype) << 16) ); -} // reg_to_uint32 - -// !!! FIXME: ditch this for a hash table. -static RegisterList *reglist_insert(Context *ctx, RegisterList *prev, - const RegisterType regtype, - const int regnum) -{ - const uint32 newval = reg_to_ui32(regtype, regnum); - RegisterList *item = prev->next; - while (item != NULL) - { - const uint32 val = reg_to_ui32(item->regtype, item->regnum); - if (newval == val) - return item; // already set, so we're done. - else if (newval < val) // insert it here. - break; - else // if (newval > val) - { - // keep going, we're not to the insertion point yet. - prev = item; - item = item->next; - } // else - } // while - - // we need to insert an entry after (prev). 
- item = (RegisterList *) Malloc(ctx, sizeof (RegisterList)); - if (item != NULL) - { - item->regtype = regtype; - item->regnum = regnum; - item->usage = MOJOSHADER_USAGE_UNKNOWN; - item->index = 0; - item->writemask = 0; - item->misc = 0; - item->written = 0; - item->array = NULL; - item->next = prev->next; - prev->next = item; - } // if - - return item; -} // reglist_insert - -static RegisterList *reglist_find(const RegisterList *prev, - const RegisterType rtype, const int regnum) -{ - const uint32 newval = reg_to_ui32(rtype, regnum); - RegisterList *item = prev->next; - while (item != NULL) - { - const uint32 val = reg_to_ui32(item->regtype, item->regnum); - if (newval == val) - return item; // here it is. - else if (newval < val) // should have been here if it existed. - return NULL; - else // if (newval > val) - item = item->next; - } // while - - return NULL; // wasn't in the list. -} // reglist_find - static inline const RegisterList *reglist_exists(RegisterList *prev, const RegisterType regtype, const int regnum) @@ -604,33 +44,6 @@ static inline int register_was_written(Context *ctx, const RegisterType rtype, return (reg && reg->written); } // register_was_written -static inline RegisterList *set_used_register(Context *ctx, - const RegisterType regtype, - const int regnum, - const int written) -{ - RegisterList *reg = NULL; - if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0)) - ctx->have_multi_color_outputs = 1; - - reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum); - if (reg && written) - reg->written = 1; - return reg; -} // set_used_register - -static inline int get_used_register(Context *ctx, const RegisterType regtype, - const int regnum) -{ - return (reglist_exists(&ctx->used_registers, regtype, regnum) != NULL); -} // get_used_register - -static inline void set_defined_register(Context *ctx, const RegisterType rtype, - const int regnum) -{ - reglist_insert(ctx, &ctx->defined_registers, rtype, regnum); -} // set_defined_register - 
static inline int get_defined_register(Context *ctx, const RegisterType rtype, const int regnum) { @@ -653,6 +66,16 @@ static void add_attribute_register(Context *ctx, const RegisterType rtype, ctx->uses_fog = 1; // note that we have to check this later. } // add_attribute_register +static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type) +{ + return (TextureType) (((int) type) + 2); +} // cvtMojoToD3DSamplerType + +static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type) +{ + return (MOJOSHADER_samplerType) (((int) type) - 2); +} // cvtD3DToMojoSamplerType + static inline void add_sampler(Context *ctx, const int regnum, TextureType ttype, const int texbem) { @@ -679,80 +102,6 @@ static inline void add_sampler(Context *ctx, const int regnum, item->misc |= texbem; } // add_sampler - -static inline int writemask_xyzw(const int writemask) -{ - return (writemask == 0xF); // 0xF == 1111. No explicit mask (full!). -} // writemask_xyzw - - -static inline int writemask_xyz(const int writemask) -{ - return (writemask == 0x7); // 0x7 == 0111. (that is: xyz) -} // writemask_xyz - - -static inline int writemask_xy(const int writemask) -{ - return (writemask == 0x3); // 0x3 == 0011. (that is: xy) -} // writemask_xy - - -static inline int writemask_x(const int writemask) -{ - return (writemask == 0x1); // 0x1 == 0001. (that is: x) -} // writemask_x - - -static inline int writemask_y(const int writemask) -{ - return (writemask == 0x2); // 0x1 == 0010. (that is: y) -} // writemask_y - - -static inline int replicate_swizzle(const int swizzle) -{ - return ( (((swizzle >> 0) & 0x3) == ((swizzle >> 2) & 0x3)) && - (((swizzle >> 2) & 0x3) == ((swizzle >> 4) & 0x3)) && - (((swizzle >> 4) & 0x3) == ((swizzle >> 6) & 0x3)) ); -} // replicate_swizzle - - -static inline int no_swizzle(const int swizzle) -{ - return (swizzle == 0xE4); // 0xE4 == 11100100 ... 0 1 2 3. No swizzle. 
-} // no_swizzle - - -static inline int vecsize_from_writemask(const int m) -{ - return (m & 1) + ((m >> 1) & 1) + ((m >> 2) & 1) + ((m >> 3) & 1); -} // vecsize_from_writemask - - -static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask) -{ - dst->writemask = mask; - dst->writemask0 = ((mask >> 0) & 1); - dst->writemask1 = ((mask >> 1) & 1); - dst->writemask2 = ((mask >> 2) & 1); - dst->writemask3 = ((mask >> 3) & 1); -} // set_dstarg_writemask - - -static int allocate_scratch_register(Context *ctx) -{ - const int retval = ctx->scratch_registers++; - if (retval >= ctx->max_scratch_registers) - ctx->max_scratch_registers = retval + 1; - return retval; -} // allocate_scratch_register - -static int allocate_branch_label(Context *ctx) -{ - return ctx->assigned_branch_labels++; -} // allocate_branch_label - static inline void adjust_token_position(Context *ctx, const int incr) { ctx->tokens += incr; @@ -760,7805 +109,159 @@ static inline void adjust_token_position(Context *ctx, const int incr) ctx->current_position += incr * sizeof (uint32); } // adjust_token_position +// Generate emitter declarations for each profile with this macro... 
+ +#define PREDECLARE_PROFILE(prof) \ + void emit_##prof##_start(Context *ctx, const char *profilestr); \ + void emit_##prof##_end(Context *ctx); \ + void emit_##prof##_phase(Context *ctx); \ + void emit_##prof##_finalize(Context *ctx); \ + void emit_##prof##_global(Context *ctx, RegisterType regtype, int regnum);\ + void emit_##prof##_array(Context *ctx, VariableList *var); \ + void emit_##prof##_const_array(Context *ctx, const ConstantsList *clist, \ + int base, int size); \ + void emit_##prof##_uniform(Context *ctx, RegisterType regtype, int regnum,\ + const VariableList *var); \ + void emit_##prof##_sampler(Context *ctx, int stage, TextureType ttype, \ + int tb); \ + void emit_##prof##_attribute(Context *ctx, RegisterType regtype, \ + int regnum, MOJOSHADER_usage usage, \ + int index, int wmask, int flags); \ + void emit_##prof##_NOP(Context *ctx); \ + void emit_##prof##_MOV(Context *ctx); \ + void emit_##prof##_ADD(Context *ctx); \ + void emit_##prof##_SUB(Context *ctx); \ + void emit_##prof##_MAD(Context *ctx); \ + void emit_##prof##_MUL(Context *ctx); \ + void emit_##prof##_RCP(Context *ctx); \ + void emit_##prof##_RSQ(Context *ctx); \ + void emit_##prof##_DP3(Context *ctx); \ + void emit_##prof##_DP4(Context *ctx); \ + void emit_##prof##_MIN(Context *ctx); \ + void emit_##prof##_MAX(Context *ctx); \ + void emit_##prof##_SLT(Context *ctx); \ + void emit_##prof##_SGE(Context *ctx); \ + void emit_##prof##_EXP(Context *ctx); \ + void emit_##prof##_LOG(Context *ctx); \ + void emit_##prof##_LIT(Context *ctx); \ + void emit_##prof##_DST(Context *ctx); \ + void emit_##prof##_LRP(Context *ctx); \ + void emit_##prof##_FRC(Context *ctx); \ + void emit_##prof##_M4X4(Context *ctx); \ + void emit_##prof##_M4X3(Context *ctx); \ + void emit_##prof##_M3X4(Context *ctx); \ + void emit_##prof##_M3X3(Context *ctx); \ + void emit_##prof##_M3X2(Context *ctx); \ + void emit_##prof##_CALL(Context *ctx); \ + void emit_##prof##_CALLNZ(Context *ctx); \ + void 
emit_##prof##_LOOP(Context *ctx); \ + void emit_##prof##_ENDLOOP(Context *ctx); \ + void emit_##prof##_LABEL(Context *ctx); \ + void emit_##prof##_DCL(Context *ctx); \ + void emit_##prof##_POW(Context *ctx); \ + void emit_##prof##_CRS(Context *ctx); \ + void emit_##prof##_SGN(Context *ctx); \ + void emit_##prof##_ABS(Context *ctx); \ + void emit_##prof##_NRM(Context *ctx); \ + void emit_##prof##_SINCOS(Context *ctx); \ + void emit_##prof##_REP(Context *ctx); \ + void emit_##prof##_ENDREP(Context *ctx); \ + void emit_##prof##_IF(Context *ctx); \ + void emit_##prof##_IFC(Context *ctx); \ + void emit_##prof##_ELSE(Context *ctx); \ + void emit_##prof##_ENDIF(Context *ctx); \ + void emit_##prof##_BREAK(Context *ctx); \ + void emit_##prof##_BREAKC(Context *ctx); \ + void emit_##prof##_MOVA(Context *ctx); \ + void emit_##prof##_DEFB(Context *ctx); \ + void emit_##prof##_DEFI(Context *ctx); \ + void emit_##prof##_TEXCRD(Context *ctx); \ + void emit_##prof##_TEXKILL(Context *ctx); \ + void emit_##prof##_TEXLD(Context *ctx); \ + void emit_##prof##_TEXBEM(Context *ctx); \ + void emit_##prof##_TEXBEML(Context *ctx); \ + void emit_##prof##_TEXREG2AR(Context *ctx); \ + void emit_##prof##_TEXREG2GB(Context *ctx); \ + void emit_##prof##_TEXM3X2PAD(Context *ctx); \ + void emit_##prof##_TEXM3X2TEX(Context *ctx); \ + void emit_##prof##_TEXM3X3PAD(Context *ctx); \ + void emit_##prof##_TEXM3X3TEX(Context *ctx); \ + void emit_##prof##_TEXM3X3SPEC(Context *ctx); \ + void emit_##prof##_TEXM3X3VSPEC(Context *ctx); \ + void emit_##prof##_EXPP(Context *ctx); \ + void emit_##prof##_LOGP(Context *ctx); \ + void emit_##prof##_CND(Context *ctx); \ + void emit_##prof##_DEF(Context *ctx); \ + void emit_##prof##_TEXREG2RGB(Context *ctx); \ + void emit_##prof##_TEXDP3TEX(Context *ctx); \ + void emit_##prof##_TEXM3X2DEPTH(Context *ctx); \ + void emit_##prof##_TEXDP3(Context *ctx); \ + void emit_##prof##_TEXM3X3(Context *ctx); \ + void emit_##prof##_TEXDEPTH(Context *ctx); \ + void 
emit_##prof##_CMP(Context *ctx); \ + void emit_##prof##_BEM(Context *ctx); \ + void emit_##prof##_DP2ADD(Context *ctx); \ + void emit_##prof##_DSX(Context *ctx); \ + void emit_##prof##_DSY(Context *ctx); \ + void emit_##prof##_TEXLDD(Context *ctx); \ + void emit_##prof##_SETP(Context *ctx); \ + void emit_##prof##_TEXLDL(Context *ctx); \ + void emit_##prof##_BREAKP(Context *ctx); \ + void emit_##prof##_RESERVED(Context *ctx); \ + void emit_##prof##_RET(Context *ctx); \ + const char *get_##prof##_varname(Context *ctx, RegisterType rt, \ + int regnum); \ + const char *get_##prof##_const_array_varname(Context *ctx, \ + int base, int size); + +// Check for profile support... -// D3D stuff that's used in more than just the d3d profile... - -static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type, - const RegisterType rtype, const int rnum) -{ - const int uses_psize = ctx->uses_pointsize; - const int uses_fog = ctx->uses_fog; - if ( (rtype == REG_TYPE_OUTPUT) && ((uses_psize) || (uses_fog)) ) - { - const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum); - if (reg != NULL) - { - const MOJOSHADER_usage usage = reg->usage; - return ( (uses_psize && (usage == MOJOSHADER_USAGE_POINTSIZE)) || - (uses_fog && (usage == MOJOSHADER_USAGE_FOG)) ); - } // if - } // if - - return scalar_register(shader_type, rtype, rnum); -} // isscalar - -static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' }; +#define AT_LEAST_ONE_PROFILE 0 +#if !SUPPORT_PROFILE_BYTECODE +#define PROFILE_EMITTER_BYTECODE(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op, +PREDECLARE_PROFILE(BYTECODE) +#endif -static const char *usagestrs[] = { - "_position", "_blendweight", "_blendindices", "_normal", "_psize", - "_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont", - "_color", "_fog", "_depth", "_sample" -}; +#if !SUPPORT_PROFILE_D3D +#define PROFILE_EMITTER_D3D(op) +#else +#undef 
AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_D3D(op) emit_D3D_##op, +PREDECLARE_PROFILE(D3D) +#endif -static const char *get_D3D_register_string(Context *ctx, - RegisterType regtype, - int regnum, char *regnum_str, - size_t regnum_size) -{ - const char *retval = NULL; - int has_number = 1; +#if !SUPPORT_PROFILE_GLSL +#define PROFILE_EMITTER_GLSL(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op, +PREDECLARE_PROFILE(GLSL) +#endif - switch (regtype) - { - case REG_TYPE_TEMP: - retval = "r"; - break; - - case REG_TYPE_INPUT: - retval = "v"; - break; - - case REG_TYPE_CONST: - retval = "c"; - break; - - case REG_TYPE_ADDRESS: // (or REG_TYPE_TEXTURE, same value.) - retval = shader_is_vertex(ctx) ? "a" : "t"; - break; - - case REG_TYPE_RASTOUT: - switch ((RastOutType) regnum) - { - case RASTOUT_TYPE_POSITION: retval = "oPos"; break; - case RASTOUT_TYPE_FOG: retval = "oFog"; break; - case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break; - } // switch - has_number = 0; - break; - - case REG_TYPE_ATTROUT: - retval = "oD"; - break; - - case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.) 
- if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0)) - retval = "o"; - else - retval = "oT"; - break; - - case REG_TYPE_CONSTINT: - retval = "i"; - break; - - case REG_TYPE_COLOROUT: - retval = "oC"; - break; - - case REG_TYPE_DEPTHOUT: - retval = "oDepth"; - has_number = 0; - break; - - case REG_TYPE_SAMPLER: - retval = "s"; - break; - - case REG_TYPE_CONSTBOOL: - retval = "b"; - break; - - case REG_TYPE_LOOP: - retval = "aL"; - has_number = 0; - break; - - case REG_TYPE_MISCTYPE: - switch ((const MiscTypeType) regnum) - { - case MISCTYPE_TYPE_POSITION: retval = "vPos"; break; - case MISCTYPE_TYPE_FACE: retval = "vFace"; break; - } // switch - has_number = 0; - break; - - case REG_TYPE_LABEL: - retval = "l"; - break; - - case REG_TYPE_PREDICATE: - retval = "p"; - break; - - //case REG_TYPE_TEMPFLOAT16: // !!! FIXME: don't know this asm string - default: - fail(ctx, "unknown register type"); - retval = "???"; - has_number = 0; - break; - } // switch - - if (has_number) - snprintf(regnum_str, regnum_size, "%u", (uint) regnum); - else - regnum_str[0] = '\0'; - - return retval; -} // get_D3D_register_string - - -// !!! FIXME: can we split the profile code out to separate source files? - -#define AT_LEAST_ONE_PROFILE 0 - -#if !SUPPORT_PROFILE_D3D -#define PROFILE_EMITTER_D3D(op) -#else -#undef AT_LEAST_ONE_PROFILE -#define AT_LEAST_ONE_PROFILE 1 -#define PROFILE_EMITTER_D3D(op) emit_D3D_##op, - -static const char *make_D3D_srcarg_string_in_buf(Context *ctx, - const SourceArgInfo *arg, - char *buf, size_t buflen) -{ - const char *premod_str = ""; - const char *postmod_str = ""; - switch (arg->src_mod) - { - case SRCMOD_NEGATE: - premod_str = "-"; - break; - - case SRCMOD_BIASNEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_BIAS: - postmod_str = "_bias"; - break; - - case SRCMOD_SIGNNEGATE: - premod_str = "-"; - // fall through. 
- case SRCMOD_SIGN: - postmod_str = "_bx2"; - break; - - case SRCMOD_COMPLEMENT: - premod_str = "1-"; - break; - - case SRCMOD_X2NEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_X2: - postmod_str = "_x2"; - break; - - case SRCMOD_DZ: - postmod_str = "_dz"; - break; - - case SRCMOD_DW: - postmod_str = "_dw"; - break; - - case SRCMOD_ABSNEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_ABS: - postmod_str = "_abs"; - break; - - case SRCMOD_NOT: - premod_str = "!"; - break; - - case SRCMOD_NONE: - case SRCMOD_TOTAL: - break; // stop compiler whining. - } // switch - - - char regnum_str[16]; - const char *regtype_str = get_D3D_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - - if (regtype_str == NULL) - { - fail(ctx, "Unknown source register type."); - *buf = '\0'; - return buf; - } // if - - const char *rel_lbracket = ""; - const char *rel_rbracket = ""; - char rel_swizzle[4] = { '\0' }; - char rel_regnum_str[16] = { '\0' }; - const char *rel_regtype_str = ""; - if (arg->relative) - { - if (arg->relative_regtype == REG_TYPE_LOOP) - { - rel_swizzle[0] = '\0'; - rel_swizzle[1] = '\0'; - rel_swizzle[2] = '\0'; - } // if - else - { - rel_swizzle[0] = '.'; - rel_swizzle[1] = swizzle_channels[arg->relative_component]; - rel_swizzle[2] = '\0'; - } // else - - rel_lbracket = "["; - rel_rbracket = "]"; - rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype, - arg->relative_regnum, - rel_regnum_str, - sizeof (rel_regnum_str)); - - if (regtype_str == NULL) - { - fail(ctx, "Unknown relative source register type."); - *buf = '\0'; - return buf; - } // if - } // if - - char swizzle_str[6]; - size_t i = 0; - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !no_swizzle(arg->swizzle)) - { - swizzle_str[i++] = '.'; - swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; - swizzle_str[i++] = swizzle_channels[arg->swizzle_y]; - swizzle_str[i++] = 
swizzle_channels[arg->swizzle_z]; - swizzle_str[i++] = swizzle_channels[arg->swizzle_w]; - - // .xyzz is the same as .xyz, .z is the same as .zzzz, etc. - while (swizzle_str[i-1] == swizzle_str[i-2]) - i--; - } // if - swizzle_str[i] = '\0'; - assert(i < sizeof (swizzle_str)); - - // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12] - snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", - premod_str, regtype_str, regnum_str, postmod_str, - rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle, - rel_rbracket, swizzle_str); - // !!! FIXME: make sure the scratch buffer was large enough. - return buf; -} // make_D3D_srcarg_string_in_buf - - -static const char *make_D3D_destarg_string(Context *ctx, char *buf, - const size_t buflen) -{ - const DestArgInfo *arg = &ctx->dest_arg; - - const char *result_shift_str = ""; - switch (arg->result_shift) - { - case 0x1: result_shift_str = "_x2"; break; - case 0x2: result_shift_str = "_x4"; break; - case 0x3: result_shift_str = "_x8"; break; - case 0xD: result_shift_str = "_d8"; break; - case 0xE: result_shift_str = "_d4"; break; - case 0xF: result_shift_str = "_d2"; break; - } // switch - - const char *sat_str = (arg->result_mod & MOD_SATURATE) ? "_sat" : ""; - const char *pp_str = (arg->result_mod & MOD_PP) ? "_pp" : ""; - const char *cent_str = (arg->result_mod & MOD_CENTROID) ? 
"_centroid" : ""; - - char regnum_str[16]; - const char *regtype_str = get_D3D_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - if (regtype_str == NULL) - { - fail(ctx, "Unknown destination register type."); - *buf = '\0'; - return buf; - } // if - - char writemask_str[6]; - size_t i = 0; - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !writemask_xyzw(arg->writemask)) - { - writemask_str[i++] = '.'; - if (arg->writemask0) writemask_str[i++] = 'x'; - if (arg->writemask1) writemask_str[i++] = 'y'; - if (arg->writemask2) writemask_str[i++] = 'z'; - if (arg->writemask3) writemask_str[i++] = 'w'; - } // if - writemask_str[i] = '\0'; - assert(i < sizeof (writemask_str)); - - const char *pred_left = ""; - const char *pred_right = ""; - char pred[32] = { '\0' }; - if (ctx->predicated) - { - pred_left = "("; - pred_right = ") "; - make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg, - pred, sizeof (pred)); - } // if - - // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ... - snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s", - result_shift_str, sat_str, pp_str, cent_str, - pred_left, pred, pred_right, - regtype_str, regnum_str, writemask_str); - // !!! FIXME: make sure the scratch buffer was large enough. 
- return buf; -} // make_D3D_destarg_string - - -static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx, - char *buf, size_t buflen) -{ - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - *buf = '\0'; - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - return make_D3D_srcarg_string_in_buf(ctx, arg, buf, buflen); -} // make_D3D_srcarg_string - -static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt, - int regnum, char *buf, - const size_t len) -{ - char regnum_str[16]; - const char *regtype_str = get_D3D_register_string(ctx, rt, regnum, - regnum_str, sizeof (regnum_str)); - snprintf(buf,len,"%s%s", regtype_str, regnum_str); - return buf; -} // get_D3D_varname_in_buf - - -static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum) -{ - char buf[64]; - get_D3D_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf)); - return StrDup(ctx, buf); -} // get_D3D_varname - - -static const char *get_D3D_const_array_varname(Context *ctx, int base, int size) -{ - char buf[64]; - snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size); - return StrDup(ctx, buf); -} // get_D3D_const_array_varname - - -static void emit_D3D_start(Context *ctx, const char *profilestr) -{ - const uint major = (uint) ctx->major_ver; - const uint minor = (uint) ctx->minor_ver; - char minor_str[16]; - - ctx->ignores_ctab = 1; - - if (minor == 0xFF) - strcpy(minor_str, "sw"); - else if ((major > 1) && (minor == 1)) - strcpy(minor_str, "x"); // for >= SM2, apparently this is "x". Weird. 
- else - snprintf(minor_str, sizeof (minor_str), "%u", (uint) minor); - - output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str); -} // emit_D3D_start - - -static void emit_D3D_end(Context *ctx) -{ - output_line(ctx, "end"); -} // emit_D3D_end - - -static void emit_D3D_phase(Context *ctx) -{ - output_line(ctx, "phase"); -} // emit_D3D_phase - - -static void emit_D3D_finalize(Context *ctx) -{ - // no-op. -} // emit_D3D_finalize - - -static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum) -{ - // no-op. -} // emit_D3D_global - - -static void emit_D3D_array(Context *ctx, VariableList *var) -{ - // no-op. -} // emit_D3D_array - - -static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist, - int base, int size) -{ - // no-op. -} // emit_D3D_const_array - - -static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum, - const VariableList *var) -{ - // no-op. -} // emit_D3D_uniform - - -static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb) -{ - // no-op. -} // emit_D3D_sampler - - -static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum, - MOJOSHADER_usage usage, int index, int wmask, - int flags) -{ - // no-op. -} // emit_D3D_attribute - - -static void emit_D3D_RESERVED(Context *ctx) -{ - // do nothing; fails in the state machine. -} // emit_D3D_RESERVED - - -// Generic D3D opcode emitters. A list of macros generate all the entry points -// that call into these... - -static char *lowercase(char *dst, const char *src) -{ - int i = 0; - do - { - const char ch = src[i]; - dst[i] = (((ch >= 'A') && (ch <= 'Z')) ? (ch - ('A' - 'a')) : ch); - } while (src[i++]); - return dst; -} // lowercase - - -static void emit_D3D_opcode_d(Context *ctx, const char *opcode) -{ - char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s%s", ctx->coissue ? 
"+" : "", opcode, dst); -} // emit_D3D_opcode_d - - -static void emit_D3D_opcode_s(Context *ctx, const char *opcode) -{ - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s %s", ctx->coissue ? "+" : "", opcode, src0); -} // emit_D3D_opcode_s - - -static void emit_D3D_opcode_ss(Context *ctx, const char *opcode) -{ - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s %s, %s", ctx->coissue ? "+" : "", opcode, src0, src1); -} // emit_D3D_opcode_ss - - -static void emit_D3D_opcode_ds(Context *ctx, const char *opcode) -{ - char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s%s, %s", ctx->coissue ? "+" : "", opcode, dst, src0); -} // emit_D3D_opcode_ds - - -static void emit_D3D_opcode_dss(Context *ctx, const char *opcode) -{ - char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s%s, %s, %s", ctx->coissue ? 
"+" : "", - opcode, dst, src0, src1); -} // emit_D3D_opcode_dss - - -static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode) -{ - char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s%s, %s, %s, %s", ctx->coissue ? "+" : "", - opcode, dst, src0, src1, src2); -} // emit_D3D_opcode_dsss - - -static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode) -{ - char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2)); - char src3[64]; make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3)); - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx,"%s%s%s, %s, %s, %s, %s", ctx->coissue ? "+" : "", - opcode, dst, src0, src1, src2, src3); -} // emit_D3D_opcode_dssss - - -static void emit_D3D_opcode(Context *ctx, const char *opcode) -{ - opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); - output_line(ctx, "%s%s", ctx->coissue ? 
"+" : "", opcode); -} // emit_D3D_opcode - - -#define EMIT_D3D_OPCODE_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_D_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_d(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_S_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_s(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_SS_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_ss(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_DS_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_ds(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_DSS_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_dss(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_dsss(ctx, #op); \ - } -#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \ - static void emit_D3D_##op(Context *ctx) { \ - emit_D3D_opcode_dssss(ctx, #op); \ - } - -EMIT_D3D_OPCODE_FUNC(NOP) -EMIT_D3D_OPCODE_DS_FUNC(MOV) -EMIT_D3D_OPCODE_DSS_FUNC(ADD) -EMIT_D3D_OPCODE_DSS_FUNC(SUB) -EMIT_D3D_OPCODE_DSSS_FUNC(MAD) -EMIT_D3D_OPCODE_DSS_FUNC(MUL) -EMIT_D3D_OPCODE_DS_FUNC(RCP) -EMIT_D3D_OPCODE_DS_FUNC(RSQ) -EMIT_D3D_OPCODE_DSS_FUNC(DP3) -EMIT_D3D_OPCODE_DSS_FUNC(DP4) -EMIT_D3D_OPCODE_DSS_FUNC(MIN) -EMIT_D3D_OPCODE_DSS_FUNC(MAX) -EMIT_D3D_OPCODE_DSS_FUNC(SLT) -EMIT_D3D_OPCODE_DSS_FUNC(SGE) -EMIT_D3D_OPCODE_DS_FUNC(EXP) -EMIT_D3D_OPCODE_DS_FUNC(LOG) -EMIT_D3D_OPCODE_DS_FUNC(LIT) -EMIT_D3D_OPCODE_DSS_FUNC(DST) -EMIT_D3D_OPCODE_DSSS_FUNC(LRP) -EMIT_D3D_OPCODE_DS_FUNC(FRC) -EMIT_D3D_OPCODE_DSS_FUNC(M4X4) -EMIT_D3D_OPCODE_DSS_FUNC(M4X3) -EMIT_D3D_OPCODE_DSS_FUNC(M3X4) -EMIT_D3D_OPCODE_DSS_FUNC(M3X3) -EMIT_D3D_OPCODE_DSS_FUNC(M3X2) -EMIT_D3D_OPCODE_S_FUNC(CALL) -EMIT_D3D_OPCODE_SS_FUNC(CALLNZ) -EMIT_D3D_OPCODE_SS_FUNC(LOOP) -EMIT_D3D_OPCODE_FUNC(RET) -EMIT_D3D_OPCODE_FUNC(ENDLOOP) -EMIT_D3D_OPCODE_S_FUNC(LABEL) 
-EMIT_D3D_OPCODE_DSS_FUNC(POW) -EMIT_D3D_OPCODE_DSS_FUNC(CRS) -EMIT_D3D_OPCODE_DSSS_FUNC(SGN) -EMIT_D3D_OPCODE_DS_FUNC(ABS) -EMIT_D3D_OPCODE_DS_FUNC(NRM) -EMIT_D3D_OPCODE_S_FUNC(REP) -EMIT_D3D_OPCODE_FUNC(ENDREP) -EMIT_D3D_OPCODE_S_FUNC(IF) -EMIT_D3D_OPCODE_FUNC(ELSE) -EMIT_D3D_OPCODE_FUNC(ENDIF) -EMIT_D3D_OPCODE_FUNC(BREAK) -EMIT_D3D_OPCODE_DS_FUNC(MOVA) -EMIT_D3D_OPCODE_D_FUNC(TEXKILL) -EMIT_D3D_OPCODE_DS_FUNC(TEXBEM) -EMIT_D3D_OPCODE_DS_FUNC(TEXBEML) -EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR) -EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX) -EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC) -EMIT_D3D_OPCODE_DS_FUNC(EXPP) -EMIT_D3D_OPCODE_DS_FUNC(LOGP) -EMIT_D3D_OPCODE_DSSS_FUNC(CND) -EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB) -EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH) -EMIT_D3D_OPCODE_DS_FUNC(TEXDP3) -EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3) -EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH) -EMIT_D3D_OPCODE_DSSS_FUNC(CMP) -EMIT_D3D_OPCODE_DSS_FUNC(BEM) -EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD) -EMIT_D3D_OPCODE_DS_FUNC(DSX) -EMIT_D3D_OPCODE_DS_FUNC(DSY) -EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD) -EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL) -EMIT_D3D_OPCODE_S_FUNC(BREAKP) - -// special cases for comparison opcodes... 
-static const char *get_D3D_comparison_string(Context *ctx) -{ - static const char *comps[] = { - "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le" - }; - - if (ctx->instruction_controls >= STATICARRAYLEN(comps)) - { - fail(ctx, "unknown comparison control"); - return ""; - } // if - - return comps[ctx->instruction_controls]; -} // get_D3D_comparison_string - -static void emit_D3D_BREAKC(Context *ctx) -{ - char op[16]; - snprintf(op, sizeof (op), "break%s", get_D3D_comparison_string(ctx)); - emit_D3D_opcode_ss(ctx, op); -} // emit_D3D_BREAKC - -static void emit_D3D_IFC(Context *ctx) -{ - char op[16]; - snprintf(op, sizeof (op), "if%s", get_D3D_comparison_string(ctx)); - emit_D3D_opcode_ss(ctx, op); -} // emit_D3D_IFC - -static void emit_D3D_SETP(Context *ctx) -{ - char op[16]; - snprintf(op, sizeof (op), "setp%s", get_D3D_comparison_string(ctx)); - emit_D3D_opcode_dss(ctx, op); -} // emit_D3D_SETP - -static void emit_D3D_DEF(Context *ctx) -{ - char dst[64]; - make_D3D_destarg_string(ctx, dst, sizeof (dst)); - const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? - char val0[32]; - char val1[32]; - char val2[32]; - char val3[32]; - floatstr(ctx, val0, sizeof (val0), val[0], 0); - floatstr(ctx, val1, sizeof (val1), val[1], 0); - floatstr(ctx, val2, sizeof (val2), val[2], 0); - floatstr(ctx, val3, sizeof (val3), val[3], 0); - output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3); -} // emit_D3D_DEF - -static void emit_D3D_DEFI(Context *ctx) -{ - char dst[64]; - make_D3D_destarg_string(ctx, dst, sizeof (dst)); - const int32 *x = (const int32 *) ctx->dwords; - output_line(ctx, "defi%s, %d, %d, %d, %d", dst, - (int) x[0], (int) x[1], (int) x[2], (int) x[3]); -} // emit_D3D_DEFI - -static void emit_D3D_DEFB(Context *ctx) -{ - char dst[64]; - make_D3D_destarg_string(ctx, dst, sizeof (dst)); - output_line(ctx, "defb%s, %s", dst, ctx->dwords[0] ? 
"true" : "false"); -} // emit_D3D_DEFB - - -static void emit_D3D_DCL(Context *ctx) -{ - char dst[64]; - make_D3D_destarg_string(ctx, dst, sizeof (dst)); - const DestArgInfo *arg = &ctx->dest_arg; - const char *usage_str = ""; - char index_str[16] = { '\0' }; - - if (arg->regtype == REG_TYPE_SAMPLER) - { - switch ((const TextureType) ctx->dwords[0]) - { - case TEXTURE_TYPE_2D: usage_str = "_2d"; break; - case TEXTURE_TYPE_CUBE: usage_str = "_cube"; break; - case TEXTURE_TYPE_VOLUME: usage_str = "_volume"; break; - default: fail(ctx, "unknown sampler texture type"); return; - } // switch - } // if - - else if (arg->regtype == REG_TYPE_MISCTYPE) - { - switch ((const MiscTypeType) arg->regnum) - { - case MISCTYPE_TYPE_POSITION: - case MISCTYPE_TYPE_FACE: - usage_str = ""; // just become "dcl vFace" or whatever. - break; - default: fail(ctx, "unknown misc register type"); return; - } // switch - } // else if - - else - { - const uint32 usage = ctx->dwords[0]; - const uint32 index = ctx->dwords[1]; - usage_str = usagestrs[usage]; - if (index != 0) - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - } // else - - output_line(ctx, "dcl%s%s%s", usage_str, index_str, dst); -} // emit_D3D_DCL - - -static void emit_D3D_TEXCRD(Context *ctx) -{ - // this opcode looks and acts differently depending on the shader model. - if (shader_version_atleast(ctx, 1, 4)) - emit_D3D_opcode_ds(ctx, "texcrd"); - else - emit_D3D_opcode_d(ctx, "texcoord"); -} // emit_D3D_TEXCOORD - -static void emit_D3D_TEXLD(Context *ctx) -{ - // this opcode looks and acts differently depending on the shader model. 
- if (shader_version_atleast(ctx, 2, 0)) - { - if (ctx->instruction_controls == CONTROL_TEXLD) - emit_D3D_opcode_dss(ctx, "texld"); - else if (ctx->instruction_controls == CONTROL_TEXLDP) - emit_D3D_opcode_dss(ctx, "texldp"); - else if (ctx->instruction_controls == CONTROL_TEXLDB) - emit_D3D_opcode_dss(ctx, "texldb"); - } // if - - else if (shader_version_atleast(ctx, 1, 4)) - { - emit_D3D_opcode_ds(ctx, "texld"); - } // else if - - else - { - emit_D3D_opcode_d(ctx, "tex"); - } // else -} // emit_D3D_TEXLD - -static void emit_D3D_SINCOS(Context *ctx) -{ - // this opcode needs extra registers for sm2 and lower. - if (!shader_version_atleast(ctx, 3, 0)) - emit_D3D_opcode_dsss(ctx, "sincos"); - else - emit_D3D_opcode_ds(ctx, "sincos"); -} // emit_D3D_SINCOS - - -#undef EMIT_D3D_OPCODE_FUNC -#undef EMIT_D3D_OPCODE_D_FUNC -#undef EMIT_D3D_OPCODE_S_FUNC -#undef EMIT_D3D_OPCODE_SS_FUNC -#undef EMIT_D3D_OPCODE_DS_FUNC -#undef EMIT_D3D_OPCODE_DSS_FUNC -#undef EMIT_D3D_OPCODE_DSSS_FUNC -#undef EMIT_D3D_OPCODE_DSSSS_FUNC - -#endif // SUPPORT_PROFILE_D3D - - -#if !SUPPORT_PROFILE_BYTECODE -#define PROFILE_EMITTER_BYTECODE(op) -#else -#undef AT_LEAST_ONE_PROFILE -#define AT_LEAST_ONE_PROFILE 1 -#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op, - -static void emit_BYTECODE_start(Context *ctx, const char *profilestr) -{ - ctx->ignores_ctab = 1; -} // emit_BYTECODE_start - -static void emit_BYTECODE_finalize(Context *ctx) -{ - // just copy the whole token stream and make all other emitters no-ops. 
- if (set_output(ctx, &ctx->mainline)) - { - const size_t len = ((size_t) (ctx->tokens - ctx->orig_tokens)) * sizeof (uint32); - buffer_append(ctx->mainline, (const char *) ctx->orig_tokens, len); - } // if -} // emit_BYTECODE_finalize - -static void emit_BYTECODE_end(Context *ctx) {} -static void emit_BYTECODE_phase(Context *ctx) {} -static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {} -static void emit_BYTECODE_array(Context *ctx, VariableList *var) {} -static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {} -static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c, - int base, int size) {} -static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n, - const VariableList *var) {} -static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n, - MOJOSHADER_usage u, int i, int w, - int f) {} - -static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum) -{ - char regnum_str[16]; - const char *regtype_str = get_D3D_register_string(ctx, rt, regnum, - regnum_str, sizeof (regnum_str)); - char buf[64]; - snprintf(buf, sizeof (buf), "%s%s", regtype_str, regnum_str); - return StrDup(ctx, buf); -} // get_BYTECODE_varname - -static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size) -{ - char buf[64]; - snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size); - return StrDup(ctx, buf); -} // get_BYTECODE_const_array_varname - -#define EMIT_BYTECODE_OPCODE_FUNC(op) \ - static void emit_BYTECODE_##op(Context *ctx) {} - -EMIT_BYTECODE_OPCODE_FUNC(RESERVED) -EMIT_BYTECODE_OPCODE_FUNC(NOP) -EMIT_BYTECODE_OPCODE_FUNC(MOV) -EMIT_BYTECODE_OPCODE_FUNC(ADD) -EMIT_BYTECODE_OPCODE_FUNC(SUB) -EMIT_BYTECODE_OPCODE_FUNC(MAD) -EMIT_BYTECODE_OPCODE_FUNC(MUL) -EMIT_BYTECODE_OPCODE_FUNC(RCP) -EMIT_BYTECODE_OPCODE_FUNC(RSQ) -EMIT_BYTECODE_OPCODE_FUNC(DP3) -EMIT_BYTECODE_OPCODE_FUNC(DP4) -EMIT_BYTECODE_OPCODE_FUNC(MIN) -EMIT_BYTECODE_OPCODE_FUNC(MAX) 
-EMIT_BYTECODE_OPCODE_FUNC(SLT) -EMIT_BYTECODE_OPCODE_FUNC(SGE) -EMIT_BYTECODE_OPCODE_FUNC(EXP) -EMIT_BYTECODE_OPCODE_FUNC(LOG) -EMIT_BYTECODE_OPCODE_FUNC(LIT) -EMIT_BYTECODE_OPCODE_FUNC(DST) -EMIT_BYTECODE_OPCODE_FUNC(LRP) -EMIT_BYTECODE_OPCODE_FUNC(FRC) -EMIT_BYTECODE_OPCODE_FUNC(M4X4) -EMIT_BYTECODE_OPCODE_FUNC(M4X3) -EMIT_BYTECODE_OPCODE_FUNC(M3X4) -EMIT_BYTECODE_OPCODE_FUNC(M3X3) -EMIT_BYTECODE_OPCODE_FUNC(M3X2) -EMIT_BYTECODE_OPCODE_FUNC(CALL) -EMIT_BYTECODE_OPCODE_FUNC(CALLNZ) -EMIT_BYTECODE_OPCODE_FUNC(LOOP) -EMIT_BYTECODE_OPCODE_FUNC(RET) -EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP) -EMIT_BYTECODE_OPCODE_FUNC(LABEL) -EMIT_BYTECODE_OPCODE_FUNC(POW) -EMIT_BYTECODE_OPCODE_FUNC(CRS) -EMIT_BYTECODE_OPCODE_FUNC(SGN) -EMIT_BYTECODE_OPCODE_FUNC(ABS) -EMIT_BYTECODE_OPCODE_FUNC(NRM) -EMIT_BYTECODE_OPCODE_FUNC(SINCOS) -EMIT_BYTECODE_OPCODE_FUNC(REP) -EMIT_BYTECODE_OPCODE_FUNC(ENDREP) -EMIT_BYTECODE_OPCODE_FUNC(IF) -EMIT_BYTECODE_OPCODE_FUNC(ELSE) -EMIT_BYTECODE_OPCODE_FUNC(ENDIF) -EMIT_BYTECODE_OPCODE_FUNC(BREAK) -EMIT_BYTECODE_OPCODE_FUNC(MOVA) -EMIT_BYTECODE_OPCODE_FUNC(TEXKILL) -EMIT_BYTECODE_OPCODE_FUNC(TEXBEM) -EMIT_BYTECODE_OPCODE_FUNC(TEXBEML) -EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR) -EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC) -EMIT_BYTECODE_OPCODE_FUNC(EXPP) -EMIT_BYTECODE_OPCODE_FUNC(LOGP) -EMIT_BYTECODE_OPCODE_FUNC(CND) -EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB) -EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH) -EMIT_BYTECODE_OPCODE_FUNC(TEXDP3) -EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3) -EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH) -EMIT_BYTECODE_OPCODE_FUNC(CMP) -EMIT_BYTECODE_OPCODE_FUNC(BEM) -EMIT_BYTECODE_OPCODE_FUNC(DP2ADD) -EMIT_BYTECODE_OPCODE_FUNC(DSX) -EMIT_BYTECODE_OPCODE_FUNC(DSY) -EMIT_BYTECODE_OPCODE_FUNC(TEXLDD) 
-EMIT_BYTECODE_OPCODE_FUNC(TEXLDL) -EMIT_BYTECODE_OPCODE_FUNC(BREAKP) -EMIT_BYTECODE_OPCODE_FUNC(BREAKC) -EMIT_BYTECODE_OPCODE_FUNC(IFC) -EMIT_BYTECODE_OPCODE_FUNC(SETP) -EMIT_BYTECODE_OPCODE_FUNC(DEF) -EMIT_BYTECODE_OPCODE_FUNC(DEFI) -EMIT_BYTECODE_OPCODE_FUNC(DEFB) -EMIT_BYTECODE_OPCODE_FUNC(DCL) -EMIT_BYTECODE_OPCODE_FUNC(TEXCRD) -EMIT_BYTECODE_OPCODE_FUNC(TEXLD) - -#undef EMIT_BYTECODE_OPCODE_FUNC - -#endif // SUPPORT_PROFILE_BYTECODE - - -#if !SUPPORT_PROFILE_GLSL -#define PROFILE_EMITTER_GLSL(op) -#else -#undef AT_LEAST_ONE_PROFILE -#define AT_LEAST_ONE_PROFILE 1 -#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op, - -#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \ - static void emit_GLSL_##op(Context *ctx) { \ - fail(ctx, #op " unimplemented in glsl profile"); \ - } - -static inline const char *get_GLSL_register_string(Context *ctx, - const RegisterType regtype, const int regnum, - char *regnum_str, const size_t regnum_size) -{ - // turns out these are identical at the moment. - return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); -} // get_GLSL_register_string - -static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype) -{ - switch (rtype) - { - case REG_TYPE_CONST: return "vec4"; - case REG_TYPE_CONSTINT: return "ivec4"; - case REG_TYPE_CONSTBOOL: return "bool"; - default: fail(ctx, "BUG: used a uniform we don't know how to define."); - } // switch - - return NULL; -} // get_GLSL_uniform_type - -static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt, - int regnum, char *buf, - const size_t len) -{ - char regnum_str[16]; - const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum, - regnum_str, sizeof (regnum_str)); - snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str); - return buf; -} // get_GLSL_varname_in_buf - - -static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum) -{ - char buf[64]; - get_GLSL_varname_in_buf(ctx, rt, regnum, buf, 
sizeof (buf)); - return StrDup(ctx, buf); -} // get_GLSL_varname - - -static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx, - const int base, const int size, - char *buf, const size_t buflen) -{ - const char *type = ctx->shader_type_str; - snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size); - return buf; -} // get_GLSL_const_array_varname_in_buf - -static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size) -{ - char buf[64]; - get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); - return StrDup(ctx, buf); -} // get_GLSL_const_array_varname - - -static inline const char *get_GLSL_input_array_varname(Context *ctx, - char *buf, const size_t buflen) -{ - snprintf(buf, buflen, "%s", "vertex_input_array"); - return buf; -} // get_GLSL_input_array_varname - - -static const char *get_GLSL_uniform_array_varname(Context *ctx, - const RegisterType regtype, - char *buf, const size_t len) -{ - const char *shadertype = ctx->shader_type_str; - const char *type = get_GLSL_uniform_type(ctx, regtype); - snprintf(buf, len, "%s_uniforms_%s", shadertype, type); - return buf; -} // get_GLSL_uniform_array_varname - -static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len) -{ - const DestArgInfo *arg = &ctx->dest_arg; - return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); -} // get_GLSL_destarg_varname - -static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx, - char *buf, size_t len) -{ - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - *buf = '\0'; - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); -} // get_GLSL_srcarg_varname - - -static const char *make_GLSL_destarg_assign(Context *, char *, const size_t, - const char *, ...) 
ISPRINTF(4,5); - -static const char *make_GLSL_destarg_assign(Context *ctx, char *buf, - const size_t buflen, - const char *fmt, ...) -{ - int need_parens = 0; - const DestArgInfo *arg = &ctx->dest_arg; - - if (arg->writemask == 0) - { - *buf = '\0'; - return buf; // no writemask? It's a no-op. - } // if - - char clampbuf[32] = { '\0' }; - const char *clampleft = ""; - const char *clampright = ""; - if (arg->result_mod & MOD_SATURATE) - { - const int vecsize = vecsize_from_writemask(arg->writemask); - clampleft = "clamp("; - if (vecsize == 1) - clampright = ", 0.0, 1.0)"; - else - { - snprintf(clampbuf, sizeof (clampbuf), - ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize); - clampright = clampbuf; - } // else - } // if - - // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. - - // CENTROID only allowed in DCL opcodes, which shouldn't come through here. - assert((arg->result_mod & MOD_CENTROID) == 0); - - if (ctx->predicated) - { - fail(ctx, "predicated destinations unsupported"); // !!! FIXME - *buf = '\0'; - return buf; - } // if - - char operation[256]; - va_list ap; - va_start(ap, fmt); - const int len = vsnprintf(operation, sizeof (operation), fmt, ap); - va_end(ap); - if (len >= sizeof (operation)) - { - fail(ctx, "operation string too large"); // I'm lazy. 
:P - *buf = '\0'; - return buf; - } // if - - const char *result_shift_str = ""; - switch (arg->result_shift) - { - case 0x1: result_shift_str = " * 2.0"; break; - case 0x2: result_shift_str = " * 4.0"; break; - case 0x3: result_shift_str = " * 8.0"; break; - case 0xD: result_shift_str = " / 8.0"; break; - case 0xE: result_shift_str = " / 4.0"; break; - case 0xF: result_shift_str = " / 2.0"; break; - } // switch - need_parens |= (result_shift_str[0] != '\0'); - - char regnum_str[16]; - const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - char writemask_str[6]; - size_t i = 0; - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !writemask_xyzw(arg->writemask)) - { - writemask_str[i++] = '.'; - if (arg->writemask0) writemask_str[i++] = 'x'; - if (arg->writemask1) writemask_str[i++] = 'y'; - if (arg->writemask2) writemask_str[i++] = 'z'; - if (arg->writemask3) writemask_str[i++] = 'w'; - } // if - writemask_str[i] = '\0'; - assert(i < sizeof (writemask_str)); - - const char *leftparen = (need_parens) ? "(" : ""; - const char *rightparen = (need_parens) ? ")" : ""; - - snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;", - ctx->shader_type_str, regtype_str, regnum_str, writemask_str, - clampleft, leftparen, operation, rightparen, result_shift_str, - clampright); - // !!! FIXME: make sure the scratch buffer was large enough. 
- return buf; -} // make_GLSL_destarg_assign - - -static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize, - const int swizzle, const int writemask) -{ - size_t i = 0; - if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) ) - { - const int writemask0 = (writemask >> 0) & 0x1; - const int writemask1 = (writemask >> 1) & 0x1; - const int writemask2 = (writemask >> 2) & 0x1; - const int writemask3 = (writemask >> 3) & 0x1; - - const int swizzle_x = (swizzle >> 0) & 0x3; - const int swizzle_y = (swizzle >> 2) & 0x3; - const int swizzle_z = (swizzle >> 4) & 0x3; - const int swizzle_w = (swizzle >> 6) & 0x3; - - swiz_str[i++] = '.'; - if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x]; - if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y]; - if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z]; - if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w]; - } // if - assert(i < strsize); - swiz_str[i] = '\0'; - return swiz_str; -} // make_GLSL_swizzle_string - - -static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx, - const int writemask, char *buf, - const size_t buflen) -{ - *buf = '\0'; - - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - - const char *premod_str = ""; - const char *postmod_str = ""; - switch (arg->src_mod) - { - case SRCMOD_NEGATE: - premod_str = "-"; - break; - - case SRCMOD_BIASNEGATE: - premod_str = "-("; - postmod_str = " - 0.5)"; - break; - - case SRCMOD_BIAS: - premod_str = "("; - postmod_str = " - 0.5)"; - break; - - case SRCMOD_SIGNNEGATE: - premod_str = "-(("; - postmod_str = " - 0.5) * 2.0)"; - break; - - case SRCMOD_SIGN: - premod_str = "(("; - postmod_str = " - 0.5) * 2.0)"; - break; - - case SRCMOD_COMPLEMENT: - premod_str = "(1.0 - "; - postmod_str = ")"; - break; - - case SRCMOD_X2NEGATE: - premod_str = "-("; - postmod_str = " * 2.0)"; - break; - - 
case SRCMOD_X2: - premod_str = "("; - postmod_str = " * 2.0)"; - break; - - case SRCMOD_DZ: - fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME - postmod_str = "_dz"; - break; - - case SRCMOD_DW: - fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME - postmod_str = "_dw"; - break; - - case SRCMOD_ABSNEGATE: - premod_str = "-abs("; - postmod_str = ")"; - break; - - case SRCMOD_ABS: - premod_str = "abs("; - postmod_str = ")"; - break; - - case SRCMOD_NOT: - premod_str = "!"; - break; - - case SRCMOD_NONE: - case SRCMOD_TOTAL: - break; // stop compiler whining. - } // switch - - const char *regtype_str = NULL; - - if (!arg->relative) - { - regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, - (char *) alloca(64), 64); - } // if - - const char *rel_lbracket = ""; - char rel_offset[32] = { '\0' }; - const char *rel_rbracket = ""; - char rel_swizzle[4] = { '\0' }; - const char *rel_regtype_str = ""; - if (arg->relative) - { - if (arg->regtype == REG_TYPE_INPUT) - regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64); - else - { - assert(arg->regtype == REG_TYPE_CONST); - const int arrayidx = arg->relative_array->index; - const int offset = arg->regnum - arrayidx; - assert(offset >= 0); - if (arg->relative_array->constant) - { - const int arraysize = arg->relative_array->count; - regtype_str = get_GLSL_const_array_varname_in_buf(ctx, - arrayidx, arraysize, (char *) alloca(64), 64); - if (offset != 0) - snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset); - } // if - else - { - regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype, - (char *) alloca(64), 64); - if (offset == 0) - { - snprintf(rel_offset, sizeof (rel_offset), - "ARRAYBASE_%d + ", arrayidx); - } // if - else - { - snprintf(rel_offset, sizeof (rel_offset), - "(ARRAYBASE_%d + %d) + ", arrayidx, offset); - } // else - } // else - } // else - - rel_lbracket = "["; - - if (arg->relative_regtype == REG_TYPE_LOOP) - { - rel_regtype_str = "aL"; - 
rel_swizzle[0] = '\0'; - rel_swizzle[1] = '\0'; - rel_swizzle[2] = '\0'; - } // if - else - { - rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype, - arg->relative_regnum, - (char *) alloca(64), 64); - rel_swizzle[0] = '.'; - rel_swizzle[1] = swizzle_channels[arg->relative_component]; - rel_swizzle[2] = '\0'; - } // else - rel_rbracket = "]"; - } // if - - char swiz_str[6] = { '\0' }; - if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) - { - make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str), - arg->swizzle, writemask); - } // if - - if (regtype_str == NULL) - { - fail(ctx, "Unknown source register type."); - return buf; - } // if - - snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s", - premod_str, regtype_str, rel_lbracket, rel_offset, - rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str, - postmod_str); - // !!! FIXME: make sure the scratch buffer was large enough. - return buf; -} // make_GLSL_srcarg_string - -// generate some convenience functions. -#define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \ - static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \ - const size_t idx, char *buf, \ - const size_t buflen) { \ - return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \ - } -MAKE_GLSL_SRCARG_STRING_(x, (1 << 0)) -MAKE_GLSL_SRCARG_STRING_(y, (1 << 1)) -MAKE_GLSL_SRCARG_STRING_(z, (1 << 2)) -MAKE_GLSL_SRCARG_STRING_(w, (1 << 3)) -MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0)) -MAKE_GLSL_SRCARG_STRING_(full, 0xF) -MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask) -MAKE_GLSL_SRCARG_STRING_(vec3, 0x7) -MAKE_GLSL_SRCARG_STRING_(vec2, 0x3) -#undef MAKE_GLSL_SRCARG_STRING_ - -// special cases for comparison opcodes... 
- -static const char *get_GLSL_comparison_string_scalar(Context *ctx) -{ - static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" }; - if (ctx->instruction_controls >= STATICARRAYLEN(comps)) - { - fail(ctx, "unknown comparison control"); - return ""; - } // if - - return comps[ctx->instruction_controls]; -} // get_GLSL_comparison_string_scalar - -static const char *get_GLSL_comparison_string_vector(Context *ctx) -{ - static const char *comps[] = { - "", "greaterThan", "equal", "greaterThanEqual", "lessThan", - "notEqual", "lessThanEqual" - }; - - if (ctx->instruction_controls >= STATICARRAYLEN(comps)) - { - fail(ctx, "unknown comparison control"); - return ""; - } // if - - return comps[ctx->instruction_controls]; -} // get_GLSL_comparison_string_vector - - -static void emit_GLSL_start(Context *ctx, const char *profilestr) -{ - if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx)) - { - failf(ctx, "Shader type %u unsupported in this profile.", - (uint) ctx->shader_type); - return; - } // if - - else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0) - { - // No gl_FragData[] before GLSL 1.10, so we have to force the version. 
- push_output(ctx, &ctx->preflight); - output_line(ctx, "#version 110"); - pop_output(ctx); - } // else if - - #if SUPPORT_PROFILE_GLSL120 - else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0) - { - ctx->profile_supports_glsl120 = 1; - push_output(ctx, &ctx->preflight); - output_line(ctx, "#version 120"); - pop_output(ctx); - } // else if - #endif - - #if SUPPORT_PROFILE_GLSLES - else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSLES) == 0) - { - ctx->profile_supports_glsles = 1; - push_output(ctx, &ctx->preflight); - output_line(ctx, "#version 100"); - if (shader_is_vertex(ctx)) - output_line(ctx, "precision highp float;"); - else - output_line(ctx, "precision mediump float;"); - output_line(ctx, "precision mediump int;"); - pop_output(ctx); - } // else if - #endif - - else - { - failf(ctx, "Profile '%s' unsupported or unknown.", profilestr); - return; - } // else - - push_output(ctx, &ctx->mainline_intro); - output_line(ctx, "void main()"); - output_line(ctx, "{"); - pop_output(ctx); - - set_output(ctx, &ctx->mainline); - ctx->indent++; -} // emit_GLSL_start - -static void emit_GLSL_RET(Context *ctx); -static void emit_GLSL_end(Context *ctx) -{ - // ps_1_* writes color to r0 instead oC0. We move it to the right place. - // We don't have to worry about a RET opcode messing this up, since - // RET isn't available before ps_2_0. - if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) - { - const char *shstr = ctx->shader_type_str; - set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); - output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr); - } // if - else if (shader_is_vertex(ctx)) - { -#ifdef MOJOSHADER_FLIP_RENDERTARGET - output_line(ctx, "gl_Position.y = gl_Position.y * vpFlip;"); -#endif -#ifdef MOJOSHADER_DEPTH_CLIPPING - output_line(ctx, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;"); -#endif - } // else if - - // force a RET opcode if we're at the end of the stream without one. 
- if (ctx->previous_opcode != OPCODE_RET) - emit_GLSL_RET(ctx); -} // emit_GLSL_end - -static void emit_GLSL_phase(Context *ctx) -{ - // no-op in GLSL. -} // emit_GLSL_phase - -static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype, - const int size) -{ - if (size > 0) - { - char buf[64]; - get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf)); - const char *typ; - switch (regtype) - { - case REG_TYPE_CONST: typ = "vec4"; break; - case REG_TYPE_CONSTINT: typ ="ivec4"; break; - case REG_TYPE_CONSTBOOL: typ = "bool"; break; - default: - { - fail(ctx, "BUG: used a uniform we don't know how to define."); - return; - } // default - } // switch - output_line(ctx, "uniform %s %s[%d];", typ, buf, size); - } // if -} // output_GLSL_uniform_array - -static void emit_GLSL_finalize(Context *ctx) -{ - // throw some blank lines around to make source more readable. - push_output(ctx, &ctx->globals); - output_blank_line(ctx); - pop_output(ctx); - - // If we had a relative addressing of REG_TYPE_INPUT, we need to build - // an array for it at the start of main(). GLSL doesn't let you specify - // arrays of attributes. - //vec4 blah_array[BIGGEST_ARRAY]; - if (ctx->have_relative_input_registers) // !!! 
FIXME - fail(ctx, "Relative addressing of input registers not supported."); - - push_output(ctx, &ctx->preflight); - output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count); - output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count); - output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count); -#ifdef MOJOSHADER_FLIP_RENDERTARGET - if (shader_is_vertex(ctx)) - output_line(ctx, "uniform float vpFlip;"); -#endif - pop_output(ctx); -} // emit_GLSL_finalize - -static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum) -{ - char varname[64]; - get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - push_output(ctx, &ctx->globals); - switch (regtype) - { - case REG_TYPE_ADDRESS: - if (shader_is_vertex(ctx)) - output_line(ctx, "ivec4 %s;", varname); - else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. - { - // We have to map texture registers to temps for ps_1_1, since - // they work like temps, initialize with tex coords, and the - // ps_1_1 TEX opcode expects to overwrite it. - if (!shader_version_atleast(ctx, 1, 4)) - { -#if SUPPORT_PROFILE_GLSLES - // GLSL ES does not have gl_TexCoord - if (support_glsles(ctx)) - output_line(ctx, "vec4 %s = io_%i_%i;", - varname, MOJOSHADER_USAGE_TEXCOORD, regnum); - else -#endif - output_line(ctx, "vec4 %s = gl_TexCoord[%d];", - varname, regnum); - } // if - } // else if - break; - case REG_TYPE_PREDICATE: - output_line(ctx, "bvec4 %s;", varname); - break; - case REG_TYPE_TEMP: - output_line(ctx, "vec4 %s;", varname); - break; - case REG_TYPE_LOOP: - break; // no-op. We declare these in for loops at the moment. - case REG_TYPE_LABEL: - break; // no-op. If we see it here, it means we optimized it out. 
- default: - fail(ctx, "BUG: we used a register we don't know how to define."); - break; - } // switch - pop_output(ctx); -} // emit_GLSL_global - -static void emit_GLSL_array(Context *ctx, VariableList *var) -{ - // All uniforms (except constant arrays, which only get pushed once at - // compile time) are now packed into a single array, so we can batch - // the uniform transfers. So this doesn't actually define an array - // here; the one, big array is emitted during finalization instead. - // However, we need to #define the offset into the one, big array here, - // and let dereferences use that #define. - const int base = var->index; - const int glslbase = ctx->uniform_float4_count; - push_output(ctx, &ctx->globals); - output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase); - pop_output(ctx); - var->emit_position = glslbase; -} // emit_GLSL_array - -static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist, - int base, int size) -{ - char varname[64]; - get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname)); - -#if 0 - // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120. - // !!! FIXME: (the 1.20 spec says it should work, though, I think...) - if (support_glsl120(ctx)) - { - // GLSL 1.20 can do constant arrays. 
- const char *cstr = NULL; - push_output(ctx, &ctx->globals); - output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size); - ctx->indent++; - - int i; - for (i = 0; i < size; i++) - { - while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) - clist = clist->next; - assert(clist->constant.index == (base + i)); - - char val0[32]; - char val1[32]; - char val2[32]; - char val3[32]; - floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); - floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); - floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); - floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); - - output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3, - (i < (size-1)) ? "," : ""); - - clist = clist->next; - } // for - - ctx->indent--; - output_line(ctx, ");"); - pop_output(ctx); - } // if - - else -#endif - { - // stock GLSL 1.0 can't do constant arrays, so make a uniform array - // and have the OpenGL glue assign it at link time. Lame! - push_output(ctx, &ctx->globals); - output_line(ctx, "uniform vec4 %s[%d];", varname, size); - pop_output(ctx); - } // else -} // emit_GLSL_const_array - -static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum, - const VariableList *var) -{ - // Now that we're pushing all the uniforms as one big array, pack these - // down, so if we only use register c439, it'll actually map to - // glsl_uniforms_vec4[0]. As we push one big array, this will prevent - // uploading unused data. 
- - char varname[64]; - char name[64]; - int index = 0; - - get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - push_output(ctx, &ctx->globals); - - if (var == NULL) - { - get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); - - if (regtype == REG_TYPE_CONST) - index = ctx->uniform_float4_count; - else if (regtype == REG_TYPE_CONSTINT) - index = ctx->uniform_int4_count; - else if (regtype == REG_TYPE_CONSTBOOL) - index = ctx->uniform_bool_count; - else // get_GLSL_uniform_array_varname() would have called fail(). - assert(isfail(ctx)); - - output_line(ctx, "#define %s %s[%d]", varname, name, index); - } // if - - else - { - const int arraybase = var->index; - if (var->constant) - { - get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count, - name, sizeof (name)); - index = (regnum - arraybase); - } // if - else - { - assert(var->emit_position != -1); - get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); - index = (regnum - arraybase) + var->emit_position; - } // else - - output_line(ctx, "#define %s %s[%d]", varname, name, index); - } // else - - pop_output(ctx); -} // emit_GLSL_uniform - -static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb) -{ - const char *type = ""; - switch (ttype) - { - case TEXTURE_TYPE_2D: type = "sampler2D"; break; - case TEXTURE_TYPE_CUBE: type = "samplerCube"; break; - case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break; - default: fail(ctx, "BUG: used a sampler we don't know how to define."); - } // switch - - char var[64]; - get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var)); - - push_output(ctx, &ctx->globals); - output_line(ctx, "uniform %s %s;", type, var); - if (tb) // This sampler used a ps_1_1 TEXBEM opcode? 
- { - char name[64]; - const int index = ctx->uniform_float4_count; - ctx->uniform_float4_count += 2; - get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name)); - output_line(ctx, "#define %s_texbem %s[%d]", var, name, index); - output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1); - } // if - pop_output(ctx); -} // emit_GLSL_sampler - -static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum, - MOJOSHADER_usage usage, int index, int wmask, - int flags) -{ - // !!! FIXME: this function doesn't deal with write masks at all yet! - const char *usage_str = NULL; - const char *arrayleft = ""; - const char *arrayright = ""; - char index_str[16] = { '\0' }; - char var[64]; - - get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var)); - - //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? - - if (index != 0) // !!! FIXME: a lot of these MUST be zero. - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - - if (shader_is_vertex(ctx)) - { - // pre-vs3 output registers. - // these don't ever happen in DCL opcodes, I think. Map to vs_3_* - // output registers. 
- if (!shader_version_atleast(ctx, 3, 0)) - { - if (regtype == REG_TYPE_RASTOUT) - { - regtype = REG_TYPE_OUTPUT; - index = regnum; - switch ((const RastOutType) regnum) - { - case RASTOUT_TYPE_POSITION: - usage = MOJOSHADER_USAGE_POSITION; - break; - case RASTOUT_TYPE_FOG: - usage = MOJOSHADER_USAGE_FOG; - break; - case RASTOUT_TYPE_POINT_SIZE: - usage = MOJOSHADER_USAGE_POINTSIZE; - break; - } // switch - } // if - - else if (regtype == REG_TYPE_ATTROUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_COLOR; - index = regnum; - } // else if - - else if (regtype == REG_TYPE_TEXCRDOUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_TEXCOORD; - index = regnum; - } // else if - } // if - - // to avoid limitations of various GL entry points for input - // attributes (glSecondaryColorPointer() can only take 3 component - // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other - // issues), we set up all inputs as generic vertex attributes, so we - // can pass data in just about any form, and ignore the built-in GLSL - // attributes like gl_SecondaryColor. Output needs to use the the - // built-ins, though, but we don't have to worry about the GL entry - // point limitations there. - - if (regtype == REG_TYPE_INPUT) - { - push_output(ctx, &ctx->globals); - output_line(ctx, "attribute vec4 %s;", var); - pop_output(ctx); - } // if - - else if (regtype == REG_TYPE_OUTPUT) - { - switch (usage) - { - case MOJOSHADER_USAGE_POSITION: - if (index == 0) - { - usage_str = "gl_Position"; - } // if - break; - case MOJOSHADER_USAGE_POINTSIZE: - usage_str = "gl_PointSize"; - break; - case MOJOSHADER_USAGE_COLOR: -#if SUPPORT_PROFILE_GLSLES - if (support_glsles(ctx)) - break; // GLSL ES does not have gl_FrontColor -#endif - index_str[0] = '\0'; // no explicit number. 
- if (index == 0) - { - usage_str = "gl_FrontColor"; - } // if - else if (index == 1) - { - usage_str = "gl_FrontSecondaryColor"; - } // else if - break; - case MOJOSHADER_USAGE_FOG: - usage_str = "gl_FogFragCoord"; - break; - case MOJOSHADER_USAGE_TEXCOORD: -#if SUPPORT_PROFILE_GLSLES - if (support_glsles(ctx)) - break; // GLSL ES does not have gl_TexCoord -#endif - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - usage_str = "gl_TexCoord"; - arrayleft = "["; - arrayright = "]"; - break; - default: - // !!! FIXME: we need to deal with some more built-in varyings here. - break; - } // switch - - // !!! FIXME: the #define is a little hacky, but it means we don't - // !!! FIXME: have to track these separately if this works. - push_output(ctx, &ctx->globals); - // no mapping to built-in var? Just make it a regular global, pray. - if (usage_str == NULL) - { -#if SUPPORT_PROFILE_GLSLES - if (support_glsles(ctx)) - output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index); - else -#endif - output_line(ctx, "varying vec4 io_%i_%i;", usage, index); - output_line(ctx, "#define %s io_%i_%i", var, usage, index); - } // if - else - { - output_line(ctx, "#define %s %s%s%s%s", var, usage_str, - arrayleft, index_str, arrayright); - } // else - pop_output(ctx); - } // else if - - else - { - fail(ctx, "unknown vertex shader attribute register"); - } // else - } // if - - else if (shader_is_pixel(ctx)) - { - // samplers DCLs get handled in emit_GLSL_sampler(). - - if (flags & MOD_CENTROID) // !!! FIXME - { - failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); - return; - } // if - - if (regtype == REG_TYPE_COLOROUT) - { - if (!ctx->have_multi_color_outputs) - usage_str = "gl_FragColor"; // maybe faster? 
- else - { - snprintf(index_str, sizeof (index_str), "%u", (uint) regnum); - usage_str = "gl_FragData"; - arrayleft = "["; - arrayright = "]"; - } // else - } // if - - else if (regtype == REG_TYPE_DEPTHOUT) - usage_str = "gl_FragDepth"; - - // !!! FIXME: can you actualy have a texture register with COLOR usage? - else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT)) - { -#if SUPPORT_PROFILE_GLSLES - if (!support_glsles(ctx)) - { -#endif - if (usage == MOJOSHADER_USAGE_TEXCOORD) - { - // ps_1_1 does a different hack for this attribute. - // Refer to emit_GLSL_global()'s REG_TYPE_ADDRESS code. - if (shader_version_atleast(ctx, 1, 4)) - { - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - usage_str = "gl_TexCoord"; - arrayleft = "["; - arrayright = "]"; - } // if - } // if - - else if (usage == MOJOSHADER_USAGE_COLOR) - { - index_str[0] = '\0'; // no explicit number. - if (index == 0) - { - usage_str = "gl_Color"; - } // if - else if (index == 1) - { - usage_str = "gl_SecondaryColor"; - } // else if - // FIXME: Does this even matter when we have varyings? -flibit - // else - // fail(ctx, "unsupported color index"); - } // else if -#if SUPPORT_PROFILE_GLSLES - } // if -#endif - } // else if - - else if (regtype == REG_TYPE_MISCTYPE) - { - const MiscTypeType mt = (MiscTypeType) regnum; - if (mt == MISCTYPE_TYPE_FACE) - { - push_output(ctx, &ctx->globals); - output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var); - pop_output(ctx); - } // if - else if (mt == MISCTYPE_TYPE_POSITION) - { - index_str[0] = '\0'; // no explicit number. - usage_str = "gl_FragCoord"; // !!! FIXME: is this the same coord space as D3D? - } // else if - else - { - fail(ctx, "BUG: unhandled misc register"); - } // else - } // else if - - else - { - fail(ctx, "unknown pixel shader attribute register"); - } // else - - push_output(ctx, &ctx->globals); - // no mapping to built-in var? Just make it a regular global, pray. 
- if (usage_str == NULL) - { -#if SUPPORT_PROFILE_GLSLES - if (support_glsles(ctx)) - output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index); - else -#endif - output_line(ctx, "varying vec4 io_%i_%i;", usage, index); - output_line(ctx, "#define %s io_%i_%i", var, usage, index); - } // if - else - { - output_line(ctx, "#define %s %s%s%s%s", var, usage_str, - arrayleft, index_str, arrayright); - } // else - pop_output(ctx); - } // else if - - else - { - fail(ctx, "Unknown shader type"); // state machine should catch this. - } // else -} // emit_GLSL_attribute - -static void emit_GLSL_NOP(Context *ctx) -{ - // no-op is a no-op. :) -} // emit_GLSL_NOP - -static void emit_GLSL_MOV(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_MOV - -static void emit_GLSL_ADD(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_ADD - -static void emit_GLSL_SUB(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_SUB - -static void emit_GLSL_MAD(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, 
src1, src2); - output_line(ctx, "%s", code); -} // emit_GLSL_MAD - -static void emit_GLSL_MUL(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_MUL - -static void emit_GLSL_RCP(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_RCP - -static void emit_GLSL_RSQ(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_RSQ - -static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1, - const char *extra) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char castleft[16] = { '\0' }; - const char *castright = ""; - if (vecsize != 1) - { - snprintf(castleft, sizeof (castleft), "vec%d(", vecsize); - castright = ")"; - } // if - - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s", - castleft, src0, src1, extra, castright); - output_line(ctx, "%s", code); -} // emit_GLSL_dotprod - -static void emit_GLSL_DP3(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); - emit_GLSL_dotprod(ctx, src0, src1, ""); -} // emit_GLSL_DP3 - -static void emit_GLSL_DP4(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1)); - emit_GLSL_dotprod(ctx, src0, src1, ""); 
-} // emit_GLSL_DP4 - -static void emit_GLSL_MIN(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_MIN - -static void emit_GLSL_MAX(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_MAX - -static void emit_GLSL_SLT(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants. - if (vecsize == 1) - make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1); - else - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec%d(lessThan(%s, %s))", - vecsize, src0, src1); - } // else - output_line(ctx, "%s", code); -} // emit_GLSL_SLT - -static void emit_GLSL_SGE(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants. 
- if (vecsize == 1) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "float(%s >= %s)", src0, src1); - } // if - else - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec%d(greaterThanEqual(%s, %s))", - vecsize, src0, src1); - } // else - output_line(ctx, "%s", code); -} // emit_GLSL_SGE - -static void emit_GLSL_EXP(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_EXP - -static void emit_GLSL_LOG(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_LOG - -static void emit_GLSL_LIT_helper(Context *ctx) -{ - const char *maxp = "127.9961"; // value from the dx9 reference. - - if (ctx->glsl_generated_lit_helper) - return; - - ctx->glsl_generated_lit_helper = 1; - - push_output(ctx, &ctx->helpers); - output_line(ctx, "vec4 LIT(const vec4 src)"); - output_line(ctx, "{"); ctx->indent++; - output_line(ctx, "float power = clamp(src.w, -%s, %s);",maxp,maxp); - output_line(ctx, "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);"); - output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++; - output_line(ctx, "retval.y = src.x;"); - output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++; - output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--; - output_line(ctx, "}"); ctx->indent--; - output_line(ctx, "}"); - output_line(ctx, "return retval;"); ctx->indent--; - output_line(ctx, "}"); - output_blank_line(ctx); - pop_output(ctx); -} // emit_GLSL_LIT_helper - -static void emit_GLSL_LIT(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char code[128]; - emit_GLSL_LIT_helper(ctx); - make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0); - 
output_line(ctx, "%s", code); -} // emit_GLSL_LIT - -static void emit_GLSL_DST(Context *ctx) -{ - // !!! FIXME: needs to take ctx->dst_arg.writemask into account. - char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y)); - char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y)); - char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z)); - char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w)); - - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec4(1.0, %s * %s, %s, %s)", - src0_y, src1_y, src0_z, src1_w); - output_line(ctx, "%s", code); -} // emit_GLSL_DST - -static void emit_GLSL_LRP(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)", - src2, src1, src0); - output_line(ctx, "%s", code); -} // emit_GLSL_LRP - -static void emit_GLSL_FRC(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_FRC - -static void emit_GLSL_M4X4(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); - char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3)); - char code[256]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2, src0, row3); - 
output_line(ctx, "%s", code); -} // emit_GLSL_M4X4 - -static void emit_GLSL_M4X3(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); - char code[256]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2); - output_line(ctx, "%s", code); -} // emit_GLSL_M4X3 - -static void emit_GLSL_M3X4(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); - char row3[64]; make_GLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3)); - - char code[256]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec4(dot(%s, %s), dot(%s, %s), " - "dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, - src0, row2, src0, row3); - output_line(ctx, "%s", code); -} // emit_GLSL_M3X4 - -static void emit_GLSL_M3X3(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); - char code[256]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2); - output_line(ctx, "%s", code); -} // emit_GLSL_M3X3 - -static void emit_GLSL_M3X2(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; 
make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - - char code[256]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec2(dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1); - output_line(ctx, "%s", code); -} // emit_GLSL_M3X2 - -static void emit_GLSL_CALL(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - if (ctx->loops > 0) - output_line(ctx, "%s(aL);", src0); - else - output_line(ctx, "%s();", src0); -} // emit_GLSL_CALL - -static void emit_GLSL_CALLNZ(Context *ctx) -{ - // !!! FIXME: if src1 is a constbool that's true, we can remove the - // !!! FIXME: if. If it's false, we can make this a no-op. - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - - if (ctx->loops > 0) - output_line(ctx, "if (%s) { %s(aL); }", src1, src0); - else - output_line(ctx, "if (%s) { %s(); }", src1, src0); -} // emit_GLSL_CALLNZ - -static void emit_GLSL_LOOP(Context *ctx) -{ - // !!! FIXME: swizzle? - char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var)); - assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday. - output_line(ctx, "{"); - ctx->indent++; - output_line(ctx, "const int aLend = %s.x + %s.y;", var, var); - output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var); - ctx->indent++; -} // emit_GLSL_LOOP - -static void emit_GLSL_RET(Context *ctx) -{ - // thankfully, the MSDN specs say a RET _has_ to end a function...no - // early returns. So if you hit one, you know you can safely close - // a high-level function. - ctx->indent--; - output_line(ctx, "}"); - output_blank_line(ctx); - set_output(ctx, &ctx->subroutines); // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily. 
-} // emit_GLSL_RET - -static void emit_GLSL_ENDLOOP(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); - ctx->indent--; - output_line(ctx, "}"); -} // emit_GLSL_ENDLOOP - -static void emit_GLSL_LABEL(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - const int label = ctx->source_args[0].regnum; - RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); - assert(ctx->output == ctx->subroutines); // not mainline, etc. - assert(ctx->indent == 0); // we shouldn't be in the middle of a function. - - // MSDN specs say CALL* has to come before the LABEL, so we know if we - // can ditch the entire function here as unused. - if (reg == NULL) - set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. - - // !!! FIXME: it would be nice if we could determine if a function is - // !!! FIXME: only called once and, if so, forcibly inline it. - - const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; - output_line(ctx, "void %s(%s)", src0, uses_loopreg); - output_line(ctx, "{"); - ctx->indent++; -} // emit_GLSL_LABEL - -static void emit_GLSL_DCL(Context *ctx) -{ - // no-op. We do this in our emit_attribute() and emit_uniform(). -} // emit_GLSL_DCL - -static void emit_GLSL_POW(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "pow(abs(%s), %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_POW - -static void emit_GLSL_CRS(Context *ctx) -{ - // !!! FIXME: needs to take ctx->dst_arg.writemask into account. 
- char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "cross(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_GLSL_CRS - -static void emit_GLSL_SGN(Context *ctx) -{ - // (we don't need the temporary registers specified for the D3D opcode.) - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_SGN - -static void emit_GLSL_ABS(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_ABS - -static void emit_GLSL_NRM(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_NRM - -static void emit_GLSL_SINCOS(Context *ctx) -{ - // we don't care about the temp registers that <= sm2 demands; ignore them. - // sm2 also talks about what components are left untouched vs. undefined, - // but we just leave those all untouched with GLSL write masks (which - // would fulfill the "undefined" requirement, too). 
- const int mask = ctx->dest_arg.writemask; - char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char code[128] = { '\0' }; - - if (writemask_x(mask)) - make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0); - else if (writemask_y(mask)) - make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0); - else if (writemask_xy(mask)) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec2(cos(%s), sin(%s))", src0, src0); - } // else if - - output_line(ctx, "%s", code); -} // emit_GLSL_SINCOS - -static void emit_GLSL_REP(Context *ctx) -{ - // !!! FIXME: - // msdn docs say legal loop values are 0 to 255. We can check DEFI values - // at parse time, but if they are pulling a value from a uniform, do - // we clamp here? - // !!! FIXME: swizzle is legal here, right? - char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0)); - const uint rep = (uint) ctx->reps; - output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {", - rep, rep, src0, rep); - ctx->indent++; -} // emit_GLSL_REP - -static void emit_GLSL_ENDREP(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); -} // emit_GLSL_ENDREP - -static void emit_GLSL_IF(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "if (%s) {", src0); - ctx->indent++; -} // emit_GLSL_IF - -static void emit_GLSL_IFC(Context *ctx) -{ - const char *comp = get_GLSL_comparison_string_scalar(ctx); - char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); - output_line(ctx, "if (%s %s %s) {", src0, comp, src1); - ctx->indent++; -} // emit_GLSL_IFC - -static void emit_GLSL_ELSE(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "} else {"); - ctx->indent++; -} // emit_GLSL_ELSE - -static void emit_GLSL_ENDIF(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); -} // emit_GLSL_ENDIF - -static void 
emit_GLSL_BREAK(Context *ctx) -{ - output_line(ctx, "break;"); -} // emit_GLSL_BREAK - -static void emit_GLSL_BREAKC(Context *ctx) -{ - const char *comp = get_GLSL_comparison_string_scalar(ctx); - char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); - output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1); -} // emit_GLSL_BREAKC - -static void emit_GLSL_MOVA(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - - if (vecsize == 1) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "int(floor(abs(%s) + 0.5) * sign(%s))", - src0, src0); - } // if - - else - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))", - vecsize, src0, vecsize, src0); - } // else - - output_line(ctx, "%s", code); -} // emit_GLSL_MOVA - -static void emit_GLSL_DEFB(Context *ctx) -{ - char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); - push_output(ctx, &ctx->globals); - output_line(ctx, "const bool %s = %s;", - varname, ctx->dwords[0] ? 
"true" : "false"); - pop_output(ctx); -} // emit_GLSL_DEFB - -static void emit_GLSL_DEFI(Context *ctx) -{ - char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); - const int32 *x = (const int32 *) ctx->dwords; - push_output(ctx, &ctx->globals); - output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);", - varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); - pop_output(ctx); -} // emit_GLSL_DEFI - -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) - -static void emit_GLSL_TEXKILL(Context *ctx) -{ - char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst); -} // emit_GLSL_TEXKILL - -static void glsl_texld(Context *ctx, const int texldd) -{ - if (!shader_version_atleast(ctx, 1, 4)) - { - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char sampler[64]; - char code[128] = {0}; - - assert(!texldd); - - RegisterList *sreg; - sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - if (ttype == TEXTURE_TYPE_2D) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture2D(%s, %s.xy)", - sampler, dst); - } - else if (ttype == TEXTURE_TYPE_CUBE) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "textureCube(%s, %s.xyz)", - sampler, dst); - } - else if (ttype == TEXTURE_TYPE_VOLUME) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture3D(%s, %s.xyz)", - sampler, dst); - } - else - { - fail(ctx, "unexpected texture type"); - } // else - output_line(ctx, "%s", code); - } // if - - else if (!shader_version_atleast(ctx, 2, 0)) - { - // ps_1_4 is different, too! - fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! 
FIXME - return; - } // else if - - else - { - const SourceArgInfo *samp_arg = &ctx->source_args[1]; - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - samp_arg->regnum); - const char *funcname = NULL; - char src0[64] = { '\0' }; - char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? - char src2[64] = { '\0' }; - char src3[64] = { '\0' }; - - if (sreg == NULL) - { - fail(ctx, "TEXLD using undeclared sampler"); - return; - } // if - - if (texldd) - { - if (sreg->index == TEXTURE_TYPE_2D) - { - make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2)); - make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3)); - } // if - else - { - assert((sreg->index == TEXTURE_TYPE_CUBE) || (sreg->index == TEXTURE_TYPE_VOLUME)); - make_GLSL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); - make_GLSL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); - } // else - } // if - - // !!! FIXME: can TEXLDD set instruction_controls? - // !!! FIXME: does the d3d bias value map directly to GLSL? - const char *biassep = ""; - char bias[64] = { '\0' }; - if (ctx->instruction_controls == CONTROL_TEXLDB) - { - biassep = ", "; - make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias)); - } // if - - switch ((const TextureType) sreg->index) - { - case TEXTURE_TYPE_2D: - if (ctx->instruction_controls == CONTROL_TEXLDP) - { - funcname = "texture2DProj"; - make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - } // if - else // texld/texldb - { - funcname = "texture2D"; - make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); - } // else - break; - case TEXTURE_TYPE_CUBE: - if (ctx->instruction_controls == CONTROL_TEXLDP) - fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? 
- funcname = "textureCube"; - make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - break; - case TEXTURE_TYPE_VOLUME: - if (ctx->instruction_controls == CONTROL_TEXLDP) - { - funcname = "texture3DProj"; - make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - } // if - else // texld/texldb - { - funcname = "texture3D"; - make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - } // else - break; - default: - fail(ctx, "unknown texture type"); - return; - } // switch - - assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); - char swiz_str[6] = { '\0' }; - make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str), - samp_arg->swizzle, ctx->dest_arg.writemask); - - char code[128]; - if (texldd) - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "%sGrad(%s, %s, %s, %s)%s", funcname, - src1, src0, src2, src3, swiz_str); - } // if - else - { - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "%s(%s, %s%s%s)%s", funcname, - src1, src0, biassep, bias, swiz_str); - } // else - - output_line(ctx, "%s", code); - } // else -} // glsl_texld - -static void emit_GLSL_TEXLD(Context *ctx) -{ - glsl_texld(ctx, 0); -} // emit_GLSL_TEXLD - - -static void emit_GLSL_TEXBEM(Context *ctx) -{ - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src)); - char sampler[64]; - char code[512]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," - " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))", - sampler, - dst, sampler, src, sampler, src, - dst, sampler, src, sampler, src); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXBEM - - -static void emit_GLSL_TEXBEML(Context *ctx) -{ - // !!! FIXME: this code counts on the register not having swizzles, etc. - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src)); - char sampler[64]; - char code[512]; - - get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," - " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *" - " ((%s.z * %s_texbeml.x) + %s_texbem.y)", - sampler, - dst, sampler, src, sampler, src, - dst, sampler, src, sampler, src, - src, sampler, sampler); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXBEML - -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME - - -static void emit_GLSL_TEXM3X2PAD(Context *ctx) -{ - // no-op ... work happens in emit_GLSL_TEXM3X2TEX(). -} // emit_GLSL_TEXM3X2PAD - -static void emit_GLSL_TEXM3X2TEX(Context *ctx) -{ - if (ctx->texm3x2pad_src0 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char sampler[64]; - char code[512]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, - src0, sizeof (src0)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, - src1, sizeof (src1)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src2, sizeof (src2)); - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture2D(%s, vec2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))", - sampler, src0, src1, src2, dst); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXM3X2TEX - -static void emit_GLSL_TEXM3X3PAD(Context *ctx) -{ - // no-op ... work happens in emit_GLSL_TEXM3X3*(). -} // emit_GLSL_TEXM3X3PAD - -static void emit_GLSL_TEXM3X3TEX(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char sampler[64]; - char code[512]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? 
"Cube" : "3D"; - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s," - " vec3(dot(%s.xyz, %s.xyz)," - " dot(%s.xyz, %s.xyz)," - " dot(%s.xyz, %s.xyz)))", - ttypestr, sampler, src0, src1, src2, src3, dst, src4); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXM3X3TEX - -static void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx) -{ - if (ctx->glsl_generated_texm3x3spec_helper) - return; - - ctx->glsl_generated_texm3x3spec_helper = 1; - - push_output(ctx, &ctx->helpers); - output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)"); - output_line(ctx, "{"); ctx->indent++; - output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--; - output_line(ctx, "}"); - output_blank_line(ctx); - pop_output(ctx); -} // emit_GLSL_TEXM3X3SPEC_helper - -static void emit_GLSL_TEXM3X3SPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char src5[64]; - char sampler[64]; - char code[512]; - - emit_GLSL_TEXM3X3SPEC_helper(ctx); - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, - src5, sizeof (src5)); - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s, " - "TEXM3X3SPEC_reflection(" - "vec3(" - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz)" - ")," - "%s.xyz," - ")" - ")", - ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXM3X3SPEC - -static void emit_GLSL_TEXM3X3VSPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char sampler[64]; - char code[512]; - - emit_GLSL_TEXM3X3SPEC_helper(ctx); - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s, " - "TEXM3X3SPEC_reflection(" - "vec3(" - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz)" - "), " - "vec3(%s.w, %s.w, %s.w)" - ")" - ")", - ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXM3X3VSPEC - -static void emit_GLSL_EXPP(Context *ctx) -{ - // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation. - emit_GLSL_EXP(ctx); // I guess this is just partial precision EXP? -} // emit_GLSL_EXPP - -static void emit_GLSL_LOGP(Context *ctx) -{ - // LOGP is just low-precision LOG, but we'll take the higher precision. - emit_GLSL_LOG(ctx); -} // emit_GLSL_LOGP - -// common code between CMP and CND. 
-static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp) -{ - int i, j; - DestArgInfo *dst = &ctx->dest_arg; - const SourceArgInfo *srcarg0 = &ctx->source_args[0]; - const int origmask = dst->writemask; - int used_swiz[4] = { 0, 0, 0, 0 }; - const int writemask[4] = { dst->writemask0, dst->writemask1, - dst->writemask2, dst->writemask3 }; - const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y, - srcarg0->swizzle_z, srcarg0->swizzle_w }; - - for (i = 0; i < 4; i++) - { - int mask = (1 << i); - - if (!writemask[i]) continue; - if (used_swiz[i]) continue; - - // This is a swizzle we haven't checked yet. - used_swiz[i] = 1; - - // see if there are any other elements swizzled to match (.yyyy) - for (j = i + 1; j < 4; j++) - { - if (!writemask[j]) continue; - if (src0swiz[i] != src0swiz[j]) continue; - mask |= (1 << j); - used_swiz[j] = 1; - } // for - - // okay, (mask) should be the writemask of swizzles we like. - - //return make_GLSL_srcarg_string(ctx, idx, (1 << 0)); - - char src0[64]; - char src1[64]; - char src2[64]; - make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0)); - make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1)); - make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2)); - - set_dstarg_writemask(dst, mask); - - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "((%s %s) ? %s : %s)", - src0, cmp, src1, src2); - output_line(ctx, "%s", code); - } // for - - set_dstarg_writemask(dst, origmask); -} // emit_GLSL_comparison_operations - -static void emit_GLSL_CND(Context *ctx) -{ - emit_GLSL_comparison_operations(ctx, "> 0.5"); -} // emit_GLSL_CND - -static void emit_GLSL_DEF(Context *ctx) -{ - const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? 
- char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); - char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); - char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); - char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); - char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); - - push_output(ctx, &ctx->globals); - output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);", - varname, val0, val1, val2, val3); - pop_output(ctx); -} // emit_GLSL_DEF - -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME - -static void emit_GLSL_TEXM3X3(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char code[512]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); - - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)", - src0, src1, src2, src3, dst, src4); - - output_line(ctx, "%s", code); -} // emit_GLSL_TEXM3X3 - -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! 
FIXME - -static void emit_GLSL_CMP(Context *ctx) -{ - emit_GLSL_comparison_operations(ctx, ">= 0.0"); -} // emit_GLSL_CMP - -EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME - -static void emit_GLSL_DP2ADD(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2)); - char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2); - emit_GLSL_dotprod(ctx, src0, src1, extra); -} // emit_GLSL_DP2ADD - -static void emit_GLSL_DSX(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_DSX - -static void emit_GLSL_DSY(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0); - output_line(ctx, "%s", code); -} // emit_GLSL_DSY - -static void emit_GLSL_TEXLDD(Context *ctx) -{ - // !!! FIXME: - // GLSL 1.30 introduced textureGrad() for this, but it looks like the - // functions are overloaded instead of texture2DGrad() (etc). - - // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(), - // so we'll use them if available. Failing that, we'll just fallback - // to a regular texture2D call and hope the mipmap it chooses is close - // enough. 
- if (!ctx->glsl_generated_texldd_setup) - { - ctx->glsl_generated_texldd_setup = 1; - push_output(ctx, &ctx->preflight); - output_line(ctx, "#if GL_ARB_shader_texture_lod"); - output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable"); - output_line(ctx, "#define texture2DGrad texture2DGradARB"); - output_line(ctx, "#define texture2DProjGrad texture2DProjARB"); - output_line(ctx, "#elif GL_EXT_gpu_shader4"); - output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable"); - output_line(ctx, "#else"); - output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)"); - output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)"); - output_line(ctx, "#endif"); - output_blank_line(ctx); - pop_output(ctx); - } // if - - glsl_texld(ctx, 1); -} // emit_GLSL_TEXLDD - -static void emit_GLSL_SETP(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // destination is always predicate register (which is type bvec4). - if (vecsize == 1) - { - const char *comp = get_GLSL_comparison_string_scalar(ctx); - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "(%s %s %s)", src0, comp, src1); - } // if - else - { - const char *comp = get_GLSL_comparison_string_vector(ctx); - make_GLSL_destarg_assign(ctx, code, sizeof (code), - "%s(%s, %s)", comp, src0, src1); - } // else - - output_line(ctx, "%s", code); -} // emit_GLSL_SETP - -static void emit_GLSL_TEXLDL(Context *ctx) -{ - // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins - // !!! FIXME: from fragment shaders for some inexplicable reason. - // !!! FIXME: For now, you'll just have to suffer with the potentially - // !!! FIXME: wrong mipmap until I can figure something out. 
- emit_GLSL_TEXLD(ctx); -} // emit_GLSL_TEXLDL - -static void emit_GLSL_BREAKP(Context *ctx) -{ - char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "if (%s) { break; }", src0); -} // emit_GLSL_BREAKP - -static void emit_GLSL_RESERVED(Context *ctx) -{ - // do nothing; fails in the state machine. -} // emit_GLSL_RESERVED - -#endif // SUPPORT_PROFILE_GLSL - - -// !!! FIXME: A lot of this is cut-and-paste from the GLSL version. -#if !SUPPORT_PROFILE_METAL -#define PROFILE_EMITTER_METAL(op) -#else -#undef AT_LEAST_ONE_PROFILE -#define AT_LEAST_ONE_PROFILE 1 -#define PROFILE_EMITTER_METAL(op) emit_METAL_##op, - -#define EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(op) \ - static void emit_METAL_##op(Context *ctx) { \ - fail(ctx, #op " unimplemented in Metal profile"); \ - } - -static inline const char *get_METAL_register_string(Context *ctx, - const RegisterType regtype, const int regnum, - char *regnum_str, const size_t regnum_size) -{ - // turns out these are identical at the moment. - return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); -} // get_METAL_register_string - -static const char *get_METAL_uniform_type(Context *ctx, const RegisterType rtype) -{ - switch (rtype) - { - case REG_TYPE_CONST: return "float4"; - case REG_TYPE_CONSTINT: return "int4"; - case REG_TYPE_CONSTBOOL: return "bool"; - default: fail(ctx, "BUG: used a uniform we don't know how to define."); - } // switch - - return NULL; -} // get_METAL_uniform_type - -static const char *get_METAL_varname_in_buf(Context *ctx, RegisterType rt, - int regnum, char *buf, - const size_t len) -{ - char regnum_str[16]; - const char *regtype_str = get_METAL_register_string(ctx, rt, regnum, - regnum_str, sizeof (regnum_str)); - - // We don't separate vars with vs_ or ps_ here, because, for the most part, - // there are only local vars in Metal shaders. 
- snprintf(buf, len, "%s%s", regtype_str, regnum_str); - return buf; -} // get_METAL_varname_in_buf - - -static const char *get_METAL_varname(Context *ctx, RegisterType rt, int regnum) -{ - char buf[64]; - get_METAL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf)); - return StrDup(ctx, buf); -} // get_METAL_varname - - -static inline const char *get_METAL_const_array_varname_in_buf(Context *ctx, - const int base, const int size, - char *buf, const size_t buflen) -{ - snprintf(buf, buflen, "const_array_%d_%d", base, size); - return buf; -} // get_METAL_const_array_varname_in_buf - -static const char *get_METAL_const_array_varname(Context *ctx, int base, int size) -{ - char buf[64]; - get_METAL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); - return StrDup(ctx, buf); -} // get_METAL_const_array_varname - - -static inline const char *get_METAL_input_array_varname(Context *ctx, - char *buf, const size_t buflen) -{ - snprintf(buf, buflen, "%s", "vertex_input_array"); - return buf; -} // get_METAL_input_array_varname - - -static const char *get_METAL_uniform_array_varname(Context *ctx, - const RegisterType regtype, - char *buf, const size_t len) -{ - const char *shadertype = ctx->shader_type_str; - const char *type = get_METAL_uniform_type(ctx, regtype); - snprintf(buf, len, "uniforms.uniforms_%s", type); - return buf; -} // get_METAL_uniform_array_varname - -static const char *get_METAL_destarg_varname(Context *ctx, char *buf, size_t len) -{ - const DestArgInfo *arg = &ctx->dest_arg; - return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); -} // get_METAL_destarg_varname - -static const char *get_METAL_srcarg_varname(Context *ctx, const size_t idx, - char *buf, size_t len) -{ - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - *buf = '\0'; - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); 
-} // get_METAL_srcarg_varname - - -static const char *make_METAL_destarg_assign(Context *, char *, const size_t, - const char *, ...) ISPRINTF(4,5); - -static const char *make_METAL_destarg_assign(Context *ctx, char *buf, - const size_t buflen, - const char *fmt, ...) -{ - int need_parens = 0; - const DestArgInfo *arg = &ctx->dest_arg; - - if (arg->writemask == 0) - { - *buf = '\0'; - return buf; // no writemask? It's a no-op. - } // if - - char clampbuf[32] = { '\0' }; - const char *clampleft = ""; - const char *clampright = ""; - if (arg->result_mod & MOD_SATURATE) - { - ctx->metal_need_header_common = 1; - const int vecsize = vecsize_from_writemask(arg->writemask); - clampleft = "clamp("; - if (vecsize == 1) - clampright = ", 0.0, 1.0)"; - else - { - snprintf(clampbuf, sizeof (clampbuf), - ", float%d(0.0), float%d(1.0))", vecsize, vecsize); - clampright = clampbuf; - } // else - } // if - - // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. - - // CENTROID only allowed in DCL opcodes, which shouldn't come through here. - assert((arg->result_mod & MOD_CENTROID) == 0); - - if (ctx->predicated) - { - fail(ctx, "predicated destinations unsupported"); // !!! FIXME - *buf = '\0'; - return buf; - } // if - - char operation[256]; - va_list ap; - va_start(ap, fmt); - const int len = vsnprintf(operation, sizeof (operation), fmt, ap); - va_end(ap); - if (len >= sizeof (operation)) - { - fail(ctx, "operation string too large"); // I'm lazy. 
:P - *buf = '\0'; - return buf; - } // if - - const char *result_shift_str = ""; - switch (arg->result_shift) - { - case 0x1: result_shift_str = " * 2.0"; break; - case 0x2: result_shift_str = " * 4.0"; break; - case 0x3: result_shift_str = " * 8.0"; break; - case 0xD: result_shift_str = " / 8.0"; break; - case 0xE: result_shift_str = " / 4.0"; break; - case 0xF: result_shift_str = " / 2.0"; break; - } // switch - need_parens |= (result_shift_str[0] != '\0'); - - char regnum_str[16]; - const char *regtype_str = get_METAL_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - char writemask_str[6]; - size_t i = 0; - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !writemask_xyzw(arg->writemask)) - { - writemask_str[i++] = '.'; - if (arg->writemask0) writemask_str[i++] = 'x'; - if (arg->writemask1) writemask_str[i++] = 'y'; - if (arg->writemask2) writemask_str[i++] = 'z'; - if (arg->writemask3) writemask_str[i++] = 'w'; - } // if - writemask_str[i] = '\0'; - assert(i < sizeof (writemask_str)); - - const char *leftparen = (need_parens) ? "(" : ""; - const char *rightparen = (need_parens) ? ")" : ""; - - snprintf(buf, buflen, "%s%s%s = %s%s%s%s%s%s;", - regtype_str, regnum_str, writemask_str, - clampleft, leftparen, operation, rightparen, result_shift_str, - clampright); - // !!! FIXME: make sure the scratch buffer was large enough. 
- return buf; -} // make_METAL_destarg_assign - - -static char *make_METAL_swizzle_string(char *swiz_str, const size_t strsize, - const int swizzle, const int writemask) -{ - size_t i = 0; - if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) ) - { - const int writemask0 = (writemask >> 0) & 0x1; - const int writemask1 = (writemask >> 1) & 0x1; - const int writemask2 = (writemask >> 2) & 0x1; - const int writemask3 = (writemask >> 3) & 0x1; - - const int swizzle_x = (swizzle >> 0) & 0x3; - const int swizzle_y = (swizzle >> 2) & 0x3; - const int swizzle_z = (swizzle >> 4) & 0x3; - const int swizzle_w = (swizzle >> 6) & 0x3; - - swiz_str[i++] = '.'; - if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x]; - if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y]; - if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z]; - if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w]; - } // if - assert(i < strsize); - swiz_str[i] = '\0'; - return swiz_str; -} // make_METAL_swizzle_string - - -static const char *make_METAL_srcarg_string(Context *ctx, const size_t idx, - const int writemask, char *buf, - const size_t buflen) -{ - *buf = '\0'; - - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - - const char *premod_str = ""; - const char *postmod_str = ""; - switch (arg->src_mod) - { - case SRCMOD_NEGATE: - premod_str = "-"; - break; - - case SRCMOD_BIASNEGATE: - premod_str = "-("; - postmod_str = " - 0.5)"; - break; - - case SRCMOD_BIAS: - premod_str = "("; - postmod_str = " - 0.5)"; - break; - - case SRCMOD_SIGNNEGATE: - premod_str = "-(("; - postmod_str = " - 0.5) * 2.0)"; - break; - - case SRCMOD_SIGN: - premod_str = "(("; - postmod_str = " - 0.5) * 2.0)"; - break; - - case SRCMOD_COMPLEMENT: - premod_str = "(1.0 - "; - postmod_str = ")"; - break; - - case SRCMOD_X2NEGATE: - premod_str = "-("; - postmod_str = " * 2.0)"; - break; 
- - case SRCMOD_X2: - premod_str = "("; - postmod_str = " * 2.0)"; - break; - - case SRCMOD_DZ: - fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME - postmod_str = "_dz"; - break; - - case SRCMOD_DW: - fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME - postmod_str = "_dw"; - break; - - case SRCMOD_ABSNEGATE: - ctx->metal_need_header_math = 1; - premod_str = "-abs("; - postmod_str = ")"; - break; - - case SRCMOD_ABS: - ctx->metal_need_header_math = 1; - premod_str = "abs("; - postmod_str = ")"; - break; - - case SRCMOD_NOT: - premod_str = "!"; - break; - - case SRCMOD_NONE: - case SRCMOD_TOTAL: - break; // stop compiler whining. - } // switch - - const char *regtype_str = NULL; - - if (!arg->relative) - { - regtype_str = get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, - (char *) alloca(64), 64); - } // if - - const char *rel_lbracket = ""; - char rel_offset[32] = { '\0' }; - const char *rel_rbracket = ""; - char rel_swizzle[4] = { '\0' }; - const char *rel_regtype_str = ""; - if (arg->relative) - { - if (arg->regtype == REG_TYPE_INPUT) - regtype_str=get_METAL_input_array_varname(ctx,(char*)alloca(64),64); - else - { - assert(arg->regtype == REG_TYPE_CONST); - const int arrayidx = arg->relative_array->index; - const int offset = arg->regnum - arrayidx; - assert(offset >= 0); - if (arg->relative_array->constant) - { - const int arraysize = arg->relative_array->count; - regtype_str = get_METAL_const_array_varname_in_buf(ctx, - arrayidx, arraysize, (char *) alloca(64), 64); - if (offset != 0) - snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset); - } // if - else - { - regtype_str = get_METAL_uniform_array_varname(ctx, arg->regtype, - (char *) alloca(64), 64); - if (offset == 0) - { - snprintf(rel_offset, sizeof (rel_offset), - "ARRAYBASE_%d + ", arrayidx); - } // if - else - { - snprintf(rel_offset, sizeof (rel_offset), - "(ARRAYBASE_%d + %d) + ", arrayidx, offset); - } // else - } // else - } // else - - rel_lbracket = "["; - - 
rel_regtype_str = get_METAL_varname_in_buf(ctx, arg->relative_regtype, - arg->relative_regnum, - (char *) alloca(64), 64); - rel_swizzle[0] = '.'; - rel_swizzle[1] = swizzle_channels[arg->relative_component]; - rel_swizzle[2] = '\0'; - rel_rbracket = "]"; - } // if - - char swiz_str[6] = { '\0' }; - if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) - { - make_METAL_swizzle_string(swiz_str, sizeof (swiz_str), - arg->swizzle, writemask); - } // if - - if (regtype_str == NULL) - { - fail(ctx, "Unknown source register type."); - return buf; - } // if - - snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s", - premod_str, regtype_str, rel_lbracket, rel_offset, - rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str, - postmod_str); - // !!! FIXME: make sure the scratch buffer was large enough. - return buf; -} // make_METAL_srcarg_string - -// generate some convenience functions. -#define MAKE_METAL_SRCARG_STRING_(mask, bitmask) \ - static inline const char *make_METAL_srcarg_string_##mask(Context *ctx, \ - const size_t idx, char *buf, \ - const size_t buflen) { \ - return make_METAL_srcarg_string(ctx, idx, bitmask, buf, buflen); \ - } -MAKE_METAL_SRCARG_STRING_(x, (1 << 0)) -MAKE_METAL_SRCARG_STRING_(y, (1 << 1)) -MAKE_METAL_SRCARG_STRING_(z, (1 << 2)) -MAKE_METAL_SRCARG_STRING_(w, (1 << 3)) -MAKE_METAL_SRCARG_STRING_(scalar, (1 << 0)) -MAKE_METAL_SRCARG_STRING_(full, 0xF) -MAKE_METAL_SRCARG_STRING_(masked, ctx->dest_arg.writemask) -MAKE_METAL_SRCARG_STRING_(vec3, 0x7) -MAKE_METAL_SRCARG_STRING_(vec2, 0x3) -#undef MAKE_METAL_SRCARG_STRING_ - -// special cases for comparison opcodes... 
- -static const char *get_METAL_comparison_string_scalar(Context *ctx) -{ - static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" }; - if (ctx->instruction_controls >= STATICARRAYLEN(comps)) - { - fail(ctx, "unknown comparison control"); - return ""; - } // if - - return comps[ctx->instruction_controls]; -} // get_METAL_comparison_string_scalar - -static const char *get_METAL_comparison_string_vector(Context *ctx) -{ - return get_METAL_comparison_string_scalar(ctx); // standard C operators work for vectors in Metal. -} // get_METAL_comparison_string_vector - - -static void emit_METAL_start(Context *ctx, const char *profilestr) -{ - if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx)) - { - failf(ctx, "Shader type %u unsupported in this profile.", - (uint) ctx->shader_type); - return; - } // if - - if (!ctx->mainfn) - { - if (shader_is_vertex(ctx)) - ctx->mainfn = StrDup(ctx, "VertexShader"); - else if (shader_is_pixel(ctx)) - ctx->mainfn = StrDup(ctx, "FragmentShader"); - } // if - - set_output(ctx, &ctx->mainline); - ctx->indent++; -} // emit_METAL_start - -static void emit_METAL_RET(Context *ctx); -static void emit_METAL_end(Context *ctx) -{ - // !!! FIXME: maybe handle this at a higher level? - // ps_1_* writes color to r0 instead oC0. We move it to the right place. - // We don't have to worry about a RET opcode messing this up, since - // RET isn't available before ps_2_0. - if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) - { - set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); - output_line(ctx, "oC0 = r0;"); - } // if - - // !!! FIXME: maybe handle this at a higher level? - // force a RET opcode if we're at the end of the stream without one. - if (ctx->previous_opcode != OPCODE_RET) - emit_METAL_RET(ctx); -} // emit_METAL_end - -static void emit_METAL_phase(Context *ctx) -{ - // no-op in Metal. 
-} // emit_METAL_phase - -static void emit_METAL_finalize(Context *ctx) -{ - // If we had a relative addressing of REG_TYPE_INPUT, we need to build - // an array for it at the start of main(). GLSL doesn't let you specify - // arrays of attributes. - //float4 blah_array[BIGGEST_ARRAY]; - if (ctx->have_relative_input_registers) // !!! FIXME - fail(ctx, "Relative addressing of input registers not supported."); - - // Insert header includes we need... - push_output(ctx, &ctx->preflight); - #define INC_METAL_HEADER(name) \ - if (ctx->metal_need_header_##name) { \ - output_line(ctx, "#include "); \ - } - INC_METAL_HEADER(common); - INC_METAL_HEADER(math); - INC_METAL_HEADER(relational); - INC_METAL_HEADER(geometric); - INC_METAL_HEADER(graphics); - INC_METAL_HEADER(texture); - #undef INC_METAL_HEADER - output_blank_line(ctx); - output_line(ctx, "using namespace metal;"); - output_blank_line(ctx); - pop_output(ctx); - - // Fill in the shader's mainline function signature. - push_output(ctx, &ctx->mainline_intro); - output_line(ctx, "%s %s%s %s (", - shader_is_vertex(ctx) ? "vertex" : "fragment", - ctx->outputs ? ctx->mainfn : "void", - ctx->outputs ? 
"_Output" : "", ctx->mainfn); - pop_output(ctx); - - push_output(ctx, &ctx->mainline_arguments); - ctx->indent++; - - const int uniform_count = ctx->uniform_float4_count + ctx->uniform_int4_count + ctx->uniform_bool_count; - int commas = 0; - if (uniform_count) commas++; - if (ctx->inputs) commas++; - if (commas) commas--; - - if (uniform_count > 0) - { - push_output(ctx, &ctx->globals); - output_line(ctx, "struct %s_Uniforms", ctx->mainfn); - output_line(ctx, "{"); - ctx->indent++; - if (ctx->uniform_float4_count > 0) - output_line(ctx, "float4 uniforms_float4[%d];", ctx->uniform_float4_count); - if (ctx->uniform_int4_count > 0) - output_line(ctx, "int4 uniforms_int4[%d];", ctx->uniform_int4_count); - if (ctx->uniform_bool_count > 0) - output_line(ctx, "bool uniforms_bool[%d];", ctx->uniform_bool_count); - ctx->indent--; - output_line(ctx, "};"); - pop_output(ctx); - - output_line(ctx, "constant %s_Uniforms &uniforms [[buffer(16)]]%s", ctx->mainfn, commas ? "," : ""); - commas--; - } // if - - if (ctx->inputs) - { - output_line(ctx, "%s_Input input [[stage_in]]%s", ctx->mainfn, commas ? "," : ""); - commas--; - } // if - - ctx->indent--; - output_line(ctx, ") {"); - if (ctx->outputs) - { - ctx->indent++; - output_line(ctx, "%s_Output output;", ctx->mainfn); - - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "return output;"); - pop_output(ctx); - } // if - pop_output(ctx); - - if (ctx->inputs) - { - push_output(ctx, &ctx->inputs); - output_line(ctx, "};"); - output_blank_line(ctx); - pop_output(ctx); - } // if - - if (ctx->outputs) - { - push_output(ctx, &ctx->outputs); - output_line(ctx, "};"); - output_blank_line(ctx); - pop_output(ctx); - } // if - - // throw some blank lines around to make source more readable. - if (ctx->globals) // don't add a blank line if the section is empty. 
- { - push_output(ctx, &ctx->globals); - output_blank_line(ctx); - pop_output(ctx); - } // if -} // emit_METAL_finalize - -static void emit_METAL_global(Context *ctx, RegisterType regtype, int regnum) -{ - char varname[64]; - get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - // These aren't actually global in metal, set them up at top of mainline. - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - - switch (regtype) - { - case REG_TYPE_ADDRESS: - if (shader_is_vertex(ctx)) - output_line(ctx, "int4 %s;", varname); - else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. - { - // We have to map texture registers to temps for ps_1_1, since - // they work like temps, initialize with tex coords, and the - // ps_1_1 TEX opcode expects to overwrite it. - if (!shader_version_atleast(ctx, 1, 4)) - output_line(ctx, "float4 %s = input.%s;",varname,varname); - } // else if - break; - case REG_TYPE_PREDICATE: - output_line(ctx, "bool4 %s;", varname); - break; - case REG_TYPE_TEMP: - output_line(ctx, "float4 %s;", varname); - break; - case REG_TYPE_LOOP: - break; // no-op. We declare these in for loops at the moment. - case REG_TYPE_LABEL: - break; // no-op. If we see it here, it means we optimized it out. - default: - fail(ctx, "BUG: we used a register we don't know how to define."); - break; - } // switch - - pop_output(ctx); -} // emit_METAL_global - -static void emit_METAL_array(Context *ctx, VariableList *var) -{ - // All uniforms (except constant arrays, which are literally constant - // data embedded in Metal shaders) are now packed into a single array, - // so we can batch the uniform transfers. So this doesn't actually - // define an array here; the one, big array is emitted during - // finalization instead. - // However, we need to #define the offset into the one, big array here, - // and let dereferences use that #define. 
- const int base = var->index; - const int metalbase = ctx->uniform_float4_count; - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - output_line(ctx, "const int ARRAYBASE_%d = %d;", base, metalbase); - pop_output(ctx); - var->emit_position = metalbase; -} // emit_METAL_array - -static void emit_METAL_const_array(Context *ctx, const ConstantsList *clist, - int base, int size) -{ - char varname[64]; - get_METAL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname)); - - const char *cstr = NULL; - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - output_line(ctx, "const float4 %s[%d] = {", varname, size); - ctx->indent++; - - int i; - for (i = 0; i < size; i++) - { - while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) - clist = clist->next; - assert(clist->constant.index == (base + i)); - - char val0[32]; - char val1[32]; - char val2[32]; - char val3[32]; - floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); - floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); - floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); - floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); - - output_line(ctx, "float4(%s, %s, %s, %s)%s", val0, val1, val2, val3, - (i < (size-1)) ? "," : ""); - - clist = clist->next; - } // for - - ctx->indent--; - output_line(ctx, "};"); - output_line(ctx, "(void) %s[0];", varname); // stop compiler warnings. - pop_output(ctx); -} // emit_METAL_const_array - -static void emit_METAL_uniform(Context *ctx, RegisterType regtype, int regnum, - const VariableList *var) -{ - // Now that we're pushing all the uniforms as one struct, pack these - // down, so if we only use register c439, it'll actually map to - // uniforms.uniforms_float4[0]. As we push one big struct, this will - // prevent uploading unused data. 
- - const char *utype = get_METAL_uniform_type(ctx, regtype); - char varname[64]; - char name[64]; - int index = 0; - - get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - - if (var == NULL) - { - get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name)); - - if (regtype == REG_TYPE_CONST) - index = ctx->uniform_float4_count; - else if (regtype == REG_TYPE_CONSTINT) - index = ctx->uniform_int4_count; - else if (regtype == REG_TYPE_CONSTBOOL) - index = ctx->uniform_bool_count; - else // get_METAL_uniform_array_varname() would have called fail(). - assert(isfail(ctx)); - - // !!! FIXME: can cause unused var warnings in Clang... - //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index); - output_line(ctx, "#define %s %s[%d]", varname, name, index); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. - pop_output(ctx); - } // if - - else - { - const int arraybase = var->index; - if (var->constant) - { - get_METAL_const_array_varname_in_buf(ctx, arraybase, var->count, - name, sizeof (name)); - index = (regnum - arraybase); - } // if - else - { - assert(var->emit_position != -1); - get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name)); - index = (regnum - arraybase) + var->emit_position; - } // else - - // !!! FIXME: might trigger unused var warnings in Clang. - //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index); - output_line(ctx, "#define %s %s[%d];", varname, name, index); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. 
- pop_output(ctx); - } // else - - pop_output(ctx); -} // emit_METAL_uniform - -static void emit_METAL_sampler(Context *ctx,int stage,TextureType ttype,int tb) -{ - char var[64]; - const char *texsuffix = NULL; - switch (ttype) - { - case TEXTURE_TYPE_2D: texsuffix = "2d"; break; - case TEXTURE_TYPE_CUBE: texsuffix = "cube"; break; - case TEXTURE_TYPE_VOLUME: texsuffix = "3d"; break; - default: assert(!"unexpected texture type"); return; - } // switch - - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var)); - - push_output(ctx, &ctx->mainline_arguments); - ctx->indent++; - output_line(ctx, "texture%s %s_texture [[texture(%d)]],", - texsuffix, var, stage); - output_line(ctx, "sampler %s [[sampler(%d)]],", var, stage); - pop_output(ctx); - - if (tb) // This sampler used a ps_1_1 TEXBEM opcode? - { - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - char name[64]; - const int index = ctx->uniform_float4_count; - ctx->uniform_float4_count += 2; - get_METAL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name)); - output_line(ctx, "constant float4 &%s_texbem = %s[%d];", var, name, index); - output_line(ctx, "constant float4 &%s_texbeml = %s[%d];", var, name, index+1); - pop_output(ctx); - } // if -} // emit_METAL_sampler - -static void emit_METAL_attribute(Context *ctx, RegisterType regtype, int regnum, - MOJOSHADER_usage usage, int index, int wmask, - int flags) -{ - // !!! FIXME: this function doesn't deal with write masks at all yet! - const char *usage_str = NULL; - char index_str[16] = { '\0' }; - char var[64]; - - get_METAL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var)); - - //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? - - if (index != 0) // !!! FIXME: a lot of these MUST be zero. - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - - if (shader_is_vertex(ctx)) - { - // pre-vs3 output registers. - // these don't ever happen in DCL opcodes, I think. Map to vs_3_* - // output registers. 
- if (!shader_version_atleast(ctx, 3, 0)) - { - if (regtype == REG_TYPE_RASTOUT) - { - regtype = REG_TYPE_OUTPUT; - index = regnum; - switch ((const RastOutType) regnum) - { - case RASTOUT_TYPE_POSITION: - usage = MOJOSHADER_USAGE_POSITION; - break; - case RASTOUT_TYPE_FOG: - usage = MOJOSHADER_USAGE_FOG; - break; - case RASTOUT_TYPE_POINT_SIZE: - usage = MOJOSHADER_USAGE_POINTSIZE; - break; - } // switch - } // if - - else if (regtype == REG_TYPE_ATTROUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_COLOR; - index = regnum; - } // else if - - else if (regtype == REG_TYPE_TEXCRDOUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_TEXCOORD; - index = regnum; - } // else if - } // if - - if (regtype == REG_TYPE_INPUT) - { - push_output(ctx, &ctx->inputs); - if (buffer_size(ctx->inputs) == 0) - { - output_line(ctx, "struct %s_Input", ctx->mainfn); - output_line(ctx, "{"); - } // if - - ctx->indent++; - output_line(ctx, "float4 %s [[attribute(%d)]];", var, regnum); - pop_output(ctx); - - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - // !!! FIXME: might trigger unused var warnings in Clang. - //output_line(ctx, "constant float4 &%s = input.%s;", var, var); - output_line(ctx, "#define %s input.%s", var, var); - pop_output(ctx); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", var); // !!! FIXME: gross. 
- pop_output(ctx); - } // if - - else if (regtype == REG_TYPE_OUTPUT) - { - push_output(ctx, &ctx->outputs); - if (buffer_size(ctx->outputs) == 0) - { - output_line(ctx, "struct %s_Output", ctx->mainfn); - output_line(ctx, "{"); - } // if - - ctx->indent++; - - switch (usage) - { - case MOJOSHADER_USAGE_POSITION: - output_line(ctx, "float4 %s [[position]];", var); - break; - case MOJOSHADER_USAGE_POINTSIZE: - output_line(ctx, "float4 %s [[point_size]];", var); - break; - case MOJOSHADER_USAGE_COLOR: - output_line(ctx, "float4 %s [[user(color%d)]];", var, index); - break; - case MOJOSHADER_USAGE_FOG: - output_line(ctx, "float4 %s [[user(fog)]];", var); - break; - case MOJOSHADER_USAGE_TEXCOORD: - output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index); - break; - default: - // !!! FIXME: we need to deal with some more built-in varyings here. - break; - } // switch - - pop_output(ctx); - - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - // !!! FIXME: this doesn't work. - //output_line(ctx, "float4 &%s = output.%s;", var, var); - output_line(ctx, "#define %s output.%s", var, var); - pop_output(ctx); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", var); // !!! FIXME: gross. - pop_output(ctx); - } // else if - - else - { - fail(ctx, "unknown vertex shader attribute register"); - } // else - } // if - - else if (shader_is_pixel(ctx)) - { - // samplers DCLs get handled in emit_METAL_sampler(). - - if (flags & MOD_CENTROID) // !!! 
FIXME - { - failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); - return; - } // if - - if ((regtype == REG_TYPE_COLOROUT) || (regtype == REG_TYPE_DEPTHOUT)) - { - push_output(ctx, &ctx->outputs); - if (buffer_size(ctx->outputs) == 0) - { - output_line(ctx, "struct %s_Output", ctx->mainfn); - output_line(ctx, "{"); - } // if - ctx->indent++; - - if (regtype == REG_TYPE_COLOROUT) - output_line(ctx, "float4 %s [[color(%d)]];", var, regnum); - else if (regtype == REG_TYPE_DEPTHOUT) - output_line(ctx, "float %s [[depth(any)]];", var); - - pop_output(ctx); - - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - // !!! FIXME: this doesn't work. - //output_line(ctx, "float%s &%s = output.%s;", (regtype == REG_TYPE_DEPTHOUT) ? "" : "4", var, var); - output_line(ctx, "#define %s output.%s", var, var); - pop_output(ctx); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", var); // !!! FIXME: gross. - pop_output(ctx); - } // if - - // !!! FIXME: can you actualy have a texture register with COLOR usage? - else if ((regtype == REG_TYPE_TEXTURE) || - (regtype == REG_TYPE_INPUT) || - (regtype == REG_TYPE_MISCTYPE)) - { - int skipreference = 0; - push_output(ctx, &ctx->inputs); - if (buffer_size(ctx->inputs) == 0) - { - output_line(ctx, "struct %s_Input", ctx->mainfn); - output_line(ctx, "{"); - } // if - ctx->indent++; - - if (regtype == REG_TYPE_MISCTYPE) - { - const MiscTypeType mt = (MiscTypeType) regnum; - if (mt == MISCTYPE_TYPE_FACE) - output_line(ctx, "bool %s [[front_facing]];", var); - else if (mt == MISCTYPE_TYPE_POSITION) - output_line(ctx, "float4 %s [[position]];", var); - else - fail(ctx, "BUG: unhandled misc register"); - } // else if - - else - { - if (usage == MOJOSHADER_USAGE_TEXCOORD) - { - // ps_1_1 does a different hack for this attribute. - // Refer to emit_METAL_global()'s REG_TYPE_ADDRESS code. 
- if (!shader_version_atleast(ctx, 1, 4)) - skipreference = 1; - output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index); - } // if - - else if (usage == MOJOSHADER_USAGE_COLOR) - output_line(ctx, "float4 %s [[user(color%d)]];", var, index); - - else if (usage == MOJOSHADER_USAGE_FOG) - output_line(ctx, "float4 %s [[user(fog)]];", var); - } // else - - pop_output(ctx); - - // !!! FIXME: can cause unused var warnings in Clang... - #if 0 - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - if ((regtype == REG_TYPE_MISCTYPE)&&(regnum == MISCTYPE_TYPE_FACE)) - output_line(ctx, "constant bool &%s = input.%s;", var, var); - else if (!skipreference) - output_line(ctx, "constant float4 &%s = input.%s;", var, var); - pop_output(ctx); - #endif - - if (!skipreference) - { - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - output_line(ctx, "#define %s input.%s", var, var); - pop_output(ctx); - push_output(ctx, &ctx->mainline); - ctx->indent++; - output_line(ctx, "#undef %s", var); // !!! FIXME: gross. - pop_output(ctx); - } // if - } // else if - - else - { - fail(ctx, "unknown pixel shader attribute register"); - } // else - } // else if - - else - { - fail(ctx, "Unknown shader type"); // state machine should catch this. - } // else -} // emit_METAL_attribute - -static void emit_METAL_NOP(Context *ctx) -{ - // no-op is a no-op. 
:) -} // emit_METAL_NOP - -static void emit_METAL_MOV(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "%s", src0); - output_line(ctx, "%s", code); -} // emit_METAL_MOV - -static void emit_METAL_ADD(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_ADD - -static void emit_METAL_SUB(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_SUB - -static void emit_METAL_MAD(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2); - output_line(ctx, "%s", code); -} // emit_METAL_MAD - -static void emit_METAL_MUL(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_MUL - -static void emit_METAL_RCP(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), "1.0 
/ %s", src0); - output_line(ctx, "%s", code); -} // emit_METAL_RCP - -static void emit_METAL_RSQ(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "rsqrt(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_RSQ - -static void emit_METAL_dotprod(Context *ctx, const char *src0, const char *src1, - const char *extra) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char castleft[16] = { '\0' }; - const char *castright = ""; - if (vecsize != 1) - { - snprintf(castleft, sizeof (castleft), "float%d(", vecsize); - castright = ")"; - } // if - - char code[128]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s", - castleft, src0, src1, extra, castright); - output_line(ctx, "%s", code); -} // emit_METAL_dotprod - -static void emit_METAL_DP3(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); - emit_METAL_dotprod(ctx, src0, src1, ""); -} // emit_METAL_DP3 - -static void emit_METAL_DP4(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_full(ctx, 1, src1, sizeof (src1)); - emit_METAL_dotprod(ctx, src0, src1, ""); -} // emit_METAL_DP4 - -static void emit_METAL_MIN(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_MIN - -static void emit_METAL_MAX(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, 
sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_MAX - -static void emit_METAL_SLT(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants. - if (vecsize == 1) - make_METAL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1); - else - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float%d(%s < %s)", vecsize, src0, src1); - } // else - output_line(ctx, "%s", code); -} // emit_METAL_SLT - -static void emit_METAL_SGE(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants. 
- if (vecsize == 1) - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float(%s >= %s)", src0, src1); - } // if - else - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float%d(%s >= %s)", vecsize, src0, src1); - } // else - output_line(ctx, "%s", code); -} // emit_METAL_SGE - -static void emit_METAL_EXP(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_EXP - -static void emit_METAL_LOG(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_LOG - -static void emit_METAL_LIT_helper(Context *ctx) -{ - const char *maxp = "127.9961"; // value from the dx9 reference. 
- - if (ctx->glsl_generated_lit_helper) - return; - - ctx->glsl_generated_lit_helper = 1; - ctx->metal_need_header_common = 1; - ctx->metal_need_header_math = 1; - - push_output(ctx, &ctx->helpers); - output_line(ctx, "static float4 LIT(const float4 src)"); - output_line(ctx, "{"); ctx->indent++; - output_line(ctx, "const float power = clamp(src.w, -%s, %s);",maxp,maxp); - output_line(ctx, "float4 retval = float4(1.0, 0.0, 0.0, 1.0);"); - output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++; - output_line(ctx, "retval.y = src.x;"); - output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++; - output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--; - output_line(ctx, "}"); ctx->indent--; - output_line(ctx, "}"); - output_line(ctx, "return retval;"); ctx->indent--; - output_line(ctx, "}"); - output_blank_line(ctx); - pop_output(ctx); -} // emit_METAL_LIT_helper - -static void emit_METAL_LIT(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char code[128]; - emit_METAL_LIT_helper(ctx); - make_METAL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_LIT - -static void emit_METAL_DST(Context *ctx) -{ - // !!! FIXME: needs to take ctx->dst_arg.writemask into account. 
- char src0_y[64]; make_METAL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y)); - char src1_y[64]; make_METAL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y)); - char src0_z[64]; make_METAL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z)); - char src1_w[64]; make_METAL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w)); - - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float4(1.0, %s * %s, %s, %s)", - src0_y, src1_y, src0_z, src1_w); - output_line(ctx, "%s", code); -} // emit_METAL_DST - -static void emit_METAL_LRP(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); - char code[128]; - ctx->metal_need_header_common = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)", - src2, src1, src0); - output_line(ctx, "%s", code); -} // emit_METAL_LRP - -static void emit_METAL_FRC(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_FRC - -static void emit_METAL_M4X4(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); - char row3[64]; make_METAL_srcarg_string_full(ctx, 4, row3, sizeof (row3)); - char code[256]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2, src0, row3); - output_line(ctx, "%s", code); 
-} // emit_METAL_M4X4 - -static void emit_METAL_M4X3(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); - char code[256]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2); - output_line(ctx, "%s", code); -} // emit_METAL_M4X3 - -static void emit_METAL_M3X4(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); - char row3[64]; make_METAL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3)); - char code[256]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float4(dot(%s, %s), dot(%s, %s), " - "dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, - src0, row2, src0, row3); - output_line(ctx, "%s", code); -} // emit_METAL_M3X4 - -static void emit_METAL_M3X3(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); - char code[256]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1, src0, row2); - output_line(ctx, "%s", code); -} // emit_METAL_M3X3 - -static void 
emit_METAL_M3X2(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); - char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); - char code[256]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float2(dot(%s, %s), dot(%s, %s))", - src0, row0, src0, row1); - output_line(ctx, "%s", code); -} // emit_METAL_M3X2 - -static void emit_METAL_CALL(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - if (ctx->loops > 0) - output_line(ctx, "%s(aL);", src0); - else - output_line(ctx, "%s();", src0); -} // emit_METAL_CALL - -static void emit_METAL_CALLNZ(Context *ctx) -{ - // !!! FIXME: if src1 is a constbool that's true, we can remove the - // !!! FIXME: if. If it's false, we can make this a no-op. - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - - if (ctx->loops > 0) - output_line(ctx, "if (%s) { %s(aL); }", src1, src0); - else - output_line(ctx, "if (%s) { %s(); }", src1, src0); -} // emit_METAL_CALLNZ - -static void emit_METAL_LOOP(Context *ctx) -{ - // !!! FIXME: swizzle? - char var[64]; get_METAL_srcarg_varname(ctx, 1, var, sizeof (var)); - assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday. - output_line(ctx, "{"); - ctx->indent++; - output_line(ctx, "const int aLend = %s.x + %s.y;", var, var); - output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var); - ctx->indent++; -} // emit_METAL_LOOP - -static void emit_METAL_RET(Context *ctx) -{ - // thankfully, the MSDN specs say a RET _has_ to end a function...no - // early returns. So if you hit one, you know you can safely close - // a high-level function. 
- push_output(ctx, &ctx->postflight); - output_line(ctx, "}"); - output_blank_line(ctx); - set_output(ctx, &ctx->subroutines); // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily. -} // emit_METAL_RET - -static void emit_METAL_ENDLOOP(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); - ctx->indent--; - output_line(ctx, "}"); -} // emit_METAL_ENDLOOP - -static void emit_METAL_LABEL(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - const int label = ctx->source_args[0].regnum; - RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); - assert(ctx->output == ctx->subroutines); // not mainline, etc. - assert(ctx->indent == 0); // we shouldn't be in the middle of a function. - - // MSDN specs say CALL* has to come before the LABEL, so we know if we - // can ditch the entire function here as unused. - if (reg == NULL) - set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. - - // !!! FIXME: it would be nice if we could determine if a function is - // !!! FIXME: only called once and, if so, forcibly inline it. - - // !!! FIXME: this worked in GLSL because all our state is global to the shader, - // !!! FIXME: but in metal we kept it local to the shader mainline. - // !!! FIXME: Can we do C++11 lambdas in Metal to have nested functions? :) - - const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; - output_line(ctx, "static void %s(%s)", src0, uses_loopreg); - output_line(ctx, "{"); - ctx->indent++; -} // emit_METAL_LABEL - -static void emit_METAL_DCL(Context *ctx) -{ - // no-op. We do this in our emit_attribute() and emit_uniform(). 
-} // emit_METAL_DCL - -static void emit_METAL_POW(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "pow(abs(%s), %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_POW - -static void emit_METAL_CRS(Context *ctx) -{ - // !!! FIXME: needs to take ctx->dst_arg.writemask into account. - char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); - char code[128]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "cross(%s, %s)", src0, src1); - output_line(ctx, "%s", code); -} // emit_METAL_CRS - -static void emit_METAL_SGN(Context *ctx) -{ - // (we don't need the temporary registers specified for the D3D opcode.) 
- char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_common = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_SGN - -static void emit_METAL_ABS(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_math = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_ABS - -static void emit_METAL_NRM(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_geometric = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_NRM - -static void emit_METAL_SINCOS(Context *ctx) -{ - // we don't care about the temp registers that <= sm2 demands; ignore them. - // sm2 also talks about what components are left untouched vs. undefined, - // but we just leave those all untouched with Metal write masks (which - // would fulfill the "undefined" requirement, too). - const int mask = ctx->dest_arg.writemask; - char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char code[128] = { '\0' }; - - ctx->metal_need_header_math = 1; - if (writemask_x(mask)) - make_METAL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0); - else if (writemask_y(mask)) - make_METAL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0); - else if (writemask_xy(mask)) - { - // !!! FIXME: can use sincos(), but need to assign cos to a temp, since it needs a reference. - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float2(cos(%s), sin(%s))", src0, src0); - } // else if - - output_line(ctx, "%s", code); -} // emit_METAL_SINCOS - -static void emit_METAL_REP(Context *ctx) -{ - // !!! 
FIXME: - // msdn docs say legal loop values are 0 to 255. We can check DEFI values - // at parse time, but if they are pulling a value from a uniform, do - // we clamp here? - // !!! FIXME: swizzle is legal here, right? - char src0[64]; make_METAL_srcarg_string_x(ctx, 0, src0, sizeof (src0)); - const uint rep = (uint) ctx->reps; - output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {", - rep, rep, src0, rep); - ctx->indent++; -} // emit_METAL_REP - -static void emit_METAL_ENDREP(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); -} // emit_METAL_ENDREP - -static void emit_METAL_IF(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "if (%s) {", src0); - ctx->indent++; -} // emit_METAL_IF - -static void emit_METAL_IFC(Context *ctx) -{ - const char *comp = get_METAL_comparison_string_scalar(ctx); - char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); - output_line(ctx, "if (%s %s %s) {", src0, comp, src1); - ctx->indent++; -} // emit_METAL_IFC - -static void emit_METAL_ELSE(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "} else {"); - ctx->indent++; -} // emit_METAL_ELSE - -static void emit_METAL_ENDIF(Context *ctx) -{ - ctx->indent--; - output_line(ctx, "}"); -} // emit_METAL_ENDIF - -static void emit_METAL_BREAK(Context *ctx) -{ - output_line(ctx, "break;"); -} // emit_METAL_BREAK - -static void emit_METAL_BREAKC(Context *ctx) -{ - const char *comp = get_METAL_comparison_string_scalar(ctx); - char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); - output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1); -} // emit_METAL_BREAKC - -static void emit_METAL_MOVA(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; 
make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - - ctx->metal_need_header_math = 1; - ctx->metal_need_header_common = 1; - - if (vecsize == 1) - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "int(floor(abs(%s) + 0.5) * sign(%s))", - src0, src0); - } // if - - else - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "int%d(floor(abs(%s) + float%d(0.5)) * sign(%s))", - vecsize, src0, vecsize, src0); - } // else - - output_line(ctx, "%s", code); -} // emit_METAL_MOVA - -static void emit_METAL_DEFB(Context *ctx) -{ - char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname)); - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - output_line(ctx, "const bool %s = %s;", - varname, ctx->dwords[0] ? "true" : "false"); - pop_output(ctx); -} // emit_METAL_DEFB - -static void emit_METAL_DEFI(Context *ctx) -{ - char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname)); - const int32 *x = (const int32 *) ctx->dwords; - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - output_line(ctx, "const int4 %s = int4(%d, %d, %d, %d);", - varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); - pop_output(ctx); -} // emit_METAL_DEFI - -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) - -static void emit_METAL_TEXKILL(Context *ctx) -{ - char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - ctx->metal_need_header_relational = 1; - ctx->metal_need_header_graphics = 1; - output_line(ctx, "if (any(%s.xyz < float3(0.0))) discard_fragment();", dst); -} // emit_METAL_TEXKILL - -static void metal_texld(Context *ctx, const int texldd) -{ - ctx->metal_need_header_texture = 1; - if (!shader_version_atleast(ctx, 1, 4)) - { - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char sampler[64]; - char code[128] = {0}; - - assert(!texldd); - - RegisterList *sreg; - sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum); - const TextureType ttype = (TextureType) (sreg ? 
sreg->index : 0); - - char swizzle[4] = { 'x', 'y', 'z', '\0' }; - if (ttype == TEXTURE_TYPE_2D) - swizzle[2] = '\0'; // "xy" instead of "xyz". - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "%s_texture.sample(%s, %s.%s)", - sampler, sampler, dst, swizzle); - output_line(ctx, "%s", code); - } // if - - else if (!shader_version_atleast(ctx, 2, 0)) - { - // ps_1_4 is different, too! - fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME - return; - } // else if - - else - { - const SourceArgInfo *samp_arg = &ctx->source_args[1]; - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - samp_arg->regnum); - const char *funcname = NULL; - char src0[64] = { '\0' }; - char src1[64]; get_METAL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? - char src2[64] = { '\0' }; - char src3[64] = { '\0' }; - - if (sreg == NULL) - { - fail(ctx, "TEXLD using undeclared sampler"); - return; - } // if - - const char *grad = ""; - if (texldd) - { - switch ((const TextureType) sreg->index) - { - case TEXTURE_TYPE_2D: - grad = "2d"; - make_METAL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2)); - make_METAL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3)); - break; - case TEXTURE_TYPE_VOLUME: - grad = "3d"; - make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); - make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); - break; - case TEXTURE_TYPE_CUBE: - grad = "cube"; - make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); - make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); - break; - } // switch - } // if - - // !!! FIXME: can TEXLDD set instruction_controls? - // !!! FIXME: does the d3d bias value map directly to Metal? 
- const char *biasleft = ""; - const char *biasright = ""; - char bias[64] = { '\0' }; - if (ctx->instruction_controls == CONTROL_TEXLDB) - { - biasleft = ", bias("; - make_METAL_srcarg_string_w(ctx, 0, bias, sizeof (bias)); - biasright = ")"; - } // if - - // Metal doesn't have a texture2DProj() function, but you just divide - // your texcoords by texcoords.w to achieve it anyhow, so DIY. - const char *projop = ""; - char proj[64] = { '\0' }; - if (ctx->instruction_controls == CONTROL_TEXLDP) - { - if (sreg->index == TEXTURE_TYPE_CUBE) - fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? - projop = " / "; - make_METAL_srcarg_string_w(ctx, 0, proj, sizeof (proj)); - } // if - - switch ((const TextureType) sreg->index) - { - case TEXTURE_TYPE_2D: - make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); - break; - - case TEXTURE_TYPE_CUBE: - case TEXTURE_TYPE_VOLUME: - make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); - break; - - default: - fail(ctx, "unknown texture type"); - return; - } // switch - - assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); - char swiz_str[6] = { '\0' }; - make_METAL_swizzle_string(swiz_str, sizeof (swiz_str), - samp_arg->swizzle, ctx->dest_arg.writemask); - - char code[128]; - if (texldd) - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "%s_texture.sample(%s, %s, gradient%s(%s, %s))%s", - src1, src1, src0, grad, src2, src3, swiz_str); - } // if - else - { - make_METAL_destarg_assign(ctx, code, sizeof (code), - "%s_texture.sample(%s, %s%s%s%s%s%s)%s", - src1, src1, src0, projop, proj, - biasleft, bias, biasright, swiz_str); - } // else - - output_line(ctx, "%s", code); - } // else -} // metal_texld - -static void emit_METAL_TEXLD(Context *ctx) -{ - metal_texld(ctx, 0); -} // emit_METAL_TEXLD - - -static void emit_METAL_TEXBEM(Context *ctx) -{ - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - char src[64]; 
get_METAL_srcarg_varname(ctx, 0, src, sizeof (src)); - char sampler[64]; - char code[512]; - - ctx->metal_need_header_texture = 1; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," - " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))", - sampler, sampler, - dst, sampler, src, sampler, src, - dst, sampler, src, sampler, src); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXBEM - - -static void emit_METAL_TEXBEML(Context *ctx) -{ - // !!! FIXME: this code counts on the register not having swizzles, etc. - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - char src[64]; get_METAL_srcarg_varname(ctx, 0, src, sizeof (src)); - char sampler[64]; - char code[512]; - - ctx->metal_need_header_texture = 1; - - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "(%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," - " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *" - " ((%s.z * %s_texbeml.x) + %s_texbem.y)", - sampler, sampler, - dst, sampler, src, sampler, src, - dst, sampler, src, sampler, src, - src, sampler, sampler); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXBEML - -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME - - -static void emit_METAL_TEXM3X2PAD(Context *ctx) -{ - // no-op ... work happens in emit_METAL_TEXM3X2TEX(). 
-} // emit_METAL_TEXM3X2PAD - -static void emit_METAL_TEXM3X2TEX(Context *ctx) -{ - if (ctx->texm3x2pad_src0 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char sampler[64]; - char code[512]; - - ctx->metal_need_header_texture = 1; - ctx->metal_need_header_geometric = 1; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, - src0, sizeof (src0)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, - src1, sizeof (src1)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src2, sizeof (src2)); - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "%s_texture.sample(%s, float2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))", - sampler, sampler, src0, src1, src2, dst); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXM3X2TEX - -static void emit_METAL_TEXM3X3PAD(Context *ctx) -{ - // no-op ... work happens in emit_METAL_TEXM3X3*(). -} // emit_METAL_TEXM3X3PAD - -static void emit_METAL_TEXM3X3TEX(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char sampler[64]; - char code[512]; - - ctx->metal_need_header_texture = 1; - ctx->metal_need_header_geometric = 1; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s," - " float3(dot(%s.xyz, %s.xyz)," - " dot(%s.xyz, %s.xyz)," - " dot(%s.xyz, %s.xyz)))", - ttypestr, sampler, src0, src1, src2, src3, dst, src4); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXM3X3TEX - -static void emit_METAL_TEXM3X3SPEC_helper(Context *ctx) -{ - if (ctx->glsl_generated_texm3x3spec_helper) - return; - - ctx->glsl_generated_texm3x3spec_helper = 1; - - push_output(ctx, &ctx->helpers); - output_line(ctx, "float3 TEXM3X3SPEC_reflection(const float3 normal, const float3 eyeray)"); - output_line(ctx, "{"); ctx->indent++; - output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--; - output_line(ctx, "}"); - output_blank_line(ctx); - pop_output(ctx); -} // emit_METAL_TEXM3X3SPEC_helper - -static void emit_METAL_TEXM3X3SPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char src5[64]; - char sampler[64]; - char code[512]; - - 
ctx->metal_need_header_texture = 1; - ctx->metal_need_header_geometric = 1; - - emit_METAL_TEXM3X3SPEC_helper(ctx); - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, - src5, sizeof (src5)); - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s, " - "TEXM3X3SPEC_reflection(" - "float3(" - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz)" - ")," - "%s.xyz," - ")" - ")", - ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXM3X3SPEC - -static void emit_METAL_TEXM3X3VSPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - DestArgInfo *info = &ctx->dest_arg; - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char sampler[64]; - char code[512]; - - ctx->metal_need_header_texture = 1; - ctx->metal_need_header_geometric = 1; - - emit_METAL_TEXM3X3SPEC_helper(ctx); - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, - sampler, sizeof (sampler)); - - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, - info->regnum); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "texture%s(%s, " - "TEXM3X3SPEC_reflection(" - "float3(" - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz), " - "dot(%s.xyz, %s.xyz)" - "), " - "float3(%s.w, %s.w, %s.w)" - ")" - ")", - ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXM3X3VSPEC - -static void emit_METAL_EXPP(Context *ctx) -{ - // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation. - emit_METAL_EXP(ctx); // I guess this is just partial precision EXP? -} // emit_METAL_EXPP - -static void emit_METAL_LOGP(Context *ctx) -{ - // LOGP is just low-precision LOG, but we'll take the higher precision. - emit_METAL_LOG(ctx); -} // emit_METAL_LOGP - -// common code between CMP and CND. 
-static void emit_METAL_comparison_operations(Context *ctx, const char *cmp) -{ - int i, j; - DestArgInfo *dst = &ctx->dest_arg; - const SourceArgInfo *srcarg0 = &ctx->source_args[0]; - const int origmask = dst->writemask; - int used_swiz[4] = { 0, 0, 0, 0 }; - const int writemask[4] = { dst->writemask0, dst->writemask1, - dst->writemask2, dst->writemask3 }; - const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y, - srcarg0->swizzle_z, srcarg0->swizzle_w }; - - for (i = 0; i < 4; i++) - { - int mask = (1 << i); - - if (!writemask[i]) continue; - if (used_swiz[i]) continue; - - // This is a swizzle we haven't checked yet. - used_swiz[i] = 1; - - // see if there are any other elements swizzled to match (.yyyy) - for (j = i + 1; j < 4; j++) - { - if (!writemask[j]) continue; - if (src0swiz[i] != src0swiz[j]) continue; - mask |= (1 << j); - used_swiz[j] = 1; - } // for - - // okay, (mask) should be the writemask of swizzles we like. - - //return make_METAL_srcarg_string(ctx, idx, (1 << 0)); - - char src0[64]; - char src1[64]; - char src2[64]; - make_METAL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0)); - make_METAL_srcarg_string(ctx, 1, mask, src1, sizeof (src1)); - make_METAL_srcarg_string(ctx, 2, mask, src2, sizeof (src2)); - - set_dstarg_writemask(dst, mask); - - char code[128]; - make_METAL_destarg_assign(ctx, code, sizeof (code), - "((%s %s) ? %s : %s)", - src0, cmp, src1, src2); - output_line(ctx, "%s", code); - } // for - - set_dstarg_writemask(dst, origmask); -} // emit_METAL_comparison_operations - -static void emit_METAL_CND(Context *ctx) -{ - emit_METAL_comparison_operations(ctx, "> 0.5"); -} // emit_METAL_CND - -static void emit_METAL_DEF(Context *ctx) -{ - const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? 
- char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname)); - char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); - char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); - char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); - char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); - - push_output(ctx, &ctx->mainline_top); - ctx->indent++; - // The "(void) %s;" is to make the compiler not warn if this isn't used. - output_line(ctx, "const float4 %s = float4(%s, %s, %s, %s); (void) %s;", - varname, val0, val1, val2, val3, varname); - pop_output(ctx); -} // emit_METAL_DEF - -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME - -static void emit_METAL_TEXM3X3(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char code[512]; - - ctx->metal_need_header_geometric = 1; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_METAL_destarg_varname(ctx, dst, sizeof (dst)); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "float4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)", - src0, src1, src2, src3, dst, src4); - - output_line(ctx, "%s", code); -} // emit_METAL_TEXM3X3 - -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME - -static void emit_METAL_CMP(Context *ctx) -{ - emit_METAL_comparison_operations(ctx, ">= 0.0"); -} // emit_METAL_CMP - -EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! 
FIXME - -static void emit_METAL_DP2ADD(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_METAL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2)); - char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2); - emit_METAL_dotprod(ctx, src0, src1, extra); -} // emit_METAL_DP2ADD - -static void emit_METAL_DSX(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_graphics = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdx(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_DSX - -static void emit_METAL_DSY(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char code[128]; - ctx->metal_need_header_graphics = 1; - make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdy(%s)", src0); - output_line(ctx, "%s", code); -} // emit_METAL_DSY - -static void emit_METAL_TEXLDD(Context *ctx) -{ - metal_texld(ctx, 1); -} // emit_METAL_TEXLDD - -static void emit_METAL_SETP(Context *ctx) -{ - const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); - char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); - char code[128]; - - // destination is always predicate register (which is type bvec4). - const char *comp = (vecsize == 1) ? - get_METAL_comparison_string_scalar(ctx) : - get_METAL_comparison_string_vector(ctx); - - make_METAL_destarg_assign(ctx, code, sizeof (code), - "(%s %s %s)", src0, comp, src1); - output_line(ctx, "%s", code); -} // emit_METAL_SETP - -static void emit_METAL_TEXLDL(Context *ctx) -{ - // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins - // !!! FIXME: from fragment shaders for some inexplicable reason. - // !!! 
FIXME: Maybe Metal can do it, but I haven't looked into it yet. - emit_METAL_TEXLD(ctx); -} // emit_METAL_TEXLDL - -static void emit_METAL_BREAKP(Context *ctx) -{ - char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "if (%s) { break; }", src0); -} // emit_METAL_BREAKP - -static void emit_METAL_RESERVED(Context *ctx) -{ - // do nothing; fails in the state machine. -} // emit_METAL_RESERVED - -#endif // SUPPORT_PROFILE_METAL - - -#if !SUPPORT_PROFILE_ARB1 -#define PROFILE_EMITTER_ARB1(op) -#else -#undef AT_LEAST_ONE_PROFILE -#define AT_LEAST_ONE_PROFILE 1 -#define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op, - -static inline const char *get_ARB1_register_string(Context *ctx, - const RegisterType regtype, const int regnum, - char *regnum_str, const size_t regnum_size) -{ - // turns out these are identical at the moment. - return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); -} // get_ARB1_register_string - -static const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf, - const size_t buflen) -{ - const int scratch = allocate_scratch_register(ctx); - snprintf(buf, buflen, "scratch%d", scratch); - return buf; -} // allocate_ARB1_scratch_reg_name - -static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id, - char *buf, const size_t buflen) -{ - snprintf(buf, buflen, "branch_label%d", id); - return buf; -} // get_ARB1_branch_label_name - -static const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt, - const int regnum, char *buf, - const size_t buflen) -{ - // turns out these are identical at the moment. - return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen); -} // get_ARB1_varname_in_buf - -static const char *get_ARB1_varname(Context *ctx, const RegisterType rt, - const int regnum) -{ - // turns out these are identical at the moment. 
- return get_D3D_varname(ctx, rt, regnum); -} // get_ARB1_varname - - -static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx, - const int base, const int size, - char *buf, const size_t buflen) -{ - snprintf(buf, buflen, "c_array_%d_%d", base, size); - return buf; -} // get_ARB1_const_array_varname_in_buf - - -static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size) -{ - char buf[64]; - get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); - return StrDup(ctx, buf); -} // get_ARB1_const_array_varname - - -static const char *make_ARB1_srcarg_string_in_buf(Context *ctx, - const SourceArgInfo *arg, - char *buf, size_t buflen) -{ - // !!! FIXME: this can hit pathological cases where we look like this... - // - // dp3 r1.xyz, t0_bx2, t0_bx2 - // mad r1.xyz, t0_bias, 1-r1, t0_bx2 - // - // ...which do a lot of duplicate work in arb1... - // - // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; - // MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 }; - // SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 }; - // MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 }; - // DP3 r1.xyz, scratch0, scratch1; - // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; - // SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1; - // SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 }; - // MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 }; - // MAD r1.xyz, scratch0, scratch1, scratch2; - // - // ...notice that the dp3 calculates the same value into two scratch - // registers. This case is easier to handle; just see if multiple - // source args are identical, build it up once, and use the same - // scratch register for multiple arguments in that opcode. - // Even better still, only calculate things once across instructions, - // and be smart about letting it linger in a scratch register until we - // definitely don't need the calculation anymore. That's harder to - // write, though. - - char regnum_str[16] = { '\0' }; - - // !!! FIXME: use get_ARB1_varname_in_buf() instead? 
- const char *regtype_str = NULL; - if (!arg->relative) - { - regtype_str = get_ARB1_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - } // if - - const char *rel_lbracket = ""; - char rel_offset[32] = { '\0' }; - const char *rel_rbracket = ""; - char rel_swizzle[4] = { '\0' }; - const char *rel_regtype_str = ""; - if (arg->relative) - { - rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype, - arg->relative_regnum, - (char *) alloca(64), 64); - - rel_swizzle[0] = '.'; - rel_swizzle[1] = swizzle_channels[arg->relative_component]; - rel_swizzle[2] = '\0'; - - if (!support_nv2(ctx)) - { - // The address register in ARB1 only allows the '.x' component, so - // we need to load the component we need from a temp vector - // register into .x as needed. - assert(arg->relative_regtype == REG_TYPE_ADDRESS); - assert(arg->relative_regnum == 0); - if (ctx->last_address_reg_component != arg->relative_component) - { - output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str, - arg->relative_regnum, - swizzle_channels[arg->relative_component]); - ctx->last_address_reg_component = arg->relative_component; - } // if - - rel_swizzle[1] = 'x'; - } // if - - if (arg->regtype == REG_TYPE_INPUT) - regtype_str = "vertex.attrib"; - else - { - assert(arg->regtype == REG_TYPE_CONST); - const int arrayidx = arg->relative_array->index; - const int arraysize = arg->relative_array->count; - const int offset = arg->regnum - arrayidx; - assert(offset >= 0); - regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx, - arraysize, (char *) alloca(64), 64); - if (offset != 0) - snprintf(rel_offset, sizeof (rel_offset), " + %d", offset); - } // else - - rel_lbracket = "["; - rel_rbracket = "]"; - } // if - - // This is the source register with everything but swizzle and source mods. 
- snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str, - rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset, - rel_rbracket); - - // Some of the source mods need to generate instructions to a temp - // register, in which case we'll replace the register name. - const SourceMod mod = arg->src_mod; - const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) || - ((mod == SRCMOD_ABS) && support_nv2(ctx)) ); - - if (!inplace) - { - const size_t len = 64; - char *stackbuf = (char *) alloca(len); - regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len); - regnum_str[0] = '\0'; // move value to scratch register. - rel_lbracket = ""; // scratch register won't use array. - rel_rbracket = ""; - rel_offset[0] = '\0'; - rel_swizzle[0] = '\0'; - rel_regtype_str = ""; - } // if - - const char *premod_str = ""; - const char *postmod_str = ""; - switch (mod) - { - case SRCMOD_NEGATE: - premod_str = "-"; - break; - - case SRCMOD_BIASNEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_BIAS: - output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", - regtype_str, buf); - break; - - case SRCMOD_SIGNNEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_SIGN: - output_line(ctx, - "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };", - regtype_str, buf); - break; - - case SRCMOD_COMPLEMENT: - output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;", - regtype_str, buf); - break; - - case SRCMOD_X2NEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_X2: - output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", - regtype_str, buf); - break; - - case SRCMOD_DZ: - fail(ctx, "SRCMOD_DZ currently unsupported in arb1"); - postmod_str = "_dz"; - break; - - case SRCMOD_DW: - fail(ctx, "SRCMOD_DW currently unsupported in arb1"); - postmod_str = "_dw"; - break; - - case SRCMOD_ABSNEGATE: - premod_str = "-"; - // fall through. - case SRCMOD_ABS: - if (!support_nv2(ctx)) // GL_NV_vertex_program2_option adds this. 
- output_line(ctx, "ABS %s, %s;", regtype_str, buf); - else - { - premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|"; - postmod_str = "|"; - } // else - break; - - case SRCMOD_NOT: - fail(ctx, "SRCMOD_NOT currently unsupported in arb1"); - premod_str = "!"; - break; - - case SRCMOD_NONE: - case SRCMOD_TOTAL: - break; // stop compiler whining. - } // switch - - char swizzle_str[6]; - size_t i = 0; - - if (support_nv4(ctx)) // vFace must be output as "vFace.x" in nv4. - { - if (arg->regtype == REG_TYPE_MISCTYPE) - { - if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE ) - { - swizzle_str[i++] = '.'; - swizzle_str[i++] = 'x'; - } // if - } // if - } // if - - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !no_swizzle(arg->swizzle)) - { - swizzle_str[i++] = '.'; - - // .xxxx is the same as .x, but .xx is illegal...scalar or full! - if (replicate_swizzle(arg->swizzle)) - swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; - else - { - swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; - swizzle_str[i++] = swizzle_channels[arg->swizzle_y]; - swizzle_str[i++] = swizzle_channels[arg->swizzle_z]; - swizzle_str[i++] = swizzle_channels[arg->swizzle_w]; - } // else - } // if - swizzle_str[i] = '\0'; - assert(i < sizeof (swizzle_str)); - - snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str, - regtype_str, regnum_str, rel_lbracket, - rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket, - swizzle_str, postmod_str); - // !!! FIXME: make sure the scratch buffer was large enough. 
- return buf; -} // make_ARB1_srcarg_string_in_buf - -static const char *get_ARB1_destarg_varname(Context *ctx, char *buf, - const size_t buflen) -{ - const DestArgInfo *arg = &ctx->dest_arg; - return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen); -} // get_ARB1_destarg_varname - -static const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx, - char *buf, const size_t buflen) -{ - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - *buf = '\0'; - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen); -} // get_ARB1_srcarg_varname - - -static const char *make_ARB1_destarg_string(Context *ctx, char *buf, - const size_t buflen) -{ - const DestArgInfo *arg = &ctx->dest_arg; - - *buf = '\0'; - - const char *sat_str = ""; - if (arg->result_mod & MOD_SATURATE) - { - // nv4 can use ".SAT" in all program types. - // For less than nv4, the "_SAT" modifier is only available in - // fragment shaders. Every thing else will fake it later in - // emit_ARB1_dest_modifiers() ... - if (support_nv4(ctx)) - sat_str = ".SAT"; - else if (shader_is_pixel(ctx)) - sat_str = "_SAT"; - } // if - - const char *pp_str = ""; - if (arg->result_mod & MOD_PP) - { - // Most ARB1 profiles can't do partial precision (MOD_PP), but that's - // okay. The spec says lots of Direct3D implementations ignore the - // flag anyhow. - if (support_nv4(ctx)) - pp_str = "H"; - } // if - - // CENTROID only allowed in DCL opcodes, which shouldn't come through here. 
- assert((arg->result_mod & MOD_CENTROID) == 0); - - char regnum_str[16]; - const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype, - arg->regnum, regnum_str, - sizeof (regnum_str)); - if (regtype_str == NULL) - { - fail(ctx, "Unknown destination register type."); - return buf; - } // if - - char writemask_str[6]; - size_t i = 0; - const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); - if (!scalar && !writemask_xyzw(arg->writemask)) - { - writemask_str[i++] = '.'; - if (arg->writemask0) writemask_str[i++] = 'x'; - if (arg->writemask1) writemask_str[i++] = 'y'; - if (arg->writemask2) writemask_str[i++] = 'z'; - if (arg->writemask3) writemask_str[i++] = 'w'; - } // if - writemask_str[i] = '\0'; - assert(i < sizeof (writemask_str)); - - //const char *pred_left = ""; - //const char *pred_right = ""; - char pred[32] = { '\0' }; - if (ctx->predicated) - { - fail(ctx, "dest register predication currently unsupported in arb1"); - return buf; - //pred_left = "("; - //pred_right = ") "; - make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg, - pred, sizeof (pred)); - } // if - - snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str, - regtype_str, regnum_str, writemask_str); - // !!! FIXME: make sure the scratch buffer was large enough. 
- return buf; -} // make_ARB1_destarg_string - - -static void emit_ARB1_dest_modifiers(Context *ctx) -{ - const DestArgInfo *arg = &ctx->dest_arg; - - if (arg->result_shift != 0x0) - { - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - const char *multiplier = NULL; - - switch (arg->result_shift) - { - case 0x1: multiplier = "2.0"; break; - case 0x2: multiplier = "4.0"; break; - case 0x3: multiplier = "8.0"; break; - case 0xD: multiplier = "0.125"; break; - case 0xE: multiplier = "0.25"; break; - case 0xF: multiplier = "0.5"; break; - } // switch - - if (multiplier != NULL) - { - char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var)); - output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier); - } // if - } // if - - if (arg->result_mod & MOD_SATURATE) - { - // nv4 and/or pixel shaders just used the "SAT" modifier, instead. - if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) ) - { - char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var)); - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - output_line(ctx, "MIN%s, %s, 1.0;", dst, var); - output_line(ctx, "MAX%s, %s, 0.0;", dst, var); - } // if - } // if -} // emit_ARB1_dest_modifiers - - -static const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx, - char *buf, const size_t buflen) -{ - if (idx >= STATICARRAYLEN(ctx->source_args)) - { - fail(ctx, "Too many source args"); - *buf = '\0'; - return buf; - } // if - - const SourceArgInfo *arg = &ctx->source_args[idx]; - return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen); -} // make_ARB1_srcarg_string - -static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode) -{ - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "%s%s, %s;", opcode, dst, src0); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_opcode_ds - -static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode) -{ - char 
dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_opcode_dss - -static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode) -{ - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_opcode_dsss - - -#define EMIT_ARB1_OPCODE_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_D_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_d(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_S_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_s(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_SS_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_ss(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_DS_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_ds(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_dss(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_dsss(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - emit_ARB1_opcode_dssss(ctx, #op); \ - } -#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \ - static void emit_ARB1_##op(Context *ctx) { \ - failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \ - } - 
- -static void emit_ARB1_start(Context *ctx, const char *profilestr) -{ - const char *shader_str = NULL; - const char *shader_full_str = NULL; - if (shader_is_vertex(ctx)) - { - shader_str = "vp"; - shader_full_str = "vertex"; - } // if - else if (shader_is_pixel(ctx)) - { - shader_str = "fp"; - shader_full_str = "fragment"; - } // else if - else - { - failf(ctx, "Shader type %u unsupported in this profile.", - (uint) ctx->shader_type); - return; - } // if - - set_output(ctx, &ctx->preflight); - - if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0) - output_line(ctx, "!!ARB%s1.0", shader_str); - - #if SUPPORT_PROFILE_ARB1_NV - else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0) - { - ctx->profile_supports_nv2 = 1; - output_line(ctx, "!!ARB%s1.0", shader_str); - output_line(ctx, "OPTION NV_%s_program2;", shader_full_str); - } // else if - - else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0) - { - // there's no NV_fragment_program3, so just use 2. - const int ver = shader_is_pixel(ctx) ? 2 : 3; - ctx->profile_supports_nv2 = 1; - ctx->profile_supports_nv3 = 1; - output_line(ctx, "!!ARB%s1.0", shader_str); - output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver); - } // else if - - else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0) - { - ctx->profile_supports_nv2 = 1; - ctx->profile_supports_nv3 = 1; - ctx->profile_supports_nv4 = 1; - output_line(ctx, "!!NV%s4.0", shader_str); - } // else if - #endif - - else - { - failf(ctx, "Profile '%s' unsupported or unknown.", profilestr); - } // else - - set_output(ctx, &ctx->mainline); -} // emit_ARB1_start - -static void emit_ARB1_end(Context *ctx) -{ - // ps_1_* writes color to r0 instead oC0. We move it to the right place. - // We don't have to worry about a RET opcode messing this up, since - // RET isn't available before ps_2_0. 
- if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) - { - set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); - output_line(ctx, "MOV oC0, r0;"); - } // if - - output_line(ctx, "END"); -} // emit_ARB1_end - -static void emit_ARB1_phase(Context *ctx) -{ - // no-op in arb1. -} // emit_ARB1_phase - -static inline const char *arb1_float_temp(const Context *ctx) -{ - // nv4 lets you specify data type. - return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP"; -} // arb1_float_temp - -static void emit_ARB1_finalize(Context *ctx) -{ - push_output(ctx, &ctx->preflight); - - if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position) - output_line(ctx, "OPTION ARB_position_invariant;"); - - if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs) - output_line(ctx, "OPTION ARB_draw_buffers;"); - - pop_output(ctx); - - const char *tmpstr = arb1_float_temp(ctx); - int i; - push_output(ctx, &ctx->globals); - for (i = 0; i < ctx->max_scratch_registers; i++) - { - char buf[64]; - allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - output_line(ctx, "%s %s;", tmpstr, buf); - } // for - - // nv2 fragment programs (and anything nv4) have a real REP/ENDREP. - if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) ) - { - // set up temps for nv2 REP/ENDREP emulation through branching. - for (i = 0; i < ctx->max_reps; i++) - output_line(ctx, "TEMP rep%d;", i); - } // if - - pop_output(ctx); - assert(ctx->scratch_registers == ctx->max_scratch_registers); -} // emit_ARB1_finalize - -static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum) -{ - // !!! FIXME: dependency on ARB1 profile. // !!! FIXME about FIXME: huh? - char varname[64]; - get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - push_output(ctx, &ctx->globals); - switch (regtype) - { - case REG_TYPE_ADDRESS: - if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. 
- { - // We have to map texture registers to temps for ps_1_1, since - // they work like temps, initialize with tex coords, and the - // ps_1_1 TEX opcode expects to overwrite it. - if (!shader_version_atleast(ctx, 1, 4)) - { - output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname); - push_output(ctx, &ctx->mainline_top); - output_line(ctx, "MOV %s, fragment.texcoord[%d];", - varname, regnum); - pop_output(ctx); - } // if - break; - } // if - - // nv4 replaced address registers with generic int registers. - if (support_nv4(ctx)) - output_line(ctx, "INT TEMP %s;", varname); - else - { - // nv2 has four-component address already, but stock arb1 has - // to emulate it in a temporary, and move components to the - // scalar ADDRESS register on demand. - output_line(ctx, "ADDRESS %s;", varname); - if (!support_nv2(ctx)) - output_line(ctx, "TEMP addr%d;", regnum); - } // else - break; - - //case REG_TYPE_PREDICATE: - // output_line(ctx, "bvec4 %s;", varname); - // break; - case REG_TYPE_TEMP: - output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname); - break; - //case REG_TYPE_LOOP: - // break; // no-op. We declare these in for loops at the moment. - //case REG_TYPE_LABEL: - // break; // no-op. If we see it here, it means we optimized it out. - default: - fail(ctx, "BUG: we used a register we don't know how to define."); - break; - } // switch - pop_output(ctx); -} // emit_ARB1_global - -static void emit_ARB1_array(Context *ctx, VariableList *var) -{ - // All uniforms are now packed tightly into the program.local array, - // instead of trying to map them to the d3d registers. So this needs to - // map to the next piece of the array we haven't used yet. Thankfully, - // arb1 lets you make a PARAM array that maps to a subset of another - // array; we don't need to do offsets, since myarray[0] can map to - // program.local[5] without any extra math from us. 
- const int base = var->index; - const int size = var->count; - const int arb1base = ctx->uniform_float4_count + - ctx->uniform_int4_count + - ctx->uniform_bool_count; - char varname[64]; - get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname)); - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname, - size, arb1base, (arb1base + size) - 1); - pop_output(ctx); - var->emit_position = arb1base; -} // emit_ARB1_array - -static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist, - int base, int size) -{ - char varname[64]; - get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname)); - int i; - - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s[%d] = {", varname, size); - ctx->indent++; - - for (i = 0; i < size; i++) - { - while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) - clist = clist->next; - assert(clist->constant.index == (base + i)); - - char val0[32]; - char val1[32]; - char val2[32]; - char val3[32]; - floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); - floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); - floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); - floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); - - output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3, - (i < (size-1)) ? "," : ""); - - clist = clist->next; - } // for - - ctx->indent--; - output_line(ctx, "};"); - pop_output(ctx); -} // emit_ARB1_const_array - -static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum, - const VariableList *var) -{ - // We pack these down into the program.local array, so if we only use - // register c439, it'll actually map to program.local[0]. This will - // prevent overflows when we actually have enough resources to run. 
- - const char *arrayname = "program.local"; - int index = 0; - - char varname[64]; - get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - push_output(ctx, &ctx->globals); - - if (var == NULL) - { - // all types share one array (rather, all types convert to float4). - index = ctx->uniform_float4_count + ctx->uniform_int4_count + - ctx->uniform_bool_count; - } // if - - else - { - const int arraybase = var->index; - if (var->constant) - { - const int arraysize = var->count; - arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase, - arraysize, (char *) alloca(64), 64); - index = (regnum - arraybase); - } // if - else - { - assert(var->emit_position != -1); - index = (regnum - arraybase) + var->emit_position; - } // else - } // else - - output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index); - pop_output(ctx); -} // emit_ARB1_uniform - -static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb) -{ - // this is mostly a no-op...you don't predeclare samplers in arb1. - - if (tb) // This sampler used a ps_1_1 TEXBEM opcode? - { - const int index = ctx->uniform_float4_count + ctx->uniform_int4_count + - ctx->uniform_bool_count; - char var[64]; - get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var)); - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index); - output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1); - pop_output(ctx); - ctx->uniform_float4_count += 2; - } // if -} // emit_ARB1_sampler - -// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute(). -static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum, - MOJOSHADER_usage usage, int index, int wmask, - int flags) -{ - // !!! FIXME: this function doesn't deal with write masks at all yet! 
- const char *usage_str = NULL; - const char *arrayleft = ""; - const char *arrayright = ""; - char index_str[16] = { '\0' }; - - char varname[64]; - get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); - - //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? - - if (index != 0) // !!! FIXME: a lot of these MUST be zero. - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - - if (shader_is_vertex(ctx)) - { - // pre-vs3 output registers. - // these don't ever happen in DCL opcodes, I think. Map to vs_3_* - // output registers. - if (!shader_version_atleast(ctx, 3, 0)) - { - if (regtype == REG_TYPE_RASTOUT) - { - regtype = REG_TYPE_OUTPUT; - index = regnum; - switch ((const RastOutType) regnum) - { - case RASTOUT_TYPE_POSITION: - usage = MOJOSHADER_USAGE_POSITION; - break; - case RASTOUT_TYPE_FOG: - usage = MOJOSHADER_USAGE_FOG; - break; - case RASTOUT_TYPE_POINT_SIZE: - usage = MOJOSHADER_USAGE_POINTSIZE; - break; - } // switch - } // if - - else if (regtype == REG_TYPE_ATTROUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_COLOR; - index = regnum; - } // else if - - else if (regtype == REG_TYPE_TEXCRDOUT) - { - regtype = REG_TYPE_OUTPUT; - usage = MOJOSHADER_USAGE_TEXCOORD; - index = regnum; - } // else if - } // if - - // to avoid limitations of various GL entry points for input - // attributes (glSecondaryColorPointer() can only take 3 component - // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other - // issues), we set up all inputs as generic vertex attributes, so we - // can pass data in just about any form, and ignore the built-in GLSL - // attributes like gl_SecondaryColor. Output needs to use the the - // built-ins, though, but we don't have to worry about the GL entry - // point limitations there. 
- - if (regtype == REG_TYPE_INPUT) - { - const int attr = ctx->assigned_vertex_attributes++; - push_output(ctx, &ctx->globals); - output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr); - pop_output(ctx); - } // if - - else if (regtype == REG_TYPE_OUTPUT) - { - switch (usage) - { - case MOJOSHADER_USAGE_POSITION: - ctx->arb1_wrote_position = 1; - usage_str = "result.position"; - break; - case MOJOSHADER_USAGE_POINTSIZE: - usage_str = "result.pointsize"; - break; - case MOJOSHADER_USAGE_COLOR: - index_str[0] = '\0'; // no explicit number. - if (index == 0) - usage_str = "result.color.primary"; - else if (index == 1) - usage_str = "result.color.secondary"; - break; - case MOJOSHADER_USAGE_FOG: - usage_str = "result.fogcoord"; - break; - case MOJOSHADER_USAGE_TEXCOORD: - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - usage_str = "result.texcoord"; - arrayleft = "["; - arrayright = "]"; - break; - default: - // !!! FIXME: we need to deal with some more built-in varyings here. - break; - } // switch - - // !!! FIXME: the #define is a little hacky, but it means we don't - // !!! FIXME: have to track these separately if this works. - push_output(ctx, &ctx->globals); - // no mapping to built-in var? Just make it a regular global, pray. - if (usage_str == NULL) - output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname); - else - { - output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str, - arrayleft, index_str, arrayright); - } // else - pop_output(ctx); - } // else if - - else - { - fail(ctx, "unknown vertex shader attribute register"); - } // else - } // if - - else if (shader_is_pixel(ctx)) - { - const char *paramtype_str = "ATTRIB"; - - // samplers DCLs get handled in emit_ARB1_sampler(). - - if (flags & MOD_CENTROID) - { - if (!support_nv4(ctx)) // GL_NV_fragment_program4 adds centroid. - { - // !!! FIXME: should we just wing it without centroid here? 
- failf(ctx, "centroid unsupported in %s profile", - ctx->profile->name); - return; - } // if - - paramtype_str = "CENTROID ATTRIB"; - } // if - - if (regtype == REG_TYPE_COLOROUT) - { - paramtype_str = "OUTPUT"; - usage_str = "result.color"; - if (ctx->have_multi_color_outputs) - { - // We have to gamble that you have GL_ARB_draw_buffers. - // You probably do at this point if you have a sane setup. - snprintf(index_str, sizeof (index_str), "%u", (uint) regnum); - arrayleft = "["; - arrayright = "]"; - } // if - } // if - - else if (regtype == REG_TYPE_DEPTHOUT) - { - paramtype_str = "OUTPUT"; - usage_str = "result.depth"; - } // else if - - // !!! FIXME: can you actualy have a texture register with COLOR usage? - else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT)) - { - if (usage == MOJOSHADER_USAGE_TEXCOORD) - { - // ps_1_1 does a different hack for this attribute. - // Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code. - if (shader_version_atleast(ctx, 1, 4)) - { - snprintf(index_str, sizeof (index_str), "%u", (uint) index); - usage_str = "fragment.texcoord"; - arrayleft = "["; - arrayright = "]"; - } // if - } // if - - else if (usage == MOJOSHADER_USAGE_COLOR) - { - index_str[0] = '\0'; // no explicit number. - if (index == 0) - usage_str = "fragment.color.primary"; - else if (index == 1) - usage_str = "fragment.color.secondary"; - else - fail(ctx, "unsupported color index"); - } // else if - } // else if - - else if (regtype == REG_TYPE_MISCTYPE) - { - const MiscTypeType mt = (MiscTypeType) regnum; - if (mt == MISCTYPE_TYPE_FACE) - { - if (support_nv4(ctx)) // FINALLY, a vFace equivalent in nv4! - { - index_str[0] = '\0'; // no explicit number. - usage_str = "fragment.facing"; - } // if - else - { - failf(ctx, "vFace unsupported in %s profile", - ctx->profile->name); - } // else - } // if - else if (mt == MISCTYPE_TYPE_POSITION) - { - index_str[0] = '\0'; // no explicit number. - usage_str = "fragment.position"; // !!! 
FIXME: is this the same coord space as D3D? - } // else if - else - { - fail(ctx, "BUG: unhandled misc register"); - } // else - } // else if - - else - { - fail(ctx, "unknown pixel shader attribute register"); - } // else - - if (usage_str != NULL) - { - push_output(ctx, &ctx->globals); - output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname, - usage_str, arrayleft, index_str, arrayright); - pop_output(ctx); - } // if - } // else if - - else - { - fail(ctx, "Unknown shader type"); // state machine should catch this. - } // else -} // emit_ARB1_attribute - -static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ } - -static void emit_ARB1_NOP(Context *ctx) -{ - // There is no NOP in arb1. Just don't output anything here. -} // emit_ARB1_NOP - -EMIT_ARB1_OPCODE_DS_FUNC(MOV) -EMIT_ARB1_OPCODE_DSS_FUNC(ADD) -EMIT_ARB1_OPCODE_DSS_FUNC(SUB) -EMIT_ARB1_OPCODE_DSSS_FUNC(MAD) -EMIT_ARB1_OPCODE_DSS_FUNC(MUL) -EMIT_ARB1_OPCODE_DS_FUNC(RCP) - -static void emit_ARB1_RSQ(Context *ctx) -{ - // nv4 doesn't force abs() on this, so negative values will generate NaN. - // The spec says you should force the abs() yourself. - if (!support_nv4(ctx)) - { - emit_ARB1_opcode_ds(ctx, "RSQ"); // pre-nv4 implies ABS. - return; - } // if - - // we can optimize this to use nv2's |abs| construct in some cases. 
- if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) || - (ctx->source_args[0].src_mod == SRCMOD_NEGATE) || - (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) ) - ctx->source_args[0].src_mod = SRCMOD_ABS; - - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - - if (ctx->source_args[0].src_mod == SRCMOD_ABS) - output_line(ctx, "RSQ%s, %s;", dst, src0); - else - { - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - output_line(ctx, "ABS %s, %s;", buf, src0); - output_line(ctx, "RSQ%s, %s.x;", dst, buf); - } // else - - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_RSQ - -EMIT_ARB1_OPCODE_DSS_FUNC(DP3) -EMIT_ARB1_OPCODE_DSS_FUNC(DP4) -EMIT_ARB1_OPCODE_DSS_FUNC(MIN) -EMIT_ARB1_OPCODE_DSS_FUNC(MAX) -EMIT_ARB1_OPCODE_DSS_FUNC(SLT) -EMIT_ARB1_OPCODE_DSS_FUNC(SGE) - -static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); } - -static void arb1_log(Context *ctx, const char *opcode) -{ - // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too - // we can optimize this to use nv2's |abs| construct in some cases. 
- if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) || - (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) ) - ctx->source_args[0].src_mod = SRCMOD_ABS; - - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - - if (ctx->source_args[0].src_mod == SRCMOD_ABS) - output_line(ctx, "%s%s, %s;", opcode, dst, src0); - else - { - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - output_line(ctx, "ABS %s, %s;", buf, src0); - output_line(ctx, "%s%s, %s.x;", opcode, dst, buf); - } // else - - emit_ARB1_dest_modifiers(ctx); -} // arb1_log - - -static void emit_ARB1_LOG(Context *ctx) -{ - arb1_log(ctx, "LG2"); -} // emit_ARB1_LOG - - -EMIT_ARB1_OPCODE_DS_FUNC(LIT) -EMIT_ARB1_OPCODE_DSS_FUNC(DST) - -static void emit_ARB1_LRP(Context *ctx) -{ - if (shader_is_pixel(ctx)) // fragment shaders have a matching LRP opcode. - emit_ARB1_opcode_dsss(ctx, "LRP"); - else - { - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - - // LRP is: dest = src2 + src0 * (src1 - src2) - output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2); - output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2); - emit_ARB1_dest_modifiers(ctx); - } // else -} // emit_ARB1_LRP - -EMIT_ARB1_OPCODE_DS_FUNC(FRC) - -static void arb1_MxXy(Context *ctx, const int x, const int y) -{ - DestArgInfo *dstarg = &ctx->dest_arg; - const int origmask = dstarg->writemask; - char src0[64]; - int i; - - make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - - for (i = 0; i < y; i++) - { - char dst[64]; - char row[64]; - make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row)); - set_dstarg_writemask(dstarg, 1 << i); - 
make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row); - } // for - - set_dstarg_writemask(dstarg, origmask); - emit_ARB1_dest_modifiers(ctx); -} // arb1_MxXy - -static void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); } -static void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); } -static void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); } -static void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); } -static void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); } - -static void emit_ARB1_CALL(Context *ctx) -{ - if (!support_nv2(ctx)) // no branching in stock ARB1. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - return; - } // if - - char labelstr[64]; - get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr)); - output_line(ctx, "CAL %s;", labelstr); -} // emit_ARB1_CALL - -static void emit_ARB1_CALLNZ(Context *ctx) -{ - // !!! FIXME: if src1 is a constbool that's true, we can remove the - // !!! FIXME: if. If it's false, we can make this a no-op. - - if (!support_nv2(ctx)) // no branching in stock ARB1. - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - else - { - // !!! FIXME: double-check this. - char labelstr[64]; - char scratch[64]; - char src1[64]; - get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr)); - get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); - allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); - output_line(ctx, "MOVC %s, %s;", scratch, src1); - output_line(ctx, "CAL %s (NE.x);", labelstr); - } // else -} // emit_ARB1_CALLNZ - -// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4. -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP) - -static void emit_ARB1_RET(Context *ctx) -{ - // don't fail() if no nv2...maybe we're just ending the mainline? 
- // if we're ending a LABEL that had no CALL, this would all be written - // to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will - // just end up throwing all this code out. - if (support_nv2(ctx)) // no branching in stock ARB1. - output_line(ctx, "RET;"); - set_output(ctx, &ctx->mainline); // in case we were ignoring this function. -} // emit_ARB1_RET - - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP) - -static void emit_ARB1_LABEL(Context *ctx) -{ - if (!support_nv2(ctx)) // no branching in stock ARB1. - return; // don't fail()...maybe we never use it, but do fail in CALL. - - const int label = ctx->source_args[0].regnum; - RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); - - // MSDN specs say CALL* has to come before the LABEL, so we know if we - // can ditch the entire function here as unused. - if (reg == NULL) - set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. - - // !!! FIXME: it would be nice if we could determine if a function is - // !!! FIXME: only called once and, if so, forcibly inline it. - - //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; - char labelstr[64]; - get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr)); - output_line(ctx, "%s:", labelstr); -} // emit_ARB1_LABEL - - -static void emit_ARB1_POW(Context *ctx) -{ - // we can optimize this to use nv2's |abs| construct in some cases. 
- if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) || - (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) ) - ctx->source_args[0].src_mod = SRCMOD_ABS; - - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - - if (ctx->source_args[0].src_mod == SRCMOD_ABS) - output_line(ctx, "POW%s, %s, %s;", dst, src0, src1); - else - { - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - output_line(ctx, "ABS %s, %s;", buf, src0); - output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1); - } // else - - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_POW - -static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); } - -static void emit_ARB1_SGN(Context *ctx) -{ - if (support_nv2(ctx)) - emit_ARB1_opcode_ds(ctx, "SSG"); - else - { - char dst[64]; - char src0[64]; - char scratch1[64]; - char scratch2[64]; - make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1)); - allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2)); - output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0); - output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0); - output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2); - emit_ARB1_dest_modifiers(ctx); - } // else -} // emit_ARB1_SGN - -EMIT_ARB1_OPCODE_DS_FUNC(ABS) - -static void emit_ARB1_NRM(Context *ctx) -{ - // nv2 fragment programs (and anything nv4) have a real NRM. 
- if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - emit_ARB1_opcode_ds(ctx, "NRM"); - else - { - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0); - output_line(ctx, "RSQ %s.w, %s.w;", buf, buf); - output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0); - emit_ARB1_dest_modifiers(ctx); - } // else -} // emit_ARB1_NRM - - -static void emit_ARB1_SINCOS(Context *ctx) -{ - // we don't care about the temp registers that <= sm2 demands; ignore them. - const int mask = ctx->dest_arg.writemask; - - // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes. - if ((shader_is_pixel(ctx)) || (support_nv4(ctx))) - { - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - if (writemask_x(mask)) - output_line(ctx, "COS%s, %s;", dst, src0); - else if (writemask_y(mask)) - output_line(ctx, "SIN%s, %s;", dst, src0); - else if (writemask_xy(mask)) - output_line(ctx, "SCS%s, %s;", dst, src0); - } // if - - // nv2+ profiles have sin and cos opcodes. - else if (support_nv2(ctx)) - { - char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - if (writemask_x(mask)) - output_line(ctx, "COS %s.x, %s;", dst, src0); - else if (writemask_y(mask)) - output_line(ctx, "SIN %s.y, %s;", dst, src0); - else if (writemask_xy(mask)) - { - output_line(ctx, "SIN %s.x, %s;", dst, src0); - output_line(ctx, "COS %s.y, %s;", dst, src0); - } // else if - } // if - - else // big nasty. 
- { - char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); - const int need_sin = (writemask_x(mask) || writemask_xy(mask)); - const int need_cos = (writemask_y(mask) || writemask_xy(mask)); - char scratch[64]; - - if (need_sin || need_cos) - allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); - - // These sin() and cos() approximations originally found here: - // http://www.devmaster.net/forums/showthread.php?t=5784 - // - // const float B = 4.0f / M_PI; - // const float C = -4.0f / (M_PI * M_PI); - // float y = B * x + C * x * fabs(x); - // - // // optional better precision... - // const float P = 0.225f; - // y = P * (y * fabs(y) - y) + y; - // - // - // That first thing can be reduced to: - // const float y = ((1.2732395447351626861510701069801f * x) + - // ((-0.40528473456935108577551785283891f * x) * fabs(x))); - - if (need_sin) - { - // !!! FIXME: use SRCMOD_ABS here? - output_line(ctx, "ABS %s.x, %s.x;", dst, src0); - output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst); - output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0); - output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch); - } // if - - // cosine is sin(x + M_PI/2), but you have to wrap x to pi: - // if (x+(M_PI/2) > M_PI) - // x -= 2 * M_PI; - // - // which is... 
- // if (x+(1.57079637050628662109375) > 3.1415927410125732421875) - // x += -6.283185482025146484375; - - if (need_cos) - { - output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0); - output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch); - output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch); - output_line(ctx, "ABS %s.x, %s.x;", dst, src0); - output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst); - output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0); - output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch); - } // if - } // else - - // !!! FIXME: might not have done anything. Don't emit if we didn't. - if (!isfail(ctx)) - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_SINCOS - - -static void emit_ARB1_REP(Context *ctx) -{ - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - - // nv2 fragment programs (and everything nv4) have a real REP. - if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - output_line(ctx, "REP %s;", src0); - - else if (support_nv2(ctx)) - { - // no REP, but we can use branches. 
- char failbranch[32]; - char topbranch[32]; - const int toplabel = allocate_branch_label(ctx); - const int faillabel = allocate_branch_label(ctx); - get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch)); - get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch)); - - assert(((size_t) ctx->branch_labels_stack_index) < - STATICARRAYLEN(ctx->branch_labels_stack)-1); - - ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel; - ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel; - - char scratch[32]; - snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps); - output_line(ctx, "MOVC %s.x, %s;", scratch, src0); - output_line(ctx, "BRA %s (LE.x);", failbranch); - output_line(ctx, "%s:", topbranch); - } // else if - - else // stock ARB1 has no branching. - { - fail(ctx, "branching unsupported in this profile"); - } // else -} // emit_ARB1_REP - - -static void emit_ARB1_ENDREP(Context *ctx) -{ - // nv2 fragment programs (and everything nv4) have a real ENDREP. - if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - output_line(ctx, "ENDREP;"); - - else if (support_nv2(ctx)) - { - // no ENDREP, but we can use branches. - assert(ctx->branch_labels_stack_index >= 2); - - char failbranch[32]; - char topbranch[32]; - const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index]; - const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index]; - get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch)); - get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch)); - - char scratch[32]; - snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps); - output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch); - output_line(ctx, "BRA %s (GT.x);", topbranch); - output_line(ctx, "%s:", failbranch); - } // else if - - else // stock ARB1 has no branching. 
- { - fail(ctx, "branching unsupported in this profile"); - } // else -} // emit_ARB1_ENDREP - - -static void nv2_if(Context *ctx) -{ - // The condition code register MUST be set up before this! - // nv2 fragment programs (and everything nv4) have a real IF. - if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) ) - output_line(ctx, "IF EQ.x;"); - else - { - // there's no IF construct, but we can use a branch to a label. - char failbranch[32]; - const int label = allocate_branch_label(ctx); - get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch)); - - assert(((size_t) ctx->branch_labels_stack_index) - < STATICARRAYLEN(ctx->branch_labels_stack)); - - ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label; - - // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?). - output_line(ctx, "BRA %s (EQ.x);", failbranch); - } // else -} // nv2_if - - -static void emit_ARB1_IF(Context *ctx) -{ - if (support_nv2(ctx)) - { - char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); - char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); - output_line(ctx, "MOVC %s.x, %s;", buf, src0); - nv2_if(ctx); - } // if - - else // stock ARB1 has no branching. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - } // else -} // emit_ARB1_IF - - -static void emit_ARB1_ELSE(Context *ctx) -{ - // nv2 fragment programs (and everything nv4) have a real ELSE. - if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - output_line(ctx, "ELSE;"); - - else if (support_nv2(ctx)) - { - // there's no ELSE construct, but we can use a branch to a label. - assert(ctx->branch_labels_stack_index > 0); - - // At the end of the IF block, unconditionally jump to the ENDIF. 
- const int endlabel = allocate_branch_label(ctx); - char endbranch[32]; - get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch)); - output_line(ctx, "BRA %s;", endbranch); - - // Now mark the ELSE section with a lable. - const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1]; - char elsebranch[32]; - get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch)); - output_line(ctx, "%s:", elsebranch); - - // Replace the ELSE label with the ENDIF on the label stack. - ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel; - } // else if - - else // stock ARB1 has no branching. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - } // else -} // emit_ARB1_ELSE - - -static void emit_ARB1_ENDIF(Context *ctx) -{ - // nv2 fragment programs (and everything nv4) have a real ENDIF. - if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - output_line(ctx, "ENDIF;"); - - else if (support_nv2(ctx)) - { - // there's no ENDIF construct, but we can use a branch to a label. - assert(ctx->branch_labels_stack_index > 0); - const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index]; - char endbranch[32]; - get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch)); - output_line(ctx, "%s:", endbranch); - } // if - - else // stock ARB1 has no branching. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - } // else -} // emit_ARB1_ENDIF - - -static void emit_ARB1_BREAK(Context *ctx) -{ - // nv2 fragment programs (and everything nv4) have a real BREAK. - if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) - output_line(ctx, "BRK;"); - - else if (support_nv2(ctx)) - { - // no BREAK, but we can use branches. 
- assert(ctx->branch_labels_stack_index >= 2); - const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index]; - char failbranch[32]; - get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch)); - output_line(ctx, "BRA %s;", failbranch); - } // else if - - else // stock ARB1 has no branching. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - } // else -} // emit_ARB1_BREAK - - -static void emit_ARB1_MOVA(Context *ctx) -{ - // nv2 and nv3 can use the ARR opcode. - // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT. - if (support_nv4(ctx)) - emit_ARB1_opcode_ds(ctx, "ROUND.S"); // !!! FIXME: don't use a modifier here. - else if ((support_nv2(ctx)) || (support_nv3(ctx))) - emit_ARB1_opcode_ds(ctx, "ARR"); - else - { - char src0[64]; - char scratch[64]; - char addr[32]; - - make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); - snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum); - - // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE. - - // ARL uses floor(), but D3D expects round-to-nearest. - // There is probably a more efficient way to do this. - if (shader_is_pixel(ctx)) // CMP only exists in fragment programs. :/ - output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0); - else - { - output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0); - output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch); - } // else - - output_line(ctx, "ABS %s, %s;", addr, src0); - output_line(ctx, "ADD %s, %s, 0.5;", addr, addr); - output_line(ctx, "FLR %s, %s;", addr, addr); - output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch); - - // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx) - // wants to look at dest_arg, not our temp register. - assert(ctx->dest_arg.result_mod == 0); - assert(ctx->dest_arg.result_shift == 0); - - // we assign to the actual address register as needed. 
- ctx->last_address_reg_component = -1; - } // else -} // emit_ARB1_MOVA - - -static void emit_ARB1_TEXKILL(Context *ctx) -{ - // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle. - // We just map the x component to w. If it's negative, the fragment - // would discard anyhow, otherwise, it'll pass through okay. This saves - // us a temp register. - char dst[64]; - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - output_line(ctx, "KIL %s.xyzx;", dst); -} // emit_ARB1_TEXKILL - -static void arb1_texbem(Context *ctx, const int luminance) -{ - // !!! FIXME: this code counts on the register not having swizzles, etc. - const int stage = ctx->dest_arg.regnum; - char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src)); - char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); - char sampler[64]; - get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, - sampler, sizeof (sampler)); - - output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src); - output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp); - output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst); - output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage); - - if (luminance) // TEXBEML, not just TEXBEM? - { - output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;", - tmp, src, sampler, sampler); - output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp); - } // if - - emit_ARB1_dest_modifiers(ctx); -} // arb1_texbem - -static void emit_ARB1_TEXBEM(Context *ctx) -{ - arb1_texbem(ctx, 0); -} // emit_ARB1_TEXBEM - -static void emit_ARB1_TEXBEML(Context *ctx) -{ - arb1_texbem(ctx, 1); -} // emit_ARB1_TEXBEML - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) - - -static void emit_ARB1_TEXM3X2PAD(Context *ctx) -{ - // no-op ... work happens in emit_ARB1_TEXM3X2TEX(). 
-} // emit_ARB1_TEXM3X2PAD - -static void emit_ARB1_TEXM3X2TEX(Context *ctx) -{ - if (ctx->texm3x2pad_src0 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - const int stage = ctx->dest_arg.regnum; - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, - src0, sizeof (src0)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, - src1, sizeof (src1)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src2, sizeof (src2)); - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - - output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst); - output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); - output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_TEXM3X2TEX - - -static void emit_ARB1_TEXM3X3PAD(Context *ctx) -{ - // no-op ... work happens in emit_ARB1_TEXM3X3*(). -} // emit_ARB1_TEXM3X3PAD - - -static void emit_ARB1_TEXM3X3TEX(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- const int stage = ctx->dest_arg.regnum; - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D"; - - output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); - output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); - output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); - output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_TEXM3X3TEX - -static void emit_ARB1_TEXM3X3SPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char src5[64]; - char tmp[64]; - char tmp2[64]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. 
- const int stage = ctx->dest_arg.regnum; - allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); - allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, - src5, sizeof (src5)); - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D"; - - output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); - output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); - output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); - output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal - output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5); // normal * eyeray - - // !!! FIXME: This is goofy. There's got to be a way to do vector-wide - // !!! FIXME: divides or reciprocals...right? 
- output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2); - output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2); - output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2); - output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2); - output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2); - - output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp); - output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5); - output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_TEXM3X3SPEC - -static void emit_ARB1_TEXM3X3VSPEC(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - char tmp[64]; - char tmp2[64]; - char tmp3[64]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - const int stage = ctx->dest_arg.regnum; - allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); - allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2)); - allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); - const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); - const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? 
"CUBE" : "3D"; - - output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0); - output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2); - output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst); - output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); - output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); - output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); - output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal - output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3); // normal * eyeray - - // !!! FIXME: This is goofy. There's got to be a way to do vector-wide - // !!! FIXME: divides or reciprocals...right? - output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2); - output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2); - output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2); - output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2); - output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2); - - output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp); - output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3); - output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_TEXM3X3VSPEC - -static void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); } -static void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); } - -static void emit_ARB1_CND(Context *ctx) -{ - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); - - // CND compares against 0.5, but we need to compare against 0.0... - // ...subtract to make up the difference. - output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0); - // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... 
so just - // switch src1 and src2 to get the same results. - output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_CND - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) - -static void emit_ARB1_TEXM3X3(Context *ctx) -{ - if (ctx->texm3x3pad_src1 == -1) - return; - - char dst[64]; - char src0[64]; - char src1[64]; - char src2[64]; - char src3[64]; - char src4[64]; - - // !!! FIXME: this code counts on the register not having swizzles, etc. - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, - src0, sizeof (src0)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, - src1, sizeof (src1)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, - src2, sizeof (src2)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, - src3, sizeof (src3)); - get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, - src4, sizeof (src4)); - get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - - output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); - output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); - output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); - output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_TEXM3X3 - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) - -static void emit_ARB1_CMP(Context *ctx) -{ - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just - // switch src1 and src2 to get the same results. 
- output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1); - emit_ARB1_dest_modifiers(ctx); -} // emit_ARB1_CMP - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM) - - -static void emit_ARB1_DP2ADD(Context *ctx) -{ - if (support_nv4(ctx)) // nv4 has a built-in equivalent to DP2ADD. - emit_ARB1_opcode_dsss(ctx, "DP2A"); - else - { - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); - char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); - char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - char scratch[64]; - - // DP2ADD is: - // dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz - allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); - output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1); - output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch); - output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2); - emit_ARB1_dest_modifiers(ctx); - } // else -} // emit_ARB1_DP2ADD - - -static void emit_ARB1_DSX(Context *ctx) -{ - if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSX. - emit_ARB1_opcode_ds(ctx, "DDX"); - else - failf(ctx, "DSX unsupported in %s profile", ctx->profile->name); -} // emit_ARB1_DSX - - -static void emit_ARB1_DSY(Context *ctx) -{ - if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSY. - emit_ARB1_opcode_ds(ctx, "DDY"); - else - failf(ctx, "DSY unsupported in %s profile", ctx->profile->name); -} // emit_ARB1_DSY - -static void arb1_texld(Context *ctx, const char *opcode, const int texldd) -{ - // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly. - if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx))) - ctx->dest_arg.result_mod &= ~MOD_PP; - - char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); - - const int sm1 = !shader_version_atleast(ctx, 1, 4); - const int regnum = sm1 ? 
ctx->dest_arg.regnum : ctx->source_args[1].regnum; - RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum); - - const char *ttype = NULL; - char src0[64]; - if (sm1) - get_ARB1_destarg_varname(ctx, src0, sizeof (src0)); - else - get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); - //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? - - char src2[64] = { 0 }; - char src3[64] = { 0 }; - - if (texldd) - { - make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); - make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3)); - } // if - - // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters. - if (sreg == NULL) - { - fail(ctx, "TEXLD using undeclared sampler"); - return; - } // if - - // SM1 only specifies dst, so don't check swizzle there. - if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) ) - { - // !!! FIXME: does this ever actually happen? - fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment"); - } // if - - switch ((const TextureType) sreg->index) - { - case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"? - case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break; - case TEXTURE_TYPE_VOLUME: ttype = "3D"; break; - default: fail(ctx, "unknown texture type"); return; - } // switch - - if (texldd) - { - output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst, - src0, src2, src3, regnum, ttype); - } // if - else - { - output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0, - regnum, ttype); - } // else -} // arb1_texld - - -static void emit_ARB1_TEXLDD(Context *ctx) -{ - // With GL_NV_fragment_program2, we can use the TXD opcode. - // In stock arb1, we can settle for a standard texld, which isn't - // perfect, but oh well. 
- if (support_nv2(ctx)) - arb1_texld(ctx, "TXD", 1); - else - arb1_texld(ctx, "TEX", 0); -} // emit_ARB1_TEXLDD - - -static void emit_ARB1_TEXLDL(Context *ctx) -{ - if ((shader_is_vertex(ctx)) && (!support_nv3(ctx))) - { - failf(ctx, "Vertex shader TEXLDL unsupported in %s profile", - ctx->profile->name); - return; - } // if - - else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx))) - { - failf(ctx, "Pixel shader TEXLDL unsupported in %s profile", - ctx->profile->name); - return; - } // if - - // !!! FIXME: this doesn't map exactly to TEXLDL. Review this. - arb1_texld(ctx, "TXL", 0); -} // emit_ARB1_TEXLDL - - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP) -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC) - -static void emit_ARB1_IFC(Context *ctx) -{ - if (support_nv2(ctx)) - { - static const char *comps[] = { - "", "SGTC", "SEQC", "SGEC", "SGTC", "SNEC", "SLEC" - }; - - if (ctx->instruction_controls >= STATICARRAYLEN(comps)) - { - fail(ctx, "unknown comparison control"); - return; - } // if - - char src0[64]; - char src1[64]; - char scratch[64]; - - const char *comp = comps[ctx->instruction_controls]; - get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); - get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); - allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); - output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1); - nv2_if(ctx); - } // if - - else // stock ARB1 has no branching. - { - failf(ctx, "branching unsupported in %s profile", ctx->profile->name); - } // else -} // emit_ARB1_IFC - - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP) - -static void emit_ARB1_DEF(Context *ctx) -{ - const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? 
- char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); - char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); - char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); - char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); - - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s = { %s, %s, %s, %s };", - dst, val0, val1, val2, val3); - pop_output(ctx); -} // emit_ARB1_DEF - -static void emit_ARB1_DEFI(Context *ctx) -{ - char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - const int32 *x = (const int32 *) ctx->dwords; - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s = { %d, %d, %d, %d };", - dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); - pop_output(ctx); -} // emit_ARB1_DEFI - -static void emit_ARB1_DEFB(Context *ctx) -{ - char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); - push_output(ctx, &ctx->globals); - output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0); - pop_output(ctx); -} // emit_ARB1_DEFB - -static void emit_ARB1_DCL(Context *ctx) -{ - // no-op. We do this in our emit_attribute() and emit_uniform(). -} // emit_ARB1_DCL - -EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) - -static void emit_ARB1_TEXLD(Context *ctx) -{ - if (!shader_version_atleast(ctx, 1, 4)) - { - arb1_texld(ctx, "TEX", 0); - return; - } // if - - else if (!shader_version_atleast(ctx, 2, 0)) - { - // ps_1_4 is different, too! - fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME - return; - } // if - - // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly? 
- if (ctx->instruction_controls == CONTROL_TEXLD) - arb1_texld(ctx, "TEX", 0); - else if (ctx->instruction_controls == CONTROL_TEXLDP) - arb1_texld(ctx, "TXP", 0); - else if (ctx->instruction_controls == CONTROL_TEXLDB) - arb1_texld(ctx, "TXB", 0); -} // emit_ARB1_TEXLD - -#endif // SUPPORT_PROFILE_ARB1 +#if !SUPPORT_PROFILE_METAL +#define PROFILE_EMITTER_METAL(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_METAL(op) emit_METAL_##op, +PREDECLARE_PROFILE(METAL) +#endif +#if !SUPPORT_PROFILE_ARB1 +#define PROFILE_EMITTER_ARB1(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op, +PREDECLARE_PROFILE(ARB1) +#endif #if !AT_LEAST_ONE_PROFILE #error No profiles are supported. Fix your build. @@ -8611,7 +314,6 @@ static const struct { const char *from; const char *to; } profileMap[] = { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 }, }; - // The PROFILE_EMITTER_* items MUST be in the same order as profiles[]! #define PROFILE_EMITTERS(op) { \ PROFILE_EMITTER_D3D(op) \ diff --git a/profiles/mojoshader_profile.h b/profiles/mojoshader_profile.h new file mode 100644 index 00000000..1fada766 --- /dev/null +++ b/profiles/mojoshader_profile.h @@ -0,0 +1,365 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#ifndef MOJOSHADER_PROFILE_H +#define MOJOSHADER_PROFILE_H + +#include "../mojoshader_internal.h" + +typedef struct ConstantsList +{ + MOJOSHADER_constant constant; + struct ConstantsList *next; +} ConstantsList; + +typedef struct VariableList +{ + MOJOSHADER_uniformType type; + int index; + int count; + ConstantsList *constant; + int used; + int emit_position; // used in some profiles. 
+ struct VariableList *next; +} VariableList; + +typedef struct RegisterList +{ + RegisterType regtype; + int regnum; + MOJOSHADER_usage usage; + unsigned int index; + int writemask; + int misc; + int written; + const VariableList *array; + struct RegisterList *next; +} RegisterList; + +typedef struct +{ + const uint32 *token; // this is the unmolested token in the stream. + int regnum; + int swizzle; // xyzw (all four, not split out). + int swizzle_x; + int swizzle_y; + int swizzle_z; + int swizzle_w; + SourceMod src_mod; + RegisterType regtype; + int relative; + RegisterType relative_regtype; + int relative_regnum; + int relative_component; + const VariableList *relative_array; +} SourceArgInfo; + +struct Profile; // predeclare. + +typedef struct CtabData +{ + int have_ctab; + int symbol_count; + MOJOSHADER_symbol *symbols; +} CtabData; + +// Context...this is state that changes as we parse through a shader... +typedef struct Context +{ + int isfail; + int out_of_memory; + MOJOSHADER_malloc malloc; + MOJOSHADER_free free; + void *malloc_data; + int current_position; + const uint32 *orig_tokens; + const uint32 *tokens; + uint32 tokencount; + int know_shader_size; + const MOJOSHADER_swizzle *swizzles; + unsigned int swizzles_count; + const MOJOSHADER_samplerMap *samplermap; + unsigned int samplermap_count; + Buffer *output; + Buffer *preflight; + Buffer *globals; + Buffer *inputs; + Buffer *outputs; + Buffer *helpers; + Buffer *subroutines; + Buffer *mainline_intro; + Buffer *mainline_arguments; + Buffer *mainline_top; + Buffer *mainline; + Buffer *postflight; + Buffer *ignore; + Buffer *output_stack[3]; + int indent_stack[3]; + int output_stack_len; + int indent; + const char *shader_type_str; + const char *endline; + const char *mainfn; + int endline_len; + int profileid; + const struct Profile *profile; + MOJOSHADER_shaderType shader_type; + uint8 major_ver; + uint8 minor_ver; + DestArgInfo dest_arg; + SourceArgInfo source_args[5]; + SourceArgInfo predicate_arg; 
// for predicated instructions. + uint32 dwords[4]; + uint32 version_token; + int instruction_count; + uint32 instruction_controls; + uint32 previous_opcode; + int coissue; + int loops; + int reps; + int max_reps; + int cmps; + int scratch_registers; + int max_scratch_registers; + int branch_labels_stack_index; + int branch_labels_stack[32]; + int assigned_branch_labels; + int assigned_vertex_attributes; + int last_address_reg_component; + RegisterList used_registers; + RegisterList defined_registers; + ErrorList *errors; + int constant_count; + ConstantsList *constants; + int uniform_count; + int uniform_float4_count; + int uniform_int4_count; + int uniform_bool_count; + RegisterList uniforms; + int attribute_count; + RegisterList attributes; + int sampler_count; + RegisterList samplers; + VariableList *variables; // variables to register mapping. + int centroid_allowed; + CtabData ctab; + int have_relative_input_registers; + int have_multi_color_outputs; + int determined_constants_arrays; + int predicated; + int uses_pointsize; + int uses_fog; + + // !!! FIXME: move these into SUPPORT_PROFILE sections. + int glsl_generated_lit_helper; + int glsl_generated_texldd_setup; + int glsl_generated_texm3x3spec_helper; + int arb1_wrote_position; + // !!! FIXME: move these into SUPPORT_PROFILE sections. 
+ + int have_preshader; + int ignores_ctab; + int reset_texmpad; + int texm3x2pad_dst0; + int texm3x2pad_src0; + int texm3x3pad_dst0; + int texm3x3pad_src0; + int texm3x3pad_dst1; + int texm3x3pad_src1; + MOJOSHADER_preshader *preshader; + +#if SUPPORT_PROFILE_ARB1_NV + int profile_supports_nv2; + int profile_supports_nv3; + int profile_supports_nv4; +#endif +#if SUPPORT_PROFILE_GLSL120 + int profile_supports_glsl120; +#endif +#if SUPPORT_PROFILE_GLSLES + int profile_supports_glsles; +#endif + +#if SUPPORT_PROFILE_METAL + int metal_need_header_common; + int metal_need_header_math; + int metal_need_header_relational; + int metal_need_header_geometric; + int metal_need_header_graphics; + int metal_need_header_texture; +#endif +} Context; + +// Use these macros so we can remove all bits of these profiles from the build. +#if SUPPORT_PROFILE_ARB1_NV +#define support_nv2(ctx) ((ctx)->profile_supports_nv2) +#define support_nv3(ctx) ((ctx)->profile_supports_nv3) +#define support_nv4(ctx) ((ctx)->profile_supports_nv4) +#else +#define support_nv2(ctx) (0) +#define support_nv3(ctx) (0) +#define support_nv4(ctx) (0) +#endif + +#if SUPPORT_PROFILE_GLSL120 +#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120) +#else +#define support_glsl120(ctx) (0) +#endif + +#if SUPPORT_PROFILE_GLSLES +#define support_glsles(ctx) ((ctx)->profile_supports_glsles) +#else +#define support_glsles(ctx) (0) +#endif + +// Profile entry points... + +// one emit function for each opcode in each profile. +typedef void (*emit_function)(Context *ctx); + +// one emit function for starting output in each profile. +typedef void (*emit_start)(Context *ctx, const char *profilestr); + +// one emit function for ending output in each profile. +typedef void (*emit_end)(Context *ctx); + +// one emit function for phase opcode output in each profile. +typedef void (*emit_phase)(Context *ctx); + +// one emit function for finalizing output in each profile. 
+typedef void (*emit_finalize)(Context *ctx); + +// one emit function for global definitions in each profile. +typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum); + +// one emit function for relative uniform arrays in each profile. +typedef void (*emit_array)(Context *ctx, VariableList *var); + +// one emit function for relative constants arrays in each profile. +typedef void (*emit_const_array)(Context *ctx, + const struct ConstantsList *constslist, + int base, int size); + +// one emit function for uniforms in each profile. +typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var); + +// one emit function for samplers in each profile. +typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype, + int texbem); + +// one emit function for attributes in each profile. +typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags); + +// one args function for each possible sequence of opcode arguments. +typedef int (*args_function)(Context *ctx); + +// one state function for each opcode where we have state machine updates. +typedef void (*state_function)(Context *ctx); + +// one function for varnames in each profile. +typedef const char *(*varname_function)(Context *c, RegisterType t, int num); + +// one function for const var array in each profile. 
+typedef const char *(*const_array_varname_function)(Context *c, int base, int size); + +typedef struct Profile +{ + const char *name; + emit_start start_emitter; + emit_end end_emitter; + emit_phase phase_emitter; + emit_global global_emitter; + emit_array array_emitter; + emit_const_array const_array_emitter; + emit_uniform uniform_emitter; + emit_sampler sampler_emitter; + emit_attribute attribute_emitter; + emit_finalize finalize_emitter; + varname_function get_varname; + const_array_varname_function get_const_array_varname; +} Profile; + +// Common utilities... + +void out_of_memory(Context *ctx); +void *Malloc(Context *ctx, const size_t len); +char *StrDup(Context *ctx, const char *str); +void Free(Context *ctx, void *ptr); +void * MOJOSHADERCALL MallocBridge(int bytes, void *data); +void MOJOSHADERCALL FreeBridge(void *ptr, void *data); + +int set_output(Context *ctx, Buffer **section); +void push_output(Context *ctx, Buffer **section); +void pop_output(Context *ctx); + +uint32 ver_ui32(const uint8 major, const uint8 minor); +int shader_version_supported(const uint8 maj, const uint8 min); +int shader_version_atleast(const Context *ctx, const uint8 maj, + const uint8 min); +int shader_version_exactly(const Context *ctx, const uint8 maj, + const uint8 min); +int shader_is_pixel(const Context *ctx); +int shader_is_vertex(const Context *ctx); + +int isfail(const Context *ctx); +void failf(Context *ctx, const char *fmt, ...); +void fail(Context *ctx, const char *reason); + +void output_line(Context *ctx, const char *fmt, ...); +void output_blank_line(Context *ctx); + +void floatstr(Context *ctx, char *buf, size_t bufsize, float f, + int leavedecimal); + +RegisterList *reglist_insert(Context *ctx, RegisterList *prev, + const RegisterType regtype, + const int regnum); +RegisterList *reglist_find(const RegisterList *prev, + const RegisterType rtype, + const int regnum); +RegisterList *set_used_register(Context *ctx, + const RegisterType regtype, + const int regnum, 
+ const int written); +void set_defined_register(Context *ctx, const RegisterType rtype, + const int regnum); + +int writemask_xyzw(const int writemask); +int writemask_xyz(const int writemask); +int writemask_xy(const int writemask); +int writemask_x(const int writemask); +int writemask_y(const int writemask); +int replicate_swizzle(const int swizzle); +int no_swizzle(const int swizzle); +int vecsize_from_writemask(const int m); +void set_dstarg_writemask(DestArgInfo *dst, const int mask); + +int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type, + const RegisterType rtype, const int rnum); + +static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' }; + +const char *get_D3D_register_string(Context *ctx, + RegisterType regtype, + int regnum, char *regnum_str, + size_t regnum_size); + +// !!! FIXME: These should stay in the mojoshader_profile_d3d file +// !!! FIXME: but ARB1 relies on them, so we have to move them here. +// !!! FIXME: If/when we kill off ARB1, we can move these back. +const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt, + int regnum, char *buf, + const size_t len); +const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum); + +#endif \ No newline at end of file diff --git a/profiles/mojoshader_profile_arb1.c b/profiles/mojoshader_profile_arb1.c new file mode 100644 index 00000000..2e5bf900 --- /dev/null +++ b/profiles/mojoshader_profile_arb1.c @@ -0,0 +1,2252 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. 
+ */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +#if SUPPORT_PROFILE_ARB1 + +static inline const char *get_ARB1_register_string(Context *ctx, + const RegisterType regtype, const int regnum, + char *regnum_str, const size_t regnum_size) +{ + // turns out these are identical at the moment. + return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); +} // get_ARB1_register_string + +int allocate_scratch_register(Context *ctx) +{ + const int retval = ctx->scratch_registers++; + if (retval >= ctx->max_scratch_registers) + ctx->max_scratch_registers = retval + 1; + return retval; +} // allocate_scratch_register + +int allocate_branch_label(Context *ctx) +{ + return ctx->assigned_branch_labels++; +} // allocate_branch_label + +const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf, + const size_t buflen) +{ + const int scratch = allocate_scratch_register(ctx); + snprintf(buf, buflen, "scratch%d", scratch); + return buf; +} // allocate_ARB1_scratch_reg_name + +static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "branch_label%d", id); + return buf; +} // get_ARB1_branch_label_name + +const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt, + const int regnum, char *buf, + const size_t buflen) +{ + // turns out these are identical at the moment. + return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen); +} // get_ARB1_varname_in_buf + +const char *get_ARB1_varname(Context *ctx, const RegisterType rt, + const int regnum) +{ + // turns out these are identical at the moment. 
+ return get_D3D_varname(ctx, rt, regnum); +} // get_ARB1_varname + + +static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx, + const int base, const int size, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "c_array_%d_%d", base, size); + return buf; +} // get_ARB1_const_array_varname_in_buf + + +const char *get_ARB1_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_ARB1_const_array_varname + + +const char *make_ARB1_srcarg_string_in_buf(Context *ctx, + const SourceArgInfo *arg, + char *buf, size_t buflen) +{ + // !!! FIXME: this can hit pathological cases where we look like this... + // + // dp3 r1.xyz, t0_bx2, t0_bx2 + // mad r1.xyz, t0_bias, 1-r1, t0_bx2 + // + // ...which do a lot of duplicate work in arb1... + // + // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; + // MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 }; + // SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 }; + // MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 }; + // DP3 r1.xyz, scratch0, scratch1; + // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; + // SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1; + // SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 }; + // MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 }; + // MAD r1.xyz, scratch0, scratch1, scratch2; + // + // ...notice that the dp3 calculates the same value into two scratch + // registers. This case is easier to handle; just see if multiple + // source args are identical, build it up once, and use the same + // scratch register for multiple arguments in that opcode. + // Even better still, only calculate things once across instructions, + // and be smart about letting it linger in a scratch register until we + // definitely don't need the calculation anymore. That's harder to + // write, though. + + char regnum_str[16] = { '\0' }; + + // !!! FIXME: use get_ARB1_varname_in_buf() instead? 
+ const char *regtype_str = NULL; + if (!arg->relative) + { + regtype_str = get_ARB1_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + } // if + + const char *rel_lbracket = ""; + char rel_offset[32] = { '\0' }; + const char *rel_rbracket = ""; + char rel_swizzle[4] = { '\0' }; + const char *rel_regtype_str = ""; + if (arg->relative) + { + rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype, + arg->relative_regnum, + (char *) alloca(64), 64); + + rel_swizzle[0] = '.'; + rel_swizzle[1] = swizzle_channels[arg->relative_component]; + rel_swizzle[2] = '\0'; + + if (!support_nv2(ctx)) + { + // The address register in ARB1 only allows the '.x' component, so + // we need to load the component we need from a temp vector + // register into .x as needed. + assert(arg->relative_regtype == REG_TYPE_ADDRESS); + assert(arg->relative_regnum == 0); + if (ctx->last_address_reg_component != arg->relative_component) + { + output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str, + arg->relative_regnum, + swizzle_channels[arg->relative_component]); + ctx->last_address_reg_component = arg->relative_component; + } // if + + rel_swizzle[1] = 'x'; + } // if + + if (arg->regtype == REG_TYPE_INPUT) + regtype_str = "vertex.attrib"; + else + { + assert(arg->regtype == REG_TYPE_CONST); + const int arrayidx = arg->relative_array->index; + const int arraysize = arg->relative_array->count; + const int offset = arg->regnum - arrayidx; + assert(offset >= 0); + regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx, + arraysize, (char *) alloca(64), 64); + if (offset != 0) + snprintf(rel_offset, sizeof (rel_offset), " + %d", offset); + } // else + + rel_lbracket = "["; + rel_rbracket = "]"; + } // if + + // This is the source register with everything but swizzle and source mods. 
+ snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str, + rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset, + rel_rbracket); + + // Some of the source mods need to generate instructions to a temp + // register, in which case we'll replace the register name. + const SourceMod mod = arg->src_mod; + const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) || + ((mod == SRCMOD_ABS) && support_nv2(ctx)) ); + + if (!inplace) + { + const size_t len = 64; + char *stackbuf = (char *) alloca(len); + regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len); + regnum_str[0] = '\0'; // move value to scratch register. + rel_lbracket = ""; // scratch register won't use array. + rel_rbracket = ""; + rel_offset[0] = '\0'; + rel_swizzle[0] = '\0'; + rel_regtype_str = ""; + } // if + + const char *premod_str = ""; + const char *postmod_str = ""; + switch (mod) + { + case SRCMOD_NEGATE: + premod_str = "-"; + break; + + case SRCMOD_BIASNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_BIAS: + output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", + regtype_str, buf); + break; + + case SRCMOD_SIGNNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_SIGN: + output_line(ctx, + "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };", + regtype_str, buf); + break; + + case SRCMOD_COMPLEMENT: + output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;", + regtype_str, buf); + break; + + case SRCMOD_X2NEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_X2: + output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", + regtype_str, buf); + break; + + case SRCMOD_DZ: + fail(ctx, "SRCMOD_DZ currently unsupported in arb1"); + postmod_str = "_dz"; + break; + + case SRCMOD_DW: + fail(ctx, "SRCMOD_DW currently unsupported in arb1"); + postmod_str = "_dw"; + break; + + case SRCMOD_ABSNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_ABS: + if (!support_nv2(ctx)) // GL_NV_vertex_program2_option adds this. 
+ output_line(ctx, "ABS %s, %s;", regtype_str, buf); + else + { + premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|"; + postmod_str = "|"; + } // else + break; + + case SRCMOD_NOT: + fail(ctx, "SRCMOD_NOT currently unsupported in arb1"); + premod_str = "!"; + break; + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + } // switch + + char swizzle_str[6]; + size_t i = 0; + + if (support_nv4(ctx)) // vFace must be output as "vFace.x" in nv4. + { + if (arg->regtype == REG_TYPE_MISCTYPE) + { + if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE ) + { + swizzle_str[i++] = '.'; + swizzle_str[i++] = 'x'; + } // if + } // if + } // if + + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !no_swizzle(arg->swizzle)) + { + swizzle_str[i++] = '.'; + + // .xxxx is the same as .x, but .xx is illegal...scalar or full! + if (replicate_swizzle(arg->swizzle)) + swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; + else + { + swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_y]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_z]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_w]; + } // else + } // if + swizzle_str[i] = '\0'; + assert(i < sizeof (swizzle_str)); + + snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str, + regtype_str, regnum_str, rel_lbracket, + rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket, + swizzle_str, postmod_str); + // !!! FIXME: make sure the scratch buffer was large enough. 
+ return buf; +} // make_ARB1_srcarg_string_in_buf + +const char *get_ARB1_destarg_varname(Context *ctx, char *buf, + const size_t buflen) +{ + const DestArgInfo *arg = &ctx->dest_arg; + return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen); +} // get_ARB1_destarg_varname + +const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx, + char *buf, const size_t buflen) +{ + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + *buf = '\0'; + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen); +} // get_ARB1_srcarg_varname + + +const char *make_ARB1_destarg_string(Context *ctx, char *buf, + const size_t buflen) +{ + const DestArgInfo *arg = &ctx->dest_arg; + + *buf = '\0'; + + const char *sat_str = ""; + if (arg->result_mod & MOD_SATURATE) + { + // nv4 can use ".SAT" in all program types. + // For less than nv4, the "_SAT" modifier is only available in + // fragment shaders. Every thing else will fake it later in + // emit_ARB1_dest_modifiers() ... + if (support_nv4(ctx)) + sat_str = ".SAT"; + else if (shader_is_pixel(ctx)) + sat_str = "_SAT"; + } // if + + const char *pp_str = ""; + if (arg->result_mod & MOD_PP) + { + // Most ARB1 profiles can't do partial precision (MOD_PP), but that's + // okay. The spec says lots of Direct3D implementations ignore the + // flag anyhow. + if (support_nv4(ctx)) + pp_str = "H"; + } // if + + // CENTROID only allowed in DCL opcodes, which shouldn't come through here. 
+ assert((arg->result_mod & MOD_CENTROID) == 0); + + char regnum_str[16]; + const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + if (regtype_str == NULL) + { + fail(ctx, "Unknown destination register type."); + return buf; + } // if + + char writemask_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !writemask_xyzw(arg->writemask)) + { + writemask_str[i++] = '.'; + if (arg->writemask0) writemask_str[i++] = 'x'; + if (arg->writemask1) writemask_str[i++] = 'y'; + if (arg->writemask2) writemask_str[i++] = 'z'; + if (arg->writemask3) writemask_str[i++] = 'w'; + } // if + writemask_str[i] = '\0'; + assert(i < sizeof (writemask_str)); + + //const char *pred_left = ""; + //const char *pred_right = ""; + char pred[32] = { '\0' }; + if (ctx->predicated) + { + fail(ctx, "dest register predication currently unsupported in arb1"); + return buf; + //pred_left = "("; + //pred_right = ") "; + make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg, + pred, sizeof (pred)); + } // if + + snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str, + regtype_str, regnum_str, writemask_str); + // !!! FIXME: make sure the scratch buffer was large enough. 
    return buf;
} // make_ARB1_destarg_string


// Emit the follow-up instructions the current destination argument needs
// after the main opcode: a MUL by a constant for a result shift, and a
// MIN/MAX clamp to [0,1] for saturate when the "SAT" modifier isn't used.
void emit_ARB1_dest_modifiers(Context *ctx)
{
    const DestArgInfo *arg = &ctx->dest_arg;

    if (arg->result_shift != 0x0)
    {
        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        const char *multiplier = NULL;

        // Shift values not listed here leave multiplier NULL, so no MUL
        // is emitted for them.
        switch (arg->result_shift)
        {
            case 0x1: multiplier = "2.0"; break;
            case 0x2: multiplier = "4.0"; break;
            case 0x3: multiplier = "8.0"; break;
            case 0xD: multiplier = "0.125"; break;
            case 0xE: multiplier = "0.25"; break;
            case 0xF: multiplier = "0.5"; break;
        } // switch

        if (multiplier != NULL)
        {
            char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
            output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
        } // if
    } // if

    if (arg->result_mod & MOD_SATURATE)
    {
        // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
        if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
        {
            char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
            char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
            output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
            output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
        } // if
    } // if
} // emit_ARB1_dest_modifiers


// Build the string for source argument number `idx` of the current
// instruction into `buf`. An out-of-range `idx` fails the context and
// yields an empty string.
const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
                                    char *buf, const size_t buflen)
{
    if (idx >= STATICARRAYLEN(ctx->source_args))
    {
        fail(ctx, "Too many source args");
        *buf = '\0';
        return buf;
    } // if

    const SourceArgInfo *arg = &ctx->source_args[idx];
    return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen);
} // make_ARB1_srcarg_string

// Emit "<opcode><dest>, <src0>;" and then any destination modifiers.
void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
{
    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    output_line(ctx, "%s%s, %s;", opcode, dst, src0);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_ds

// Emit "<opcode><dest>, <src0>, <src1>;" and then any destination modifiers.
void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
{
    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
    output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dss

// Emit "<opcode><dest>, <src0>, <src1>, <src2>;" and then any destination
// modifiers.
void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
{
    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
    char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dsss


// Each macro below defines emit_ARB1_<op>() as a thin wrapper that passes
// the opcode's own name (stringified) to the matching emit_ARB1_opcode*
// helper; the suffix names the helper that is called.
#define EMIT_ARB1_OPCODE_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_D_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_d(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_S_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_s(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ds(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dsss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dssss(ctx, #op); \
    }
// Defines emit_ARB1_<op>() as a stub that fails the context.
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) { \
        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
    }


// Start a program for the profile named by `profilestr`: reject shader
// types other than vertex/pixel, write the program header (plus any NV
// OPTION line) to the preflight output, record which NV feature levels the
// profile supports, and leave output pointed at the mainline.
void emit_ARB1_start(Context *ctx, const char *profilestr)
{
    const char *shader_str = NULL;
    const char *shader_full_str = NULL;
    if (shader_is_vertex(ctx))
    {
        shader_str = "vp";
        shader_full_str = "vertex";
    } // if
    else if (shader_is_pixel(ctx))
    {
        shader_str = "fp";
        shader_full_str = "fragment";
    } // else if
    else
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // if

    set_output(ctx, &ctx->preflight);

    if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
        output_line(ctx, "!!ARB%s1.0", shader_str);

    #if SUPPORT_PROFILE_ARB1_NV
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
    {
        ctx->profile_supports_nv2 = 1;
        output_line(ctx, "!!ARB%s1.0", shader_str);
        output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
    } // else if

    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
    {
        // there's no NV_fragment_program3, so just use 2.
        const int ver = shader_is_pixel(ctx) ? 2 : 3;
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        output_line(ctx, "!!ARB%s1.0", shader_str);
        output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
    } // else if

    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
    {
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        ctx->profile_supports_nv4 = 1;
        output_line(ctx, "!!NV%s4.0", shader_str);
    } // else if
    #endif

    else
    {
        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
    } // else

    set_output(ctx, &ctx->mainline);
} // emit_ARB1_start

// Finish the program text: emits the ps_1_* color move when needed, then
// the closing "END" line.
void emit_ARB1_end(Context *ctx)
{
    // ps_1_* writes color to r0 instead oC0. We move it to the right place.
    // We don't have to worry about a RET opcode messing this up, since
    // RET isn't available before ps_2_0.
    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
    {
        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
        output_line(ctx, "MOV oC0, r0;");
    } // if

    output_line(ctx, "END");
} // emit_ARB1_end

void emit_ARB1_phase(Context *ctx)
{
    // no-op in arb1.
} // emit_ARB1_phase

// Declaration keyword(s) used for a float temporary in the current profile.
static inline const char *arb1_float_temp(const Context *ctx)
{
    // nv4 lets you specify data type.
    return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP";
} // arb1_float_temp

// Final pass once the shader body is known: adds OPTION lines to the
// preflight output and declares the scratch (and, for nv2 vertex programs,
// rep-counter) temporaries in the globals output.
void emit_ARB1_finalize(Context *ctx)
{
    push_output(ctx, &ctx->preflight);

    if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
        output_line(ctx, "OPTION ARB_position_invariant;");

    if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
        output_line(ctx, "OPTION ARB_draw_buffers;");

    pop_output(ctx);

    const char *tmpstr = arb1_float_temp(ctx);
    int i;
    push_output(ctx, &ctx->globals);
    // One declaration per scratch register; the name comes from the same
    // allocator the opcode emitters use.
    for (i = 0; i < ctx->max_scratch_registers; i++)
    {
        char buf[64];
        allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "%s %s;", tmpstr, buf);
    } // for

    // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
    if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
    {
        // set up temps for nv2 REP/ENDREP emulation through branching.
        for (i = 0; i < ctx->max_reps; i++)
            output_line(ctx, "TEMP rep%d;", i);
    } // if

    pop_output(ctx);
    assert(ctx->scratch_registers == ctx->max_scratch_registers);
} // emit_ARB1_finalize

// Declare register `regtype`/`regnum` in the globals output. Only address
// (texture, in pixel shaders) and temp registers are handled; any other
// type fails the context.
void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
{
    // !!! FIXME: dependency on ARB1 profile.  // !!! FIXME about FIXME: huh?
    char varname[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));

    push_output(ctx, &ctx->globals);
    switch (regtype)
    {
        case REG_TYPE_ADDRESS:
            if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
            {
                // We have to map texture registers to temps for ps_1_1, since
                //  they work like temps, initialize with tex coords, and the
                //  ps_1_1 TEX opcode expects to overwrite it.
                if (!shader_version_atleast(ctx, 1, 4))
                {
                    output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
                    push_output(ctx, &ctx->mainline_top);
                    output_line(ctx, "MOV %s, fragment.texcoord[%d];",
                                varname, regnum);
                    pop_output(ctx);
                } // if
                break;
            } // if

            // nv4 replaced address registers with generic int registers.
            if (support_nv4(ctx))
                output_line(ctx, "INT TEMP %s;", varname);
            else
            {
                // nv2 has four-component address already, but stock arb1 has
                //  to emulate it in a temporary, and move components to the
                //  scalar ADDRESS register on demand.
                output_line(ctx, "ADDRESS %s;", varname);
                if (!support_nv2(ctx))
                    output_line(ctx, "TEMP addr%d;", regnum);
            } // else
            break;

        //case REG_TYPE_PREDICATE:
        //    output_line(ctx, "bvec4 %s;", varname);
        //    break;
        case REG_TYPE_TEMP:
            output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
            break;
        //case REG_TYPE_LOOP:
        //    break; // no-op. We declare these in for loops at the moment.
        //case REG_TYPE_LABEL:
        //    break; // no-op. If we see it here, it means we optimized it out.
        default:
            fail(ctx, "BUG: we used a register we don't know how to define.");
            break;
    } // switch
    pop_output(ctx);
} // emit_ARB1_global

// Declare a uniform array as a PARAM array over program.local.
void emit_ARB1_array(Context *ctx, VariableList *var)
{
    // All uniforms are now packed tightly into the program.local array,
    //  instead of trying to map them to the d3d registers. So this needs to
    //  map to the next piece of the array we haven't used yet. Thankfully,
    //  arb1 lets you make a PARAM array that maps to a subset of another
    //  array; we don't need to do offsets, since myarray[0] can map to
    //  program.local[5] without any extra math from us.
    const int base = var->index;
    const int size = var->count;
    // First program.local slot not yet claimed by any uniform counted so far.
    const int arb1base = ctx->uniform_float4_count +
                         ctx->uniform_int4_count +
                         ctx->uniform_bool_count;
    char varname[64];
    get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname,
                size, arb1base, (arb1base + size) - 1);
    pop_output(ctx);
    var->emit_position = arb1base;  // read back by emit_ARB1_uniform().
} // emit_ARB1_array

// Declare a PARAM array of `size` literal float4 constants starting at
// register `base`, taking the values from the float entries of `clist`.
void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
                           int base, int size)
{
    char varname[64];
    get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
    int i;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = {", varname, size);
    ctx->indent++;

    for (i = 0; i < size; i++)
    {
        // Skip non-float constants. NOTE(review): this walks `clist` with
        // no NULL check, so it relies on the list holding at least `size`
        // float entries -- confirm against the caller.
        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
            clist = clist->next;
        assert(clist->constant.index == (base + i));

        char val0[32];
        char val1[32];
        char val2[32];
        char val3[32];
        floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
        floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
        floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
        floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);

        // Every element but the last is followed by a comma.
        output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3,
                    (i < (size-1)) ? "," : "");

        clist = clist->next;
    } // for

    ctx->indent--;
    output_line(ctx, "};");
    pop_output(ctx);
} // emit_ARB1_const_array

// Declare a single uniform register as a PARAM aliasing one array element.
void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
                       const VariableList *var)
{
    // We pack these down into the program.local array, so if we only use
    //  register c439, it'll actually map to program.local[0]. This will
    //  prevent overflows when we actually have enough resources to run.

    const char *arrayname = "program.local";
    int index = 0;

    char varname[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));

    push_output(ctx, &ctx->globals);

    if (var == NULL)
    {
        // all types share one array (rather, all types convert to float4).
        index = ctx->uniform_float4_count + ctx->uniform_int4_count +
                ctx->uniform_bool_count;
    } // if

    else
    {
        const int arraybase = var->index;
        if (var->constant)
        {
            // Register lives in a literal constant array: alias that
            // array's element instead of program.local.
            const int arraysize = var->count;
            arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase,
                                        arraysize, (char *) alloca(64), 64);
            index = (regnum - arraybase);
        } // if
        else
        {
            // Register lives in a uniform array: offset from the array's
            // recorded position in program.local.
            assert(var->emit_position != -1);
            index = (regnum - arraybase) + var->emit_position;
        } // else
    } // else

    output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index);
    pop_output(ctx);
} // emit_ARB1_uniform

// Samplers need no declaration; when `tb` is set, two extra program.local
// slots are claimed and named <sampler>_texbem / <sampler>_texbeml.
void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
{
    // this is mostly a no-op...you don't predeclare samplers in arb1.

    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
    {
        const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
                          ctx->uniform_bool_count;
        char var[64];
        get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
        push_output(ctx, &ctx->globals);
        output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
        output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
        pop_output(ctx);
        ctx->uniform_float4_count += 2;
    } // if
} // emit_ARB1_sampler

// Declare an input/output register in the globals output, binding it to
// the matching ARB1 built-in (vertex.attrib[], result.*, fragment.*).
// Registers with no mapping either fall back to a plain temp (vertex
// outputs), are left undeclared here (ps_1_1 texcoords), or fail the
// context.
// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
                         MOJOSHADER_usage usage, int index, int wmask,
                         int flags)
{
    // !!! FIXME: this function doesn't deal with write masks at all yet!
    const char *usage_str = NULL;
    const char *arrayleft = "";
    const char *arrayright = "";
    char index_str[16] = { '\0' };

    char varname[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));

    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?

    if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
        snprintf(index_str, sizeof (index_str), "%u", (uint) index);

    if (shader_is_vertex(ctx))
    {
        // pre-vs3 output registers.
        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
        //  output registers.
        if (!shader_version_atleast(ctx, 3, 0))
        {
            if (regtype == REG_TYPE_RASTOUT)
            {
                regtype = REG_TYPE_OUTPUT;
                index = regnum;
                switch ((const RastOutType) regnum)
                {
                    case RASTOUT_TYPE_POSITION:
                        usage = MOJOSHADER_USAGE_POSITION;
                        break;
                    case RASTOUT_TYPE_FOG:
                        usage = MOJOSHADER_USAGE_FOG;
                        break;
                    case RASTOUT_TYPE_POINT_SIZE:
                        usage = MOJOSHADER_USAGE_POINTSIZE;
                        break;
                } // switch
            } // if

            else if (regtype == REG_TYPE_ATTROUT)
            {
                regtype = REG_TYPE_OUTPUT;
                usage = MOJOSHADER_USAGE_COLOR;
                index = regnum;
            } // else if

            else if (regtype == REG_TYPE_TEXCRDOUT)
            {
                regtype = REG_TYPE_OUTPUT;
                usage = MOJOSHADER_USAGE_TEXCOORD;
                index = regnum;
            } // else if
        } // if

        // to avoid limitations of various GL entry points for input
        // attributes (glSecondaryColorPointer() can only take 3 component
        // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
        // issues), we set up all inputs as generic vertex attributes, so we
        // can pass data in just about any form, and ignore the built-in GLSL
        // attributes like gl_SecondaryColor. Output needs to use the
        // built-ins, though, but we don't have to worry about the GL entry
        // point limitations there.

        if (regtype == REG_TYPE_INPUT)
        {
            // Inputs take the next generic attribute slot, in the order
            // they are declared.
            const int attr = ctx->assigned_vertex_attributes++;
            push_output(ctx, &ctx->globals);
            output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
            pop_output(ctx);
        } // if

        else if (regtype == REG_TYPE_OUTPUT)
        {
            switch (usage)
            {
                case MOJOSHADER_USAGE_POSITION:
                    ctx->arb1_wrote_position = 1;
                    usage_str = "result.position";
                    break;
                case MOJOSHADER_USAGE_POINTSIZE:
                    usage_str = "result.pointsize";
                    break;
                case MOJOSHADER_USAGE_COLOR:
                    index_str[0] = '\0';  // no explicit number.
                    // Any other color index leaves usage_str NULL, which
                    // takes the plain-temp fallback below.
                    if (index == 0)
                        usage_str = "result.color.primary";
                    else if (index == 1)
                        usage_str = "result.color.secondary";
                    break;
                case MOJOSHADER_USAGE_FOG:
                    usage_str = "result.fogcoord";
                    break;
                case MOJOSHADER_USAGE_TEXCOORD:
                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
                    usage_str = "result.texcoord";
                    arrayleft = "[";
                    arrayright = "]";
                    break;
                default:
                    // !!! FIXME: we need to deal with some more built-in varyings here.
                    break;
            } // switch

            // !!! FIXME: the #define is a little hacky, but it means we don't
            // !!! FIXME:  have to track these separately if this works.
            push_output(ctx, &ctx->globals);
            // no mapping to built-in var? Just make it a regular global, pray.
            if (usage_str == NULL)
                output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
            else
            {
                output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
                            arrayleft, index_str, arrayright);
            } // else
            pop_output(ctx);
        } // else if

        else
        {
            fail(ctx, "unknown vertex shader attribute register");
        } // else
    } // if

    else if (shader_is_pixel(ctx))
    {
        const char *paramtype_str = "ATTRIB";

        // samplers DCLs get handled in emit_ARB1_sampler().

        if (flags & MOD_CENTROID)
        {
            if (!support_nv4(ctx))  // GL_NV_fragment_program4 adds centroid.
            {
                // !!! FIXME: should we just wing it without centroid here?
                failf(ctx, "centroid unsupported in %s profile",
                      ctx->profile->name);
                return;
            } // if

            paramtype_str = "CENTROID ATTRIB";
        } // if

        if (regtype == REG_TYPE_COLOROUT)
        {
            paramtype_str = "OUTPUT";
            usage_str = "result.color";
            if (ctx->have_multi_color_outputs)
            {
                // We have to gamble that you have GL_ARB_draw_buffers.
                // You probably do at this point if you have a sane setup.
                snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
                arrayleft = "[";
                arrayright = "]";
            } // if
        } // if

        else if (regtype == REG_TYPE_DEPTHOUT)
        {
            paramtype_str = "OUTPUT";
            usage_str = "result.depth";
        } // else if

        // !!! FIXME: can you actually have a texture register with COLOR usage?
        else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
        {
            if (usage == MOJOSHADER_USAGE_TEXCOORD)
            {
                // ps_1_1 does a different hack for this attribute.
                //  Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
                if (shader_version_atleast(ctx, 1, 4))
                {
                    snprintf(index_str, sizeof (index_str), "%u", (uint) index);
                    usage_str = "fragment.texcoord";
                    arrayleft = "[";
                    arrayright = "]";
                } // if
            } // if

            else if (usage == MOJOSHADER_USAGE_COLOR)
            {
                index_str[0] = '\0';  // no explicit number.
                if (index == 0)
                    usage_str = "fragment.color.primary";
                else if (index == 1)
                    usage_str = "fragment.color.secondary";
                else
                    fail(ctx, "unsupported color index");
            } // else if
        } // else if

        else if (regtype == REG_TYPE_MISCTYPE)
        {
            const MiscTypeType mt = (MiscTypeType) regnum;
            if (mt == MISCTYPE_TYPE_FACE)
            {
                if (support_nv4(ctx))  // FINALLY, a vFace equivalent in nv4!
                {
                    index_str[0] = '\0';  // no explicit number.
                    usage_str = "fragment.facing";
                } // if
                else
                {
                    failf(ctx, "vFace unsupported in %s profile",
                          ctx->profile->name);
                } // else
            } // if
            else if (mt == MISCTYPE_TYPE_POSITION)
            {
                index_str[0] = '\0';  // no explicit number.
                usage_str = "fragment.position";  // !!! FIXME: is this the same coord space as D3D?
            } // else if
            else
            {
                fail(ctx, "BUG: unhandled misc register");
            } // else
        } // else if

        else
        {
            fail(ctx, "unknown pixel shader attribute register");
        } // else

        // usage_str stays NULL when nothing above mapped the register (or
        // a failure was reported); no declaration is written in that case.
        if (usage_str != NULL)
        {
            push_output(ctx, &ctx->globals);
            output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
                        usage_str, arrayleft, index_str, arrayright);
            pop_output(ctx);
        } // if
    } // else if

    else
    {
        fail(ctx, "Unknown shader type");  // state machine should catch this.
    } // else
} // emit_ARB1_attribute

void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }

void emit_ARB1_NOP(Context *ctx)
{
    // There is no NOP in arb1. Just don't output anything here.
} // emit_ARB1_NOP

// Opcodes emitted under their own name through the generic dest/source
// helpers.
EMIT_ARB1_OPCODE_DS_FUNC(MOV)
EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
EMIT_ARB1_OPCODE_DS_FUNC(RCP)

// Emit RSQ, forcing abs() on the source where the profile doesn't.
void emit_ARB1_RSQ(Context *ctx)
{
    // nv4 doesn't force abs() on this, so negative values will generate NaN.
    // The spec says you should force the abs() yourself.
    if (!support_nv4(ctx))
    {
        emit_ARB1_opcode_ds(ctx, "RSQ");  // pre-nv4 implies ABS.
        return;
    } // if

    // we can optimize this to use nv2's |abs| construct in some cases.
    // These modifiers all collapse to plain ABS once abs() is applied.
    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
         (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
        ctx->source_args[0].src_mod = SRCMOD_ABS;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
        output_line(ctx, "RSQ%s, %s;", dst, src0);
    else
    {
        // Any other modifier: take the abs() in a scratch register first.
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "ABS %s, %s;", buf, src0);
        output_line(ctx, "RSQ%s, %s.x;", dst, buf);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_RSQ

EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
EMIT_ARB1_OPCODE_DSS_FUNC(SGE)

// D3D's EXP is emitted as the ARB1 opcode EX2.
void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }

// Emit `opcode` on the absolute value of source 0.
static void arb1_log(Context *ctx, const char *opcode)
{
    // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too
    // we can optimize this to use nv2's |abs| construct in some cases.
    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
        ctx->source_args[0].src_mod = SRCMOD_ABS;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
        output_line(ctx, "%s%s, %s;", opcode, dst, src0);
    else
    {
        // Any other modifier: take the abs() in a scratch register first.
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "ABS %s, %s;", buf, src0);
        output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // arb1_log


// D3D's LOG is emitted as the ARB1 opcode LG2.
void emit_ARB1_LOG(Context *ctx)
{
    arb1_log(ctx, "LG2");
} // emit_ARB1_LOG


EMIT_ARB1_OPCODE_DS_FUNC(LIT)
EMIT_ARB1_OPCODE_DSS_FUNC(DST)

// Emit LRP directly in pixel shaders; elsewhere build it from SUB + MAD.
void emit_ARB1_LRP(Context *ctx)
{
    if (shader_is_pixel(ctx))  // fragment shaders have a matching LRP opcode.
        emit_ARB1_opcode_dsss(ctx, "LRP");
    else
    {
        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
        char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));

        // LRP is: dest = src2 + src0 * (src1 - src2)
        output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2);
        output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2);
        emit_ARB1_dest_modifiers(ctx);
    } // else
} // emit_ARB1_LRP

EMIT_ARB1_OPCODE_DS_FUNC(FRC)

// Shared body of the MaXb matrix opcodes: emits `y` dot products of width
// `x` (DP3 or DP4), one per destination component, between source 0 and
// source args 1..y.
static void arb1_MxXy(Context *ctx, const int x, const int y)
{
    DestArgInfo *dstarg = &ctx->dest_arg;
    const int origmask = dstarg->writemask;
    char src0[64];
    int i;

    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    for (i = 0; i < y; i++)
    {
        char dst[64];
        char row[64];
        make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row));
        // Temporarily narrow the writemask to the single component that
        // receives this row's result.
        set_dstarg_writemask(dstarg, 1 << i);
        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row);
    } // for

    // Restore the real writemask before applying destination modifiers.
    set_dstarg_writemask(dstarg, origmask);
    emit_ARB1_dest_modifiers(ctx);
} // arb1_MxXy

void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); }
void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); }
void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); }
void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); }
void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); }

// Emit a subroutine call ("CAL <label>;"); fails without nv2 support.
void emit_ARB1_CALL(Context *ctx)
{
    if (!support_nv2(ctx))  // no branching in stock ARB1.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    char labelstr[64];
    get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
    output_line(ctx, "CAL %s;", labelstr);
} // emit_ARB1_CALL

// Emit a conditional subroutine call: MOVC source 1 into a scratch
// register, then "CAL <label> (NE.x);". Fails without nv2 support.
void emit_ARB1_CALLNZ(Context *ctx)
{
    // !!! FIXME: if src1 is a constbool that's true, we can remove the
    // !!! FIXME:  if. If it's false, we can make this a no-op.

    if (!support_nv2(ctx))  // no branching in stock ARB1.
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    else
    {
        // !!! FIXME: double-check this.
        char labelstr[64];
        char scratch[64];
        char src1[64];
        get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
        get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
        output_line(ctx, "MOVC %s, %s;", scratch, src1);
        output_line(ctx, "CAL %s (NE.x);", labelstr);
    } // else
} // emit_ARB1_CALLNZ

// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)

// Emit "RET;" when nv2 is supported, and always switch output back to the
// mainline.
void emit_ARB1_RET(Context *ctx)
{
    // don't fail() if no nv2...maybe we're just ending the mainline?
    //  if we're ending a LABEL that had no CALL, this would all be written
    //  to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will
    //  just end up throwing all this code out.
    if (support_nv2(ctx))  // no branching in stock ARB1.
        output_line(ctx, "RET;");
    set_output(ctx, &ctx->mainline); // in case we were ignoring this function.
} // emit_ARB1_RET


EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)

// Emit a subroutine label ("<label>:"). If the label is not in the used
// register list, output is redirected to ctx->ignore first, so the label
// and what follows are discarded.
void emit_ARB1_LABEL(Context *ctx)
{
    if (!support_nv2(ctx))  // no branching in stock ARB1.
        return;  // don't fail()...maybe we never use it, but do fail in CALL.

    const int label = ctx->source_args[0].regnum;
    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);

    // MSDN specs say CALL* has to come before the LABEL, so we know if we
    //  can ditch the entire function here as unused.
    if (reg == NULL)
        set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.

    // !!! FIXME: it would be nice if we could determine if a function is
    // !!! FIXME:  only called once and, if so, forcibly inline it.

    //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
    char labelstr[64];
    get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
    output_line(ctx, "%s:", labelstr);
} // emit_ARB1_LABEL


// Emit POW on the absolute value of source 0.
void emit_ARB1_POW(Context *ctx)
{
    // we can optimize this to use nv2's |abs| construct in some cases.
+ if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) || + (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) ) + ctx->source_args[0].src_mod = SRCMOD_ABS; + + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); + + if (ctx->source_args[0].src_mod == SRCMOD_ABS) + output_line(ctx, "POW%s, %s, %s;", dst, src0, src1); + else + { + char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); + output_line(ctx, "ABS %s, %s;", buf, src0); + output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1); + } // else + + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_POW + +void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); } + +void emit_ARB1_SGN(Context *ctx) +{ + if (support_nv2(ctx)) + emit_ARB1_opcode_ds(ctx, "SSG"); + else + { + char dst[64]; + char src0[64]; + char scratch1[64]; + char scratch2[64]; + make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1)); + allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2)); + output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0); + output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0); + output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2); + emit_ARB1_dest_modifiers(ctx); + } // else +} // emit_ARB1_SGN + +EMIT_ARB1_OPCODE_DS_FUNC(ABS) + +void emit_ARB1_NRM(Context *ctx) +{ + // nv2 fragment programs (and anything nv4) have a real NRM. 
+ if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) + emit_ARB1_opcode_ds(ctx, "NRM"); + else + { + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf)); + output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0); + output_line(ctx, "RSQ %s.w, %s.w;", buf, buf); + output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0); + emit_ARB1_dest_modifiers(ctx); + } // else +} // emit_ARB1_NRM + + +void emit_ARB1_SINCOS(Context *ctx) +{ + // we don't care about the temp registers that <= sm2 demands; ignore them. + const int mask = ctx->dest_arg.writemask; + + // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes. + if ((shader_is_pixel(ctx)) || (support_nv4(ctx))) + { + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + if (writemask_x(mask)) + output_line(ctx, "COS%s, %s;", dst, src0); + else if (writemask_y(mask)) + output_line(ctx, "SIN%s, %s;", dst, src0); + else if (writemask_xy(mask)) + output_line(ctx, "SCS%s, %s;", dst, src0); + } // if + + // nv2+ profiles have sin and cos opcodes. + else if (support_nv2(ctx)) + { + char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + if (writemask_x(mask)) + output_line(ctx, "COS %s.x, %s;", dst, src0); + else if (writemask_y(mask)) + output_line(ctx, "SIN %s.y, %s;", dst, src0); + else if (writemask_xy(mask)) + { + output_line(ctx, "SIN %s.x, %s;", dst, src0); + output_line(ctx, "COS %s.y, %s;", dst, src0); + } // else if + } // if + + else // big nasty. 
+ { + char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); + const int need_sin = (writemask_x(mask) || writemask_xy(mask)); + const int need_cos = (writemask_y(mask) || writemask_xy(mask)); + char scratch[64]; + + if (need_sin || need_cos) + allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); + + // These sin() and cos() approximations originally found here: + // http://www.devmaster.net/forums/showthread.php?t=5784 + // + // const float B = 4.0f / M_PI; + // const float C = -4.0f / (M_PI * M_PI); + // float y = B * x + C * x * fabs(x); + // + // // optional better precision... + // const float P = 0.225f; + // y = P * (y * fabs(y) - y) + y; + // + // + // That first thing can be reduced to: + // const float y = ((1.2732395447351626861510701069801f * x) + + // ((-0.40528473456935108577551785283891f * x) * fabs(x))); + + if (need_sin) + { + // !!! FIXME: use SRCMOD_ABS here? + output_line(ctx, "ABS %s.x, %s.x;", dst, src0); + output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst); + output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0); + output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch); + } // if + + // cosine is sin(x + M_PI/2), but you have to wrap x to pi: + // if (x+(M_PI/2) > M_PI) + // x -= 2 * M_PI; + // + // which is... 
+ // if (x+(1.57079637050628662109375) > 3.1415927410125732421875) + // x += -6.283185482025146484375; + + if (need_cos) + { + output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0); + output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch); + output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch); + output_line(ctx, "ABS %s.x, %s.x;", dst, src0); + output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst); + output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0); + output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch); + } // if + } // else + + // !!! FIXME: might not have done anything. Don't emit if we didn't. + if (!(ctx->isfail)) + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_SINCOS + + +void emit_ARB1_REP(Context *ctx) +{ + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + + // nv2 fragment programs (and everything nv4) have a real REP. + if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) ) + output_line(ctx, "REP %s;", src0); + + else if (support_nv2(ctx)) + { + // no REP, but we can use branches. 
        char failbranch[32];
        char topbranch[32];
        const int toplabel = allocate_branch_label(ctx);
        const int faillabel = allocate_branch_label(ctx);
        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
        get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));

        // Two labels are pushed below, so two free slots are needed.
        assert(((size_t) ctx->branch_labels_stack_index) <
                STATICARRAYLEN(ctx->branch_labels_stack)-1);

        // Push order matters: emit_ARB1_ENDREP() pops the fail label
        // first, then the top label.
        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel;

        // Load the counter, skip the whole loop if it is <= 0, then mark
        // the top of the loop.
        char scratch[32];
        snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
        output_line(ctx, "MOVC %s.x, %s;", scratch, src0);
        output_line(ctx, "BRA %s (LE.x);", failbranch);
        output_line(ctx, "%s:", topbranch);
    } // else if

    else  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
    } // else
} // emit_ARB1_REP


// Close a REP block: a native ENDREP where the profile has one, otherwise
// decrement the counter and branch back to the top while it is > 0.
void emit_ARB1_ENDREP(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real ENDREP.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
        output_line(ctx, "ENDREP;");

    else if (support_nv2(ctx))
    {
        // no ENDREP, but we can use branches.
        assert(ctx->branch_labels_stack_index >= 2);

        char failbranch[32];
        char topbranch[32];
        // Pop in the reverse of emit_ARB1_REP()'s push order.
        const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
        const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
        get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));

        char scratch[32];
        snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
        output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch);
        output_line(ctx, "BRA %s (GT.x);", topbranch);
        output_line(ctx, "%s:", failbranch);
    } // else if

    else  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
    } // else
} // emit_ARB1_ENDREP


// Open a conditional block based on the condition code the caller has
// already set: a native IF where available, otherwise a branch to a newly
// allocated label that is pushed on the branch label stack.
void nv2_if(Context *ctx)
{
    // The condition code register MUST be set up before this!
    // nv2 fragment programs (and everything nv4) have a real IF.
    if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
        output_line(ctx, "IF EQ.x;");
    else
    {
        // there's no IF construct, but we can use a branch to a label.
        char failbranch[32];
        const int label = allocate_branch_label(ctx);
        get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch));

        assert(((size_t) ctx->branch_labels_stack_index)
                 < STATICARRAYLEN(ctx->branch_labels_stack));

        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label;

        // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
        output_line(ctx, "BRA %s (EQ.x);", failbranch);
    } // else
} // nv2_if


// Emit IF: MOVC source 0 into a scratch register to set the condition
// code, then open the block with nv2_if(). Fails without nv2 support.
void emit_ARB1_IF(Context *ctx)
{
    if (support_nv2(ctx))
    {
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
        output_line(ctx, "MOVC %s.x, %s;", buf, src0);
        nv2_if(ctx);
    } // if

    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_IF


// Emit ELSE: a native ELSE where the profile has one, otherwise a branch
// over the else-section plus the label that starts it.
void emit_ARB1_ELSE(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real ELSE.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
        output_line(ctx, "ELSE;");

    else if (support_nv2(ctx))
    {
        // there's no ELSE construct, but we can use a branch to a label.
        assert(ctx->branch_labels_stack_index > 0);

        // At the end of the IF block, unconditionally jump to the ENDIF.
+        const int endlabel = allocate_branch_label(ctx);
+        char endbranch[32];
+        get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
+        output_line(ctx, "BRA %s;", endbranch);
+
+        // Now mark the ELSE section with a label.
+        const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
+        char elsebranch[32];
+        get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch));
+        output_line(ctx, "%s:", elsebranch);
+
+        // Replace the ELSE label with the ENDIF on the label stack.
+        ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel;
+    } // else if
+
+    else // stock ARB1 has no branching.
+    {
+        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
+    } // else
+} // emit_ARB1_ELSE
+
+
+// Close an IF/ELSE construct. Emits a real "ENDIF;" where the profile has
+//  one; otherwise pops the topmost entry of the branch label stack and emits
+//  it as a label line. Fails on stock ARB1, which has no branching.
+void emit_ARB1_ENDIF(Context *ctx)
+{
+    // nv2 fragment programs (and everything nv4) have a real ENDIF.
+    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
+        output_line(ctx, "ENDIF;");
+
+    else if (support_nv2(ctx))
+    {
+        // there's no ENDIF construct, but we can use a branch to a label.
+        assert(ctx->branch_labels_stack_index > 0);
+        const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
+        char endbranch[32];
+        get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
+        output_line(ctx, "%s:", endbranch);
+    } // else if
+
+    else // stock ARB1 has no branching.
+    {
+        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
+    } // else
+} // emit_ARB1_ENDIF
+
+
+// Break out of a loop. Emits a real "BRK;" where the profile has one;
+//  otherwise emits an unconditional BRA to the fail label that is on top of
+//  the branch label stack (the stack is only read, nothing is popped).
+//  Fails on stock ARB1, which has no branching.
+void emit_ARB1_BREAK(Context *ctx)
+{
+    // nv2 fragment programs (and everything nv4) have a real BREAK.
+    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
+        output_line(ctx, "BRK;");
+
+    else if (support_nv2(ctx))
+    {
+        // no BREAK, but we can use branches.
+        assert(ctx->branch_labels_stack_index >= 2);
+        // emit_ARB1_REP pushes the top label and then the fail label, so the
+        //  fail label is the topmost used entry, at [index-1]. The slot at
+        //  [index] is the next free one and never holds a live label.
+        // NOTE(review): nv2_if() pushes onto this same stack, so for a BREAK
+        //  nested inside an IF the topmost entry is the IF's label rather
+        //  than the loop's. Confirm whether that case needs handling here.
+        const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
+        char failbranch[32];
+        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
+        output_line(ctx, "BRA %s;", failbranch);
+    } // else if
+
+    else // stock ARB1 has no branching.
+    {
+        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
+    } // else
+} // emit_ARB1_BREAK
+
+
+void emit_ARB1_MOVA(Context *ctx)
+{
+    // nv2 and nv3 can use the ARR opcode.
+    // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
+    if (support_nv4(ctx))
+        emit_ARB1_opcode_ds(ctx, "ROUND.S"); // !!! FIXME: don't use a modifier here.
+    else if ((support_nv2(ctx)) || (support_nv3(ctx)))
+        emit_ARB1_opcode_ds(ctx, "ARR");
+    else
+    {
+        char src0[64];
+        char scratch[64];
+        char addr[32];
+
+        make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
+        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
+        snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum);
+
+        // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.
+
+        // ARL uses floor(), but D3D expects round-to-nearest.
+        // There is probably a more efficient way to do this.
+        if (shader_is_pixel(ctx)) // CMP only exists in fragment programs. :/
+            output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0);
+        else
+        {
+            output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0);
+            output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch);
+        } // else
+
+        output_line(ctx, "ABS %s, %s;", addr, src0);
+        output_line(ctx, "ADD %s, %s, 0.5;", addr, addr);
+        output_line(ctx, "FLR %s, %s;", addr, addr);
+        output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch);
+
+        // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
+        // wants to look at dest_arg, not our temp register.
+        assert(ctx->dest_arg.result_mod == 0);
+        assert(ctx->dest_arg.result_shift == 0);
+
+        // we assign to the actual address register as needed.
+ ctx->last_address_reg_component = -1; + } // else +} // emit_ARB1_MOVA + + +void emit_ARB1_TEXKILL(Context *ctx) +{ + // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle. + // We just map the x component to w. If it's negative, the fragment + // would discard anyhow, otherwise, it'll pass through okay. This saves + // us a temp register. + char dst[64]; + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + output_line(ctx, "KIL %s.xyzx;", dst); +} // emit_ARB1_TEXKILL + +static void arb1_texbem(Context *ctx, const int luminance) +{ + // !!! FIXME: this code counts on the register not having swizzles, etc. + const int stage = ctx->dest_arg.regnum; + char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src)); + char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); + char sampler[64]; + get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, + sampler, sizeof (sampler)); + + output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src); + output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp); + output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst); + output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage); + + if (luminance) // TEXBEML, not just TEXBEM? + { + output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;", + tmp, src, sampler, sampler); + output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp); + } // if + + emit_ARB1_dest_modifiers(ctx); +} // arb1_texbem + +void emit_ARB1_TEXBEM(Context *ctx) +{ + arb1_texbem(ctx, 0); +} // emit_ARB1_TEXBEM + +void emit_ARB1_TEXBEML(Context *ctx) +{ + arb1_texbem(ctx, 1); +} // emit_ARB1_TEXBEML + +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) + + +void emit_ARB1_TEXM3X2PAD(Context *ctx) +{ + // no-op ... work happens in emit_ARB1_TEXM3X2TEX(). 
+} // emit_ARB1_TEXM3X2PAD + +void emit_ARB1_TEXM3X2TEX(Context *ctx) +{ + if (ctx->texm3x2pad_src0 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + const int stage = ctx->dest_arg.regnum; + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, + src0, sizeof (src0)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, + src1, sizeof (src1)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src2, sizeof (src2)); + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + + output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst); + output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); + output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_TEXM3X2TEX + + +void emit_ARB1_TEXM3X3PAD(Context *ctx) +{ + // no-op ... work happens in emit_ARB1_TEXM3X3*(). +} // emit_ARB1_TEXM3X3PAD + + +void emit_ARB1_TEXM3X3TEX(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + const int stage = ctx->dest_arg.regnum; + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); + const TextureType ttype = (TextureType) (sreg ? 
sreg->index : 0); + const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D"; + + output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); + output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); + output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); + output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_TEXM3X3TEX + +void emit_ARB1_TEXM3X3SPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char src5[64]; + char tmp[64]; + char tmp2[64]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + const int stage = ctx->dest_arg.regnum; + allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); + allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, + src5, sizeof (src5)); + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? 
"CUBE" : "3D"; + + output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); + output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); + output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); + output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal + output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5); // normal * eyeray + + // !!! FIXME: This is goofy. There's got to be a way to do vector-wide + // !!! FIXME: divides or reciprocals...right? + output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2); + output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2); + output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2); + output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2); + output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2); + + output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp); + output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5); + output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_TEXM3X3SPEC + +void emit_ARB1_TEXM3X3VSPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char tmp[64]; + char tmp2[64]; + char tmp3[64]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ const int stage = ctx->dest_arg.regnum; + allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); + allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2)); + allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D"; + + output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0); + output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2); + output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst); + output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); + output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); + output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); + output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal + output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3); // normal * eyeray + + // !!! FIXME: This is goofy. There's got to be a way to do vector-wide + // !!! FIXME: divides or reciprocals...right? 
+ output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2); + output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2); + output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2); + output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2); + output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2); + + output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp); + output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3); + output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_TEXM3X3VSPEC + +void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); } +void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); } + +void emit_ARB1_CND(Context *ctx) +{ + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); + char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp)); + + // CND compares against 0.5, but we need to compare against 0.0... + // ...subtract to make up the difference. + output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0); + // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just + // switch src1 and src2 to get the same results. + output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_CND + +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) + +void emit_ARB1_TEXM3X3(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + + output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4); + output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1); + output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3); + output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_TEXM3X3 + +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) + +void emit_ARB1_CMP(Context *ctx) +{ + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); + // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just + // switch src1 and src2 to get the same results. + output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1); + emit_ARB1_dest_modifiers(ctx); +} // emit_ARB1_CMP + +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM) + + +void emit_ARB1_DP2ADD(Context *ctx) +{ + if (support_nv4(ctx)) // nv4 has a built-in equivalent to DP2ADD. 
+ emit_ARB1_opcode_dsss(ctx, "DP2A"); + else + { + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); + char scratch[64]; + + // DP2ADD is: + // dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz + allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch)); + output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1); + output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch); + output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2); + emit_ARB1_dest_modifiers(ctx); + } // else +} // emit_ARB1_DP2ADD + + +void emit_ARB1_DSX(Context *ctx) +{ + if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSX. + emit_ARB1_opcode_ds(ctx, "DDX"); + else + failf(ctx, "DSX unsupported in %s profile", ctx->profile->name); +} // emit_ARB1_DSX + + +void emit_ARB1_DSY(Context *ctx) +{ + if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSY. + emit_ARB1_opcode_ds(ctx, "DDY"); + else + failf(ctx, "DSY unsupported in %s profile", ctx->profile->name); +} // emit_ARB1_DSY + +static void arb1_texld(Context *ctx, const char *opcode, const int texldd) +{ + // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly. + if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx))) + ctx->dest_arg.result_mod &= ~MOD_PP; + + char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst)); + + const int sm1 = !shader_version_atleast(ctx, 1, 4); + const int regnum = sm1 ? 
ctx->dest_arg.regnum : ctx->source_args[1].regnum; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum); + + const char *ttype = NULL; + char src0[64]; + if (sm1) + get_ARB1_destarg_varname(ctx, src0, sizeof (src0)); + else + get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0)); + //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + + char src2[64] = { 0 }; + char src3[64] = { 0 }; + + if (texldd) + { + make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2)); + make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3)); + } // if + + // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters. + if (sreg == NULL) + { + fail(ctx, "TEXLD using undeclared sampler"); + return; + } // if + + // SM1 only specifies dst, so don't check swizzle there. + if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) ) + { + // !!! FIXME: does this ever actually happen? + fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment"); + } // if + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"? + case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break; + case TEXTURE_TYPE_VOLUME: ttype = "3D"; break; + default: fail(ctx, "unknown texture type"); return; + } // switch + + if (texldd) + { + output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst, + src0, src2, src3, regnum, ttype); + } // if + else + { + output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0, + regnum, ttype); + } // else +} // arb1_texld + + +void emit_ARB1_TEXLDD(Context *ctx) +{ + // With GL_NV_fragment_program2, we can use the TXD opcode. + // In stock arb1, we can settle for a standard texld, which isn't + // perfect, but oh well. 
+    if (support_nv2(ctx))
+        arb1_texld(ctx, "TXD", 1);
+    else
+        arb1_texld(ctx, "TEX", 0);
+} // emit_ARB1_TEXLDD
+
+
+// Emit TEXLDL through arb1_texld() with the "TXL" opcode. Fails for vertex
+//  shaders without nv3 support and for pixel shaders without nv2 support.
+void emit_ARB1_TEXLDL(Context *ctx)
+{
+    if ((shader_is_vertex(ctx)) && (!support_nv3(ctx)))
+    {
+        failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
+              ctx->profile->name);
+        return;
+    } // if
+
+    else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx)))
+    {
+        failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
+              ctx->profile->name);
+        return;
+    } // if
+
+    // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
+    arb1_texld(ctx, "TXL", 0);
+} // emit_ARB1_TEXLDL
+
+
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
+
+// Emit IFC (an IF with an inline comparison): compare src0 against src1 into
+//  the x component of a scratch register, using the opcode selected by
+//  ctx->instruction_controls, then open the conditional through nv2_if().
+//  Fails on an out-of-range comparison control and on stock ARB1, which has
+//  no branching.
+void emit_ARB1_IFC(Context *ctx)
+{
+    if (support_nv2(ctx))
+    {
+        // Indexed by the D3D comparison control:
+        //  1 = GT, 2 = EQ, 3 = GE, 4 = LT, 5 = NE, 6 = LE (0 is reserved).
+        // Entry 4 used to repeat "SGTC", which turned every less-than test
+        //  into a greater-than test.
+        const char *comps[] = {
+            "", "SGTC", "SEQC", "SGEC", "SLTC", "SNEC", "SLEC"
+        };
+
+        if (ctx->instruction_controls >= STATICARRAYLEN(comps))
+        {
+            fail(ctx, "unknown comparison control");
+            return;
+        } // if
+
+        char src0[64];
+        char src1[64];
+        char scratch[64];
+
+        const char *comp = comps[ctx->instruction_controls];
+        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
+        get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
+        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
+        // nv2_if() requires the condition code to be set up before it is
+        //  called; the comparison emitted here is what it tests.
+        output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
+        nv2_if(ctx);
+    } // if
+
+    else // stock ARB1 has no branching.
+    {
+        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
+    } // else
+} // emit_ARB1_IFC
+
+
+EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
+
+void emit_ARB1_DEF(Context *ctx)
+{
+    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
+ char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); + char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); + char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); + char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); + + push_output(ctx, &ctx->globals); + output_line(ctx, "PARAM %s = { %s, %s, %s, %s };", + dst, val0, val1, val2, val3); + pop_output(ctx); +} // emit_ARB1_DEF + +void emit_ARB1_DEFI(Context *ctx) +{ + char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + const int32 *x = (const int32 *) ctx->dwords; + push_output(ctx, &ctx->globals); + output_line(ctx, "PARAM %s = { %d, %d, %d, %d };", + dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); + pop_output(ctx); +} // emit_ARB1_DEFI + +void emit_ARB1_DEFB(Context *ctx) +{ + char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst)); + push_output(ctx, &ctx->globals); + output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0); + pop_output(ctx); +} // emit_ARB1_DEFB + +void emit_ARB1_DCL(Context *ctx) +{ + // no-op. We do this in our emit_attribute() and emit_uniform(). +} // emit_ARB1_DCL + +EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) + +void emit_ARB1_TEXLD(Context *ctx) +{ + if (!shader_version_atleast(ctx, 1, 4)) + { + arb1_texld(ctx, "TEX", 0); + return; + } // if + + else if (!shader_version_atleast(ctx, 2, 0)) + { + // ps_1_4 is different, too! + fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME + return; + } // if + + // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly? 
+ if (ctx->instruction_controls == CONTROL_TEXLD) + arb1_texld(ctx, "TEX", 0); + else if (ctx->instruction_controls == CONTROL_TEXLDP) + arb1_texld(ctx, "TXP", 0); + else if (ctx->instruction_controls == CONTROL_TEXLDB) + arb1_texld(ctx, "TXB", 0); +} // emit_ARB1_TEXLD + +#undef EMIT_ARB1_OPCODE_FUNC +#undef EMIT_ARB1_OPCODE_D_FUNC +#undef EMIT_ARB1_OPCODE_S_FUNC +#undef EMIT_ARB1_OPCODE_SS_FUNC +#undef EMIT_ARB1_OPCODE_DS_FUNC +#undef EMIT_ARB1_OPCODE_DSS_FUNC +#undef EMIT_ARB1_OPCODE_DSSS_FUNC +#undef EMIT_ARB1_OPCODE_DSSSS_FUNC +#undef EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC + +#endif // SUPPORT_PROFILE_ARB1 + +#pragma GCC visibility pop \ No newline at end of file diff --git a/profiles/mojoshader_profile_bytecode.c b/profiles/mojoshader_profile_bytecode.c new file mode 100644 index 00000000..072e2ddc --- /dev/null +++ b/profiles/mojoshader_profile_bytecode.c @@ -0,0 +1,152 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +#if SUPPORT_PROFILE_BYTECODE + +void emit_BYTECODE_start(Context *ctx, const char *profilestr) +{ + ctx->ignores_ctab = 1; +} // emit_BYTECODE_start + +void emit_BYTECODE_finalize(Context *ctx) +{ + // just copy the whole token stream and make all other emitters no-ops. 
+ if (set_output(ctx, &ctx->mainline)) + { + const size_t len = ((size_t) (ctx->tokens - ctx->orig_tokens)) * sizeof (uint32); + buffer_append(ctx->mainline, (const char *) ctx->orig_tokens, len); + } // if +} // emit_BYTECODE_finalize + +void emit_BYTECODE_end(Context *ctx) {} +void emit_BYTECODE_phase(Context *ctx) {} +void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {} +void emit_BYTECODE_array(Context *ctx, VariableList *var) {} +void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {} +void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c, + int base, int size) {} +void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n, + const VariableList *var) {} +void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n, + MOJOSHADER_usage u, int i, int w, + int f) {} + +const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum) +{ + char regnum_str[16]; + const char *regtype_str = get_D3D_register_string(ctx, rt, regnum, + regnum_str, sizeof (regnum_str)); + char buf[64]; + snprintf(buf, sizeof (buf), "%s%s", regtype_str, regnum_str); + return StrDup(ctx, buf); +} // get_BYTECODE_varname + +const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size); + return StrDup(ctx, buf); +} // get_BYTECODE_const_array_varname + +#define EMIT_BYTECODE_OPCODE_FUNC(op) \ + void emit_BYTECODE_##op(Context *ctx) {} + +EMIT_BYTECODE_OPCODE_FUNC(RESERVED) +EMIT_BYTECODE_OPCODE_FUNC(NOP) +EMIT_BYTECODE_OPCODE_FUNC(MOV) +EMIT_BYTECODE_OPCODE_FUNC(ADD) +EMIT_BYTECODE_OPCODE_FUNC(SUB) +EMIT_BYTECODE_OPCODE_FUNC(MAD) +EMIT_BYTECODE_OPCODE_FUNC(MUL) +EMIT_BYTECODE_OPCODE_FUNC(RCP) +EMIT_BYTECODE_OPCODE_FUNC(RSQ) +EMIT_BYTECODE_OPCODE_FUNC(DP3) +EMIT_BYTECODE_OPCODE_FUNC(DP4) +EMIT_BYTECODE_OPCODE_FUNC(MIN) +EMIT_BYTECODE_OPCODE_FUNC(MAX) +EMIT_BYTECODE_OPCODE_FUNC(SLT) +EMIT_BYTECODE_OPCODE_FUNC(SGE) 
+EMIT_BYTECODE_OPCODE_FUNC(EXP) +EMIT_BYTECODE_OPCODE_FUNC(LOG) +EMIT_BYTECODE_OPCODE_FUNC(LIT) +EMIT_BYTECODE_OPCODE_FUNC(DST) +EMIT_BYTECODE_OPCODE_FUNC(LRP) +EMIT_BYTECODE_OPCODE_FUNC(FRC) +EMIT_BYTECODE_OPCODE_FUNC(M4X4) +EMIT_BYTECODE_OPCODE_FUNC(M4X3) +EMIT_BYTECODE_OPCODE_FUNC(M3X4) +EMIT_BYTECODE_OPCODE_FUNC(M3X3) +EMIT_BYTECODE_OPCODE_FUNC(M3X2) +EMIT_BYTECODE_OPCODE_FUNC(CALL) +EMIT_BYTECODE_OPCODE_FUNC(CALLNZ) +EMIT_BYTECODE_OPCODE_FUNC(LOOP) +EMIT_BYTECODE_OPCODE_FUNC(RET) +EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP) +EMIT_BYTECODE_OPCODE_FUNC(LABEL) +EMIT_BYTECODE_OPCODE_FUNC(POW) +EMIT_BYTECODE_OPCODE_FUNC(CRS) +EMIT_BYTECODE_OPCODE_FUNC(SGN) +EMIT_BYTECODE_OPCODE_FUNC(ABS) +EMIT_BYTECODE_OPCODE_FUNC(NRM) +EMIT_BYTECODE_OPCODE_FUNC(SINCOS) +EMIT_BYTECODE_OPCODE_FUNC(REP) +EMIT_BYTECODE_OPCODE_FUNC(ENDREP) +EMIT_BYTECODE_OPCODE_FUNC(IF) +EMIT_BYTECODE_OPCODE_FUNC(ELSE) +EMIT_BYTECODE_OPCODE_FUNC(ENDIF) +EMIT_BYTECODE_OPCODE_FUNC(BREAK) +EMIT_BYTECODE_OPCODE_FUNC(MOVA) +EMIT_BYTECODE_OPCODE_FUNC(TEXKILL) +EMIT_BYTECODE_OPCODE_FUNC(TEXBEM) +EMIT_BYTECODE_OPCODE_FUNC(TEXBEML) +EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR) +EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC) +EMIT_BYTECODE_OPCODE_FUNC(EXPP) +EMIT_BYTECODE_OPCODE_FUNC(LOGP) +EMIT_BYTECODE_OPCODE_FUNC(CND) +EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB) +EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH) +EMIT_BYTECODE_OPCODE_FUNC(TEXDP3) +EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3) +EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH) +EMIT_BYTECODE_OPCODE_FUNC(CMP) +EMIT_BYTECODE_OPCODE_FUNC(BEM) +EMIT_BYTECODE_OPCODE_FUNC(DP2ADD) +EMIT_BYTECODE_OPCODE_FUNC(DSX) +EMIT_BYTECODE_OPCODE_FUNC(DSY) +EMIT_BYTECODE_OPCODE_FUNC(TEXLDD) +EMIT_BYTECODE_OPCODE_FUNC(TEXLDL) +EMIT_BYTECODE_OPCODE_FUNC(BREAKP) 
+EMIT_BYTECODE_OPCODE_FUNC(BREAKC) +EMIT_BYTECODE_OPCODE_FUNC(IFC) +EMIT_BYTECODE_OPCODE_FUNC(SETP) +EMIT_BYTECODE_OPCODE_FUNC(DEF) +EMIT_BYTECODE_OPCODE_FUNC(DEFI) +EMIT_BYTECODE_OPCODE_FUNC(DEFB) +EMIT_BYTECODE_OPCODE_FUNC(DCL) +EMIT_BYTECODE_OPCODE_FUNC(TEXCRD) +EMIT_BYTECODE_OPCODE_FUNC(TEXLD) + +#undef EMIT_BYTECODE_OPCODE_FUNC + +#endif // SUPPORT_PROFILE_BYTECODE + +#pragma GCC visibility pop \ No newline at end of file diff --git a/profiles/mojoshader_profile_common.c b/profiles/mojoshader_profile_common.c new file mode 100644 index 00000000..8569a7ce --- /dev/null +++ b/profiles/mojoshader_profile_common.c @@ -0,0 +1,504 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +// Common Utilities + +void out_of_memory(Context *ctx) +{ + ctx->isfail = ctx->out_of_memory = 1; +} // out_of_memory + +void *Malloc(Context *ctx, const size_t len) +{ + void *retval = ctx->malloc((int) len, ctx->malloc_data); + if (retval == NULL) + out_of_memory(ctx); + return retval; +} // Malloc + +char *StrDup(Context *ctx, const char *str) +{ + char *retval = (char *) Malloc(ctx, strlen(str) + 1); + if (retval != NULL) + strcpy(retval, str); + return retval; +} // StrDup + +void Free(Context *ctx, void *ptr) +{ + ctx->free(ptr, ctx->malloc_data); +} // Free + +void * MOJOSHADERCALL MallocBridge(int bytes, void *data) +{ + return Malloc((Context *) data, (size_t) bytes); +} // MallocBridge + +void MOJOSHADERCALL FreeBridge(void *ptr, void *data) +{ + Free((Context *) data, ptr); +} // FreeBridge + +// Jump between output sections in the context... + +int set_output(Context *ctx, Buffer **section) +{ + // only create output sections on first use. 
+ if (*section == NULL) + { + *section = buffer_create(256, MallocBridge, FreeBridge, ctx); + if (*section == NULL) + return 0; + } // if + + ctx->output = *section; + return 1; +} // set_output + +void push_output(Context *ctx, Buffer **section) +{ + assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack))); + ctx->output_stack[ctx->output_stack_len] = ctx->output; + ctx->indent_stack[ctx->output_stack_len] = ctx->indent; + ctx->output_stack_len++; + if (!set_output(ctx, section)) + return; + ctx->indent = 0; +} // push_output + +void pop_output(Context *ctx) +{ + assert(ctx->output_stack_len > 0); + ctx->output_stack_len--; + ctx->output = ctx->output_stack[ctx->output_stack_len]; + ctx->indent = ctx->indent_stack[ctx->output_stack_len]; +} // pop_output + +// Shader model version magic... + +uint32 ver_ui32(const uint8 major, const uint8 minor) +{ + return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) ); +} // version_ui32 + +int shader_version_supported(const uint8 maj, const uint8 min) +{ + return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR)); +} // shader_version_supported + +int shader_version_atleast(const Context *ctx, const uint8 maj, + const uint8 min) +{ + return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min)); +} // shader_version_atleast + +int shader_version_exactly(const Context *ctx, const uint8 maj, + const uint8 min) +{ + return ((ctx->major_ver == maj) && (ctx->minor_ver == min)); +} // shader_version_exactly + +int shader_is_pixel(const Context *ctx) +{ + return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL); +} // shader_is_pixel + +int shader_is_vertex(const Context *ctx) +{ + return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX); +} // shader_is_vertex + +// Fail... + +int isfail(const Context *ctx) +{ + return ctx->isfail; +} // isfail + +void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3); +void failf(Context *ctx, const char *fmt, ...) 
+{ + ctx->isfail = 1; + if (ctx->out_of_memory) + return; + + // no filename at this level (we pass a NULL to errorlist_add_va()...) + va_list ap; + va_start(ap, fmt); + errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap); + va_end(ap); +} // failf + +void fail(Context *ctx, const char *reason) +{ + failf(ctx, "%s", reason); +} // fail + +// Output Lines... + +void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3); +void output_line(Context *ctx, const char *fmt, ...) +{ + assert(ctx->output != NULL); + if (isfail(ctx)) + return; // we failed previously, don't go on... + + const int indent = ctx->indent; + if (indent > 0) + { + char *indentbuf = (char *) alloca(indent); + memset(indentbuf, '\t', indent); + buffer_append(ctx->output, indentbuf, indent); + } // if + + va_list ap; + va_start(ap, fmt); + buffer_append_va(ctx->output, fmt, ap); + va_end(ap); + + buffer_append(ctx->output, ctx->endline, ctx->endline_len); +} // output_line + +void output_blank_line(Context *ctx) +{ + assert(ctx->output != NULL); + if (!isfail(ctx)) + buffer_append(ctx->output, ctx->endline, ctx->endline_len); +} // output_blank_line + +// !!! FIXME: this is sort of nasty. +void floatstr(Context *ctx, char *buf, size_t bufsize, float f, + int leavedecimal) +{ + const size_t len = MOJOSHADER_printFloat(buf, bufsize, f); + if ((len+2) >= bufsize) + fail(ctx, "BUG: internal buffer is too small"); + else + { + char *end = buf + len; + char *ptr = strchr(buf, '.'); + if (ptr == NULL) + { + if (leavedecimal) + strcat(buf, ".0"); + return; // done. + } // if + + while (--end != ptr) + { + if (*end != '0') + { + end++; + break; + } // if + } // while + if ((leavedecimal) && (end == ptr)) + end += 2; + *end = '\0'; // chop extra '0' or all decimal places off. + } // else +} // floatstr + +// Deal with register lists... 
+ +static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum) +{ + return ( ((uint32) regnum) | (((uint32) regtype) << 16) ); +} // reg_to_uint32 + +// !!! FIXME: ditch this for a hash table. +RegisterList *reglist_insert(Context *ctx, RegisterList *prev, + const RegisterType regtype, + const int regnum) +{ + const uint32 newval = reg_to_ui32(regtype, regnum); + RegisterList *item = prev->next; + while (item != NULL) + { + const uint32 val = reg_to_ui32(item->regtype, item->regnum); + if (newval == val) + return item; // already set, so we're done. + else if (newval < val) // insert it here. + break; + else // if (newval > val) + { + // keep going, we're not to the insertion point yet. + prev = item; + item = item->next; + } // else + } // while + + // we need to insert an entry after (prev). + item = (RegisterList *) Malloc(ctx, sizeof (RegisterList)); + if (item != NULL) + { + item->regtype = regtype; + item->regnum = regnum; + item->usage = MOJOSHADER_USAGE_UNKNOWN; + item->index = 0; + item->writemask = 0; + item->misc = 0; + item->written = 0; + item->array = NULL; + item->next = prev->next; + prev->next = item; + } // if + + return item; +} // reglist_insert + +RegisterList *reglist_find(const RegisterList *prev, + const RegisterType rtype, + const int regnum) +{ + const uint32 newval = reg_to_ui32(rtype, regnum); + RegisterList *item = prev->next; + while (item != NULL) + { + const uint32 val = reg_to_ui32(item->regtype, item->regnum); + if (newval == val) + return item; // here it is. + else if (newval < val) // should have been here if it existed. + return NULL; + else // if (newval > val) + item = item->next; + } // while + + return NULL; // wasn't in the list. 
+} // reglist_find + +RegisterList *set_used_register(Context *ctx, + const RegisterType regtype, + const int regnum, + const int written) +{ + RegisterList *reg = NULL; + if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0)) + ctx->have_multi_color_outputs = 1; + + reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum); + if (reg && written) + reg->written = 1; + return reg; +} // set_used_register + +void set_defined_register(Context *ctx, const RegisterType rtype, + const int regnum) +{ + reglist_insert(ctx, &ctx->defined_registers, rtype, regnum); +} // set_defined_register + +// Writemasks + +int writemask_xyzw(const int writemask) +{ + return (writemask == 0xF); // 0xF == 1111. No explicit mask (full!). +} // writemask_xyzw + +int writemask_xyz(const int writemask) +{ + return (writemask == 0x7); // 0x7 == 0111. (that is: xyz) +} // writemask_xyz + +int writemask_xy(const int writemask) +{ + return (writemask == 0x3); // 0x3 == 0011. (that is: xy) +} // writemask_xy + +int writemask_x(const int writemask) +{ + return (writemask == 0x1); // 0x1 == 0001. (that is: x) +} // writemask_x + +int writemask_y(const int writemask) +{ + return (writemask == 0x2); // 0x2 == 0010. (that is: y) +} // writemask_y + +int replicate_swizzle(const int swizzle) +{ + return ( (((swizzle >> 0) & 0x3) == ((swizzle >> 2) & 0x3)) && + (((swizzle >> 2) & 0x3) == ((swizzle >> 4) & 0x3)) && + (((swizzle >> 4) & 0x3) == ((swizzle >> 6) & 0x3)) ); +} // replicate_swizzle + +int no_swizzle(const int swizzle) +{ + return (swizzle == 0xE4); // 0xE4 == 11100100 ... 0 1 2 3. No swizzle. 
+} // no_swizzle + +int vecsize_from_writemask(const int m) +{ + return (m & 1) + ((m >> 1) & 1) + ((m >> 2) & 1) + ((m >> 3) & 1); +} // vecsize_from_writemask + +void set_dstarg_writemask(DestArgInfo *dst, const int mask) +{ + dst->writemask = mask; + dst->writemask0 = ((mask >> 0) & 1); + dst->writemask1 = ((mask >> 1) & 1); + dst->writemask2 = ((mask >> 2) & 1); + dst->writemask3 = ((mask >> 3) & 1); +} // set_dstarg_writemask + +// D3D stuff that's used in more than just the d3d profile... + +int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type, + const RegisterType rtype, const int rnum) +{ + const int uses_psize = ctx->uses_pointsize; + const int uses_fog = ctx->uses_fog; + if ( (rtype == REG_TYPE_OUTPUT) && ((uses_psize) || (uses_fog)) ) + { + const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum); + if (reg != NULL) + { + const MOJOSHADER_usage usage = reg->usage; + return ( (uses_psize && (usage == MOJOSHADER_USAGE_POINTSIZE)) || + (uses_fog && (usage == MOJOSHADER_USAGE_FOG)) ); + } // if + } // if + + return scalar_register(shader_type, rtype, rnum); +} // isscalar + +const char *get_D3D_register_string(Context *ctx, + RegisterType regtype, + int regnum, char *regnum_str, + size_t regnum_size) +{ + const char *retval = NULL; + int has_number = 1; + + switch (regtype) + { + case REG_TYPE_TEMP: + retval = "r"; + break; + + case REG_TYPE_INPUT: + retval = "v"; + break; + + case REG_TYPE_CONST: + retval = "c"; + break; + + case REG_TYPE_ADDRESS: // (or REG_TYPE_TEXTURE, same value.) + retval = shader_is_vertex(ctx) ? "a" : "t"; + break; + + case REG_TYPE_RASTOUT: + switch ((RastOutType) regnum) + { + case RASTOUT_TYPE_POSITION: retval = "oPos"; break; + case RASTOUT_TYPE_FOG: retval = "oFog"; break; + case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break; + } // switch + has_number = 0; + break; + + case REG_TYPE_ATTROUT: + retval = "oD"; + break; + + case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.) 
+ if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0)) + retval = "o"; + else + retval = "oT"; + break; + + case REG_TYPE_CONSTINT: + retval = "i"; + break; + + case REG_TYPE_COLOROUT: + retval = "oC"; + break; + + case REG_TYPE_DEPTHOUT: + retval = "oDepth"; + has_number = 0; + break; + + case REG_TYPE_SAMPLER: + retval = "s"; + break; + + case REG_TYPE_CONSTBOOL: + retval = "b"; + break; + + case REG_TYPE_LOOP: + retval = "aL"; + has_number = 0; + break; + + case REG_TYPE_MISCTYPE: + switch ((const MiscTypeType) regnum) + { + case MISCTYPE_TYPE_POSITION: retval = "vPos"; break; + case MISCTYPE_TYPE_FACE: retval = "vFace"; break; + } // switch + has_number = 0; + break; + + case REG_TYPE_LABEL: + retval = "l"; + break; + + case REG_TYPE_PREDICATE: + retval = "p"; + break; + + //case REG_TYPE_TEMPFLOAT16: // !!! FIXME: don't know this asm string + default: + fail(ctx, "unknown register type"); + retval = "???"; + has_number = 0; + break; + } // switch + + if (has_number) + snprintf(regnum_str, regnum_size, "%u", (uint) regnum); + else + regnum_str[0] = '\0'; + + return retval; +} // get_D3D_register_string + +// !!! FIXME: These should stay in the mojoshader_profile_d3d file +// !!! FIXME: but ARB1 relies on them, so we have to move them here. +// !!! FIXME: If/when we kill off ARB1, we can move these back. 
+ +const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt, + int regnum, char *buf, + const size_t len) +{ + char regnum_str[16]; + const char *regtype_str = get_D3D_register_string(ctx, rt, regnum, + regnum_str, sizeof (regnum_str)); + snprintf(buf,len,"%s%s", regtype_str, regnum_str); + return buf; +} // get_D3D_varname_in_buf + + +const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum) +{ + char buf[64]; + get_D3D_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_D3D_varname + +#pragma GCC visibility pop \ No newline at end of file diff --git a/profiles/mojoshader_profile_d3d.c b/profiles/mojoshader_profile_d3d.c new file mode 100644 index 00000000..6327dc59 --- /dev/null +++ b/profiles/mojoshader_profile_d3d.c @@ -0,0 +1,686 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +#if SUPPORT_PROFILE_D3D + +const char *make_D3D_srcarg_string_in_buf(Context *ctx, + const SourceArgInfo *arg, + char *buf, size_t buflen) +{ + const char *premod_str = ""; + const char *postmod_str = ""; + switch (arg->src_mod) + { + case SRCMOD_NEGATE: + premod_str = "-"; + break; + + case SRCMOD_BIASNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_BIAS: + postmod_str = "_bias"; + break; + + case SRCMOD_SIGNNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_SIGN: + postmod_str = "_bx2"; + break; + + case SRCMOD_COMPLEMENT: + premod_str = "1-"; + break; + + case SRCMOD_X2NEGATE: + premod_str = "-"; + // fall through. 
+ case SRCMOD_X2: + postmod_str = "_x2"; + break; + + case SRCMOD_DZ: + postmod_str = "_dz"; + break; + + case SRCMOD_DW: + postmod_str = "_dw"; + break; + + case SRCMOD_ABSNEGATE: + premod_str = "-"; + // fall through. + case SRCMOD_ABS: + postmod_str = "_abs"; + break; + + case SRCMOD_NOT: + premod_str = "!"; + break; + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + } // switch + + + char regnum_str[16]; + const char *regtype_str = get_D3D_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + + if (regtype_str == NULL) + { + fail(ctx, "Unknown source register type."); + *buf = '\0'; + return buf; + } // if + + const char *rel_lbracket = ""; + const char *rel_rbracket = ""; + char rel_swizzle[4] = { '\0' }; + char rel_regnum_str[16] = { '\0' }; + const char *rel_regtype_str = ""; + if (arg->relative) + { + if (arg->relative_regtype == REG_TYPE_LOOP) + { + rel_swizzle[0] = '\0'; + rel_swizzle[1] = '\0'; + rel_swizzle[2] = '\0'; + } // if + else + { + rel_swizzle[0] = '.'; + rel_swizzle[1] = swizzle_channels[arg->relative_component]; + rel_swizzle[2] = '\0'; + } // else + + rel_lbracket = "["; + rel_rbracket = "]"; + rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype, + arg->relative_regnum, + rel_regnum_str, + sizeof (rel_regnum_str)); + + if (regtype_str == NULL) + { + fail(ctx, "Unknown relative source register type."); + *buf = '\0'; + return buf; + } // if + } // if + + char swizzle_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !no_swizzle(arg->swizzle)) + { + swizzle_str[i++] = '.'; + swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_y]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_z]; + swizzle_str[i++] = swizzle_channels[arg->swizzle_w]; + + // .xyzz is the same as .xyz, .z is the same as .zzzz, etc. 
+ while (swizzle_str[i-1] == swizzle_str[i-2]) + i--; + } // if + swizzle_str[i] = '\0'; + assert(i < sizeof (swizzle_str)); + + // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12] + snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", + premod_str, regtype_str, regnum_str, postmod_str, + rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle, + rel_rbracket, swizzle_str); + // !!! FIXME: make sure the scratch buffer was large enough. + return buf; +} // make_D3D_srcarg_string_in_buf + + +const char *make_D3D_destarg_string(Context *ctx, char *buf, + const size_t buflen) +{ + const DestArgInfo *arg = &ctx->dest_arg; + + const char *result_shift_str = ""; + switch (arg->result_shift) + { + case 0x1: result_shift_str = "_x2"; break; + case 0x2: result_shift_str = "_x4"; break; + case 0x3: result_shift_str = "_x8"; break; + case 0xD: result_shift_str = "_d8"; break; + case 0xE: result_shift_str = "_d4"; break; + case 0xF: result_shift_str = "_d2"; break; + } // switch + + const char *sat_str = (arg->result_mod & MOD_SATURATE) ? "_sat" : ""; + const char *pp_str = (arg->result_mod & MOD_PP) ? "_pp" : ""; + const char *cent_str = (arg->result_mod & MOD_CENTROID) ? 
"_centroid" : ""; + + char regnum_str[16]; + const char *regtype_str = get_D3D_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + if (regtype_str == NULL) + { + fail(ctx, "Unknown destination register type."); + *buf = '\0'; + return buf; + } // if + + char writemask_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !writemask_xyzw(arg->writemask)) + { + writemask_str[i++] = '.'; + if (arg->writemask0) writemask_str[i++] = 'x'; + if (arg->writemask1) writemask_str[i++] = 'y'; + if (arg->writemask2) writemask_str[i++] = 'z'; + if (arg->writemask3) writemask_str[i++] = 'w'; + } // if + writemask_str[i] = '\0'; + assert(i < sizeof (writemask_str)); + + const char *pred_left = ""; + const char *pred_right = ""; + char pred[32] = { '\0' }; + if (ctx->predicated) + { + pred_left = "("; + pred_right = ") "; + make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg, + pred, sizeof (pred)); + } // if + + // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ... + snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s", + result_shift_str, sat_str, pp_str, cent_str, + pred_left, pred, pred_right, + regtype_str, regnum_str, writemask_str); + // !!! FIXME: make sure the scratch buffer was large enough. 
+ return buf; +} // make_D3D_destarg_string + + +const char *make_D3D_srcarg_string(Context *ctx, const size_t idx, + char *buf, size_t buflen) +{ + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + *buf = '\0'; + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + return make_D3D_srcarg_string_in_buf(ctx, arg, buf, buflen); +} // make_D3D_srcarg_string + +const char *get_D3D_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size); + return StrDup(ctx, buf); +} // get_D3D_const_array_varname + + +void emit_D3D_start(Context *ctx, const char *profilestr) +{ + const uint major = (uint) ctx->major_ver; + const uint minor = (uint) ctx->minor_ver; + char minor_str[16]; + + ctx->ignores_ctab = 1; + + if (minor == 0xFF) + strcpy(minor_str, "sw"); + else if ((major > 1) && (minor == 1)) + strcpy(minor_str, "x"); // for >= SM2, apparently this is "x". Weird. + else + snprintf(minor_str, sizeof (minor_str), "%u", (uint) minor); + + output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str); +} // emit_D3D_start + + +void emit_D3D_end(Context *ctx) +{ + output_line(ctx, "end"); +} // emit_D3D_end + + +void emit_D3D_phase(Context *ctx) +{ + output_line(ctx, "phase"); +} // emit_D3D_phase + + +void emit_D3D_finalize(Context *ctx) +{ + // no-op. +} // emit_D3D_finalize + + +void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum) +{ + // no-op. +} // emit_D3D_global + + +void emit_D3D_array(Context *ctx, VariableList *var) +{ + // no-op. +} // emit_D3D_array + + +void emit_D3D_const_array(Context *ctx, const ConstantsList *clist, + int base, int size) +{ + // no-op. +} // emit_D3D_const_array + + +void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var) +{ + // no-op. +} // emit_D3D_uniform + + +void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb) +{ + // no-op. 
+} // emit_D3D_sampler + + +void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags) +{ + // no-op. +} // emit_D3D_attribute + + +void emit_D3D_RESERVED(Context *ctx) +{ + // do nothing; fails in the state machine. +} // emit_D3D_RESERVED + + +// Generic D3D opcode emitters. A list of macros generate all the entry points +// that call into these... + +char *lowercase(char *dst, const char *src) +{ + int i = 0; + do + { + const char ch = src[i]; + dst[i] = (((ch >= 'A') && (ch <= 'Z')) ? (ch - ('A' - 'a')) : ch); + } while (src[i++]); + return dst; +} // lowercase + + +void emit_D3D_opcode_d(Context *ctx, const char *opcode) +{ + char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s%s", ctx->coissue ? "+" : "", opcode, dst); +} // emit_D3D_opcode_d + + +void emit_D3D_opcode_s(Context *ctx, const char *opcode) +{ + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s %s", ctx->coissue ? "+" : "", opcode, src0); +} // emit_D3D_opcode_s + + +void emit_D3D_opcode_ss(Context *ctx, const char *opcode) +{ + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s %s, %s", ctx->coissue ? "+" : "", opcode, src0, src1); +} // emit_D3D_opcode_ss + + +void emit_D3D_opcode_ds(Context *ctx, const char *opcode) +{ + char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s%s, %s", ctx->coissue ? 
"+" : "", opcode, dst, src0); +} // emit_D3D_opcode_ds + + +void emit_D3D_opcode_dss(Context *ctx, const char *opcode) +{ + char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s%s, %s, %s", ctx->coissue ? "+" : "", + opcode, dst, src0, src1); +} // emit_D3D_opcode_dss + + +void emit_D3D_opcode_dsss(Context *ctx, const char *opcode) +{ + char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s%s, %s, %s, %s", ctx->coissue ? "+" : "", + opcode, dst, src0, src1, src2); +} // emit_D3D_opcode_dsss + + +void emit_D3D_opcode_dssss(Context *ctx, const char *opcode) +{ + char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst)); + char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2)); + char src3[64]; make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3)); + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx,"%s%s%s, %s, %s, %s, %s", ctx->coissue ? "+" : "", + opcode, dst, src0, src1, src2, src3); +} // emit_D3D_opcode_dssss + + +void emit_D3D_opcode(Context *ctx, const char *opcode) +{ + opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode); + output_line(ctx, "%s%s", ctx->coissue ? 
"+" : "", opcode); +} // emit_D3D_opcode + + +#define EMIT_D3D_OPCODE_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_D_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_d(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_S_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_s(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_SS_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_ss(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_DS_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_ds(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_DSS_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_dss(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_dsss(ctx, #op); \ + } +#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \ + void emit_D3D_##op(Context *ctx) { \ + emit_D3D_opcode_dssss(ctx, #op); \ + } + +EMIT_D3D_OPCODE_FUNC(NOP) +EMIT_D3D_OPCODE_DS_FUNC(MOV) +EMIT_D3D_OPCODE_DSS_FUNC(ADD) +EMIT_D3D_OPCODE_DSS_FUNC(SUB) +EMIT_D3D_OPCODE_DSSS_FUNC(MAD) +EMIT_D3D_OPCODE_DSS_FUNC(MUL) +EMIT_D3D_OPCODE_DS_FUNC(RCP) +EMIT_D3D_OPCODE_DS_FUNC(RSQ) +EMIT_D3D_OPCODE_DSS_FUNC(DP3) +EMIT_D3D_OPCODE_DSS_FUNC(DP4) +EMIT_D3D_OPCODE_DSS_FUNC(MIN) +EMIT_D3D_OPCODE_DSS_FUNC(MAX) +EMIT_D3D_OPCODE_DSS_FUNC(SLT) +EMIT_D3D_OPCODE_DSS_FUNC(SGE) +EMIT_D3D_OPCODE_DS_FUNC(EXP) +EMIT_D3D_OPCODE_DS_FUNC(LOG) +EMIT_D3D_OPCODE_DS_FUNC(LIT) +EMIT_D3D_OPCODE_DSS_FUNC(DST) +EMIT_D3D_OPCODE_DSSS_FUNC(LRP) +EMIT_D3D_OPCODE_DS_FUNC(FRC) +EMIT_D3D_OPCODE_DSS_FUNC(M4X4) +EMIT_D3D_OPCODE_DSS_FUNC(M4X3) +EMIT_D3D_OPCODE_DSS_FUNC(M3X4) +EMIT_D3D_OPCODE_DSS_FUNC(M3X3) +EMIT_D3D_OPCODE_DSS_FUNC(M3X2) +EMIT_D3D_OPCODE_S_FUNC(CALL) +EMIT_D3D_OPCODE_SS_FUNC(CALLNZ) +EMIT_D3D_OPCODE_SS_FUNC(LOOP) +EMIT_D3D_OPCODE_FUNC(RET) +EMIT_D3D_OPCODE_FUNC(ENDLOOP) +EMIT_D3D_OPCODE_S_FUNC(LABEL) +EMIT_D3D_OPCODE_DSS_FUNC(POW) +EMIT_D3D_OPCODE_DSS_FUNC(CRS) 
+EMIT_D3D_OPCODE_DSSS_FUNC(SGN) +EMIT_D3D_OPCODE_DS_FUNC(ABS) +EMIT_D3D_OPCODE_DS_FUNC(NRM) +EMIT_D3D_OPCODE_S_FUNC(REP) +EMIT_D3D_OPCODE_FUNC(ENDREP) +EMIT_D3D_OPCODE_S_FUNC(IF) +EMIT_D3D_OPCODE_FUNC(ELSE) +EMIT_D3D_OPCODE_FUNC(ENDIF) +EMIT_D3D_OPCODE_FUNC(BREAK) +EMIT_D3D_OPCODE_DS_FUNC(MOVA) +EMIT_D3D_OPCODE_D_FUNC(TEXKILL) +EMIT_D3D_OPCODE_DS_FUNC(TEXBEM) +EMIT_D3D_OPCODE_DS_FUNC(TEXBEML) +EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR) +EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX) +EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC) +EMIT_D3D_OPCODE_DS_FUNC(EXPP) +EMIT_D3D_OPCODE_DS_FUNC(LOGP) +EMIT_D3D_OPCODE_DSSS_FUNC(CND) +EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB) +EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH) +EMIT_D3D_OPCODE_DS_FUNC(TEXDP3) +EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3) +EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH) +EMIT_D3D_OPCODE_DSSS_FUNC(CMP) +EMIT_D3D_OPCODE_DSS_FUNC(BEM) +EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD) +EMIT_D3D_OPCODE_DS_FUNC(DSX) +EMIT_D3D_OPCODE_DS_FUNC(DSY) +EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD) +EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL) +EMIT_D3D_OPCODE_S_FUNC(BREAKP) + +// special cases for comparison opcodes... 
+const char *get_D3D_comparison_string(Context *ctx) +{ + const char *comps[] = { + "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le" + }; + + if (ctx->instruction_controls >= STATICARRAYLEN(comps)) + { + fail(ctx, "unknown comparison control"); + return ""; + } // if + + return comps[ctx->instruction_controls]; +} // get_D3D_comparison_string + +void emit_D3D_BREAKC(Context *ctx) +{ + char op[16]; + snprintf(op, sizeof (op), "break%s", get_D3D_comparison_string(ctx)); + emit_D3D_opcode_ss(ctx, op); +} // emit_D3D_BREAKC + +void emit_D3D_IFC(Context *ctx) +{ + char op[16]; + snprintf(op, sizeof (op), "if%s", get_D3D_comparison_string(ctx)); + emit_D3D_opcode_ss(ctx, op); +} // emit_D3D_IFC + +void emit_D3D_SETP(Context *ctx) +{ + char op[16]; + snprintf(op, sizeof (op), "setp%s", get_D3D_comparison_string(ctx)); + emit_D3D_opcode_dss(ctx, op); +} // emit_D3D_SETP + +void emit_D3D_DEF(Context *ctx) +{ + char dst[64]; + make_D3D_destarg_string(ctx, dst, sizeof (dst)); + const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? + char val0[32]; + char val1[32]; + char val2[32]; + char val3[32]; + floatstr(ctx, val0, sizeof (val0), val[0], 0); + floatstr(ctx, val1, sizeof (val1), val[1], 0); + floatstr(ctx, val2, sizeof (val2), val[2], 0); + floatstr(ctx, val3, sizeof (val3), val[3], 0); + output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3); +} // emit_D3D_DEF + +void emit_D3D_DEFI(Context *ctx) +{ + char dst[64]; + make_D3D_destarg_string(ctx, dst, sizeof (dst)); + const int32 *x = (const int32 *) ctx->dwords; + output_line(ctx, "defi%s, %d, %d, %d, %d", dst, + (int) x[0], (int) x[1], (int) x[2], (int) x[3]); +} // emit_D3D_DEFI + +void emit_D3D_DEFB(Context *ctx) +{ + char dst[64]; + make_D3D_destarg_string(ctx, dst, sizeof (dst)); + output_line(ctx, "defb%s, %s", dst, ctx->dwords[0] ? 
"true" : "false"); +} // emit_D3D_DEFB + + +static const char *usagestrs[] = { + "_position", "_blendweight", "_blendindices", "_normal", "_psize", + "_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont", + "_color", "_fog", "_depth", "_sample" +}; + +void emit_D3D_DCL(Context *ctx) +{ + char dst[64]; + make_D3D_destarg_string(ctx, dst, sizeof (dst)); + const DestArgInfo *arg = &ctx->dest_arg; + const char *usage_str = ""; + char index_str[16] = { '\0' }; + + if (arg->regtype == REG_TYPE_SAMPLER) + { + switch ((const TextureType) ctx->dwords[0]) + { + case TEXTURE_TYPE_2D: usage_str = "_2d"; break; + case TEXTURE_TYPE_CUBE: usage_str = "_cube"; break; + case TEXTURE_TYPE_VOLUME: usage_str = "_volume"; break; + default: fail(ctx, "unknown sampler texture type"); return; + } // switch + } // if + + else if (arg->regtype == REG_TYPE_MISCTYPE) + { + switch ((const MiscTypeType) arg->regnum) + { + case MISCTYPE_TYPE_POSITION: + case MISCTYPE_TYPE_FACE: + usage_str = ""; // just become "dcl vFace" or whatever. + break; + default: fail(ctx, "unknown misc register type"); return; + } // switch + } // else if + + else + { + const uint32 usage = ctx->dwords[0]; + const uint32 index = ctx->dwords[1]; + usage_str = usagestrs[usage]; + if (index != 0) + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + } // else + + output_line(ctx, "dcl%s%s%s", usage_str, index_str, dst); +} // emit_D3D_DCL + + +void emit_D3D_TEXCRD(Context *ctx) +{ + // this opcode looks and acts differently depending on the shader model. + if (shader_version_atleast(ctx, 1, 4)) + emit_D3D_opcode_ds(ctx, "texcrd"); + else + emit_D3D_opcode_d(ctx, "texcoord"); +} // emit_D3D_TEXCOORD + +void emit_D3D_TEXLD(Context *ctx) +{ + // this opcode looks and acts differently depending on the shader model. 
+ if (shader_version_atleast(ctx, 2, 0)) + { + if (ctx->instruction_controls == CONTROL_TEXLD) + emit_D3D_opcode_dss(ctx, "texld"); + else if (ctx->instruction_controls == CONTROL_TEXLDP) + emit_D3D_opcode_dss(ctx, "texldp"); + else if (ctx->instruction_controls == CONTROL_TEXLDB) + emit_D3D_opcode_dss(ctx, "texldb"); + } // if + + else if (shader_version_atleast(ctx, 1, 4)) + { + emit_D3D_opcode_ds(ctx, "texld"); + } // else if + + else + { + emit_D3D_opcode_d(ctx, "tex"); + } // else +} // emit_D3D_TEXLD + +void emit_D3D_SINCOS(Context *ctx) +{ + // this opcode needs extra registers for sm2 and lower. + if (!shader_version_atleast(ctx, 3, 0)) + emit_D3D_opcode_dsss(ctx, "sincos"); + else + emit_D3D_opcode_ds(ctx, "sincos"); +} // emit_D3D_SINCOS + +#endif // SUPPORT_PROFILE_D3D + +#pragma GCC visibility pop \ No newline at end of file diff --git a/profiles/mojoshader_profile_glsl.c b/profiles/mojoshader_profile_glsl.c new file mode 100644 index 00000000..7760e147 --- /dev/null +++ b/profiles/mojoshader_profile_glsl.c @@ -0,0 +1,2307 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +#if SUPPORT_PROFILE_GLSL + +#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \ + void emit_GLSL_##op(Context *ctx) { \ + fail(ctx, #op " unimplemented in glsl profile"); \ + } + +static inline const char *get_GLSL_register_string(Context *ctx, + const RegisterType regtype, const int regnum, + char *regnum_str, const size_t regnum_size) +{ + // turns out these are identical at the moment. 
+ return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); +} // get_GLSL_register_string + +const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype) +{ + switch (rtype) + { + case REG_TYPE_CONST: return "vec4"; + case REG_TYPE_CONSTINT: return "ivec4"; + case REG_TYPE_CONSTBOOL: return "bool"; + default: fail(ctx, "BUG: used a uniform we don't know how to define."); + } // switch + + return NULL; +} // get_GLSL_uniform_type + +const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt, + int regnum, char *buf, + const size_t len) +{ + char regnum_str[16]; + const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum, + regnum_str, sizeof (regnum_str)); + snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str); + return buf; +} // get_GLSL_varname_in_buf + + +const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum) +{ + char buf[64]; + get_GLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_GLSL_varname + + +static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx, + const int base, const int size, + char *buf, const size_t buflen) +{ + const char *type = ctx->shader_type_str; + snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size); + return buf; +} // get_GLSL_const_array_varname_in_buf + +const char *get_GLSL_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_GLSL_const_array_varname + + +static inline const char *get_GLSL_input_array_varname(Context *ctx, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "%s", "vertex_input_array"); + return buf; +} // get_GLSL_input_array_varname + + +const char *get_GLSL_uniform_array_varname(Context *ctx, + const RegisterType regtype, + char *buf, const size_t len) +{ + const char *shadertype = ctx->shader_type_str; + const char 
*type = get_GLSL_uniform_type(ctx, regtype); + snprintf(buf, len, "%s_uniforms_%s", shadertype, type); + return buf; +} // get_GLSL_uniform_array_varname + +const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len) +{ + const DestArgInfo *arg = &ctx->dest_arg; + return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_GLSL_destarg_varname + +const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx, + char *buf, size_t len) +{ + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + *buf = '\0'; + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_GLSL_srcarg_varname + + +const char *make_GLSL_destarg_assign(Context *, char *, const size_t, + const char *, ...) ISPRINTF(4,5); + +const char *make_GLSL_destarg_assign(Context *ctx, char *buf, + const size_t buflen, + const char *fmt, ...) +{ + int need_parens = 0; + const DestArgInfo *arg = &ctx->dest_arg; + + if (arg->writemask == 0) + { + *buf = '\0'; + return buf; // no writemask? It's a no-op. + } // if + + char clampbuf[32] = { '\0' }; + const char *clampleft = ""; + const char *clampright = ""; + if (arg->result_mod & MOD_SATURATE) + { + const int vecsize = vecsize_from_writemask(arg->writemask); + clampleft = "clamp("; + if (vecsize == 1) + clampright = ", 0.0, 1.0)"; + else + { + snprintf(clampbuf, sizeof (clampbuf), + ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize); + clampright = clampbuf; + } // else + } // if + + // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. + + // CENTROID only allowed in DCL opcodes, which shouldn't come through here. + assert((arg->result_mod & MOD_CENTROID) == 0); + + if (ctx->predicated) + { + fail(ctx, "predicated destinations unsupported"); // !!! 
FIXME + *buf = '\0'; + return buf; + } // if + + char operation[256]; + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(operation, sizeof (operation), fmt, ap); + va_end(ap); + if (len >= sizeof (operation)) + { + fail(ctx, "operation string too large"); // I'm lazy. :P + *buf = '\0'; + return buf; + } // if + + const char *result_shift_str = ""; + switch (arg->result_shift) + { + case 0x1: result_shift_str = " * 2.0"; break; + case 0x2: result_shift_str = " * 4.0"; break; + case 0x3: result_shift_str = " * 8.0"; break; + case 0xD: result_shift_str = " / 8.0"; break; + case 0xE: result_shift_str = " / 4.0"; break; + case 0xF: result_shift_str = " / 2.0"; break; + } // switch + need_parens |= (result_shift_str[0] != '\0'); + + char regnum_str[16]; + const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + char writemask_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !writemask_xyzw(arg->writemask)) + { + writemask_str[i++] = '.'; + if (arg->writemask0) writemask_str[i++] = 'x'; + if (arg->writemask1) writemask_str[i++] = 'y'; + if (arg->writemask2) writemask_str[i++] = 'z'; + if (arg->writemask3) writemask_str[i++] = 'w'; + } // if + writemask_str[i] = '\0'; + assert(i < sizeof (writemask_str)); + + const char *leftparen = (need_parens) ? "(" : ""; + const char *rightparen = (need_parens) ? ")" : ""; + + snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;", + ctx->shader_type_str, regtype_str, regnum_str, writemask_str, + clampleft, leftparen, operation, rightparen, result_shift_str, + clampright); + // !!! FIXME: make sure the scratch buffer was large enough. 
+ return buf; +} // make_GLSL_destarg_assign + + +char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize, + const int swizzle, const int writemask) +{ + size_t i = 0; + if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) ) + { + const int writemask0 = (writemask >> 0) & 0x1; + const int writemask1 = (writemask >> 1) & 0x1; + const int writemask2 = (writemask >> 2) & 0x1; + const int writemask3 = (writemask >> 3) & 0x1; + + const int swizzle_x = (swizzle >> 0) & 0x3; + const int swizzle_y = (swizzle >> 2) & 0x3; + const int swizzle_z = (swizzle >> 4) & 0x3; + const int swizzle_w = (swizzle >> 6) & 0x3; + + swiz_str[i++] = '.'; + if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x]; + if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y]; + if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z]; + if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w]; + } // if + assert(i < strsize); + swiz_str[i] = '\0'; + return swiz_str; +} // make_GLSL_swizzle_string + + +const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx, + const int writemask, char *buf, + const size_t buflen) +{ + *buf = '\0'; + + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + + const char *premod_str = ""; + const char *postmod_str = ""; + switch (arg->src_mod) + { + case SRCMOD_NEGATE: + premod_str = "-"; + break; + + case SRCMOD_BIASNEGATE: + premod_str = "-("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_BIAS: + premod_str = "("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_SIGNNEGATE: + premod_str = "-(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_SIGN: + premod_str = "(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_COMPLEMENT: + premod_str = "(1.0 - "; + postmod_str = ")"; + break; + + case SRCMOD_X2NEGATE: + premod_str = "-("; + postmod_str = " * 2.0)"; + break; + + case 
SRCMOD_X2: + premod_str = "("; + postmod_str = " * 2.0)"; + break; + + case SRCMOD_DZ: + fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME + postmod_str = "_dz"; + break; + + case SRCMOD_DW: + fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME + postmod_str = "_dw"; + break; + + case SRCMOD_ABSNEGATE: + premod_str = "-abs("; + postmod_str = ")"; + break; + + case SRCMOD_ABS: + premod_str = "abs("; + postmod_str = ")"; + break; + + case SRCMOD_NOT: + premod_str = "!"; + break; + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + } // switch + + const char *regtype_str = NULL; + + if (!arg->relative) + { + regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, + (char *) alloca(64), 64); + } // if + + const char *rel_lbracket = ""; + char rel_offset[32] = { '\0' }; + const char *rel_rbracket = ""; + char rel_swizzle[4] = { '\0' }; + const char *rel_regtype_str = ""; + if (arg->relative) + { + if (arg->regtype == REG_TYPE_INPUT) + regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64); + else + { + assert(arg->regtype == REG_TYPE_CONST); + const int arrayidx = arg->relative_array->index; + const int offset = arg->regnum - arrayidx; + assert(offset >= 0); + if (arg->relative_array->constant) + { + const int arraysize = arg->relative_array->count; + regtype_str = get_GLSL_const_array_varname_in_buf(ctx, + arrayidx, arraysize, (char *) alloca(64), 64); + if (offset != 0) + snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset); + } // if + else + { + regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype, + (char *) alloca(64), 64); + if (offset == 0) + { + snprintf(rel_offset, sizeof (rel_offset), + "ARRAYBASE_%d + ", arrayidx); + } // if + else + { + snprintf(rel_offset, sizeof (rel_offset), + "(ARRAYBASE_%d + %d) + ", arrayidx, offset); + } // else + } // else + } // else + + rel_lbracket = "["; + + if (arg->relative_regtype == REG_TYPE_LOOP) + { + rel_regtype_str = "aL"; + 
rel_swizzle[0] = '\0'; + rel_swizzle[1] = '\0'; + rel_swizzle[2] = '\0'; + } // if + else + { + rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype, + arg->relative_regnum, + (char *) alloca(64), 64); + rel_swizzle[0] = '.'; + rel_swizzle[1] = swizzle_channels[arg->relative_component]; + rel_swizzle[2] = '\0'; + } // else + rel_rbracket = "]"; + } // if + + char swiz_str[6] = { '\0' }; + if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) + { + make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str), + arg->swizzle, writemask); + } // if + + if (regtype_str == NULL) + { + fail(ctx, "Unknown source register type."); + return buf; + } // if + + snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s", + premod_str, regtype_str, rel_lbracket, rel_offset, + rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str, + postmod_str); + // !!! FIXME: make sure the scratch buffer was large enough. + return buf; +} // make_GLSL_srcarg_string + +// generate some convenience functions. +#define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \ + static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \ + const size_t idx, char *buf, \ + const size_t buflen) { \ + return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \ + } +MAKE_GLSL_SRCARG_STRING_(x, (1 << 0)) +MAKE_GLSL_SRCARG_STRING_(y, (1 << 1)) +MAKE_GLSL_SRCARG_STRING_(z, (1 << 2)) +MAKE_GLSL_SRCARG_STRING_(w, (1 << 3)) +MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0)) +MAKE_GLSL_SRCARG_STRING_(full, 0xF) +MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask) +MAKE_GLSL_SRCARG_STRING_(vec3, 0x7) +MAKE_GLSL_SRCARG_STRING_(vec2, 0x3) +#undef MAKE_GLSL_SRCARG_STRING_ + +// special cases for comparison opcodes... 
+ +const char *get_GLSL_comparison_string_scalar(Context *ctx) +{ + const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" }; + if (ctx->instruction_controls >= STATICARRAYLEN(comps)) + { + fail(ctx, "unknown comparison control"); + return ""; + } // if + + return comps[ctx->instruction_controls]; +} // get_GLSL_comparison_string_scalar + +const char *get_GLSL_comparison_string_vector(Context *ctx) +{ + const char *comps[] = { + "", "greaterThan", "equal", "greaterThanEqual", "lessThan", + "notEqual", "lessThanEqual" + }; + + if (ctx->instruction_controls >= STATICARRAYLEN(comps)) + { + fail(ctx, "unknown comparison control"); + return ""; + } // if + + return comps[ctx->instruction_controls]; +} // get_GLSL_comparison_string_vector + + +void emit_GLSL_start(Context *ctx, const char *profilestr) +{ + if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx)) + { + failf(ctx, "Shader type %u unsupported in this profile.", + (uint) ctx->shader_type); + return; + } // if + + else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0) + { + // No gl_FragData[] before GLSL 1.10, so we have to force the version. 
+ push_output(ctx, &ctx->preflight); + output_line(ctx, "#version 110"); + pop_output(ctx); + } // else if + + #if SUPPORT_PROFILE_GLSL120 + else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0) + { + ctx->profile_supports_glsl120 = 1; + push_output(ctx, &ctx->preflight); + output_line(ctx, "#version 120"); + pop_output(ctx); + } // else if + #endif + + #if SUPPORT_PROFILE_GLSLES + else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSLES) == 0) + { + ctx->profile_supports_glsles = 1; + push_output(ctx, &ctx->preflight); + output_line(ctx, "#version 100"); + if (shader_is_vertex(ctx)) + output_line(ctx, "precision highp float;"); + else + output_line(ctx, "precision mediump float;"); + output_line(ctx, "precision mediump int;"); + pop_output(ctx); + } // else if + #endif + + else + { + failf(ctx, "Profile '%s' unsupported or unknown.", profilestr); + return; + } // else + + push_output(ctx, &ctx->mainline_intro); + output_line(ctx, "void main()"); + output_line(ctx, "{"); + pop_output(ctx); + + set_output(ctx, &ctx->mainline); + ctx->indent++; +} // emit_GLSL_start + +void emit_GLSL_RET(Context *ctx); +void emit_GLSL_end(Context *ctx) +{ + // ps_1_* writes color to r0 instead oC0. We move it to the right place. + // We don't have to worry about a RET opcode messing this up, since + // RET isn't available before ps_2_0. + if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + { + const char *shstr = ctx->shader_type_str; + set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); + output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr); + } // if + else if (shader_is_vertex(ctx)) + { +#ifdef MOJOSHADER_FLIP_RENDERTARGET + output_line(ctx, "gl_Position.y = gl_Position.y * vpFlip;"); +#endif +#ifdef MOJOSHADER_DEPTH_CLIPPING + output_line(ctx, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;"); +#endif + } // else if + + // force a RET opcode if we're at the end of the stream without one. 
+ if (ctx->previous_opcode != OPCODE_RET) + emit_GLSL_RET(ctx); +} // emit_GLSL_end + +void emit_GLSL_phase(Context *ctx) +{ + // no-op in GLSL. +} // emit_GLSL_phase + +void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype, + const int size) +{ + if (size > 0) + { + char buf[64]; + get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf)); + const char *typ; + switch (regtype) + { + case REG_TYPE_CONST: typ = "vec4"; break; + case REG_TYPE_CONSTINT: typ ="ivec4"; break; + case REG_TYPE_CONSTBOOL: typ = "bool"; break; + default: + { + fail(ctx, "BUG: used a uniform we don't know how to define."); + return; + } // default + } // switch + output_line(ctx, "uniform %s %s[%d];", typ, buf, size); + } // if +} // output_GLSL_uniform_array + +void emit_GLSL_finalize(Context *ctx) +{ + // throw some blank lines around to make source more readable. + push_output(ctx, &ctx->globals); + output_blank_line(ctx); + pop_output(ctx); + + // If we had a relative addressing of REG_TYPE_INPUT, we need to build + // an array for it at the start of main(). GLSL doesn't let you specify + // arrays of attributes. + //vec4 blah_array[BIGGEST_ARRAY]; + if (ctx->have_relative_input_registers) // !!! 
FIXME + fail(ctx, "Relative addressing of input registers not supported."); + + push_output(ctx, &ctx->preflight); + output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count); + output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count); + output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count); +#ifdef MOJOSHADER_FLIP_RENDERTARGET + if (shader_is_vertex(ctx)) + output_line(ctx, "uniform float vpFlip;"); +#endif + pop_output(ctx); +} // emit_GLSL_finalize + +void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum) +{ + char varname[64]; + get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + push_output(ctx, &ctx->globals); + switch (regtype) + { + case REG_TYPE_ADDRESS: + if (shader_is_vertex(ctx)) + output_line(ctx, "ivec4 %s;", varname); + else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. + { + // We have to map texture registers to temps for ps_1_1, since + // they work like temps, initialize with tex coords, and the + // ps_1_1 TEX opcode expects to overwrite it. + if (!shader_version_atleast(ctx, 1, 4)) + { +#if SUPPORT_PROFILE_GLSLES + // GLSL ES does not have gl_TexCoord + if (support_glsles(ctx)) + output_line(ctx, "vec4 %s = io_%i_%i;", + varname, MOJOSHADER_USAGE_TEXCOORD, regnum); + else +#endif + output_line(ctx, "vec4 %s = gl_TexCoord[%d];", + varname, regnum); + } // if + } // else if + break; + case REG_TYPE_PREDICATE: + output_line(ctx, "bvec4 %s;", varname); + break; + case REG_TYPE_TEMP: + output_line(ctx, "vec4 %s;", varname); + break; + case REG_TYPE_LOOP: + break; // no-op. We declare these in for loops at the moment. + case REG_TYPE_LABEL: + break; // no-op. If we see it here, it means we optimized it out. 
+ default: + fail(ctx, "BUG: we used a register we don't know how to define."); + break; + } // switch + pop_output(ctx); +} // emit_GLSL_global + +void emit_GLSL_array(Context *ctx, VariableList *var) +{ + // All uniforms (except constant arrays, which only get pushed once at + // compile time) are now packed into a single array, so we can batch + // the uniform transfers. So this doesn't actually define an array + // here; the one, big array is emitted during finalization instead. + // However, we need to #define the offset into the one, big array here, + // and let dereferences use that #define. + const int base = var->index; + const int glslbase = ctx->uniform_float4_count; + push_output(ctx, &ctx->globals); + output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase); + pop_output(ctx); + var->emit_position = glslbase; +} // emit_GLSL_array + +void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist, + int base, int size) +{ + char varname[64]; + get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname)); + +#if 0 + // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120. + // !!! FIXME: (the 1.20 spec says it should work, though, I think...) + if (support_glsl120(ctx)) + { + // GLSL 1.20 can do constant arrays. 
+ const char *cstr = NULL; + push_output(ctx, &ctx->globals); + output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size); + ctx->indent++; + + int i; + for (i = 0; i < size; i++) + { + while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) + clist = clist->next; + assert(clist->constant.index == (base + i)); + + char val0[32]; + char val1[32]; + char val2[32]; + char val3[32]; + floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); + floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); + floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); + floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); + + output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3, + (i < (size-1)) ? "," : ""); + + clist = clist->next; + } // for + + ctx->indent--; + output_line(ctx, ");"); + pop_output(ctx); + } // if + + else +#endif + { + // stock GLSL 1.0 can't do constant arrays, so make a uniform array + // and have the OpenGL glue assign it at link time. Lame! + push_output(ctx, &ctx->globals); + output_line(ctx, "uniform vec4 %s[%d];", varname, size); + pop_output(ctx); + } // else +} // emit_GLSL_const_array + +void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var) +{ + // Now that we're pushing all the uniforms as one big array, pack these + // down, so if we only use register c439, it'll actually map to + // glsl_uniforms_vec4[0]. As we push one big array, this will prevent + // uploading unused data. 
+ + char varname[64]; + char name[64]; + int index = 0; + + get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + push_output(ctx, &ctx->globals); + + if (var == NULL) + { + get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + + if (regtype == REG_TYPE_CONST) + index = ctx->uniform_float4_count; + else if (regtype == REG_TYPE_CONSTINT) + index = ctx->uniform_int4_count; + else if (regtype == REG_TYPE_CONSTBOOL) + index = ctx->uniform_bool_count; + else // get_GLSL_uniform_array_varname() would have called fail(). + assert(!(ctx->isfail)); + + output_line(ctx, "#define %s %s[%d]", varname, name, index); + } // if + + else + { + const int arraybase = var->index; + if (var->constant) + { + get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count, + name, sizeof (name)); + index = (regnum - arraybase); + } // if + else + { + assert(var->emit_position != -1); + get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + index = (regnum - arraybase) + var->emit_position; + } // else + + output_line(ctx, "#define %s %s[%d]", varname, name, index); + } // else + + pop_output(ctx); +} // emit_GLSL_uniform + +void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb) +{ + const char *type = ""; + switch (ttype) + { + case TEXTURE_TYPE_2D: type = "sampler2D"; break; + case TEXTURE_TYPE_CUBE: type = "samplerCube"; break; + case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break; + default: fail(ctx, "BUG: used a sampler we don't know how to define."); + } // switch + + char var[64]; + get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var)); + + push_output(ctx, &ctx->globals); + output_line(ctx, "uniform %s %s;", type, var); + if (tb) // This sampler used a ps_1_1 TEXBEM opcode? 
+ { + char name[64]; + const int index = ctx->uniform_float4_count; + ctx->uniform_float4_count += 2; + get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name)); + output_line(ctx, "#define %s_texbem %s[%d]", var, name, index); + output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1); + } // if + pop_output(ctx); +} // emit_GLSL_sampler + +void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags) +{ + // !!! FIXME: this function doesn't deal with write masks at all yet! + const char *usage_str = NULL; + const char *arrayleft = ""; + const char *arrayright = ""; + char index_str[16] = { '\0' }; + char var[64]; + + get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var)); + + //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? + + if (index != 0) // !!! FIXME: a lot of these MUST be zero. + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + + if (shader_is_vertex(ctx)) + { + // pre-vs3 output registers. + // these don't ever happen in DCL opcodes, I think. Map to vs_3_* + // output registers. 
+ if (!shader_version_atleast(ctx, 3, 0)) + { + if (regtype == REG_TYPE_RASTOUT) + { + regtype = REG_TYPE_OUTPUT; + index = regnum; + switch ((const RastOutType) regnum) + { + case RASTOUT_TYPE_POSITION: + usage = MOJOSHADER_USAGE_POSITION; + break; + case RASTOUT_TYPE_FOG: + usage = MOJOSHADER_USAGE_FOG; + break; + case RASTOUT_TYPE_POINT_SIZE: + usage = MOJOSHADER_USAGE_POINTSIZE; + break; + } // switch + } // if + + else if (regtype == REG_TYPE_ATTROUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_COLOR; + index = regnum; + } // else if + + else if (regtype == REG_TYPE_TEXCRDOUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_TEXCOORD; + index = regnum; + } // else if + } // if + + // to avoid limitations of various GL entry points for input + // attributes (glSecondaryColorPointer() can only take 3 component + // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other + // issues), we set up all inputs as generic vertex attributes, so we + // can pass data in just about any form, and ignore the built-in GLSL + // attributes like gl_SecondaryColor. Output needs to use the the + // built-ins, though, but we don't have to worry about the GL entry + // point limitations there. + + if (regtype == REG_TYPE_INPUT) + { + push_output(ctx, &ctx->globals); + output_line(ctx, "attribute vec4 %s;", var); + pop_output(ctx); + } // if + + else if (regtype == REG_TYPE_OUTPUT) + { + switch (usage) + { + case MOJOSHADER_USAGE_POSITION: + if (index == 0) + { + usage_str = "gl_Position"; + } // if + break; + case MOJOSHADER_USAGE_POINTSIZE: + usage_str = "gl_PointSize"; + break; + case MOJOSHADER_USAGE_COLOR: +#if SUPPORT_PROFILE_GLSLES + if (support_glsles(ctx)) + break; // GLSL ES does not have gl_FrontColor +#endif + index_str[0] = '\0'; // no explicit number. 
+ if (index == 0) + { + usage_str = "gl_FrontColor"; + } // if + else if (index == 1) + { + usage_str = "gl_FrontSecondaryColor"; + } // else if + break; + case MOJOSHADER_USAGE_FOG: + usage_str = "gl_FogFragCoord"; + break; + case MOJOSHADER_USAGE_TEXCOORD: +#if SUPPORT_PROFILE_GLSLES + if (support_glsles(ctx)) + break; // GLSL ES does not have gl_TexCoord +#endif + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + usage_str = "gl_TexCoord"; + arrayleft = "["; + arrayright = "]"; + break; + default: + // !!! FIXME: we need to deal with some more built-in varyings here. + break; + } // switch + + // !!! FIXME: the #define is a little hacky, but it means we don't + // !!! FIXME: have to track these separately if this works. + push_output(ctx, &ctx->globals); + // no mapping to built-in var? Just make it a regular global, pray. + if (usage_str == NULL) + { +#if SUPPORT_PROFILE_GLSLES + if (support_glsles(ctx)) + output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index); + else +#endif + output_line(ctx, "varying vec4 io_%i_%i;", usage, index); + output_line(ctx, "#define %s io_%i_%i", var, usage, index); + } // if + else + { + output_line(ctx, "#define %s %s%s%s%s", var, usage_str, + arrayleft, index_str, arrayright); + } // else + pop_output(ctx); + } // else if + + else + { + fail(ctx, "unknown vertex shader attribute register"); + } // else + } // if + + else if (shader_is_pixel(ctx)) + { + // samplers DCLs get handled in emit_GLSL_sampler(). + + if (flags & MOD_CENTROID) // !!! FIXME + { + failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); + return; + } // if + + if (regtype == REG_TYPE_COLOROUT) + { + if (!ctx->have_multi_color_outputs) + usage_str = "gl_FragColor"; // maybe faster? 
+ else + { + snprintf(index_str, sizeof (index_str), "%u", (uint) regnum); + usage_str = "gl_FragData"; + arrayleft = "["; + arrayright = "]"; + } // else + } // if + + else if (regtype == REG_TYPE_DEPTHOUT) + usage_str = "gl_FragDepth"; + + // !!! FIXME: can you actualy have a texture register with COLOR usage? + else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT)) + { +#if SUPPORT_PROFILE_GLSLES + if (!support_glsles(ctx)) + { +#endif + if (usage == MOJOSHADER_USAGE_TEXCOORD) + { + // ps_1_1 does a different hack for this attribute. + // Refer to emit_GLSL_global()'s REG_TYPE_ADDRESS code. + if (shader_version_atleast(ctx, 1, 4)) + { + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + usage_str = "gl_TexCoord"; + arrayleft = "["; + arrayright = "]"; + } // if + } // if + + else if (usage == MOJOSHADER_USAGE_COLOR) + { + index_str[0] = '\0'; // no explicit number. + if (index == 0) + { + usage_str = "gl_Color"; + } // if + else if (index == 1) + { + usage_str = "gl_SecondaryColor"; + } // else if + // FIXME: Does this even matter when we have varyings? -flibit + // else + // fail(ctx, "unsupported color index"); + } // else if +#if SUPPORT_PROFILE_GLSLES + } // if +#endif + } // else if + + else if (regtype == REG_TYPE_MISCTYPE) + { + const MiscTypeType mt = (MiscTypeType) regnum; + if (mt == MISCTYPE_TYPE_FACE) + { + push_output(ctx, &ctx->globals); + output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var); + pop_output(ctx); + } // if + else if (mt == MISCTYPE_TYPE_POSITION) + { + index_str[0] = '\0'; // no explicit number. + usage_str = "gl_FragCoord"; // !!! FIXME: is this the same coord space as D3D? + } // else if + else + { + fail(ctx, "BUG: unhandled misc register"); + } // else + } // else if + + else + { + fail(ctx, "unknown pixel shader attribute register"); + } // else + + push_output(ctx, &ctx->globals); + // no mapping to built-in var? Just make it a regular global, pray. 
+ if (usage_str == NULL) + { +#if SUPPORT_PROFILE_GLSLES + if (support_glsles(ctx)) + output_line(ctx, "varying highp vec4 io_%i_%i;", usage, index); + else +#endif + output_line(ctx, "varying vec4 io_%i_%i;", usage, index); + output_line(ctx, "#define %s io_%i_%i", var, usage, index); + } // if + else + { + output_line(ctx, "#define %s %s%s%s%s", var, usage_str, + arrayleft, index_str, arrayright); + } // else + pop_output(ctx); + } // else if + + else + { + fail(ctx, "Unknown shader type"); // state machine should catch this. + } // else +} // emit_GLSL_attribute + +void emit_GLSL_NOP(Context *ctx) +{ + // no-op is a no-op. :) +} // emit_GLSL_NOP + +void emit_GLSL_MOV(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_MOV + +void emit_GLSL_ADD(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_ADD + +void emit_GLSL_SUB(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_SUB + +void emit_GLSL_MAD(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2); + output_line(ctx, "%s", 
code); +} // emit_GLSL_MAD + +void emit_GLSL_MUL(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_MUL + +void emit_GLSL_RCP(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_RCP + +void emit_GLSL_RSQ(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_RSQ + +void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1, + const char *extra) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char castleft[16] = { '\0' }; + const char *castright = ""; + if (vecsize != 1) + { + snprintf(castleft, sizeof (castleft), "vec%d(", vecsize); + castright = ")"; + } // if + + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s", + castleft, src0, src1, extra, castright); + output_line(ctx, "%s", code); +} // emit_GLSL_dotprod + +void emit_GLSL_DP3(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + emit_GLSL_dotprod(ctx, src0, src1, ""); +} // emit_GLSL_DP3 + +void emit_GLSL_DP4(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1)); + emit_GLSL_dotprod(ctx, src0, src1, ""); +} // emit_GLSL_DP4 + +void emit_GLSL_MIN(Context *ctx) +{ + char src0[64]; 
make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_MIN + +void emit_GLSL_MAX(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_MAX + +void emit_GLSL_SLT(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants. + if (vecsize == 1) + make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1); + else + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec%d(lessThan(%s, %s))", + vecsize, src0, src1); + } // else + output_line(ctx, "%s", code); +} // emit_GLSL_SLT + +void emit_GLSL_SGE(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants. 
+ if (vecsize == 1) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "float(%s >= %s)", src0, src1); + } // if + else + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec%d(greaterThanEqual(%s, %s))", + vecsize, src0, src1); + } // else + output_line(ctx, "%s", code); +} // emit_GLSL_SGE + +void emit_GLSL_EXP(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_EXP + +void emit_GLSL_LOG(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_LOG + +void emit_GLSL_LIT_helper(Context *ctx) +{ + const char *maxp = "127.9961"; // value from the dx9 reference. + + if (ctx->glsl_generated_lit_helper) + return; + + ctx->glsl_generated_lit_helper = 1; + + push_output(ctx, &ctx->helpers); + output_line(ctx, "vec4 LIT(const vec4 src)"); + output_line(ctx, "{"); ctx->indent++; + output_line(ctx, "float power = clamp(src.w, -%s, %s);",maxp,maxp); + output_line(ctx, "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);"); + output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++; + output_line(ctx, "retval.y = src.x;"); + output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++; + output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--; + output_line(ctx, "}"); ctx->indent--; + output_line(ctx, "}"); + output_line(ctx, "return retval;"); ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + pop_output(ctx); +} // emit_GLSL_LIT_helper + +void emit_GLSL_LIT(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char code[128]; + emit_GLSL_LIT_helper(ctx); + make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0); + output_line(ctx, "%s", code); +} // 
emit_GLSL_LIT + +void emit_GLSL_DST(Context *ctx) +{ + // !!! FIXME: needs to take ctx->dst_arg.writemask into account. + char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y)); + char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y)); + char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z)); + char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w)); + + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec4(1.0, %s * %s, %s, %s)", + src0_y, src1_y, src0_z, src1_w); + output_line(ctx, "%s", code); +} // emit_GLSL_DST + +void emit_GLSL_LRP(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)", + src2, src1, src0); + output_line(ctx, "%s", code); +} // emit_GLSL_LRP + +void emit_GLSL_FRC(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_FRC + +void emit_GLSL_M4X4(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); + char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3)); + char code[256]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2, src0, row3); + output_line(ctx, "%s", code); +} // emit_GLSL_M4X4 + +void 
// Emit the M3X4 opcode: a vec4 built from four dot products of the vec3
// form of source 0 against the vec3 forms of sources 1 through 4.
void emit_GLSL_M3X4(Context *ctx)
{
    char vec[64];
    char r0[64];
    char r1[64];
    char r2[64];
    char r3[64];
    char line[256];

    make_GLSL_srcarg_string_vec3(ctx, 0, vec, sizeof (vec));
    make_GLSL_srcarg_string_vec3(ctx, 1, r0, sizeof (r0));
    make_GLSL_srcarg_string_vec3(ctx, 2, r1, sizeof (r1));
    make_GLSL_srcarg_string_vec3(ctx, 3, r2, sizeof (r2));
    make_GLSL_srcarg_string_vec3(ctx, 4, r3, sizeof (r3));

    make_GLSL_destarg_assign(ctx, line, sizeof (line),
                             "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
                             vec, r0,
                             vec, r1,
                             vec, r2,
                             vec, r3);
    output_line(ctx, "%s", line);
} // emit_GLSL_M3X4
make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + + char code[256]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec2(dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1); + output_line(ctx, "%s", code); +} // emit_GLSL_M3X2 + +void emit_GLSL_CALL(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + if (ctx->loops > 0) + output_line(ctx, "%s(aL);", src0); + else + output_line(ctx, "%s();", src0); +} // emit_GLSL_CALL + +void emit_GLSL_CALLNZ(Context *ctx) +{ + // !!! FIXME: if src1 is a constbool that's true, we can remove the + // !!! FIXME: if. If it's false, we can make this a no-op. + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + + if (ctx->loops > 0) + output_line(ctx, "if (%s) { %s(aL); }", src1, src0); + else + output_line(ctx, "if (%s) { %s(); }", src1, src0); +} // emit_GLSL_CALLNZ + +void emit_GLSL_LOOP(Context *ctx) +{ + // !!! FIXME: swizzle? + char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var)); + assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday. + output_line(ctx, "{"); + ctx->indent++; + output_line(ctx, "const int aLend = %s.x + %s.y;", var, var); + output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var); + ctx->indent++; +} // emit_GLSL_LOOP + +void emit_GLSL_RET(Context *ctx) +{ + // thankfully, the MSDN specs say a RET _has_ to end a function...no + // early returns. So if you hit one, you know you can safely close + // a high-level function. + ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + set_output(ctx, &ctx->subroutines); // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily. 
+} // emit_GLSL_RET + +void emit_GLSL_ENDLOOP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); + ctx->indent--; + output_line(ctx, "}"); +} // emit_GLSL_ENDLOOP + +void emit_GLSL_LABEL(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + const int label = ctx->source_args[0].regnum; + RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); + assert(ctx->output == ctx->subroutines); // not mainline, etc. + assert(ctx->indent == 0); // we shouldn't be in the middle of a function. + + // MSDN specs say CALL* has to come before the LABEL, so we know if we + // can ditch the entire function here as unused. + if (reg == NULL) + set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. + + // !!! FIXME: it would be nice if we could determine if a function is + // !!! FIXME: only called once and, if so, forcibly inline it. + + const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; + output_line(ctx, "void %s(%s)", src0, uses_loopreg); + output_line(ctx, "{"); + ctx->indent++; +} // emit_GLSL_LABEL + +void emit_GLSL_DCL(Context *ctx) +{ + // no-op. We do this in our emit_attribute() and emit_uniform(). +} // emit_GLSL_DCL + +void emit_GLSL_POW(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "pow(abs(%s), %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_POW + +void emit_GLSL_CRS(Context *ctx) +{ + // !!! FIXME: needs to take ctx->dst_arg.writemask into account. 
+ char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "cross(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_GLSL_CRS + +void emit_GLSL_SGN(Context *ctx) +{ + // (we don't need the temporary registers specified for the D3D opcode.) + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_SGN + +void emit_GLSL_ABS(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_ABS + +void emit_GLSL_NRM(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_NRM + +void emit_GLSL_SINCOS(Context *ctx) +{ + // we don't care about the temp registers that <= sm2 demands; ignore them. + // sm2 also talks about what components are left untouched vs. undefined, + // but we just leave those all untouched with GLSL write masks (which + // would fulfill the "undefined" requirement, too). 
+ const int mask = ctx->dest_arg.writemask; + char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char code[128] = { '\0' }; + + if (writemask_x(mask)) + make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0); + else if (writemask_y(mask)) + make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0); + else if (writemask_xy(mask)) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec2(cos(%s), sin(%s))", src0, src0); + } // else if + + output_line(ctx, "%s", code); +} // emit_GLSL_SINCOS + +void emit_GLSL_REP(Context *ctx) +{ + // !!! FIXME: + // msdn docs say legal loop values are 0 to 255. We can check DEFI values + // at parse time, but if they are pulling a value from a uniform, do + // we clamp here? + // !!! FIXME: swizzle is legal here, right? + char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0)); + const uint rep = (uint) ctx->reps; + output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {", + rep, rep, src0, rep); + ctx->indent++; +} // emit_GLSL_REP + +void emit_GLSL_ENDREP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_GLSL_ENDREP + +void emit_GLSL_IF(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + output_line(ctx, "if (%s) {", src0); + ctx->indent++; +} // emit_GLSL_IF + +void emit_GLSL_IFC(Context *ctx) +{ + const char *comp = get_GLSL_comparison_string_scalar(ctx); + char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) {", src0, comp, src1); + ctx->indent++; +} // emit_GLSL_IFC + +void emit_GLSL_ELSE(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "} else {"); + ctx->indent++; +} // emit_GLSL_ELSE + +void emit_GLSL_ENDIF(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_GLSL_ENDIF + +void emit_GLSL_BREAK(Context *ctx) +{ + output_line(ctx, 
"break;"); +} // emit_GLSL_BREAK + +void emit_GLSL_BREAKC(Context *ctx) +{ + const char *comp = get_GLSL_comparison_string_scalar(ctx); + char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1); +} // emit_GLSL_BREAKC + +void emit_GLSL_MOVA(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + + if (vecsize == 1) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "int(floor(abs(%s) + 0.5) * sign(%s))", + src0, src0); + } // if + + else + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))", + vecsize, src0, vecsize, src0); + } // else + + output_line(ctx, "%s", code); +} // emit_GLSL_MOVA + +void emit_GLSL_DEFB(Context *ctx) +{ + char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); + push_output(ctx, &ctx->globals); + output_line(ctx, "const bool %s = %s;", + varname, ctx->dwords[0] ? 
"true" : "false"); + pop_output(ctx); +} // emit_GLSL_DEFB + +void emit_GLSL_DEFI(Context *ctx) +{ + char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); + const int32 *x = (const int32 *) ctx->dwords; + push_output(ctx, &ctx->globals); + output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);", + varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); + pop_output(ctx); +} // emit_GLSL_DEFI + +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) + +void emit_GLSL_TEXKILL(Context *ctx) +{ + char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst); +} // emit_GLSL_TEXKILL + +static void glsl_texld(Context *ctx, const int texldd) +{ + if (!shader_version_atleast(ctx, 1, 4)) + { + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char sampler[64]; + char code[128] = {0}; + + assert(!texldd); + + RegisterList *sreg; + sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + if (ttype == TEXTURE_TYPE_2D) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "texture2D(%s, %s.xy)", + sampler, dst); + } + else if (ttype == TEXTURE_TYPE_CUBE) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "textureCube(%s, %s.xyz)", + sampler, dst); + } + else if (ttype == TEXTURE_TYPE_VOLUME) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "texture3D(%s, %s.xyz)", + sampler, dst); + } + else + { + fail(ctx, "unexpected texture type"); + } // else + output_line(ctx, "%s", code); + } // if + + else if (!shader_version_atleast(ctx, 2, 0)) + { + // ps_1_4 is different, too! + fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! 
FIXME + return; + } // else if + + else + { + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + samp_arg->regnum); + const char *funcname = NULL; + char src0[64] = { '\0' }; + char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + char src2[64] = { '\0' }; + char src3[64] = { '\0' }; + + if (sreg == NULL) + { + fail(ctx, "TEXLD using undeclared sampler"); + return; + } // if + + if (texldd) + { + if (sreg->index == TEXTURE_TYPE_2D) + { + make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2)); + make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3)); + } // if + else + { + assert((sreg->index == TEXTURE_TYPE_CUBE) || (sreg->index == TEXTURE_TYPE_VOLUME)); + make_GLSL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); + make_GLSL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); + } // else + } // if + + // !!! FIXME: can TEXLDD set instruction_controls? + // !!! FIXME: does the d3d bias value map directly to GLSL? + const char *biassep = ""; + char bias[64] = { '\0' }; + if (ctx->instruction_controls == CONTROL_TEXLDB) + { + biassep = ", "; + make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias)); + } // if + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + if (ctx->instruction_controls == CONTROL_TEXLDP) + { + funcname = "texture2DProj"; + make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + } // if + else // texld/texldb + { + funcname = "texture2D"; + make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + } // else + break; + case TEXTURE_TYPE_CUBE: + if (ctx->instruction_controls == CONTROL_TEXLDP) + fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? 
+ funcname = "textureCube"; + make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + break; + case TEXTURE_TYPE_VOLUME: + if (ctx->instruction_controls == CONTROL_TEXLDP) + { + funcname = "texture3DProj"; + make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + } // if + else // texld/texldb + { + funcname = "texture3D"; + make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + } // else + break; + default: + fail(ctx, "unknown texture type"); + return; + } // switch + + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + char swiz_str[6] = { '\0' }; + make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str), + samp_arg->swizzle, ctx->dest_arg.writemask); + + char code[128]; + if (texldd) + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "%sGrad(%s, %s, %s, %s)%s", funcname, + src1, src0, src2, src3, swiz_str); + } // if + else + { + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "%s(%s, %s%s%s)%s", funcname, + src1, src0, biassep, bias, swiz_str); + } // else + + output_line(ctx, "%s", code); + } // else +} // glsl_texld + +void emit_GLSL_TEXLD(Context *ctx) +{ + glsl_texld(ctx, 0); +} // emit_GLSL_TEXLD + + +void emit_GLSL_TEXBEM(Context *ctx) +{ + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))", + sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src); + + output_line(ctx, "%s", code); +} // emit_GLSL_TEXBEM + + +void emit_GLSL_TEXBEML(Context *ctx) +{ + // !!! 
FIXME: this code counts on the register not having swizzles, etc. + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *" + " ((%s.z * %s_texbeml.x) + %s_texbem.y)", + sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src, + src, sampler, sampler); + + output_line(ctx, "%s", code); +} // emit_GLSL_TEXBEML + +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME + + +void emit_GLSL_TEXM3X2PAD(Context *ctx) +{ + // no-op ... work happens in emit_GLSL_TEXM3X2TEX(). +} // emit_GLSL_TEXM3X2PAD + +void emit_GLSL_TEXM3X2TEX(Context *ctx) +{ + if (ctx->texm3x2pad_src0 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char sampler[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
// TEXM3X3PAD emits no GLSL of its own. The emit_GLSL_TEXM3X3*() functions
// build the whole matrix multiply from the ctx->texm3x3pad_* register
// numbers when the final opcode of the sequence is reached.
void emit_GLSL_TEXM3X3PAD(Context *ctx)
{
    // no-op ... work happens in emit_GLSL_TEXM3X3*().
} // emit_GLSL_TEXM3X3PAD
"Cube" : "3D"; + + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "texture%s(%s," + " vec3(dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)))", + ttypestr, sampler, src0, src1, src2, src3, dst, src4); + + output_line(ctx, "%s", code); +} // emit_GLSL_TEXM3X3TEX + +void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx) +{ + if (ctx->glsl_generated_texm3x3spec_helper) + return; + + ctx->glsl_generated_texm3x3spec_helper = 1; + + push_output(ctx, &ctx->helpers); + output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)"); + output_line(ctx, "{"); ctx->indent++; + output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + pop_output(ctx); +} // emit_GLSL_TEXM3X3SPEC_helper + +void emit_GLSL_TEXM3X3SPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char src5[64]; + char sampler[64]; + char code[512]; + + emit_GLSL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, + src5, sizeof (src5)); + get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + info->regnum); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D"; + + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "texture%s(%s, " + "TEXM3X3SPEC_reflection(" + "vec3(" + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz)" + ")," + "%s.xyz," + ")" + ")", + ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5); + + output_line(ctx, "%s", code); +} // emit_GLSL_TEXM3X3SPEC + +void emit_GLSL_TEXM3X3VSPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char sampler[64]; + char code[512]; + + emit_GLSL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
// Emit the EXPP opcode by delegating to emit_GLSL_EXP(), so the generated
// GLSL is the same exp2() assignment used for EXP.
void emit_GLSL_EXPP(Context *ctx)
{
    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
    emit_GLSL_EXP(ctx);  // I guess this is just partial precision EXP?
} // emit_GLSL_EXPP
// Shared emitter for CMP and CND. For each group of written destination
// components, emits one assignment of the form
//     dest.<group> = ((src0.<c> <cmp>) ? src1.<group> : src2.<group>)
// where `cmp` is the comparison text appended after src0 (the callers pass
// "> 0.5" for CND and ">= 0.0" for CMP).
//
// Written components are grouped when src0's swizzle selects the same
// channel for them, so each group needs only one scalar test. The
// destination writemask is narrowed to each group while its line is built,
// and restored to its original value before returning.
void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
{
    int i, j;
    DestArgInfo *dst = &ctx->dest_arg;
    const SourceArgInfo *srcarg0 = &ctx->source_args[0];
    const int origmask = dst->writemask;  // restored at the end.
    int used_swiz[4] = { 0, 0, 0, 0 };  // components already emitted.
    const int writemask[4] = { dst->writemask0, dst->writemask1,
                               dst->writemask2, dst->writemask3 };
    const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
                              srcarg0->swizzle_z, srcarg0->swizzle_w };

    for (i = 0; i < 4; i++)
    {
        int mask = (1 << i);  // group starts with just component i.

        if (!writemask[i]) continue;  // component isn't written at all.
        if (used_swiz[i]) continue;   // already covered by an earlier group.

        // This is a swizzle we haven't checked yet.
        used_swiz[i] = 1;

        // see if there are any other elements swizzled to match (.yyyy)
        for (j = i + 1; j < 4; j++)
        {
            if (!writemask[j]) continue;
            if (src0swiz[i] != src0swiz[j]) continue;
            mask |= (1 << j);
            used_swiz[j] = 1;
        } // for

        // okay, (mask) should be the writemask of swizzles we like.

        //return make_GLSL_srcarg_string(ctx, idx, (1 << 0));

        // src0 is requested for component i only (the shared test channel);
        //  src1 and src2 are requested for the whole group.
        char src0[64];
        char src1[64];
        char src2[64];
        make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
        make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
        make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));

        // narrow the destination so the assignment only touches this group.
        set_dstarg_writemask(dst, mask);

        char code[128];
        make_GLSL_destarg_assign(ctx, code, sizeof (code),
                                 "((%s %s) ? %s : %s)",
                                 src0, cmp, src1, src2);
        output_line(ctx, "%s", code);
    } // for

    set_dstarg_writemask(dst, origmask);
} // emit_GLSL_comparison_operations
+ char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname)); + char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); + char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); + char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); + char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); + + push_output(ctx, &ctx->globals); + output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);", + varname, val0, val1, val2, val3); + pop_output(ctx); +} // emit_GLSL_DEF + +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME + +void emit_GLSL_TEXM3X3(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_GLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)", + src0, src1, src2, src3, dst, src4); + + output_line(ctx, "%s", code); +} // emit_GLSL_TEXM3X3 + +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! 
FIXME + +void emit_GLSL_CMP(Context *ctx) +{ + emit_GLSL_comparison_operations(ctx, ">= 0.0"); +} // emit_GLSL_CMP + +EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME + +void emit_GLSL_DP2ADD(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2)); + char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2); + emit_GLSL_dotprod(ctx, src0, src1, extra); +} // emit_GLSL_DP2ADD + +void emit_GLSL_DSX(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_DSX + +void emit_GLSL_DSY(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0); + output_line(ctx, "%s", code); +} // emit_GLSL_DSY + +void emit_GLSL_TEXLDD(Context *ctx) +{ + // !!! FIXME: + // GLSL 1.30 introduced textureGrad() for this, but it looks like the + // functions are overloaded instead of texture2DGrad() (etc). + + // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(), + // so we'll use them if available. Failing that, we'll just fallback + // to a regular texture2D call and hope the mipmap it chooses is close + // enough. 
+ if (!ctx->glsl_generated_texldd_setup) + { + ctx->glsl_generated_texldd_setup = 1; + push_output(ctx, &ctx->preflight); + output_line(ctx, "#if GL_ARB_shader_texture_lod"); + output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable"); + output_line(ctx, "#define texture2DGrad texture2DGradARB"); + output_line(ctx, "#define texture2DProjGrad texture2DProjARB"); + output_line(ctx, "#elif GL_EXT_gpu_shader4"); + output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable"); + output_line(ctx, "#else"); + output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)"); + output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)"); + output_line(ctx, "#endif"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + glsl_texld(ctx, 1); +} // emit_GLSL_TEXLDD + +void emit_GLSL_SETP(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // destination is always predicate register (which is type bvec4). + if (vecsize == 1) + { + const char *comp = get_GLSL_comparison_string_scalar(ctx); + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "(%s %s %s)", src0, comp, src1); + } // if + else + { + const char *comp = get_GLSL_comparison_string_vector(ctx); + make_GLSL_destarg_assign(ctx, code, sizeof (code), + "%s(%s, %s)", comp, src0, src1); + } // else + + output_line(ctx, "%s", code); +} // emit_GLSL_SETP + +void emit_GLSL_TEXLDL(Context *ctx) +{ + // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins + // !!! FIXME: from fragment shaders for some inexplicable reason. + // !!! FIXME: For now, you'll just have to suffer with the potentially + // !!! FIXME: wrong mipmap until I can figure something out. 
+ emit_GLSL_TEXLD(ctx); +} // emit_GLSL_TEXLDL + +void emit_GLSL_BREAKP(Context *ctx) +{ + char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + output_line(ctx, "if (%s) { break; }", src0); +} // emit_GLSL_BREAKP + +void emit_GLSL_RESERVED(Context *ctx) +{ + // do nothing; fails in the state machine. +} // emit_GLSL_RESERVED + +#endif // SUPPORT_PROFILE_GLSL + +#pragma GCC visibility pop \ No newline at end of file diff --git a/profiles/mojoshader_profile_metal.c b/profiles/mojoshader_profile_metal.c new file mode 100644 index 00000000..6106f3af --- /dev/null +++ b/profiles/mojoshader_profile_metal.c @@ -0,0 +1,2305 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#pragma GCC visibility push(hidden) + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +// !!! FIXME: A lot of this is cut-and-paste from the GLSL version. +#if SUPPORT_PROFILE_METAL + +#define EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(op) \ + void emit_METAL_##op(Context *ctx) { \ + fail(ctx, #op " unimplemented in Metal profile"); \ + } + +static inline const char *get_METAL_register_string(Context *ctx, + const RegisterType regtype, const int regnum, + char *regnum_str, const size_t regnum_size) +{ + // turns out these are identical at the moment. 
+ return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); +} // get_METAL_register_string + +const char *get_METAL_uniform_type(Context *ctx, const RegisterType rtype) +{ + switch (rtype) + { + case REG_TYPE_CONST: return "float4"; + case REG_TYPE_CONSTINT: return "int4"; + case REG_TYPE_CONSTBOOL: return "bool"; + default: fail(ctx, "BUG: used a uniform we don't know how to define."); + } // switch + + return NULL; +} // get_METAL_uniform_type + +const char *get_METAL_varname_in_buf(Context *ctx, RegisterType rt, + int regnum, char *buf, + const size_t len) +{ + char regnum_str[16]; + const char *regtype_str = get_METAL_register_string(ctx, rt, regnum, + regnum_str, sizeof (regnum_str)); + + // We don't separate vars with vs_ or ps_ here, because, for the most part, + // there are only local vars in Metal shaders. + snprintf(buf, len, "%s%s", regtype_str, regnum_str); + return buf; +} // get_METAL_varname_in_buf + + +const char *get_METAL_varname(Context *ctx, RegisterType rt, int regnum) +{ + char buf[64]; + get_METAL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_METAL_varname + + +static inline const char *get_METAL_const_array_varname_in_buf(Context *ctx, + const int base, const int size, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "const_array_%d_%d", base, size); + return buf; +} // get_METAL_const_array_varname_in_buf + +const char *get_METAL_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + get_METAL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_METAL_const_array_varname + + +static inline const char *get_METAL_input_array_varname(Context *ctx, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "%s", "vertex_input_array"); + return buf; +} // get_METAL_input_array_varname + + +const char *get_METAL_uniform_array_varname(Context *ctx, + const RegisterType regtype, + char *buf, const size_t 
len) +{ + const char *shadertype = ctx->shader_type_str; + const char *type = get_METAL_uniform_type(ctx, regtype); + snprintf(buf, len, "uniforms.uniforms_%s", type); + return buf; +} // get_METAL_uniform_array_varname + +const char *get_METAL_destarg_varname(Context *ctx, char *buf, size_t len) +{ + const DestArgInfo *arg = &ctx->dest_arg; + return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_METAL_destarg_varname + +const char *get_METAL_srcarg_varname(Context *ctx, const size_t idx, + char *buf, size_t len) +{ + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + *buf = '\0'; + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + return get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_METAL_srcarg_varname + + +const char *make_METAL_destarg_assign(Context *, char *, const size_t, + const char *, ...) ISPRINTF(4,5); + +const char *make_METAL_destarg_assign(Context *ctx, char *buf, + const size_t buflen, + const char *fmt, ...) +{ + int need_parens = 0; + const DestArgInfo *arg = &ctx->dest_arg; + + if (arg->writemask == 0) + { + *buf = '\0'; + return buf; // no writemask? It's a no-op. + } // if + + char clampbuf[32] = { '\0' }; + const char *clampleft = ""; + const char *clampright = ""; + if (arg->result_mod & MOD_SATURATE) + { + ctx->metal_need_header_common = 1; + const int vecsize = vecsize_from_writemask(arg->writemask); + clampleft = "clamp("; + if (vecsize == 1) + clampright = ", 0.0, 1.0)"; + else + { + snprintf(clampbuf, sizeof (clampbuf), + ", float%d(0.0), float%d(1.0))", vecsize, vecsize); + clampright = clampbuf; + } // else + } // if + + // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. + + // CENTROID only allowed in DCL opcodes, which shouldn't come through here. + assert((arg->result_mod & MOD_CENTROID) == 0); + + if (ctx->predicated) + { + fail(ctx, "predicated destinations unsupported"); // !!! 
FIXME + *buf = '\0'; + return buf; + } // if + + char operation[256]; + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(operation, sizeof (operation), fmt, ap); + va_end(ap); + if (len >= sizeof (operation)) + { + fail(ctx, "operation string too large"); // I'm lazy. :P + *buf = '\0'; + return buf; + } // if + + const char *result_shift_str = ""; + switch (arg->result_shift) + { + case 0x1: result_shift_str = " * 2.0"; break; + case 0x2: result_shift_str = " * 4.0"; break; + case 0x3: result_shift_str = " * 8.0"; break; + case 0xD: result_shift_str = " / 8.0"; break; + case 0xE: result_shift_str = " / 4.0"; break; + case 0xF: result_shift_str = " / 2.0"; break; + } // switch + need_parens |= (result_shift_str[0] != '\0'); + + char regnum_str[16]; + const char *regtype_str = get_METAL_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + char writemask_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !writemask_xyzw(arg->writemask)) + { + writemask_str[i++] = '.'; + if (arg->writemask0) writemask_str[i++] = 'x'; + if (arg->writemask1) writemask_str[i++] = 'y'; + if (arg->writemask2) writemask_str[i++] = 'z'; + if (arg->writemask3) writemask_str[i++] = 'w'; + } // if + writemask_str[i] = '\0'; + assert(i < sizeof (writemask_str)); + + const char *leftparen = (need_parens) ? "(" : ""; + const char *rightparen = (need_parens) ? ")" : ""; + + snprintf(buf, buflen, "%s%s%s = %s%s%s%s%s%s;", + regtype_str, regnum_str, writemask_str, + clampleft, leftparen, operation, rightparen, result_shift_str, + clampright); + // !!! FIXME: make sure the scratch buffer was large enough. 
+ return buf; +} // make_METAL_destarg_assign + + +char *make_METAL_swizzle_string(char *swiz_str, const size_t strsize, + const int swizzle, const int writemask) +{ + size_t i = 0; + if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) ) + { + const int writemask0 = (writemask >> 0) & 0x1; + const int writemask1 = (writemask >> 1) & 0x1; + const int writemask2 = (writemask >> 2) & 0x1; + const int writemask3 = (writemask >> 3) & 0x1; + + const int swizzle_x = (swizzle >> 0) & 0x3; + const int swizzle_y = (swizzle >> 2) & 0x3; + const int swizzle_z = (swizzle >> 4) & 0x3; + const int swizzle_w = (swizzle >> 6) & 0x3; + + swiz_str[i++] = '.'; + if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x]; + if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y]; + if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z]; + if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w]; + } // if + assert(i < strsize); + swiz_str[i] = '\0'; + return swiz_str; +} // make_METAL_swizzle_string + + +const char *make_METAL_srcarg_string(Context *ctx, const size_t idx, + const int writemask, char *buf, + const size_t buflen) +{ + *buf = '\0'; + + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + + const char *premod_str = ""; + const char *postmod_str = ""; + switch (arg->src_mod) + { + case SRCMOD_NEGATE: + premod_str = "-"; + break; + + case SRCMOD_BIASNEGATE: + premod_str = "-("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_BIAS: + premod_str = "("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_SIGNNEGATE: + premod_str = "-(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_SIGN: + premod_str = "(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_COMPLEMENT: + premod_str = "(1.0 - "; + postmod_str = ")"; + break; + + case SRCMOD_X2NEGATE: + premod_str = "-("; + postmod_str = " * 2.0)"; + break; + + case 
SRCMOD_X2: + premod_str = "("; + postmod_str = " * 2.0)"; + break; + + case SRCMOD_DZ: + fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME + postmod_str = "_dz"; + break; + + case SRCMOD_DW: + fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME + postmod_str = "_dw"; + break; + + case SRCMOD_ABSNEGATE: + ctx->metal_need_header_math = 1; + premod_str = "-abs("; + postmod_str = ")"; + break; + + case SRCMOD_ABS: + ctx->metal_need_header_math = 1; + premod_str = "abs("; + postmod_str = ")"; + break; + + case SRCMOD_NOT: + premod_str = "!"; + break; + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + } // switch + + const char *regtype_str = NULL; + + if (!arg->relative) + { + regtype_str = get_METAL_varname_in_buf(ctx, arg->regtype, arg->regnum, + (char *) alloca(64), 64); + } // if + + const char *rel_lbracket = ""; + char rel_offset[32] = { '\0' }; + const char *rel_rbracket = ""; + char rel_swizzle[4] = { '\0' }; + const char *rel_regtype_str = ""; + if (arg->relative) + { + if (arg->regtype == REG_TYPE_INPUT) + regtype_str=get_METAL_input_array_varname(ctx,(char*)alloca(64),64); + else + { + assert(arg->regtype == REG_TYPE_CONST); + const int arrayidx = arg->relative_array->index; + const int offset = arg->regnum - arrayidx; + assert(offset >= 0); + if (arg->relative_array->constant) + { + const int arraysize = arg->relative_array->count; + regtype_str = get_METAL_const_array_varname_in_buf(ctx, + arrayidx, arraysize, (char *) alloca(64), 64); + if (offset != 0) + snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset); + } // if + else + { + regtype_str = get_METAL_uniform_array_varname(ctx, arg->regtype, + (char *) alloca(64), 64); + if (offset == 0) + { + snprintf(rel_offset, sizeof (rel_offset), + "ARRAYBASE_%d + ", arrayidx); + } // if + else + { + snprintf(rel_offset, sizeof (rel_offset), + "(ARRAYBASE_%d + %d) + ", arrayidx, offset); + } // else + } // else + } // else + + rel_lbracket = "["; + + 
rel_regtype_str = get_METAL_varname_in_buf(ctx, arg->relative_regtype, + arg->relative_regnum, + (char *) alloca(64), 64); + rel_swizzle[0] = '.'; + rel_swizzle[1] = swizzle_channels[arg->relative_component]; + rel_swizzle[2] = '\0'; + rel_rbracket = "]"; + } // if + + char swiz_str[6] = { '\0' }; + if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) + { + make_METAL_swizzle_string(swiz_str, sizeof (swiz_str), + arg->swizzle, writemask); + } // if + + if (regtype_str == NULL) + { + fail(ctx, "Unknown source register type."); + return buf; + } // if + + snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s", + premod_str, regtype_str, rel_lbracket, rel_offset, + rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str, + postmod_str); + // !!! FIXME: make sure the scratch buffer was large enough. + return buf; +} // make_METAL_srcarg_string + +// generate some convenience functions. +#define MAKE_METAL_SRCARG_STRING_(mask, bitmask) \ + static inline const char *make_METAL_srcarg_string_##mask(Context *ctx, \ + const size_t idx, char *buf, \ + const size_t buflen) { \ + return make_METAL_srcarg_string(ctx, idx, bitmask, buf, buflen); \ + } +MAKE_METAL_SRCARG_STRING_(x, (1 << 0)) +MAKE_METAL_SRCARG_STRING_(y, (1 << 1)) +MAKE_METAL_SRCARG_STRING_(z, (1 << 2)) +MAKE_METAL_SRCARG_STRING_(w, (1 << 3)) +MAKE_METAL_SRCARG_STRING_(scalar, (1 << 0)) +MAKE_METAL_SRCARG_STRING_(full, 0xF) +MAKE_METAL_SRCARG_STRING_(masked, ctx->dest_arg.writemask) +MAKE_METAL_SRCARG_STRING_(vec3, 0x7) +MAKE_METAL_SRCARG_STRING_(vec2, 0x3) +#undef MAKE_METAL_SRCARG_STRING_ + +// special cases for comparison opcodes... 
+ +const char *get_METAL_comparison_string_scalar(Context *ctx) +{ + const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" }; + if (ctx->instruction_controls >= STATICARRAYLEN(comps)) + { + fail(ctx, "unknown comparison control"); + return ""; + } // if + + return comps[ctx->instruction_controls]; +} // get_METAL_comparison_string_scalar + +const char *get_METAL_comparison_string_vector(Context *ctx) +{ + return get_METAL_comparison_string_scalar(ctx); // standard C operators work for vectors in Metal. +} // get_METAL_comparison_string_vector + + +void emit_METAL_start(Context *ctx, const char *profilestr) +{ + if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx)) + { + failf(ctx, "Shader type %u unsupported in this profile.", + (uint) ctx->shader_type); + return; + } // if + + if (!ctx->mainfn) + { + if (shader_is_vertex(ctx)) + ctx->mainfn = StrDup(ctx, "VertexShader"); + else if (shader_is_pixel(ctx)) + ctx->mainfn = StrDup(ctx, "FragmentShader"); + } // if + + set_output(ctx, &ctx->mainline); + ctx->indent++; +} // emit_METAL_start + +void emit_METAL_RET(Context *ctx); +void emit_METAL_end(Context *ctx) +{ + // !!! FIXME: maybe handle this at a higher level? + // ps_1_* writes color to r0 instead oC0. We move it to the right place. + // We don't have to worry about a RET opcode messing this up, since + // RET isn't available before ps_2_0. + if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + { + set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); + output_line(ctx, "oC0 = r0;"); + } // if + + // !!! FIXME: maybe handle this at a higher level? + // force a RET opcode if we're at the end of the stream without one. + if (ctx->previous_opcode != OPCODE_RET) + emit_METAL_RET(ctx); +} // emit_METAL_end + +void emit_METAL_phase(Context *ctx) +{ + // no-op in Metal. +} // emit_METAL_phase + +void emit_METAL_finalize(Context *ctx) +{ + // If we had a relative addressing of REG_TYPE_INPUT, we need to build + // an array for it at the start of main(). 
GLSL doesn't let you specify + // arrays of attributes. + //float4 blah_array[BIGGEST_ARRAY]; + if (ctx->have_relative_input_registers) // !!! FIXME + fail(ctx, "Relative addressing of input registers not supported."); + + // Insert header includes we need; each metal_need_header_NAME flag pulls in <metal_NAME>. + push_output(ctx, &ctx->preflight); + #define INC_METAL_HEADER(name) \ + if (ctx->metal_need_header_##name) { \ + output_line(ctx, "#include <metal_" #name ">"); \ + } + INC_METAL_HEADER(common); + INC_METAL_HEADER(math); + INC_METAL_HEADER(relational); + INC_METAL_HEADER(geometric); + INC_METAL_HEADER(graphics); + INC_METAL_HEADER(texture); + #undef INC_METAL_HEADER + output_blank_line(ctx); + output_line(ctx, "using namespace metal;"); + output_blank_line(ctx); + pop_output(ctx); + + // Fill in the shader's mainline function signature. + push_output(ctx, &ctx->mainline_intro); + output_line(ctx, "%s %s%s %s (", + shader_is_vertex(ctx) ? "vertex" : "fragment", + ctx->outputs ? ctx->mainfn : "void", + ctx->outputs ? "_Output" : "", ctx->mainfn); + pop_output(ctx); + + push_output(ctx, &ctx->mainline_arguments); + ctx->indent++; + + const int uniform_count = ctx->uniform_float4_count + ctx->uniform_int4_count + ctx->uniform_bool_count; + int commas = 0; + if (uniform_count) commas++; + if (ctx->inputs) commas++; + if (commas) commas--; + + if (uniform_count > 0) + { + push_output(ctx, &ctx->globals); + output_line(ctx, "struct %s_Uniforms", ctx->mainfn); + output_line(ctx, "{"); + ctx->indent++; + if (ctx->uniform_float4_count > 0) + output_line(ctx, "float4 uniforms_float4[%d];", ctx->uniform_float4_count); + if (ctx->uniform_int4_count > 0) + output_line(ctx, "int4 uniforms_int4[%d];", ctx->uniform_int4_count); + if (ctx->uniform_bool_count > 0) + output_line(ctx, "bool uniforms_bool[%d];", ctx->uniform_bool_count); + ctx->indent--; + output_line(ctx, "};"); + pop_output(ctx); + + output_line(ctx, "constant %s_Uniforms &uniforms [[buffer(16)]]%s", ctx->mainfn, commas ? 
"," : ""); + commas--; + } // if + + if (ctx->inputs) + { + output_line(ctx, "%s_Input input [[stage_in]]%s", ctx->mainfn, commas ? "," : ""); + commas--; + } // if + + ctx->indent--; + output_line(ctx, ") {"); + if (ctx->outputs) + { + ctx->indent++; + output_line(ctx, "%s_Output output;", ctx->mainfn); + + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "return output;"); + pop_output(ctx); + } // if + pop_output(ctx); + + if (ctx->inputs) + { + push_output(ctx, &ctx->inputs); + output_line(ctx, "};"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + if (ctx->outputs) + { + push_output(ctx, &ctx->outputs); + output_line(ctx, "};"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + // throw some blank lines around to make source more readable. + if (ctx->globals) // don't add a blank line if the section is empty. + { + push_output(ctx, &ctx->globals); + output_blank_line(ctx); + pop_output(ctx); + } // if +} // emit_METAL_finalize + +void emit_METAL_global(Context *ctx, RegisterType regtype, int regnum) +{ + char varname[64]; + get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + // These aren't actually global in metal, set them up at top of mainline. + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + + switch (regtype) + { + case REG_TYPE_ADDRESS: + if (shader_is_vertex(ctx)) + output_line(ctx, "int4 %s;", varname); + else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. + { + // We have to map texture registers to temps for ps_1_1, since + // they work like temps, initialize with tex coords, and the + // ps_1_1 TEX opcode expects to overwrite it. + if (!shader_version_atleast(ctx, 1, 4)) + output_line(ctx, "float4 %s = input.%s;",varname,varname); + } // else if + break; + case REG_TYPE_PREDICATE: + output_line(ctx, "bool4 %s;", varname); + break; + case REG_TYPE_TEMP: + output_line(ctx, "float4 %s;", varname); + break; + case REG_TYPE_LOOP: + break; // no-op. 
We declare these in for loops at the moment. + case REG_TYPE_LABEL: + break; // no-op. If we see it here, it means we optimized it out. + default: + fail(ctx, "BUG: we used a register we don't know how to define."); + break; + } // switch + + pop_output(ctx); +} // emit_METAL_global + +void emit_METAL_array(Context *ctx, VariableList *var) +{ + // All uniforms (except constant arrays, which are literally constant + // data embedded in Metal shaders) are now packed into a single array, + // so we can batch the uniform transfers. So this doesn't actually + // define an array here; the one, big array is emitted during + // finalization instead. + // However, we need to #define the offset into the one, big array here, + // and let dereferences use that #define. + const int base = var->index; + const int metalbase = ctx->uniform_float4_count; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const int ARRAYBASE_%d = %d;", base, metalbase); + pop_output(ctx); + var->emit_position = metalbase; +} // emit_METAL_array + +void emit_METAL_const_array(Context *ctx, const ConstantsList *clist, + int base, int size) +{ + char varname[64]; + get_METAL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname)); + + const char *cstr = NULL; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const float4 %s[%d] = {", varname, size); + ctx->indent++; + + int i; + for (i = 0; i < size; i++) + { + while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) + clist = clist->next; + assert(clist->constant.index == (base + i)); + + char val0[32]; + char val1[32]; + char val2[32]; + char val3[32]; + floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); + floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); + floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); + floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); + + output_line(ctx, "float4(%s, %s, %s, %s)%s", val0, val1, val2, 
val3, + (i < (size-1)) ? "," : ""); + + clist = clist->next; + } // for + + ctx->indent--; + output_line(ctx, "};"); + output_line(ctx, "(void) %s[0];", varname); // stop compiler warnings. + pop_output(ctx); +} // emit_METAL_const_array + +void emit_METAL_uniform(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var) +{ + // Now that we're pushing all the uniforms as one struct, pack these + // down, so if we only use register c439, it'll actually map to + // uniforms.uniforms_float4[0]. As we push one big struct, this will + // prevent uploading unused data. + + const char *utype = get_METAL_uniform_type(ctx, regtype); + char varname[64]; + char name[64]; + int index = 0; + + get_METAL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + + if (var == NULL) + { + get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + + if (regtype == REG_TYPE_CONST) + index = ctx->uniform_float4_count; + else if (regtype == REG_TYPE_CONSTINT) + index = ctx->uniform_int4_count; + else if (regtype == REG_TYPE_CONSTBOOL) + index = ctx->uniform_bool_count; + else // get_METAL_uniform_array_varname() would have called fail(). + assert(!(ctx->isfail)); + + // !!! FIXME: can cause unused var warnings in Clang... + //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index); + output_line(ctx, "#define %s %s[%d]", varname, name, index); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. 
+ pop_output(ctx); + } // if + + else + { + const int arraybase = var->index; + if (var->constant) + { + get_METAL_const_array_varname_in_buf(ctx, arraybase, var->count, + name, sizeof (name)); + index = (regnum - arraybase); + } // if + else + { + assert(var->emit_position != -1); + get_METAL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + index = (regnum - arraybase) + var->emit_position; + } // else + + // !!! FIXME: might trigger unused var warnings in Clang. + //output_line(ctx, "constant %s &%s = %s[%d];", utype, varname, name, index); + // No trailing ';' in the macro body: it gets expanded inside expressions. + output_line(ctx, "#define %s %s[%d]", varname, name, index); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. + pop_output(ctx); + } // else + + pop_output(ctx); +} // emit_METAL_uniform + +void emit_METAL_sampler(Context *ctx,int stage,TextureType ttype,int tb) +{ + char var[64]; + const char *texsuffix = NULL; + switch (ttype) + { + case TEXTURE_TYPE_2D: texsuffix = "2d"; break; + case TEXTURE_TYPE_CUBE: texsuffix = "cube"; break; + case TEXTURE_TYPE_VOLUME: texsuffix = "3d"; break; + default: assert(!"unexpected texture type"); return; + } // switch + + get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var)); + + push_output(ctx, &ctx->mainline_arguments); + ctx->indent++; + // Metal texture types are templates; the element type must be spelled out. + output_line(ctx, "texture%s<float> %s_texture [[texture(%d)]],", + texsuffix, var, stage); + output_line(ctx, "sampler %s [[sampler(%d)]],", var, stage); + pop_output(ctx); + + if (tb) // This sampler used a ps_1_1 TEXBEM opcode? 
+ { + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + char name[64]; + const int index = ctx->uniform_float4_count; + ctx->uniform_float4_count += 2; + get_METAL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name)); + output_line(ctx, "constant float4 &%s_texbem = %s[%d];", var, name, index); + output_line(ctx, "constant float4 &%s_texbeml = %s[%d];", var, name, index+1); + pop_output(ctx); + } // if +} // emit_METAL_sampler + +void emit_METAL_attribute(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags) +{ + // !!! FIXME: this function doesn't deal with write masks at all yet! + const char *usage_str = NULL; + char index_str[16] = { '\0' }; + char var[64]; + + get_METAL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var)); + + //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? + + if (index != 0) // !!! FIXME: a lot of these MUST be zero. + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + + if (shader_is_vertex(ctx)) + { + // pre-vs3 output registers. + // these don't ever happen in DCL opcodes, I think. Map to vs_3_* + // output registers. 
+ if (!shader_version_atleast(ctx, 3, 0)) + { + if (regtype == REG_TYPE_RASTOUT) + { + regtype = REG_TYPE_OUTPUT; + index = regnum; + switch ((const RastOutType) regnum) + { + case RASTOUT_TYPE_POSITION: + usage = MOJOSHADER_USAGE_POSITION; + break; + case RASTOUT_TYPE_FOG: + usage = MOJOSHADER_USAGE_FOG; + break; + case RASTOUT_TYPE_POINT_SIZE: + usage = MOJOSHADER_USAGE_POINTSIZE; + break; + } // switch + } // if + + else if (regtype == REG_TYPE_ATTROUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_COLOR; + index = regnum; + } // else if + + else if (regtype == REG_TYPE_TEXCRDOUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_TEXCOORD; + index = regnum; + } // else if + } // if + + if (regtype == REG_TYPE_INPUT) + { + push_output(ctx, &ctx->inputs); + if (buffer_size(ctx->inputs) == 0) + { + output_line(ctx, "struct %s_Input", ctx->mainfn); + output_line(ctx, "{"); + } // if + + ctx->indent++; + output_line(ctx, "float4 %s [[attribute(%d)]];", var, regnum); + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + // !!! FIXME: might trigger unused var warnings in Clang. + //output_line(ctx, "constant float4 &%s = input.%s;", var, var); + output_line(ctx, "#define %s input.%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. 
+ pop_output(ctx); + } // if + + else if (regtype == REG_TYPE_OUTPUT) + { + push_output(ctx, &ctx->outputs); + if (buffer_size(ctx->outputs) == 0) + { + output_line(ctx, "struct %s_Output", ctx->mainfn); + output_line(ctx, "{"); + } // if + + ctx->indent++; + + switch (usage) + { + case MOJOSHADER_USAGE_POSITION: + output_line(ctx, "float4 %s [[position]];", var); + break; + case MOJOSHADER_USAGE_POINTSIZE: + output_line(ctx, "float4 %s [[point_size]];", var); + break; + case MOJOSHADER_USAGE_COLOR: + output_line(ctx, "float4 %s [[user(color%d)]];", var, index); + break; + case MOJOSHADER_USAGE_FOG: + output_line(ctx, "float4 %s [[user(fog)]];", var); + break; + case MOJOSHADER_USAGE_TEXCOORD: + output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index); + break; + default: + // !!! FIXME: we need to deal with some more built-in varyings here. + break; + } // switch + + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + // !!! FIXME: this doesn't work. + //output_line(ctx, "float4 &%s = output.%s;", var, var); + output_line(ctx, "#define %s output.%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // else if + + else + { + fail(ctx, "unknown vertex shader attribute register"); + } // else + } // if + + else if (shader_is_pixel(ctx)) + { + // samplers DCLs get handled in emit_METAL_sampler(). + + if (flags & MOD_CENTROID) // !!! 
FIXME + { + failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); + return; + } // if + + if ((regtype == REG_TYPE_COLOROUT) || (regtype == REG_TYPE_DEPTHOUT)) + { + push_output(ctx, &ctx->outputs); + if (buffer_size(ctx->outputs) == 0) + { + output_line(ctx, "struct %s_Output", ctx->mainfn); + output_line(ctx, "{"); + } // if + ctx->indent++; + + if (regtype == REG_TYPE_COLOROUT) + output_line(ctx, "float4 %s [[color(%d)]];", var, regnum); + else if (regtype == REG_TYPE_DEPTHOUT) + output_line(ctx, "float %s [[depth(any)]];", var); + + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + // !!! FIXME: this doesn't work. + //output_line(ctx, "float%s &%s = output.%s;", (regtype == REG_TYPE_DEPTHOUT) ? "" : "4", var, var); + output_line(ctx, "#define %s output.%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // if + + // !!! FIXME: can you actualy have a texture register with COLOR usage? + else if ((regtype == REG_TYPE_TEXTURE) || + (regtype == REG_TYPE_INPUT) || + (regtype == REG_TYPE_MISCTYPE)) + { + int skipreference = 0; + push_output(ctx, &ctx->inputs); + if (buffer_size(ctx->inputs) == 0) + { + output_line(ctx, "struct %s_Input", ctx->mainfn); + output_line(ctx, "{"); + } // if + ctx->indent++; + + if (regtype == REG_TYPE_MISCTYPE) + { + const MiscTypeType mt = (MiscTypeType) regnum; + if (mt == MISCTYPE_TYPE_FACE) + output_line(ctx, "bool %s [[front_facing]];", var); + else if (mt == MISCTYPE_TYPE_POSITION) + output_line(ctx, "float4 %s [[position]];", var); + else + fail(ctx, "BUG: unhandled misc register"); + } // else if + + else + { + if (usage == MOJOSHADER_USAGE_TEXCOORD) + { + // ps_1_1 does a different hack for this attribute. + // Refer to emit_METAL_global()'s REG_TYPE_ADDRESS code. 
+ if (!shader_version_atleast(ctx, 1, 4)) + skipreference = 1; + output_line(ctx, "float4 %s [[user(texcoord%d)]];", var, index); + } // if + + else if (usage == MOJOSHADER_USAGE_COLOR) + output_line(ctx, "float4 %s [[user(color%d)]];", var, index); + + else if (usage == MOJOSHADER_USAGE_FOG) + output_line(ctx, "float4 %s [[user(fog)]];", var); + } // else + + pop_output(ctx); + + // !!! FIXME: can cause unused var warnings in Clang... + #if 0 + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + if ((regtype == REG_TYPE_MISCTYPE)&&(regnum == MISCTYPE_TYPE_FACE)) + output_line(ctx, "constant bool &%s = input.%s;", var, var); + else if (!skipreference) + output_line(ctx, "constant float4 &%s = input.%s;", var, var); + pop_output(ctx); + #endif + + if (!skipreference) + { + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define %s input.%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // if + } // else if + + else + { + fail(ctx, "unknown pixel shader attribute register"); + } // else + } // else if + + else + { + fail(ctx, "Unknown shader type"); // state machine should catch this. + } // else +} // emit_METAL_attribute + +void emit_METAL_NOP(Context *ctx) +{ + // no-op is a no-op. 
:) +} // emit_METAL_NOP + +void emit_METAL_MOV(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "%s", src0); + output_line(ctx, "%s", code); +} // emit_METAL_MOV + +void emit_METAL_ADD(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_ADD + +void emit_METAL_SUB(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_SUB + +void emit_METAL_MAD(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2); + output_line(ctx, "%s", code); +} // emit_METAL_MAD + +void emit_METAL_MUL(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_MUL + +void emit_METAL_RCP(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0); + output_line(ctx, "%s", 
code); +} // emit_METAL_RCP + +void emit_METAL_RSQ(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "rsqrt(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_RSQ + +void emit_METAL_dotprod(Context *ctx, const char *src0, const char *src1, + const char *extra) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char castleft[16] = { '\0' }; + const char *castright = ""; + if (vecsize != 1) + { + snprintf(castleft, sizeof (castleft), "float%d(", vecsize); + castright = ")"; + } // if + + char code[128]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s", + castleft, src0, src1, extra, castright); + output_line(ctx, "%s", code); +} // emit_METAL_dotprod + +void emit_METAL_DP3(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + emit_METAL_dotprod(ctx, src0, src1, ""); +} // emit_METAL_DP3 + +void emit_METAL_DP4(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_full(ctx, 1, src1, sizeof (src1)); + emit_METAL_dotprod(ctx, src0, src1, ""); +} // emit_METAL_DP4 + +void emit_METAL_MIN(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_MIN + +void emit_METAL_MAX(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, 
sizeof (src1)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_MAX + +void emit_METAL_SLT(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants. + if (vecsize == 1) + make_METAL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1); + else + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float%d(%s < %s)", vecsize, src0, src1); + } // else + output_line(ctx, "%s", code); +} // emit_METAL_SLT + +void emit_METAL_SGE(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants. 
+ if (vecsize == 1) + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float(%s >= %s)", src0, src1); + } // if + else + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float%d(%s >= %s)", vecsize, src0, src1); + } // else + output_line(ctx, "%s", code); +} // emit_METAL_SGE + +void emit_METAL_EXP(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_EXP + +void emit_METAL_LOG(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_LOG + +void emit_METAL_LIT_helper(Context *ctx) +{ + const char *maxp = "127.9961"; // value from the dx9 reference. 
+ + if (ctx->glsl_generated_lit_helper) + return; + + ctx->glsl_generated_lit_helper = 1; + ctx->metal_need_header_common = 1; + ctx->metal_need_header_math = 1; + + push_output(ctx, &ctx->helpers); + output_line(ctx, "float4 LIT(const float4 src)"); + output_line(ctx, "{"); ctx->indent++; + output_line(ctx, "const float power = clamp(src.w, -%s, %s);",maxp,maxp); + output_line(ctx, "float4 retval = float4(1.0, 0.0, 0.0, 1.0);"); + output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++; + output_line(ctx, "retval.y = src.x;"); + output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++; + output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--; + output_line(ctx, "}"); ctx->indent--; + output_line(ctx, "}"); + output_line(ctx, "return retval;"); ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + pop_output(ctx); +} // emit_METAL_LIT_helper + +void emit_METAL_LIT(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char code[128]; + emit_METAL_LIT_helper(ctx); + make_METAL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_LIT + +void emit_METAL_DST(Context *ctx) +{ + // !!! FIXME: needs to take ctx->dst_arg.writemask into account. 
+ char src0_y[64]; make_METAL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y)); + char src1_y[64]; make_METAL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y)); + char src0_z[64]; make_METAL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z)); + char src1_w[64]; make_METAL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w)); + + char code[128]; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float4(1.0, %s * %s, %s, %s)", + src0_y, src1_y, src0_z, src1_w); + output_line(ctx, "%s", code); +} // emit_METAL_DST + +void emit_METAL_LRP(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_METAL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + ctx->metal_need_header_common = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)", + src2, src1, src0); + output_line(ctx, "%s", code); +} // emit_METAL_LRP + +void emit_METAL_FRC(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_FRC + +void emit_METAL_M4X4(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); + char row3[64]; make_METAL_srcarg_string_full(ctx, 4, row3, sizeof (row3)); + char code[256]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2, src0, row3); + output_line(ctx, "%s", code); +} // 
emit_METAL_M4X4 + +void emit_METAL_M4X3(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_METAL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_METAL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_METAL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); + char code[256]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2); + output_line(ctx, "%s", code); +} // emit_METAL_M4X3 + +void emit_METAL_M3X4(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); + char row3[64]; make_METAL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3)); + char code[256]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float4(dot(%s, %s), dot(%s, %s), " + "dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, + src0, row2, src0, row3); + output_line(ctx, "%s", code); +} // emit_METAL_M3X4 + +void emit_METAL_M3X3(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_METAL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); + char code[256]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2); + output_line(ctx, "%s", code); +} // emit_METAL_M3X3 + +void emit_METAL_M3X2(Context *ctx) +{ + char src0[64]; 
make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_METAL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_METAL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + char code[256]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float2(dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1); + output_line(ctx, "%s", code); +} // emit_METAL_M3X2 + +void emit_METAL_CALL(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + if (ctx->loops > 0) + output_line(ctx, "%s(aL);", src0); + else + output_line(ctx, "%s();", src0); +} // emit_METAL_CALL + +void emit_METAL_CALLNZ(Context *ctx) +{ + // !!! FIXME: if src1 is a constbool that's true, we can remove the + // !!! FIXME: if. If it's false, we can make this a no-op. + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + + if (ctx->loops > 0) + output_line(ctx, "if (%s) { %s(aL); }", src1, src0); + else + output_line(ctx, "if (%s) { %s(); }", src1, src0); +} // emit_METAL_CALLNZ + +void emit_METAL_LOOP(Context *ctx) +{ + // !!! FIXME: swizzle? + char var[64]; get_METAL_srcarg_varname(ctx, 1, var, sizeof (var)); + assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday. + output_line(ctx, "{"); + ctx->indent++; + output_line(ctx, "const int aLend = %s.x + %s.y;", var, var); + output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var); + ctx->indent++; +} // emit_METAL_LOOP + +void emit_METAL_RET(Context *ctx) +{ + // thankfully, the MSDN specs say a RET _has_ to end a function...no + // early returns. So if you hit one, you know you can safely close + // a high-level function. + push_output(ctx, &ctx->postflight); + output_line(ctx, "}"); + output_blank_line(ctx); + set_output(ctx, &ctx->subroutines); // !!! FIXME: is this for LABEL? 
Maybe set it there so we don't allocate unnecessarily. +} // emit_METAL_RET + +void emit_METAL_ENDLOOP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); + ctx->indent--; + output_line(ctx, "}"); +} // emit_METAL_ENDLOOP + +void emit_METAL_LABEL(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + const int label = ctx->source_args[0].regnum; + RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); + assert(ctx->output == ctx->subroutines); // not mainline, etc. + assert(ctx->indent == 0); // we shouldn't be in the middle of a function. + + // MSDN specs say CALL* has to come before the LABEL, so we know if we + // can ditch the entire function here as unused. + if (reg == NULL) + set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. + + // !!! FIXME: it would be nice if we could determine if a function is + // !!! FIXME: only called once and, if so, forcibly inline it. + + // !!! FIXME: this worked in GLSL because all our state is global to the shader, + // !!! FIXME: but in metal we kept it local to the shader mainline. + // !!! FIXME: Can we do C++11 lambdas in Metal to have nested functions? :) + + const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; + output_line(ctx, "void %s(%s)", src0, uses_loopreg); + output_line(ctx, "{"); + ctx->indent++; +} // emit_METAL_LABEL + +void emit_METAL_DCL(Context *ctx) +{ + // no-op. We do this in our emit_attribute() and emit_uniform(). +} // emit_METAL_DCL + +void emit_METAL_POW(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "pow(abs(%s), %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_POW + +void emit_METAL_CRS(Context *ctx) +{ + // !!! 
FIXME: needs to take ctx->dst_arg.writemask into account. + char src0[64]; make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + char code[128]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), + "cross(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_METAL_CRS + +void emit_METAL_SGN(Context *ctx) +{ + // (we don't need the temporary registers specified for the D3D opcode.) + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_common = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_SGN + +void emit_METAL_ABS(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_math = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_ABS + +void emit_METAL_NRM(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->metal_need_header_geometric = 1; + make_METAL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0); + output_line(ctx, "%s", code); +} // emit_METAL_NRM + +void emit_METAL_SINCOS(Context *ctx) +{ + // we don't care about the temp registers that <= sm2 demands; ignore them. + // sm2 also talks about what components are left untouched vs. undefined, + // but we just leave those all untouched with Metal write masks (which + // would fulfill the "undefined" requirement, too). 
+ const int mask = ctx->dest_arg.writemask; + char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char code[128] = { '\0' }; + + ctx->metal_need_header_math = 1; + if (writemask_x(mask)) + make_METAL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0); + else if (writemask_y(mask)) + make_METAL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0); + else if (writemask_xy(mask)) + { + // !!! FIXME: can use sincos(), but need to assign cos to a temp, since it needs a reference. + make_METAL_destarg_assign(ctx, code, sizeof (code), + "float2(cos(%s), sin(%s))", src0, src0); + } // else if + + output_line(ctx, "%s", code); +} // emit_METAL_SINCOS + +void emit_METAL_REP(Context *ctx) +{ + // !!! FIXME: + // msdn docs say legal loop values are 0 to 255. We can check DEFI values + // at parse time, but if they are pulling a value from a uniform, do + // we clamp here? + // !!! FIXME: swizzle is legal here, right? + char src0[64]; make_METAL_srcarg_string_x(ctx, 0, src0, sizeof (src0)); + const uint rep = (uint) ctx->reps; + output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {", + rep, rep, src0, rep); + ctx->indent++; +} // emit_METAL_REP + +void emit_METAL_ENDREP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_METAL_ENDREP + +void emit_METAL_IF(Context *ctx) +{ + char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + output_line(ctx, "if (%s) {", src0); + ctx->indent++; +} // emit_METAL_IF + +void emit_METAL_IFC(Context *ctx) +{ + const char *comp = get_METAL_comparison_string_scalar(ctx); + char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) {", src0, comp, src1); + ctx->indent++; +} // emit_METAL_IFC + +void emit_METAL_ELSE(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "} else {"); + ctx->indent++; +} // emit_METAL_ELSE + +void 
emit_METAL_ENDIF(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_METAL_ENDIF + +void emit_METAL_BREAK(Context *ctx) +{ + output_line(ctx, "break;"); +} // emit_METAL_BREAK + +void emit_METAL_BREAKC(Context *ctx) +{ + const char *comp = get_METAL_comparison_string_scalar(ctx); + char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_METAL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1); +} // emit_METAL_BREAKC + +void emit_METAL_MOVA(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + + ctx->metal_need_header_math = 1; + ctx->metal_need_header_common = 1; + + if (vecsize == 1) + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "int(floor(abs(%s) + 0.5) * sign(%s))", + src0, src0); + } // if + + else + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "int%d(floor(abs(%s) + float%d(0.5)) * sign(%s))", + vecsize, src0, vecsize, src0); + } // else + + output_line(ctx, "%s", code); +} // emit_METAL_MOVA + +void emit_METAL_DEFB(Context *ctx) +{ + char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname)); + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const bool %s = %s;", + varname, ctx->dwords[0] ? 
"true" : "false"); + pop_output(ctx); +} // emit_METAL_DEFB + +void emit_METAL_DEFI(Context *ctx) +{ + char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname)); + const int32 *x = (const int32 *) ctx->dwords; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const int4 %s = int4(%d, %d, %d, %d);", + varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); + pop_output(ctx); +} // emit_METAL_DEFI + +EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) + +void emit_METAL_TEXKILL(Context *ctx) +{ + char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + ctx->metal_need_header_relational = 1; + ctx->metal_need_header_graphics = 1; + output_line(ctx, "if (any(%s.xyz < float3(0.0))) discard_fragment();", dst); +} // emit_METAL_TEXKILL + +static void metal_texld(Context *ctx, const int texldd) +{ + ctx->metal_need_header_texture = 1; + if (!shader_version_atleast(ctx, 1, 4)) + { + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char sampler[64]; + char code[128] = {0}; + + assert(!texldd); + + RegisterList *sreg; + sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + + char swizzle[4] = { 'x', 'y', 'z', '\0' }; + if (ttype == TEXTURE_TYPE_2D) + swizzle[2] = '\0'; // "xy" instead of "xyz". + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, %s.%s)", + sampler, sampler, dst, swizzle); + output_line(ctx, "%s", code); + } // if + + else if (!shader_version_atleast(ctx, 2, 0)) + { + // ps_1_4 is different, too! + fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! 
FIXME + return; + } // else if + + else + { + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + samp_arg->regnum); + const char *funcname = NULL; + char src0[64] = { '\0' }; + char src1[64]; get_METAL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + char src2[64] = { '\0' }; + char src3[64] = { '\0' }; + + if (sreg == NULL) + { + fail(ctx, "TEXLD using undeclared sampler"); + return; + } // if + + const char *grad = ""; + if (texldd) + { + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + grad = "2d"; + make_METAL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2)); + make_METAL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3)); + break; + case TEXTURE_TYPE_VOLUME: + grad = "3d"; + make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); + make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); + break; + case TEXTURE_TYPE_CUBE: + grad = "cube"; + make_METAL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); + make_METAL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); + break; + } // switch + } // if + + // !!! FIXME: can TEXLDD set instruction_controls? + // !!! FIXME: does the d3d bias value map directly to Metal? + const char *biasleft = ""; + const char *biasright = ""; + char bias[64] = { '\0' }; + if (ctx->instruction_controls == CONTROL_TEXLDB) + { + biasleft = ", bias("; + make_METAL_srcarg_string_w(ctx, 0, bias, sizeof (bias)); + biasright = ")"; + } // if + + // Metal doesn't have a texture2DProj() function, but you just divide + // your texcoords by texcoords.w to achieve it anyhow, so DIY. + const char *projop = ""; + char proj[64] = { '\0' }; + if (ctx->instruction_controls == CONTROL_TEXLDP) + { + if (sreg->index == TEXTURE_TYPE_CUBE) + fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? 
+ projop = " / "; + make_METAL_srcarg_string_w(ctx, 0, proj, sizeof (proj)); + } // if + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + break; + + case TEXTURE_TYPE_CUBE: + case TEXTURE_TYPE_VOLUME: + make_METAL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + break; + + default: + fail(ctx, "unknown texture type"); + return; + } // switch + + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + char swiz_str[6] = { '\0' }; + make_METAL_swizzle_string(swiz_str, sizeof (swiz_str), + samp_arg->swizzle, ctx->dest_arg.writemask); + + char code[128]; + if (texldd) + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, %s, gradient%s(%s, %s))%s", + src1, src1, src0, grad, src2, src3, swiz_str); + } // if + else + { + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, %s%s%s%s%s%s)%s", + src1, src1, src0, projop, proj, + biasleft, bias, biasright, swiz_str); + } // else + + output_line(ctx, "%s", code); + } // else +} // metal_texld + +void emit_METAL_TEXLD(Context *ctx) +{ + metal_texld(ctx, 0); +} // emit_METAL_TEXLD + + +void emit_METAL_TEXBEM(Context *ctx) +{ + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_METAL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + ctx->metal_need_header_texture = 1; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))", + sampler, sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXBEM + + +// TEXBEML: samples the destination's sampler at the same perturbed +// coordinate TEXBEM builds from the <sampler>_texbem values, then +// multiplies the sample by a luminance term built from src.z and the +// <sampler>_texbeml uniform. +void emit_METAL_TEXBEML(Context *ctx) +{ + // !!! FIXME: this code counts on the register not having swizzles, etc. + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_METAL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + ctx->metal_need_header_texture = 1; + + get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + // D3D's texbeml luminance correction is (src.z * scale) plus offset. + // The scale is read from <sampler>_texbeml.x, so the offset must come + // from the same uniform (.y), not from the bump matrix in _texbem. + // NOTE(review): assumes _texbeml is laid out as (scale, offset, ...); + // confirm against the code that uploads this uniform. + make_METAL_destarg_assign(ctx, code, sizeof (code), + "(%s_texture.sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *" + " ((%s.z * %s_texbeml.x) + %s_texbeml.y)", + sampler, sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src, + src, sampler, sampler); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXBEML + +EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME +EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME + + +void emit_METAL_TEXM3X2PAD(Context *ctx) +{ + // no-op ... work happens in emit_METAL_TEXM3X2TEX(). +} // emit_METAL_TEXM3X2PAD + +void emit_METAL_TEXM3X2TEX(Context *ctx) +{ + if (ctx->texm3x2pad_src0 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char sampler[64]; + char code[512]; + + ctx->metal_need_header_texture = 1; + ctx->metal_need_header_geometric = 1; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, + src0, sizeof (src0)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, + src1, sizeof (src1)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src2, sizeof (src2)); + get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, float2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))", + sampler, sampler, src0, src1, src2, dst); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXM3X2TEX + +void emit_METAL_TEXM3X3PAD(Context *ctx) +{ + // no-op ... work happens in emit_METAL_TEXM3X3*(). +} // emit_METAL_TEXM3X3PAD + +void emit_METAL_TEXM3X3TEX(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char sampler[64]; + char code[512]; + + ctx->metal_need_header_texture = 1; + ctx->metal_need_header_geometric = 1; + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + + // Sample through the texture object, like the other Metal texture + // emitters in this file do. The GLSL-style textureCube()/texture3D() + // free functions do not exist in Metal, and a float3 coordinate is + // what both cube and 3D textures take, so no per-type name is needed. + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s," + " float3(dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)))", + sampler, sampler, src0, src1, src2, src3, dst, src4); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXM3X3TEX + +// Writes the TEXM3X3SPEC_reflection() helper into ctx->helpers. The +// glsl_generated_texm3x3spec_helper flag makes every call after the +// first a no-op, so the helper is emitted at most once. +void emit_METAL_TEXM3X3SPEC_helper(Context *ctx) +{ + if (ctx->glsl_generated_texm3x3spec_helper) + return; + + ctx->glsl_generated_texm3x3spec_helper = 1; + ctx->metal_need_header_geometric = 1; // the helper body calls dot(). + + push_output(ctx, &ctx->helpers); + output_line(ctx, "float3 TEXM3X3SPEC_reflection(const float3 normal, const float3 eyeray)"); + output_line(ctx, "{"); ctx->indent++; + // D3D defines the reflection as 2 * ((N . E) / (N . N)) * N - E, with + // scalar dot products. A component-wise float3 multiply and divide + // produces a different vector. + output_line(ctx, "return (2.0 * (dot(normal, eyeray) / dot(normal, normal)) * normal) - eyeray;"); ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + pop_output(ctx); +} // emit_METAL_TEXM3X3SPEC_helper + +void emit_METAL_TEXM3X3SPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char src5[64]; + char sampler[64]; + char code[512]; + + 
ctx->metal_need_header_texture = 1; + ctx->metal_need_header_geometric = 1; + + emit_METAL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, + src5, sizeof (src5)); + get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + + // Sample through the texture object, like the other Metal texture + // emitters in this file do. The GLSL-style textureCube()/texture3D() + // free functions do not exist in Metal, and a float3 coordinate is + // what both cube and 3D textures take. The eye-ray argument must not + // be followed by a comma, or the generated call will not compile. + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, " + "TEXM3X3SPEC_reflection(" + "float3(" + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz)" + "), " + "%s.xyz" + ")" + ")", + sampler, sampler, src0, src1, src2, src3, dst, src4, src5); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXM3X3SPEC + +// TEXM3X3VSPEC: like TEXM3X3SPEC, but the eye ray handed to the +// reflection helper is assembled from the .w components of the three +// row registers instead of being read from a second source argument. +// Emits nothing when ctx->texm3x3pad_src1 is -1. +void emit_METAL_TEXM3X3VSPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char sampler[64]; + char code[512]; + + ctx->metal_need_header_texture = 1; + ctx->metal_need_header_geometric = 1; + + emit_METAL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. 
+ get_METAL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_METAL_destarg_varname(ctx, dst, sizeof (dst)); + + // Sample through the texture object, like the other Metal texture + // emitters in this file do. The GLSL-style textureCube()/texture3D() + // free functions do not exist in Metal, and a float3 coordinate is + // what both cube and 3D textures take, so no per-type name is needed. + // The eye ray is built from the .w of the three row registers. + make_METAL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.sample(%s, " + "TEXM3X3SPEC_reflection(" + "float3(" + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz)" + "), " + "float3(%s.w, %s.w, %s.w)" + ")" + ")", + sampler, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst); + + output_line(ctx, "%s", code); +} // emit_METAL_TEXM3X3VSPEC + +void emit_METAL_EXPP(Context *ctx) +{ + // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation. + emit_METAL_EXP(ctx); // I guess this is just partial precision EXP? +} // emit_METAL_EXPP + +void emit_METAL_LOGP(Context *ctx) +{ + // LOGP is just low-precision LOG, but we'll take the higher precision. + emit_METAL_LOG(ctx); +} // emit_METAL_LOGP + +// common code between CMP and CND. 
+// Shared emitter for CMP and CND.
+// Emits one conditional assignment per group of written destination
+// components that read the same component of source 0:
+//     dst.<group> = ((src0 <cmp>) ? src1 : src2)
+// `cmp` is comparison text spliced in right after the source 0 string (the
+// callers in this file pass "> 0.5" and ">= 0.0"). The destination writemask
+// is narrowed to each group while its line is built, and the original mask
+// is put back before returning.
+void emit_METAL_comparison_operations(Context *ctx, const char *cmp)
+{
+    int i, j;
+    DestArgInfo *dst = &ctx->dest_arg;
+    const SourceArgInfo *srcarg0 = &ctx->source_args[0];
+    const int origmask = dst->writemask;  // restored at the end.
+    int used_swiz[4] = { 0, 0, 0, 0 };  // components already covered by an emitted group.
+    const int writemask[4] = { dst->writemask0, dst->writemask1,
+                               dst->writemask2, dst->writemask3 };
+    const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
+                              srcarg0->swizzle_z, srcarg0->swizzle_w };
+
+    for (i = 0; i < 4; i++)
+    {
+        int mask = (1 << i);
+
+        if (!writemask[i]) continue;
+        if (used_swiz[i]) continue;
+
+        // This is a swizzle we haven't checked yet.
+        used_swiz[i] = 1;
+
+        // see if there are any other elements swizzled to match (.yyyy)
+        for (j = i + 1; j < 4; j++)
+        {
+            if (!writemask[j]) continue;
+            if (src0swiz[i] != src0swiz[j]) continue;
+            mask |= (1 << j);
+            used_swiz[j] = 1;
+        } // for
+
+        // okay, (mask) should be the writemask of swizzles we like.
+
+        //return make_METAL_srcarg_string(ctx, idx, (1 << 0));
+
+        // Source 0 is requested for component i only (the condition);
+        // sources 1 and 2 are requested for the whole group.
+        char src0[64];
+        char src1[64];
+        char src2[64];
+        make_METAL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
+        make_METAL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
+        make_METAL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
+
+        // Narrow the destination to this group before building the assignment.
+        set_dstarg_writemask(dst, mask);
+
+        char code[128];
+        make_METAL_destarg_assign(ctx, code, sizeof (code),
+                                  "((%s %s) ? %s : %s)",
+                                  src0, cmp, src1, src2);
+        output_line(ctx, "%s", code);
+    } // for
+
+    set_dstarg_writemask(dst, origmask);
+} // emit_METAL_comparison_operations
+
+// CND: conditional select that picks source 1 where source 0 "> 0.5",
+// otherwise source 2.
+void emit_METAL_CND(Context *ctx)
+{
+    emit_METAL_comparison_operations(ctx, "> 0.5");
+} // emit_METAL_CND
+
+// DEF: emits a "const float4" declaration named after the destination
+// register, initialized from the four values at ctx->dwords (read as floats).
+// The line is written while ctx->mainline_top is the pushed output target.
+void emit_METAL_DEF(Context *ctx)
+{
+    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
+    char varname[64]; get_METAL_destarg_varname(ctx, varname, sizeof (varname));
+    char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
+    char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
+    char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
+    char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
+
+    push_output(ctx, &ctx->mainline_top);
+    // NOTE(review): indent is bumped here and never decremented in this
+    // function; presumably push_output/pop_output save and restore it - confirm.
+    ctx->indent++;
+    // The "(void) %s;" is to make the compiler not warn if this isn't used.
+    output_line(ctx, "const float4 %s = float4(%s, %s, %s, %s); (void) %s;",
+                varname, val0, val1, val2, val3, varname);
+    pop_output(ctx);
+} // emit_METAL_DEF
+
+// Opcodes with no Metal emitter yet. The macro is defined outside this chunk;
+// presumably it expands to a stub emit_METAL_<opcode>() - confirm there.
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
+
+// TEXM3X3: assigns the destination a float4 built from three dot products,
+// with the w component fixed at 1.0. The first two dot products use the
+// texture register numbers saved in ctx->texm3x3pad_dst0/src0 and
+// ctx->texm3x3pad_dst1/src1; the third uses this instruction's destination
+// and source 0.
+void emit_METAL_TEXM3X3(Context *ctx)
+{
+    // Nothing is emitted when texm3x3pad_src1 is -1 (presumably meaning the
+    // preceding pad instructions were not recorded - confirm against the parser).
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+
+    char dst[64];
+    char src0[64];
+    char src1[64];
+    char src2[64];
+    char src3[64];
+    char src4[64];
+    char code[512];
+
+    ctx->metal_need_header_geometric = 1;
+
+    // !!! FIXME: this code counts on the register not having swizzles, etc.
+    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
+                             src0, sizeof (src0));
+    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
+                             src1, sizeof (src1));
+    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
+                             src2, sizeof (src2));
+    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
+                             src3, sizeof (src3));
+    get_METAL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
+                             src4, sizeof (src4));
+    get_METAL_destarg_varname(ctx, dst, sizeof (dst));
+
+    make_METAL_destarg_assign(ctx, code, sizeof (code),
+        "float4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
+        src0, src1, src2, src3, dst, src4);
+
+    output_line(ctx, "%s", code);
+} // emit_METAL_TEXM3X3
+
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
+
+// CMP: conditional select that picks source 1 where source 0 ">= 0.0",
+// otherwise source 2.
+void emit_METAL_CMP(Context *ctx)
+{
+    emit_METAL_comparison_operations(ctx, ">= 0.0");
+} // emit_METAL_CMP
+
+EMIT_METAL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
+
+// DP2ADD: sources 0 and 1 are fetched as vec2 strings and source 2 as a
+// scalar string; the " + <src2>" text is handed to emit_METAL_dotprod as its
+// trailing argument (presumably appended after the dot product - confirm there).
+void emit_METAL_DP2ADD(Context *ctx)
+{
+    char src0[64]; make_METAL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
+    char src1[64]; make_METAL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
+    char src2[64]; make_METAL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
+    char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
+    emit_METAL_dotprod(ctx, src0, src1, extra);
+} // emit_METAL_DP2ADD
+
+// DSX: assigns dfdx(<masked source 0>) to the destination and sets
+// ctx->metal_need_header_graphics.
+void emit_METAL_DSX(Context *ctx)
+{
+    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
+    char code[128];
+    ctx->metal_need_header_graphics = 1;
+    make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdx(%s)", src0);
+    output_line(ctx, "%s", code);
+} // emit_METAL_DSX
+
+// DSY: assigns dfdy(<masked source 0>) to the destination and sets
+// ctx->metal_need_header_graphics.
+void emit_METAL_DSY(Context *ctx)
+{
+    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
+    char code[128];
+    ctx->metal_need_header_graphics = 1;
+    make_METAL_destarg_assign(ctx, code, sizeof (code), "dfdy(%s)", src0);
+    output_line(ctx, "%s", code);
+} // emit_METAL_DSY
+
+// TEXLDD: delegates to metal_texld with 1 as the second argument. The meaning
+// of that argument is defined outside this chunk (presumably it selects the
+// gradient-sampling path - confirm).
+void emit_METAL_TEXLDD(Context *ctx)
+{
+    metal_texld(ctx, 1);
+} // emit_METAL_TEXLDD
+
+// SETP: assigns "(<src0> <comparison> <src1>)" to the destination. The
+// comparison text comes from the scalar helper when the destination writemask
+// covers one component, and from the vector helper otherwise.
+void emit_METAL_SETP(Context *ctx)
+{
+    const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
+    char src0[64]; make_METAL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
+    char src1[64]; make_METAL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
+    char code[128];
+
+    // destination is always predicate register (which is type bvec4).
+    const char *comp = (vecsize == 1) ?
+                            get_METAL_comparison_string_scalar(ctx) :
+                            get_METAL_comparison_string_vector(ctx);
+
+    make_METAL_destarg_assign(ctx, code, sizeof (code),
+                              "(%s %s %s)", src0, comp, src1);
+    output_line(ctx, "%s", code);
+} // emit_METAL_SETP
+
+// TEXLDL: currently just calls the plain TEXLD emitter; nothing in this
+// function handles an explicit level of detail (see the FIXME below).
+void emit_METAL_TEXLDL(Context *ctx)
+{
+    // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
+    // !!! FIXME:  from fragment shaders for some inexplicable reason.
+    // !!! FIXME:  Maybe Metal can do it, but I haven't looked into it yet.
+    emit_METAL_TEXLD(ctx);
+} // emit_METAL_TEXLDL
+
+// BREAKP: emits "if (<scalar source 0>) { break; }".
+void emit_METAL_BREAKP(Context *ctx)
+{
+    char src0[64]; make_METAL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
+    output_line(ctx, "if (%s) { break; }", src0);
+} // emit_METAL_BREAKP
+
+// RESERVED: intentionally emits nothing.
+void emit_METAL_RESERVED(Context *ctx)
+{
+    // do nothing; fails in the state machine.
+} // emit_METAL_RESERVED
+
+#endif // SUPPORT_PROFILE_METAL
+
+// Presumably pairs with a "#pragma GCC visibility push" earlier in this file
+// (not visible in this chunk) - confirm.
+#pragma GCC visibility pop
\ No newline at end of file