// Fixed incorrect fail() string.
/**
* MojoShader; generate shader programs from bytecode of compiled
* Direct3D shaders.
*
* Please see the file LICENSE.txt in the source's root directory.
*
* This file written by Ryan C. Gordon.
*/
// !!! FIXME: this file really needs to be split up.
// !!! FIXME: I keep changing coding styles for symbols and typedefs.
// Shader bytecode format is described at MSDN:
// http://msdn2.microsoft.com/en-us/library/ms800307.aspx
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <assert.h>
#include "mojoshader.h"
// This is the highest shader version we currently support, split into its
// major and minor parts. shader_version_supported() compares a shader's
// version against this pair.
#define MAX_SHADER_MAJOR 3
#define MAX_SHADER_MINOR 0
// If SUPPORT_PROFILE_* isn't defined, we assume an implicit desire to support.
// You get all the profiles unless you go out of your way to disable them.
// Each macro below is only given its default of 1 when nothing defined it
// earlier, so a build can predefine any of them to 0 to opt out.
#ifndef SUPPORT_PROFILE_D3D
#define SUPPORT_PROFILE_D3D 1
#endif
#ifndef SUPPORT_PROFILE_PASSTHROUGH
#define SUPPORT_PROFILE_PASSTHROUGH 1
#endif
#ifndef SUPPORT_PROFILE_GLSL
#define SUPPORT_PROFILE_GLSL 1
#endif
#ifndef SUPPORT_PROFILE_ARB1
#define SUPPORT_PROFILE_ARB1 1
#endif
// Basic portability glue: fixed-width integer names used throughout this
// file (uint8, uint16, uint32, int32), supplied by compiler-specific types
// on MSVC and by <stdint.h> everywhere else.
typedef unsigned int uint;  // this is a printf() helper. don't use for code.

#ifdef _MSC_VER
#include <malloc.h>
#define snprintf _snprintf
typedef unsigned __int8 uint8;
typedef unsigned __int16 uint16;
typedef unsigned __int32 uint32;
// int32 must be signed, matching the int32_t typedef used on other compilers.
typedef __int32 int32;
// Warning Level 4 considered harmful.  :)
#pragma warning(disable: 4100)  // "unreferenced formal parameter"
#pragma warning(disable: 4389)  // "signed/unsigned mismatch"
#else
#include <stdint.h>
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef int32_t int32;
#endif
// ISPRINTF(x,y) tags a function declaration as printf-like when building
// with GCC, so the compiler can check format strings against arguments:
// x is the 1-based position of the format string parameter and y is the
// position of the first variadic argument. It expands to nothing elsewhere.
#ifdef __GNUC__
#define ISPRINTF(x,y) __attribute__((format (printf, x, y)))
#else
#define ISPRINTF(x,y)
#endif
// Number of elements in an array. Only meaningful for a real array object;
// a pointer argument would yield sizeof (pointer) / sizeof (element).
#define STATICARRAYLEN(x) ( (sizeof ((x))) / (sizeof ((x)[0])) )
// Line terminator string: "\r\n" when _WINDOWS is defined, "\n" otherwise.
// NOTE(review): this has external linkage (no `static`); confirm whether
// anything outside this file relies on that.
#ifdef _WINDOWS // !!! FIXME: bleh
const char *endline_str = "\r\n";
#else
const char *endline_str = "\n";
#endif
// we need to reference this by explicit value occasionally.
#define OPCODE_RET 28
// Special-case return values from the parsing pipeline...
// All three are negative. FAIL is what out_of_memory(), failf() and
// output_line() return on error; NOFAIL and END_OF_STREAM are not used in
// this part of the file.
#define FAIL (-1)
#define NOFAIL (-2)
#define END_OF_STREAM (-3)
// Byteswap magic...
// SWAP32()/SWAP16() reverse the byte order of a 32-/16-bit value when
// building for PowerPC, and expand to their argument unchanged on every
// other target.
// NOTE(review): presumably this is because the shader token stream is
// little-endian and PowerPC is treated as big-endian -- confirm.
#if ((defined __GNUC__) && (defined __POWERPC__))
// GCC on PowerPC: use the byte-reversed load instructions (lwbrx/lhbrx),
// reading the value back through its own address.
static inline uint32 SWAP32(uint32 x)
{
__asm__ __volatile__("lwbrx %0,0,%1" : "=r" (x) : "r" (&x));
return x;
} // SWAP32
static inline uint16 SWAP16(uint16 x)
{
__asm__ __volatile__("lhbrx %0,0,%1" : "=r" (x) : "r" (&x));
return x;
} // SWAP16
#elif defined(__POWERPC__)
// Other PowerPC compilers: portable shift-and-mask byte reversal.
static inline uint32 SWAP32(uint32 x)
{
return ( (((x) >> 24) & 0x000000FF) | (((x) >> 8) & 0x0000FF00) |
(((x) << 8) & 0x00FF0000) | (((x) << 24) & 0xFF000000) );
} // SWAP32
static inline uint16 SWAP16(uint16 x)
{
return ( (((x) >> 8) & 0x00FF) | (((x) << 8) & 0xFF00) );
} // SWAP16
#else
// Everything else: no swapping is performed.
# define SWAP16(x) (x)
# define SWAP32(x) (x)
#endif
// Register types, with explicit numeric values.
// Two pairs of names share a value: ADDRESS/TEXTURE (3) and
// TEXCRDOUT/OUTPUT (6). get_D3D_register_string() tells them apart by
// shader type (and, for value 6, shader version).
// REG_TYPE_MAX equals the largest enumerator value, not a count.
typedef enum
{
REG_TYPE_TEMP = 0,
REG_TYPE_INPUT = 1,
REG_TYPE_CONST = 2,
REG_TYPE_ADDRESS = 3,
REG_TYPE_TEXTURE = 3, // ALSO 3!
REG_TYPE_RASTOUT = 4,
REG_TYPE_ATTROUT = 5,
REG_TYPE_TEXCRDOUT = 6,
REG_TYPE_OUTPUT = 6, // ALSO 6!
REG_TYPE_CONSTINT = 7,
REG_TYPE_COLOROUT = 8,
REG_TYPE_DEPTHOUT = 9,
REG_TYPE_SAMPLER = 10,
REG_TYPE_CONST2 = 11,
REG_TYPE_CONST3 = 12,
REG_TYPE_CONST4 = 13,
REG_TYPE_CONSTBOOL = 14,
REG_TYPE_LOOP = 15,
REG_TYPE_TEMPFLOAT16 = 16,
REG_TYPE_MISCTYPE = 17,
REG_TYPE_LABEL = 18,
REG_TYPE_PREDICATE = 19,
REG_TYPE_MAX = 19
} RegisterType;
// Texture kinds attached to a sampler. add_sampler() stores one of these
// (cast to int) in the sampler's RegisterList::index field.
// Note the values start at 2, not 0.
typedef enum
{
TEXTURE_TYPE_2D = 2,
TEXTURE_TYPE_CUBE = 3,
TEXTURE_TYPE_VOLUME = 4,
} TextureType;
// predeclare.
typedef struct Context Context;
struct ConstantsList;
// Function pointer types for the per-profile callbacks. Every callback takes
// the parsing Context as its first argument; most of them are collected in
// the Profile struct.
// one emit function for each opcode in each profile.
typedef void (*emit_function)(Context *ctx);
// one emit function for starting output in each profile.
typedef void (*emit_start)(Context *ctx, const char *profilestr);
// one emit function for ending output in each profile.
typedef void (*emit_end)(Context *ctx);
// one emit function for phase opcode output in each profile.
typedef void (*emit_phase)(Context *ctx);
// one emit function for finalizing output in each profile.
typedef void (*emit_finalize)(Context *ctx);
// one emit function for global definitions in each profile.
typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
// one emit function for relative uniform arrays in each profile.
typedef void (*emit_array)(Context *ctx, int base, int size);
// one emit function for relative constants arrays in each profile.
typedef void (*emit_const_array)(Context *ctx,
const struct ConstantsList *constslist,
int base, int size);
// one emit function for uniforms in each profile.
typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
int arraybase, int arraysize);
// one emit function for samplers in each profile.
typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype);
// one emit function for attributes in each profile.
typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
MOJOSHADER_usage usage, int index, int wmask);
// one args function for each possible sequence of opcode arguments.
typedef int (*args_function)(Context *ctx);
// one state function for each opcode where we have state machine updates.
typedef void (*state_function)(Context *ctx);
// one function for varnames in each profile.
typedef const char *(*varname_function)(Context *c, RegisterType t, int num);
// one function for const var array in each profile.
typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
// Description of one output profile: its name plus the callbacks that
// produce output for it. Context::profile points at the active one.
// (Per-opcode emitters are not part of this struct.)
typedef struct
{
// profile name string.
const char *name;
// output callbacks; see the matching emit_* typedefs for their signatures.
emit_start start_emitter;
emit_end end_emitter;
emit_phase phase_emitter;
emit_global global_emitter;
emit_array array_emitter;
emit_const_array const_array_emitter;
emit_uniform uniform_emitter;
emit_sampler sampler_emitter;
emit_attribute attribute_emitter;
emit_finalize finalize_emitter;
// naming callbacks; they return a string for a register or constant array.
varname_function get_varname;
const_array_varname_function get_const_array_varname;
} Profile;
// Register numbers that are meaningful for REG_TYPE_RASTOUT registers.
// get_D3D_register_string() spells these "oPos", "oFog" and "oPts".
// RASTOUT_TYPE_MAX equals the largest value, not a count.
typedef enum
{
RASTOUT_TYPE_POSITION = 0,
RASTOUT_TYPE_FOG = 1,
RASTOUT_TYPE_POINT_SIZE = 2,
RASTOUT_TYPE_MAX = 2
} RastOutType;
// Register numbers that are meaningful for REG_TYPE_MISCTYPE registers.
// get_D3D_register_string() spells these "vPos" and "vFace";
// scalar_register() treats the FACE register as scalar.
// MISCTYPE_TYPE_MAX equals the largest value, not a count.
typedef enum
{
MISCTYPE_TYPE_POSITION = 0,
MISCTYPE_TYPE_FACE = 1,
MISCTYPE_TYPE_MAX = 1
} MiscTypeType;
// A simple linked list of strings, so we can build the final output without
// realloc()ing for each new line, and easily insert lines into the middle
// of the output without much trouble.
typedef struct OutputListNode
{
// one line of output; allocated by output_line().
char *str;
struct OutputListNode *next;
} OutputListNode;
// An output section. `head` is an embedded node whose `next` starts the
// list; output_line() appends new nodes after `tail`.
typedef struct OutputList
{
OutputListNode head;
OutputListNode *tail;
} OutputList;
// Singly linked list of MOJOSHADER_constant values.
typedef struct ConstantsList
{
MOJOSHADER_constant constant;
struct ConstantsList *next;
} ConstantsList;
// Singly linked list of variables (see Context::variables).
// NOTE(review): field meanings are not established in this part of the
// file; index/count look like a register range -- confirm against users.
typedef struct VariableList
{
MOJOSHADER_uniformType type;
int index;
int count;
ConstantsList *constant;
int used;
struct VariableList *next;
} VariableList;
// Singly linked list of registers. reglist_insert() keeps it ordered by the
// key from reg_to_ui32(regtype, regnum) and initializes the other fields to
// MOJOSHADER_USAGE_UNKNOWN / 0 / NULL. Lists are anchored by an embedded
// RegisterList whose `next` is the first real entry.
typedef struct RegisterList
{
RegisterType regtype;
int regnum;
MOJOSHADER_usage usage;
// add_attribute_register() stores the usage index here; add_sampler()
// stores the TextureType here.
int index;
int writemask;
int misc;
const VariableList *array;
struct RegisterList *next;
} RegisterList;
// result modifiers.
// Bit flags tested against DestArgInfo::result_mod;
// make_D3D_destarg_string() renders them as "_sat", "_pp" and "_centroid".
// !!! FIXME: why isn't this an enum?
#define MOD_SATURATE 0x01
#define MOD_PP 0x02
#define MOD_CENTROID 0x04
// source modifiers.
// Values are implicit, starting at 0 for SRCMOD_NONE, so SRCMOD_TOTAL ends
// up equal to the number of real modifiers. See
// make_D3D_srcarg_string_in_buf() for how each one is spelled in D3D
// assembly (prefix such as "-" or "!", suffix such as "_bias" or "_x2").
typedef enum
{
SRCMOD_NONE,
SRCMOD_NEGATE,
SRCMOD_BIAS,
SRCMOD_BIASNEGATE,
SRCMOD_SIGN,
SRCMOD_SIGNNEGATE,
SRCMOD_COMPLEMENT,
SRCMOD_X2,
SRCMOD_X2NEGATE,
SRCMOD_DZ,
SRCMOD_DW,
SRCMOD_ABS,
SRCMOD_ABSNEGATE,
SRCMOD_NOT,
SRCMOD_TOTAL
} SourceMod;
// Decoded destination argument of the current instruction
// (Context::dest_arg).
typedef struct
{
const uint32 *token; // this is the unmolested token in the stream.
int regnum;
int relative;
int writemask; // xyzw or rgba (all four, not split out).
int writemask0; // x or red
int writemask1; // y or green
int writemask2; // z or blue
int writemask3; // w or alpha
int orig_writemask; // writemask before mojoshader tweaks it.
// bitwise OR of MOD_* flags.
int result_mod;
// make_D3D_destarg_string() maps 0x1..0x3 to "_x2".."_x8" and 0xD..0xF
// to "_d8".."_d2"; any other value produces no suffix.
int result_shift;
RegisterType regtype;
} DestArgInfo;
// Decoded source argument (Context::source_args[], Context::predicate_arg).
typedef struct
{
const uint32 *token; // this is the unmolested token in the stream.
int regnum;
int swizzle; // xyzw (all four, not split out).
// per-component selectors; used as indexes into swizzle_channels[].
int swizzle_x;
int swizzle_y;
int swizzle_z;
int swizzle_w;
SourceMod src_mod;
RegisterType regtype;
// nonzero when the argument uses relative addressing; the relative_*
// fields below describe the register (and component) used as the index.
int relative;
RegisterType relative_regtype;
int relative_regnum;
int relative_component;
const VariableList *relative_array;
} SourceArgInfo;
// Size in bytes of one scratch buffer, and how many of them a Context has.
#define SCRATCH_BUFFER_SIZE 128
#define SCRATCH_BUFFERS 32
// !!! FIXME: the scratch buffers make Context pretty big.
// !!! FIXME: might be worth having one set of static scratch buffers that
// !!! FIXME: are mutex protected?
// Context...this is state that changes as we parse through a shader...
struct Context
{
// allocator callbacks and their opaque argument; see Malloc() and Free().
MOJOSHADER_malloc malloc;
MOJOSHADER_free free;
void *malloc_data;
// the token stream and its length.
const uint32 *tokens;
uint32 tokencount;
// section that output_line() currently appends to.
OutputList *output;
// the available output sections.
OutputList globals;
OutputList helpers;
OutputList subroutines;
OutputList mainline_intro;
OutputList mainline;
OutputList ignore;
// saved output/indent pairs managed by push_output() and pop_output().
OutputList *output_stack[2];
uint8 *output_bytes; // can be used instead of the OutputLists.
int indent_stack[2];
int output_stack_len;
int output_len; // total strlen; prevents walking the lists just to malloc.
// number of tab characters output_line() puts in front of each line.
int indent;
const char *shader_type_str;
const char *endline;
int endline_len;
// NULL until the first failure is recorded; see isfail() and failf().
const char *failstr;
// rotating pool of temporary string buffers; see get_scratch_buffer().
char scratch[SCRATCH_BUFFERS][SCRATCH_BUFFER_SIZE];
int scratchidx; // current scratch buffer.
int profileid;
const Profile *profile;
MOJOSHADER_shaderType shader_type;
uint8 major_ver;
uint8 minor_ver;
// arguments of the instruction currently being processed.
DestArgInfo dest_arg;
SourceArgInfo source_args[5];
SourceArgInfo predicate_arg; // for predicated instructions.
uint32 dwords[4];
uint32 version_token;
int instruction_count;
uint32 instruction_controls;
uint32 previous_opcode;
int loops;
int reps;
int max_reps;
int cmps;
// counters advanced by allocate_scratch_register(); max_* is the high
// water mark.
int scratch_registers;
int max_scratch_registers;
int branch_labels_stack_index;
int branch_labels_stack[32];
// next label number handed out by allocate_branch_label().
int assigned_branch_labels;
int assigned_vertex_attributes;
int last_address_reg_component;
// RegisterList anchors: the embedded struct is a dummy head and the real
// entries start at its `next` pointer.
RegisterList used_registers;
RegisterList defined_registers;
int constant_count;
ConstantsList *constants;
int uniform_count;
RegisterList uniforms;
int attribute_count;
RegisterList attributes;
int sampler_count;
RegisterList samplers;
VariableList *variables; // variables to register mapping.
// boolean flags.
// NOTE(review): these are 1-bit bit-fields of plain `int`; whether such a
// field is signed is implementation-defined, so they should only be tested
// for zero/nonzero rather than compared against 1.
int have_ctab:1;
int determined_constants_arrays:1;
int predicated:1;
int support_nv2:1;
int support_nv3:1;
int support_glsl120:1;
int glsl_generated_lit_opcode:1;
};
// Convenience functions for allocators...
// Statically allocated "Out of memory" results, so an allocation failure
// can be reported without allocating anything. out_of_mem_data is a
// prebuilt parse result whose first field is the message and whose shader
// type is MOJOSHADER_TYPE_UNKNOWN; out_of_mem_str is the bare message that
// out_of_memory() stores in Context::failstr.
static MOJOSHADER_parseData out_of_mem_data = {
"Out of memory", 0, 0, 0, 0, MOJOSHADER_TYPE_UNKNOWN, 0, 0, 0, 0
};
static const char *out_of_mem_str = "Out of memory";
// Record an out-of-memory failure on the context, unless an earlier error
// is already stored. Always returns FAIL.
static inline int out_of_memory(Context *ctx)
{
    // Point at the static string: fail() would call malloc().
    if (!ctx->failstr)
        ctx->failstr = out_of_mem_str;

    return FAIL;
} // out_of_memory
// Allocate `len` bytes through the context's allocator callback.
// Returns the new block, or NULL after recording an out-of-memory failure
// on the context. Note that `len` is narrowed to int for the callback.
static inline void *Malloc(Context *ctx, const size_t len)
{
    void *block = ctx->malloc((int) len, ctx->malloc_data);

    if (!block)
        out_of_memory(ctx);

    return block;
} // Malloc
// Release `ptr` through the context's deallocator callback.
// A NULL pointer is ignored here, in case of a dumb free() implementation.
static inline void Free(Context *ctx, void *ptr)
{
    if (ptr == NULL)
        return;

    ctx->free(ptr, ctx->malloc_data);
} // Free
// jump between output sections in the context...
// push_output() saves the current output section and indent level, then
// makes `section` current with the indent reset to zero. Undo it with
// pop_output(). The save stack has a fixed depth (asserted).
static inline void push_output(Context *ctx, OutputList *section)
{
    const int depth = ctx->output_stack_len;
    assert(depth < STATICARRAYLEN(ctx->output_stack));

    ctx->output_stack[depth] = ctx->output;
    ctx->indent_stack[depth] = ctx->indent;
    ctx->output_stack_len = depth + 1;

    ctx->output = section;
    ctx->indent = 0;
} // push_output
// Restore the output section and indent level saved by the most recent
// push_output(). Calling this with nothing pushed is a bug (asserted).
static inline void pop_output(Context *ctx)
{
    assert(ctx->output_stack_len > 0);

    const int depth = --ctx->output_stack_len;
    ctx->output = ctx->output_stack[depth];
    ctx->indent = ctx->indent_stack[depth];
} // pop_output
// Shader model version magic...
static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
{
return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 0 : (minor)) );
} // version_ui32
static inline int shader_version_supported(const uint8 maj, const uint8 min)
{
return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
} // shader_version_supported
static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
const uint8 min)
{
return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
} // shader_version_atleast
// Nonzero if the shader being parsed is exactly version maj.min. The raw
// fields are compared, so no 0xFF-minor normalization happens here.
static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
                                         const uint8 min)
{
    if (ctx->major_ver != maj)
        return 0;
    return (ctx->minor_ver == min);
} // shader_version_exactly
// Nonzero if the shader being parsed is a pixel shader.
static inline int shader_is_pixel(const Context *ctx)
{
    const int is_pixel = (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
    return is_pixel;
} // shader_is_pixel
// Nonzero if the shader being parsed is a vertex shader.
static inline int shader_is_vertex(const Context *ctx)
{
    const int is_vertex = (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
    return is_vertex;
} // shader_is_vertex
// Nonzero once any failure message has been recorded on the context.
static inline int isfail(const Context *ctx)
{
    return (ctx->failstr == NULL) ? 0 : 1;
} // isfail
// Hand out the next temporary string buffer (SCRATCH_BUFFER_SIZE bytes)
// from the context's rotating pool. The index wraps, so a buffer is reused
// after SCRATCH_BUFFERS further calls; callers must be done with it by then.
// NOTE(review): the index is wrapped on every call here, so the overflow
// report below can only trigger if scratchidx is set out of range elsewhere.
static inline char *get_scratch_buffer(Context *ctx)
{
    const int overflowed = (ctx->scratchidx >= SCRATCH_BUFFERS);

    if (overflowed && !isfail(ctx))
    {
        // can't call fail() here, since it calls back into here.
        const char *errstr = "BUG: overflowed scratch buffers";
        char *copy = (char *) Malloc(ctx, strlen(errstr) + 1);
        if (copy != NULL)
        {
            strcpy(copy, errstr);
            ctx->failstr = copy;
        } // if
    } // if

    ctx->scratchidx = (ctx->scratchidx + 1) % SCRATCH_BUFFERS;
    return ctx->scratch[ctx->scratchidx];
} // get_scratch_buffer
// Record a printf-style formatted failure message on the context.
// Only the first failure is kept; later calls leave the stored message
// alone. The message is copied into memory from Malloc(). Always returns
// FAIL, so callers can write `return failf(...)`.
static int failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static int failf(Context *ctx, const char *fmt, ...)
{
    if (ctx->failstr != NULL)
        return FAIL;  // don't change existing error.

    // First attempt: format into a scratch buffer to learn the length.
    char *scratch = get_scratch_buffer(ctx);
    va_list args;
    va_start(args, fmt);
    const int msglen = vsnprintf(scratch, SCRATCH_BUFFER_SIZE, fmt, args);
    va_end(args);

    char *msg = (char *) Malloc(ctx, msglen + 1);
    if (msg == NULL)
        return FAIL;  // Malloc() already recorded the out-of-memory error.

    if (msglen >= SCRATCH_BUFFER_SIZE)
    {
        // The scratch copy was truncated; format again into the real buffer.
        va_start(args, fmt);
        vsnprintf(msg, msglen + 1, fmt, args);  // rebuild it.
        va_end(args);
    } // if
    else
    {
        strcpy(msg, scratch);  // copy it over.
    } // else

    ctx->failstr = msg;
    return FAIL;
} // failf
// Record a fixed failure message on the context (see failf()).
// The string is passed as an argument to "%s", never as a format string.
// Always returns FAIL.
static inline int fail(Context *ctx, const char *reason)
{
    const int rc = failf(ctx, "%s", reason);
    return rc;
} // fail
// Append one printf-style formatted line to the current output section
// (ctx->output), prefixed with ctx->indent tab characters.
// Returns 0 on success. Returns FAIL, appending nothing, if the context has
// already failed or an allocation fails.
// ctx->output_len grows by the line length plus ctx->endline_len; the line
// terminator itself is not stored in the node.
static int output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static int output_line(Context *ctx, const char *fmt, ...)
{
    OutputListNode *item = NULL;

    if (isfail(ctx))
        return FAIL;  // we failed previously, don't go on...

    char *scratch = get_scratch_buffer(ctx);

    const int indent = ctx->indent;
    if (indent > 0)
        memset(scratch, '\t', indent);

    // len is the full line length: indent plus the formatted text.
    va_list ap;
    va_start(ap, fmt);
    const int len = vsnprintf(scratch+indent, SCRATCH_BUFFER_SIZE-indent, fmt, ap) + indent;
    va_end(ap);

    item = (OutputListNode *) Malloc(ctx, sizeof (OutputListNode));
    if (item == NULL)
        return FAIL;

    item->str = (char *) Malloc(ctx, len + 1);
    if (item->str == NULL)
    {
        Free(ctx, item);
        return FAIL;
    } // if

    // If we overflowed our scratch buffer, that's okay. We were going to
    //  allocate anyhow...the scratch buffer just lets us avoid a second
    //  run of vsnprintf().
    if (len < SCRATCH_BUFFER_SIZE)
        strcpy(item->str, scratch);  // copy it over.
    else
    {
        if (indent > 0)
            memset(item->str, '\t', indent);
        va_start(ap, fmt);
        // We write after the indent, so only (len + 1) - indent bytes of
        //  the allocation remain; don't advertise more than that.
        vsnprintf(item->str+indent, (len + 1) - indent, fmt, ap);  // rebuild it.
        va_end(ap);
    } // else

    item->next = NULL;

    ctx->output->tail->next = item;
    ctx->output->tail = item;
    ctx->output_len += len + ctx->endline_len;

    return 0;
} // output_line
// Append an empty line to the current output section. Going through "%s"
// rather than an empty format string is just to stop gcc whining.
static inline int output_blank_line(Context *ctx)
{
    const int rc = output_line(ctx, "%s", "");
    return rc;
} // output_blank_line
// !!! FIXME: this is sort of nasty.
// Format `f` into `buf` with "%f", then strip trailing zeros from the
// fraction. If the whole fraction is zeros, the decimal point goes too,
// unless `leavedecimal` is nonzero, in which case ".0" is kept (and ".0" is
// appended if the formatted text had no '.' at all).
// Records a failure on the context if `bufsize` is too small.
static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
                     int leavedecimal)
{
    const size_t len = snprintf(buf, bufsize, "%f", f);
    if ((len+2) >= bufsize)
    {
        fail(ctx, "BUG: internal buffer is too small");
        return;
    } // if

    char *dot = strchr(buf, '.');
    if (dot == NULL)
    {
        if (leavedecimal)
            strcat(buf, ".0");
        return;  // done.
    } // if

    // Walk backwards from the terminator over trailing '0' characters.
    // `cut` ends up just past the last character to keep, or on the decimal
    // point itself when every fractional digit was zero.
    char *cut = buf + len;
    for (;;)
    {
        cut--;
        if (cut == dot)
            break;
        if (*cut != '0')
        {
            cut++;
            break;
        } // if
    } // for

    if (leavedecimal && (cut == dot))
        cut += 2;  // keep the ".0".

    *cut = '\0';  // chop extra '0' or all decimal places off.
} // floatstr
// Deal with register lists...  !!! FIXME: I sort of hate this.
// Free every node from `item` onward by calling f(node, d) on each.
// Pass the first heap-allocated node; a NULL `item` is a no-op.
static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item)
{
    RegisterList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_reglist
static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
{
return ( ((uint32) regtype) | (((uint32) regnum) << 16) );
} // reg_to_uint32
// Find or create the entry for (regtype, regnum) in a sorted register list.
// `prev` is the list anchor; entries start at prev->next and are ordered by
// reg_to_ui32(). An existing entry is returned untouched. A new entry is
// linked in at its sorted position with usage MOJOSHADER_USAGE_UNKNOWN and
// every other field zero/NULL.
// Returns NULL if allocation fails (Malloc() records the error).
static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
                                    const RegisterType regtype,
                                    const int regnum)
{
    const uint32 wanted = reg_to_ui32(regtype, regnum);

    // Advance until `prev` is the node the new entry belongs after.
    RegisterList *cur;
    for (cur = prev->next; cur != NULL; prev = cur, cur = cur->next)
    {
        const uint32 have = reg_to_ui32(cur->regtype, cur->regnum);
        if (have == wanted)
            return cur;  // already set, so we're done.
        if (wanted < have)
            break;  // insert it here.
    } // for

    // we need to insert an entry after (prev).
    RegisterList *node = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
    if (node == NULL)
        return NULL;

    node->regtype = regtype;
    node->regnum = regnum;
    node->usage = MOJOSHADER_USAGE_UNKNOWN;
    node->index = 0;
    node->writemask = 0;
    node->misc = 0;
    node->array = NULL;
    node->next = prev->next;
    prev->next = node;

    return node;
} // reglist_insert
// Look up the entry for (rtype, regnum) in a sorted register list.
// `prev` is the list anchor; entries start at prev->next.
// Returns the entry, or NULL if the list has none. Because the list is
// sorted, the search stops at the first entry with a larger key.
static RegisterList *reglist_find(RegisterList *prev, const RegisterType rtype,
                                  const int regnum)
{
    const uint32 wanted = reg_to_ui32(rtype, regnum);

    RegisterList *cur;
    for (cur = prev->next; cur != NULL; cur = cur->next)
    {
        const uint32 have = reg_to_ui32(cur->regtype, cur->regnum);
        if (have == wanted)
            return cur;  // here it is.
        if (wanted < have)
            return NULL;  // should have been here if it existed.
    } // for

    return NULL;  // wasn't in the list.
} // reglist_find
// Read-only variant of reglist_find(): returns the matching entry as a
// const pointer, or NULL if there is none.
static inline const RegisterList *reglist_exists(RegisterList *prev,
                                                 const RegisterType regtype,
                                                 const int regnum)
{
    const RegisterList *found = reglist_find(prev, regtype, regnum);
    return found;
} // reglist_exists
// Note that (regtype, regnum) is used by adding it to ctx->used_registers.
// An allocation failure is not returned here; it is only visible through
// the context's failure state.
static inline void set_used_register(Context *ctx, const RegisterType regtype,
                                     const int regnum)
{
    RegisterList *anchor = &ctx->used_registers;
    reglist_insert(ctx, anchor, regtype, regnum);
} // set_used_register
// Nonzero if (regtype, regnum) is present in ctx->used_registers.
static inline int get_used_register(Context *ctx, const RegisterType regtype,
                                    const int regnum)
{
    const RegisterList *hit = reglist_exists(&ctx->used_registers,
                                             regtype, regnum);
    return (hit != NULL);
} // get_used_register
// Note that (rtype, regnum) is defined by adding it to
// ctx->defined_registers. An allocation failure is not returned here; it is
// only visible through the context's failure state.
static inline void set_defined_register(Context *ctx, const RegisterType rtype,
                                        const int regnum)
{
    RegisterList *anchor = &ctx->defined_registers;
    reglist_insert(ctx, anchor, rtype, regnum);
} // set_defined_register
// Nonzero if (rtype, regnum) is present in ctx->defined_registers.
static inline int get_defined_register(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *hit = reglist_exists(&ctx->defined_registers,
                                             rtype, regnum);
    return (hit != NULL);
} // get_defined_register
// Find the first attribute in ctx->attributes with the given usage and
// usage index. Returns NULL if none matches. This is a linear scan: the
// attribute list is ordered by register, not by usage.
static const RegisterList *declared_attribute(Context *ctx,
                                              const MOJOSHADER_usage usage,
                                              const int index)
{
    const RegisterList *cur;
    for (cur = ctx->attributes.next; cur != NULL; cur = cur->next)
    {
        if ((cur->usage == usage) && (cur->index == index))
            return cur;
    } // for

    return NULL;
} // declared_attribute
// Find or create the entry for register (rtype, regnum) in ctx->attributes
// and set its usage, usage index and writemask.
// If the entry cannot be allocated, nothing is stored; reglist_insert()'s
// call to Malloc() has already recorded the out-of-memory failure.
static void add_attribute_register(Context *ctx, const RegisterType rtype,
                                   const int regnum, const MOJOSHADER_usage usage,
                                   const int index, const int writemask)
{
    RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; don't dereference a NULL entry.

    item->usage = usage;
    item->index = index;
    item->writemask = writemask;
} // add_attribute_register
// Find or create the entry for register (rtype, regnum) in ctx->samplers
// and store its texture type in the entry's `index` field.
// If the entry cannot be allocated, nothing is stored; reglist_insert()'s
// call to Malloc() has already recorded the out-of-memory failure.
static inline void add_sampler(Context *ctx, const RegisterType rtype,
                               const int regnum, const TextureType ttype)
{
    // !!! FIXME: make sure it doesn't exist?
    RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; don't dereference a NULL entry.

    item->index = (int) ttype;
} // add_sampler
// Nonzero if the writemask selects all four components, i.e. no explicit
// mask at all.
static inline int writemask_xyzw(const int writemask)
{
    const int full_mask = 0xF;  // binary 1111: x, y, z and w.
    return (full_mask == writemask);
} // writemask_xyzw
// Nonzero if the writemask selects exactly x, y and z.
static inline int writemask_xyz(const int writemask)
{
    const int xyz_mask = 0x7;  // binary 0111.
    return (xyz_mask == writemask);
} // writemask_xyz
// Nonzero if the writemask selects exactly x and y.
static inline int writemask_xy(const int writemask)
{
    const int xy_mask = 0x3;  // binary 0011.
    return (xy_mask == writemask);
} // writemask_xy
// Nonzero if the writemask selects exactly x.
static inline int writemask_x(const int writemask)
{
    const int x_mask = 0x1;  // binary 0001.
    return (x_mask == writemask);
} // writemask_x
// Nonzero if the writemask selects exactly y.
static inline int writemask_y(const int writemask)
{
    const int y_mask = 0x2;  // binary 0010.
    return (y_mask == writemask);
} // writemask_y
// Nonzero if all four 2-bit selectors in the packed swizzle are equal, i.e.
// one source component is replicated to every channel (.xxxx, .yyyy, ...).
static inline int replicate_swizzle(const int swizzle)
{
    const int sel0 = (swizzle >> 0) & 0x3;
    const int sel1 = (swizzle >> 2) & 0x3;
    const int sel2 = (swizzle >> 4) & 0x3;
    const int sel3 = (swizzle >> 6) & 0x3;
    return ((sel0 == sel1) && (sel1 == sel2) && (sel2 == sel3));
} // replicate_swizzle
// Returns 1 if the register is treated as scalar, 0 otherwise. Callers in
// this file skip the swizzle/writemask suffix for scalar registers.
// Scalar: depth output, boolean constants, predicate and loop registers,
// plus the MISCTYPE register whose number is MISCTYPE_TYPE_FACE.
static inline int scalar_register(const RegisterType regtype, const int regnum)
{
    if (regtype == REG_TYPE_MISCTYPE)
        return (((const MiscTypeType) regnum) == MISCTYPE_TYPE_FACE) ? 1 : 0;

    if ((regtype == REG_TYPE_DEPTHOUT) || (regtype == REG_TYPE_CONSTBOOL) ||
        (regtype == REG_TYPE_PREDICATE) || (regtype == REG_TYPE_LOOP))
        return 1;

    return 0;
} // scalar_register
// Nonzero if the packed swizzle is the identity ordering (x, y, z, w).
static inline int no_swizzle(const int swizzle)
{
    const int identity = 0xE4;  // binary 11100100: selectors 0, 1, 2, 3.
    return (identity == swizzle);
} // no_swizzle
// Count how many of the low four bits of the writemask are set, i.e. how
// many components it selects (0 to 4). Higher bits are ignored.
static inline int vecsize_from_writemask(const int m)
{
    int count = 0;
    int bit;
    for (bit = 0; bit < 4; bit++)
        count += (m >> bit) & 1;
    return count;
} // vecsize_from_writemask
// Hand out the next scratch register number and bump the in-use count.
// ctx->max_scratch_registers is raised when needed so that it stays at
// least one above the returned number.
static int allocate_scratch_register(Context *ctx)
{
    const int slot = ctx->scratch_registers;
    ctx->scratch_registers = slot + 1;

    if (ctx->max_scratch_registers <= slot)
        ctx->max_scratch_registers = slot + 1;

    return slot;
} // allocate_scratch_register
// Hand out the next unused branch label number; they count up from
// whatever ctx->assigned_branch_labels currently holds.
static int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels = label + 1;
    return label;
} // allocate_branch_label
// D3D stuff that's used in more than just the d3d profile...
// Component letters, indexed by a 2-bit swizzle selector (0..3). This is a
// plain char array, not a NUL-terminated string.
static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };
// Usage suffix strings, 14 entries.
// NOTE(review): presumably indexed by MOJOSHADER_usage values -- confirm
// the order matches that enum.
static const char *usagestrs[] = {
"_position", "_blendweight", "_blendindices", "_normal", "_psize",
"_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont",
"_color", "_fog", "_depth", "_sample"
};
// Translate a register into its D3D assembly spelling.
// Returns the register name prefix ("r", "c", "oPos", ...) as a static
// string, and writes the decimal register number into `regnum_str`
// (capacity `regnum_size`), or an empty string for registers that are
// written without a number.
// An unknown register type records a failure and yields "???".
// The return value is NULL when the type is RASTOUT or MISCTYPE and
// `regnum` is not one of the known enumerators; callers must check for that.
static const char *get_D3D_register_string(Context *ctx,
                                           RegisterType regtype,
                                           int regnum, char *regnum_str,
                                           size_t regnum_size)
{
    const char *prefix = NULL;
    int numbered = 1;  // cleared for registers spelled without an index.

    switch (regtype)
    {
        case REG_TYPE_TEMP: prefix = "r"; break;
        case REG_TYPE_INPUT: prefix = "v"; break;
        case REG_TYPE_CONST: prefix = "c"; break;

        case REG_TYPE_ADDRESS:  // (or REG_TYPE_TEXTURE, same value.)
            if (shader_is_vertex(ctx))
                prefix = "a";
            else
                prefix = "t";
            break;

        case REG_TYPE_RASTOUT:
            numbered = 0;
            switch ((RastOutType) regnum)
            {
                case RASTOUT_TYPE_POSITION: prefix = "oPos"; break;
                case RASTOUT_TYPE_FOG: prefix = "oFog"; break;
                case RASTOUT_TYPE_POINT_SIZE: prefix = "oPts"; break;
            } // switch
            break;

        case REG_TYPE_ATTROUT: prefix = "oD"; break;

        case REG_TYPE_OUTPUT:  // (or REG_TYPE_TEXCRDOUT, same value.)
            prefix = "oT";
            if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
                prefix = "o";
            break;

        case REG_TYPE_CONSTINT: prefix = "i"; break;
        case REG_TYPE_COLOROUT: prefix = "oC"; break;

        case REG_TYPE_DEPTHOUT:
            numbered = 0;
            prefix = "oDepth";
            break;

        case REG_TYPE_SAMPLER: prefix = "s"; break;
        case REG_TYPE_CONSTBOOL: prefix = "b"; break;

        case REG_TYPE_LOOP:
            numbered = 0;
            prefix = "aL";
            break;

        case REG_TYPE_MISCTYPE:
            numbered = 0;
            switch ((const MiscTypeType) regnum)
            {
                case MISCTYPE_TYPE_POSITION: prefix = "vPos"; break;
                case MISCTYPE_TYPE_FACE: prefix = "vFace"; break;
            } // switch
            break;

        case REG_TYPE_LABEL: prefix = "l"; break;
        case REG_TYPE_PREDICATE: prefix = "p"; break;

        //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
        default:
            fail(ctx, "unknown register type");
            prefix = "???";
            numbered = 0;
            break;
    } // switch

    if (!numbered)
        regnum_str[0] = '\0';
    else
        snprintf(regnum_str, regnum_size, "%u", (uint) regnum);

    return prefix;
} // get_D3D_register_string
#define AT_LEAST_ONE_PROFILE 0
#if !SUPPORT_PROFILE_D3D
#define PROFILE_EMITTER_D3D(op)
#else
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
#define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
// Render one source argument as D3D assembly text into `buf` (capacity
// `buflen`), e.g. "-r0_bias.xyz" or "c0[a0.x]".
// The pieces, in output order: modifier prefix, register name, register
// number, modifier suffix, optional "[relative register + .component]",
// and the swizzle (omitted for scalar registers and identity swizzles).
// Returns `buf`, or "" after recording a failure when either the register
// or its relative-addressing register has no known name.
static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
                                                 const SourceArgInfo *arg,
                                                 char *buf, size_t buflen)
{
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            postmod_str = "_bias";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            postmod_str = "_bx2";
            break;

        case SRCMOD_COMPLEMENT:
            premod_str = "1-";
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            postmod_str = "_x2";
            break;

        case SRCMOD_DZ:
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            postmod_str = "_abs";
            break;

        case SRCMOD_NOT:
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
            break;  // stop compiler whining.
    } // switch

    char regnum_str[16];
    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
                                                      arg->regnum, regnum_str,
                                                      sizeof (regnum_str));

    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        return "";
    } // if

    const char *rel_lbracket = "";
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    char rel_regnum_str[16] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';
        rel_lbracket = "[";
        rel_rbracket = "]";
        rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  rel_regnum_str,
                                                  sizeof (rel_regnum_str));

        // Check the string we just looked up, not regtype_str (which was
        //  already verified above); a NULL here must not reach snprintf().
        if (rel_regtype_str == NULL)
        {
            fail(ctx, "Unknown relative source register type.");
            return "";
        } // if
    } // if

    char swizzle_str[6];
    int i = 0;
    const int scalar = scalar_register(arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';
        swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_w];

        // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
        // (the loop stops at the '.' at the latest, since no channel
        //  letter compares equal to it.)
        while (swizzle_str[i-1] == swizzle_str[i-2])
            i--;
    } // if
    swizzle_str[i] = '\0';
    assert(i < sizeof (swizzle_str));

    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
             premod_str, regtype_str, regnum_str, postmod_str,
             rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
             rel_rbracket, swizzle_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_srcarg_string_in_buf
// Render the current instruction's destination argument (ctx->dest_arg) as
// D3D assembly text in a scratch buffer.
// The text starts with the instruction-name suffixes (result shift, "_sat",
// "_pp", "_centroid"), then a space, then the optional "(predicate) " and
// finally the register with its writemask. The writemask is omitted for
// scalar registers and full xyzw masks.
// Returns the scratch buffer, or "" after recording a failure if the
// register has no known name.
static const char *make_D3D_destarg_string(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;

    const char *shift = "";
    switch (dst->result_shift)
    {
        case 0x1: shift = "_x2"; break;
        case 0x2: shift = "_x4"; break;
        case 0x3: shift = "_x8"; break;
        case 0xD: shift = "_d8"; break;
        case 0xE: shift = "_d4"; break;
        case 0xF: shift = "_d2"; break;
    } // switch

    const int mods = dst->result_mod;
    const char *sat = "";
    const char *pp = "";
    const char *centroid = "";
    if (mods & MOD_SATURATE)
        sat = "_sat";
    if (mods & MOD_PP)
        pp = "_pp";
    if (mods & MOD_CENTROID)
        centroid = "_centroid";

    char number[16];
    const char *regname = get_D3D_register_string(ctx, dst->regtype,
                                                  dst->regnum, number,
                                                  sizeof (number));
    if (regname == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        return "";
    } // if

    char mask[6];
    int n = 0;
    if (!scalar_register(dst->regtype, dst->regnum) &&
        !writemask_xyzw(dst->writemask))
    {
        mask[n++] = '.';
        if (dst->writemask0) mask[n++] = 'x';
        if (dst->writemask1) mask[n++] = 'y';
        if (dst->writemask2) mask[n++] = 'z';
        if (dst->writemask3) mask[n++] = 'w';
    } // if
    mask[n] = '\0';
    assert(n < sizeof (mask));

    const char *pred_open = "";
    const char *pred_close = "";
    char pred[32] = { '\0' };
    if (ctx->predicated)
    {
        pred_open = "(";
        pred_close = ") ";
        make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
                                      pred, sizeof (pred));
    } // if

    // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
    char *text = get_scratch_buffer(ctx);
    snprintf(text, SCRATCH_BUFFER_SIZE, "%s%s%s%s %s%s%s%s%s%s",
             shift, sat, pp, centroid,
             pred_open, pred, pred_close,
             regname, number, mask);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return text;
} // make_D3D_destarg_string
// Render source argument number `idx` of the current instruction as D3D
// assembly text in a fresh scratch buffer.
// Returns "" after recording a failure if `idx` is outside
// ctx->source_args.
static const char *make_D3D_srcarg_string(Context *ctx, const int idx)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        char *text = get_scratch_buffer(ctx);
        return make_D3D_srcarg_string_in_buf(ctx, &ctx->source_args[idx],
                                             text, SCRATCH_BUFFER_SIZE);
    } // if

    fail(ctx, "Too many source args");
    return "";
} // make_D3D_srcarg_string
// Build the D3D name of a register (prefix followed by its number, if it
// has one) in a scratch buffer and return it.
// NOTE(review): get_D3D_register_string() can return NULL for out-of-range
// RASTOUT/MISCTYPE register numbers, and that is not checked here.
static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
{
    char number[16];
    const char *prefix = get_D3D_register_string(ctx, rt, regnum,
                                                 number, sizeof (number));
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "%s%s", prefix, number);
    return name;
} // get_D3D_varname
// Build the name "c_array_<base>_<size>" in a scratch buffer and return it.
static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
{
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "c_array_%d_%d", base, size);
    return name;
} // get_D3D_const_array_varname
// Emit the shader version line that opens D3D assembly output, e.g.
// "vs_1_1", "ps_2_0", "vs_2_x" or "vs_3_sw".
// `profilestr` is unused by this profile.
static void emit_D3D_start(Context *ctx, const char *profilestr)
{
    const uint major = (uint) ctx->major_ver;
    const uint minor = (uint) ctx->minor_ver;
    char minor_str[16];

    if (minor == 0xFF)
        strcpy(minor_str, "sw");
    else if ((major > 1) && (minor == 0x1))
        strcpy(minor_str, "x");  // for >= SM2, minor 1 is spelled "x". Weird.
    else  // includes vs_1_1/ps_1_1, where minor 1 really is "1".
        snprintf(minor_str, sizeof (minor_str), "%u", (uint) minor);

    output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str);
} // emit_D3D_start
// Emit the "end" line that closes D3D assembly output.
static void emit_D3D_end(Context *ctx)
{
    (void) output_line(ctx, "end");
} // emit_D3D_end
// Emit the "phase" line for the phase opcode.
static void emit_D3D_phase(Context *ctx)
{
    (void) output_line(ctx, "phase");
} // emit_D3D_phase
// Finalize hook for the D3D profile: nothing to do.
static void emit_D3D_finalize(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_D3D_finalize
// Global-definition hook for the D3D profile: nothing to do.
static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
{
    // no-op.
    (void) ctx;
    (void) regtype;
    (void) regnum;
} // emit_D3D_global
// Relative uniform array hook for the D3D profile: nothing to do.
static void emit_D3D_array(Context *ctx, int base, int size)
{
    // no-op.
    (void) ctx;
    (void) base;
    (void) size;
} // emit_D3D_array
// Relative constants array hook for the D3D profile: nothing to do.
static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
                                 int base, int size)
{
    // no-op.
    (void) ctx;
    (void) clist;
    (void) base;
    (void) size;
} // emit_D3D_const_array
// Uniform hook for the D3D profile: nothing to do.
static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
                             int arraybase, int arraysize)
{
    // no-op.
    (void) ctx;
    (void) regtype;
    (void) regnum;
    (void) arraybase;
    (void) arraysize;
} // emit_D3D_uniform
// Sampler hook for the D3D profile: nothing to do.
static void emit_D3D_sampler(Context *ctx, int stage, TextureType ttype)
{
    // no-op.
    (void) ctx;
    (void) stage;
    (void) ttype;
} // emit_D3D_sampler
// Attribute hook for the D3D profile: nothing to do.
static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
                               MOJOSHADER_usage usage, int index, int wmask)
{
    // no-op.
    (void) ctx;
    (void) regtype;
    (void) regnum;
    (void) usage;
    (void) index;
    (void) wmask;
} // emit_D3D_attribute
// Emitter for reserved opcodes: emits nothing.
static void emit_D3D_RESERVED(Context *ctx)
{
    (void) ctx;  // do nothing; fails in the state machine.
} // emit_D3D_RESERVED
// Generic D3D opcode emitters. A list of macros generate all the entry points
//  that call into these...

// Copy `src` into `dst`, converting 'A'..'Z' to lower case and leaving
// every other character alone. The terminating NUL is copied as well, so
// `dst` needs room for strlen(src) + 1 bytes. Returns `dst`.
static char *lowercase(char *dst, const char *src)
{
    char *out = dst;
    for (;;)
    {
        char ch = *src++;
        if ((ch >= 'A') && (ch <= 'Z'))
            ch = (char) (ch - ('A' - 'a'));
        *out++ = ch;
        if (ch == '\0')
            break;  // terminator has been copied; done.
    } // for
    return dst;
} // lowercase
// Emit "<opcode><dest>" for an instruction with only a destination.
// The opcode name is lowercased. No separator is added before the
// destination: its text carries the modifier suffixes and a space itself.
static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
{
    const char *dest = make_D3D_destarg_string(ctx);
    const char *name = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s%s", name, dest);
} // emit_D3D_opcode_d
// Emit "<opcode> <src0>" for an instruction with one source and no
// destination. The opcode name is lowercased.
static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
{
    const char *first = make_D3D_srcarg_string(ctx, 0);
    const char *name = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s %s", name, first);
} // emit_D3D_opcode_s
// Emit "<opcode> <src0>, <src1>" for an instruction with two sources and no
// destination. The opcode name is lowercased.
static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
{
    const char *first = make_D3D_srcarg_string(ctx, 0);
    const char *second = make_D3D_srcarg_string(ctx, 1);
    const char *name = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s %s, %s", name, first, second);
} // emit_D3D_opcode_ss
// Emits one line for an opcode with a destination and one source argument:
// "<lowercased opcode><dst>, <src0>".
static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
{
    const char *dest = make_D3D_destarg_string(ctx);
    const char *arg0 = make_D3D_srcarg_string(ctx, 0);
    const char *mnemonic = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s%s, %s", mnemonic, dest, arg0);
} // emit_D3D_opcode_ds
// Emits one line for an opcode with a destination and two source arguments:
// "<lowercased opcode><dst>, <src0>, <src1>".
static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
{
    const char *dest = make_D3D_destarg_string(ctx);
    const char *arg0 = make_D3D_srcarg_string(ctx, 0);
    const char *arg1 = make_D3D_srcarg_string(ctx, 1);
    const char *mnemonic = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s%s, %s, %s", mnemonic, dest, arg0, arg1);
} // emit_D3D_opcode_dss
// Emits one line for an opcode with a destination and three source arguments:
// "<lowercased opcode><dst>, <src0>, <src1>, <src2>".
static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
{
    const char *dest = make_D3D_destarg_string(ctx);
    const char *arg0 = make_D3D_srcarg_string(ctx, 0);
    const char *arg1 = make_D3D_srcarg_string(ctx, 1);
    const char *arg2 = make_D3D_srcarg_string(ctx, 2);
    const char *mnemonic = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s%s, %s, %s, %s", mnemonic, dest, arg0, arg1, arg2);
} // emit_D3D_opcode_dsss
// Emits one line for an opcode with a destination and four source arguments:
// "<lowercased opcode><dst>, <src0>, <src1>, <src2>, <src3>".
static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
{
    const char *dest = make_D3D_destarg_string(ctx);
    const char *arg0 = make_D3D_srcarg_string(ctx, 0);
    const char *arg1 = make_D3D_srcarg_string(ctx, 1);
    const char *arg2 = make_D3D_srcarg_string(ctx, 2);
    const char *arg3 = make_D3D_srcarg_string(ctx, 3);
    const char *mnemonic = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx,"%s%s, %s, %s, %s, %s",
                mnemonic, dest, arg0, arg1, arg2, arg3);
} // emit_D3D_opcode_dssss
// Emits one line for an opcode that takes no arguments: just the lowercased
// opcode name.
static void emit_D3D_opcode(Context *ctx, const char *opcode)
{
    const char *mnemonic = lowercase(get_scratch_buffer(ctx), opcode);
    output_line(ctx, "%s", mnemonic);
} // emit_D3D_opcode
// Generator macros. Each one defines `static void emit_D3D_<op>(Context *ctx)`
// whose body forwards to one of the generic emitters, passing the stringized
// opcode token (#op). The macro-name suffix names the generic emitter used:
// none -> emit_D3D_opcode, D -> _d, S -> _s, SS -> _ss, DS -> _ds,
// DSS -> _dss, DSSS -> _dsss, DSSSS -> _dssss.

// No arguments.
#define EMIT_D3D_OPCODE_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode(ctx, #op); \
}
// Destination only.
#define EMIT_D3D_OPCODE_D_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_d(ctx, #op); \
}
// One source.
#define EMIT_D3D_OPCODE_S_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_s(ctx, #op); \
}
// Two sources.
#define EMIT_D3D_OPCODE_SS_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_ss(ctx, #op); \
}
// Destination plus one source.
#define EMIT_D3D_OPCODE_DS_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_ds(ctx, #op); \
}
// Destination plus two sources.
#define EMIT_D3D_OPCODE_DSS_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_dss(ctx, #op); \
}
// Destination plus three sources.
#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_dsss(ctx, #op); \
}
// Destination plus four sources.
#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
static void emit_D3D_##op(Context *ctx) { \
emit_D3D_opcode_dssss(ctx, #op); \
}
// Instantiate one emit_D3D_<OP> function per opcode whose D3D assembly form is
// simply "<opcode> <args>". The macro chosen fixes the argument layout
// (D = destination, S = source). The opcode token is stringized by the macro
// and lowercased by the generic emitter it forwards to.
EMIT_D3D_OPCODE_FUNC(NOP)
EMIT_D3D_OPCODE_DS_FUNC(MOV)
EMIT_D3D_OPCODE_DSS_FUNC(ADD)
EMIT_D3D_OPCODE_DSS_FUNC(SUB)
EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
EMIT_D3D_OPCODE_DSS_FUNC(MUL)
EMIT_D3D_OPCODE_DS_FUNC(RCP)
EMIT_D3D_OPCODE_DS_FUNC(RSQ)
EMIT_D3D_OPCODE_DSS_FUNC(DP3)
EMIT_D3D_OPCODE_DSS_FUNC(DP4)
EMIT_D3D_OPCODE_DSS_FUNC(MIN)
EMIT_D3D_OPCODE_DSS_FUNC(MAX)
EMIT_D3D_OPCODE_DSS_FUNC(SLT)
EMIT_D3D_OPCODE_DSS_FUNC(SGE)
EMIT_D3D_OPCODE_DS_FUNC(EXP)
EMIT_D3D_OPCODE_DS_FUNC(LOG)
EMIT_D3D_OPCODE_DS_FUNC(LIT)
EMIT_D3D_OPCODE_DSS_FUNC(DST)
EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
EMIT_D3D_OPCODE_DS_FUNC(FRC)
EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
EMIT_D3D_OPCODE_S_FUNC(CALL)
EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
EMIT_D3D_OPCODE_SS_FUNC(LOOP)
EMIT_D3D_OPCODE_FUNC(RET)
EMIT_D3D_OPCODE_FUNC(ENDLOOP)
EMIT_D3D_OPCODE_S_FUNC(LABEL)
EMIT_D3D_OPCODE_DSS_FUNC(POW)
EMIT_D3D_OPCODE_DSS_FUNC(CRS)
EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
EMIT_D3D_OPCODE_DS_FUNC(ABS)
EMIT_D3D_OPCODE_DS_FUNC(NRM)
EMIT_D3D_OPCODE_S_FUNC(REP)
EMIT_D3D_OPCODE_FUNC(ENDREP)
EMIT_D3D_OPCODE_S_FUNC(IF)
EMIT_D3D_OPCODE_FUNC(ELSE)
EMIT_D3D_OPCODE_FUNC(ENDIF)
EMIT_D3D_OPCODE_FUNC(BREAK)
EMIT_D3D_OPCODE_DS_FUNC(MOVA)
EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
EMIT_D3D_OPCODE_DS_FUNC(EXPP)
EMIT_D3D_OPCODE_DS_FUNC(LOGP)
EMIT_D3D_OPCODE_DSSS_FUNC(CND)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
EMIT_D3D_OPCODE_DSS_FUNC(BEM)
EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
EMIT_D3D_OPCODE_DS_FUNC(DSX)
EMIT_D3D_OPCODE_DS_FUNC(DSY)
EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
EMIT_D3D_OPCODE_S_FUNC(BREAKP)
// special cases for comparison opcodes...
// Maps ctx->instruction_controls to the D3D comparison suffix ("_gt", "_eq",
// "_ge", "_lt", "_ne", "_le"; index 0 maps to the empty string). A value
// outside the table flags a failure on ctx and yields "".
static const char *get_D3D_comparison_string(Context *ctx)
{
    static const char *comps[] = {
        "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
    };

    if (ctx->instruction_controls < STATICARRAYLEN(comps))
        return comps[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_D3D_comparison_string
// BREAKC: builds "break" plus the comparison suffix, then emits it as a
// two-source opcode.
static void emit_D3D_BREAKC(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "break%s", suffix);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_BREAKC
// IFC: builds "if" plus the comparison suffix, then emits it as a two-source
// opcode.
static void emit_D3D_IFC(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "if%s", suffix);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_IFC
// SETP: builds "setp" plus the comparison suffix, then emits it as a
// destination + two-source opcode.
static void emit_D3D_SETP(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "setp%s", suffix);
    emit_D3D_opcode_dss(ctx, opname);
} // emit_D3D_SETP
// DEF: emits "def<dst>, v0, v1, v2, v3", where the four values are read from
// ctx->dwords reinterpreted as floats and formatted with floatstr().
// NOTE(review): the pointer cast assumes ctx->dwords may be read as float
// without aliasing trouble -- confirm against the declaration of dwords.
static void emit_D3D_DEF(Context *ctx)
{
    const char *dst0 = make_D3D_destarg_string(ctx);
    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
    char valstr[4][32];
    int i;

    for (i = 0; i < 4; i++)
        floatstr(ctx, valstr[i], sizeof (valstr[i]), val[i], 0);

    output_line(ctx, "def%s, %s, %s, %s, %s", dst0,
                valstr[0], valstr[1], valstr[2], valstr[3]);
} // emit_D3D_DEF
// DEFI: emits "defi<dst>, a, b, c, d", where the four values are the first
// four entries of ctx->dwords read as signed 32-bit integers.
static void emit_D3D_DEFI(Context *ctx)
{
    const char *dst0 = make_D3D_destarg_string(ctx);
    const int32 *vals = (const int32 *) ctx->dwords;
    const int a = (int) vals[0];
    const int b = (int) vals[1];
    const int c = (int) vals[2];
    const int d = (int) vals[3];
    output_line(ctx, "defi%s, %d, %d, %d, %d", dst0, a, b, c, d);
} // emit_D3D_DEFI
// DEFB: emits "defb<dst>, true" when ctx->dwords[0] is non-zero, otherwise
// "defb<dst>, false".
static void emit_D3D_DEFB(Context *ctx)
{
    const char *dst0 = make_D3D_destarg_string(ctx);
    const char *boolstr = "false";
    if (ctx->dwords[0])
        boolstr = "true";
    output_line(ctx, "defb%s, %s", dst0, boolstr);
} // emit_D3D_DEFB
// DCL: emits "dcl<usage><index><dst>".
//  - Sampler registers: <usage> is "_2d", "_cube" or "_volume", selected by
//    ctx->dwords[0]; any other texture type is a failure and nothing is
//    emitted.
//  - Misc-type registers: only the POSITION and FACE register numbers are
//    accepted, with an empty <usage>; anything else is a failure.
//  - Everything else: <usage> comes from usagestrs[ctx->dwords[0]] and
//    <index> is ctx->dwords[1] printed in decimal when non-zero.
// NOTE(review): the usagestrs[] index is not range-checked here; presumably
// it is validated before this emitter runs -- confirm.
static void emit_D3D_DCL(Context *ctx)
{
    const char *dst0 = make_D3D_destarg_string(ctx);
    const DestArgInfo *arg = &ctx->dest_arg;
    const char *usage_str = "";
    char index_str[16] = { '\0' };

    if (arg->regtype == REG_TYPE_SAMPLER)
    {
        const TextureType ttype = (const TextureType) ctx->dwords[0];
        if (ttype == TEXTURE_TYPE_2D)
            usage_str = "_2d";
        else if (ttype == TEXTURE_TYPE_CUBE)
            usage_str = "_cube";
        else if (ttype == TEXTURE_TYPE_VOLUME)
            usage_str = "_volume";
        else
        {
            fail(ctx, "unknown sampler texture type");
            return;
        } // else
    } // if

    else if (arg->regtype == REG_TYPE_MISCTYPE)
    {
        const MiscTypeType mtype = (const MiscTypeType) arg->regnum;
        if ((mtype != MISCTYPE_TYPE_POSITION) && (mtype != MISCTYPE_TYPE_FACE))
        {
            fail(ctx, "unknown misc register type");
            return;
        } // if
        // usage_str stays empty: just become "dcl vFace" or whatever.
    } // else if

    else
    {
        const uint32 usage = ctx->dwords[0];
        const uint32 index = ctx->dwords[1];
        usage_str = usagestrs[usage];
        if (index != 0)
            snprintf(index_str, sizeof (index_str), "%u", (uint) index);
    } // else

    output_line(ctx, "dcl%s%s%s", usage_str, index_str, dst0);
} // emit_D3D_DCL
// TEXCRD: the opcode looks and acts differently depending on the shader
// model. From version 1.4 up it is "texcrd" with a destination and a source;
// below that it is "texcoord" with a destination only.
static void emit_D3D_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_d(ctx, "texcoord");
        return;
    } // if

    emit_D3D_opcode_ds(ctx, "texcrd");
} // emit_D3D_TEXCRD
// TEXLD: the opcode looks and acts differently depending on the shader model:
//   >= 2.0 : "texld" with a destination and two sources
//   >= 1.4 : "texld" with a destination and one source
//   older  : "tex" with a destination only
static void emit_D3D_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
    {
        emit_D3D_opcode_dss(ctx, "texld");
        return;
    } // if

    if (shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_ds(ctx, "texld");
        return;
    } // if

    emit_D3D_opcode_d(ctx, "tex");
} // emit_D3D_TEXLD
// SINCOS: from shader model 3.0 up this is emitted with a destination and one
// source; older models need extra registers, so it is emitted with three
// sources.
static void emit_D3D_SINCOS(Context *ctx)
{
    if (shader_version_atleast(ctx, 3, 0))
        emit_D3D_opcode_ds(ctx, "sincos");
    else
        emit_D3D_opcode_dsss(ctx, "sincos");
} // emit_D3D_SINCOS
// The D3D generator macros have done their job; remove them so the names do
// not leak into the rest of the file.
#undef EMIT_D3D_OPCODE_FUNC
#undef EMIT_D3D_OPCODE_D_FUNC
#undef EMIT_D3D_OPCODE_S_FUNC
#undef EMIT_D3D_OPCODE_SS_FUNC
#undef EMIT_D3D_OPCODE_DS_FUNC
#undef EMIT_D3D_OPCODE_DSS_FUNC
#undef EMIT_D3D_OPCODE_DSSS_FUNC
#undef EMIT_D3D_OPCODE_DSSSS_FUNC
#endif // SUPPORT_PROFILE_D3D
// Passthrough profile. When it is compiled out, PROFILE_EMITTER_PASSTHROUGH
// expands to nothing.
#if !SUPPORT_PROFILE_PASSTHROUGH
#define PROFILE_EMITTER_PASSTHROUGH(op)
#else
// Record that at least one profile is compiled in.
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
// Expands to the emitter's name followed by a comma (presumably so it can be
// dropped into an initializer list -- confirm at the point of use).
#define PROFILE_EMITTER_PASSTHROUGH(op) emit_PASSTHROUGH_##op,
// Passthrough "start" hook: the output is simply a copy of the whole token
// stream, so every other emitter in this profile is a no-op. output_len is
// set to tokencount * sizeof (uint32) before the allocation is attempted; if
// Malloc() returns NULL, nothing is copied.
static void emit_PASSTHROUGH_start(Context *ctx, const char *profilestr)
{
    ctx->output_len = (ctx->tokencount * sizeof (uint32));
    ctx->output_bytes = (uint8 *) Malloc(ctx, ctx->output_len);
    if (ctx->output_bytes == NULL)
        return;

    memcpy(ctx->output_bytes, ctx->tokens, ctx->output_len);
} // emit_PASSTHROUGH_start
// Passthrough "end" hook. Intentionally empty.
static void emit_PASSTHROUGH_end(Context *ctx)
{
    (void) ctx;  // nothing to do in this profile.
} // emit_PASSTHROUGH_end
// The remaining passthrough profile hooks are all deliberate no-ops; every
// argument is ignored.
static void emit_PASSTHROUGH_phase(Context *ctx)
{
    (void) ctx;
} // emit_PASSTHROUGH_phase

static void emit_PASSTHROUGH_finalize(Context *ctx)
{
    (void) ctx;
} // emit_PASSTHROUGH_finalize

static void emit_PASSTHROUGH_global(Context *ctx, RegisterType t, int n)
{
    (void) ctx;
    (void) t;
    (void) n;
} // emit_PASSTHROUGH_global

static void emit_PASSTHROUGH_array(Context *ctx, int base, int size)
{
    (void) ctx;
    (void) base;
    (void) size;
} // emit_PASSTHROUGH_array

static void emit_PASSTHROUGH_sampler(Context *ctx, int s, TextureType ttype)
{
    (void) ctx;
    (void) s;
    (void) ttype;
} // emit_PASSTHROUGH_sampler

static void emit_PASSTHROUGH_const_array(Context *ctx, const ConstantsList *c,
                                         int base, int size)
{
    (void) ctx;
    (void) c;
    (void) base;
    (void) size;
} // emit_PASSTHROUGH_const_array

static void emit_PASSTHROUGH_uniform(Context *ctx, RegisterType t, int n,
                                     int arraybase, int arraysize)
{
    (void) ctx;
    (void) t;
    (void) n;
    (void) arraybase;
    (void) arraysize;
} // emit_PASSTHROUGH_uniform

static void emit_PASSTHROUGH_attribute(Context *ctx, RegisterType t, int n,
                                       MOJOSHADER_usage u, int i, int w)
{
    (void) ctx;
    (void) t;
    (void) n;
    (void) u;
    (void) i;
    (void) w;
} // emit_PASSTHROUGH_attribute
// Builds a variable name for register (rt, regnum) by concatenating the
// register-type string and register-number string produced by
// get_D3D_register_string(). The result lives in a scratch buffer obtained
// from get_scratch_buffer().
static const char *get_PASSTHROUGH_varname(Context *ctx, RegisterType rt, int regnum)
{
    char numstr[16];
    const char *typestr = get_D3D_register_string(ctx, rt, regnum,
                                                  numstr, sizeof (numstr));
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "%s%s", typestr, numstr);
    return name;
} // get_PASSTHROUGH_varname
// Builds the name "c_array_<base>_<size>" in a scratch buffer and returns it.
static const char *get_PASSTHROUGH_const_array_varname(Context *ctx, int base, int size)
{
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "c_array_%d_%d", base, size);
    return name;
} // get_PASSTHROUGH_const_array_varname
// Generator for the passthrough opcode emitters: defines an empty
// `static void emit_PASSTHROUGH_<op>(Context *ctx)` for the given opcode.
// The list below instantiates one such no-op per opcode; the macro is
// removed again once the list ends.
#define EMIT_PASSTHROUGH_OPCODE_FUNC(op) \
static void emit_PASSTHROUGH_##op(Context *ctx) {}
EMIT_PASSTHROUGH_OPCODE_FUNC(RESERVED)
EMIT_PASSTHROUGH_OPCODE_FUNC(NOP)
EMIT_PASSTHROUGH_OPCODE_FUNC(MOV)
EMIT_PASSTHROUGH_OPCODE_FUNC(ADD)
EMIT_PASSTHROUGH_OPCODE_FUNC(SUB)
EMIT_PASSTHROUGH_OPCODE_FUNC(MAD)
EMIT_PASSTHROUGH_OPCODE_FUNC(MUL)
EMIT_PASSTHROUGH_OPCODE_FUNC(RCP)
EMIT_PASSTHROUGH_OPCODE_FUNC(RSQ)
EMIT_PASSTHROUGH_OPCODE_FUNC(DP3)
EMIT_PASSTHROUGH_OPCODE_FUNC(DP4)
EMIT_PASSTHROUGH_OPCODE_FUNC(MIN)
EMIT_PASSTHROUGH_OPCODE_FUNC(MAX)
EMIT_PASSTHROUGH_OPCODE_FUNC(SLT)
EMIT_PASSTHROUGH_OPCODE_FUNC(SGE)
EMIT_PASSTHROUGH_OPCODE_FUNC(EXP)
EMIT_PASSTHROUGH_OPCODE_FUNC(LOG)
EMIT_PASSTHROUGH_OPCODE_FUNC(LIT)
EMIT_PASSTHROUGH_OPCODE_FUNC(DST)
EMIT_PASSTHROUGH_OPCODE_FUNC(LRP)
EMIT_PASSTHROUGH_OPCODE_FUNC(FRC)
EMIT_PASSTHROUGH_OPCODE_FUNC(M4X4)
EMIT_PASSTHROUGH_OPCODE_FUNC(M4X3)
EMIT_PASSTHROUGH_OPCODE_FUNC(M3X4)
EMIT_PASSTHROUGH_OPCODE_FUNC(M3X3)
EMIT_PASSTHROUGH_OPCODE_FUNC(M3X2)
EMIT_PASSTHROUGH_OPCODE_FUNC(CALL)
EMIT_PASSTHROUGH_OPCODE_FUNC(CALLNZ)
EMIT_PASSTHROUGH_OPCODE_FUNC(LOOP)
EMIT_PASSTHROUGH_OPCODE_FUNC(RET)
EMIT_PASSTHROUGH_OPCODE_FUNC(ENDLOOP)
EMIT_PASSTHROUGH_OPCODE_FUNC(LABEL)
EMIT_PASSTHROUGH_OPCODE_FUNC(POW)
EMIT_PASSTHROUGH_OPCODE_FUNC(CRS)
EMIT_PASSTHROUGH_OPCODE_FUNC(SGN)
EMIT_PASSTHROUGH_OPCODE_FUNC(ABS)
EMIT_PASSTHROUGH_OPCODE_FUNC(NRM)
EMIT_PASSTHROUGH_OPCODE_FUNC(SINCOS)
EMIT_PASSTHROUGH_OPCODE_FUNC(REP)
EMIT_PASSTHROUGH_OPCODE_FUNC(ENDREP)
EMIT_PASSTHROUGH_OPCODE_FUNC(IF)
EMIT_PASSTHROUGH_OPCODE_FUNC(ELSE)
EMIT_PASSTHROUGH_OPCODE_FUNC(ENDIF)
EMIT_PASSTHROUGH_OPCODE_FUNC(BREAK)
EMIT_PASSTHROUGH_OPCODE_FUNC(MOVA)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXKILL)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXBEM)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXBEML)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXREG2AR)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXREG2GB)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X2PAD)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X2TEX)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X3PAD)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X3TEX)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X3SPEC)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X3VSPEC)
EMIT_PASSTHROUGH_OPCODE_FUNC(EXPP)
EMIT_PASSTHROUGH_OPCODE_FUNC(LOGP)
EMIT_PASSTHROUGH_OPCODE_FUNC(CND)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXREG2RGB)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXDP3TEX)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X2DEPTH)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXDP3)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXM3X3)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXDEPTH)
EMIT_PASSTHROUGH_OPCODE_FUNC(CMP)
EMIT_PASSTHROUGH_OPCODE_FUNC(BEM)
EMIT_PASSTHROUGH_OPCODE_FUNC(DP2ADD)
EMIT_PASSTHROUGH_OPCODE_FUNC(DSX)
EMIT_PASSTHROUGH_OPCODE_FUNC(DSY)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXLDD)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXLDL)
EMIT_PASSTHROUGH_OPCODE_FUNC(BREAKP)
EMIT_PASSTHROUGH_OPCODE_FUNC(BREAKC)
EMIT_PASSTHROUGH_OPCODE_FUNC(IFC)
EMIT_PASSTHROUGH_OPCODE_FUNC(SETP)
EMIT_PASSTHROUGH_OPCODE_FUNC(DEF)
EMIT_PASSTHROUGH_OPCODE_FUNC(DEFI)
EMIT_PASSTHROUGH_OPCODE_FUNC(DEFB)
EMIT_PASSTHROUGH_OPCODE_FUNC(DCL)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXCRD)
EMIT_PASSTHROUGH_OPCODE_FUNC(TEXLD)
#undef EMIT_PASSTHROUGH_OPCODE_FUNC
#endif // SUPPORT_PROFILE_PASSTHROUGH
// GLSL profile. When it is compiled out, PROFILE_EMITTER_GLSL expands to
// nothing.
#if !SUPPORT_PROFILE_GLSL
#define PROFILE_EMITTER_GLSL(op)
#else
// Record that at least one profile is compiled in.
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
// Expands to the emitter's name followed by a comma (presumably so it can be
// dropped into an initializer list -- confirm at the point of use).
#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,
// Defines emit_GLSL_<op> as a stub that only reports
// "<op> unimplemented in glsl profile" through fail().
#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
static void emit_GLSL_##op(Context *ctx) { \
fail(ctx, #op " unimplemented in glsl profile"); \
}
// GLSL wrapper around get_D3D_register_string(): forwards all arguments and
// returns its result, except that a NULL result is turned into a reported
// failure and an empty string.
// NOTE(review): unlike its neighbours this function is not declared static;
// confirm whether external linkage is intended.
const char *get_GLSL_register_string(Context *ctx, RegisterType regtype,
                                     int regnum, char *regnum_str, int len)
{
    const char *typestr = get_D3D_register_string(ctx, regtype, regnum,
                                                  regnum_str, len);
    if (typestr != NULL)
        return typestr;

    fail(ctx, "Unknown D3D register type.");
    return "";
} // get_GLSL_register_string
// Builds the GLSL variable name for register (rt, regnum):
// "<shader_type_str>_<register type><register number>". The result lives in a
// scratch buffer obtained from get_scratch_buffer().
static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
{
    char numstr[16];
    const char *typestr = get_GLSL_register_string(ctx, rt, regnum,
                                                   numstr, sizeof (numstr));
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "%s_%s%s", ctx->shader_type_str,
             typestr, numstr);
    return name;
} // get_GLSL_varname
// Builds the name "<shader_type_str>_const_array_<base>_<size>" in a scratch
// buffer and returns it.
static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
{
    const char *prefix = ctx->shader_type_str;
    char *name = get_scratch_buffer(ctx);
    snprintf(name, SCRATCH_BUFFER_SIZE, "%s_const_array_%d_%d",
             prefix, base, size);
    return name;
} // get_GLSL_const_array_varname
// Returns the GLSL variable name of the current destination argument
// (ctx->dest_arg).
static const char *get_GLSL_destarg_varname(Context *ctx)
{
    const DestArgInfo *dest = &ctx->dest_arg;
    const char *name = get_GLSL_varname(ctx, dest->regtype, dest->regnum);
    return name;
} // get_GLSL_destarg_varname
// Returns the GLSL variable name of source argument `idx`. An index beyond
// the ctx->source_args array reports a failure and yields "".
static const char *get_GLSL_srcarg_varname(Context *ctx, int idx)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return get_GLSL_varname(ctx, src->regtype, src->regnum);
    } // if

    fail(ctx, "Too many source args");
    return "";
} // get_GLSL_srcarg_varname
static const char *make_GLSL_destarg_assign(Context *, const char *, ...) ISPRINTF(2,3);

// Builds a complete GLSL assignment statement for the current destination
// argument (ctx->dest_arg):
//
//     <shader>_<reg><num><.mask> = <clamp(><(><operation><)><shift><clamp args)>;
//
// `fmt` and the variadic arguments are a printf-style description of the
// right-hand-side operation.
//
// Returns a scratch-buffer string, or "" when:
//  - the destination writemask is zero (the instruction is a no-op), or
//  - a failure was reported: MOD_CENTROID or predicated destinations (both
//    unsupported), or the formatted text does not fit a scratch buffer.
static const char *make_GLSL_destarg_assign(Context *ctx, const char *fmt, ...)
{
    int need_parens = 0;
    const DestArgInfo *arg = &ctx->dest_arg;

    if (arg->writemask == 0)
        return "";  // no writemask? It's a no-op.

    // Saturation is expressed by wrapping the whole right-hand side in
    // clamp(); the bounds are scalars or vecN() depending on the mask size.
    char clampbuf[32] = { '\0' };
    const char *clampleft = "";
    const char *clampright = "";
    if (arg->result_mod & MOD_SATURATE)
    {
        const int vecsize = vecsize_from_writemask(arg->writemask);
        clampleft = "clamp(";
        if (vecsize == 1)
            clampright = ", 0.0, 1.0)";
        else
        {
            snprintf(clampbuf, sizeof (clampbuf),
                     ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
            clampright = clampbuf;
        } // else
    } // if

    // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.

    if (arg->result_mod & MOD_CENTROID)
    {
        fail(ctx, "MOD_CENTROID unsupported");  // !!! FIXME
        return "";
    } // if

    if (ctx->predicated)
    {
        fail(ctx, "predicated destinations unsupported");  // !!! FIXME
        return "";
    } // if

    // Format the caller's operation into its own scratch buffer.
    char *operation = get_scratch_buffer(ctx);
    va_list ap;
    va_start(ap, fmt);
    const int len = vsnprintf(operation, SCRATCH_BUFFER_SIZE, fmt, ap);
    va_end(ap);
    // A negative result means a formatting error (or truncation on runtimes
    // that report it that way); either way the buffer can't be trusted.
    if ((len < 0) || (len >= SCRATCH_BUFFER_SIZE))
    {
        fail(ctx, "operation string too large");  // I'm lazy.  :P
        return "";
    } // if

    // A result shift becomes a multiply/divide applied to the parenthesized
    // operation.
    const char *result_shift_str = "";
    switch (arg->result_shift)
    {
        case 0x1: result_shift_str = " * 2.0"; break;
        case 0x2: result_shift_str = " * 4.0"; break;
        case 0x3: result_shift_str = " * 8.0"; break;
        case 0xD: result_shift_str = " / 8.0"; break;
        case 0xE: result_shift_str = " / 4.0"; break;
        case 0xF: result_shift_str = " / 2.0"; break;
    } // switch
    need_parens |= (result_shift_str[0] != '\0');

    char regnum_str[16];
    const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
                                                       arg->regnum, regnum_str,
                                                       sizeof (regnum_str));

    // Only spell out the writemask for non-scalar registers that don't write
    // all four components.
    char writemask_str[6];
    int i = 0;
    const int scalar = scalar_register(arg->regtype, arg->regnum);
    if (!scalar && !writemask_xyzw(arg->writemask))
    {
        writemask_str[i++] = '.';
        if (arg->writemask0) writemask_str[i++] = 'x';
        if (arg->writemask1) writemask_str[i++] = 'y';
        if (arg->writemask2) writemask_str[i++] = 'z';
        if (arg->writemask3) writemask_str[i++] = 'w';
    } // if
    assert(i < sizeof (writemask_str));
    writemask_str[i] = '\0';

    const char *leftparen = (need_parens) ? "(" : "";
    const char *rightparen = (need_parens) ? ")" : "";

    char *retval = get_scratch_buffer(ctx);
    const int outlen = snprintf(retval, SCRATCH_BUFFER_SIZE,
                                "%s_%s%s%s = %s%s%s%s%s%s;",
                                ctx->shader_type_str, regtype_str, regnum_str,
                                writemask_str, clampleft, leftparen, operation,
                                rightparen, result_shift_str, clampright);
    // Don't hand back a truncated (or, on some runtimes, unterminated)
    // statement if the scratch buffer was too small.
    if ((outlen < 0) || (outlen >= SCRATCH_BUFFER_SIZE))
    {
        fail(ctx, "operation string too large");
        return "";
    } // if

    return retval;
} // make_GLSL_destarg_assign
// Writes a GLSL swizzle suffix into `swiz_str` (capacity `strsize`) and
// returns it. When the swizzle is the identity and the writemask covers all
// four components, the result is the empty string. Otherwise it is '.'
// followed by one channel letter for every component enabled in `writemask`,
// where the letter is looked up from that component's 2-bit field in
// `swizzle`. The worst case needs 6 bytes ('.', four letters, terminator).
static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
                                      const int swizzle, const int writemask)
{
    size_t len = 0;

    if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
    {
        int chan;
        swiz_str[len++] = '.';
        for (chan = 0; chan < 4; chan++)
        {
            const int enabled = (writemask >> chan) & 0x1;
            const int source = (swizzle >> (chan * 2)) & 0x3;
            if (enabled)
                swiz_str[len++] = swizzle_channels[source];
        } // for
    } // if

    assert(len < strsize);
    swiz_str[len] = '\0';
    return swiz_str;
} // make_GLSL_swizzle_string
// Builds the GLSL expression text for source argument `idx`, swizzled down to
// the components selected by `writemask`:
//
//     <premod><name>[<offset> + <relative reg>.<component>]<swizzle><postmod>
//
// The bracketed part only appears for relative addressing, in which case
// <name> is the constant array's name instead of the register's own name.
//
// Returns a scratch-buffer string, or "" after reporting a failure (index out
// of range, unsupported source modifier, unknown register type, or text that
// does not fit a scratch buffer).
static char *make_GLSL_srcarg_string(Context *ctx, const int idx,
                                     const int writemask)
{
    if (idx >= STATICARRAYLEN(ctx->source_args))
    {
        fail(ctx, "Too many source args");
        return "";
    } // if

    // !!! FIXME: not right.

    const SourceArgInfo *arg = &ctx->source_args[idx];

    // Several modifiers are not implemented yet: they fail and return right
    // away. The assignments after those returns are unreachable and only
    // record the intended output.
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            fail(ctx, "SRCMOD_BIAS unsupported"); return ""; // !!! FIXME
            postmod_str = "_bias";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            fail(ctx, "SRCMOD_SIGN unsupported"); return ""; // !!! FIXME
            postmod_str = "_bx2";
            break;

        case SRCMOD_COMPLEMENT:
            fail(ctx, "SRCMOD_COMPLEMENT unsupported"); return "";  // !!! FIXME  (need to handle vecsize)
            premod_str = "(1.0 - (";
            postmod_str = "))";
            break;

        case SRCMOD_X2NEGATE:
            fail(ctx, "SRCMOD_X2NEGATE unsupported"); return "";  // !!! FIXME  (need to handle vecsize)
            premod_str = "-(";
            postmod_str = " * 2.0)";
            break;

        case SRCMOD_X2:
            fail(ctx, "SRCMOD_X2 unsupported"); return "";  // !!! FIXME  (need to handle vecsize)
            premod_str = "(";
            postmod_str = " * 2.0)";
            break;

        case SRCMOD_DZ:
            fail(ctx, "SRCMOD_DZ unsupported"); return ""; // !!! FIXME
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            fail(ctx, "SRCMOD_DW unsupported"); return ""; // !!! FIXME
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-abs(";
            postmod_str = ")";
            break;

        case SRCMOD_ABS:
            premod_str = "abs(";
            postmod_str = ")";
            break;

        case SRCMOD_NOT:
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
            break;  // stop compiler whining.
    } // switch

    const char *regtype_str = NULL;

    if (!arg->relative)
        regtype_str = get_GLSL_varname(ctx, arg->regtype, arg->regnum);

    const char *rel_lbracket = "";
    char rel_offset[32] = { '\0' };
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        // Index into the constant array: the register's offset from the
        // array base, plus one component of the relative register.
        const int arrayidx = arg->relative_array->index;
        const int arraysize = arg->relative_array->count;
        const int offset = arg->regnum - arrayidx;
        assert(offset >= 0);
        regtype_str = get_GLSL_const_array_varname(ctx, arrayidx, arraysize);
        rel_lbracket = "[";
        if (offset != 0)
            snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
        rel_regtype_str = get_GLSL_varname(ctx, arg->relative_regtype,
                                           arg->relative_regnum);
        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';
        rel_rbracket = "]";
    } // if

    char swiz_str[6] = { '\0' };
    if (!scalar_register(arg->regtype, arg->regnum))
    {
        make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
                                 arg->swizzle, writemask);
    } // if

    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        return "";
    } // if

    char *retval = get_scratch_buffer(ctx);
    const int len = snprintf(retval, SCRATCH_BUFFER_SIZE, "%s%s%s%s%s%s%s%s%s",
                             premod_str, regtype_str, rel_lbracket, rel_offset,
                             rel_regtype_str, rel_swizzle, rel_rbracket,
                             swiz_str, postmod_str);
    // Don't hand back a truncated (or, on some runtimes, unterminated)
    // expression if the scratch buffer was too small.
    if ((len < 0) || (len >= SCRATCH_BUFFER_SIZE))
    {
        fail(ctx, "source argument string too large");
        return "";
    } // if

    return retval;
} // make_GLSL_srcarg_string
// Source argument `idx` restricted to writemask bit 0 (the x component).
static inline char *make_GLSL_srcarg_string_x(Context *ctx, const int idx)
{
    const int writemask = (1 << 0);
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_x
// Source argument `idx` restricted to writemask bit 1 (the y component).
static inline char *make_GLSL_srcarg_string_y(Context *ctx, const int idx)
{
    const int writemask = (1 << 1);
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_y
// Source argument `idx` restricted to writemask bit 2 (the z component).
static inline char *make_GLSL_srcarg_string_z(Context *ctx, const int idx)
{
    const int writemask = (1 << 2);
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_z
// Source argument `idx` restricted to writemask bit 3 (the w component).
static inline char *make_GLSL_srcarg_string_w(Context *ctx, const int idx)
{
    const int writemask = (1 << 3);
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_w
// Source argument `idx` as a scalar; this is the same as taking only its
// first (x) writemask component.
static inline char *make_GLSL_srcarg_string_scalar(Context *ctx, const int idx)
{
    char *str = make_GLSL_srcarg_string_x(ctx, idx);
    return str;
} // make_GLSL_srcarg_string_scalar
// Source argument `idx` with all four writemask bits set (0xF).
static inline char *make_GLSL_srcarg_string_full(Context *ctx, const int idx)
{
    const int writemask = 0xF;
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_full
// Source argument `idx` restricted to the current destination argument's
// writemask.
static inline char *make_GLSL_srcarg_string_masked(Context *ctx, const int idx)
{
    const int writemask = ctx->dest_arg.writemask;
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_masked
// Source argument `idx` restricted to the first three writemask bits (0x7).
static inline char *make_GLSL_srcarg_string_vec3(Context *ctx, const int idx)
{
    const int writemask = 0x7;
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_vec3
// Source argument `idx` restricted to the first two writemask bits (0x3).
static inline char *make_GLSL_srcarg_string_vec2(Context *ctx, const int idx)
{
    const int writemask = 0x3;
    return make_GLSL_srcarg_string(ctx, idx, writemask);
} // make_GLSL_srcarg_string_vec2
// special cases for comparison opcodes...
// Maps ctx->instruction_controls to a GLSL scalar comparison operator
// (">", "==", ">=", "<", "!=", "<="; index 0 maps to the empty string). A
// value outside the table reports a failure and yields "".
static const char *get_GLSL_comparison_string_scalar(Context *ctx)
{
    static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };

    if (ctx->instruction_controls < STATICARRAYLEN(comps))
        return comps[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_GLSL_comparison_string_scalar
// Maps ctx->instruction_controls to the name of a GLSL vector comparison
// function ("greaterThan", "equal", ...; index 0 maps to the empty string).
// A value outside the table reports a failure and yields "".
static const char *get_GLSL_comparison_string_vector(Context *ctx)
{
    static const char *comps[] = {
        "", "greaterThan", "equal", "greaterThanEqual", "lessThan",
        "notEqual", "lessThanEqual"
    };

    if (ctx->instruction_controls < STATICARRAYLEN(comps))
        return comps[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_GLSL_comparison_string_vector
// GLSL "start" hook.
//  - Rejects shaders that are neither vertex nor pixel shaders.
//  - For the GLSL 1.20 profile, sets ctx->support_glsl120 and writes
//    "#version 120" to the globals output; the base GLSL profile writes
//    nothing there; any other profile string is a failure.
//  - Opens "void main()" / "{" in the mainline intro output, then switches to
//    the mainline output with one extra level of indent.
static void emit_GLSL_start(Context *ctx, const char *profilestr)
{
    if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // if

    ctx->output = &ctx->globals;

    if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) != 0)
    {
        if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) != 0)
        {
            failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
            return;
        } // if

        ctx->support_glsl120 = 1;
        output_line(ctx, "#version 120");
    } // if

    ctx->output = &ctx->mainline_intro;
    output_line(ctx, "void main()");
    output_line(ctx, "{");

    ctx->output = &ctx->mainline;
    ctx->indent++;
} // emit_GLSL_start
static void emit_GLSL_RET(Context *ctx);

// GLSL "end" hook: if the stream did not finish on a RET opcode, emit one
// now; otherwise there is nothing left to do.
static void emit_GLSL_end(Context *ctx)
{
    if (ctx->previous_opcode == OPCODE_RET)
        return;

    emit_GLSL_RET(ctx);
} // emit_GLSL_end
// GLSL "phase" hook. Intentionally empty.
static void emit_GLSL_phase(Context *ctx)
{
    (void) ctx;  // no-op in GLSL.
} // emit_GLSL_phase
// GLSL "finalize" hook. Appends a blank line to the globals output, then
// looks up the declared POSITION attribute while the mainline intro is the
// active output. The code that would have assigned gl_Position from that
// attribute is compiled out (#if 0), so the lookup currently has no visible
// effect in this function.
static void emit_GLSL_finalize(Context *ctx)
{
const RegisterList *reg;
// throw some blank lines around to make source more readable.
push_output(ctx, &ctx->globals);
output_blank_line(ctx);
pop_output(ctx);
push_output(ctx, &ctx->mainline_intro);
ctx->indent++;
// Make sure this is always set, moved from our generic attribute.
reg = declared_attribute(ctx, MOJOSHADER_USAGE_POSITION, 0);
if (reg != NULL)
{
#if 0 // !!! FIXME: probably not necessary?
// !!! FIXME: only emit if shader didn't definitely set gl_Position.
output_line(ctx, "gl_Position = %s;",
get_GLSL_varname(ctx, reg->regtype, reg->regnum));
#endif
} // if
// Undo the indent bump and restore the previous output.
ctx->indent--;
pop_output(ctx);
} // emit_GLSL_finalize
// GLSL "global" hook: declares a global variable for register
// (regtype, regnum) in the globals output. Address registers become ivec4,
// predicate registers bvec4 and temp registers vec4. Loop and label registers
// emit nothing. Any other register type is reported as a bug.
static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
{
    const char *name = get_GLSL_varname(ctx, regtype, regnum);

    push_output(ctx, &ctx->globals);

    switch (regtype)
    {
        case REG_TYPE_ADDRESS:
            output_line(ctx, "ivec4 %s;", name);
            break;

        case REG_TYPE_PREDICATE:
            output_line(ctx, "bvec4 %s;", name);
            break;

        case REG_TYPE_TEMP:
            output_line(ctx, "vec4 %s;", name);
            break;

        // Loop: no-op. We declare these in for loops at the moment.
        // Label: no-op. If we see it here, it means we optimized it out.
        case REG_TYPE_LOOP:
        case REG_TYPE_LABEL:
            break;

        default:
            fail(ctx, "BUG: we used a register we don't know how to define.");
            break;
    } // switch

    pop_output(ctx);
} // emit_GLSL_global
// GLSL "array" hook: declares "uniform vec4 <name>[<size>];" in the globals
// output, where <name> is the constant-array name for (base, size).
static void emit_GLSL_array(Context *ctx, int base, int size)
{
    const char *arrayname = get_GLSL_const_array_varname(ctx, base, size);

    push_output(ctx, &ctx->globals);
    output_line(ctx, "uniform vec4 %s[%d];", arrayname, size);
    pop_output(ctx);
} // emit_GLSL_array
// GLSL "const_array" hook: emits a constant array covering registers
// [base, base + size). The active path declares a plain global vec4 array and
// fills it element by element at the start of main() (the mainline intro
// output), copying from the individual constant variables. An alternative
// that uses a GLSL 1.20 constant-array initializer is compiled out (#if 0).
// NOTE(review): both loops walk `clist` without checking for NULL; presumably
// the caller guarantees `size` float constants remain in the list -- confirm.
static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
int base, int size)
{
const char *varname = get_GLSL_const_array_varname(ctx, base, size);
const char *cstr = NULL;
// Remembered so scratch buffers taken inside the loops can be released again
// after every iteration (see the scratchidx resets below).
const int origscratch = ctx->scratchidx;
int i;
#if 0
// !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
// !!! FIXME: (the 1.20 spec says it should work, though, I think...)
if (ctx->support_glsl120)
{
// GLSL 1.20 can do constant arrays.
push_output(ctx, &ctx->globals);
output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size);
ctx->indent++;
for (i = 0; i < size; i++)
{
while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
clist = clist->next;
assert(clist->constant.index == (base + i));
char val0[32];
char val1[32];
char val2[32];
char val3[32];
floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
(i < (size-1)) ? "," : "");
ctx->scratchidx = origscratch;
clist = clist->next;
} // for
ctx->indent--;
output_line(ctx, ");");
pop_output(ctx);
} // if
else
#endif
{
// stock GLSL 1.0 can't do constant arrays, so make a global array
// and assign all entries at the start of the mainline...
push_output(ctx, &ctx->globals);
output_line(ctx, "vec4 %s[%d];", varname, size);
pop_output(ctx);
push_output(ctx, &ctx->mainline_intro);
ctx->indent++;
for (i = 0; i < size; i++)
{
// Skip list entries that are not float constants.
while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
clist = clist->next;
// The float constants are expected in register order, with no gaps.
assert(clist->constant.index == (base + i));
cstr = get_GLSL_varname(ctx, REG_TYPE_CONST, clist->constant.index);
output_line(ctx, "%s[%d] = %s;", varname, i, cstr);
clist = clist->next;
// Rewind the scratch index to its value at function entry.
ctx->scratchidx = origscratch;
} // for
ctx->indent--;
pop_output(ctx);
} // else
} // emit_GLSL_const_array
// GLSL "uniform" hook: declares the uniform for register (regtype, regnum) in
// the globals output.
//  - arraysize <= 0: emits "uniform <type> <name>;", with <type> vec4, ivec4
//    or bvec4 for float, int and bool constant registers.
//  - otherwise: the register is part of a constant array, so it becomes a
//    #define aliasing element (regnum - arraybase) of that array.
// Any other register type is reported as a bug and nothing is emitted.
static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
                              int arraybase, int arraysize)
{
    const char *varname = get_GLSL_varname(ctx, regtype, regnum);
    const char *type = NULL;

    switch (regtype)
    {
        case REG_TYPE_CONST: type = "vec4"; break;
        case REG_TYPE_CONSTINT: type = "ivec4"; break;
        case REG_TYPE_CONSTBOOL: type = "bvec4"; break;
        default:
            fail(ctx, "BUG: used a uniform we don't know how to define.");
            return;  // never let a NULL type reach a "%s" conversion below.
    } // switch

    push_output(ctx, &ctx->globals);
    if (arraysize <= 0)
        output_line(ctx, "uniform %s %s;", type, varname);
    else
    {
        const int offset = (regnum - arraybase);
        const char *array = get_GLSL_const_array_varname(ctx, arraybase, arraysize);
        output_line(ctx, "#define %s %s[%d]", varname, array, offset);
    } // else
    pop_output(ctx);
} // emit_GLSL_uniform
// GLSL "sampler" hook: declares "uniform <type> <name>;" in the globals
// output for sampler register `stage`, where <type> is sampler2D, samplerCube
// or sampler3D for 2D, cube and volume textures. Any other texture type is
// reported as a bug and nothing is emitted.
static void emit_GLSL_sampler(Context *ctx, int stage, TextureType ttype)
{
    const char *varname = get_GLSL_varname(ctx, REG_TYPE_SAMPLER, stage);
    const char *type = NULL;

    switch (ttype)
    {
        case TEXTURE_TYPE_2D: type = "sampler2D"; break;
        case TEXTURE_TYPE_CUBE: type = "samplerCube"; break;
        case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break;
        default:
            fail(ctx, "BUG: used a sampler we don't know how to define.");
            return;  // never let a NULL type reach a "%s" conversion below.
    } // switch

    push_output(ctx, &ctx->globals);
    output_line(ctx, "uniform %s %s;", type, varname);
    pop_output(ctx);
} // emit_GLSL_sampler
// Emit the GLSL declaration for one attribute-style register.
//  Everything is written to ctx->globals. Depending on shader type, register
//  type and usage, the register becomes either a real declaration
//  ("attribute vec4 ...", "vec4 ...", "float ...") or a #define that aliases
//  the register's variable name to a GLSL built-in (gl_Position,
//  gl_TexCoord[n], gl_FragColor, ...).
//  (wmask) is currently unused (see the FIXME below).
//  Unknown register/shader types are reported through fail().
static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
MOJOSHADER_usage usage, int index, int wmask)
{
// !!! FIXME: this function doesn't deal with write masks at all yet!
const char *varname = get_GLSL_varname(ctx, regtype, regnum);
const char *usage_str = NULL;
const char *arrayleft = "";
const char *arrayright = "";
char index_str[16] = { '\0' };
// index_str stays empty for index zero; some cases below overwrite it.
if (index != 0) // !!! FIXME: a lot of these MUST be zero.
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
if (shader_is_vertex(ctx))
{
// pre-vs3 output registers.
// these don't ever happen in DCL opcodes, I think. Map to vs_3_*
// output registers.
if (!shader_version_atleast(ctx, 3, 0))
{
if (regtype == REG_TYPE_RASTOUT)
{
regtype = REG_TYPE_OUTPUT;
index = regnum;
switch ((const RastOutType) regnum)
{
case RASTOUT_TYPE_POSITION:
usage = MOJOSHADER_USAGE_POSITION;
break;
case RASTOUT_TYPE_FOG:
usage = MOJOSHADER_USAGE_FOG;
break;
case RASTOUT_TYPE_POINT_SIZE:
usage = MOJOSHADER_USAGE_POINTSIZE;
break;
} // switch
} // if
else if (regtype == REG_TYPE_ATTROUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_COLOR;
index = regnum;
} // else if
else if (regtype == REG_TYPE_TEXCRDOUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_TEXCOORD;
index = regnum;
} // else if
} // if
// to avoid limitations of various GL entry points for input
// attributes (glSecondaryColorPointer() can only take 3 component
// items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
// issues), we set up all inputs as generic vertex attributes, so we
// can pass data in just about any form, and ignore the built-in GLSL
// attributes like gl_SecondaryColor. Output needs to use the
// built-ins, though, but we don't have to worry about the GL entry
// point limitations there.
if (regtype == REG_TYPE_INPUT)
{
push_output(ctx, &ctx->globals);
output_line(ctx, "attribute vec4 %s;", varname);
pop_output(ctx);
} // if
else if (regtype == REG_TYPE_OUTPUT)
{
// pick the GLSL built-in varying that matches this output's usage.
switch (usage)
{
case MOJOSHADER_USAGE_POSITION:
usage_str = "gl_Position";
break;
case MOJOSHADER_USAGE_POINTSIZE:
usage_str = "gl_PointSize";
break;
case MOJOSHADER_USAGE_COLOR:
index_str[0] = '\0'; // no explicit number.
// any other color index leaves usage_str NULL, so it falls back
// to a plain global below.
if (index == 0)
usage_str = "gl_FrontColor";
else if (index == 1)
usage_str = "gl_FrontSecondaryColor";
break;
case MOJOSHADER_USAGE_FOG:
usage_str = "gl_FogFragCoord";
break;
case MOJOSHADER_USAGE_TEXCOORD:
// always write the index here (even zero): it's an array subscript.
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "gl_TexCoord";
arrayleft = "[";
arrayright = "]";
break;
default:
// !!! FIXME: we need to deal with some more built-in varyings here.
break;
} // switch
// !!! FIXME: the #define is a little hacky, but it means we don't
// !!! FIXME: have to track these separately if this works.
push_output(ctx, &ctx->globals);
// no mapping to built-in var? Just make it a regular global, pray.
if (usage_str == NULL)
output_line(ctx, "vec4 %s;", varname);
else
{
output_line(ctx, "#define %s %s%s%s%s", varname, usage_str,
arrayleft, index_str, arrayright);
} // else
pop_output(ctx);
} // else if
else
{
fail(ctx, "unknown vertex shader attribute register");
} // else
} // if
else if (shader_is_pixel(ctx))
{
// samplers DCLs get handled in emit_GLSL_sampler().
if (regtype == REG_TYPE_COLOROUT)
usage_str = "gl_FragColor";
else if (regtype == REG_TYPE_DEPTHOUT)
usage_str = "gl_FragDepth";
// !!! FIXME: can you actually have a texture register with COLOR usage?
else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
{
// other usages leave usage_str NULL, so nothing is emitted for them.
if (usage == MOJOSHADER_USAGE_TEXCOORD)
{
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "gl_TexCoord";
arrayleft = "[";
arrayright = "]";
} // if
else if (usage == MOJOSHADER_USAGE_COLOR)
{
index_str[0] = '\0'; // no explicit number.
if (index == 0)
usage_str = "gl_Color";
else if (index == 1)
usage_str = "gl_SecondaryColor";
else
fail(ctx, "unsupported color index");
} // else if
} // else if
else if (regtype == REG_TYPE_MISCTYPE)
{
const MiscTypeType mt = (MiscTypeType) regnum;
if (mt == MISCTYPE_TYPE_FACE)
{
// this one is a real variable, not a #define alias: emitted as
// 1.0 or -1.0 depending on gl_FrontFacing.
push_output(ctx, &ctx->globals);
output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;",
varname);
pop_output(ctx);
} // if
else if (mt == MISCTYPE_TYPE_POSITION)
{
index_str[0] = '\0'; // no explicit number.
usage_str = "gl_FragCoord"; // !!! FIXME: is this the same coord space as D3D?
} // else if
else
{
fail(ctx, "BUG: unhandled misc register");
} // else
} // else if
else
{
fail(ctx, "unknown pixel shader attribute register");
} // else
// only registers that mapped to a built-in get a #define here.
if (usage_str != NULL)
{
push_output(ctx, &ctx->globals);
output_line(ctx, "#define %s %s%s%s%s", varname, usage_str,
arrayleft, index_str, arrayright);
pop_output(ctx);
} // if
} // else if
else
{
fail(ctx, "Unknown shader type"); // state machine should catch this.
} // else
} // emit_GLSL_attribute
// GLSL emitter for the NOP opcode: intentionally writes no output.
static void emit_GLSL_NOP(Context *ctx)
{
// no-op is a no-op. :)
} // emit_GLSL_NOP
// MOV: assign source 0 to the destination.
static void emit_GLSL_MOV(Context *ctx)
{
    const char *value = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s", make_GLSL_destarg_assign(ctx, "%s", value));
} // emit_GLSL_MOV
// ADD: assign (source 0 + source 1) to the destination.
static void emit_GLSL_ADD(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "%s + %s", lhs, rhs));
} // emit_GLSL_ADD
// SUB: assign (source 0 - source 1) to the destination.
static void emit_GLSL_SUB(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "%s - %s", lhs, rhs));
} // emit_GLSL_SUB
// MAD: assign ((source 0 * source 1) + source 2) to the destination.
static void emit_GLSL_MAD(Context *ctx)
{
    const char *factor_a = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *factor_b = make_GLSL_srcarg_string_masked(ctx, 1);
    const char *addend = make_GLSL_srcarg_string_masked(ctx, 2);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "(%s * %s) + %s",
                                         factor_a, factor_b, addend));
} // emit_GLSL_MAD
// MUL: assign (source 0 * source 1) to the destination.
static void emit_GLSL_MUL(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "%s * %s", lhs, rhs));
} // emit_GLSL_MUL
// RCP: assign the reciprocal (1.0 / source 0) to the destination.
static void emit_GLSL_RCP(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "1.0 / %s", operand));
} // emit_GLSL_RCP
// RSQ: assign inversesqrt(source 0) to the destination.
static void emit_GLSL_RSQ(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "inversesqrt(%s)", operand));
} // emit_GLSL_RSQ
// Shared by the dot-product opcodes: assigns dot(src0, src1) followed by the
//  literal text in (extra) to the destination. dot() yields a scalar, so when
//  the destination write mask covers more than one component the expression
//  is wrapped in a vecN(...) constructor.
static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1,
                              const char *extra)
{
    char cast_open[16] = { '\0' };
    const char *cast_close = "";
    const int components = vecsize_from_writemask(ctx->dest_arg.writemask);

    if (components != 1)
    {
        snprintf(cast_open, sizeof (cast_open), "vec%d(", components);
        cast_close = ")";
    } // if

    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "%sdot(%s, %s)%s%s",
                                         cast_open, src0, src1, extra,
                                         cast_close));
} // emit_GLSL_dotprod
// DP3: dot product of the vec3 forms of sources 0 and 1.
static void emit_GLSL_DP3(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_vec3(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_vec3(ctx, 1);
    emit_GLSL_dotprod(ctx, lhs, rhs, "");  // nothing extra to append.
} // emit_GLSL_DP3
// DP4: dot product of the full forms of sources 0 and 1.
static void emit_GLSL_DP4(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_full(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_full(ctx, 1);
    emit_GLSL_dotprod(ctx, lhs, rhs, "");  // nothing extra to append.
} // emit_GLSL_DP4
// MIN: assign min(source 0, source 1) to the destination.
static void emit_GLSL_MIN(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "min(%s, %s)", lhs, rhs));
} // emit_GLSL_MIN
// MAX: assign max(source 0, source 1) to the destination.
static void emit_GLSL_MAX(Context *ctx)
{
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "max(%s, %s)", lhs, rhs));
} // emit_GLSL_MAX
// SLT: per-component (source 0 < source 1), written as 1.0 or 0.0.
//  Converting a bool/bvec to float/vec gives exactly those values.
static void emit_GLSL_SLT(Context *ctx)
{
    const int components = vecsize_from_writemask(ctx->dest_arg.writemask);
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    const char *stmt = NULL;

    if (components != 1)  // vector: use the component-wise built-in.
    {
        stmt = make_GLSL_destarg_assign(ctx, "vec%d(lessThan(%s, %s))",
                                        components, lhs, rhs);
    } // if
    else  // scalar: the plain operator works.
    {
        stmt = make_GLSL_destarg_assign(ctx, "float(%s < %s)", lhs, rhs);
    } // else

    output_line(ctx, "%s", stmt);
} // emit_GLSL_SLT
// SGE: per-component (source 0 >= source 1), written as 1.0 or 0.0.
//  Converting a bool/bvec to float/vec gives exactly those values.
static void emit_GLSL_SGE(Context *ctx)
{
    const int components = vecsize_from_writemask(ctx->dest_arg.writemask);
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    const char *stmt = NULL;

    if (components != 1)  // vector: use the component-wise built-in.
    {
        stmt = make_GLSL_destarg_assign(ctx,
                                        "vec%d(greaterThanEqual(%s, %s))",
                                        components, lhs, rhs);
    } // if
    else  // scalar: the plain operator works.
    {
        stmt = make_GLSL_destarg_assign(ctx, "float(%s >= %s)", lhs, rhs);
    } // else

    output_line(ctx, "%s", stmt);
} // emit_GLSL_SGE
// EXP: assign exp2(source 0) to the destination.
static void emit_GLSL_EXP(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "exp2(%s)", operand));
} // emit_GLSL_EXP
// LOG: assign log2(source 0) to the destination.
static void emit_GLSL_LOG(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "log2(%s)", operand));
} // emit_GLSL_LOG
// Write the GLSL helper function LIT() into ctx->helpers, at most once per
//  context (guarded by ctx->glsl_generated_lit_opcode).
// The generated function returns (1.0, src.x, pow(src.y, power), 1.0) when
//  src.x and src.y are both positive, with the y and z terms left at 0.0
//  otherwise; power is src.w clamped to +/- maxp.
static void emit_GLSL_LIT_helper(Context *ctx)
{
    // value from the dx9 reference. No 'f' suffix: GLSL 1.10 rejects it.
    const char *maxp = "127.9961";

    if (ctx->glsl_generated_lit_opcode)
        return;  // already emitted for this shader.

    ctx->glsl_generated_lit_opcode = 1;

    push_output(ctx, &ctx->helpers);
    output_line(ctx, "const vec4 LIT(const vec4 src)");
    output_line(ctx, "{"); ctx->indent++;
    // not "const": GLSL wants a constant expression to initialize a const,
    //  and this depends on a function parameter.
    output_line(ctx,   "float power = clamp(src.w, -%s, %s);",maxp,maxp);
    // GLSL has no C++-style "type name(args)" initializer; use a constructor
    //  expression, and terminate the statement.
    output_line(ctx,   "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);");
    output_line(ctx,   "if (src.x > 0.0) {"); ctx->indent++;
    output_line(ctx,     "retval.y = src.x;");
    output_line(ctx,     "if (src.y > 0.0) {"); ctx->indent++;
    output_line(ctx,       "retval.z = pow(src.y, power);"); ctx->indent--;
    output_line(ctx,     "}"); ctx->indent--;
    output_line(ctx,   "}");
    output_line(ctx,   "return retval;"); ctx->indent--;
    output_line(ctx, "}");
    output_blank_line(ctx);
    pop_output(ctx);
} // emit_GLSL_LIT_helper
// LIT: assign LIT(source 0) to the destination, then make sure the LIT()
//  helper function itself has been generated.
static void emit_GLSL_LIT(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_full(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "LIT(%s)", operand));
    emit_GLSL_LIT_helper(ctx);  // only writes the helper the first time.
} // emit_GLSL_LIT
// DST: assign vec4(1.0, src0.y * src1.y, src0.z, src1.w) to the destination.
static void emit_GLSL_DST(Context *ctx)
{
    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
    const char *a_y = make_GLSL_srcarg_string_y(ctx, 0);
    const char *b_y = make_GLSL_srcarg_string_y(ctx, 1);
    const char *a_z = make_GLSL_srcarg_string_z(ctx, 0);
    const char *b_w = make_GLSL_srcarg_string_w(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "vec4(1.0, %s * %s, %s, %s)",
                                         a_y, b_y, a_z, b_w));
} // emit_GLSL_DST
// LRP: assign mix(source 2, source 1, source 0) to the destination.
//  Note the reversed argument order: source 0 is the blend factor.
static void emit_GLSL_LRP(Context *ctx)
{
    const char *factor = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *upper = make_GLSL_srcarg_string_masked(ctx, 1);
    const char *lower = make_GLSL_srcarg_string_masked(ctx, 2);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "mix(%s, %s, %s)",
                                         lower, upper, factor));
} // emit_GLSL_LRP
// FRC: assign fract(source 0) to the destination.
static void emit_GLSL_FRC(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "fract(%s)", operand));
} // emit_GLSL_FRC
// M4X4: four dot products of source 0 against sources 1..4 (one matrix row
//  each), packed into a vec4.
static void emit_GLSL_M4X4(Context *ctx)
{
    // !!! FIXME: d3d is row-major, glsl is column-major, I think.
    const char *vec = make_GLSL_srcarg_string_full(ctx, 0);
    const char *r0 = make_GLSL_srcarg_string_full(ctx, 1);
    const char *r1 = make_GLSL_srcarg_string_full(ctx, 2);
    const char *r2 = make_GLSL_srcarg_string_full(ctx, 3);
    const char *r3 = make_GLSL_srcarg_string_full(ctx, 4);
    output_line(ctx, "%s",
        make_GLSL_destarg_assign(ctx,
            "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
            vec, r0, vec, r1, vec, r2, vec, r3));
} // emit_GLSL_M4X4
// M4X3: three dot products of source 0 against sources 1..3 (one matrix row
//  each), packed into a vec3.
static void emit_GLSL_M4X3(Context *ctx)
{
    // !!! FIXME: d3d is row-major, glsl is column-major, I think.
    const char *vec = make_GLSL_srcarg_string_full(ctx, 0);
    const char *r0 = make_GLSL_srcarg_string_full(ctx, 1);
    const char *r1 = make_GLSL_srcarg_string_full(ctx, 2);
    const char *r2 = make_GLSL_srcarg_string_full(ctx, 3);
    output_line(ctx, "%s",
        make_GLSL_destarg_assign(ctx,
            "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
            vec, r0, vec, r1, vec, r2));
} // emit_GLSL_M4X3
// M3X4: four dot products of the vec3 form of source 0 against the vec3
//  forms of sources 1..4, packed into a vec4.
static void emit_GLSL_M3X4(Context *ctx)
{
    // !!! FIXME: d3d is row-major, glsl is column-major, I think.
    const char *vec = make_GLSL_srcarg_string_vec3(ctx, 0);
    const char *r0 = make_GLSL_srcarg_string_vec3(ctx, 1);
    const char *r1 = make_GLSL_srcarg_string_vec3(ctx, 2);
    const char *r2 = make_GLSL_srcarg_string_vec3(ctx, 3);
    const char *r3 = make_GLSL_srcarg_string_vec3(ctx, 4);
    output_line(ctx, "%s",
        make_GLSL_destarg_assign(ctx,
            "vec4(dot(%s, %s), dot(%s, %s), "
            "dot(%s, %s), dot(%s, %s))",
            vec, r0, vec, r1, vec, r2, vec, r3));
} // emit_GLSL_M3X4
// M3X3: three dot products of the vec3 form of source 0 against the vec3
//  forms of sources 1..3, packed into a vec3.
static void emit_GLSL_M3X3(Context *ctx)
{
    // !!! FIXME: d3d is row-major, glsl is column-major, I think.
    const char *vec = make_GLSL_srcarg_string_vec3(ctx, 0);
    const char *r0 = make_GLSL_srcarg_string_vec3(ctx, 1);
    const char *r1 = make_GLSL_srcarg_string_vec3(ctx, 2);
    const char *r2 = make_GLSL_srcarg_string_vec3(ctx, 3);
    output_line(ctx, "%s",
        make_GLSL_destarg_assign(ctx,
            "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
            vec, r0, vec, r1, vec, r2));
} // emit_GLSL_M3X3
// M3X2: two dot products of the vec3 form of source 0 against the vec3
//  forms of sources 1..2, packed into a vec2.
static void emit_GLSL_M3X2(Context *ctx)
{
    // !!! FIXME: d3d is row-major, glsl is column-major, I think.
    const char *vec = make_GLSL_srcarg_string_vec3(ctx, 0);
    const char *r0 = make_GLSL_srcarg_string_vec3(ctx, 1);
    const char *r1 = make_GLSL_srcarg_string_vec3(ctx, 2);
    output_line(ctx, "%s",
        make_GLSL_destarg_assign(ctx,
            "vec2(dot(%s, %s), dot(%s, %s))",
            vec, r0, vec, r1));
} // emit_GLSL_M3X2
// CALL: emit a call to the subroutine named by source 0. While inside a
//  LOOP (ctx->loops > 0) the loop counter aL is passed along as an argument.
static void emit_GLSL_CALL(Context *ctx)
{
    const char *target = make_GLSL_srcarg_string_masked(ctx, 0);

    if (ctx->loops <= 0)
    {
        output_line(ctx, "%s();", target);
        return;
    } // if

    output_line(ctx, "%s(aL);", target);
} // emit_GLSL_CALL
// CALLNZ: emit a call to the subroutine named by source 0, wrapped in an
//  "if" on source 1. While inside a LOOP (ctx->loops > 0) the loop counter
//  aL is passed along as an argument.
static void emit_GLSL_CALLNZ(Context *ctx)
{
    // !!! FIXME: if src1 is a constbool that's true, we can remove the
    // !!! FIXME:  if. If it's false, we can make this a no-op.
    const char *target = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *condition = make_GLSL_srcarg_string_masked(ctx, 1);

    if (ctx->loops <= 0)
    {
        output_line(ctx, "if (%s) { %s(); }", condition, target);
        return;
    } // if

    output_line(ctx, "if (%s) { %s(aL); }", condition, target);
} // emit_GLSL_CALLNZ
// LOOP: open a scoped block and a "for" over aL, driven by the register in
//  source 1: .y is the start value, .x + .y the end bound, .z the step.
//  Leaves the indent two levels deeper; emit_GLSL_ENDLOOP() closes both.
static void emit_GLSL_LOOP(Context *ctx)
{
    // !!! FIXME: swizzle?
    const char *counts = get_GLSL_srcarg_varname(ctx, 1);

    assert(ctx->source_args[0].regnum == 0);  // in case they add aL1 someday.

    output_line(ctx, "{");
    ++ctx->indent;
    output_line(ctx, "const int aLend = %s.x + %s.y;", counts, counts);
    output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {",
                counts, counts);
    ++ctx->indent;
} // emit_GLSL_LOOP
// RET: close the current GLSL function and send further output to
//  ctx->subroutines.
static void emit_GLSL_RET(Context *ctx)
{
    // The MSDN specs say a RET _has_ to end a function (no early returns),
    //  so reaching one means the high-level function can be closed safely.
    --ctx->indent;
    output_line(ctx, "}");
    output_blank_line(ctx);
    ctx->output = &ctx->subroutines;
} // emit_GLSL_RET
// ENDLOOP: close the two brace levels opened by emit_GLSL_LOOP().
static void emit_GLSL_ENDLOOP(Context *ctx)
{
    --ctx->indent;
    output_line(ctx, "}");  // ends the "for".
    --ctx->indent;
    output_line(ctx, "}");  // ends the enclosing scope block.
} // emit_GLSL_ENDLOOP
// LABEL: open the GLSL function for a subroutine. If the label was never
//  recorded in ctx->used_registers, output is redirected to ctx->ignore so
//  the body is parsed but discarded. If its register entry has misc == 1,
//  the function takes the loop counter as an "int aL" parameter.
static void emit_GLSL_LABEL(Context *ctx)
{
    const char *funcname = make_GLSL_srcarg_string_masked(ctx, 0);
    const int labelnum = ctx->source_args[0].regnum;
    RegisterList *used = reglist_find(&ctx->used_registers, REG_TYPE_LABEL,
                                      labelnum);
    const char *params = "";

    assert(ctx->output == &ctx->subroutines);  // not mainline, etc.
    assert(ctx->indent == 0);  // we shouldn't be in the middle of a function.

    // MSDN specs say CALL* has to come before the LABEL, so we know if we
    //  can ditch the entire function here as unused.
    if (used == NULL)
        ctx->output = &ctx->ignore;  // Func not used. Parse, but don't output.
    else if (used->misc == 1)
        params = "int aL";

    // !!! FIXME: it would be nice if we could determine if a function is
    // !!! FIXME:  only called once and, if so, forcibly inline it.

    output_line(ctx, "void %s(%s)", funcname, params);
    output_line(ctx, "{");
    ++ctx->indent;
} // emit_GLSL_LABEL
// GLSL emitter for the DCL opcode: intentionally writes no output here.
static void emit_GLSL_DCL(Context *ctx)
{
// no-op. We do this in our emit_attribute() and emit_uniform().
} // emit_GLSL_DCL
// POW: assign pow(abs(source 0), source 1) to the destination.
static void emit_GLSL_POW(Context *ctx)
{
    const char *base = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *exponent = make_GLSL_srcarg_string_masked(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "pow(abs(%s), %s)",
                                         base, exponent));
} // emit_GLSL_POW
// CRS: assign cross(source 0, source 1), both in vec3 form, to the
//  destination.
static void emit_GLSL_CRS(Context *ctx)
{
    // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
    const char *lhs = make_GLSL_srcarg_string_vec3(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_vec3(ctx, 1);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "cross(%s, %s)", lhs, rhs));
} // emit_GLSL_CRS
// SGN: assign sign(source 0) to the destination. Only source 0 is read;
//  the temporary registers the D3D opcode specifies aren't needed.
static void emit_GLSL_SGN(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "sign(%s)", operand));
} // emit_GLSL_SGN
// ABS: assign abs(source 0) to the destination.
static void emit_GLSL_ABS(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "abs(%s)", operand));
} // emit_GLSL_ABS
// NRM: assign normalize(source 0) to the destination.
static void emit_GLSL_NRM(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "normalize(%s)", operand));
} // emit_GLSL_NRM
// SINCOS: write cos() and/or sin() of the scalar in source 0, depending on
//  which of the three recognized destination write masks is in use:
//  cos alone, sin alone, or vec2(cos, sin).
// Any other write mask is reported through fail() and nothing is emitted.
static void emit_GLSL_SINCOS(Context *ctx)
{
    // we don't care about the temp registers that <= sm2 demands; ignore them.
    //  sm2 also talks about what components are left untouched vs. undefined,
    //  but we just leave those all untouched with GLSL write masks (which
    //  would fulfill the "undefined" requirement, too).
    const int mask = ctx->dest_arg.writemask;
    const char *src0 = make_GLSL_srcarg_string_scalar(ctx, 0);
    const char *code = NULL;

    if (writemask_x(mask))
        code = make_GLSL_destarg_assign(ctx, "cos(%s)", src0);
    else if (writemask_y(mask))
        code = make_GLSL_destarg_assign(ctx, "sin(%s)", src0);
    else if (writemask_xy(mask))
        code = make_GLSL_destarg_assign(ctx, "vec2(cos(%s), sin(%s))", src0, src0);
    else
    {
        // (code) is still NULL here; passing that to "%s" is undefined.
        fail(ctx, "BUG: unexpected write mask for SINCOS");
        return;
    } // else

    output_line(ctx, "%s", code);
} // emit_GLSL_SINCOS
// REP: open a "for" loop that counts from zero up to the .x value of
//  source 0. The counter variable is named after ctx->reps.
static void emit_GLSL_REP(Context *ctx)
{
    // !!! FIXME:
    // msdn docs say legal loop values are 0 to 255. We can check DEFI values
    //  at parse time, but if they are pulling a value from a uniform, do
    //  we clamp here?
    // !!! FIXME: swizzle is legal here, right?
    const char *count = make_GLSL_srcarg_string_x(ctx, 0);
    const uint id = (uint) ctx->reps;

    output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
                id, id, count, id);
    ++ctx->indent;
} // emit_GLSL_REP
// ENDREP: step the indent back out and close the loop's brace.
static void emit_GLSL_ENDREP(Context *ctx)
{
    --ctx->indent;
    output_line(ctx, "}");
} // emit_GLSL_ENDREP
// IF: open an "if" block conditioned on the scalar in source 0.
static void emit_GLSL_IF(Context *ctx)
{
    const char *condition = make_GLSL_srcarg_string_scalar(ctx, 0);
    output_line(ctx, "if (%s) {", condition);
    ++ctx->indent;
} // emit_GLSL_IF
// IFC: open an "if" block comparing the scalars in sources 0 and 1 with the
//  instruction's comparison operator.
static void emit_GLSL_IFC(Context *ctx)
{
    const char *op = get_GLSL_comparison_string_scalar(ctx);
    const char *lhs = make_GLSL_srcarg_string_scalar(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_scalar(ctx, 1);
    output_line(ctx, "if (%s %s %s) {", lhs, op, rhs);
    ++ctx->indent;
} // emit_GLSL_IFC
// ELSE: close the "if" branch and open the "else" branch at the same depth.
static void emit_GLSL_ELSE(Context *ctx)
{
    --ctx->indent;
    output_line(ctx, "} else {");
    ++ctx->indent;
} // emit_GLSL_ELSE
// ENDIF: step the indent back out and close the conditional's brace.
static void emit_GLSL_ENDIF(Context *ctx)
{
    --ctx->indent;
    output_line(ctx, "}");
} // emit_GLSL_ENDIF
// BREAK: emit an unconditional GLSL "break;" statement.
static void emit_GLSL_BREAK(Context *ctx)
{
output_line(ctx, "break;");
} // emit_GLSL_BREAK
// BREAKC: emit a "break" guarded by a comparison of the scalars in sources
//  0 and 1, using the instruction's comparison operator.
static void emit_GLSL_BREAKC(Context *ctx)
{
    const char *op = get_GLSL_comparison_string_scalar(ctx);
    const char *lhs = make_GLSL_srcarg_string_scalar(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_scalar(ctx, 1);
    output_line(ctx, "if (%s %s %s) { break; }", lhs, op, rhs);
} // emit_GLSL_BREAKC
// MOVA: convert source 0 to integers and assign to the destination. The
//  generated expression is floor(abs(v) + 0.5) * sign(v), wrapped in int()
//  for one component or ivecN() for several.
static void emit_GLSL_MOVA(Context *ctx)
{
    const int components = vecsize_from_writemask(ctx->dest_arg.writemask);
    const char *value = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *stmt = NULL;

    if (components != 1)
    {
        stmt = make_GLSL_destarg_assign(ctx,
                    "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))",
                    components, value, components, value);
    } // if
    else
    {
        stmt = make_GLSL_destarg_assign(ctx,
                    "int(floor(abs(%s) + 0.5) * sign(%s))", value, value);
    } // else

    output_line(ctx, "%s", stmt);
} // emit_GLSL_MOVA
// DEFB: declare the destination register as a "const bool" global, true if
//  the first data dword is nonzero.
static void emit_GLSL_DEFB(Context *ctx)
{
    const char *name = get_GLSL_destarg_varname(ctx);
    const char *literal = "false";

    if (ctx->dwords[0])
        literal = "true";

    push_output(ctx, &ctx->globals);
    output_line(ctx, "const bool %s = %s;", name, literal);
    pop_output(ctx);
} // emit_GLSL_DEFB
// DEFI: declare the destination register as a "const ivec4" global built
//  from the first four data dwords, read as signed 32-bit integers.
static void emit_GLSL_DEFI(Context *ctx)
{
    const char *name = get_GLSL_destarg_varname(ctx);
    const int32 *vals = (const int32 *) ctx->dwords;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);", name,
                (int) vals[0], (int) vals[1], (int) vals[2], (int) vals[3]);
    pop_output(ctx);
} // emit_GLSL_DEFI
// TEXCRD has no GLSL emitter yet. The macro is defined outside this section;
//  presumably it generates a stub emit_GLSL_TEXCRD() that reports the opcode
//  as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
// TEXKILL: emit a "discard" that fires when any of the destination
//  register's .xyz components is negative.
static void emit_GLSL_TEXKILL(Context *ctx)
{
    // !!! FIXME: can texkill swizzle?
    const char *reg = get_GLSL_destarg_varname(ctx);
    output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", reg);
} // emit_GLSL_TEXKILL
// TEXLD: sample the texture bound to the sampler in source 1 at the
//  coordinates in source 0, and assign the (swizzled) result to the
//  destination.
// The GLSL lookup function and coordinate width are chosen from the texture
//  type recorded for the sampler in ctx->samplers. Shader versions below
//  2.0, undeclared samplers and unknown texture types are reported through
//  fail() and emit nothing.
static void emit_GLSL_TEXLD(Context *ctx)
{
    // !!! FIXME: do non-RGBA textures map to same default values as D3D?

    if (!shader_version_atleast(ctx, 2, 0))
    {
        // ps_1_0 and ps_1_4 are both different, too!
        // (this branch is strictly *below* 2.0; 2.0 itself is handled.)
        fail(ctx, "TEXLD < Shader Model 2.0 unimplemented.");  // !!! FIXME
        return;
    } // if

    else
    {
        const SourceArgInfo *samp_arg = &ctx->source_args[1];
        RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                          samp_arg->regnum);
        const char *funcname = NULL;
        const char *src0 = NULL;
        const char *src1 = get_GLSL_srcarg_varname(ctx, 1); // !!! FIXME: SRC_MOD?

        if (sreg == NULL)
        {
            fail(ctx, "TEXLD using undeclared sampler");
            return;
        } // if

        // the sampler's register entry carries its texture type in (index).
        switch ((const TextureType) sreg->index)
        {
            case TEXTURE_TYPE_2D:
                funcname = "texture2D";
                src0 = make_GLSL_srcarg_string_vec2(ctx, 0);
                break;
            case TEXTURE_TYPE_CUBE:
                funcname = "textureCube";
                src0 = make_GLSL_srcarg_string_vec3(ctx, 0);
                break;
            case TEXTURE_TYPE_VOLUME:
                funcname = "texture3D";
                src0 = make_GLSL_srcarg_string_vec3(ctx, 0);
                break;
            default:
                fail(ctx, "unknown texture type");
                return;
        } // switch

        assert(!scalar_register(samp_arg->regtype, samp_arg->regnum));

        // the sampler argument's swizzle is applied to the lookup's result,
        //  limited by the destination write mask.
        char swiz_str[6] = { '\0' };
        make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
                                 samp_arg->swizzle, ctx->dest_arg.writemask);

        const char *code = make_GLSL_destarg_assign(ctx,
                            "%s(%s, %s)%s", funcname, src1, src0, swiz_str);
        output_line(ctx, "%s", code);
    } // else
} // emit_GLSL_TEXLD
// None of these texture opcodes have a GLSL emitter yet. The macro is defined
//  outside this section; presumably it generates a stub emit_GLSL_<op>() for
//  each that reports the opcode as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXBEM) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXBEML) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2PAD) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3PAD) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3SPEC) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3VSPEC) // !!! FIXME
// EXPP: emitted exactly like EXP (delegates to emit_GLSL_EXP()).
static void emit_GLSL_EXPP(Context *ctx)
{
// !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
emit_GLSL_EXP(ctx); // I guess this is just partial precision EXP?
} // emit_GLSL_EXPP
// LOGP: emitted exactly like LOG (delegates to emit_GLSL_LOG()).
static void emit_GLSL_LOGP(Context *ctx)
{
// LOGP is just low-precision LOG, but we'll take the higher precision.
emit_GLSL_LOG(ctx);
} // emit_GLSL_LOGP
// Common code between CMP and CND.
//  Emits one or more lines of the form
//      dest = ((src0 <cmp>) ? src1 : src2)
//  where (cmp) is the comparison text supplied by the caller (for example
//  ">= 0.0"). The condition uses a single component of source 0, so the
//  written destination components are grouped by which source 0 component
//  their swizzle selects, and one line is emitted per group.
//  The destination write mask in ctx->dest_arg is temporarily narrowed to
//  each group while its assignment is built, then restored.
static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
{
int i, j;
DestArgInfo *dst = &ctx->dest_arg;
const SourceArgInfo *srcarg0 = &ctx->source_args[0];
const int origmask = dst->writemask;
// used_swiz[n] is set once destination component n has been covered.
int used_swiz[4] = { 0, 0, 0, 0 };
const int writemask[4] = { dst->writemask0, dst->writemask1,
dst->writemask2, dst->writemask3 };
const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
srcarg0->swizzle_z, srcarg0->swizzle_w };
for (i = 0; i < 4; i++)
{
int mask = (1 << i);
if (!writemask[i]) continue;
if (used_swiz[i]) continue;
// This is a swizzle we haven't checked yet.
used_swiz[i] = 1;
// see if there are any other elements swizzled to match (.yyyy)
for (j = i + 1; j < 4; j++)
{
if (!writemask[j]) continue;
if (src0swiz[i] != src0swiz[j]) continue;
mask |= (1 << j);
used_swiz[j] = 1;
} // for
// okay, (mask) should be the writemask of swizzles we like.
// source 0 is requested for component (i) only; sources 1 and 2 are
// requested for the whole group.
const char *src0 = make_GLSL_srcarg_string(ctx, 0, (1 << i));
const char *src1 = make_GLSL_srcarg_string(ctx, 1, mask);
const char *src2 = make_GLSL_srcarg_string(ctx, 2, mask);
// narrow the destination write mask to this group...
dst->writemask = mask;
dst->writemask0 = ((mask >> 0) & 1);
dst->writemask1 = ((mask >> 1) & 1);
dst->writemask2 = ((mask >> 2) & 1);
dst->writemask3 = ((mask >> 3) & 1);
const char *code = make_GLSL_destarg_assign(ctx, "((%s %s) ? %s : %s)",
src0, cmp, src1, src2);
// ...and put the original write mask back before emitting.
dst->writemask = origmask;
dst->writemask0 = ((origmask >> 0) & 1);
dst->writemask1 = ((origmask >> 1) & 1);
dst->writemask2 = ((origmask >> 2) & 1);
dst->writemask3 = ((origmask >> 3) & 1);
output_line(ctx, "%s", code);
} // for
} // emit_GLSL_comparison_operations
// CND: select source 1 where source 0 "> 0.5", else source 2.
static void emit_GLSL_CND(Context *ctx)
{
emit_GLSL_comparison_operations(ctx, "> 0.5");
} // emit_GLSL_CND
// DEF: declare the destination register as a "const vec4" global built from
//  the first four data dwords, read as floats and formatted by floatstr().
static void emit_GLSL_DEF(Context *ctx)
{
    const char *name = get_GLSL_destarg_varname(ctx);
    const float *vals = (const float *) ctx->dwords; // !!! FIXME: could be int?
    char text[4][32];
    int i;

    for (i = 0; i < 4; i++)
        floatstr(ctx, text[i], sizeof (text[i]), vals[i], 1);

    push_output(ctx, &ctx->globals);
    output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);",
                name, text[0], text[1], text[2], text[3]);
    pop_output(ctx);
} // emit_GLSL_DEF
// None of these texture opcodes have a GLSL emitter yet. The macro is defined
//  outside this section; presumably it generates a stub emit_GLSL_<op>() for
//  each that reports the opcode as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
// CMP: select source 1 where source 0 ">= 0.0", else source 2.
static void emit_GLSL_CMP(Context *ctx)
{
emit_GLSL_comparison_operations(ctx, ">= 0.0");
} // emit_GLSL_CMP
// BEM has no GLSL emitter yet. The macro is defined outside this section;
//  presumably it generates a stub emit_GLSL_BEM() that reports the opcode as
//  unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
// DP2ADD: dot product of the vec2 forms of sources 0 and 1, plus the scalar
//  in source 2. The addition is handed to emit_GLSL_dotprod() as its
//  (extra) text so it lands inside any vecN(...) cast.
static void emit_GLSL_DP2ADD(Context *ctx)
{
    const char *src0 = make_GLSL_srcarg_string_vec2(ctx, 0);
    const char *src1 = make_GLSL_srcarg_string_vec2(ctx, 1);
    const char *src2 = make_GLSL_srcarg_string_scalar(ctx, 2);
    char extra[64];
    const int len = snprintf(extra, sizeof (extra), " + %s", src2);

    // Don't emit a clipped expression. A negative result also covers
    //  _snprintf(), which reports truncation that way and may leave the
    //  buffer without a terminator.
    if ((len < 0) || (((size_t) len) >= sizeof (extra)))
    {
        fail(ctx, "BUG: DP2ADD source argument string too long");
        return;
    } // if

    emit_GLSL_dotprod(ctx, src0, src1, extra);
} // emit_GLSL_DP2ADD
// DSX: assign dFdx(source 0) to the destination.
static void emit_GLSL_DSX(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "dFdx(%s)", operand));
} // emit_GLSL_DSX
// DSY: assign dFdy(source 0) to the destination.
static void emit_GLSL_DSY(Context *ctx)
{
    const char *operand = make_GLSL_srcarg_string_masked(ctx, 0);
    output_line(ctx, "%s",
                make_GLSL_destarg_assign(ctx, "dFdy(%s)", operand));
} // emit_GLSL_DSY
// TEXLDD has no GLSL emitter yet. The macro is defined outside this section;
//  presumably it generates a stub emit_GLSL_TEXLDD() that reports the opcode
//  as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXLDD) // !!! FIXME
// SETP: compare sources 0 and 1 with the instruction's comparison and assign
//  the boolean result to the destination. One component uses the scalar
//  operator form; several components use the vector comparison function
//  form.
static void emit_GLSL_SETP(Context *ctx)
{
    const int components = vecsize_from_writemask(ctx->dest_arg.writemask);
    const char *lhs = make_GLSL_srcarg_string_masked(ctx, 0);
    const char *rhs = make_GLSL_srcarg_string_masked(ctx, 1);
    const char *stmt = NULL;

    // destination is always predicate register (which is type bvec4).
    if (components != 1)
    {
        const char *func = get_GLSL_comparison_string_vector(ctx);
        stmt = make_GLSL_destarg_assign(ctx, "%s(%s, %s)", func, lhs, rhs);
    } // if
    else
    {
        const char *op = get_GLSL_comparison_string_scalar(ctx);
        stmt = make_GLSL_destarg_assign(ctx, "(%s %s %s)", lhs, op, rhs);
    } // else

    output_line(ctx, "%s", stmt);
} // emit_GLSL_SETP
// TEXLDL: currently emitted exactly like TEXLD (delegates to
//  emit_GLSL_TEXLD()), so no explicit level-of-detail is applied.
static void emit_GLSL_TEXLDL(Context *ctx)
{
// !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
// !!! FIXME: from fragment shaders for some inexplicable reason.
// !!! FIXME: For now, you'll just have to suffer with the potentially
// !!! FIXME: wrong mipmap until I can figure something out.
emit_GLSL_TEXLD(ctx);
} // emit_GLSL_TEXLDL
// BREAKP: emit a "break" guarded by the scalar in source 0.
static void emit_GLSL_BREAKP(Context *ctx)
{
    const char *predicate = make_GLSL_srcarg_string_scalar(ctx, 0);
    output_line(ctx, "if (%s) { break; }", predicate);
} // emit_GLSL_BREAKP
// GLSL emitter for the RESERVED opcode: intentionally writes no output.
static void emit_GLSL_RESERVED(Context *ctx)
{
// do nothing; fails in the state machine.
} // emit_GLSL_RESERVED
#endif // SUPPORT_PROFILE_GLSL
#if !SUPPORT_PROFILE_ARB1
#define PROFILE_EMITTER_ARB1(op)
#else
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
#define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op,
// Allocate a new scratch register and return its name ("scratchN"). The
//  string lives in a buffer obtained from get_scratch_buffer().
static const char *allocate_ARB1_scratch_reg_name(Context *ctx)
{
    char *name = get_scratch_buffer(ctx);
    const int regnum = allocate_scratch_register(ctx);

    snprintf(name, SCRATCH_BUFFER_SIZE, "scratch%d", regnum);
    return name;
} // allocate_ARB1_scratch_reg_name
// Return the label name for branch (id), "branch_labelN". The string lives
//  in a buffer obtained from get_scratch_buffer().
static const char *get_ARB1_branch_label_name(Context *ctx, int id)
{
    char *label = get_scratch_buffer(ctx);

    snprintf(label, SCRATCH_BUFFER_SIZE, "branch_label%d", id);
    return label;
} // get_ARB1_branch_label_name
// Look up the name string for a register by deferring to
//  get_D3D_register_string(), which is also handed (regnum_str) and (len).
//  If that lookup returns NULL, the failure is reported through fail() and
//  an empty string is returned, so callers never see NULL.
const char *get_ARB1_register_string(Context *ctx, RegisterType regtype,
                                     int regnum, char *regnum_str, int len)
{
    const char *name = get_D3D_register_string(ctx, regtype, regnum,
                                               regnum_str, len);
    if (name != NULL)
        return name;

    fail(ctx, "Unknown D3D register type.");
    return "";
} // get_ARB1_register_string
// Build the ARB1 variable name for register (rt, regnum): the register
//  string from get_ARB1_register_string() followed by the register number
//  string it filled in. The result lives in a buffer obtained from
//  get_scratch_buffer().
static const char *get_ARB1_varname(Context *ctx, RegisterType rt, int regnum)
{
    // Start out empty, as make_ARB1_srcarg_string_in_buf() does, so the "%s"
    //  below never reads indeterminate bytes if the lookup leaves this
    //  buffer unwritten.
    char regnum_str[16] = { '\0' };
    const char *regtype_str = get_ARB1_register_string(ctx, rt, regnum,
                                              regnum_str, sizeof (regnum_str));
    char *retval = get_scratch_buffer(ctx);
    snprintf(retval, SCRATCH_BUFFER_SIZE, "%s%s", regtype_str, regnum_str);
    return retval;
} // get_ARB1_varname
// Return the name of the constant array that starts at register (base) and
//  holds (size) entries: "c_array_<base>_<size>". The string lives in a
//  buffer obtained from get_scratch_buffer().
static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
{
    char *name = get_scratch_buffer(ctx);

    snprintf(name, SCRATCH_BUFFER_SIZE, "c_array_%d_%d", base, size);
    return name;
} // get_ARB1_const_array_varname
// Build the ARB1 assembly text for one source operand into a caller-supplied
//  buffer.
//
//  ctx:    translation context; instructions may be emitted through it.
//  arg:    the source operand to render.
//  buf:    destination buffer for the operand text.
//  buflen: size of buf, in bytes.
//
// Returns buf on success. If the register type can't be named, fail() is
//  called, buf is set to an empty string and "" is returned.
//
// Side effects: for relative addressing without nv2 support this may emit an
//  ARL instruction and update ctx->last_address_reg_component; for
//  SRCMOD_ABS/SRCMOD_ABSNEGATE without nv2 support this emits an ABS into a
//  newly allocated scratch register and names that register instead.
static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
                                                  const SourceArgInfo *arg,
                                                  char *buf, size_t buflen)
{
    char regnum_str[16] = { '\0' };

    // !!! FIXME: use get_ARB1_varname() instead?
    const char *regtype_str = NULL;
    if (!arg->relative)
    {
        regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                               arg->regnum, regnum_str,
                                               sizeof (regnum_str));
    } // if

    const char *rel_lbracket = "";
    char rel_offset[32] = { '\0' };
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        rel_regtype_str = get_ARB1_varname(ctx, arg->relative_regtype,
                                           arg->relative_regnum);

        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';

        if (!ctx->support_nv2)
        {
            // The address register in ARB1 only allows the '.x' component, so
            //  we need to load the component we need from a temp vector
            //  register into .x as needed.
            assert(arg->relative_regtype == REG_TYPE_ADDRESS);
            assert(arg->relative_regnum == 0);
            if (ctx->last_address_reg_component != arg->relative_component)
            {
                output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
                            arg->relative_regnum,
                            swizzle_channels[arg->relative_component]);
                ctx->last_address_reg_component = arg->relative_component;
            } // if

            rel_swizzle[1] = 'x';
        } // if

        const int arrayidx = arg->relative_array->index;
        const int arraysize = arg->relative_array->count;
        const int offset = arg->regnum - arrayidx;
        assert(offset >= 0);
        regtype_str = get_ARB1_const_array_varname(ctx, arrayidx, arraysize);
        if (offset != 0)
            snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);

        rel_lbracket = "[";
        rel_rbracket = "]";
    } // if

    // Passing NULL to a "%s" conversion is undefined behaviour; report the
    //  problem the same way make_ARB1_destarg_string() does.
    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        if (buflen > 0)
            buf[0] = '\0';
        return "";
    } // if

    // This is the source register with everything but swizzle and source mods.
    snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
             rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
             rel_rbracket);

    // Some of the source mods need to generate instructions to a temp
    //  register, in which case we'll replace the register name.
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            fail(ctx, "SRCMOD_BIAS currently unsupported in arb1");
            postmod_str = "_bias";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            fail(ctx, "SRCMOD_SIGN currently unsupported in arb1");
            postmod_str = "_bx2";
            break;

        case SRCMOD_COMPLEMENT:
            fail(ctx, "SRCMOD_COMPLEMENT currently unsupported in arb1");
            premod_str = "1-";
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            fail(ctx, "SRCMOD_X2 currently unsupported in arb1");
            postmod_str = "_x2";
            break;

        case SRCMOD_DZ:
            fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            fail(ctx, "SRCMOD_DW currently unsupported in arb1");
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            if (ctx->support_nv2)  // GL_NV_vertex_program2_option adds this.
            {
                premod_str = (arg->src_mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
                postmod_str = "|";
            } // if
            else
            {
                regtype_str = allocate_ARB1_scratch_reg_name(ctx);
                regnum_str[0] = '\0'; // move value to scratch register.
                rel_lbracket = "";   // scratch register won't use array.
                rel_rbracket = "";
                rel_offset[0] = '\0';
                rel_swizzle[0] = '\0';
                rel_regtype_str = "";
                // buf still holds the unswizzled, unmodified register text.
                output_line(ctx, "ABS %s, %s;", regtype_str, buf);
            } // else
            break;

        case SRCMOD_NOT:
            fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
            break;  // stop compiler whining.
    } // switch

    char swizzle_str[6];
    int i = 0;
    const int scalar = scalar_register(arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';

        // .xxxx is the same as .x, but .xx is illegal...scalar or full!
        if (replicate_swizzle(arg->swizzle))
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        else
        {
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
        } // else
    } // if
    swizzle_str[i] = '\0';
    assert(i < sizeof (swizzle_str));

    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
             regtype_str, regnum_str, rel_lbracket,
             rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
             swizzle_str, postmod_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_ARB1_srcarg_string_in_buf
// Returns the ARB1 variable name of the current instruction's destination
//  register (ctx->dest_arg), without write mask or modifiers.
static const char *get_ARB1_destarg_varname(Context *ctx)
{
    return get_ARB1_varname(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum);
} // get_ARB1_destarg_varname
// Returns the ARB1 variable name of source argument number `idx` of the
//  current instruction, without swizzle or modifiers. If `idx` is outside
//  ctx->source_args, fail() is called and "" is returned.
static const char *get_ARB1_srcarg_varname(Context *ctx, int idx)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return get_ARB1_varname(ctx, src->regtype, src->regnum);
    } // if

    fail(ctx, "Too many source args");
    return "";
} // get_ARB1_srcarg_varname
// Build the ARB1 text for the current instruction's destination operand
//  (ctx->dest_arg).
//
// The result has the form "<sat> <register><writemask>": it starts with an
//  optional "_SAT" suffix for the opcode, followed by a space, so callers
//  append it directly to the opcode mnemonic ("MOV%s, ...").
//
// Returns a string in a scratch buffer obtained from get_scratch_buffer().
//  On unsupported input (MOD_CENTROID, an unknown register type, or a
//  predicated instruction) fail() is called and "" is returned.
static const char *make_ARB1_destarg_string(Context *ctx)
{
    const DestArgInfo *arg = &ctx->dest_arg;

    const char *sat_str = "";
    // The "_SAT" modifier is only available in fragment shaders, but we'll
    //  fake it for them later in emit_ARB1_dest_modifiers() ...
    if ( (arg->result_mod & MOD_SATURATE) && (shader_is_pixel(ctx)) )
        sat_str = "_SAT";

    // no partial precision (MOD_PP), but that's okay.

    if (arg->result_mod & MOD_CENTROID)
    {
        fail(ctx, "dest register MOD_CENTROID currently unsupported in arb1");
        return "";
    } // if

    char regnum_str[16];
    const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                                       arg->regnum, regnum_str,
                                                       sizeof (regnum_str));
    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        return "";
    } // if

    char writemask_str[6];
    int i = 0;
    const int scalar = scalar_register(arg->regtype, arg->regnum);
    if (!scalar && !writemask_xyzw(arg->writemask))
    {
        writemask_str[i++] = '.';
        if (arg->writemask0) writemask_str[i++] = 'x';
        if (arg->writemask1) writemask_str[i++] = 'y';
        if (arg->writemask2) writemask_str[i++] = 'z';
        if (arg->writemask3) writemask_str[i++] = 'w';
    } // if
    writemask_str[i] = '\0';
    assert(i < sizeof (writemask_str));

    // Predication isn't implemented for this profile. When it is, the
    //  predicate operand (ctx->predicate_arg) will have to be rendered and
    //  worked into the returned string; until then we just bail.
    if (ctx->predicated)
    {
        fail(ctx, "dest register predication currently unsupported in arb1");
        return "";
    } // if

    char *retval = get_scratch_buffer(ctx);
    snprintf(retval, SCRATCH_BUFFER_SIZE, "%s %s%s%s", sat_str,
             regtype_str, regnum_str, writemask_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return retval;
} // make_ARB1_destarg_string
// Emit the follow-up instructions that implement the destination modifiers
//  of the current instruction: a MUL for the result shift (scale by a power
//  of two) and, outside of pixel shaders, a MIN/MAX pair that clamps the
//  result to [0.0, 1.0] for MOD_SATURATE.
static void emit_ARB1_dest_modifiers(Context *ctx)
{
    const DestArgInfo *dest = &ctx->dest_arg;

    if (dest->result_shift != 0x0)
    {
        const char *regname = get_ARB1_destarg_varname(ctx);
        const char *desttext = make_ARB1_destarg_string(ctx);
        const char *scale = NULL;

        // Shift values with no entry here leave `scale` NULL: nothing emitted.
        if (dest->result_shift == 0x1)
            scale = "2.0";
        else if (dest->result_shift == 0x2)
            scale = "4.0";
        else if (dest->result_shift == 0x3)
            scale = "8.0";
        else if (dest->result_shift == 0xD)
            scale = "0.125";
        else if (dest->result_shift == 0xE)
            scale = "0.25";
        else if (dest->result_shift == 0xF)
            scale = "0.5";

        if (scale != NULL)
            output_line(ctx, "MUL%s, %s, %s;", desttext, regname, scale);
    } // if

    // Pixel shaders get the "_SAT" opcode suffix from
    //  make_ARB1_destarg_string() instead, so there is nothing more to do.
    if ( !(dest->result_mod & MOD_SATURATE) || (shader_is_pixel(ctx)) )
        return;

    const char *regname = get_ARB1_destarg_varname(ctx);
    const char *desttext = make_ARB1_destarg_string(ctx);
    output_line(ctx, "MIN%s, %s, 1.0;", desttext, regname);
    output_line(ctx, "MAX%s, %s, 0.0;", desttext, regname);
} // emit_ARB1_dest_modifiers
// Render source argument number `idx` of the current instruction (register,
//  swizzle and source modifiers) into a scratch buffer and return it.
//  If `idx` is outside ctx->source_args, fail() is called and "" is returned.
static const char *make_ARB1_srcarg_string(Context *ctx, const int idx)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        char *scratchbuf = get_scratch_buffer(ctx);
        return make_ARB1_srcarg_string_in_buf(ctx, src, scratchbuf,
                                              SCRATCH_BUFFER_SIZE);
    } // if

    fail(ctx, "Too many source args");
    return "";
} // make_ARB1_srcarg_string
// Emit "<opcode><dest>, <src0>;" for an instruction with a destination and
//  one source operand, then apply the destination modifiers.
static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
{
    // Operand strings are built in this order: destination first.
    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);

    output_line(ctx, "%s%s, %s;", opcode, dest_text, arg0_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_ds
// Emit "<opcode><dest>, <src0>, <src1>;" for an instruction with a
//  destination and two source operands, then apply the destination modifiers.
static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
{
    // Operand strings are built in this order: destination first.
    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);
    const char *arg1_text = make_ARB1_srcarg_string(ctx, 1);

    output_line(ctx, "%s%s, %s, %s;", opcode, dest_text, arg0_text, arg1_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dss
// Emit "<opcode><dest>, <src0>, <src1>, <src2>;" for an instruction with a
//  destination and three source operands, then apply the destination
//  modifiers.
static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
{
    // Operand strings are built in this order: destination first.
    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);
    const char *arg1_text = make_ARB1_srcarg_string(ctx, 1);
    const char *arg2_text = make_ARB1_srcarg_string(ctx, 2);

    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dest_text,
                arg0_text, arg1_text, arg2_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dsss
// Boilerplate generators for opcode emitters. Each EMIT_ARB1_OPCODE_*_FUNC(op)
//  defines a static function emit_ARB1_<op>(Context *ctx) that forwards to
//  the matching emit_ARB1_opcode*() helper, passing the stringified opcode
//  name as the ARB1 mnemonic. The suffix names the operand layout:
//  D = destination, S = one source operand (so DSS = dest plus two sources).
// NOTE(review): only the _ds, _dss and _dsss helpers are defined in this part
//  of the file; the other variants only compile if their helper exists
//  elsewhere or the macro is never instantiated.
#define EMIT_ARB1_OPCODE_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_D_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_d(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_S_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_s(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_ss(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_ds(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dss(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dsss(ctx, #op); \
}
#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dssss(ctx, #op); \
}
// Generates an emitter that reports "<op> unimplemented in <profile> profile"
//  through failf() instead of producing any output.
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
static void emit_ARB1_##op(Context *ctx) { \
failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
}
// Begin an ARB1-family program: writes the "!!ARBvp1.0"/"!!ARBfp1.0" header
//  (plus an "OPTION NV_..." line for the NV profiles) to the globals section,
//  records which NV extensions are available in ctx, and leaves ctx->output
//  pointing at the mainline section.
//
//  profilestr: the requested profile name; must match one of
//              MOJOSHADER_PROFILE_ARB1, MOJOSHADER_PROFILE_NV2 or
//              MOJOSHADER_PROFILE_NV3, otherwise failf() is called.
//
// If the shader is neither a vertex nor a pixel shader, failf() is called
//  and ctx->output is left untouched.
static void emit_ARB1_start(Context *ctx, const char *profilestr)
{
    const char *abbrev = NULL;    // "vp" or "fp", for the program header.
    const char *longname = NULL;  // "vertex" or "fragment", for OPTION lines.

    if (shader_is_vertex(ctx))
    {
        abbrev = "vp";
        longname = "vertex";
    } // if
    else if (shader_is_pixel(ctx))
    {
        abbrev = "fp";
        longname = "fragment";
    } // else if
    else
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // else

    ctx->output = &ctx->globals;

    // -1 == unknown profile, 0 == plain arb1, 2 == nv2, 3 == nv3.
    int nvlevel = -1;
    if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
        nvlevel = 0;
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
        nvlevel = 2;
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
        nvlevel = 3;

    if (nvlevel < 0)
        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
    else
    {
        if (nvlevel >= 2)
            ctx->support_nv2 = 1;
        if (nvlevel >= 3)
            ctx->support_nv3 = 1;

        output_line(ctx, "!!ARB%s1.0", abbrev);

        if (nvlevel == 2)
            output_line(ctx, "OPTION NV_%s_program2;", longname);
        else if (nvlevel == 3)
        {
            // there's no NV_fragment_program3, so just use 2.
            const int ver = shader_is_pixel(ctx) ? 2 : 3;
            output_line(ctx, "OPTION NV_%s_program%d;", longname, ver);
        } // else if
    } // else

    ctx->output = &ctx->mainline;
} // emit_ARB1_start
// Terminates the program text by writing the "END" line to the current
//  output section.
static void emit_ARB1_end(Context *ctx)
{
output_line(ctx, "END");
} // emit_ARB1_end
// Handler for the "phase" instruction; this profile emits nothing for it.
static void emit_ARB1_phase(Context *ctx)
{
// no-op in arb1.
} // emit_ARB1_phase
// Final pass for the profile: declares, in the globals section, a TEMP for
//  every scratch register the program needed and (for nv2 programs that
//  aren't pixel shaders) a "repN" TEMP for each emulated REP loop counter.
static void emit_ARB1_finalize(Context *ctx)
{
    // !!! FIXME: if we never wrote the position register, add the
    // !!! FIXME:  position_invariant program option here.

    int n = 0;

    push_output(ctx, &ctx->globals);

    // The bound is re-read on every pass on purpose; don't cache it.
    while (n < ctx->max_scratch_registers)
    {
        output_line(ctx, "TEMP %s;", allocate_ARB1_scratch_reg_name(ctx));
        n++;
    } // while

    if ( (ctx->support_nv2) && (!shader_is_pixel(ctx)) )
    {
        // set up temps for nv2 REP/ENDREP emulation through branching.
        n = 0;
        while (n < ctx->max_reps)
        {
            output_line(ctx, "TEMP rep%d;", n);
            n++;
        } // while
    } // if

    pop_output(ctx);

    // Every scratch register should have been named exactly once above.
    assert(ctx->scratch_registers == ctx->max_scratch_registers);
} // emit_ARB1_finalize
// Declare a register used by the program in the globals section.
//  Address registers become an ADDRESS declaration (plus a companion
//  "addrN" TEMP when nv2 isn't available) and temp registers become a TEMP.
//  Any other register type is reported through fail().
static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
{
    // !!! FIXME: dependency on ARB1 profile.
    const char *name = get_ARB1_varname(ctx, regtype, regnum);

    push_output(ctx, &ctx->globals);

    if (regtype == REG_TYPE_ADDRESS)
    {
        output_line(ctx, "ADDRESS %s;", name);
        if (!ctx->support_nv2)  // nv2 has four-component address already.
            output_line(ctx, "TEMP addr%d;", regnum);
    } // if
    else if (regtype == REG_TYPE_TEMP)
    {
        output_line(ctx, "TEMP %s;", name);
    } // else if
    else
    {
        // Predicate, loop and label registers are not handled here (yet).
        fail(ctx, "BUG: we used a register we don't know how to define.");
    } // else

    pop_output(ctx);
} // emit_ARB1_global
// Declare, in the globals section, a PARAM array that maps the `size`
//  program.local entries starting at `base`, so they can be indexed
//  relative to an address register.
static void emit_ARB1_array(Context *ctx, int base, int size)
{
    const char *arrayname = get_ARB1_const_array_varname(ctx, base, size);
    const int last = (base + size) - 1;   // inclusive upper bound.

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", arrayname,
                size, base, last);
    pop_output(ctx);
} // emit_ARB1_array
// Declare, in the globals section, a PARAM array of `size` literal float
//  vectors taken from `clist`. Non-float entries in the list are skipped;
//  the float entries are expected (asserted) to have consecutive indices
//  starting at `base`.
static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
                                  int base, int size)
{
    const char *arrayname = get_ARB1_const_array_varname(ctx, base, size);
    const int saved_scratchidx = ctx->scratchidx;
    int row;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = {", arrayname, size);
    ctx->indent++;

    for (row = 0; row < size; row++)
    {
        char component[4][32];
        int c;

        // Advance to the next float constant in the list.
        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
            clist = clist->next;
        assert(clist->constant.index == (base + row));

        for (c = 0; c < 4; c++)
        {
            floatstr(ctx, component[c], sizeof (component[c]),
                     clist->constant.value.f[c], 1);
        } // for

        // Every row but the last is followed by a comma.
        output_line(ctx, "{ %s, %s, %s, %s }%s", component[0], component[1],
                    component[2], component[3], (row < (size-1)) ? "," : "");

        ctx->scratchidx = saved_scratchidx;  // rewind the scratch index.
        clist = clist->next;
    } // for

    ctx->indent--;
    output_line(ctx, "};");
    pop_output(ctx);
} // emit_ARB1_const_array
// Declare a single uniform in the globals section as a PARAM bound to
//  program.local[regnum]. Only float constant registers (REG_TYPE_CONST)
//  are supported; anything else is reported through fail().
//  `arraybase` and `arraysize` are not used by this profile.
static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
                              int arraybase, int arraysize)
{
    const char *name = get_ARB1_varname(ctx, regtype, regnum);

    push_output(ctx, &ctx->globals);

    // !!! FIXME: this only works if you have no bool or int uniforms.
    if (regtype == REG_TYPE_CONST)
        output_line(ctx, "PARAM %s = program.local[%d];", name, regnum);
    else
        fail(ctx, "BUG: non-float uniforms not supported in arb1 at the moment");

    pop_output(ctx);
} // emit_ARB1_uniform
// Handler for sampler declarations. Nothing is emitted and neither `stage`
//  nor `ttype` is used.
static void emit_ARB1_sampler(Context *ctx, int stage, TextureType ttype)
{
// this is a no-op...you don't predeclare samplers in arb1.
} // emit_ARB1_sampler
// Declare a shader input or output register in the globals section, binding
//  it to the matching ARB1 built-in (vertex.attrib[], result.*, fragment.*).
//
//  regtype/regnum: the D3D register being declared.
//  usage/index:    the D3D usage (semantic) and usage index of the register.
//  wmask:          write mask of the declaration; currently ignored.
//
// Unsupported combinations are reported through fail(); a vertex shader
//  output with no known built-in mapping is declared as a plain TEMP.
// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
MOJOSHADER_usage usage, int index, int wmask)
{
// !!! FIXME: this function doesn't deal with write masks at all yet!
const char *varname = get_ARB1_varname(ctx, regtype, regnum);
const char *usage_str = NULL;
const char *arrayleft = "";
const char *arrayright = "";
char index_str[16] = { '\0' };
// NOTE: index_str is built from the incoming index; the pre-vs3 remapping
//  below may change `index` afterwards, and only the TEXCOORD cases
//  regenerate the string.
if (index != 0) // !!! FIXME: a lot of these MUST be zero.
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
if (shader_is_vertex(ctx))
{
// pre-vs3 output registers.
// these don't ever happen in DCL opcodes, I think. Map to vs_3_*
// output registers.
if (!shader_version_atleast(ctx, 3, 0))
{
if (regtype == REG_TYPE_RASTOUT)
{
regtype = REG_TYPE_OUTPUT;
index = regnum;
switch ((const RastOutType) regnum)
{
case RASTOUT_TYPE_POSITION:
usage = MOJOSHADER_USAGE_POSITION;
break;
case RASTOUT_TYPE_FOG:
usage = MOJOSHADER_USAGE_FOG;
break;
case RASTOUT_TYPE_POINT_SIZE:
usage = MOJOSHADER_USAGE_POINTSIZE;
break;
} // switch
} // if
else if (regtype == REG_TYPE_ATTROUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_COLOR;
index = regnum;
} // else if
else if (regtype == REG_TYPE_TEXCRDOUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_TEXCOORD;
index = regnum;
} // else if
} // if
// to avoid limitations of various GL entry points for input
// attributes (glSecondaryColorPointer() can only take 3 component
// items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
// issues), we set up all inputs as generic vertex attributes, so we
// can pass data in just about any form, and ignore the built-in GLSL
// attributes like gl_SecondaryColor. Output needs to use the
// built-ins, though, but we don't have to worry about the GL entry
// point limitations there.
if (regtype == REG_TYPE_INPUT)
{
// Every input other than POSITION0 takes the next generic attribute slot.
int attr = 0; // POSITION0 _must_ be vertex.attrib[0]!
if ((usage != MOJOSHADER_USAGE_POSITION) || (index != 0))
attr = ++ctx->assigned_vertex_attributes;
push_output(ctx, &ctx->globals);
output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
pop_output(ctx);
} // if
else if (regtype == REG_TYPE_OUTPUT)
{
switch (usage)
{
case MOJOSHADER_USAGE_POSITION:
usage_str = "result.position";
break;
case MOJOSHADER_USAGE_POINTSIZE:
usage_str = "result.pointsize";
break;
case MOJOSHADER_USAGE_COLOR:
// A color index other than 0 or 1 leaves usage_str NULL, so the
//  register falls back to a TEMP declaration below.
index_str[0] = '\0'; // no explicit number.
if (index == 0)
usage_str = "result.color.primary";
else if (index == 1)
usage_str = "result.color.secondary";
break;
case MOJOSHADER_USAGE_FOG:
usage_str = "result.fogcoord";
break;
case MOJOSHADER_USAGE_TEXCOORD:
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "result.texcoord";
arrayleft = "[";
arrayright = "]";
break;
default:
// !!! FIXME: we need to deal with some more built-in varyings here.
break;
} // switch
// !!! FIXME: the #define is a little hacky, but it means we don't
// !!! FIXME: have to track these separately if this works.
push_output(ctx, &ctx->globals);
// no mapping to built-in var? Just make it a regular global, pray.
if (usage_str == NULL)
output_line(ctx, "TEMP %s;", varname);
else
{
output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
arrayleft, index_str, arrayright);
} // else
pop_output(ctx);
} // else if
else
{
fail(ctx, "unknown vertex shader attribute register");
} // else
} // if
else if (shader_is_pixel(ctx))
{
// Inputs are declared as ATTRIB; color/depth outputs switch this to OUTPUT.
const char *paramtype_str = "ATTRIB";
// samplers DCLs get handled in emit_ARB1_sampler().
if (regtype == REG_TYPE_COLOROUT)
{
paramtype_str = "OUTPUT";
usage_str = "result.color";
} // if
else if (regtype == REG_TYPE_DEPTHOUT)
{
paramtype_str = "OUTPUT";
usage_str = "result.depth";
} // else if
// !!! FIXME: can you actually have a texture register with COLOR usage?
else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
{
// NOTE: a usage other than TEXCOORD or COLOR leaves usage_str NULL here,
//  so nothing is declared and no failure is reported.
if (usage == MOJOSHADER_USAGE_TEXCOORD)
{
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "fragment.texcoord";
arrayleft = "[";
arrayright = "]";
} // if
else if (usage == MOJOSHADER_USAGE_COLOR)
{
index_str[0] = '\0'; // no explicit number.
if (index == 0)
usage_str = "fragment.color.primary";
else if (index == 1)
usage_str = "fragment.color.secondary";
else
fail(ctx, "unsupported color index");
} // else if
} // else if
else if (regtype == REG_TYPE_MISCTYPE)
{
const MiscTypeType mt = (MiscTypeType) regnum;
if (mt == MISCTYPE_TYPE_FACE)
{
fail(ctx, "Can't handle vFace in arb1 profile"); // !!! FIXME
//push_output(ctx, &ctx->globals);
//output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;",
// varname);
//pop_output(ctx);
} // if
else if (mt == MISCTYPE_TYPE_POSITION)
{
index_str[0] = '\0'; // no explicit number.
usage_str = "fragment.position"; // !!! FIXME: is this the same coord space as D3D?
} // else if
else
{
fail(ctx, "BUG: unhandled misc register");
} // else
} // else if
else
{
fail(ctx, "unknown pixel shader attribute register");
} // else
// Only emit a declaration if one of the branches above found a binding.
if (usage_str != NULL)
{
push_output(ctx, &ctx->globals);
output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
usage_str, arrayleft, index_str, arrayright);
pop_output(ctx);
} // if
} // else if
else
{
fail(ctx, "Unknown shader type"); // state machine should catch this.
} // else
} // emit_ARB1_attribute
// Emitter for the RESERVED opcode slot; produces no output.
static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }
// Emitter for the NOP opcode; produces no output.
static void emit_ARB1_NOP(Context *ctx)
{
// There is no NOP in arb1. Just don't output anything here.
} // emit_ARB1_NOP
// Opcodes that are emitted under their own name: each line defines
//  emit_ARB1_<op>(), which forwards to emit_ARB1_opcode_ds/dss/dsss with
//  "<op>" as the mnemonic (DS = dest + 1 source, DSS = dest + 2 sources,
//  DSSS = dest + 3 sources).
EMIT_ARB1_OPCODE_DS_FUNC(MOV)
EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
EMIT_ARB1_OPCODE_DS_FUNC(RCP)
EMIT_ARB1_OPCODE_DS_FUNC(RSQ)
EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
// EXP is emitted as the one-source "EX2" instruction.
static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
// Emit LOG as LG2 of the absolute value of source 0.
//  When the source modifier is NONE or ABSNEGATE it is rewritten (in
//  ctx->source_args[0]) to ABS so the operand string itself carries the
//  absolute value; for any other modifier the operand is first run through
//  an explicit ABS into a scratch register.
static void emit_ARB1_LOG(Context *ctx)
{
    SourceArgInfo *operand = &ctx->source_args[0];

    // we can optimize this to use nv2's |abs| construct in some cases.
    switch (operand->src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_ABSNEGATE:
            operand->src_mod = SRCMOD_ABS;
            break;
        default:
            break;
    } // switch

    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);

    if (operand->src_mod != SRCMOD_ABS)
    {
        const char *tmp = allocate_ARB1_scratch_reg_name(ctx);
        output_line(ctx, "ABS %s, %s;", tmp, arg0_text);
        output_line(ctx, "LG2%s, %s.x;", dest_text, tmp);
    } // if
    else
    {
        output_line(ctx, "LG2%s, %s;", dest_text, arg0_text);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_LOG
// LIT (dest + 1 source) and DST (dest + 2 sources) are emitted under their
//  own names.
EMIT_ARB1_OPCODE_DS_FUNC(LIT)
EMIT_ARB1_OPCODE_DSS_FUNC(DST)
// Emit a linear interpolation. Pixel shaders use the LRP opcode directly;
//  everything else expands it to SUB + MAD through a scratch register.
static void emit_ARB1_LRP(Context *ctx)
{
    if (shader_is_pixel(ctx))  // fragment shaders have a matching LRP opcode.
    {
        emit_ARB1_opcode_dsss(ctx, "LRP");
        return;
    } // if

    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);
    const char *arg1_text = make_ARB1_srcarg_string(ctx, 1);
    const char *arg2_text = make_ARB1_srcarg_string(ctx, 2);
    const char *tmp = allocate_ARB1_scratch_reg_name(ctx);

    // LRP is: dest = src2 + src0 * (src1 - src2)
    //  tmp  = src1 - src2
    //  dest = tmp * src0 + src2
    output_line(ctx, "SUB %s, %s, %s;", tmp, arg1_text, arg2_text);
    output_line(ctx, "MAD%s, %s, %s, %s;", dest_text, tmp, arg0_text,
                arg2_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_LRP
// FRC (dest + 1 source) is emitted under its own name.
EMIT_ARB1_OPCODE_DS_FUNC(FRC)
// The matrix-multiply opcodes below are not translated; their emitters only
//  report "<op> unimplemented in <profile> profile" through failf().
// !!! FIXME: these could be implemented with vector opcodes, but it looks
// !!! FIXME: like the Microsoft HLSL compiler never generates matrix
// !!! FIXME: operations for some reason.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M4X4)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M4X3)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X4)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X3)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(M3X2)
// Emit a subroutine call ("CAL <label>;"). Requires nv2 support; otherwise
//  failf() reports that branching is unsupported in this profile.
static void emit_ARB1_CALL(Context *ctx)
{
    const char *target = get_ARB1_srcarg_varname(ctx, 0);

    if (ctx->support_nv2)
        output_line(ctx, "CAL %s;", target);
    else  // no branching in stock ARB1.
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
} // emit_ARB1_CALL
// Emit a conditional subroutine call: source 1 is moved through MOVC into a
//  scratch register and the call is taken on the NE.x condition.
//  Requires nv2 support; otherwise failf() reports that branching is
//  unsupported in this profile.
static void emit_ARB1_CALLNZ(Context *ctx)
{
    // !!! FIXME: if src1 is a constbool that's true, we can remove the
    // !!! FIXME:  if. If it's false, we can make this a no-op.
    const char *target = get_ARB1_srcarg_varname(ctx, 0);
    const char *cond_text = make_ARB1_srcarg_string(ctx, 1);
    const char *tmp = allocate_ARB1_scratch_reg_name(ctx);

    if (ctx->support_nv2)
    {
        // !!! FIXME: double-check this.
        output_line(ctx, "MOVC %s, %s;", tmp, cond_text);
        output_line(ctx, "CAL %s (NE.x);", target);
    } // if
    else  // no branching in stock ARB1.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_CALLNZ
// LOOP is not translated; its emitter only reports failure through failf().
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
// Emit "RET;" when nv2 branching is available, then switch output back to
//  the mainline section.
static void emit_ARB1_RET(Context *ctx)
{
    // Deliberately no fail() without nv2: this RET may simply be closing the
    //  mainline. If it closes a LABEL that was never CALLed, the text went
    //  to ctx->ignore anyway, so the arb1 profile just drops that code.
    if (ctx->support_nv2)  // no branching in stock ARB1.
    {
        output_line(ctx, "RET;");
    } // if

    // in case we were ignoring this function.
    ctx->output = &ctx->mainline;
} // emit_ARB1_RET
// ENDLOOP is not translated; its emitter only reports failure through failf().
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
// Emit a subroutine label ("<name>:"). Without nv2 support nothing happens.
//  If the label is not in ctx->used_registers, output is redirected to
//  ctx->ignore so the unused subroutine is parsed but not emitted.
static void emit_ARB1_LABEL(Context *ctx)
{
    // no branching in stock ARB1. Don't fail() here...maybe the label is
    //  never used; CALL is where we complain.
    if (!ctx->support_nv2)
        return;

    const char *name = get_ARB1_srcarg_varname(ctx, 0);
    RegisterList *used = reglist_find(&ctx->used_registers, REG_TYPE_LABEL,
                                      ctx->source_args[0].regnum);

    // MSDN specs say CALL* has to come before the LABEL, so we know if we
    //  can ditch the entire function here as unused.
    if (used == NULL)
        ctx->output = &ctx->ignore;  // Func not used. Parse, but don't output.

    // !!! FIXME: it would be nice if we could determine if a function is
    // !!! FIXME:  only called once and, if so, forcibly inline it.

    output_line(ctx, "%s:", name);
} // emit_ARB1_LABEL
// Emit POW of the absolute value of source 0 raised to source 1.
//  When the source-0 modifier is NONE or ABSNEGATE it is rewritten (in
//  ctx->source_args[0]) to ABS so the operand string itself carries the
//  absolute value; for any other modifier the operand is first run through
//  an explicit ABS into a scratch register.
static void emit_ARB1_POW(Context *ctx)
{
    SourceArgInfo *base = &ctx->source_args[0];

    // we can optimize this to use nv2's |abs| construct in some cases.
    switch (base->src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_ABSNEGATE:
            base->src_mod = SRCMOD_ABS;
            break;
        default:
            break;
    } // switch

    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);
    const char *arg1_text = make_ARB1_srcarg_string(ctx, 1);

    if (base->src_mod != SRCMOD_ABS)
    {
        const char *tmp = allocate_ARB1_scratch_reg_name(ctx);
        output_line(ctx, "ABS %s, %s;", tmp, arg0_text);
        output_line(ctx, "POW%s, %s.x, %s;", dest_text, tmp, arg1_text);
    } // if
    else
    {
        output_line(ctx, "POW%s, %s, %s;", dest_text, arg0_text, arg1_text);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_POW
// CRS (cross product) is emitted as the two-source "XPD" instruction.
static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
// Emit the sign function. nv2 has a native SSG opcode; stock ARB1 builds the
//  result from two SLT comparisons:
//    scratch1 = (src < 0)  ? 1.0 : 0.0
//    scratch2 = (-src < 0) ? 1.0 : 0.0     (i.e. src > 0)
//    dest     = scratch2 - scratch1        (-1.0, 0.0 or 1.0)
static void emit_ARB1_SGN(Context *ctx)
{
    if (ctx->support_nv2)
        emit_ARB1_opcode_ds(ctx, "SSG");
    else
    {
        const char *dst = make_ARB1_destarg_string(ctx);
        const char *src0 = make_ARB1_srcarg_string(ctx, 0);
        const char *scratch1 = allocate_ARB1_scratch_reg_name(ctx);
        const char *scratch2 = allocate_ARB1_scratch_reg_name(ctx);
        output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
        output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
        // The destination must be separated from the first operand by a
        //  comma, like every other instruction emitted here.
        output_line(ctx, "ADD%s, -%s, %s;", dst, scratch1, scratch2);
        emit_ARB1_dest_modifiers(ctx);
    } // else
} // emit_ARB1_SGN
// ABS (dest + 1 source) is emitted under its own name.
EMIT_ARB1_OPCODE_DS_FUNC(ABS)
// Emit a vector normalize. nv2 pixel shaders use the NRM opcode directly;
//  everything else expands to DP3 / RSQ / MUL through a scratch register.
static void emit_ARB1_NRM(Context *ctx)
{
    // nv2 fragment programs have a real NRM.
    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
    {
        emit_ARB1_opcode_ds(ctx, "NRM");
        return;
    } // if

    const char *dest_text = make_ARB1_destarg_string(ctx);
    const char *arg0_text = make_ARB1_srcarg_string(ctx, 0);
    const char *tmp = allocate_ARB1_scratch_reg_name(ctx);

    // tmp.w = dot(src, src); tmp.w = 1/sqrt(tmp.w); dest = tmp.w * src
    output_line(ctx, "DP3 %s.w, %s, %s;", tmp, arg0_text, arg0_text);
    output_line(ctx, "RSQ %s.w, %s.w;", tmp, tmp);
    output_line(ctx, "MUL%s, %s.w, %s;", dest_text, tmp, arg0_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_NRM
// Emit SINCOS. D3D's sincos writes cos(src) to dest.x and sin(src) to
//  dest.y, depending on which of those components the write mask selects
//  (.x, .y or .xy).
//
// Three strategies, in order of preference:
//  - fragment programs: native COS / SIN / SCS opcodes.
//  - nv2 vertex programs: native COS / SIN opcodes.
//  - stock ARB1 vertex programs: a polynomial approximation evaluated in a
//    scratch register, so only the masked destination component is written.
//
// Destination modifiers are applied afterwards unless something failed.
static void emit_ARB1_SINCOS(Context *ctx)
{
    // we don't care about the temp registers that <= sm2 demands; ignore them.
    const int mask = ctx->dest_arg.writemask;

    // arb1 fragment shaders have sin/cos/sincos opcodes.
    if (shader_is_pixel(ctx))
    {
        const char *dst = make_ARB1_destarg_string(ctx);
        const char *src0 = make_ARB1_srcarg_string(ctx, 0);
        if (writemask_x(mask))
            output_line(ctx, "COS%s, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN%s, %s;", dst, src0);
        else if (writemask_xy(mask))
            output_line(ctx, "SCS%s, %s;", dst, src0);
    } // if

    // nv2+ shaders have sin and cos opcodes.
    else if (ctx->support_nv2)
    {
        const char *dst = get_ARB1_destarg_varname(ctx);
        const char *src0 = make_ARB1_srcarg_string(ctx, 0);
        if (writemask_x(mask))
            output_line(ctx, "COS %s.x, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
        else if (writemask_xy(mask))
        {
            // Same component assignment as the single-component cases:
            //  cosine goes to .x, sine goes to .y.
            output_line(ctx, "COS %s.x, %s;", dst, src0);
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
        } // else if
    } // else if

    else  // big nasty.
    {
        const char *dst = get_ARB1_destarg_varname(ctx);
        const char *src0 = get_ARB1_srcarg_varname(ctx, 0);
        const int need_cos = (writemask_x(mask) || writemask_xy(mask));
        const int need_sin = (writemask_y(mask) || writemask_xy(mask));
        const char *scratch = allocate_ARB1_scratch_reg_name(ctx);

        // NOTE(review): like the code this replaces, the angle is always
        //  read from src0.x; the operand's own swizzle is not consulted.

        // These sin() and cos() approximations originally found here:
        //    http://www.devmaster.net/forums/showthread.php?t=5784
        //
        // const float B = 4.0f / M_PI;
        // const float C = -4.0f / (M_PI * M_PI);
        // float y = B * x + C * x * fabs(x);
        //
        // // optional better precision...
        // const float P = 0.225f;
        // y = P * (y * fabs(y) - y) + y;
        //
        //
        // That first thing can be reduced to:
        // const float y = ((1.2732395447351626861510701069801f * x) +
        //             ((-0.40528473456935108577551785283891f * x) * fabs(x)));

        if (need_sin)
        {
            // scratch.x = C * |x|; scratch.y = B * x;
            //  dst.y = scratch.x * x + scratch.y
            // All intermediates live in the scratch register so that only
            //  the masked destination component is written.
            // !!! FIXME: use SRCMOD_ABS here?
            output_line(ctx, "ABS %s.x, %s.x;", scratch, src0);
            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", scratch, scratch);
            output_line(ctx, "MUL %s.y, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
            output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.y;", dst, scratch, src0, scratch);
        } // if

        // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
        //   if (x+(M_PI/2) > M_PI)
        //       x -= 2 * M_PI;
        //
        // which is...
        //   if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
        //       x += -6.283185482025146484375;

        if (need_cos)
        {
            // scratch.x = wrapped (x + pi/2); this is the angle fed to the
            //  sine approximation below.
            output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
            output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
            output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
            // scratch.y = C * |a|; scratch.z = B * a;
            //  dst.x = scratch.y * a + scratch.z     (a == scratch.x)
            output_line(ctx, "ABS %s.y, %s.x;", scratch, scratch);
            output_line(ctx, "MUL %s.y, %s.y, -0.40528473456935108577551785283891;", scratch, scratch);
            output_line(ctx, "MUL %s.z, %s.x, 1.2732395447351626861510701069801;", scratch, scratch);
            output_line(ctx, "MAD %s.x, %s.y, %s.x, %s.z;", dst, scratch, scratch, scratch);
        } // if
    } // else

    if (!isfail(ctx))
        emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_SINCOS
// Begin a REP loop.
//  - nv2 pixel shaders: native "REP <count>;".
//  - other nv2 shaders: emulated with branches. The count is loaded into a
//    "repN" temp with MOVC, the loop is skipped if it is <= 0, and a top
//    label is emitted. The top and exit labels are pushed (in that order)
//    onto ctx->branch_labels_stack for ENDREP/BREAK to find.
//  - stock ARB1: fail(), since there is no branching.
static void emit_ARB1_REP(Context *ctx)
{
    const char *count_text = make_ARB1_srcarg_string(ctx, 0);

    if (!ctx->support_nv2)  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    if (shader_is_pixel(ctx))  // nv2 fragment programs have a real REP.
    {
        output_line(ctx, "REP %s;", count_text);
        return;
    } // if

    // no REP, but we can use branches.
    const int top_id = allocate_branch_label(ctx);
    const int exit_id = allocate_branch_label(ctx);
    const char *top_name = get_ARB1_branch_label_name(ctx, top_id);
    const char *exit_name = get_ARB1_branch_label_name(ctx, exit_id);

    // Two slots are needed on the label stack.
    assert(ctx->branch_labels_stack_index < STATICARRAYLEN(ctx->branch_labels_stack)-1);
    ctx->branch_labels_stack[ctx->branch_labels_stack_index] = top_id;
    ctx->branch_labels_stack[ctx->branch_labels_stack_index + 1] = exit_id;
    ctx->branch_labels_stack_index += 2;

    char counter[32];
    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);

    output_line(ctx, "MOVC %s.x, %s;", counter, count_text);
    output_line(ctx, "BRA %s (LE.x);", exit_name);
    output_line(ctx, "%s:", top_name);
} // emit_ARB1_REP
// End a REP loop.
//  - nv2 pixel shaders: native "ENDREP;".
//  - other nv2 shaders: pops the exit and top labels pushed by REP,
//    decrements the "repN" counter with SUBC, branches back to the top
//    while it is > 0, then emits the exit label.
//  - stock ARB1: fail(), since there is no branching.
static void emit_ARB1_ENDREP(Context *ctx)
{
    if (!ctx->support_nv2)  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    if (shader_is_pixel(ctx))  // nv2 fragment programs have a real ENDREP.
    {
        output_line(ctx, "ENDREP;");
        return;
    } // if

    // no ENDREP, but we can use branches.
    assert(ctx->branch_labels_stack_index >= 2);

    // REP pushed the top label first, so the exit label comes off first.
    ctx->branch_labels_stack_index--;
    const int exit_id = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
    ctx->branch_labels_stack_index--;
    const int top_id = ctx->branch_labels_stack[ctx->branch_labels_stack_index];

    const char *top_name = get_ARB1_branch_label_name(ctx, top_id);
    const char *exit_name = get_ARB1_branch_label_name(ctx, exit_id);

    char counter[32];
    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);

    output_line(ctx, "SUBC %s.x, %s.x, 1.0;", counter, counter);
    output_line(ctx, "BRA %s (GT.x);", top_name);
    output_line(ctx, "%s:", exit_name);
} // emit_ARB1_ENDREP
// Open a conditional block for the nv2 profiles. The caller must already
//  have set up the condition code register.
//  Pixel shaders get a native "IF EQ.x;"; other shaders get a branch
//  (taken on EQ.x) to a freshly allocated label, which is pushed onto
//  ctx->branch_labels_stack for ELSE/ENDIF to resolve.
static void nv2_if(Context *ctx)
{
    // The condition code register MUST be set up before this!
    if (shader_is_pixel(ctx))  // nv2 fragment programs have a real IF.
    {
        output_line(ctx, "IF EQ.x;");
        return;
    } // if

    // there's no IF construct, but we can use a branch to a label.
    const int skip_id = allocate_branch_label(ctx);
    const char *skip_name = get_ARB1_branch_label_name(ctx, skip_id);

    assert(ctx->branch_labels_stack_index < STATICARRAYLEN(ctx->branch_labels_stack));
    ctx->branch_labels_stack[ctx->branch_labels_stack_index] = skip_id;
    ctx->branch_labels_stack_index++;

    output_line(ctx, "BRA %s (EQ.x);", skip_name);
} // nv2_if
// Emit an IF on source 0: the value is moved through MOVC into a scratch
//  register to set the condition code, then nv2_if() opens the block.
//  Without nv2 support, failf() reports that branching is unsupported.
static void emit_ARB1_IF(Context *ctx)
{
    if (!ctx->support_nv2)  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    const char *tmp = allocate_ARB1_scratch_reg_name(ctx);
    const char *cond_name = get_ARB1_srcarg_varname(ctx, 0);
    output_line(ctx, "MOVC %s.x, %s;", tmp, cond_name);
    nv2_if(ctx);
} // emit_ARB1_IF
// Emit an ELSE.
//  - nv2 pixel shaders: native "ELSE;".
//  - other nv2 shaders: jump over the else-section to a new end label, emit
//    the label that the IF branched to, and replace that label on the stack
//    with the new end label so ENDIF resolves it.
//  - stock ARB1: failf(), since there is no branching.
static void emit_ARB1_ELSE(Context *ctx)
{
    if (!ctx->support_nv2)  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    if (shader_is_pixel(ctx))  // nv2 fragment programs have a real ELSE.
    {
        output_line(ctx, "ELSE;");
        return;
    } // if

    // there's no ELSE construct, but we can use a branch to a label.
    assert(ctx->branch_labels_stack_index > 0);
    const int top = ctx->branch_labels_stack_index - 1;

    // At the end of the IF block, unconditionally jump to the ENDIF.
    const int end_id = allocate_branch_label(ctx);
    output_line(ctx, "BRA %s;", get_ARB1_branch_label_name(ctx, end_id));

    // Now mark the ELSE section with a label.
    const int else_id = ctx->branch_labels_stack[top];
    output_line(ctx, "%s:", get_ARB1_branch_label_name(ctx, else_id));

    // Replace the ELSE label with the ENDIF on the label stack.
    ctx->branch_labels_stack[top] = end_id;
} // emit_ARB1_ELSE
// Emit an ENDIF opcode, either natively (nv2 fragment programs) or by
// placing the pending branch label (other nv2 programs).
static void emit_ARB1_ENDIF(Context *ctx)
{
    if (!ctx->support_nv2)
    {
        // stock ARB1 has no branching.
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    if (shader_is_pixel(ctx))
    {
        // nv2 fragment programs have a real ENDIF.
        output_line(ctx, "ENDIF;");
        return;
    } // if

    // No ENDIF construct: pop the pending label and drop it here.
    assert(ctx->branch_labels_stack_index > 0);
    ctx->branch_labels_stack_index--;
    const int donelabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
    output_line(ctx, "%s:", get_ARB1_branch_label_name(ctx, donelabel));
} // emit_ARB1_ENDIF
// Emit a BREAK opcode, either natively (nv2 fragment programs) or as an
// unconditional branch to the enclosing loop's exit label (other nv2
// programs).
static void emit_ARB1_BREAK(Context *ctx)
{
    // nv2 fragment programs have a real BREAK.
    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
        output_line(ctx, "BRK;");

    else if (ctx->support_nv2)
    {
        // no BREAK, but we can use branches.
        assert(ctx->branch_labels_stack_index >= 2);

        // branch_labels_stack_index is the next free slot, so the most
        // recently pushed label lives at index-1; reading [index] would
        // fetch a stale (or out-of-bounds) entry.
        // NOTE(review): this assumes the loop's exit label is the last one
        //  pushed for the loop -- confirm against the REP emitter.
        // !!! FIXME: a BREAK nested inside an emulated IF will find the IF's
        // !!! FIXME:  label on top of the stack instead of the loop's.
        const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
        const char *failbranch = get_ARB1_branch_label_name(ctx, faillabel);
        output_line(ctx, "BRA %s;", failbranch);
    } // else if

    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_BREAK
// Emit a MOVA opcode. With nv2 this is a single ARR; otherwise the rounding
// is spelled out by hand into an "addrN" register.
static void emit_ARB1_MOVA(Context *ctx)
{
    if (ctx->support_nv2)
    {
        // NV_vertex_program2_option and later can use the ARR opcode.
        emit_ARB1_opcode_ds(ctx, "ARR");
        return;
    } // if

    const char *source = make_ARB1_srcarg_string(ctx, 0);
    const char *sign = allocate_ARB1_scratch_reg_name(ctx);
    char addrname[32];
    snprintf(addrname, sizeof (addrname), "addr%d", ctx->dest_arg.regnum);

    // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.

    // ARL uses floor(), but D3D expects round-to-nearest.
    // There is probably a more efficient way to do this.

    // Step one: put -1.0 or 1.0 into the scratch register, matching the
    // sign of the source.
    if (!shader_is_pixel(ctx))
    {
        // CMP only exists in fragment programs, so use SLT and MAD instead.
        output_line(ctx, "SLT %s, %s, 0.0;", sign, source);
        output_line(ctx, "MAD %s, %s, -2.0, 1.0;", sign, sign);
    } // if
    else
    {
        output_line(ctx, "CMP %s, %s, -1.0, 1.0;", sign, source);
    } // else

    // Step two: floor(|src| + 0.5), then put the sign back.
    output_line(ctx, "ABS %s, %s;", addrname, source);
    output_line(ctx, "ADD %s, %s, 0.5;", addrname, addrname);
    output_line(ctx, "FLR %s, %s;", addrname, addrname);
    output_line(ctx, "MUL %s, %s, %s;", addrname, addrname, sign);

    // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
    //  wants to look at dest_arg, not our temp register.
    assert(ctx->dest_arg.result_mod == 0);
    assert(ctx->dest_arg.result_shift == 0);

    // we assign to the actual address register as needed.
    ctx->last_address_reg_component = -1;
} // emit_ARB1_MOVA
// Emit a TEXKILL opcode as an ARB1 KIL on the first source argument.
static void emit_ARB1_TEXKILL(Context *ctx)
{
    // !!! FIXME: d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle!
    const char *victim = make_ARB1_srcarg_string(ctx, 0);

    output_line(ctx, "KIL %s;", victim);
} // emit_ARB1_TEXKILL
// Texture opcodes the ARB1 profile does not implement yet. The macro is
// defined outside this section; presumably each line expands to an
// emit_ARB1_<opcode>() stub that reports the opcode as unimplemented.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXBEM)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXBEML)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2PAD)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3PAD)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3SPEC)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3VSPEC)
// Emit an EXPP opcode: handled as a plain dest/source "EXP" instruction.
static void emit_ARB1_EXPP(Context *ctx)
{
    emit_ARB1_opcode_ds(ctx, "EXP");
} // emit_ARB1_EXPP
// Emit a LOGP opcode as LOG of the absolute value of the source.
static void emit_ARB1_LOGP(Context *ctx)
{
    SourceArgInfo *arg0 = &ctx->source_args[0];

    // we can optimize this to use nv2's |abs| construct in some cases:
    // an unmodified or abs-negated source is rewritten as a plain ABS
    // source modifier before the argument string is built.
    if ((arg0->src_mod == SRCMOD_NONE) || (arg0->src_mod == SRCMOD_ABSNEGATE))
        arg0->src_mod = SRCMOD_ABS;

    const char *dest = make_ARB1_destarg_string(ctx);
    const char *source = make_ARB1_srcarg_string(ctx, 0);

    if (arg0->src_mod != SRCMOD_ABS)
    {
        // Some other modifier is in play: take the absolute value through
        // a scratch register first.
        const char *tmpreg = allocate_ARB1_scratch_reg_name(ctx);
        output_line(ctx, "ABS %s, %s;", tmpreg, source);
        output_line(ctx, "LOG%s, %s.x;", dest, tmpreg);
    } // if
    else
    {
        output_line(ctx, "LOG%s, %s;", dest, source);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_LOGP
// More opcodes the ARB1 profile does not implement yet. The macro is defined
// outside this section; presumably each line expands to an
// emit_ARB1_<opcode>() stub that reports the opcode as unimplemented.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(CND)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X3)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
// Emit a CMP opcode using ARB1's CMP with the two result operands swapped.
static void emit_ARB1_CMP(Context *ctx)
{
    const char *dest = make_ARB1_destarg_string(ctx);
    const char *test = make_ARB1_srcarg_string(ctx, 0);
    const char *if_ge = make_ARB1_srcarg_string(ctx, 1);
    const char *if_lt = make_ARB1_srcarg_string(ctx, 2);

    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so the
    // "less than" operand goes first and the ">=" operand goes second.
    output_line(ctx, "CMP%s, %s, %s, %s;", dest, test, if_lt, if_ge);

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_CMP
// BEM is not implemented for ARB1 (macro defined outside this section;
// presumably it expands to a stub emitter that reports the failure).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
// Emit a DP2ADD opcode as a MUL followed by two ADDs through a scratch
// register.
static void emit_ARB1_DP2ADD(Context *ctx)
{
    const char *dest = make_ARB1_destarg_string(ctx);
    const char *lhs = make_ARB1_srcarg_string(ctx, 0);
    const char *rhs = make_ARB1_srcarg_string(ctx, 1);
    const char *addend = make_ARB1_srcarg_string(ctx, 2);
    const char *tmpreg = allocate_ARB1_scratch_reg_name(ctx);

    // DP2ADD is:
    //  dest = src0.r * src1.r + src0.g * src1.g + src2.replicate_swizzle

    // Component-wise products...
    output_line(ctx, "MUL %s, %s, %s;", tmpreg, lhs, rhs);
    // ...sum of the x and y products...
    output_line(ctx, "ADD %s, %s.x, %s.y;", tmpreg, tmpreg, tmpreg);
    // ...plus the third source.
    output_line(ctx, "ADD%s, %s.x, %s;", dest, tmpreg, addend);

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_DP2ADD
// Emit a DSX opcode; only nv2 has an equivalent (DDX).
static void emit_ARB1_DSX(Context *ctx)
{
    if (!ctx->support_nv2)
    {
        failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    emit_ARB1_opcode_ds(ctx, "DDX");  // nv2's built-in equivalent to DSX.
} // emit_ARB1_DSX
// Emit a DSY opcode; only nv2 has an equivalent (DDY).
static void emit_ARB1_DSY(Context *ctx)
{
    if (!ctx->support_nv2)
    {
        failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    emit_ARB1_opcode_ds(ctx, "DDY");  // nv2's built-in equivalent to DSY.
} // emit_ARB1_DSY
// TEXLDD is not implemented for ARB1 (macro defined outside this section;
// presumably it expands to a stub emitter that reports the failure).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXLDD)
// Shared body of the ARB1 texture-load emitters.
//  ctx: the parsing context; source arg 0 is the coordinate, source arg 1
//       is the sampler.
//  opcode: the ARB1 instruction name to emit (callers here pass "TEX" and
//          "TXL").
static void arb1_texld(Context *ctx, const char *opcode)
{
    // !!! FIXME: do non-RGBA textures map to same default values as D3D?
    const char *dest = make_ARB1_destarg_string(ctx);
    const SourceArgInfo *sampler = &ctx->source_args[1];
    RegisterList *declared = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                          sampler->regnum);
    const char *target = NULL;
    const char *coord = make_ARB1_srcarg_string(ctx, 0);
    //const char *src1 = get_ARB1_srcarg_varname(ctx, 1); // !!! FIXME: SRC_MOD?

    // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
    if (declared == NULL)
    {
        fail(ctx, "TEXLD using undeclared sampler");
        return;
    } // if

    // !!! FIXME: does this ever actually happen?
    // Note there is no early return here: the failure is recorded and
    // emission carries on.
    if (!no_swizzle(sampler->swizzle))
        fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");

    // Translate the sampler's declared texture type to an ARB1 target name.
    const TextureType textype = (const TextureType) declared->index;
    if (textype == TEXTURE_TYPE_2D)
        target = "2D";  // !!! FIXME: "RECT"?
    else if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";
    else if (textype == TEXTURE_TYPE_VOLUME)
        target = "3D";
    else
    {
        fail(ctx, "unknown texture type");
        return;
    } // else

    output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dest, coord,
                sampler->regnum, target);
} // arb1_texld
// Emit a TEXLDL opcode as "TXL". Vertex shaders need nv3 for this, pixel
// shaders need nv2.
static void emit_ARB1_TEXLDL(Context *ctx)
{
    if (shader_is_vertex(ctx) && !ctx->support_nv3)
    {
        failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
              ctx->profile->name);
        return;
    } // if

    if (shader_is_pixel(ctx) && !ctx->support_nv2)
    {
        failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
              ctx->profile->name);
        return;
    } // if

    // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
    arb1_texld(ctx, "TXL");
} // emit_ARB1_TEXLDL
// BREAKP and BREAKC are not implemented for ARB1 (macro defined outside this
// section; presumably it expands to stub emitters that report the failure).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
// Emit an IFC opcode (IF with a comparison of two sources). Only possible
// with the nv2 extensions: a condition-code-setting compare instruction is
// emitted into a scratch register, then nv2_if() emits the test.
static void emit_ARB1_IFC(Context *ctx)
{
    if (ctx->support_nv2)
    {
        // Indexed by the instruction's comparison control. D3D defines
        //  1 as >, 2 as ==, 3 as >=, 4 as <, 5 as != and 6 as <=, so slot 4
        //  must be the set-on-less-than opcode (it used to repeat SGTC).
        static const char *comps[] = {
            "", "SGTC", "SEQC", "SGEC", "SLTC", "SNEC", "SLEC"
        };

        if (ctx->instruction_controls >= STATICARRAYLEN(comps))
        {
            fail(ctx, "unknown comparison control");
            return;
        } // if

        const char *comp = comps[ctx->instruction_controls];
        const char *scratch = allocate_ARB1_scratch_reg_name(ctx);
        const char *src0 = get_ARB1_srcarg_varname(ctx, 0);
        const char *src1 = get_ARB1_srcarg_varname(ctx, 1);
        output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
        nv2_if(ctx);
    } // if

    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_IFC
// SETP is not implemented for ARB1 (macro defined outside this section;
// presumably it expands to a stub emitter that reports the failure).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
// Emit a DEF opcode: a four-component float PARAM written to the globals
// output section.
static void emit_ARB1_DEF(Context *ctx)
{
    const char *name = get_ARB1_destarg_varname(ctx);
    const float *fval = (const float *) ctx->dwords; // !!! FIXME: could be int?
    char comp[4][32];
    int i;

    // Format each of the four components as text.
    for (i = 0; i < 4; i++)
        floatstr(ctx, comp[i], sizeof (comp[i]), fval[i], 1);

    // The declaration goes to the globals section, not the instruction
    // stream.
    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
                name, comp[0], comp[1], comp[2], comp[3]);
    pop_output(ctx);
} // emit_ARB1_DEF
// Emit a DEFI opcode: a four-component integer PARAM written to the globals
// output section.
static void emit_ARB1_DEFI(Context *ctx)
{
    const char *name = get_ARB1_destarg_varname(ctx);
    const int32 *ival = (const int32 *) ctx->dwords;
    const int c0 = (int) ival[0];
    const int c1 = (int) ival[1];
    const int c2 = (int) ival[2];
    const int c3 = (int) ival[3];

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %d, %d, %d, %d };", name, c0, c1, c2, c3);
    pop_output(ctx);
} // emit_ARB1_DEFI
// Emit a DEFB opcode: a PARAM holding 1 or 0, written to the globals output
// section.
static void emit_ARB1_DEFB(Context *ctx)
{
    const char *name = get_ARB1_destarg_varname(ctx);
    int truth = 0;

    if (ctx->dwords[0])
        truth = 1;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = %d;", name, truth);
    pop_output(ctx);
} // emit_ARB1_DEFB
// Emit a DCL opcode: deliberately emits nothing for this profile.
static void emit_ARB1_DCL(Context *ctx)
{
// no-op. We do this in our emit_attribute() and emit_uniform().
} // emit_ARB1_DCL
// TEXCRD is not implemented for ARB1 (macro defined outside this section;
// presumably it expands to a stub emitter that reports the failure).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
// Emit a TEXLD opcode as "TEX". Only Shader Model 2.0 and later are handled;
// anything older fails.
static void emit_ARB1_TEXLD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 2, 0))
    {
        // ps_1_0 and ps_1_4 are both different, too!
        // (Shader Model 2.0 itself IS handled below, so the message says
        //  "<", not "<=".)
        fail(ctx, "TEXLD < Shader Model 2.0 unimplemented.");  // !!! FIXME
        return;
    } // if

    arb1_texld(ctx, "TEX");
} // emit_ARB1_TEXLD
#endif // SUPPORT_PROFILE_ARB1
#if !AT_LEAST_ONE_PROFILE
#error No profiles are supported. Fix your build.
#endif
// Table of the profiles compiled into this build. DEFINE_PROFILE(prof)
// expands to one initializer for a Profile entry: the profile's name constant
// (MOJOSHADER_PROFILE_<prof>) followed by that profile's emit_<prof>_* and
// get_<prof>_* functions. Each entry is present only when its
// SUPPORT_PROFILE_* switch is enabled. The helper macro is undefined again
// once the table has been built.
#define DEFINE_PROFILE(prof) { \
MOJOSHADER_PROFILE_##prof, \
emit_##prof##_start, \
emit_##prof##_end, \
emit_##prof##_phase, \
emit_##prof##_global, \
emit_##prof##_array, \
emit_##prof##_const_array, \
emit_##prof##_uniform, \
emit_##prof##_sampler, \
emit_##prof##_attribute, \
emit_##prof##_finalize, \
get_##prof##_varname, \
get_##prof##_const_array_varname, \
},
static const Profile profiles[] =
{
#if SUPPORT_PROFILE_D3D
DEFINE_PROFILE(D3D)
#endif
#if SUPPORT_PROFILE_PASSTHROUGH
DEFINE_PROFILE(PASSTHROUGH)
#endif
#if SUPPORT_PROFILE_GLSL
DEFINE_PROFILE(GLSL)
#endif
#if SUPPORT_PROFILE_ARB1
DEFINE_PROFILE(ARB1)
#endif
};
#undef DEFINE_PROFILE
// This is for profiles that extend other profiles: each entry pairs an
// extended profile's name ("from") with the name of the base profile
// ("to") it builds on.
static const struct { const char *from; const char *to; } profileMap[] =
{
{ MOJOSHADER_PROFILE_GLSL120, MOJOSHADER_PROFILE_GLSL },
{ MOJOSHADER_PROFILE_NV2, MOJOSHADER_PROFILE_ARB1 },
};
// Builds the per-profile emitter initializer list for one opcode by
// concatenating every profile's PROFILE_EMITTER_<profile>(op) expansion
// (those macros are defined outside this section).
// The PROFILE_EMITTER_* items MUST be in the same order as profiles[]!
#define PROFILE_EMITTERS(op) { \
PROFILE_EMITTER_D3D(op) \
PROFILE_EMITTER_PASSTHROUGH(op) \
PROFILE_EMITTER_GLSL(op) \
PROFILE_EMITTER_ARB1(op) \
}
// Decode the destination parameter token at ctx->tokens into *info and
// step past it.
//
// Fills in the register number and type, the relative-addressing flag, the
// write mask, the result modifier and the result shift, then validates them
// against the shader type and version. On success the register is recorded
// via set_used_register() and 1 (the number of tokens consumed) is returned.
// On a validation error the result of fail() is returned, and FAIL is
// returned right away if the context had already failed. Note that the token
// is consumed before most of the validation happens.
static int parse_destination_token(Context *ctx, DestArgInfo *info)
{
// !!! FIXME: recheck against the spec for ranges (like RASTOUT values, etc).
if (isfail(ctx))
return FAIL; // already failed elsewhere.
if (ctx->tokencount == 0)
return fail(ctx, "Out of tokens in destination parameter");
const uint32 token = SWAP32(*(ctx->tokens));
const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
info->token = ctx->tokens;
info->regnum = (int) (token & 0x7ff); // bits 0 through 10
info->relative = (int) ((token >> 13) & 0x1); // bit 13
info->orig_writemask = (int) ((token >> 16) & 0xF); // bits 16 through 19
info->result_mod = (int) ((token >> 20) & 0xF); // bits 20 through 23
info->result_shift = (int) ((token >> 24) & 0xF); // bits 24 through 27
// The register type is split across two bit ranges of the token.
info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18)); // bits 28-30, 11-12
// Scalar registers only ever write x, whatever the token's mask says; the
// token's own mask stays available in orig_writemask.
int writemask;
if (scalar_register(info->regtype, info->regnum))
writemask = 0x1; // just x.
else
writemask = info->orig_writemask;
info->writemask = writemask;
info->writemask0 = (int) ((writemask >> 0) & 0x1); // bit 16
info->writemask1 = (int) ((writemask >> 1) & 0x1); // bit 17
info->writemask2 = (int) ((writemask >> 2) & 0x1); // bit 18
info->writemask3 = (int) ((writemask >> 3) & 0x1); // bit 19
// all the REG_TYPE_CONSTx types are the same register type, it's just
// split up so its regnum can be > 2047 in the bytecode. Clean it up.
if (info->regtype == REG_TYPE_CONST2)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 2048;
} // if
else if (info->regtype == REG_TYPE_CONST3)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 4096;
} // else if
else if (info->regtype == REG_TYPE_CONST4)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 6144;
} // else if
ctx->tokens++; // swallow token for now, for multiple calls in a row.
ctx->tokencount--; // swallow token for now, for multiple calls in a row.
if (reserved1 != 0x0)
return fail(ctx, "Reserved bit #1 in destination token must be zero");
if (reserved2 != 0x1)
return fail(ctx, "Reserved bit #2 in destination token must be one");
// Every path through this block fails: relative destinations are either
// illegal for this shader or simply not supported yet.
if (info->relative)
{
if (!shader_is_vertex(ctx))
return fail(ctx, "Relative addressing in non-vertex shader");
else if (!shader_version_atleast(ctx, 3, 0))
return fail(ctx, "Relative addressing in vertex shader version < 3.0");
else if (!ctx->have_ctab) // it's hard to do this efficiently without!
return fail(ctx, "relative addressing unsupported without a CTAB");
// !!! FIXME: I don't have a shader that has a relative dest currently.
return fail(ctx, "Relative addressing of dest tokens is unsupported");
} // if
// A non-zero result shift is only accepted in pixel shaders below version
// 2.0, and only with the values 1-3 or 13-15.
const int s = info->result_shift;
if (s != 0)
{
if (!shader_is_pixel(ctx))
return fail(ctx, "Result shift scale in non-pixel shader");
else if (shader_version_atleast(ctx, 2, 0))
return fail(ctx, "Result shift scale in pixel shader version >= 2.0");
else if ( ! (((s >= 1) && (s <= 3)) || ((s >= 0xD) && (s <= 0xF))) )
return fail(ctx, "Result shift scale isn't 1 to 3, or 13 to 15.");
} // if
if (info->result_mod & MOD_PP) // Partial precision (pixel shaders only)
{
if (!shader_is_pixel(ctx))
return fail(ctx, "Partial precision result mod in non-pixel shader");
} // if
if (info->result_mod & MOD_CENTROID) // Centroid (pixel shaders only)
{
if (!shader_is_pixel(ctx))
return fail(ctx, "Centroid result mod in non-pixel shader");
} // if
if ((info->regtype < 0) || (info->regtype > REG_TYPE_MAX))
return fail(ctx, "Register type is out of range");
// !!! FIXME: from msdn:
// "_sat cannot be used with instructions writing to output o# registers."
// !!! FIXME: actually, just go over this page:
// http://msdn.microsoft.com/archive/default.asp?url=/archive/en-us/directx9_c/directx/graphics/reference/shaders/ps_instructionmodifiers.asp
set_used_register(ctx, info->regtype, info->regnum);
return 1;
} // parse_destination_token
// Sort the context's constants by register index and register every run of
// two or more float constants with consecutive indexes as an array variable
// on ctx->variables.
//
// Runs at most once per context (guarded by
// ctx->determined_constants_arrays). As a side effect ctx->constants is
// relinked in sorted order. If the list is shorter than ctx->constant_count
// the context is failed; if an allocation fails, grouping stops early.
static void determine_constants_arrays(Context *ctx)
{
    // Only process this stuff once. This is called after all DEF* opcodes
    //  could have been parsed.
    if (ctx->determined_constants_arrays)
        return;

    ctx->determined_constants_arrays = 1;

    if (ctx->constant_count <= 1)
        return;  // nothing to sort or group.

    // Sort the linked list into an array for easier tapdancing...
    //  (one extra slot holds a NULL terminator.)
    ConstantsList **array = (ConstantsList **) alloca(sizeof (ConstantsList *) * (ctx->constant_count + 1));
    ConstantsList *item = ctx->constants;
    int i;

    for (i = 0; i < ctx->constant_count; i++)
    {
        if (item == NULL)
        {
            fail(ctx, "BUG: mismatched constant list and count");
            return;
        } // if

        array[i] = item;
        item = item->next;
    } // for

    array[ctx->constant_count] = NULL;

    // bubble sort ftw.
    int sorted;
    do
    {
        sorted = 1;
        for (i = 0; i < ctx->constant_count-1; i++)
        {
            if (array[i]->constant.index > array[i+1]->constant.index)
            {
                ConstantsList *tmp = array[i];
                array[i] = array[i+1];
                array[i+1] = tmp;
                sorted = 0;
            } // if
        } // for
    } while (!sorted);

    // okay, sorted. While we're here, let's redo the linked list in order...
    //  (the NULL terminator ends the list.)
    for (i = 0; i < ctx->constant_count; i++)
        array[i]->next = array[i+1];
    ctx->constants = array[0];

    // now figure out the groupings of constants and add to ctx->variables...
    int start = -1;  // first element of the run being examined.
    int prev = -1;   // last float constant seen.
    int count = 0;   // how many times the current run has been extended.
    const int hi = ctx->constant_count;

    // The loop deliberately visits the NULL terminator at array[hi], so
    //  that a run reaching the end of the array still gets flushed.
    for (i = 0; i <= hi; i++)
    {
        if (array[i] && (array[i]->constant.type != MOJOSHADER_UNIFORM_FLOAT))
            continue;  // we only care about REG_TYPE_CONST for array groups.

        if (start == -1)
        {
            prev = start = i;  // first REG_TYPE_CONST we've seen. Mark it!
            continue;
        } // if

        // not a match (or last item in the array)...see if we had a
        //  contiguous set before this point...
        if ( (array[i]) && (array[i]->constant.index == (array[prev]->constant.index + 1)) )
            count++;
        else
        {
            if (count > 0)  // multiple constants in the set?
            {
                VariableList *var;
                var = (VariableList *) Malloc(ctx, sizeof (VariableList));
                if (var == NULL)
                    break;

                var->type = MOJOSHADER_UNIFORM_FLOAT;
                var->index = array[start]->constant.index;
                var->count = (array[prev]->constant.index - var->index) + 1;
                var->constant = array[start];
                var->used = 0;
                var->next = ctx->variables;
                ctx->variables = var;
            } // if

            start = i;   // set this as new start of sequence.

            // A new run starts with nothing accumulated. Without this
            //  reset, every isolated constant following the first real
            //  group would be registered as a one-element array.
            count = 0;
        } // else

        prev = i;
    } // for
} // determine_constants_arrays
// Decode the source parameter token at ctx->tokens into *info and step past
// it, along with the extra relative-addressing token if the source uses one.
//
// Fills in the register number and type, the swizzle (whole and per
// component), the source modifier and, for relative addressing, the address
// register details and the constant array being indexed. On success the
// register is recorded via set_used_register() and the number of tokens
// consumed is returned (1, or 2 when a relative-addressing token followed).
// On a validation error the result of fail() is returned, and FAIL is
// returned right away if the context had already failed.
static int parse_source_token(Context *ctx, SourceArgInfo *info)
{
int retval = 1;
if (isfail(ctx))
return FAIL; // already failed elsewhere.
if (ctx->tokencount == 0)
return fail(ctx, "Out of tokens in source parameter");
const uint32 token = SWAP32(*(ctx->tokens));
const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
info->token = ctx->tokens;
info->regnum = (int) (token & 0x7ff); // bits 0 through 10
info->relative = (int) ((token >> 13) & 0x1); // bit 13
info->swizzle = (int) ((token >> 16) & 0xFF); // bits 16 through 23
info->swizzle_x = (int) ((token >> 16) & 0x3); // bits 16 through 17
info->swizzle_y = (int) ((token >> 18) & 0x3); // bits 18 through 19
info->swizzle_z = (int) ((token >> 20) & 0x3); // bits 20 through 21
info->swizzle_w = (int) ((token >> 22) & 0x3); // bits 22 through 23
info->src_mod = (SourceMod) ((token >> 24) & 0xF); // bits 24 through 27
// The register type is split across two bit ranges of the token.
info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18)); // bits 28-30, 11-12
// all the REG_TYPE_CONSTx types are the same register type, it's just
// split up so its regnum can be > 2047 in the bytecode. Clean it up.
if (info->regtype == REG_TYPE_CONST2)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 2048;
} // if
else if (info->regtype == REG_TYPE_CONST3)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 4096;
} // else if
else if (info->regtype == REG_TYPE_CONST4)
{
info->regtype = REG_TYPE_CONST;
info->regnum += 6144;
} // else if
ctx->tokens++; // swallow token for now, for multiple calls in a row.
ctx->tokencount--; // swallow token for now, for multiple calls in a row.
if (reserved1 != 0x0)
return fail(ctx, "Reserved bits #1 in source token must be zero");
if (reserved2 != 0x1)
return fail(ctx, "Reserved bit #2 in source token must be one");
if (info->relative)
{
if ( (shader_is_pixel(ctx)) && (!shader_version_atleast(ctx, 3, 0)) )
return fail(ctx, "Relative addressing in pixel shader version < 3.0");
// The address register is described by an extra token that follows.
if (ctx->tokencount == 0)
return fail(ctx, "Out of tokens in relative source parameter");
const uint32 reltoken = SWAP32(*(ctx->tokens));
ctx->tokens++; // swallow token for now, for multiple calls in a row.
ctx->tokencount--; // swallow token for now, for multiple calls in a row.
const int relswiz = (int) ((reltoken >> 16) & 0xFF);
info->relative_component = relswiz & 0x3;
info->relative_regnum = (int) (reltoken & 0x7ff);
info->relative_regtype = (RegisterType)
(((reltoken >> 28) & 0x7) |
((reltoken >> 8) & 0x18));
if (((reltoken >> 31) & 0x1) == 0)
return fail(ctx, "bit #31 in relative address must be set");
if ((reltoken & 0xF00E000) != 0) // unused bits.
return fail(ctx, "relative address reserved bit must be zero");
switch (info->relative_regtype)
{
case REG_TYPE_LOOP:
case REG_TYPE_ADDRESS:
break;
default:
return fail(ctx, "invalid register for relative address");
break;
} // switch
if (info->relative_regnum != 0) // true for now.
return fail(ctx, "invalid register for relative address");
if (info->regtype != REG_TYPE_CONST)
return fail(ctx, "relative addressing of non-const register");
if (!replicate_swizzle(relswiz))
return fail(ctx, "relative address needs replicate swizzle");
// figure out what array we're in...
if (!ctx->have_ctab) // it's hard to do this efficiently without!
return fail(ctx, "relative addressing unsupported without a CTAB");
determine_constants_arrays(ctx);
// Find the array variable whose index range covers this register.
VariableList *var;
const int reltarget = info->regnum;
for (var = ctx->variables; var != NULL; var = var->next)
{
const int lo = var->index;
if ( (reltarget >= lo) && (reltarget < (lo + var->count)) )
break; // match!
} // for
if (var == NULL)
return fail(ctx, "relative addressing of indeterminate array");
var->used = 1;
info->relative_array = var;
set_used_register(ctx, info->relative_regtype, info->relative_regnum);
retval++; // account for the relative-addressing token.
} // if
switch (info->src_mod)
{
case SRCMOD_NONE:
case SRCMOD_ABSNEGATE:
case SRCMOD_ABS:
case SRCMOD_NEGATE:
break; // okay in any shader model.
// apparently these are only legal in Shader Model 1.x ...
case SRCMOD_BIASNEGATE:
case SRCMOD_BIAS:
case SRCMOD_SIGNNEGATE:
case SRCMOD_SIGN:
case SRCMOD_COMPLEMENT:
case SRCMOD_X2NEGATE:
case SRCMOD_X2:
case SRCMOD_DZ:
case SRCMOD_DW:
case SRCMOD_NOT:
if (shader_version_atleast(ctx, 2, 0))
return fail(ctx, "illegal source mod for this Shader Model.");
break;
default:
return fail(ctx, "Unknown source modifier");
} // switch
// !!! FIXME: docs say this for sm3 ... check these!
// "The negate modifier cannot be used on second source register of these
// instructions: m3x2 - ps, m3x3 - ps, m3x4 - ps, m4x3 - ps, and
// m4x4 - ps."
// "If any version 3 shader reads from one or more constant float
// registers (c#), one of the following must be true.
// All of the constant floating-point registers must use the abs modifier.
// None of the constant floating-point registers can use the abs modifier.
set_used_register(ctx, info->regtype, info->regnum);
return retval;
} // parse_source_token
// Parse the predicate source token of a predicated instruction into
// ctx->predicate_arg and validate it. Returns 1 on success, otherwise FAIL
// or the result of fail().
static int parse_predicated_token(Context *ctx)
{
    SourceArgInfo *pred = &ctx->predicate_arg;

    if (parse_source_token(ctx, pred) == FAIL)
        return FAIL;

    // It has to be a predicate register...
    if (pred->regtype != REG_TYPE_PREDICATE)
        return fail(ctx, "Predicated instruction but not predicate register!");

    // ...with either no modifier or NOT...
    if (!((pred->src_mod == SRCMOD_NONE) || (pred->src_mod == SRCMOD_NOT)))
        return fail(ctx, "Predicated instruction register is not NONE or NOT");

    // ...an identity or replicate swizzle...
    if (!(no_swizzle(pred->swizzle) || replicate_swizzle(pred->swizzle)))
        return fail(ctx, "Predicated instruction register has wrong swizzle");

    // ...and no relative addressing. (I'm pretty sure this is illegal...?)
    if (pred->relative)
        return fail(ctx, "relative addressing in predicated token");

    return 1;
} // parse_predicated_token
// Argument parser for opcodes that take no arguments: only the opcode token
// itself is counted. Returns FAIL if the context has already failed.
static int parse_args_NULL(Context *ctx)
{
    if (isfail(ctx))
        return FAIL;

    return 1;
} // parse_args_NULL
// Argument parser for DEF: a destination token followed by four literal
// dwords, which are byte-swapped into ctx->dwords. Returns 6 (opcode +
// destination + four values) on success, otherwise FAIL or the result of
// fail().
static int parse_args_DEF(Context *ctx)
{
    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
        return FAIL;

    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        return fail(ctx, "relative addressing in DEF");

    // Four value tokens are read below; don't run off the end of a
    //  truncated token stream.
    if (ctx->tokencount < 4)
        return fail(ctx, "Out of tokens in DEF parameter");

    int i;
    for (i = 0; i < 4; i++)
        ctx->dwords[i] = SWAP32(ctx->tokens[i]);

    return 6;
} // parse_args_DEF
// Argument parser for DEFB: a destination token followed by one literal
// dword, stored in ctx->dwords[0] as 1 or 0. Returns 3 (opcode +
// destination + value) on success, otherwise FAIL or the result of fail().
static int parse_args_DEFB(Context *ctx)
{
    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
        return FAIL;

    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        return fail(ctx, "relative addressing in DEFB");

    // The value token is read below; don't run off the end of a truncated
    //  token stream.
    if (ctx->tokencount == 0)
        return fail(ctx, "Out of tokens in DEFB parameter");

    // Only zero/non-zero matters, so no byte swap is needed.
    ctx->dwords[0] = *(ctx->tokens) ? 1 : 0;

    return 3;
} // parse_args_DEFB
// Returns 1 if ttype is one of the texture types handled here (2D, cube or
// volume), 0 otherwise.
static int valid_texture_type(const uint32 ttype)
{
    const TextureType textype = (const TextureType) ttype;

    if (textype == TEXTURE_TYPE_2D)
        return 1;
    if (textype == TEXTURE_TYPE_CUBE)
        return 1;
    if (textype == TEXTURE_TYPE_VOLUME)
        return 1;

    return 0;  // anything else is unknown to us.
} // valid_texture_type
// !!! FIXME: this function is kind of a mess.
// Argument parser for DCL: a DCL info dword followed by a destination token.
//
// What the info dword means depends on the shader type and version and on
// the destination's register type. For input/output/texture registers a
// usage goes to ctx->dwords[0] and an index to ctx->dwords[1]; for samplers
// the texture type goes to ctx->dwords[0]. Each case also selects a mask of
// info-dword bits that must be zero.
//
// Returns 3 (opcode + info dword + destination) on success, otherwise FAIL
// or the result of fail().
static int parse_args_DCL(Context *ctx)
{
    int unsupported = 0;

    // The info dword is read directly below; don't run off the end of a
    //  truncated token stream.
    if (ctx->tokencount == 0)
        return fail(ctx, "Out of tokens in DCL parameter");

    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 31) & 0x1); // bit 31
    uint32 reserved_mask = 0x00000000;

    if (reserved1 != 0x1)
        return fail(ctx, "Bit #31 in DCL token must be one");

    ctx->tokens++;
    ctx->tokencount--;

    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
        return FAIL;

    if (ctx->dest_arg.result_shift != 0)  // I'm pretty sure this is illegal...?
        return fail(ctx, "shift scale in DCL");
    else if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        return fail(ctx, "relative addressing in DCL");

    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if

        else if (regtype == REG_TYPE_MISCTYPE)
        {
            const MiscTypeType mt = (MiscTypeType) regnum;
            if (mt == MISCTYPE_TYPE_POSITION)
                reserved_mask = 0x7FFFFFFF;
            else if (mt == MISCTYPE_TYPE_FACE)
            {
                reserved_mask = 0x7FFFFFFF;
                if (!writemask_xyzw(ctx->dest_arg.orig_writemask))
                    return fail(ctx, "DCL face writemask must be full");
                else if (ctx->dest_arg.result_mod != 0)
                    return fail(ctx, "DCL face result modifier must be zero");
                else if (ctx->dest_arg.result_shift != 0)
                    return fail(ctx, "DCL face shift scale must be zero");
            } // else if
            else
            {
                unsupported = 1;
            } // else

            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_UNKNOWN;
            ctx->dwords[1] = 0;
        } // else if

        else if (regtype == REG_TYPE_TEXTURE)
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            if (usage == MOJOSHADER_USAGE_TEXCOORD)
            {
                if (index > 7)
                    return fail(ctx, "DCL texcoord usage must have 0-7 index");
            } // if
            else if (usage == MOJOSHADER_USAGE_COLOR)
            {
                if (index != 0)
                    return fail(ctx, "DCL color usage must have 0 index");
            } // else if
            else
            {
                return fail(ctx, "Invalid DCL texture usage");
            } // else

            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // else if

        else if (regtype == REG_TYPE_SAMPLER)
        {
            const uint32 ttype = ((token >> 27) & 0xF);
            if (!valid_texture_type(ttype))
                return fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if

        else
        {
            unsupported = 1;
        } // else
    } // if

    else if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_COLOR;
            ctx->dwords[1] = regnum;
            reserved_mask = 0x7FFFFFFF;
        } // if
        else if (regtype == REG_TYPE_TEXTURE)
        {
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_TEXCOORD;
            ctx->dwords[1] = regnum;
            reserved_mask = 0x7FFFFFFF;
        } // else if
        else if (regtype == REG_TYPE_SAMPLER)
        {
            const uint32 ttype = ((token >> 27) & 0xF);
            if (!valid_texture_type(ttype))
                return fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if ((regtype == REG_TYPE_INPUT) || (regtype == REG_TYPE_OUTPUT))
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    else
    {
        unsupported = 1;
    } // else

    if (unsupported)
        return fail(ctx, "invalid DCL register type for this shader model");

    if ((token & reserved_mask) != 0)
        return fail(ctx, "reserved bits in DCL dword aren't zero");

    return 3;
} // parse_args_DCL
// Argument parser for opcodes taking one destination. Returns the number of
// tokens consumed (counting the opcode token), or FAIL.
static int parse_args_D(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.

    consumed += parse_destination_token(ctx, &ctx->dest_arg);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_D
// Argument parser for opcodes taking one source. Returns the number of
// tokens consumed (counting the opcode token), or FAIL.
static int parse_args_S(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.

    consumed += parse_source_token(ctx, &ctx->source_args[0]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_S
// Argument parser for opcodes taking two sources. Returns the number of
// tokens consumed (counting the opcode token), or FAIL.
static int parse_args_SS(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.
    int i;

    // Sources are parsed strictly in order; each call advances ctx->tokens.
    for (i = 0; i < 2; i++)
        consumed += parse_source_token(ctx, &ctx->source_args[i]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_SS
// Argument parser for opcodes taking a destination and one source. Returns
// the number of tokens consumed (counting the opcode token), or FAIL.
static int parse_args_DS(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.

    consumed += parse_destination_token(ctx, &ctx->dest_arg);
    consumed += parse_source_token(ctx, &ctx->source_args[0]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_DS
// Argument parser for opcodes taking a destination and two sources. Returns
// the number of tokens consumed (counting the opcode token), or FAIL.
static int parse_args_DSS(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.
    int i;

    consumed += parse_destination_token(ctx, &ctx->dest_arg);

    // Sources are parsed strictly in order; each call advances ctx->tokens.
    for (i = 0; i < 2; i++)
        consumed += parse_source_token(ctx, &ctx->source_args[i]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_DSS
// Argument parser for opcodes taking a destination and three sources.
// Returns the number of tokens consumed (counting the opcode token), or FAIL.
static int parse_args_DSSS(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.
    int i;

    consumed += parse_destination_token(ctx, &ctx->dest_arg);

    // Sources are parsed strictly in order; each call advances ctx->tokens.
    for (i = 0; i < 3; i++)
        consumed += parse_source_token(ctx, &ctx->source_args[i]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_DSSS
// Argument parser for opcodes taking a destination and four sources.
// Returns the number of tokens consumed (counting the opcode token), or FAIL.
static int parse_args_DSSSS(Context *ctx)
{
    int consumed = 1;  // the opcode token itself.
    int i;

    consumed += parse_destination_token(ctx, &ctx->dest_arg);

    // Sources are parsed strictly in order; each call advances ctx->tokens.
    for (i = 0; i < 4; i++)
        consumed += parse_source_token(ctx, &ctx->source_args[i]);

    if (isfail(ctx))
        return FAIL;
    return consumed;
} // parse_args_DSSSS
// Argument parser for SINCOS, whose argument list depends on the shader
// version.
static int parse_args_SINCOS(Context *ctx)
{
    // Shader Model 3 takes just a destination and a source...
    if (shader_version_atleast(ctx, 3, 0))
        return parse_args_DS(ctx);

    // ...but this opcode needs extra registers for sm2 and lower.
    return parse_args_DSSS(ctx);
} // parse_args_SINCOS
// Argument parser for TEXCRD, whose argument list depends on the shader
// version.
static int parse_args_TEXCRD(Context *ctx)
{
    // Before ps_1_4 there is only a destination...
    if (!shader_version_atleast(ctx, 1, 4))
        return parse_args_D(ctx);

    // ...ps_1_4 added an extra (source) register.
    return parse_args_DS(ctx);
} // parse_args_TEXCRD
// Argument parser for TEXLD, whose argument list depends on the shader
// version: different registers in ps_1_3, ps_1_4, and ps_2_0!
static int parse_args_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
        return parse_args_DSS(ctx);  // 2.0 and up: dest plus two sources.

    if (shader_version_atleast(ctx, 1, 4))
        return parse_args_DS(ctx);   // 1.4: dest plus one source.

    return parse_args_D(ctx);        // older: just the dest.
} // parse_args_TEXLD
// State machine functions...
// Allocate a zeroed constant entry, push it onto the front of
// ctx->constants and bump ctx->constant_count. Returns the new entry, or
// NULL (leaving the list untouched) if the allocation failed.
static ConstantsList *alloc_constant_listitem(Context *ctx)
{
    ConstantsList *node = (ConstantsList *) Malloc(ctx, sizeof (ConstantsList));

    if (node != NULL)
    {
        memset(&node->constant, '\0', sizeof (MOJOSHADER_constant));

        // Link in at the head of the list.
        node->next = ctx->constants;
        ctx->constants = node;
        ctx->constant_count++;
    } // if

    return node;
} // alloc_constant_listitem
// State handler for DEF: records a float constant for the destination
// register. Fails if any instruction has already been seen or the register
// is not a float constant register.
static void state_DEF(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
        fail(ctx, "DEF token must come before any instructions");
    else if (regtype != REG_TYPE_CONST)
        fail(ctx, "DEF token using invalid register");
    else
    {
        ConstantsList *item = alloc_constant_listitem(ctx);
        if (item == NULL)
            return;  // allocation failed (presumably reported by Malloc()).

        item->constant.index = regnum;
        item->constant.type = MOJOSHADER_UNIFORM_FLOAT;
        memcpy(item->constant.value.f, ctx->dwords,
               sizeof (item->constant.value.f));
        set_defined_register(ctx, regtype, regnum);
    } // else
} // state_DEF
// State handler for DEFI: records an integer constant definition.
// Fails if any instruction has already been seen or if the destination is
// not an integer constant register. Otherwise a new entry is pushed onto
// the context's constant list, filled from ctx->dwords, and the register is
// marked as defined.
static void state_DEFI(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
        fail(ctx, "DEFI token must come before any instructions");
    else if (regtype != REG_TYPE_CONSTINT)
        fail(ctx, "DEFI token using invalid register");
    else
    {
        ConstantsList *item = alloc_constant_listitem(ctx);
        if (item == NULL)
            return;  // allocation failed; there is nothing to fill in.

        item->constant.index = regnum;
        item->constant.type = MOJOSHADER_UNIFORM_INT;
        memcpy(item->constant.value.i, ctx->dwords,
               sizeof (item->constant.value.i));
        set_defined_register(ctx, regtype, regnum);
    } // else
} // state_DEFI
// State handler for DEFB: records a boolean constant definition.
// Fails if any instruction has already been seen or if the destination is
// not a boolean constant register. Otherwise a new entry is pushed onto
// the context's constant list and the register is marked as defined.
static void state_DEFB(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
        fail(ctx, "DEFB token must come before any instructions");
    else if (regtype != REG_TYPE_CONSTBOOL)
        fail(ctx, "DEFB token using invalid register");
    else
    {
        ConstantsList *item = alloc_constant_listitem(ctx);
        if (item == NULL)
            return;  // allocation failed; there is nothing to fill in.

        item->constant.index = regnum;
        item->constant.type = MOJOSHADER_UNIFORM_BOOL;
        item->constant.value.b = ctx->dwords[0] ? 1 : 0;  // normalize to 0/1.
        set_defined_register(ctx, regtype, regnum);
    } // else
} // state_DEFB
// State handler for DCL. Registers the declared attribute (or, in pixel
// shaders, sampler) described by the destination argument and ctx->dwords,
// then marks the register as defined.
// parse_args_DCL() does a lot of state checking before we get here.
static void state_DCL(Context *ctx)
{
    const DestArgInfo *arg = &ctx->dest_arg;
    const RegisterType regtype = arg->regtype;
    const int regnum = arg->regnum;
    const int wmask = arg->writemask;

    // !!! FIXME: apparently vs_3_0 can use sampler registers now.
    // !!! FIXME:  (but only s0 through s3, not all 16 of them.)

    if (ctx->instruction_count == 0)
    {
        if (shader_is_vertex(ctx))
        {
            const MOJOSHADER_usage usage = (const MOJOSHADER_usage) ctx->dwords[0];
            const int index = ctx->dwords[1];
            if (usage >= MOJOSHADER_USAGE_TOTAL)
            {
                fail(ctx, "unknown DCL usage");
                return;
            } // if
            add_attribute_register(ctx, regtype, regnum, usage, index, wmask);
        } // if

        else if (shader_is_pixel(ctx))
        {
            if (regtype != REG_TYPE_SAMPLER)
            {
                const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
                const int index = ctx->dwords[1];
                add_attribute_register(ctx, regtype, regnum, usage, index, wmask);
            } // if
            else
            {
                // for samplers, the first dword carries the texture type.
                add_sampler(ctx, regtype, regnum, (TextureType) ctx->dwords[0]);
            } // else
        } // else if

        else
        {
            fail(ctx, "unsupported shader type."); // should be caught elsewhere.
            return;
        } // else
    } // if
    else
    {
        fail(ctx, "DCL token must come before any instructions");
    } // else

    // reached on success and also after the "too late" failure above.
    set_defined_register(ctx, regtype, regnum);
} // state_DCL
// State handler for TEXCRD: the opcode is rejected from Shader Model 2.0 on.
static void state_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 2, 0))
        return;  // still legal in earlier shader models.

    fail(ctx, "TEXCRD in Shader Model >= 2.0");  // apparently removed.
} // state_TEXCRD
// State handler for FRC: rejects the saturate modifier on the destination
// and, before Shader Model 2.0, any writemask other than .y or .xy.
static void state_FRC(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;

    if (dst->result_mod & MOD_SATURATE)  // according to msdn...
    {
        fail(ctx, "FRC destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 2, 0))
        return;  // no writemask restriction from sm2 onward.

    if (writemask_y(dst->writemask) || writemask_xy(dst->writemask))
        return;

    fail(ctx, "FRC writemask must be .y or .xy for shader model 1.x");
} // state_FRC
// Expand a matrix source argument into one source argument per row.
// source_args[idx] is the first row; slots idx+1 .. idx+rows-1 are filled
// with copies whose register number is advanced by the row offset, and each
// of those registers is flagged as used. The D3D profile will only use the
// one legitimate argument, but this saves other profiles from having to
// build this themselves.
static void srcarg_matrix_replicate(Context *ctx, const int idx,
                                    const int rows)
{
    const SourceArgInfo *first = &ctx->source_args[idx];
    int row;

    for (row = 1; row < rows; row++)
    {
        SourceArgInfo *copy = &ctx->source_args[idx + row];
        memcpy(copy, first, sizeof (SourceArgInfo));
        copy->regnum += row;
        set_used_register(ctx, copy->regtype, copy->regnum);
    } // for
} // srcarg_matrix_replicate
// State handler for M4X4: requires a full destination writemask, then
// replicates the matrix source (argument 1) across four rows.
static void state_M4X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M4X4 writemask must be full");

    // !!! FIXME: MSDN:
    //  The xyzw (default) mask is required for the destination register.
    //  Negate and swizzle modifiers are allowed for src0, but not for src1.
    //  Swizzle and negate modifiers are invalid for the src0 register.
    //  The dest and src0 registers cannot be the same.

    // the replication happens even when the writemask check failed.
    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M4X4
// State handler for M4X3: requires an .xyz destination writemask, then
// replicates the matrix source (argument 1) across three rows.
static void state_M4X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M4X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    // the replication happens even when the writemask check failed.
    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M4X3
// State handler for M3X4: requires an .xyzw destination writemask, then
// replicates the matrix source (argument 1) across four rows.
static void state_M3X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M3X4 writemask must be .xyzw");

    // !!! FIXME: MSDN stuff

    // the replication happens even when the writemask check failed.
    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M3X4
// State handler for M3X3: requires an .xyz destination writemask, then
// replicates the matrix source (argument 1) across three rows.
static void state_M3X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M3X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    // the replication happens even when the writemask check failed.
    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M3X3
// State handler for M3X2: requires an .xy destination writemask, then
// replicates the matrix source (argument 1) across two rows.
static void state_M3X2(Context *ctx)
{
    if (!writemask_xy(ctx->dest_arg.writemask))
        fail(ctx, "M3X2 writemask must be .xy");

    // !!! FIXME: MSDN stuff

    // the replication happens even when the writemask check failed.
    srcarg_matrix_replicate(ctx, 1, 2);
} // state_M3X2
// State handler for RET: any LOOP or REP block still open at this point is
// reported as an error.
static void state_RET(Context *ctx)
{
    // MSDN all but says that assembly shaders are more or less serialized
    //  HLSL functions, and a RET marks the end of one, unlike how most CPUs
    //  would behave. That is really helpful: we can use high-level
    //  constructs instead of a mess of GOTOs (a godsend for GLSL), and it
    //  lets us treat a LOOP with no matching ENDLOOP inside a label's
    //  section as an error.
    const int open_loops = (ctx->loops > 0);
    const int open_reps = (ctx->reps > 0);

    if (open_loops)
        fail(ctx, "LOOP without ENDLOOP");

    if (open_reps)
        fail(ctx, "REP without ENDREP");
} // state_RET
// Validate that source argument (arg) of the current instruction is a legal
// label register for this shader version.
//  ctx: parsing context.
//  arg: index into ctx->source_args of the argument to check.
//  opcode: opcode name, only used to build error messages.
// Returns 0 if the register is acceptable; otherwise reports the problem
//  through fail()/failf() and returns that call's result.
static int check_label_register(Context *ctx, int arg, const char *opcode)
{
    const SourceArgInfo *info = &ctx->source_args[arg];
    const RegisterType regtype = info->regtype;
    const int regnum = info->regnum;

    if (regtype != REG_TYPE_LABEL)
        return failf(ctx, "%s with a non-label register specified", opcode);
    else if (!shader_version_atleast(ctx, 2, 0))
        return failf(ctx, "%s not supported in Shader Model 1", opcode);
    else if (shader_version_atleast(ctx, 2, 255))
    {
        // The larger limit applies here. This is its own branch so that
        //  labels 16..2047 don't fall through to the "<= 15" test below.
        if (regnum > 2047)
            return fail(ctx, "label register number must be <= 2047");
    } // else if
    else if (regnum > 15)
        return fail(ctx, "label register number must be <= 15");

    return 0;
} // check_label_register
// State handler for LABEL: a label must come directly after a RET, and its
// argument must be a valid label register. The label register is then
// marked as defined.
static void state_LABEL(Context *ctx)
{
    // the check looks at the opcode *before* this one, so the message says
    //  "preceded".
    if (ctx->previous_opcode != OPCODE_RET)
        fail(ctx, "LABEL not preceded by a RET");

    check_label_register(ctx, 0, "LABEL");
    set_defined_register(ctx, REG_TYPE_LABEL, ctx->source_args[0].regnum);
} // state_LABEL
// Enforce consistent loop wrapping of calls to one label.
// msdn says subroutines inherit the aL register if you're in a loop when
//  you call, and furthermore _if you ever call this function in a loop,
//  it must always be called in a loop_. So we'll just pass our loop
//  variable as a function parameter in those cases.
// The label's "misc" field remembers the first call site's state: 0 means
//  no call seen yet, 1 means called inside a LOOP, -1 means called outside.
static void check_call_loop_wrappage(Context *ctx, const int regnum)
{
    const int in_loop = (ctx->loops > 0) ? 1 : -1;
    RegisterList *label = reglist_find(&ctx->used_registers,
                                       REG_TYPE_LABEL, regnum);
    assert(label != NULL);

    if (label->misc == 0)
    {
        label->misc = in_loop;  // first call to this label: remember it.
        return;
    } // if

    if (label->misc == in_loop)
        return;  // consistent with earlier calls.

    if (in_loop == 1)
        fail(ctx, "CALL to this label must be wrapped in LOOP/ENDLOOP");
    else
        fail(ctx, "CALL to this label must not be wrapped in LOOP/ENDLOOP");
} // check_call_loop_wrappage
// State handler for CALL: validates the label argument and, if it is fine,
// checks that this call's loop wrapping matches earlier calls.
static void state_CALL(Context *ctx)
{
    if (check_label_register(ctx, 0, "CALL") == FAIL)
        return;

    check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
} // state_CALL
// State handler for CALLNZ: the second argument must be a constbool or
// predicate register; the first must be a valid label whose loop wrapping
// matches earlier calls.
static void state_CALLNZ(Context *ctx)
{
    const RegisterType regtype = ctx->source_args[1].regtype;
    const int condition_ok = (regtype == REG_TYPE_CONSTBOOL) ||
                             (regtype == REG_TYPE_PREDICATE);

    if (!condition_ok)
    {
        fail(ctx, "CALLNZ argument isn't constbool or predicate register");
        return;
    } // if

    if (check_label_register(ctx, 0, "CALLNZ") != FAIL)
        check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
} // state_CALLNZ
// State handler for MOVA: the destination has to be the address register.
static void state_MOVA(Context *ctx)
{
    if (ctx->dest_arg.regtype == REG_TYPE_ADDRESS)
        return;

    fail(ctx, "MOVA argument isn't address register");
} // state_MOVA
// State handler for RCP: the source must use a replicate swizzle.
static void state_RCP(Context *ctx)
{
    if (replicate_swizzle(ctx->source_args[0].swizzle))
        return;

    fail(ctx, "RCP without replicate swizzzle");
} // state_RCP
// State handler for LOOP: checks the argument register types (loop
// register, then constint register) and, if both are fine, opens one more
// loop nesting level.
static void state_LOOP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_LOOP)
    {
        fail(ctx, "LOOP argument isn't loop register");
        return;
    } // if

    if (ctx->source_args[1].regtype != REG_TYPE_CONSTINT)
    {
        fail(ctx, "LOOP argument isn't constint register");
        return;
    } // if

    ctx->loops++;
} // state_LOOP
// State handler for ENDLOOP: closes the innermost open LOOP, or fails if
// there is none.
static void state_ENDLOOP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->loops <= 0)
        fail(ctx, "ENDLOOP without LOOP");
    else
        ctx->loops--;  // only decrement when a loop is actually open, so
                       //  the nesting counter never goes negative.
} // state_ENDLOOP
// State handler for BREAKP: needs a predicate register with a replicate
// swizzle, and must sit inside a LOOP or REP block.
static void state_BREAKP(Context *ctx)
{
    const RegisterType regtype = ctx->source_args[0].regtype;

    if (regtype != REG_TYPE_PREDICATE)
    {
        fail(ctx, "BREAKP argument isn't predicate register");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "BREAKP without replicate swizzzle");
        return;
    } // if

    if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKP outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKP
// State handler for BREAK: only legal while a LOOP or REP block is open.
static void state_BREAK(Context *ctx)
{
    const int outside_blocks = (ctx->loops == 0) && (ctx->reps == 0);
    if (outside_blocks)
        fail(ctx, "BREAK outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAK
// State handler for SETP: the destination has to be a predicate register.
static void state_SETP(Context *ctx)
{
    if (ctx->dest_arg.regtype == REG_TYPE_PREDICATE)
        return;

    fail(ctx, "SETP argument isn't predicate register");
} // state_SETP
// State handler for REP: checks that the argument is a constint register,
// opens one more REP nesting level, and tracks the deepest nesting seen so
// far in ctx->max_reps.
static void state_REP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_CONSTINT)
        fail(ctx, "REP argument isn't constint register");

    // the nesting counters are updated even if the check above failed.
    ctx->reps += 1;
    ctx->max_reps = (ctx->reps > ctx->max_reps) ? ctx->reps : ctx->max_reps;
} // state_REP
// State handler for ENDREP: closes the innermost open REP, or fails if
// there is none.
static void state_ENDREP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->reps <= 0)
        fail(ctx, "ENDREP without REP");
    else
        ctx->reps--;  // only decrement when a REP is actually open, so the
                      //  nesting counter never goes negative.
} // state_ENDREP
// State handler for CMP: counts CMP instructions and, for shader versions
// below 1.4, enforces the extra restrictions (at most three CMPs, and the
// destination may not match any of the three sources) and charges one extra
// instruction slot.
static void state_CMP(Context *ctx)
{
    ctx->cmps++;

    if (shader_version_atleast(ctx, 1, 4))
        return;  // the limitations below only apply before 1.4.

    const DestArgInfo *dst = &ctx->dest_arg;
    int i;

    if (ctx->cmps > 3)
        fail(ctx, "only 3 CMP instructions allowed in this shader model");

    for (i = 0; i < 3; i++)
    {
        const SourceArgInfo *src = &ctx->source_args[i];
        if ((dst->regtype == src->regtype) && (dst->regnum == src->regnum))
            fail(ctx, "CMP dest can't match sources in this shader model");
    } // for

    ctx->instruction_count++;  // takes an extra slot in ps_1_2 and _3.
} // state_CMP
// State handler for DP4: before shader version 1.4 the opcode costs one
// extra instruction slot.
static void state_DP4(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
        return;

    ctx->instruction_count++;  // takes an extra slot in ps_1_2 and _3.
} // state_DP4
// State handler for CND: the opcode is rejected from Shader Model 2.0 on,
// and before version 1.4 its first source is restricted to r0.a.
static void state_CND(Context *ctx)
{
    // apparently it was removed...it's not in the docs past ps_1_4 ...
    if (shader_version_atleast(ctx, 2, 0))
        fail(ctx, "CND not allowed in this shader model");

    // extra limitations for ps < 1.4 ...
    else if (!shader_version_atleast(ctx, 1, 4))
    {
        const SourceArgInfo *src = &ctx->source_args[0];
        // ".a" replicates the fourth component. Assuming the swizzle field
        //  keeps the bytecode's packed two-bits-per-component layout
        //  (TODO confirm against parse_source_token), that is 0xFF; a value
        //  of 0x00 would mean .xxxx (r0.r) instead.
        if ((src->regtype != REG_TYPE_TEMP) || (src->regnum != 0) ||
            (src->swizzle != 0xFF))
        {
            fail(ctx, "CND src must be r0.a in this shader model");
        } // if
    } // else if
} // state_CND
// State handler for POW: both sources must use a replicate swizzle.
static void state_POW(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "POW src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "POW src1 must have replicate swizzle");
} // state_POW
// State handler for LOG: the source must use a replicate swizzle.
static void state_LOG(Context *ctx)
{
    if (replicate_swizzle(ctx->source_args[0].swizzle))
        return;

    fail(ctx, "LOG src0 must have replicate swizzle");
} // state_LOG
// State handler for LOGP: the source must use a replicate swizzle.
static void state_LOGP(Context *ctx)
{
    if (replicate_swizzle(ctx->source_args[0].swizzle))
        return;

    fail(ctx, "LOGP src0 must have replicate swizzle");
} // state_LOGP
// State handler for SINCOS. Checks the destination writemask (.x, .y or
// .xy), the replicate swizzle on src0 and the absence of a saturate
// modifier. Before Shader Model 3.0 it also checks the two extra source
// registers: both must be float constants, and they must differ.
static void state_SINCOS(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;
    const int mask = dst->writemask;
    int i;

    if (!writemask_x(mask) && !writemask_y(mask) && !writemask_xy(mask))
    {
        fail(ctx, "SINCOS write mask must be .x or .y or .xy");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "SINCOS src0 must have replicate swizzle");
        return;
    } // if

    if (dst->result_mod & MOD_SATURATE)  // according to msdn...
    {
        fail(ctx, "SINCOS destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 3, 0))
        return;  // sm3 has no extra registers to validate.

    // this opcode needs extra registers, with extra limitations, for <= sm2.
    for (i = 1; i < 3; i++)
    {
        if (ctx->source_args[i].regtype != REG_TYPE_CONST)
        {
            failf(ctx, "SINCOS src%d must be constfloat", i);
            return;
        } // if
    } // for

    if (ctx->source_args[1].regnum == ctx->source_args[2].regnum)
        fail(ctx, "SINCOS src1 and src2 must be different registers");
} // state_SINCOS
// State handler for IF: the condition must be a constbool register.
static void state_IF(Context *ctx)
{
    // !!! FIXME: track if nesting depth.
    if (ctx->source_args[0].regtype == REG_TYPE_CONSTBOOL)
        return;

    fail(ctx, "IF src0 must be CONSTBOOL");
} // state_IF
// State handler for IFC: both compared sources must use a replicate swizzle.
static void state_IFC(Context *ctx)
{
    // !!! FIXME: track if nesting depth.
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "IFC src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "IFC src1 must have replicate swizzle");
} // state_IFC
// State handler for BREAKC: both compared sources must use a replicate
// swizzle, and the instruction must sit inside a LOOP or REP block.
static void state_BREAKC(Context *ctx)
{
    // sources are numbered from zero in the messages, matching the other
    //  handlers (IFC, POW, ...) and the source_args[] indices.
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
        fail(ctx, "BREAKC src0 must have replicate swizzle");
    else if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "BREAKC src1 must have replicate swizzle");
    else if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKC outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKC
// State handler for TEXKILL: needs a full .xyzw writemask on a temp or
// texture register.
static void state_TEXKILL(Context *ctx)
{
    // The MSDN docs say this should be a source arg, but the driver docs
    //  say it's a dest arg. That's annoying.
    const DestArgInfo *info = &ctx->dest_arg;

    if (!writemask_xyzw(info->writemask))
    {
        fail(ctx, "TEXKILL writemask must be .xyzw");
        return;
    } // if

    if ((info->regtype == REG_TYPE_TEMP) || (info->regtype == REG_TYPE_TEXTURE))
        return;

    fail(ctx, "TEXKILL must use a temp or texture register");

    // !!! FIXME: "If a temporary register is used, all components must have been previously written."
    // !!! FIXME: "If a texture register is used, all components that are read must have been declared."
    // !!! FIXME: there are further limitations in ps_1_3 and earlier.
} // state_TEXKILL
// State handler for TEXLD. Only Shader Model 2.0 and later are validated
// here: neither source may carry a modifier, src1 must be a sampler, and
// before Shader Model 3.0 neither source may be swizzled.
static void state_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
    {
        const SourceArgInfo *src0 = &ctx->source_args[0];
        const SourceArgInfo *src1 = &ctx->source_args[1];

        //const RegisterType rt0 = src0->regtype;

        // !!! FIXME: msdn says it has to be temp, but Microsoft's HLSL
        // !!! FIXME:  compiler is generating code that uses oC0 for a dest.
        //if (ctx->dest_arg.regtype != REG_TYPE_TEMP)
        //    fail(ctx, "TEXLD dest must be a temp register");

        // !!! FIXME: this can be an REG_TYPE_INPUT, DCL'd to TEXCOORD.
        //else if ((rt0 != REG_TYPE_TEXTURE) && (rt0 != REG_TYPE_TEMP))
        //    fail(ctx, "TEXLD src0 must be texture or temp register");
        //else

        if (src0->src_mod != SRCMOD_NONE)
            fail(ctx, "TEXLD src0 must have no modifiers");
        else if (src1->regtype != REG_TYPE_SAMPLER)
            fail(ctx, "TEXLD src1 must be sampler register");
        else if (src1->src_mod != SRCMOD_NONE)
            fail(ctx, "TEXLD src1 must have no modifiers");  // src1, not src0.

        // Shader Model 3 added swizzle support to this opcode.
        if (!shader_version_atleast(ctx, 3, 0))
        {
            if (!no_swizzle(src0->swizzle))
                fail(ctx, "TEXLD src0 must not swizzle");
            else if (!no_swizzle(src1->swizzle))
                fail(ctx, "TEXLD src1 must not swizzle");
        } // if

        // NOTE(review): this compares the sampler's register *number* with
        //  a texture type; it looks like the sampler's declared type was
        //  intended. Behavior left unchanged -- confirm and fix separately.
        if ( ((TextureType) ctx->source_args[1].regnum) == TEXTURE_TYPE_CUBE )
            ctx->instruction_count += 3;
    } // if

    // !!! FIXME: checks for ps_1_4 and ps_1_0 versions here...
} // state_TEXLD
// State handler for TEXLDL: requires Shader Model 3.0 and a sampler register
// as src1. Three extra instruction slots are charged when the cube check
// below matches.
static void state_TEXLDL(Context *ctx)
{
    if (!shader_version_atleast(ctx, 3, 0))
    {
        fail(ctx, "TEXLDL in version < Shader Model 3.0");
        return;
    } // if

    if (ctx->source_args[1].regtype != REG_TYPE_SAMPLER)
    {
        fail(ctx, "TEXLDL src1 must be sampler register");
        return;
    } // if

    // NOTE(review): compares the sampler register number with a texture
    //  type; presumably the sampler's declared type was meant -- confirm.
    if ( ((TextureType) ctx->source_args[1].regnum) == TEXTURE_TYPE_CUBE )
        ctx->instruction_count += 3;
} // state_TEXLDL
// State handler for DP2ADD: the third source must use a replicate swizzle.
static void state_DP2ADD(Context *ctx)
{
    if (replicate_swizzle(ctx->source_args[2].swizzle))
        return;

    fail(ctx, "DP2ADD src2 must have replicate swizzle");
} // state_DP2ADD
// Lookup table for instruction opcodes...
// One row of the opcode lookup table: everything parse_instruction_token()
// needs to parse, validate and emit a single opcode.
typedef struct
{
const char *opcode_string; // opcode name, used in error messages.
int slots; // number of instruction slots this opcode eats.
MOJOSHADER_shaderType shader_types; // mask of types that can use opcode.
args_function parse_args; // parses the opcode's argument tokens.
state_function state; // optional state machine hook; NULL when unused.
emit_function emitter[STATICARRAYLEN(profiles)]; // indexed by profile id.
} Instruction;
// These have to be in the right order! This array is indexed by the value
// of the instruction token.
// Rows named RESERVED are placeholders that keep the following opcodes at
// the correct index; their shader type mask is MOJOSHADER_TYPE_UNKNOWN.
static const Instruction instructions[] =
{
// INSTRUCTION_STATE means this opcode has to update the state machine
// (we're entering an ELSE block, etc). INSTRUCTION means there's no
// state, just go straight to the emitters.
// Macro arguments: op is the opcode name (also selects state_<op> and the
// profile emitters), slots is the instruction slot cost, argsseq selects
// parse_args_<argsseq>, and t is the mask of shader types allowed.
#define INSTRUCTION_STATE(op, slots, argsseq, t) { \
#op, slots, t, parse_args_##argsseq, state_##op, PROFILE_EMITTERS(op) \
}
#define INSTRUCTION(op, slots, argsseq, t) { \
#op, slots, t, parse_args_##argsseq, 0, PROFILE_EMITTERS(op) \
}
// !!! FIXME: Some of these MOJOSHADER_TYPE_ANYs need to have their scope
// !!! FIXME: reduced to just PIXEL or VERTEX.
INSTRUCTION(NOP, 1, NULL, MOJOSHADER_TYPE_ANY),
INSTRUCTION(MOV, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(ADD, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(SUB, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(MAD, 1, DSSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(MUL, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(RCP, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(RSQ, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(DP3, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(DP4, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(MIN, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(MAX, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(SLT, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(SGE, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(EXP, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(LOG, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(LIT, 3, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(DST, 1, DSS, MOJOSHADER_TYPE_VERTEX),
INSTRUCTION(LRP, 2, DSSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(FRC, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(M4X4, 4, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(M4X3, 3, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(M3X4, 4, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(M3X3, 3, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(M3X2, 2, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(CALL, 2, S, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(CALLNZ, 3, SS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(LOOP, 3, SS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(RET, 1, NULL, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(ENDLOOP, 2, NULL, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(LABEL, 0, S, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(DCL, 0, DCL, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(POW, 3, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(CRS, 2, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(SGN, 3, DSSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(ABS, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(NRM, 3, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(SINCOS, 8, SINCOS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(REP, 3, S, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(ENDREP, 2, NULL, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(IF, 3, S, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(IFC, 3, SS, MOJOSHADER_TYPE_ANY),
INSTRUCTION(ELSE, 1, NULL, MOJOSHADER_TYPE_ANY), // !!! FIXME: state!
INSTRUCTION(ENDIF, 1, NULL, MOJOSHADER_TYPE_ANY), // !!! FIXME: state!
INSTRUCTION_STATE(BREAK, 1, NULL, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(BREAKC, 3, SS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(MOVA, 1, DS, MOJOSHADER_TYPE_VERTEX),
INSTRUCTION_STATE(DEFB, 0, DEFB, MOJOSHADER_TYPE_ANY),
// NOTE(review): DEFI shares parse_args_DEF with DEF rather than having its
// own argument parser -- confirm this is intentional.
INSTRUCTION_STATE(DEFI, 0, DEF, MOJOSHADER_TYPE_ANY),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION_STATE(TEXCRD, 1, TEXCRD, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(TEXKILL, 2, D, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(TEXLD, 1, TEXLD, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXBEM, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXBEML, 2, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXREG2AR, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXREG2GB, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X2PAD, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X2TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X3PAD, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X3TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(RESERVED, 0, NULL, MOJOSHADER_TYPE_UNKNOWN),
INSTRUCTION(TEXM3X3SPEC, 1, DSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X3VSPEC, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(EXPP, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(LOGP, 1, DS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(CND, 1, DSSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(DEF, 0, DEF, MOJOSHADER_TYPE_ANY),
INSTRUCTION(TEXREG2RGB, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXDP3TEX, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X2DEPTH, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXDP3, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXM3X3, 1, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXDEPTH, 1, D, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(CMP, 1, DSSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(BEM, 2, DSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(DP2ADD, 2, DSSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(DSX, 2, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(DSY, 2, DS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION(TEXLDD, 3, DSSSS, MOJOSHADER_TYPE_PIXEL),
INSTRUCTION_STATE(SETP, 1, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(TEXLDL, 2, DSS, MOJOSHADER_TYPE_ANY),
INSTRUCTION_STATE(BREAKP, 3, S, MOJOSHADER_TYPE_ANY),
// !!! FIXME: TEXLDB?
#undef INSTRUCTION
#undef INSTRUCTION_STATE
};
// parse various token types...
// Parse the instruction at the current token: decode the opcode token, parse
// its arguments, run the opcode's state handler (if any) and call the
// active profile's emitter.
// Returns 0 if the opcode is outside the instructions[] table (so the
// caller can try something else), the result of fail()/failf() or FAIL on
// error, and otherwise the value produced by the opcode's argument parser
// (plus one if a predicate token was parsed).
static int parse_instruction_token(Context *ctx)
{
int retval = NOFAIL;
// remember where we started; the argument parsers advance these.
const uint32 *start_tokens = ctx->tokens;
const uint32 start_tokencount = ctx->tokencount;
const uint32 token = SWAP32(*(ctx->tokens));
// opcode token layout: bits 0-15 opcode, 16-23 controls, 24-27 token
// count, bit 28 predicated, bit 30 coissue, bit 31 must be zero.
const uint32 opcode = (token & 0xFFFF);
const uint32 controls = ((token >> 16) & 0xFF);
const uint32 insttoks = ((token >> 24) & 0x0F);
const int coissue = (token & 0x40000000) ? 1 : 0;
const int predicated = (token & 0x10000000) ? 1 : 0;
if ( opcode >= (sizeof (instructions) / sizeof (instructions[0])) )
return 0; // not an instruction token, or just not handled here.
const Instruction *instruction = &instructions[opcode];
const emit_function emitter = instruction->emitter[ctx->profileid];
if ((token & 0x80000000) != 0)
return fail(ctx, "instruction token high bit must be zero."); // so says msdn.
if (coissue)
{
// every path in this block fails; coissue is never accepted.
if (!shader_is_pixel(ctx))
return fail(ctx, "coissue instruction on non-pixel shader");
else if (shader_version_atleast(ctx, 2, 0))
return fail(ctx, "coissue instruction in Shader Model >= 2.0");
// !!! FIXME: I'm not sure what this actually means, yet.
return fail(ctx, "coissue instructions unsupported");
} // if
if ((ctx->shader_type & instruction->shader_types) == 0)
{
return failf(ctx, "opcode '%s' not available in this shader type.",
instruction->opcode_string);
} // if
// reset per-instruction state before the argument parser fills it in.
memset(ctx->dwords, '\0', sizeof (ctx->dwords));
ctx->instruction_controls = controls;
ctx->predicated = predicated;
// Update the context with instruction's arguments.
ctx->tokens++;
ctx->tokencount--;
retval = instruction->parse_args(ctx);
assert((isfail(ctx)) || (retval >= 0));
if ( (!isfail(ctx)) && (predicated) )
{
if (parse_predicated_token(ctx) != FAIL)
retval++; // one more token.
} // if
// parse_args() moves these forward for convenience...reset them.
ctx->tokens = start_tokens;
ctx->tokencount = start_tokencount;
if (!isfail(ctx))
{
if (instruction->state != NULL)
instruction->state(ctx);
} // if
// the slot cost is charged whether or not the instruction failed.
ctx->instruction_count += instruction->slots;
if (isfail(ctx))
retval = FAIL;
else
emitter(ctx); // call the profile's emitter.
ctx->previous_opcode = opcode;
ctx->scratch_registers = 0; // reset after every instruction.
if (!isfail(ctx))
{
if (!shader_version_atleast(ctx, 2, 0))
{
if (insttoks != 0) // reserved field in shaders < 2.0 ...
return fail(ctx, "instruction token count must be zero");
} // if
else
{
// from Shader Model 2.0 on, the token's count field must agree with
// what the argument parser reported (insttoks excludes the opcode
// token itself, hence the +1).
if (retval != (insttoks+1))
{
return failf(ctx,
"wrong token count (%u, not %u) for opcode '%s'.",
(uint) retval, (uint) (insttoks+1),
instruction->opcode_string);
} // if
} // else
} // if
return retval;
} // parse_instruction_token
// Parse the version token that starts every shader. Records the shader type
// and version in the context and starts the profile's emitter.
// Returns 1 (one token consumed) on success; FAIL or the result of
// fail()/failf() if there's a preexisting error, no token to read, an
// unknown shader type, or an unsupported shader model.
static int parse_version_token(Context *ctx, const char *profilestr)
{
    if (isfail(ctx))  // catch preexisting errors here.
        return FAIL;

    if (ctx->tokencount == 0)
        return fail(ctx, "Expected version token, got none at all.");

    const uint32 token = SWAP32(*(ctx->tokens));
    const uint32 shadertype = ((token >> 16) & 0xFFFF);
    const uint8 major = (uint8) ((token >> 8) & 0xFF);
    const uint8 minor = (uint8) (token & 0xFF);

    ctx->version_token = token;

    switch (shadertype)
    {
        case 0xFFFF:  // pixel shader.
            ctx->shader_type = MOJOSHADER_TYPE_PIXEL;
            ctx->shader_type_str = "ps";
            break;

        case 0xFFFE:  // vertex shader.
            ctx->shader_type = MOJOSHADER_TYPE_VERTEX;
            ctx->shader_type_str = "vs";
            break;

        default:  // geometry shader? Bogus data?
            return fail(ctx, "Unsupported shader type or not a shader at all");
    } // switch

    ctx->major_ver = major;
    ctx->minor_ver = minor;

    if (shader_version_supported(major, minor))
    {
        ctx->profile->start_emitter(ctx, profilestr);
        return 1;  // ate one token.
    } // if

    return failf(ctx, "Shader Model %u.%u is currently unsupported.",
                 (uint) major, (uint) minor);
} // parse_version_token
// Microsoft's tools add a CTAB comment to all shaders. This is the
// "constant table," or specifically: D3DXSHADER_CONSTANTTABLE:
// http://msdn.microsoft.com/en-us/library/bb205440(VS.85).aspx
// This may tell us high-level truths about otherwise generic low-level
// registers: for instance, how large an array actually is, etc.
//
// ctx->tokens must point at the comment token; (bytes) is the size of the
// comment's payload in bytes. The caller guarantees at least eight payload
// tokens are readable. Comments that aren't a CTAB of the expected layout
// are silently ignored; a CTAB with inconsistent offsets fails the parse.
// Each bool/int/float constant found is pushed onto ctx->variables.
static void parse_constant_table(Context *ctx, const uint32 bytes)
{
    // offsets stored in the table are applied relative to this address.
    const uint8 *start = (uint8 *) &ctx->tokens[2];
    const uint32 id = SWAP32(ctx->tokens[1]);
    const uint32 size = SWAP32(ctx->tokens[2]);
    const uint32 creator = SWAP32(ctx->tokens[3]);
    const uint32 version = SWAP32(ctx->tokens[4]);
    const uint32 constants = SWAP32(ctx->tokens[5]);
    const uint32 constantinfo = SWAP32(ctx->tokens[6]);
    const uint32 target = SWAP32(ctx->tokens[8]);
    uint32 i = 0;

    if (id != 0x42415443)  // 0x42415443 == 'CTAB'
        return;  // not the constant table.

    if (size != 28)
        return;  // only handle this version of the struct.

    if (version != ctx->version_token) goto corrupt_ctab;
    if (creator >= bytes) goto corrupt_ctab;

    // Same test as "(constantinfo + (constants * 20)) >= bytes", rearranged
    //  so that huge values in the (untrusted) table can't wrap around 32
    //  bits and slip past the check.
    if (constantinfo >= bytes) goto corrupt_ctab;
    if (constants > ((bytes - constantinfo - 1) / 20)) goto corrupt_ctab;

    if (target >= bytes) goto corrupt_ctab;

    ctx->have_ctab = 1;

    for (i = 0; i < constants; i++)
    {
        // we only care about deciding which variables might be arrays at
        //  the moment, but there's lots of other good info in the CTAB.
        // each constant info record is 20 bytes.
        const uint8 *ptr = start + constantinfo + (i * 20);
        const uint32 name = SWAP32(*((uint32 *) (ptr + 0)));
        const uint16 regset = SWAP16(*((uint16 *) (ptr + 4)));
        const uint16 regidx = SWAP16(*((uint16 *) (ptr + 6)));
        const uint16 regcnt = SWAP16(*((uint16 *) (ptr + 8)));
        const uint32 typeinf = SWAP32(*((uint32 *) (ptr + 12)));
        const uint32 defval = SWAP32(*((uint32 *) (ptr + 16)));
        MOJOSHADER_uniformType mojotype = MOJOSHADER_UNIFORM_UNKNOWN;

        if (name >= bytes) goto corrupt_ctab;

        // overflow-safe form of "(typeinf + 16) >= bytes".
        if ((typeinf >= bytes) || ((bytes - typeinf) <= 16))
            goto corrupt_ctab;

        if (defval >= bytes) goto corrupt_ctab;

        switch (regset)
        {
            case 0: mojotype = MOJOSHADER_UNIFORM_BOOL; break;
            case 1: mojotype = MOJOSHADER_UNIFORM_INT; break;
            case 2: mojotype = MOJOSHADER_UNIFORM_FLOAT; break;
            case 3: /* SAMPLER */ break;
            default: goto corrupt_ctab;
        } // switch

        if (mojotype != MOJOSHADER_UNIFORM_UNKNOWN)
        {
            VariableList *item;
            item = (VariableList *) Malloc(ctx, sizeof (VariableList));
            if (item != NULL)
            {
                item->type = mojotype;
                item->index = regidx;
                item->count = regcnt;
                item->constant = NULL;
                item->used = 0;
                item->next = ctx->variables;  // push onto the list head.
                ctx->variables = item;
            } // if
        } // if
    } // for

    return;

corrupt_ctab:
    fail(ctx, "Shader has corrupt CTAB data");
} // parse_constant_table
// Parse a comment token, if the current token is one.
// Returns 0 when the token isn't a comment, the result of fail() when the
// token's high bit is set, and otherwise the number of tokens the comment
// occupies (its payload plus the comment token itself). Comments with at
// least eight payload tokens that fit inside the remaining stream are also
// handed to parse_constant_table().
static int parse_comment_token(Context *ctx)
{
    const uint32 token = SWAP32(*(ctx->tokens));

    if ((token & 0xFFFF) != 0xFFFE)
        return 0;  // not a comment token.

    if ((token & 0x80000000) != 0)
        return fail(ctx, "comment token high bit must be zero.");  // so says msdn.

    const uint32 commenttoks = ((token >> 16) & 0xFFFF);
    const int could_be_ctab = (commenttoks >= 8) &&
                              (commenttoks < ctx->tokencount);
    if (could_be_ctab)
        parse_constant_table(ctx, commenttoks * 4);

    return commenttoks + 1;  // comment data plus the initial token.
} // parse_comment_token
// Parse the end token, if the current token is one.
// Returns 0 when it isn't, the result of fail() when the end token is not
// the last token of the stream, and END_OF_STREAM after calling the
// profile's end emitter.
static int parse_end_token(Context *ctx)
{
    const int is_end = (SWAP32(*(ctx->tokens)) == 0x0000FFFF);  // always 0x0000FFFF.

    if (!is_end)
        return 0;  // not us, eat no tokens.

    if (ctx->tokencount == 1)  // we _must_ be last. If not: fail.
    {
        ctx->profile->end_emitter(ctx);
        return END_OF_STREAM;
    } // if

    return fail(ctx, "end token before end of stream");
} // parse_end_token
// Parse a phase token, if the current token is one.
// Returns 0 when it isn't, the result of fail() when the shader isn't a
// version 1.4 pixel shader, and 1 (one token consumed) after calling the
// profile's phase emitter.
static int parse_phase_token(Context *ctx)
{
    if (SWAP32(*(ctx->tokens)) != 0x0000FFFD)  // phase token always 0x0000FFFD.
        return 0;  // not us, eat no tokens.

    if ( (shader_is_pixel(ctx)) && (shader_version_exactly(ctx, 1, 4)) )
    {
        ctx->profile->phase_emitter(ctx);
        return 1;
    } // if

    return fail(ctx, "phase token only available in 1.4 pixel shaders");
} // parse_phase_token
// Parse whatever token the context currently points at by trying each
// token parser in turn (comment, end, phase, instruction); a parser returns
// 0 to say "not mine".
// Returns the first non-zero result from those parsers, or the result of
// fail()/failf() (or FAIL) if the context is already failed, the output
// stack isn't empty, the stream is exhausted, or no parser recognized the
// token.
static int parse_token(Context *ctx)
{
    int rc = 0;

    if (isfail(ctx))
        return FAIL;  // just in case...catch previously unhandled fails here.

    if (ctx->output_stack_len != 0)
        return fail(ctx, "BUG: output stack isn't empty on new token!");

    if (ctx->tokencount == 0)
        return fail(ctx, "unexpected end of shader.");

    if ((rc = parse_comment_token(ctx)) != 0)
        return rc;

    if ((rc = parse_end_token(ctx)) != 0)
        return rc;

    if ((rc = parse_phase_token(ctx)) != 0)
        return rc;

    if ((rc = parse_instruction_token(ctx)) != 0)
        return rc;

    // go through SWAP32 like every other token read, so the reported value
    //  doesn't depend on the host's byte order.
    return failf(ctx, "unknown token (%u)", (uint) SWAP32(*(ctx->tokens)));
} // parse_token
// #define this to force app to supply an allocator, so there's no reference
// to the C runtime's malloc() and free()...
#if MOJOSHADER_FORCE_ALLOCATOR
// no built-in fallback in this configuration: both defaults are NULL.
#define internal_malloc NULL
#define internal_free NULL
#else
// Default allocator callbacks: thin wrappers over malloc() and free() that
// ignore the opaque data pointer (d).
static void *internal_malloc(int bytes, void *d) { return malloc(bytes); }
static void internal_free(void *ptr, void *d) { free(ptr); }
#endif
// Look up a profile by name.
// The name is first translated through profileMap (if it matches an entry's
// "from" string, the entry's "to" string is used instead), then searched
// for in the profiles table.
// Returns the index into profiles[], or -1 if there is no match.
static int find_profile_id(const char *profile)
{
    int i;

    // pass one: replace a mapped name with the name it maps to.
    for (i = 0; i < STATICARRAYLEN(profileMap); i++)
    {
        if (strcmp(profileMap[i].from, profile) != 0)
            continue;
        profile = profileMap[i].to;
        break;
    } // for

    // pass two: find the (possibly translated) name in the profile table.
    for (i = 0; i < STATICARRAYLEN(profiles); i++)
    {
        if (strcmp(profiles[i].name, profile) == 0)
            return i;
    } // for

    return -1;  // no match.
} // find_profile_id
// Allocate and initialize a parsing context.
//  profile: name of the output profile to use.
//  tokenbuf/bufsize: the shader bytecode and its size in bytes.
//  m/f/d: allocator callbacks and their opaque data pointer; a NULL m or f
//         is replaced by internal_malloc/internal_free.
// Returns NULL if the context itself couldn't be allocated. Otherwise
//  returns the new context; if the profile name is unknown, the context is
//  returned in a failed state with no profile set.
static Context *build_context(const char *profile,
                              const unsigned char *tokenbuf,
                              const unsigned int bufsize,
                              MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
{
    if (m == NULL) m = internal_malloc;
    if (f == NULL) f = internal_free;

    Context *ctx = (Context *) m(sizeof (Context), d);
    if (ctx == NULL)
        return NULL;

    memset(ctx, '\0', sizeof (Context));

    // allocator.
    ctx->malloc = m;
    ctx->free = f;
    ctx->malloc_data = d;

    // input token stream.
    ctx->tokens = (const uint32 *) tokenbuf;
    ctx->tokencount = bufsize / sizeof (uint32);

    // line endings for generated text.
    ctx->endline = endline_str;
    ctx->endline_len = strlen(ctx->endline);

    // every output list starts empty, with its tail at its own head node.
    ctx->globals.tail = &ctx->globals.head;
    ctx->helpers.tail = &ctx->helpers.head;
    ctx->subroutines.tail = &ctx->subroutines.head;
    ctx->mainline_intro.tail = &ctx->mainline_intro.head;
    ctx->mainline.tail = &ctx->mainline.head;
    ctx->ignore.tail = &ctx->ignore.head;
    ctx->output = &ctx->mainline;  // emit into the mainline by default.

    ctx->last_address_reg_component = -1;

    // resolve the profile last, so a failure is recorded on a fully
    //  initialized context.
    const int profileid = find_profile_id(profile);
    ctx->profileid = profileid;
    if (profileid < 0)
        failf(ctx, "Profile '%s' is unknown or unsupported", profile);
    else
        ctx->profile = &profiles[profileid];

    return ctx;
} // build_context
// Release every node of an output list, starting at (item), using the
//  deallocator (f) with user data (d). A node's string is released first
//  when it is non-NULL, then the node itself.
static void free_output_list(MOJOSHADER_free f, void *d, OutputListNode *item)
{
    OutputListNode *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        if (item->str != NULL)
            f(item->str, d);
        f(item, d);
    } // for
} // free_output_list
// Release every node of a ConstantsList, starting at (item), using the
//  deallocator (f) with user data (d).
static void free_constants_list(MOJOSHADER_free f, void *d, ConstantsList *item)
{
    ConstantsList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_constants_list
// Release every node of a VariableList, starting at (item), using the
//  deallocator (f) with user data (d).
static void free_variable_list(MOJOSHADER_free f, void *d, VariableList *item)
{
    VariableList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_variable_list
// Release a Context and everything it still owns: the binary output buffer,
//  all output lists, the constants list, the register lists, the variable
//  list and the failure string (unless that is the static out_of_mem_str).
//  Passing NULL is a no-op.
static void destroy_context(Context *ctx)
{
    if (ctx == NULL)
        return;

    MOJOSHADER_free f = ((ctx->free != NULL) ? ctx->free : internal_free);
    void *d = ctx->malloc_data;

    // The deallocator takes (pointer, userdata), in that order. This call
    //  used to pass them swapped, which handed the user-data pointer to the
    //  deallocator and never released output_bytes.
    if (ctx->output_bytes != NULL)
        f((void *) ctx->output_bytes, d);

    // the list heads live inside the Context; only nodes after them are
    //  separate allocations.
    free_output_list(f, d, ctx->globals.head.next);
    free_output_list(f, d, ctx->helpers.head.next);
    free_output_list(f, d, ctx->subroutines.head.next);
    free_output_list(f, d, ctx->mainline_intro.head.next);
    free_output_list(f, d, ctx->mainline.head.next);
    free_output_list(f, d, ctx->ignore.head.next);

    free_constants_list(f, d, ctx->constants);

    free_reglist(f, d, ctx->used_registers.next);
    free_reglist(f, d, ctx->defined_registers.next);
    free_reglist(f, d, ctx->uniforms.next);
    free_reglist(f, d, ctx->attributes.next);
    free_reglist(f, d, ctx->samplers.next);

    free_variable_list(f, d, ctx->variables);

    // out_of_mem_str is not heap memory; never hand it to the deallocator.
    if ((ctx->failstr != NULL) && (ctx->failstr != out_of_mem_str))
        f((void *) ctx->failstr, d);

    f(ctx, d);
} // destroy_context
// Copy each string of an output list to the buffer at (*_wptr), following
//  every string with (endline). A null terminator is written after the last
//  copied byte and (*_wptr) is updated to point at that terminator, so
//  another call can keep appending from there. The caller must have made
//  the buffer large enough; nothing here checks.
static void append_list(char **_wptr, const char *endline,
                        const size_t endline_len, OutputListNode *item)
{
    char *cursor = *_wptr;

    for (; item != NULL; item = item->next)
    {
        const char *text = item->str;
        const size_t textlen = strlen(text);
        memcpy(cursor, text, textlen);
        memcpy(cursor + textlen, endline, endline_len);
        cursor += textlen + endline_len;
    } // for

    *cursor = '\0';
    *_wptr = cursor;
} // append_list
// Build the final output buffer for the caller.
//  If ctx->output_bytes is set, the result is a copy of those
//  ctx->output_len bytes. Otherwise the result is the text of the globals,
//  helpers, subroutines, mainline_intro and mainline lists, in that order,
//  with one extra byte allocated for the null terminator.
//  Returns NULL if Malloc() fails.
static char *build_output(Context *ctx)
{
    const int is_text = (ctx->output_bytes == NULL);
    const int plusbytes = is_text ? 1 : 0;  // room for the null terminator.
    char *retval = (char *) Malloc(ctx, ctx->output_len + plusbytes);

    if (retval == NULL)
        return NULL;

    if (!is_text)
        memcpy(retval, ctx->output_bytes, ctx->output_len);
    else
    {
        // ctx->ignore is deliberately left out ... it's called "ignore".
        OutputListNode *sections[] = {
            ctx->globals.head.next,
            ctx->helpers.head.next,
            ctx->subroutines.head.next,
            ctx->mainline_intro.head.next,
            ctx->mainline.head.next
        };
        char *wptr = retval;
        size_t idx;
        for (idx = 0; idx < STATICARRAYLEN(sections); idx++)
            append_list(&wptr, ctx->endline, ctx->endline_len, sections[idx]);
    } // else

    return retval;
} // build_output
// Ask the current profile for the variable name of register (reg) and
//  return a copy of it allocated with Malloc(). Returns NULL if the
//  allocation fails.
static char *alloc_varname(Context *ctx, const RegisterList *reg)
{
    const char *name = ctx->profile->get_varname(ctx, reg->regtype,
                                                 reg->regnum);
    const size_t bytes = strlen(name) + 1;  // include the null terminator.
    char *copy = (char *) Malloc(ctx, bytes);

    if (copy == NULL)
        return NULL;

    memcpy(copy, name, bytes);
    return copy;
} // alloc_varname
// Build the array of ctx->uniform_count MOJOSHADER_uniform entries that is
//  handed back to the caller.
//  Entries for used, non-constant arrays in ctx->variables come first; the
//  rest are filled from the ctx->uniforms register list, skipping
//  REG_TYPE_CONST registers that belong to an array. The array is zeroed
//  up front, so unwritten slots have a NULL name.
//  Returns NULL if the array can't be allocated.
static MOJOSHADER_uniform *build_uniforms(Context *ctx)
{
    const size_t len = sizeof (MOJOSHADER_uniform) * ctx->uniform_count;
    MOJOSHADER_uniform *retval = (MOJOSHADER_uniform *) Malloc(ctx, len);
    MOJOSHADER_uniform *out = retval;
    MOJOSHADER_uniformType type = MOJOSHADER_UNIFORM_FLOAT;
    VariableList *var = NULL;
    RegisterList *reg = NULL;
    int written = 0;

    if (retval == NULL)
        return NULL;

    memset(retval, '\0', len);

    // pass one: uniform arrays.
    for (var = ctx->variables; var != NULL; var = var->next)
    {
        const char *name = NULL;
        char *namecpy = NULL;

        if ((var->constant) || (!var->used))
            continue;  // only used, non-constant arrays are uniforms.

        name = ctx->profile->get_const_array_varname(ctx, var->index,
                                                     var->count);
        namecpy = (char *) Malloc(ctx, strlen(name) + 1);
        if (namecpy == NULL)
            continue;  // no name, no entry.

        strcpy(namecpy, name);
        out->type = MOJOSHADER_UNIFORM_FLOAT;
        out->index = var->index;
        out->array_count = var->count;
        out->name = namecpy;
        out++;
        written++;
    } // for

    // pass two: individual registers, until every slot is accounted for.
    for (reg = ctx->uniforms.next; written < ctx->uniform_count; reg = reg->next)
    {
        int skip = 0;

        if (reg == NULL)
        {
            fail(ctx, "BUG: mismatched uniform list and count");
            break;
        } // if

        // (type) keeps its previous value if the register type is unknown.
        switch (reg->regtype)
        {
            case REG_TYPE_CONST:
                skip = (reg->array != NULL);  // already covered by an array.
                type = MOJOSHADER_UNIFORM_FLOAT;
                break;

            case REG_TYPE_CONSTINT:
                type = MOJOSHADER_UNIFORM_INT;
                break;

            case REG_TYPE_CONSTBOOL:
                type = MOJOSHADER_UNIFORM_BOOL;
                break;

            default:
                fail(ctx, "unknown uniform datatype");
                break;
        } // switch

        if (skip)
            continue;

        out->type = type;
        out->index = reg->regnum;
        out->array_count = 0;
        out->name = alloc_varname(ctx, reg);
        out++;
        written++;
    } // for

    return retval;
} // build_uniforms
// Build the array of ctx->constant_count MOJOSHADER_constant entries that
//  is handed back to the caller, copying each one from the ctx->constants
//  list in list order. Fails the context if the list runs out before the
//  count is reached. Returns NULL if the array can't be allocated.
static MOJOSHADER_constant *build_constants(Context *ctx)
{
    const size_t len = sizeof (MOJOSHADER_constant) * ctx->constant_count;
    MOJOSHADER_constant *retval = (MOJOSHADER_constant *) Malloc(ctx, len);
    ConstantsList *node = NULL;
    int idx = 0;

    if (retval == NULL)
        return NULL;

    node = ctx->constants;
    while (idx < ctx->constant_count)
    {
        if (node == NULL)
        {
            fail(ctx, "BUG: mismatched constant list and count");
            break;
        } // if

        memcpy(&retval[idx], &node->constant, sizeof (MOJOSHADER_constant));
        node = node->next;
        idx++;
    } // while

    return retval;
} // build_constants
// Build the array of ctx->sampler_count MOJOSHADER_sampler entries that is
//  handed back to the caller, one per node of the ctx->samplers list.
//  The node's (index) field carries the TextureType, which is translated
//  to the public sampler type. The array is zeroed up front, so unwritten
//  slots have a NULL name. Returns NULL if the array can't be allocated.
static MOJOSHADER_sampler *build_samplers(Context *ctx)
{
    const size_t len = sizeof (MOJOSHADER_sampler) * ctx->sampler_count;
    MOJOSHADER_sampler *retval = (MOJOSHADER_sampler *) Malloc(ctx, len);
    MOJOSHADER_samplerType type = MOJOSHADER_SAMPLER_2D;
    RegisterList *node = NULL;
    int idx = 0;

    if (retval == NULL)
        return NULL;

    memset(retval, '\0', len);

    for (node = ctx->samplers.next; idx < ctx->sampler_count; node = node->next)
    {
        MOJOSHADER_sampler *out = &retval[idx];

        if (node == NULL)
        {
            fail(ctx, "BUG: mismatched sampler list and count");
            break;
        } // if

        assert(node->regtype == REG_TYPE_SAMPLER);

        // (type) keeps its previous value if the texture type is unknown.
        switch ((const TextureType) node->index)
        {
            case TEXTURE_TYPE_2D:
                type = MOJOSHADER_SAMPLER_2D;
                break;

            case TEXTURE_TYPE_CUBE:
                type = MOJOSHADER_SAMPLER_CUBE;
                break;

            case TEXTURE_TYPE_VOLUME:
                type = MOJOSHADER_SAMPLER_VOLUME;
                break;

            default:
                fail(ctx, "Unknown sampler type");
                break;
        } // switch

        out->type = type;
        out->index = node->regnum;
        out->name = alloc_varname(ctx, node);
        idx++;
    } // for

    return retval;
} // build_samplers
// Build the array of MOJOSHADER_attribute entries that is handed back to
//  the caller. Space is allocated for ctx->attribute_count entries, but only
//  some registers in ctx->attributes produce one: output-type registers are
//  always dropped, and TEXTURE/MISCTYPE/INPUT registers are dropped for
//  pixel shaders. The number of entries actually written is stored in
//  (*_count). Returns NULL (with *_count == 0) when there are no attributes
//  or the array can't be allocated.
static MOJOSHADER_attribute *build_attributes(Context *ctx, int *_count)
{
    MOJOSHADER_attribute *retval = NULL;
    MOJOSHADER_attribute *out = NULL;
    RegisterList *node = NULL;
    size_t len = 0;
    int written = 0;
    int idx = 0;

    if (ctx->attribute_count == 0)
    {
        *_count = 0;
        return NULL;  // nothing to do.
    } // if

    len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
    retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
    if (retval == NULL)
    {
        *_count = written;
        return retval;
    } // if

    memset(retval, '\0', len);
    out = retval;

    for (node = ctx->attributes.next; idx < ctx->attribute_count; node = node->next)
    {
        int ignore = 0;

        if (node == NULL)
        {
            fail(ctx, "BUG: mismatched attribute list and count");
            break;
        } // if

        idx++;

        switch (node->regtype)
        {
            case REG_TYPE_RASTOUT:
            case REG_TYPE_ATTROUT:
            case REG_TYPE_TEXCRDOUT:
            case REG_TYPE_COLOROUT:
            case REG_TYPE_DEPTHOUT:
                ignore = 1;
                break;

            case REG_TYPE_TEXTURE:
            case REG_TYPE_MISCTYPE:
            case REG_TYPE_INPUT:
                ignore = shader_is_pixel(ctx);
                break;

            default:
                ignore = 0;
                break;
        } // switch

        if (ignore)
            continue;

        if (shader_is_pixel(ctx))
        {
            fail(ctx, "BUG: pixel shader with vertex attributes");
            continue;
        } // if

        out->usage = node->usage;
        out->index = node->index;
        out->name = alloc_varname(ctx, node);
        out++;
        written++;
    } // for

    *_count = written;
    return retval;
} // build_attributes
// Package the results held in (ctx) into a MOJOSHADER_parseData for the
//  caller.
//  Returns &out_of_mem_data if the parseData struct itself can't be
//  allocated. Otherwise the struct always carries the allocator info; on
//  success it also carries the output and the constant/uniform/attribute/
//  sampler tables, and on failure it carries only the error string, which
//  is taken over from the context.
static MOJOSHADER_parseData *build_parsedata(Context *ctx)
{
    char *output = NULL;
    MOJOSHADER_constant *constants = NULL;
    MOJOSHADER_uniform *uniforms = NULL;
    MOJOSHADER_attribute *attributes = NULL;
    MOJOSHADER_sampler *samplers = NULL;
    int attribute_count = 0;
    int idx = 0;
    MOJOSHADER_parseData *retval =
        (MOJOSHADER_parseData *) Malloc(ctx, sizeof (MOJOSHADER_parseData));

    if (retval == NULL)
        return &out_of_mem_data;

    memset(retval, '\0', sizeof (*retval));

    // the built-in allocator is reported to the caller as NULL.
    retval->malloc = (ctx->malloc == internal_malloc) ? NULL : ctx->malloc;
    retval->free = (ctx->free == internal_free) ? NULL : ctx->free;
    retval->malloc_data = ctx->malloc_data;

    // each stage runs only if nothing before it has failed.
    if (!isfail(ctx))
        output = build_output(ctx);
    if (!isfail(ctx))
        constants = build_constants(ctx);
    if (!isfail(ctx))
        uniforms = build_uniforms(ctx);
    if (!isfail(ctx))
        attributes = build_attributes(ctx, &attribute_count);
    if (!isfail(ctx))
        samplers = build_samplers(ctx);

    // check once more: any of the build stages may have failed, too.
    if (!isfail(ctx))
    {
        retval->profile = ctx->profile->name;
        retval->output = output;
        retval->output_len = ctx->output_len;
        retval->instruction_count = ctx->instruction_count;
        retval->shader_type = ctx->shader_type;
        retval->major_ver = (int) ctx->major_ver;
        retval->minor_ver = (int) ctx->minor_ver;
        retval->constant_count = ctx->constant_count;
        retval->constants = constants;
        retval->uniform_count = ctx->uniform_count;
        retval->uniforms = uniforms;
        retval->attribute_count = attribute_count;
        retval->attributes = attributes;
        retval->sampler_count = ctx->sampler_count;
        retval->samplers = samplers;
        return retval;
    } // if

    // failure: throw away whatever was built before things went wrong.
    Free(ctx, output);
    Free(ctx, constants);

    if (uniforms != NULL)
    {
        for (idx = 0; idx < ctx->uniform_count; idx++)
            Free(ctx, (void *) uniforms[idx].name);
        Free(ctx, uniforms);
    } // if

    if (attributes != NULL)
    {
        for (idx = 0; idx < attribute_count; idx++)
            Free(ctx, (void *) attributes[idx].name);
        Free(ctx, attributes);
    } // if

    if (samplers != NULL)
    {
        for (idx = 0; idx < ctx->sampler_count; idx++)
            Free(ctx, (void *) samplers[idx].name);
        Free(ctx, samplers);
    } // if

    // hand the error string over to the parseData, and clear it in the
    //  context so destroying the context doesn't release it.
    retval->error = ctx->failstr;
    ctx->failstr = NULL;

    return retval;
} // build_parsedata
// Run after all tokens are parsed: emit declarations, through the profile's
//  emitter callbacks, for everything the shader used, and tally
//  ctx->uniform_count, ctx->sampler_count and ctx->attribute_count.
// Steps, in order:
//  - walk ctx->used_registers and handle each register for which
//    get_defined_register() reports nothing;
//  - emit the used arrays in ctx->variables;
//  - emit the registers that were moved to ctx->uniforms;
//  - emit ctx->samplers;
//  - emit ctx->attributes, input POSITION0 first.
// Returns early, with the context failed, if a vs_3 shader used an output
//  register without declaring it.
static void process_definitions(Context *ctx)
{
// !!! FIXME: apparently, pre ps_3_0, sampler registers don't need to be
// !!! FIXME: DCL'd before use (default to 2d?). We aren't checking
// !!! FIXME: this at the moment, though.
determine_constants_arrays(ctx); // in case this hasn't been called yet.
// uitem is the current tail of the ctx->uniforms list; prev/item walk the
//  ctx->used_registers list, with prev trailing one node behind item.
RegisterList *uitem = &ctx->uniforms;
RegisterList *prev = &ctx->used_registers;
RegisterList *item = prev->next;
while (item != NULL)
{
// save the link now: the uniform case below rewrites item->next.
RegisterList *next = item->next;
const RegisterType regtype = item->regtype;
const int regnum = item->regnum;
if (!get_defined_register(ctx, regtype, regnum))
{
// haven't already dealt with this one.
switch (regtype)
{
// !!! FIXME: I'm not entirely sure this is right...
case REG_TYPE_RASTOUT:
case REG_TYPE_ATTROUT:
case REG_TYPE_TEXCRDOUT:
case REG_TYPE_COLOROUT:
case REG_TYPE_DEPTHOUT:
if (shader_is_vertex(ctx)&&shader_version_atleast(ctx,3,0))
{
fail(ctx, "vs_3 can't use output registers"
" without declaring them first.");
return;
} // if
// Apparently this is an attribute that wasn't DCL'd.
// Add it to the attribute list; deal with it later.
add_attribute_register(ctx, item->regtype, item->regnum,
MOJOSHADER_USAGE_UNKNOWN, 0, 0xF);
break;
// these register types are declared through the profile's global_emitter.
case REG_TYPE_ADDRESS:
case REG_TYPE_PREDICATE:
case REG_TYPE_TEMP:
case REG_TYPE_LOOP:
case REG_TYPE_LABEL:
ctx->profile->global_emitter(ctx, regtype, regnum);
break;
case REG_TYPE_CONST:
case REG_TYPE_CONSTINT:
case REG_TYPE_CONSTBOOL:
// separate uniforms into a different list for now.
// Unlink item from used_registers and append it to the tail of the
//  uniforms list. Setting item = prev makes the "prev = item" after the
//  switch a no-op, so prev keeps pointing at the node before the gap.
prev->next = next;
item->next = NULL;
uitem->next = item;
uitem = item;
item = prev;
break;
// last case in the switch, so the missing break changes nothing.
default:
fail(ctx, "BUG: we used a register we don't know how to define.");
} // switch
} // if
prev = item;
item = next;
} // while
// okay, now deal with uniform/constant arrays...
VariableList *var;
for (var = ctx->variables; var != NULL; var = var->next)
{
if (var->used)
{
if (var->constant)
{
ctx->profile->const_array_emitter(ctx, var->constant,
var->index, var->count);
} // if
else
{
// a used, non-constant array counts as one uniform.
ctx->profile->array_emitter(ctx, var->index, var->count);
ctx->uniform_count++;
} // else
} // if
} // for
// ...and uniforms...
for (item = ctx->uniforms.next; item != NULL; item = item->next)
{
// both stay -1 unless the register falls inside a used array.
int arraybase = -1;
int arraysize = -1;
// check if this is a register contained in an array...
if (item->regtype == REG_TYPE_CONST)
{
for (var = ctx->variables; var != NULL; var = var->next)
{
if (!var->used)
continue;
const int regnum = item->regnum;
const int lo = var->index;
// the array covers registers lo through (lo + var->count - 1).
if ( (regnum >= lo) && (regnum < (lo + var->count)) )
{
assert(!var->constant);
item->array = var; // used when building parseData.
arraybase = lo;
arraysize = var->count;
break;
} // if
} // for
} // if
// array members were already counted once, with their array, above.
if (arraysize < 0) // not part of an array?
ctx->uniform_count++;
ctx->profile->uniform_emitter(ctx, item->regtype, item->regnum,
arraybase, arraysize);
} // for
// ...and samplers...
for (item = ctx->samplers.next; item != NULL; item = item->next)
{
ctx->sampler_count++;
// the sampler's (index) field is passed on as its TextureType.
ctx->profile->sampler_emitter(ctx, item->regnum,
(TextureType) item->index);
} // for
// ...and attributes... (find input POSITION0 here, so it's always first).
for (item = ctx->attributes.next; item != NULL; item = item->next)
{
if ( (item->regtype == REG_TYPE_INPUT) &&
(item->usage == MOJOSHADER_USAGE_POSITION) && (item->index == 0) )
{
ctx->attribute_count++;
ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
MOJOSHADER_USAGE_POSITION, 0,
item->writemask);
// only the first matching entry is emitted here.
break;
} // if
} // for
// ...and attributes... (everything but input POSITION0).
// The condition is the exact negation of the one in the loop above.
for (item = ctx->attributes.next; item != NULL; item = item->next)
{
if ( (item->regtype != REG_TYPE_INPUT) ||
(item->usage != MOJOSHADER_USAGE_POSITION) || (item->index != 0) )
{
ctx->attribute_count++;
ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
item->usage, item->index,
item->writemask);
} // if
} // for
} // process_definitions
// API entry point...
// Parse the shader bytecode in (tokenbuf) and generate output for (profile).
//  (m), (f) and (d) are the optional allocator callbacks and their user
//  data; supply both callbacks or neither.
//  Returns a MOJOSHADER_parseData describing the results (or the error);
//  the static out-of-memory result is returned if only one callback was
//  supplied or the parsing context can't be allocated.
const MOJOSHADER_parseData *MOJOSHADER_parse(const char *profile,
                                             const unsigned char *tokenbuf,
                                             const unsigned int bufsize,
                                             MOJOSHADER_malloc m,
                                             MOJOSHADER_free f, void *d)
{
    MOJOSHADER_parseData *retval = NULL;
    Context *ctx = NULL;
    int rc = FAIL;

    if ((m == NULL) != (f == NULL))
        return &out_of_mem_data;  // supply both or neither.

    ctx = build_context(profile, tokenbuf, bufsize, m, f, d);
    if (ctx == NULL)
        return &out_of_mem_data;

    // Version token always comes first.
    rc = parse_version_token(ctx, profile);

    // (rc) is how many tokens the previous step consumed; keep going until
    //  a step reports nothing more to consume or the context has failed.
    while ( (rc > 0) && (!isfail(ctx)) )
    {
        // reset for every token, and consider an error if it ever overflows!
        ctx->scratchidx = 0;

        if ( ((uint32) rc) > ctx->tokencount )
        {
            fail(ctx, "Corrupted or truncated shader");
            continue;  // the loop condition sees the failure.
        } // if

        ctx->tokens += rc;
        ctx->tokencount -= rc;
        rc = parse_token(ctx);
    } // while

    if (!isfail(ctx))
    {
        process_definitions(ctx);
        if (!isfail(ctx))
            ctx->profile->finalize_emitter(ctx);
    } // if

    retval = build_parsedata(ctx);
    destroy_context(ctx);
    return retval;
} // MOJOSHADER_parse
void MOJOSHADER_freeParseData(const MOJOSHADER_parseData *_data)
{
MOJOSHADER_parseData *data = (MOJOSHADER_parseData *) _data;
if ((data == NULL) || (data == &out_of_mem_data))
return; // no-op.
MOJOSHADER_free f = (data->free == NULL) ? internal_free : data->free;
void *d = data->malloc_data;
int i;
// we don't f(data->profile), because that's internal static data.
if (data->output != NULL) // check for NULL in case of dumb free() impl.
f((void *) data->output, d);
if (data->constants != NULL)
f((void *) data->constants, d);
if (data->uniforms != NULL)
{
for (i = 0; i < data->uniform_count; i++)
{
if (data->uniforms[i].name != NULL)
f((void *) data->uniforms[i].name, d);
} // for
f((void *) data->uniforms, d);
} // if
if (data->attributes != NULL)
{
for (i = 0; i < data->attribute_count; i++)
{
if (data->attributes[i].name != NULL)
f((void *) data->attributes[i].name, d);
} // for
f((void *) data->attributes, d);
} // if
if (data->samplers != NULL)
{
for (i = 0; i < data->sampler_count; i++)
{
if (data->samplers[i].name != NULL)
f((void *) data->samplers[i].name, d);
} // for
f((void *) data->samplers, d);
} // if
if ((data->error != NULL) && (data->error != out_of_mem_str))
f((void *) data->error, d);
f(data, d);
} // MOJOSHADER_freeParseData
// Report the value of MOJOSHADER_VERSION that this code was compiled with.
int MOJOSHADER_version(void)
{
    const int version = MOJOSHADER_VERSION;
    return version;
} // MOJOSHADER_version
// end of mojoshader.c ...