From 6766661ca78623de9c92243764f9a6bc1eefe9df Mon Sep 17 00:00:00 2001 From: Caleb Cornett Date: Thu, 21 May 2020 12:00:55 -0400 Subject: [PATCH] Implement HLSL emitter, MOJOSHADER_d3d11 API --- CMakeLists.txt | 6 + mojoshader.c | 14 + mojoshader.h | 208 +++ mojoshader_d3d11.c | 912 +++++++++++ mojoshader_internal.h | 4 + profiles/mojoshader_profile.h | 6 +- profiles/mojoshader_profile_glsl.c | 6 +- profiles/mojoshader_profile_hlsl.c | 2271 ++++++++++++++++++++++++++++ 8 files changed, 3423 insertions(+), 4 deletions(-) create mode 100644 mojoshader_d3d11.c create mode 100644 profiles/mojoshader_profile_hlsl.c diff --git a/CMakeLists.txt b/CMakeLists.txt index dd77646d..219d099f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ PROJECT(MojoShader) OPTION(BUILD_SHARED_LIBS "Build MojoShader as a shared library" OFF) OPTION(PROFILE_D3D "Build MojoShader with support for the D3D profile" ON) OPTION(PROFILE_BYTECODE "Build MojoShader with support for the BYTECODE profile" ON) +OPTION(PROFILE_HLSL "Build MojoShader with support for the HLSL profile" ON) OPTION(PROFILE_GLSL120 "Build MojoShader with support for the GLSL120 profile" ON) OPTION(PROFILE_GLSLES "Build MojoShader with support for the GLSLES profile" ON) OPTION(PROFILE_GLSL "Build MojoShader with support for the GLSL profile" ON) @@ -108,6 +109,9 @@ ENDIF(NOT PROFILE_D3D) IF(NOT PROFILE_BYTECODE) ADD_DEFINITIONS(-DSUPPORT_PROFILE_BYTECODE=0) ENDIF(NOT PROFILE_BYTECODE) +IF(NOT PROFILE_HLSL) + ADD_DEFINITIONS(-DSUPPORT_PROFILE_HLSL=0) +ENDIF(NOT PROFILE_HLSL) IF(NOT PROFILE_GLSL120) ADD_DEFINITIONS(-DSUPPORT_PROFILE_GLSL120=0) ENDIF(NOT PROFILE_GLSL120) @@ -159,9 +163,11 @@ ADD_LIBRARY(mojoshader mojoshader_common.c mojoshader_opengl.c mojoshader_metal.c + mojoshader_d3d11.c profiles/mojoshader_profile_arb1.c profiles/mojoshader_profile_bytecode.c profiles/mojoshader_profile_d3d.c + profiles/mojoshader_profile_hlsl.c profiles/mojoshader_profile_glsl.c profiles/mojoshader_profile_metal.c profiles/mojoshader_profile_spirv.c diff --git a/mojoshader.c b/mojoshader.c index 1055667b..07ca810b 100644 --- a/mojoshader.c +++ b/mojoshader.c @@ -236,6 +236,15 @@ PREDECLARE_PROFILE(BYTECODE) PREDECLARE_PROFILE(D3D) #endif +#if !SUPPORT_PROFILE_HLSL +#define PROFILE_EMITTER_HLSL(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_HLSL(op) emit_HLSL_##op, +PREDECLARE_PROFILE(HLSL) +#endif + #if !SUPPORT_PROFILE_GLSL #define PROFILE_EMITTER_GLSL(op) #else @@ -300,6 +309,9 @@ static const Profile profiles[] = #if SUPPORT_PROFILE_BYTECODE DEFINE_PROFILE(BYTECODE) #endif +#if SUPPORT_PROFILE_HLSL + DEFINE_PROFILE(HLSL) +#endif #if SUPPORT_PROFILE_GLSL DEFINE_PROFILE(GLSL) #endif @@ -331,6 +343,7 @@ static const struct { const char *from; const char *to; } profileMap[] = #define PROFILE_EMITTERS(op) { \ PROFILE_EMITTER_D3D(op) \ PROFILE_EMITTER_BYTECODE(op) \ + PROFILE_EMITTER_HLSL(op) \ PROFILE_EMITTER_GLSL(op) \ PROFILE_EMITTER_ARB1(op) \ PROFILE_EMITTER_METAL(op) \ @@ -3866,6 +3879,7 @@ int MOJOSHADER_maxShaderModel(const char *profile) #define PROFILE_SHADER_MODEL(p,v) if (strcmp(profile, p) == 0) return v; PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_D3D, 3); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_BYTECODE, 3); + PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_HLSL, 3); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL, 3); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL120, 3); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSLES, 3); diff --git a/mojoshader.h b/mojoshader.h index a0804adb..3b402cf1 100644 --- a/mojoshader.h +++ b/mojoshader.h @@ -685,6 +685,11 @@ typedef struct MOJOSHADER_parseData */ #define MOJOSHADER_PROFILE_BYTECODE "bytecode" +/* + * Profile string for HLSL Shader Model 4 output. + */ +#define MOJOSHADER_PROFILE_HLSL "hlsl" + /* * Profile string for GLSL: OpenGL high-level shader language output. */ @@ -3528,6 +3533,209 @@ DECLSPEC void MOJOSHADER_mtlEndFrame(void); DECLSPEC void MOJOSHADER_mtlDestroyContext(void); +/* D3D11 interface... */ + +typedef struct MOJOSHADER_d3d11Shader MOJOSHADER_d3d11Shader; + +/* + * Prepare MojoShader to manage Direct3D 11 shaders. + * + * You do not need to call this if all you want is MOJOSHADER_parse(). + * + * You must call this once AFTER you have successfully built your D3D11 context. + * + * As MojoShader requires some memory to be allocated, you may provide a + * custom allocator to this function, which will be used to allocate/free + * memory. They function just like malloc() and free(). We do not use + * realloc(). If you don't care, pass NULL in for the allocator functions. + * If your allocator needs instance-specific data, you may supply it with the + * (malloc_d) parameter. This pointer is passed as-is to your (m) and (f) + * functions. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC int MOJOSHADER_d3d11CreateContext(void *device, void *deviceContext, + MOJOSHADER_malloc m, MOJOSHADER_free f, + void *malloc_d); + +/* + * Get any error state we might have picked up, such as failed shader + * compilation. + * + * Returns a human-readable string. This string is for debugging purposes, and + * not guaranteed to be localized, coherent, or user-friendly in any way. + * It's for programmers! + * + * The latest error may remain between calls. New errors replace any existing + * error. Don't check this string for a sign that an error happened, check + * return codes instead and use this for explanation when debugging. + * + * Do not free the returned string: it's a pointer to a static internal + * buffer. Do not keep the pointer around, either, as it's likely to become + * invalid as soon as you call into MojoShader again. + */ +DECLSPEC const char *MOJOSHADER_d3d11GetError(void); + +/* + * Compile a buffer of Direct3D 9 shader bytecode into a Direct3D 11 shader. + * You still need to link the shader before you may render with it. + * + * (mainfn) is the name of the shader's main function. + * (tokenbuf) is a buffer of Direct3D shader bytecode. + * (bufsize) is the size, in bytes, of the bytecode buffer. + * (swiz), (swizcount), (smap), and (smapcount) are passed to + * MOJOSHADER_parse() unmolested. + * + * Returns NULL on error, or a shader handle on success. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC MOJOSHADER_d3d11Shader *MOJOSHADER_d3d11CompileShader(const char *mainfn, + const unsigned char *tokenbuf, + const unsigned int bufsize, + const MOJOSHADER_swizzle *swiz, + const unsigned int swizcount, + const MOJOSHADER_samplerMap *smap, + const unsigned int smapcount); + +/* + * Increments a shader's internal refcount. To decrement the refcount, call + * MOJOSHADER_glDeleteShader(). + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11ShaderAddRef(MOJOSHADER_d3d11Shader *shader); + +/* + * Get the MOJOSHADER_parseData structure that was produced from the + * call to MOJOSHADER_d3d11CompileShader(). + * + * This data is read-only, and you should NOT attempt to free it. This + * pointer remains valid until the shader is deleted. + */ +DECLSPEC const MOJOSHADER_parseData *MOJOSHADER_d3d11GetShaderParseData( + MOJOSHADER_d3d11Shader *shader); + +/* + * This binds individual shaders together, to be linked into a single working + * program once MOJOSHADER_d3d11ProgramReady is called. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11BindShaders(MOJOSHADER_d3d11Shader *vshader, + MOJOSHADER_d3d11Shader *pshader); + +/* + * This queries for the shaders currently bound to the active context. + * + * This function is only for convenience, specifically for compatibility with + * the effects API. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11GetBoundShaders(MOJOSHADER_d3d11Shader **vshader, + MOJOSHADER_d3d11Shader **pshader); + +/* + * Fills register pointers with pointers that are directly used to push uniform + * data to the D3D11 shader context. + * + * This function is really just for the effects API, you should NOT be using + * this unless you know every single line of MojoShader from memory. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11MapUniformBufferMemory(float **vsf, int **vsi, unsigned char **vsb, + float **psf, int **psi, unsigned char **psb); + +/* + * Tells the context that you are done with the memory mapped by + * MOJOSHADER_d3d11MapUniformBufferMemory(). + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11UnmapUniformBufferMemory(); + +/* + * Return the location of a vertex attribute for the given vertex shader. + * + * (usage) and (index) map to Direct3D vertex declaration values: COLOR1 would + * be MOJOSHADER_USAGE_COLOR and 1. + * + * The return value is the index of the attribute to be used when building the + * input layout object. + */ +DECLSPEC int MOJOSHADER_d3d11GetVertexAttribLocation(MOJOSHADER_d3d11Shader *vert, + MOJOSHADER_usage usage, + int index); + +/* + * Using the given input layout, compiles the vertex shader with input + * parameters that will be compatible with the incoming vertex data. + * + * (inputLayoutHash) is an application-defined value to differentiate unique + * vertex declarations that will be passed to the vertex shader. + * (elements) is an array of D3D11_INPUT_ELEMENT_DESCs, with (elementCount) + * entries. (bytecode) and (bytecodeLength) will be filled with the final + * compiled D3D11 vertex shader. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11CompileVertexShader(unsigned long long inputLayoutHash, + void *elements, int elementCount, + void **bytecode, int *bytecodeLength); + +/* + * Inform MojoShader that it should commit any pending state and prepare the + * final shader program object, linking the input/output parameter data to + * be compatible with the more-struct Shader Model 4 rule set. This must be + * called after you bind shaders and update any inputs, right before you start + * drawing, so any outstanding changes made to the shared constants array (etc) + * can propagate to the shader during this call. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11ProgramReady(unsigned long long inputLayoutHash); + +/* + * Free the resources of a compiled shader. This will delete the shader object + * and free memory. + * + * This call is only as thread safe as your D3D11 context! If you call your + * context from multiple threads, you must protect this call with whatever + * thread synchronization technique you have for your other D3D calls. + */ +DECLSPEC void MOJOSHADER_d3d11DeleteShader(MOJOSHADER_d3d11Shader *shader); + +/* + * Deinitialize MojoShader's D3D11 shader management. + * + * This will clean up resources previously allocated for the active context. + * + * This will NOT clean up shaders you created! Please destroy all shaders + * before calling this function. + */ +DECLSPEC void MOJOSHADER_d3d11DestroyContext(void); + + /* Effects interface... */ #include "mojoshader_effects.h" diff --git a/mojoshader_d3d11.c b/mojoshader_d3d11.c new file mode 100644 index 00000000..52b7343a --- /dev/null +++ b/mojoshader_d3d11.c @@ -0,0 +1,912 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN 1 +#include // Include this early to avoid SDL conflicts +#endif + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_internal.h" + +#if SUPPORT_PROFILE_HLSL + +#define D3D11_NO_HELPERS +#define CINTERFACE +#define COBJMACROS +#include + +#ifndef WINAPI_FAMILY_WINRT +#define WINAPI_FAMILY_WINRT 0 +#endif +#if WINAPI_FAMILY_WINRT +#include +#endif + +/* Error state */ + +static char error_buffer[1024] = { '\0' }; + +static void set_error(const char *str) +{ + snprintf(error_buffer, sizeof (error_buffer), "%s", str); +} // set_error + +static inline void out_of_memory(void) +{ + set_error("out of memory"); +} // out_of_memory + +/* D3DCompile signature */ + +typedef HRESULT(WINAPI *PFN_D3DCOMPILE)( + LPCVOID pSrcData, + SIZE_T SrcDataSize, + LPCSTR pSourceName, + const D3D_SHADER_MACRO *pDefines, + ID3DInclude *pInclude, + LPCSTR pEntrypoint, + LPCSTR pTarget, + UINT Flags1, + UINT Flags2, + ID3DBlob **ppCode, + ID3DBlob **ppErrorMsgs +); + +/* Structs */ + +typedef struct d3d11ShaderMap +{ + void *val; + union + { + struct + { + uint64 layoutHash; + ID3D10Blob *blob; + } vertex; + struct + { + MOJOSHADER_d3d11Shader *vshader; + } pixel; + }; +} d3d11ShaderMap; + +typedef struct MOJOSHADER_d3d11Shader +{ + const MOJOSHADER_parseData *parseData; + uint32 refcount; + ID3D11Buffer *ubo; + size_t buflen; + unsigned char *constantData; + unsigned int mapCapacity; + unsigned int numMaps; + d3d11ShaderMap *shaderMaps; +} MOJOSHADER_d3d11Shader; + +// Max entries for each register file type... +#define MAX_REG_FILE_F 8192 +#define MAX_REG_FILE_I 2047 +#define MAX_REG_FILE_B 2047 + +typedef struct MOJOSHADER_d3d11Context +{ + // Allocators... + MOJOSHADER_malloc malloc_fn; + MOJOSHADER_free free_fn; + void *malloc_data; + + // The constant register files... + // !!! FIXME: Man, it kills me how much memory this takes... + // !!! FIXME: ... make this dynamically allocated on demand. + float vs_reg_file_f[MAX_REG_FILE_F * 4]; + int vs_reg_file_i[MAX_REG_FILE_I * 4]; + uint8 vs_reg_file_b[MAX_REG_FILE_B]; + float ps_reg_file_f[MAX_REG_FILE_F * 4]; + int ps_reg_file_i[MAX_REG_FILE_I * 4]; + uint8 ps_reg_file_b[MAX_REG_FILE_B]; + + // Pointer to the active ID3D11Device. + ID3D11Device *device; + + // Pointer to the ID3D11DeviceContext. + ID3D11DeviceContext *deviceContext; + + // Currently bound vertex and pixel shaders. + MOJOSHADER_d3d11Shader *vertexShader; + MOJOSHADER_d3d11Shader *pixelShader; + int vertexNeedsBound; + int pixelNeedsBound; + + // D3DCompile function pointer. + PFN_D3DCOMPILE D3DCompileFunc; +#if !WINAPI_FAMILY_WINRT + HMODULE d3dcompilerDLL; +#endif +} MOJOSHADER_d3d11Context; + +static MOJOSHADER_d3d11Context *ctx = NULL; + +/* Uniform buffer utilities */ + +static inline int next_highest_alignment(int n) +{ + const int align = 16; + return align * ((n + align - 1) / align); +} // next_highest_alignment + +static inline void *get_uniform_buffer(MOJOSHADER_d3d11Shader *shader) +{ + return (shader == NULL || shader->ubo == NULL) ? NULL : shader->ubo; +} // get_uniform_buffer + +static void update_uniform_buffer(MOJOSHADER_d3d11Shader *shader) +{ + if (shader == NULL || shader->ubo == NULL) + return; + + float *regF; int *regI; uint8 *regB; + if (shader->parseData->shader_type == MOJOSHADER_TYPE_VERTEX) + { + regF = ctx->vs_reg_file_f; + regI = ctx->vs_reg_file_i; + regB = ctx->vs_reg_file_b; + } // if + else + { + regF = ctx->ps_reg_file_f; + regI = ctx->ps_reg_file_i; + regB = ctx->ps_reg_file_b; + } // else + + // Update the buffer contents + int needsUpdate = 0; + size_t offset = 0; + for (int i = 0; i < shader->parseData->uniform_count; i++) + { + if (shader->parseData->uniforms[i].constant) + continue; + + int idx = shader->parseData->uniforms[i].index; + int arrayCount = shader->parseData->uniforms[i].array_count; + + void *src = NULL; + void *dst = NULL; + size_t size = arrayCount ? arrayCount : 1; + + switch (shader->parseData->uniforms[i].type) + { + case MOJOSHADER_UNIFORM_FLOAT: + src = ®F[4 * idx]; + dst = shader->constantData + offset; + size *= 16; + break; + + case MOJOSHADER_UNIFORM_INT: + src = ®I[4 * idx]; + dst = shader->constantData + offset; + size *= 16; + break; + + case MOJOSHADER_UNIFORM_BOOL: + src = ®B[idx]; + dst = shader->constantData + offset; + break; + + default: + assert(0); // This should never happen. + break; + } // switch + + if (memcmp(dst, src, size) != 0) + { + memcpy(dst, src, size); + needsUpdate = 1; + } // if + + offset += size; + } // for + + if (needsUpdate) + { + // Map the buffer + D3D11_MAPPED_SUBRESOURCE res; + ID3D11DeviceContext_Map((ID3D11DeviceContext*) ctx->deviceContext, + (ID3D11Resource*) shader->ubo, 0, + D3D11_MAP_WRITE_DISCARD, 0, &res); + + // Copy the contents + memcpy(res.pData, shader->constantData, shader->buflen); + + // Unmap the buffer + ID3D11DeviceContext_Unmap( + (ID3D11DeviceContext*) ctx->deviceContext, + (ID3D11Resource*) shader->ubo, + 0 + ); + } // if +} // update_uniform_buffer + +static inline void expand_map(MOJOSHADER_d3d11Shader *shader) +{ + if (shader->numMaps == shader->mapCapacity) + { + d3d11ShaderMap *newMap = (d3d11ShaderMap *) ctx->malloc_fn( + sizeof(d3d11ShaderMap) * shader->mapCapacity * 2, + ctx->malloc_data + ); + memcpy(newMap, shader->shaderMaps, + sizeof(d3d11ShaderMap) * shader->mapCapacity); + shader->mapCapacity *= 2; + ctx->free_fn(shader->shaderMaps, ctx->malloc_data); + shader->shaderMaps = newMap; + newMap = NULL; + } // if +} // expand_map + +static inline int element_is_uint(DXGI_FORMAT format) +{ + return format == DXGI_FORMAT_R32G32B32A32_UINT || + format == DXGI_FORMAT_R32G32B32_UINT || + format == DXGI_FORMAT_R16G16B16A16_UINT || + format == DXGI_FORMAT_R32G32_UINT || + format == DXGI_FORMAT_R10G10B10A2_UINT || + format == DXGI_FORMAT_R8G8B8A8_UINT || + format == DXGI_FORMAT_R16G16_UINT || + format == DXGI_FORMAT_R32_UINT || + format == DXGI_FORMAT_R8G8_UINT || + format == DXGI_FORMAT_R16_UINT || + format == DXGI_FORMAT_R8_UINT; +} // element_is_uint + +static inline int element_is_int(DXGI_FORMAT format) +{ + return format == DXGI_FORMAT_R32G32B32A32_SINT || + format == DXGI_FORMAT_R32G32B32_SINT || + format == DXGI_FORMAT_R16G16B16A16_SINT || + format == DXGI_FORMAT_R32G32_SINT || + format == DXGI_FORMAT_R8G8B8A8_SINT || + format == DXGI_FORMAT_R16G16_SINT || + format == DXGI_FORMAT_R32_SINT || + format == DXGI_FORMAT_R8G8_SINT || + format == DXGI_FORMAT_R16_SINT || + format == DXGI_FORMAT_R8_SINT; +} // element_is_int + +/* Shader Compilation Utilities */ + +static ID3D11VertexShader *compileVertexShader(MOJOSHADER_d3d11Shader *shader, + const char *src, int src_len, + ID3D10Blob **blob) +{ + const MOJOSHADER_parseData *pd = shader->parseData; + HRESULT result = ctx->D3DCompileFunc(src, src_len, pd->mainfn, + NULL, NULL, pd->mainfn, "vs_4_0", + 0, 0, blob, blob); + + if (result < 0) + { + set_error((const char *) ID3D10Blob_GetBufferPointer(*blob)); + ID3D10Blob_Release(*blob); + return NULL; + } // if + + void *bytecode = ID3D10Blob_GetBufferPointer(*blob); + int bytecodeLength = ID3D10Blob_GetBufferSize(*blob); + ID3D11VertexShader *ret = NULL; + ID3D11Device_CreateVertexShader(ctx->device, bytecode, bytecodeLength, + NULL, &ret); + return ret; +} // compileVertexShader + +static void replaceVarname(const char *find, const char *replace, + const char **source) +{ + const char *srcbuf = *source; + size_t find_len = strlen(find); + size_t replace_len = strlen(replace); + + #define IS_PARTIAL_TOKEN(token) \ + (isalnum(*(token + find_len)) || isalnum(*(token-1))) + + // How many times does `find` occur in the source buffer? + int count = 0; + char *ptr = (char *) strstr(srcbuf, find); + while (ptr != NULL) + { + if (!IS_PARTIAL_TOKEN(ptr)) + count++; + ptr = strstr(ptr + find_len, find); + } // while + + // How big should we make the new text buffer? + size_t oldlen = strlen(srcbuf) + 1; + size_t newlen = oldlen + (count * (replace_len - find_len)); + + // Easy case; just find/replace in the original buffer + if (newlen == oldlen) + { + ptr = (char *) strstr(srcbuf, find); + while (ptr != NULL) + { + if (!IS_PARTIAL_TOKEN(ptr)) + memcpy(ptr, replace, replace_len); + ptr = strstr(ptr + find_len, find); + } // while + return; + } // if + + // Allocate a new buffer + char *newbuf = (char *) ctx->malloc_fn(newlen, ctx->malloc_data); + memset(newbuf, '\0', newlen); + + // Find + replace + char *prev_ptr = (char *) srcbuf; + char *curr_ptr = (char *) newbuf; + ptr = (char*) strstr(srcbuf, find); + while (ptr != NULL) + { + memcpy(curr_ptr, prev_ptr, ptr - prev_ptr); + curr_ptr += ptr - prev_ptr; + + if (!IS_PARTIAL_TOKEN(ptr)) + { + memcpy(curr_ptr, replace, replace_len); + curr_ptr += replace_len; + } // if + else + { + // Don't accidentally eat partial tokens... + memcpy(curr_ptr, find, find_len); + curr_ptr += find_len; + } // else + + prev_ptr = ptr + find_len; + ptr = strstr(prev_ptr, find); + } // while + + #undef IS_PARTIAL_TOKEN + + // Copy the remaining part of the source buffer + memcpy(curr_ptr, prev_ptr, (srcbuf + oldlen) - prev_ptr); + + // Free the source buffer + ctx->free_fn((void *) srcbuf, ctx->malloc_data); + + // Point the source parameter to the new buffer + *source = newbuf; +} // replaceVarname + +static char *rewritePixelShader(MOJOSHADER_d3d11Shader *vshader, + MOJOSHADER_d3d11Shader *pshader) +{ + const MOJOSHADER_parseData *vpd = vshader->parseData; + const MOJOSHADER_parseData *ppd = pshader->parseData; + const char *_Output = "_Output" ENDLINE_STR "{" ENDLINE_STR; + const char *_Input = "_Input" ENDLINE_STR "{" ENDLINE_STR; + const char *vsrc = vpd->output; + const char *psrc = ppd->output; + const char *a, *b, *vout, *pstart, *vface, *pend; + size_t substr_len; + char *pfinal; + + #define MAKE_STRBUF(buf) \ + substr_len = b - a; \ + buf = (const char *) ctx->malloc_fn(substr_len + 1, ctx->malloc_data); \ + memset((void *) buf, '\0', substr_len + 1); \ + memcpy((void *) buf, a, substr_len); + + // Copy the vertex function's output struct into a buffer + a = strstr(vsrc, _Output) + strlen(_Output); + b = a; + while (*(b++) != '}'); + b--; + MAKE_STRBUF(vout) + + // Split up the pixel shader text... + + // ...everything up to the input contents... + a = psrc; + b = strstr(psrc, _Input) + strlen(_Input); + MAKE_STRBUF(pstart) + + // ...everything after the input contents. + a = b; + while (*(a++) != '}'); + a--; + while (*(b++) != '\0'); + MAKE_STRBUF(pend) + + // Find matching semantics + int i, j; + int vfaceidx = -1; + const char *pvarname, *vvarname; + for (i = 0; i < ppd->attribute_count; i++) + { + for (j = 0; j < vpd->output_count; j++) + { + if (ppd->attributes[i].usage == vpd->outputs[j].usage && + ppd->attributes[i].index == vpd->outputs[j].index) + { + pvarname = ppd->attributes[i].name; + vvarname = vpd->outputs[j].name; + if (strcmp(pvarname, vvarname) != 0) + replaceVarname(pvarname, vvarname, &pend); + } // if + else if (strcmp(ppd->attributes[i].name, "vPos") == 0 && + vpd->outputs[j].usage == MOJOSHADER_USAGE_POSITION && + vpd->outputs[j].index == 0) + { + pvarname = ppd->attributes[i].name; + vvarname = vpd->outputs[j].name; + if (strcmp(pvarname, vvarname) != 0) + replaceVarname(pvarname, vvarname, &pend); + } // else if + } // for + + if (strcmp(ppd->attributes[i].name, "vFace") == 0) + vfaceidx = i; + } // for + + // Special handling for VFACE + vface = (vfaceidx != -1) ? "\tbool m_vFace : SV_IsFrontFace;\n" : ""; + + // Concatenate the shader pieces together + substr_len = strlen(pstart) + strlen(vout) + strlen(vface) + strlen(pend); + pfinal = (char *) ctx->malloc_fn(substr_len + 1, ctx->malloc_data); + memset((void *) pfinal, '\0', substr_len + 1); + memcpy(pfinal, pstart, strlen(pstart)); + memcpy(pfinal + strlen(pstart), vout, strlen(vout)); + memcpy(pfinal + strlen(pstart) + strlen(vout), vface, strlen(vface)); + memcpy(pfinal + strlen(pstart) + strlen(vout) + strlen(vface), pend, strlen(pend)); + + // Free the temporary buffers + ctx->free_fn((void *) vout, ctx->malloc_data); + ctx->free_fn((void *) pstart, ctx->malloc_data); + ctx->free_fn((void *) pend, ctx->malloc_data); + + #undef MAKE_STRBUF + + return pfinal; +} // spliceVertexShaderInput + +static ID3D11PixelShader *compilePixelShader(MOJOSHADER_d3d11Shader *vshader, + MOJOSHADER_d3d11Shader *pshader) +{ + ID3D11PixelShader *retval = NULL; + const char *source; + ID3DBlob *blob; + HRESULT result; + int needs_free; + + if (pshader->parseData->attribute_count > 0) + { + source = rewritePixelShader(vshader, pshader); + needs_free = 1; + } // if + else + { + source = pshader->parseData->output; + needs_free = 0; + } // else + + result = ctx->D3DCompileFunc(source, strlen(source), + pshader->parseData->mainfn, NULL, NULL, + pshader->parseData->mainfn, "ps_4_0", 0, 0, + &blob, &blob); + + if (result < 0) + { + set_error((const char *) ID3D10Blob_GetBufferPointer(blob)); + ctx->free_fn((void *) source, ctx->malloc_data); + return NULL; + } // if + + ID3D11Device_CreatePixelShader(ctx->device, + ID3D10Blob_GetBufferPointer(blob), + ID3D10Blob_GetBufferSize(blob), + NULL, &retval); + + ID3D10Blob_Release(blob); + if (needs_free) + ctx->free_fn((void *) source, ctx->malloc_data); + return retval; +} // compilePixelShader + +/* Public API */ + +int MOJOSHADER_d3d11CreateContext(void *device, void *deviceContext, + MOJOSHADER_malloc m, MOJOSHADER_free f, + void *malloc_d) +{ + assert(ctx == NULL); + + if (m == NULL) m = MOJOSHADER_internal_malloc; + if (f == NULL) f = MOJOSHADER_internal_free; + + ctx = (MOJOSHADER_d3d11Context *) m(sizeof(MOJOSHADER_d3d11Context), malloc_d); + if (ctx == NULL) + { + out_of_memory(); + goto init_fail; + } // if + + memset(ctx, '\0', sizeof (MOJOSHADER_d3d11Context)); + ctx->malloc_fn = m; + ctx->free_fn = f; + ctx->malloc_data = malloc_d; + + // Store references to the D3D device and immediate context + ctx->device = (ID3D11Device*) device; + ctx->deviceContext = (ID3D11DeviceContext*) deviceContext; + + // Grab the D3DCompile function pointer +#if WINAPI_FAMILY_WINRT + ctx->D3DCompileFunc = D3DCompile; +#else + ctx->d3dcompilerDLL = LoadLibrary("d3dcompiler_47.dll"); + assert(ctx->d3dcompilerDLL != NULL); + ctx->D3DCompileFunc = (PFN_D3DCOMPILE) GetProcAddress(ctx->d3dcompilerDLL, + "D3DCompile"); +#endif /* WINAPI_FAMILY_WINRT */ + + return 0; + +init_fail: + if (ctx != NULL) + f(ctx, malloc_d); + return -1; +} // MOJOSHADER_d3d11CreateContext + +void MOJOSHADER_d3d11DestroyContext(void) +{ +#if !WINAPI_FAMILY_WINRT + FreeLibrary(ctx->d3dcompilerDLL); +#endif + ctx->free_fn(ctx, ctx->malloc_data); + ctx = NULL; +} // MOJOSHADER_d3d11DestroyContext + +MOJOSHADER_d3d11Shader *MOJOSHADER_d3d11CompileShader(const char *mainfn, + const unsigned char *tokenbuf, + const unsigned int bufsize, + const MOJOSHADER_swizzle *swiz, + const unsigned int swizcount, + const MOJOSHADER_samplerMap *smap, + const unsigned int smapcount) +{ + MOJOSHADER_malloc m = ctx->malloc_fn; + MOJOSHADER_free f = ctx->free_fn; + void *d = ctx->malloc_data; + int i; + + const MOJOSHADER_parseData *pd = MOJOSHADER_parse("hlsl", mainfn, tokenbuf, + bufsize, swiz, swizcount, + smap, smapcount, m, f, d); + + if (pd->error_count > 0) + { + // !!! FIXME: put multiple errors in the buffer? Don't use + // !!! FIXME: MOJOSHADER_d3d11GetError() for this? + set_error(pd->errors[0].error); + goto compile_shader_fail; + } // if + + MOJOSHADER_d3d11Shader *retval = (MOJOSHADER_d3d11Shader *) m(sizeof(MOJOSHADER_d3d11Shader), d); + if (retval == NULL) + goto compile_shader_fail; + + retval->parseData = pd; + retval->refcount = 1; + retval->ubo = NULL; + retval->constantData = NULL; + retval->buflen = 0; + retval->numMaps = 0; + + // Allocate shader maps + retval->mapCapacity = 4; // arbitrary! + retval->shaderMaps = (d3d11ShaderMap *) m(retval->mapCapacity * sizeof(d3d11ShaderMap), d); + if (retval->shaderMaps == NULL) + goto compile_shader_fail; + + memset(retval->shaderMaps, '\0', retval->mapCapacity * sizeof(d3d11ShaderMap)); + + // Create the uniform buffer, if needed + if (pd->uniform_count > 0) + { + // Calculate how big we need to make the buffer + for (i = 0; i < pd->uniform_count; i++) + { + int arrayCount = pd->uniforms[i].array_count; + int uniformSize = 16; + if (pd->uniforms[i].type == MOJOSHADER_UNIFORM_BOOL) + uniformSize = 1; + retval->buflen += (arrayCount ? arrayCount : 1) * uniformSize; + } // for + + D3D11_BUFFER_DESC bdesc; + bdesc.ByteWidth = next_highest_alignment(retval->buflen); + bdesc.Usage = D3D11_USAGE_DYNAMIC; + bdesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + bdesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + bdesc.MiscFlags = 0; + bdesc.StructureByteStride = 0; + ID3D11Device_CreateBuffer((ID3D11Device*) ctx->device, &bdesc, NULL, + (ID3D11Buffer**) &retval->ubo); + + // Additionally allocate a CPU-side staging buffer + retval->constantData = (unsigned char *) m(retval->buflen, d); + memset(retval->constantData, '\0', retval->buflen); + } // if + + return retval; + +compile_shader_fail: + MOJOSHADER_freeParseData(pd); + return NULL; +} // MOJOSHADER_d3d11CompileShader + +void MOJOSHADER_d3d11ShaderAddRef(MOJOSHADER_d3d11Shader *shader) +{ + if (shader != NULL) + shader->refcount++; +} // MOJOSHADER_d3d11ShaderAddRef + +void MOJOSHADER_d3d11DeleteShader(MOJOSHADER_d3d11Shader *shader) +{ + if (shader != NULL) + { + if (shader->refcount > 1) + shader->refcount--; + else + { + if (shader->ubo != NULL) + { + ID3D11Buffer_Release((ID3D11Buffer*) shader->ubo); + ctx->free_fn(shader->constantData, ctx->malloc_data); + } // if + + if (shader->parseData->shader_type == MOJOSHADER_TYPE_VERTEX) + { + for (int i = 0; i < shader->numMaps; i++) + { + ID3D11VertexShader_Release((ID3D11VertexShader *) shader->shaderMaps[i].val); + ID3D10Blob_Release(shader->shaderMaps[i].vertex.blob); + } // for + } // if + else if (shader->parseData->shader_type == MOJOSHADER_TYPE_PIXEL) + { + for (int i = 0; i < shader->numMaps; i++) + ID3D11PixelShader_Release((ID3D11PixelShader *) shader->shaderMaps[i].val); + } // else if + + ctx->free_fn(shader->shaderMaps, ctx->malloc_data); + shader->shaderMaps = NULL; + MOJOSHADER_freeParseData(shader->parseData); + ctx->free_fn(shader, ctx->malloc_data); + } // else + } // if +} // MOJOSHADER_d3d11DeleteShader + +const MOJOSHADER_parseData *MOJOSHADER_d3d11GetShaderParseData( + MOJOSHADER_d3d11Shader *shader) +{ + return (shader != NULL) ? shader->parseData : NULL; +} // MOJOSHADER_d3d11GetParseData + +void MOJOSHADER_d3d11BindShaders(MOJOSHADER_d3d11Shader *vshader, + MOJOSHADER_d3d11Shader *pshader) +{ + // Use the last bound shaders in case of NULL + if (vshader != NULL) + { + ctx->vertexShader = vshader; + ctx->vertexNeedsBound = 1; + } // if + + if (pshader != NULL) + { + ctx->pixelShader = pshader; + ctx->pixelNeedsBound = 1; + } // if +} // MOJOSHADER_d3d11BindShaders + +void MOJOSHADER_d3d11GetBoundShaders(MOJOSHADER_d3d11Shader **vshader, + MOJOSHADER_d3d11Shader **pshader) +{ + *vshader = ctx->vertexShader; + *pshader = ctx->pixelShader; +} // MOJOSHADER_d3d11GetBoundShaders + +void MOJOSHADER_d3d11MapUniformBufferMemory(float **vsf, int **vsi, unsigned char **vsb, + float **psf, int **psi, unsigned char **psb) +{ + *vsf = ctx->vs_reg_file_f; + *vsi = ctx->vs_reg_file_i; + *vsb = ctx->vs_reg_file_b; + *psf = ctx->ps_reg_file_f; + *psi = ctx->ps_reg_file_i; + *psb = ctx->ps_reg_file_b; +} // MOJOSHADER_d3d11MapUniformBufferMemory + +void MOJOSHADER_d3d11UnmapUniformBufferMemory() +{ + /* This has nothing to do with unmapping memory + * and everything to do with updating uniform + * buffers with the latest parameter contents. + */ + MOJOSHADER_d3d11Shader *vs, *ps; + MOJOSHADER_d3d11GetBoundShaders(&vs, &ps); + update_uniform_buffer(vs); + update_uniform_buffer(ps); +} // MOJOSHADER_d3d11UnmapUniformBufferMemory + +int MOJOSHADER_d3d11GetVertexAttribLocation(MOJOSHADER_d3d11Shader *vert, + MOJOSHADER_usage usage, int index) +{ + if (vert == NULL) + return -1; + + for (int i = 0; i < vert->parseData->attribute_count; i++) + { + if (vert->parseData->attributes[i].usage == usage && + vert->parseData->attributes[i].index == index) + { + return i; + } // if + } // for + + // failure, couldn't find requested attribute + return -1; +} // MOJOSHADER_d3d11GetVertexAttribLocation + +void MOJOSHADER_d3d11CompileVertexShader(unsigned long long inputLayoutHash, + void* elements, int elementCount, + void **bytecode, int *bytecodeLength) +{ + MOJOSHADER_d3d11Shader *vshader = ctx->vertexShader; + ID3D10Blob *blob; + + // Don't compile if there's already a mapping for this layout. + for (int i = 0; i < vshader->numMaps; i++) + { + if (inputLayoutHash == vshader->shaderMaps[i].vertex.layoutHash) + { + blob = vshader->shaderMaps[i].vertex.blob; + *bytecode = ID3D10Blob_GetBufferPointer(blob); + *bytecodeLength = ID3D10Blob_GetBufferSize(blob); + return; + } // if + } // for + + // Check for and replace non-float types + D3D11_INPUT_ELEMENT_DESC *d3dElements = (D3D11_INPUT_ELEMENT_DESC*) elements; + const char *origSource = vshader->parseData->output; + int srcLength = vshader->parseData->output_len; + char *newSource = (char*) origSource; + for (int i = 0; i < elementCount; i += 1) + { + D3D11_INPUT_ELEMENT_DESC e = d3dElements[i]; + + const char *replace; + if (element_is_uint(e.Format)) + replace = " uint4"; + else if (element_is_int(e.Format)) + replace = " int4"; + else + replace = NULL; + + if (replace != NULL) + { + char sem[16]; + memset(sem, '\0', sizeof(sem)); + snprintf(sem, sizeof(sem), "%s%d", e.SemanticName, e.SemanticIndex); + // !!! FIXME: POSITIONT has no index. What to do? -caleb + + if (newSource == origSource) + { + newSource = (char *) ctx->malloc_fn(srcLength + 1, + ctx->malloc_data); + strcpy(newSource, origSource); + } // if + + char *ptr = strstr(newSource, sem); + assert(ptr != NULL && "Could not find semantic in shader source!"); + + int spaces = 0; + while (spaces < 3) + if (*(--ptr) == ' ') spaces++; + memcpy(ptr - strlen("float4"), replace, strlen(replace)); + } // if + } // for + + // Expand the map array, if needed + expand_map(vshader); + + // Add the new mapping + vshader->shaderMaps[vshader->numMaps].vertex.layoutHash = inputLayoutHash; + ID3D11VertexShader *vs = compileVertexShader(vshader, newSource, + srcLength, &blob); + vshader->shaderMaps[ctx->vertexShader->numMaps].val = vs; + vshader->shaderMaps[ctx->vertexShader->numMaps].vertex.blob = blob; + ctx->vertexShader->numMaps++; + assert(vs != NULL); + + // Return the bytecode info + *bytecode = ID3D10Blob_GetBufferPointer(blob); + *bytecodeLength = ID3D10Blob_GetBufferSize(blob); +} // MOJOSHADER_d3d11CompileVertexShader + +void MOJOSHADER_d3d11ProgramReady(unsigned long long inputLayoutHash) +{ + MOJOSHADER_d3d11Shader *vshader = ctx->vertexShader; + MOJOSHADER_d3d11Shader *pshader = ctx->pixelShader; + + // Vertex shader... + if (ctx->vertexNeedsBound) + { + ID3D11VertexShader *realVS = NULL; + for (int i = 0; i < vshader->numMaps; i++) + { + if (inputLayoutHash == vshader->shaderMaps[i].vertex.layoutHash) + { + realVS = (ID3D11VertexShader *) vshader->shaderMaps[i].val; + break; + } // if + } // for + assert(realVS != NULL); + ID3D11DeviceContext_VSSetShader(ctx->deviceContext, realVS, NULL, 0); + ID3D11DeviceContext_VSSetConstantBuffers(ctx->deviceContext, 0, 1, + &vshader->ubo); + ctx->vertexNeedsBound = 0; + } // if + + // Pixel shader... + if (ctx->pixelNeedsBound) + { + // Is there already a mapping for the current vertex shader? + ID3D11PixelShader *realPS = NULL; + for (int i = 0; i < pshader->numMaps; i++) + { + if (pshader->shaderMaps[i].pixel.vshader == vshader) + { + realPS = (ID3D11PixelShader *) pshader->shaderMaps[i].val; + break; + } // if + } // for + + // We have to create a new vertex/pixel shader mapping... + if (realPS == NULL) + { + // Expand the map array, if needed + expand_map(pshader); + + // Add the new mapping + pshader->shaderMaps[pshader->numMaps].pixel.vshader = vshader; + realPS = compilePixelShader(vshader, pshader); + pshader->shaderMaps[pshader->numMaps].val = realPS; + pshader->numMaps++; + assert(realPS != NULL); + } // if + + ID3D11DeviceContext_PSSetShader(ctx->deviceContext, realPS, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(ctx->deviceContext, 0, 1, + &pshader->ubo); + ctx->pixelNeedsBound = 0; + } // if +} // MOJOSHADER_d3d11ProgramReady + +const char *MOJOSHADER_d3d11GetError(void) +{ + return error_buffer; +} // MOJOSHADER_d3d11GetError + +#endif /* SUPPORT_PROFILE_HLSL */ + +// end of mojoshader_d3d11.c ... diff --git a/mojoshader_internal.h b/mojoshader_internal.h index 8936c856..db8cfce7 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -131,6 +131,10 @@ typedef Uint64 uint64; #define SUPPORT_PROFILE_BYTECODE 1 #endif +#ifndef SUPPORT_PROFILE_HLSL +#define SUPPORT_PROFILE_HLSL 1 +#endif + #ifndef SUPPORT_PROFILE_GLSL #define SUPPORT_PROFILE_GLSL 1 #endif diff --git a/profiles/mojoshader_profile.h b/profiles/mojoshader_profile.h index 39007327..1b2c6ad6 100644 --- a/profiles/mojoshader_profile.h +++ b/profiles/mojoshader_profile.h @@ -165,12 +165,12 @@ typedef struct Context int predicated; int uses_pointsize; int uses_fog; + int need_max_float; // !!! FIXME: move these into SUPPORT_PROFILE sections. int glsl_generated_lit_helper; int glsl_generated_texlod_setup; int glsl_generated_texm3x3spec_helper; - int glsl_need_max_float; int arb1_wrote_position; // !!! FIXME: move these into SUPPORT_PROFILE sections. @@ -213,6 +213,10 @@ typedef struct Context #if SUPPORT_PROFILE_GLSPIRV int profile_supports_glspirv; #endif + +#if SUPPORT_PROFILE_HLSL + char hlsl_outpos_name[16]; +#endif } Context; // Use these macros so we can remove all bits of these profiles from the build. diff --git a/profiles/mojoshader_profile_glsl.c b/profiles/mojoshader_profile_glsl.c index 6f720085..ac8aae1b 100644 --- a/profiles/mojoshader_profile_glsl.c +++ b/profiles/mojoshader_profile_glsl.c @@ -641,7 +641,7 @@ void emit_GLSL_finalize(Context *ctx) if (shader_is_vertex(ctx)) output_line(ctx, "uniform float vpFlip;"); #endif - if (ctx->glsl_need_max_float) + if (ctx->need_max_float) output_line(ctx, "const float FLT_MAX = 1e38;"); pop_output(ctx); } // emit_GLSL_finalize @@ -1196,7 +1196,7 @@ void emit_GLSL_RCP(Context *ctx) { char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); char code[128]; - ctx->glsl_need_max_float = 1; + ctx->need_max_float = 1; make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s == 0.0) ? FLT_MAX : 1.0 / %s", src0, src0); output_line(ctx, "%s", code); } // emit_GLSL_RCP @@ -1205,7 +1205,7 @@ void emit_GLSL_RSQ(Context *ctx) { char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); char code[128]; - ctx->glsl_need_max_float = 1; + ctx->need_max_float = 1; make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s == 0.0) ? FLT_MAX : inversesqrt(abs(%s))", src0, src0); output_line(ctx, "%s", code); } // emit_GLSL_RSQ diff --git a/profiles/mojoshader_profile_hlsl.c b/profiles/mojoshader_profile_hlsl.c new file mode 100644 index 00000000..47fc5317 --- /dev/null +++ b/profiles/mojoshader_profile_hlsl.c @@ -0,0 +1,2271 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_profile.h" + +#pragma GCC visibility push(hidden) + +// !!! FIXME: A lot of this is cut-and-paste from the GLSL/Metal versions. +#if SUPPORT_PROFILE_HLSL + +#define EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \ + void emit_HLSL_##op(Context *ctx) { \ + fail(ctx, #op " unimplemented in hlsl profile"); \ + } + +static inline const char *get_HLSL_register_string(Context *ctx, + const RegisterType regtype, const int regnum, + char *regnum_str, const size_t regnum_size) +{ + // turns out these are identical at the moment. + return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); +} // get_HLSL_register_string + +const char *get_HLSL_uniform_type(Context *ctx, const RegisterType rtype) +{ + switch (rtype) + { + case REG_TYPE_CONST: return "float4"; + case REG_TYPE_CONSTINT: return "int4"; + case REG_TYPE_CONSTBOOL: return "bool"; + default: fail(ctx, "BUG: used a uniform we don't know how to define."); + } // switch + + return NULL; +} // get_HLSL_uniform_type + +const char *get_HLSL_varname_in_buf(Context *ctx, RegisterType rt, + int regnum, char *buf, + const size_t len) +{ + char regnum_str[16]; + const char *regtype_str = get_HLSL_register_string(ctx, rt, regnum, + regnum_str, sizeof (regnum_str)); + snprintf(buf,len,"%s%s", regtype_str, regnum_str); + return buf; +} // get_HLSL_varname_in_buf + +const char *get_HLSL_varname(Context *ctx, RegisterType rt, int regnum) +{ + char buf[64]; + get_HLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof(buf)); + return StrDup(ctx, buf); +} // get_HLSL_varname + +static inline const char *get_HLSL_const_array_varname_in_buf(Context *ctx, + const int base, const int size, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "const_array_%d_%d", base, size); + return buf; +} // get_HLSL_const_array_varname_in_buf + +const char *get_HLSL_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + get_HLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof(buf)); + return StrDup(ctx, buf); +} // get_HLSL_const_array_varname + +static inline const char *get_HLSL_input_array_varname(Context *ctx, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "%s", "vertex_input_array"); + return buf; +} // get_HLSL_input_array_varname + +const char *get_HLSL_uniform_array_varname(Context *ctx, + const RegisterType regtype, + char *buf, const size_t len) +{ + const char *type = get_HLSL_uniform_type(ctx, regtype); + snprintf(buf, len, "uniforms_%s", type); + return buf; +} // get_HLSL_uniform_array_varname + +const char *get_HLSL_destarg_varname(Context *ctx, char *buf, size_t len) +{ + const DestArgInfo *arg = &ctx->dest_arg; + return get_HLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_HLSL_destarg_varname + +const char *get_HLSL_srcarg_varname(Context *ctx, const size_t idx, + char *buf, size_t len) +{ + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + *buf = '\0'; + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + return get_HLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len); +} // get_HLSL_srcarg_varname + +const char *make_HLSL_destarg_assign(Context *, char *, const size_t, + const char *, ...) ISPRINTF(4,5); + +const char *make_HLSL_destarg_assign(Context *ctx, char *buf, + const size_t buflen, + const char *fmt, ...) +{ + int need_parens = 0; + const DestArgInfo *arg = &ctx->dest_arg; + + if (arg->writemask == 0) + { + *buf = '\0'; + return buf; // no writemask? It's a no-op. + } // if + + const char *clampleft = ""; + const char *clampright = ""; + if (arg->result_mod & MOD_SATURATE) + { + clampleft = "saturate("; + clampright = ")"; + } // if + + // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. + + // CENTROID only allowed in DCL opcodes, which shouldn't come through here. + assert((arg->result_mod & MOD_CENTROID) == 0); + + if (ctx->predicated) + { + fail(ctx, "predicated destinations unsupported"); // !!! FIXME + *buf = '\0'; + return buf; + } // if + + char operation[256]; + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(operation, sizeof (operation), fmt, ap); + va_end(ap); + if (len >= sizeof (operation)) + { + fail(ctx, "operation string too large"); // I'm lazy. :P + *buf = '\0'; + return buf; + } // if + + const char *result_shift_str = ""; + switch (arg->result_shift) + { + case 0x1: result_shift_str = " * 2.0"; break; + case 0x2: result_shift_str = " * 4.0"; break; + case 0x3: result_shift_str = " * 8.0"; break; + case 0xD: result_shift_str = " / 8.0"; break; + case 0xE: result_shift_str = " / 4.0"; break; + case 0xF: result_shift_str = " / 2.0"; break; + } // switch + need_parens |= (result_shift_str[0] != '\0'); + + char regnum_str[16]; + const char *regtype_str = get_HLSL_register_string(ctx, arg->regtype, + arg->regnum, regnum_str, + sizeof (regnum_str)); + char writemask_str[6]; + size_t i = 0; + const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); + if (!scalar && !writemask_xyzw(arg->writemask)) + { + writemask_str[i++] = '.'; + if (arg->writemask0) writemask_str[i++] = 'x'; + if (arg->writemask1) writemask_str[i++] = 'y'; + if (arg->writemask2) writemask_str[i++] = 'z'; + if (arg->writemask3) writemask_str[i++] = 'w'; + } // if + writemask_str[i] = '\0'; + assert(i < sizeof (writemask_str)); + + const char *leftparen = (need_parens) ? "(" : ""; + const char *rightparen = (need_parens) ? ")" : ""; + + snprintf(buf, buflen, "%s%s%s = %s%s%s%s%s%s;", regtype_str, + regnum_str, writemask_str,clampleft, leftparen, + operation, rightparen, result_shift_str, clampright); + // !!! FIXME: make sure the scratch buffer was large enough. + return buf; +} // make_HLSL_destarg_assign + + +char *make_HLSL_swizzle_string(char *swiz_str, const size_t strsize, + const int swizzle, const int writemask) +{ + size_t i = 0; + if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) ) + { + const int writemask0 = (writemask >> 0) & 0x1; + const int writemask1 = (writemask >> 1) & 0x1; + const int writemask2 = (writemask >> 2) & 0x1; + const int writemask3 = (writemask >> 3) & 0x1; + + const int swizzle_x = (swizzle >> 0) & 0x3; + const int swizzle_y = (swizzle >> 2) & 0x3; + const int swizzle_z = (swizzle >> 4) & 0x3; + const int swizzle_w = (swizzle >> 6) & 0x3; + + swiz_str[i++] = '.'; + if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x]; + if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y]; + if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z]; + if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w]; + } // if + assert(i < strsize); + swiz_str[i] = '\0'; + return swiz_str; +} // make_HLSL_swizzle_string + + +const char *make_HLSL_srcarg_string(Context *ctx, const size_t idx, + const int writemask, char *buf, + const size_t buflen) +{ + *buf = '\0'; + + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + return buf; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + + const char *premod_str = ""; + const char *postmod_str = ""; + switch (arg->src_mod) + { + case SRCMOD_NEGATE: + premod_str = "-"; + break; + + case SRCMOD_BIASNEGATE: + premod_str = "-("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_BIAS: + premod_str = "("; + postmod_str = " - 0.5)"; + break; + + case SRCMOD_SIGNNEGATE: + premod_str = "-(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_SIGN: + premod_str = "(("; + postmod_str = " - 0.5) * 2.0)"; + break; + + case SRCMOD_COMPLEMENT: + premod_str = "(1.0 - "; + postmod_str = ")"; + break; + + case SRCMOD_X2NEGATE: + premod_str = "-("; + postmod_str = " * 2.0)"; + break; + + case SRCMOD_X2: + premod_str = "("; + postmod_str = " * 2.0)"; + break; + + case SRCMOD_DZ: + fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME + postmod_str = "_dz"; + break; + + case SRCMOD_DW: + fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME + postmod_str = "_dw"; + break; + + case SRCMOD_ABSNEGATE: + premod_str = "-abs("; + postmod_str = ")"; + break; + + case SRCMOD_ABS: + premod_str = "abs("; + postmod_str = ")"; + break; + + case SRCMOD_NOT: + premod_str = "!"; + break; + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + } // switch + + const char *regtype_str = NULL; + + if (!arg->relative) + { + regtype_str = get_HLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, + (char *) alloca(64), 64); + } // if + + const char *rel_lbracket = ""; + char rel_offset[32] = { '\0' }; + const char *rel_rbracket = ""; + char rel_swizzle[4] = { '\0' }; + const char *rel_regtype_str = ""; + if (arg->relative) + { + if (arg->regtype == REG_TYPE_INPUT) + regtype_str=get_HLSL_input_array_varname(ctx,(char*)alloca(64),64); + else + { + assert(arg->regtype == REG_TYPE_CONST); + const int arrayidx = arg->relative_array->index; + const int offset = arg->regnum - arrayidx; + assert(offset >= 0); + if (arg->relative_array->constant) + { + const int arraysize = arg->relative_array->count; + regtype_str = get_HLSL_const_array_varname_in_buf(ctx, + arrayidx, arraysize, (char *) alloca(64), 64); + if (offset != 0) + snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset); + } // if + else + { + regtype_str = get_HLSL_uniform_array_varname(ctx, arg->regtype, + (char *) alloca(64), 64); + if (offset == 0) + { + snprintf(rel_offset, sizeof (rel_offset), + "ARRAYBASE_%d + ", arrayidx); + } // if + else + { + snprintf(rel_offset, sizeof (rel_offset), + "(ARRAYBASE_%d + %d) + ", arrayidx, offset); + } // else + } // else + } // else + + rel_lbracket = "["; + + rel_regtype_str = get_HLSL_varname_in_buf(ctx, arg->relative_regtype, + arg->relative_regnum, + (char *) alloca(64), 64); + rel_swizzle[0] = '.'; + rel_swizzle[1] = swizzle_channels[arg->relative_component]; + rel_swizzle[2] = '\0'; + rel_rbracket = "]"; + } // if + + char swiz_str[6] = { '\0' }; + if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) + { + make_HLSL_swizzle_string(swiz_str, sizeof (swiz_str), + arg->swizzle, writemask); + } // if + + if (regtype_str == NULL) + { + fail(ctx, "Unknown source register type."); + return buf; + } // if + + snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s", + premod_str, regtype_str, rel_lbracket, rel_offset, + rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str, + postmod_str); + // !!! FIXME: make sure the scratch buffer was large enough. + return buf; +} // make_HLSL_srcarg_string + +// generate some convenience functions. +#define MAKE_HLSL_SRCARG_STRING_(mask, bitmask) \ + static inline const char *make_HLSL_srcarg_string_##mask(Context *ctx, \ + const size_t idx, char *buf, \ + const size_t buflen) { \ + return make_HLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \ + } +MAKE_HLSL_SRCARG_STRING_(x, (1 << 0)) +MAKE_HLSL_SRCARG_STRING_(y, (1 << 1)) +MAKE_HLSL_SRCARG_STRING_(z, (1 << 2)) +MAKE_HLSL_SRCARG_STRING_(w, (1 << 3)) +MAKE_HLSL_SRCARG_STRING_(scalar, (1 << 0)) +MAKE_HLSL_SRCARG_STRING_(full, 0xF) +MAKE_HLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask) +MAKE_HLSL_SRCARG_STRING_(vec3, 0x7) +MAKE_HLSL_SRCARG_STRING_(vec2, 0x3) +#undef MAKE_HLSL_SRCARG_STRING_ + +// special cases for comparison opcodes... + +const char *get_HLSL_comparison_string_scalar(Context *ctx) +{ + const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" }; + if (ctx->instruction_controls >= STATICARRAYLEN(comps)) + { + fail(ctx, "unknown comparison control"); + return ""; + } // if + + return comps[ctx->instruction_controls]; +} // get_HLSL_comparison_string_scalar + +const char *get_HLSL_comparison_string_vector(Context *ctx) +{ + return get_HLSL_comparison_string_scalar(ctx); // standard C operators work for vectors in HLSL. +} // get_HLSL_comparison_string_vector + + +void emit_HLSL_start(Context *ctx, const char *profilestr) +{ + if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx)) + { + failf(ctx, "Shader type %u unsupported in this profile.", + (uint) ctx->shader_type); + return; + } // if + + if (!ctx->mainfn) + { + if (shader_is_vertex(ctx)) + ctx->mainfn = StrDup(ctx, "VertexShader"); + else if (shader_is_pixel(ctx)) + ctx->mainfn = StrDup(ctx, "PixelShader"); + } // if + + set_output(ctx, &ctx->mainline); + ctx->indent++; +} // emit_HLSL_start + +void emit_HLSL_RET(Context *ctx); +void emit_HLSL_end(Context *ctx) +{ + // !!! FIXME: maybe handle this at a higher level? + // ps_1_* writes color to r0 instead oC0. We move it to the right place. + // We don't have to worry about a RET opcode messing this up, since + // RET isn't available before ps_2_0. + if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + { + set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1); + output_line(ctx, "oC0 = r0;"); + } // if + + // !!! FIXME: maybe handle this at a higher level? + // force a RET opcode if we're at the end of the stream without one. + if (ctx->previous_opcode != OPCODE_RET) + emit_HLSL_RET(ctx); +} // emit_HLSL_end + +void emit_HLSL_phase(Context *ctx) +{ + // no-op in HLSL. +} // emit_HLSL_phase + +void output_HLSL_uniform_array(Context *ctx, const RegisterType regtype, + const int size) +{ + if (size > 0) + { + char buf[64]; + get_HLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf)); + const char *typ; + switch (regtype) + { + case REG_TYPE_CONST: typ = "float4"; break; + case REG_TYPE_CONSTINT: typ = "int4"; break; + case REG_TYPE_CONSTBOOL: typ = "bool"; break; + default: + { + fail(ctx, "BUG: used a uniform we don't know how to define."); + return; + } // default + } // switch + output_line(ctx, "%s %s[%d];", typ, buf, size); + } // if +} // output_HLSL_uniform_array + +void emit_HLSL_finalize(Context *ctx) +{ + if (ctx->have_relative_input_registers) // !!! FIXME + fail(ctx, "Relative addressing of input registers not supported."); + + // Check uniform_float4_count too since TEXBEM affects it + if (ctx->uniform_count > 0 || ctx->uniform_float4_count > 0) + { + push_output(ctx, &ctx->preflight); + output_line(ctx, "cbuffer %s_Uniforms : register(b0)", ctx->mainfn); + output_line(ctx, "{"); + ctx->indent++; + output_HLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count); + output_HLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count); + output_HLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count); + ctx->indent--; + output_line(ctx, "};"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + // Fill in the shader's mainline function signature. + push_output(ctx, &ctx->mainline_intro); + output_line(ctx, "%s%s %s(%s%s%s)", + ctx->outputs ? ctx->mainfn : "void", + ctx->outputs ? "_Output" : "", + ctx->mainfn, + ctx->inputs ? ctx->mainfn : "", + ctx->inputs ? "_Input" : "", + ctx->inputs ? " input" : ""); + output_line(ctx, "{"); + + if (ctx->outputs) + { + ctx->indent++; + output_line(ctx, "%s%s output = (%s%s) 0;", + ctx->mainfn, "_Output", ctx->mainfn, "_Output"); + + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "return output;"); + pop_output(ctx); + } // if + pop_output(ctx); + + if (ctx->inputs) + { + push_output(ctx, &ctx->inputs); + output_line(ctx, "};"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + if (ctx->outputs) + { + push_output(ctx, &ctx->outputs); + + // !!! FIXME: Maybe have a better check for this? + if (ctx->hlsl_outpos_name[0] != '\0') + { + output_line(ctx, "\tfloat4 m_%s : SV_Position;", + ctx->hlsl_outpos_name); + } // if + + output_line(ctx, "};"); + output_blank_line(ctx); + pop_output(ctx); + } // if + + // throw some blank lines around to make source more readable. + if (ctx->globals) // don't add a blank line if the section is empty. + { + push_output(ctx, &ctx->globals); + output_blank_line(ctx); + pop_output(ctx); + } // if + + if (ctx->need_max_float) + { + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define FLT_MAX 1e38"); + ctx->indent--; + pop_output(ctx); + } // if +} // emit_HLSL_finalize + +void emit_HLSL_global(Context *ctx, RegisterType regtype, int regnum) +{ + char varname[64]; + get_HLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + + switch (regtype) + { + case REG_TYPE_ADDRESS: + if (shader_is_vertex(ctx)) + output_line(ctx, "int4 %s;", varname); + else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE. + { + // We have to map texture registers to temps for ps_1_1, since + // they work like temps, initialize with tex coords, and the + // ps_1_1 TEX opcode expects to overwrite it. + if (!shader_version_atleast(ctx, 1, 4)) + output_line(ctx, "float4 %s = input.m_%s;",varname,varname); + } // else if + break; + case REG_TYPE_PREDICATE: + output_line(ctx, "bool4 %s;", varname); + break; + case REG_TYPE_TEMP: + output_line(ctx, "float4 %s;", varname); + break; + case REG_TYPE_LOOP: + break; // no-op. We declare these in for loops at the moment. + case REG_TYPE_LABEL: + break; // no-op. If we see it here, it means we optimized it out. + default: + fail(ctx, "BUG: we used a register we don't know how to define."); + break; + } // switch + + pop_output(ctx); +} // emit_HLSL_global + +void emit_HLSL_array(Context *ctx, VariableList *var) +{ + // All uniforms (except constant arrays, which are literally constant + // data embedded in HLSL shaders) are now packed into a single array, + // so we can batch the uniform transfers. So this doesn't actually + // define an array here; the one, big array is emitted during + // finalization instead. + // However, we need to #define the offset into the one, big array here, + // and let dereferences use that #define. + const int base = var->index; + const int hlslbase = ctx->uniform_float4_count; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const int ARRAYBASE_%d = %d;", base, hlslbase); + pop_output(ctx); + var->emit_position = hlslbase; +} // emit_HLSL_array + +void emit_HLSL_const_array(Context *ctx, const ConstantsList *clist, + int base, int size) +{ + char varname[64]; + get_HLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname)); + + const char *cstr = NULL; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const float4 %s[%d] = {", varname, size); + ctx->indent++; + + int i; + for (i = 0; i < size; i++) + { + while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) + clist = clist->next; + assert(clist->constant.index == (base + i)); + + char val0[32]; + char val1[32]; + char val2[32]; + char val3[32]; + floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1); + floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1); + floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1); + floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1); + + output_line(ctx, "float4(%s, %s, %s, %s)%s", val0, val1, val2, val3, + (i < (size-1)) ? "," : ""); + + clist = clist->next; + } // for + + ctx->indent--; + output_line(ctx, "};"); + pop_output(ctx); +} // emit_HLSL_const_array + +void emit_HLSL_uniform(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var) +{ + // Now that we're pushing all the uniforms as one big array, pack these + // down, so if we only use register c439, it'll actually map to + // HLSL_uniforms_vec4[0]. As we push one big array, this will prevent + // uploading unused data. + + char varname[64]; + char name[64]; + int index = 0; + + get_HLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + + if (var == NULL) + { + get_HLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + + if (regtype == REG_TYPE_CONST) + index = ctx->uniform_float4_count; + else if (regtype == REG_TYPE_CONSTINT) + index = ctx->uniform_int4_count; + else if (regtype == REG_TYPE_CONSTBOOL) + index = ctx->uniform_bool_count; + else // get_HLSL_uniform_array_varname() would have called fail(). + assert(!(ctx->isfail)); + + output_line(ctx, "#define %s %s[%d]", varname, name, index); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. + pop_output(ctx); + } // if + + else + { + const int arraybase = var->index; + if (var->constant) + { + get_HLSL_const_array_varname_in_buf(ctx, arraybase, var->count, + name, sizeof (name)); + index = (regnum - arraybase); + } // if + else + { + assert(var->emit_position != -1); + get_HLSL_uniform_array_varname(ctx, regtype, name, sizeof (name)); + index = (regnum - arraybase) + var->emit_position; + } // else + + output_line(ctx, "#define %s %s[%d];", varname, name, index); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", varname); // !!! FIXME: gross. + pop_output(ctx); + } // else + + pop_output(ctx); +} // emit_HLSL_uniform + +void emit_HLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb) +{ + char var[64]; + const char *texsuffix = NULL; + switch (ttype) + { + case TEXTURE_TYPE_2D: texsuffix = "2D"; break; + case TEXTURE_TYPE_CUBE: texsuffix = "Cube"; break; + case TEXTURE_TYPE_VOLUME: texsuffix = "3D"; break; + default: assert(!"unexpected texture type"); return; + } // switch + + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var)); + + push_output(ctx, &ctx->globals); + output_line(ctx, "Texture%s %s_texture : register(t%d);", texsuffix, var, stage); + output_line(ctx, "SamplerState %s : register(%s);", var, var); + pop_output(ctx); + + if (tb) // This sampler used a ps_1_1 TEXBEM opcode? + { + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + char name[64]; + const int index = ctx->uniform_float4_count; + ctx->uniform_float4_count += 2; + get_HLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof(name)); + output_line(ctx, "const float4 %s_texbem = %s[%d];", var, name, index); + output_line(ctx, "const float4 %s_texbeml = %s[%d];", var, name, index + 1); + pop_output(ctx); + } // if +} // emit_HLSL_sampler + + +void emit_HLSL_attribute(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags) +{ + // !!! FIXME: this function doesn't deal with write masks at all yet! + const char *usage_str = NULL; + char index_str[16] = { '\0' }; + char var[64]; + char a[256]; + + get_HLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var)); + + //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? + + if (index != 0) // !!! FIXME: a lot of these MUST be zero. + snprintf(index_str, sizeof (index_str), "%u", (uint) index); + + if (shader_is_vertex(ctx)) + { + // pre-vs3 output registers. + // these don't ever happen in DCL opcodes, I think. Map to vs_3_* + // output registers. + if (!shader_version_atleast(ctx, 3, 0)) + { + if (regtype == REG_TYPE_RASTOUT) + { + regtype = REG_TYPE_OUTPUT; + index = regnum; + switch ((const RastOutType) regnum) + { + case RASTOUT_TYPE_POSITION: + usage = MOJOSHADER_USAGE_POSITION; + break; + case RASTOUT_TYPE_FOG: + usage = MOJOSHADER_USAGE_FOG; + break; + case RASTOUT_TYPE_POINT_SIZE: + usage = MOJOSHADER_USAGE_POINTSIZE; + break; + } // switch + } // if + + else if (regtype == REG_TYPE_ATTROUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_COLOR; + index = regnum; + } // else if + + else if (regtype == REG_TYPE_TEXCRDOUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_TEXCOORD; + index = regnum; + } // else if + } // if + + if (regtype == REG_TYPE_INPUT) + { + push_output(ctx, &ctx->inputs); + if (buffer_size(ctx->inputs) == 0) + { + output_line(ctx, "struct %s_Input", ctx->mainfn); + output_line(ctx, "{"); + } // if + + ctx->indent++; + switch (usage) + { + case MOJOSHADER_USAGE_BINORMAL: + output_line(ctx, "float4 m_%s : BINORMAL%d;", var, index); + break; + case MOJOSHADER_USAGE_BLENDINDICES: + output_line(ctx, "float4 m_%s : BLENDINDICES%d;", var, index); + break; + case MOJOSHADER_USAGE_BLENDWEIGHT: + output_line(ctx, "float4 m_%s : BLENDWEIGHT%d;", var, index); + break; + case MOJOSHADER_USAGE_COLOR: + output_line(ctx, "float4 m_%s : COLOR%d;", var, index); + break; + case MOJOSHADER_USAGE_NORMAL: + output_line(ctx, "float4 m_%s : NORMAL%d;", var, index); + break; + case MOJOSHADER_USAGE_POSITION: + output_line(ctx, "float4 m_%s : POSITION%d;", var, index); + break; + case MOJOSHADER_USAGE_POSITIONT: + output_line(ctx, "float4 m_%s : POSITIONT;", var); + break; + case MOJOSHADER_USAGE_POINTSIZE: + output_line(ctx, "float4 m_%s : PSIZE;", var); + break; + case MOJOSHADER_USAGE_TANGENT: + output_line(ctx, "float4 m_%s : TANGENT%d;", var, index); + break; + case MOJOSHADER_USAGE_TEXCOORD: + output_line(ctx, "float4 m_%s : TEXCOORD%d;", var, index); + break; + default: + fail(ctx, "Unknown vertex input semantic type!"); + break; + } // case + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define %s input.m_%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // if + + else if (regtype == REG_TYPE_OUTPUT) + { + push_output(ctx, &ctx->outputs); + if (buffer_size(ctx->outputs) == 0) + { + output_line(ctx, "struct %s_Output", ctx->mainfn); + output_line(ctx, "{"); + } // if + + ctx->indent++; + + switch (usage) + { + case MOJOSHADER_USAGE_BINORMAL: + output_line(ctx, "float4 m_%s : BINORMAL%d;", var, index); + break; + case MOJOSHADER_USAGE_BLENDINDICES: + output_line(ctx, "float4 m_%s : BLENDINDICES%d;", var, index); + break; + case MOJOSHADER_USAGE_BLENDWEIGHT: + output_line(ctx, "float4 m_%s : BLENDWEIGHT%d;", var, index); + break; + case MOJOSHADER_USAGE_COLOR: + output_line(ctx, "float4 m_%s : COLOR%d;", var, index); + break; + case MOJOSHADER_USAGE_FOG: + output_line(ctx, "float m_%s : FOG;", var); + break; + case MOJOSHADER_USAGE_NORMAL: + output_line(ctx, "float4 m_%s : NORMAL%d;", var, index); + break; + case MOJOSHADER_USAGE_POSITION: + if (index == 0) + snprintf(ctx->hlsl_outpos_name, + sizeof(ctx->hlsl_outpos_name), "%s", var); + else + output_line(ctx, "float4 m_%s : POSITION%d;", var, index); + break; + case MOJOSHADER_USAGE_POSITIONT: + output_line(ctx, "float4 m_%s : POSITIONT;", var); + break; + case MOJOSHADER_USAGE_POINTSIZE: + output_line(ctx, "float m_%s : PSIZE;", var); + break; + case MOJOSHADER_USAGE_TANGENT: + output_line(ctx, "float4 m_%s : TANGENT%d;", var, index); + break; + case MOJOSHADER_USAGE_TESSFACTOR: + output_line(ctx, "float m_%s : TESSFACTOR%d;", var, index); + break; + case MOJOSHADER_USAGE_TEXCOORD: + output_line(ctx, "float4 m_%s : TEXCOORD%d;", var, index); + break; + default: + snprintf(a, sizeof(a), "Invalid vertex output semantic %d", usage); + fail(ctx, a); + break; + } // switch + + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define %s output.m_%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // else if + + else + { + fail(ctx, "unknown vertex shader attribute register"); + } // else + } // if + + else if (shader_is_pixel(ctx)) + { + // samplers DCLs get handled in emit_HLSL_sampler(). + + if (flags & MOD_CENTROID) // !!! FIXME + { + failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); + return; + } // if + + if ((regtype == REG_TYPE_COLOROUT) || (regtype == REG_TYPE_DEPTHOUT)) + { + push_output(ctx, &ctx->outputs); + if (buffer_size(ctx->outputs) == 0) + { + output_line(ctx, "struct %s_Output", ctx->mainfn); + output_line(ctx, "{"); + } // if + ctx->indent++; + + if (regtype == REG_TYPE_COLOROUT) + output_line(ctx, "float4 m_%s : SV_Target%d;", var, regnum); + else if (regtype == REG_TYPE_DEPTHOUT) + output_line(ctx, "float m_%s : SV_Depth;", var); + + pop_output(ctx); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define %s output.m_%s", var, var); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // if + + // !!! FIXME: can you actualy have a texture register with COLOR usage? + else if ((regtype == REG_TYPE_TEXTURE) || + (regtype == REG_TYPE_INPUT) || + (regtype == REG_TYPE_MISCTYPE)) + { + int skipreference = 0; + const char *define_start = ""; + const char *define_end = ""; + + push_output(ctx, &ctx->inputs); + if (buffer_size(ctx->inputs) == 0) + { + output_line(ctx, "struct %s_Input", ctx->mainfn); + output_line(ctx, "{"); + output_line(ctx, "\t// This must match the vertex output!"); + output_line(ctx, "\t// Rewrite at link time if needed!"); + } // if + ctx->indent++; + + if (regtype == REG_TYPE_MISCTYPE) + { + const MiscTypeType mt = (MiscTypeType) regnum; + if (mt == MISCTYPE_TYPE_FACE) + { + // In SM 3.0, VFACE was a float whose sign determined + // face direction. In SM 4.0+, it's just a bool, so + // we convert the value when we output the #define. + output_line(ctx, "bool m_%s : SV_IsFrontFace;", var); + define_start = "("; + define_end = " ? 1 : -1)"; + } // if + else if (mt == MISCTYPE_TYPE_POSITION) + output_line(ctx, "float4 m_%s : SV_Position;", var); + else + fail(ctx, "BUG: unhandled misc register"); + } // else if + + else + { + if (usage == MOJOSHADER_USAGE_TEXCOORD) + { + // ps_1_1 does a different hack for this attribute. + // Refer to emit_HLSL_global()'s REG_TYPE_ADDRESS code. + if (!shader_version_atleast(ctx, 1, 4)) + skipreference = 1; + output_line(ctx, "float4 m_%s : TEXCOORD%d;", var, index); + } // if + + else if (usage == MOJOSHADER_USAGE_COLOR) + output_line(ctx, "float4 m_%s : COLOR%d;", var, index); + + else if (usage == MOJOSHADER_USAGE_FOG) + output_line(ctx, "float m_%s : FOG;", var); + + else if (usage == MOJOSHADER_USAGE_NORMAL) + output_line(ctx, "float4 m_%s : NORMAL;", var); + } // else + + pop_output(ctx); + + if (!skipreference) + { + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "#define %s %sinput.m_%s%s", var, + define_start, var, define_end); + pop_output(ctx); + push_output(ctx, &ctx->mainline); + ctx->indent++; + output_line(ctx, "#undef %s", var); // !!! FIXME: gross. + pop_output(ctx); + } // if + } // else if + + else + { + fail(ctx, "unknown pixel shader attribute register"); + } // else + } // else if + + else + { + fail(ctx, "Unknown shader type"); // state machine should catch this. + } // else +} // emit_HLSL_attribute + +void emit_HLSL_NOP(Context *ctx) +{ + // no-op is a no-op. :) +} // emit_HLSL_NOP + +void emit_HLSL_MOV(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_MOV + +void emit_HLSL_ADD(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_ADD + +void emit_HLSL_SUB(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_SUB + +void emit_HLSL_MAD(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_HLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2); + output_line(ctx, "%s", code); +} // emit_HLSL_MAD + +void emit_HLSL_MUL(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_MUL + +void emit_HLSL_RCP(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->need_max_float = 1; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "(%s == 0.0) ? FLT_MAX : 1.0 / %s", src0, src0); + output_line(ctx, "%s", code); +} // emit_HLSL_RCP + +void emit_HLSL_RSQ(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + ctx->need_max_float = 1; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "(%s == 0.0) ? FLT_MAX : rsqrt(abs(%s))", + src0, src0); + output_line(ctx, "%s", code); +} // emit_HLSL_RSQ + +void emit_HLSL_dotprod(Context *ctx, const char *src0, const char *src1, + const char *extra) +{ + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "dot(%s, %s)%s", + src0, src1, extra); + output_line(ctx, "%s", code); +} // emit_HLSL_dotprod + +void emit_HLSL_DP3(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + emit_HLSL_dotprod(ctx, src0, src1, ""); +} // emit_HLSL_DP3 + +void emit_HLSL_DP4(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1)); + emit_HLSL_dotprod(ctx, src0, src1, ""); +} // emit_HLSL_DP4 + +void emit_HLSL_MIN(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_MIN + +void emit_HLSL_MAX(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_MAX + +void emit_HLSL_SLT(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) results in 0.0 or 1.0, like SLT wants. + if (vecsize == 1) + make_HLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1); + else + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s < %s", src0, src1); + + output_line(ctx, "%s", code); +} // emit_HLSL_SLT + +void emit_HLSL_SGE(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // float(bool) results in 0.0 or 1.0, like SGE wants. + if (vecsize == 1) + make_HLSL_destarg_assign(ctx, code, sizeof (code), "float(%s >= %s)", src0, src1); + else + make_HLSL_destarg_assign(ctx, code, sizeof (code), "%s >= %s", src0, src1); + + output_line(ctx, "%s", code); +} // emit_HLSL_SGE + +void emit_HLSL_EXP(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_EXP + +void emit_HLSL_LOG(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_LOG + +void emit_HLSL_LIT(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char code[128]; + const char *maxp = "127.9961"; // value from the dx9 reference. + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "lit(%s.x, %s.y, clamp(%s.w, -%s, %s))", + src0, src0, src0, maxp, maxp); + output_line(ctx, "%s", code); +} // emit_HLSL_LIT + +void emit_HLSL_DST(Context *ctx) +{ + // !!! FIXME: needs to take ctx->dst_arg.writemask into account + // !!! FIXME: can we use dst() intrinsic instead? -caleb + char src0_y[64]; make_HLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y)); + char src1_y[64]; make_HLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y)); + char src0_z[64]; make_HLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z)); + char src1_w[64]; make_HLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w)); + + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float4(1.0, %s * %s, %s, %s)", + src0_y, src1_y, src0_z, src1_w); + output_line(ctx, "%s", code); +} // emit_HLSL_DST + +void emit_HLSL_LRP(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_HLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "lerp(%s, %s, %s)", + src2, src1, src0); + output_line(ctx, "%s", code); +} // emit_HLSL_LRP + +void emit_HLSL_FRC(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "frac(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_FRC + +void emit_HLSL_M4X4(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_HLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_HLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_HLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); + char row3[64]; make_HLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3)); + char code[256]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2, src0, row3); + output_line(ctx, "%s", code); +} // emit_HLSL_M4X4 + +void emit_HLSL_M4X3(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_HLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_HLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_HLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2)); + char code[256]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2); + output_line(ctx, "%s", code); +} // emit_HLSL_M4X3 + +void emit_HLSL_M3X4(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_HLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_HLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_HLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); + char row3[64]; make_HLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3)); + + char code[256]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float4(dot(%s, %s), dot(%s, %s), " + "dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, + src0, row2, src0, row3); + output_line(ctx, "%s", code); +} // emit_HLSL_M3X4 + +void emit_HLSL_M3X3(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_HLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_HLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + char row2[64]; make_HLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2)); + char code[256]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float3(dot(%s, %s), dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1, src0, row2); + output_line(ctx, "%s", code); +} // emit_HLSL_M3X3 + +void emit_HLSL_M3X2(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char row0[64]; make_HLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0)); + char row1[64]; make_HLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1)); + + char code[256]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float2(dot(%s, %s), dot(%s, %s))", + src0, row0, src0, row1); + output_line(ctx, "%s", code); +} // emit_HLSL_M3X2 + +void emit_HLSL_CALL(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + if (ctx->loops > 0) + output_line(ctx, "%s(aL);", src0); + else + output_line(ctx, "%s();", src0); +} // emit_HLSL_CALL + +void emit_HLSL_CALLNZ(Context *ctx) +{ + // !!! FIXME: if src1 is a constbool that's true, we can remove the + // !!! FIXME: if. If it's false, we can make this a no-op. + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + + if (ctx->loops > 0) + output_line(ctx, "if (%s) { %s(aL); }", src1, src0); + else + output_line(ctx, "if (%s) { %s(); }", src1, src0); +} // emit_HLSL_CALLNZ + +void emit_HLSL_LOOP(Context *ctx) +{ + // !!! FIXME: swizzle? + char var[64]; get_HLSL_srcarg_varname(ctx, 1, var, sizeof (var)); + assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday. + output_line(ctx, "{"); + ctx->indent++; + output_line(ctx, "const int aLend = %s.x + %s.y;", var, var); + output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var); + ctx->indent++; +} // emit_HLSL_LOOP + +void emit_HLSL_RET(Context *ctx) +{ + // thankfully, the MSDN specs say a RET _has_ to end a function...no + // early returns. So if you hit one, you know you can safely close + // a high-level function. + push_output(ctx, &ctx->postflight); + output_line(ctx, "}"); + output_blank_line(ctx); + set_output(ctx, &ctx->subroutines); // !!! FIXME: is this for LABEL? Maybe set it there so we don't allocate unnecessarily. +} // emit_HLSL_RET + +void emit_HLSL_ENDLOOP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); + ctx->indent--; + output_line(ctx, "}"); +} // emit_HLSL_ENDLOOP + +void emit_HLSL_LABEL(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + const int label = ctx->source_args[0].regnum; + RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label); + assert(ctx->output == ctx->subroutines); // not mainline, etc. + assert(ctx->indent == 0); // we shouldn't be in the middle of a function. + + // MSDN specs say CALL* has to come before the LABEL, so we know if we + // can ditch the entire function here as unused. + if (reg == NULL) + set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output. + + // !!! FIXME: it would be nice if we could determine if a function is + // !!! FIXME: only called once and, if so, forcibly inline it. + + const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : ""; + output_line(ctx, "void %s(%s)", src0, uses_loopreg); + output_line(ctx, "{"); + ctx->indent++; +} // emit_HLSL_LABEL + +void emit_HLSL_DCL(Context *ctx) +{ + // no-op. We do this in our emit_attribute() and emit_uniform(). +} // emit_HLSL_DCL + +void emit_HLSL_POW(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "pow(abs(%s), %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_POW + +void emit_HLSL_CRS(Context *ctx) +{ + // !!! FIXME: needs to take ctx->dst_arg.writemask into account. + char src0[64]; make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "cross(%s, %s)", src0, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_CRS + +void emit_HLSL_SGN(Context *ctx) +{ + // (we don't need the temporary registers specified for the D3D opcode.) + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_SGN + +void emit_HLSL_ABS(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_ABS + +void emit_HLSL_NRM(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_NRM + +void emit_HLSL_SINCOS(Context *ctx) +{ + // we don't care about the temp registers that <= sm2 demands; ignore them. + // sm2 also talks about what components are left untouched vs. undefined, + // but we just leave those all untouched with HLSL write masks (which + // would fulfill the "undefined" requirement, too). + const int mask = ctx->dest_arg.writemask; + char src0[64]; make_HLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char code[128] = { '\0' }; + + if (writemask_x(mask)) + make_HLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0); + else if (writemask_y(mask)) + make_HLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0); + else if (writemask_xy(mask)) + { + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float2(cos(%s), sin(%s))", src0, src0); + } // else if + + output_line(ctx, "%s", code); +} // emit_HLSL_SINCOS + +void emit_HLSL_REP(Context *ctx) +{ + // !!! FIXME: + // msdn docs say legal loop values are 0 to 255. We can check DEFI values + // at parse time, but if they are pulling a value from a uniform, do + // we clamp here? + // !!! FIXME: swizzle is legal here, right? + char src0[64]; make_HLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0)); + const uint rep = (uint) ctx->reps; + output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {", + rep, rep, src0, rep); + ctx->indent++; +} // emit_HLSL_REP + +void emit_HLSL_ENDREP(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_HLSL_ENDREP + +void emit_HLSL_IF(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + output_line(ctx, "if (%s) {", src0); + ctx->indent++; +} // emit_HLSL_IF + +void emit_HLSL_IFC(Context *ctx) +{ + const char *comp = get_HLSL_comparison_string_scalar(ctx); + char src0[64]; make_HLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) {", src0, comp, src1); + ctx->indent++; +} // emit_HLSL_IFC + +void emit_HLSL_ELSE(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "} else {"); + ctx->indent++; +} // emit_HLSL_ELSE + +void emit_HLSL_ENDIF(Context *ctx) +{ + ctx->indent--; + output_line(ctx, "}"); +} // emit_HLSL_ENDIF + +void emit_HLSL_BREAK(Context *ctx) +{ + output_line(ctx, "break;"); +} // emit_HLSL_BREAK + +void emit_HLSL_BREAKC(Context *ctx) +{ + const char *comp = get_HLSL_comparison_string_scalar(ctx); + char src0[64]; make_HLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1)); + output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1); +} // emit_HLSL_BREAKC + +void emit_HLSL_MOVA(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + + if (vecsize == 1) + { + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "int(floor(abs(%s) + 0.5) * sign(%s))", + src0, src0); + } // if + + else + { + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "int%d(floor(abs(%s) + 0.5) * sign(%s))", + vecsize, src0, src0); + } // else + + output_line(ctx, "%s", code); +} // emit_HLSL_MOVA + +void emit_HLSL_DEFB(Context *ctx) +{ + char varname[64]; get_HLSL_destarg_varname(ctx, varname, sizeof (varname)); + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const bool %s = %s;", + varname, ctx->dwords[0] ? "true" : "false"); + ctx->indent--; + pop_output(ctx); +} // emit_HLSL_DEFB + +void emit_HLSL_DEFI(Context *ctx) +{ + char varname[64]; get_HLSL_destarg_varname(ctx, varname, sizeof (varname)); + const int32 *x = (const int32 *) ctx->dwords; + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const int4 %s = int4(%d, %d, %d, %d);", + varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]); + ctx->indent--; + pop_output(ctx); +} // emit_HLSL_DEFI + +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) + +void emit_HLSL_TEXKILL(Context *ctx) +{ + char dst[64]; get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + output_line(ctx, "if (any(%s.xyz < 0.0)) discard;", dst); +} // emit_HLSL_TEXKILL + +void emit_HLSL_TEXLD(Context *ctx) +{ + if (!shader_version_atleast(ctx, 1, 4)) + { + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char sampler[64]; + char code[128] = {0}; + + RegisterList *sreg; + sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum); + const TextureType ttype = (TextureType) (sreg ? sreg->index : 0); + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + if (ttype == TEXTURE_TYPE_2D) + { + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, %s.xy)", + sampler, sampler, dst); + } // if + else if (ttype == TEXTURE_TYPE_CUBE || ttype == TEXTURE_TYPE_VOLUME) + { + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, %s.xyz)", + sampler, sampler, dst); + } // else if + else + { + fail(ctx, "unexpected texture type"); + } // else + output_line(ctx, "%s", code); + } // if + + else if (!shader_version_atleast(ctx, 2, 0)) + { + // ps_1_4 is different, too! + fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME + return; + } // else if + + else + { + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + samp_arg->regnum); + const char *funcname = NULL; + char src0[64] = { '\0' }; + char src1[64]; get_HLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + + if (sreg == NULL) + { + fail(ctx, "TEXLD using undeclared sampler"); + return; + } // if + + // !!! FIXME: does the d3d bias value map directly to HLSL? + const char *biassep = ""; + char bias[64] = { '\0' }; + if (ctx->instruction_controls == CONTROL_TEXLDB) + { + biassep = ", "; + make_HLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias)); + funcname = "SampleBias"; + } // if + else + { + funcname = "Sample"; + } // else + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + make_HLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + break; + case TEXTURE_TYPE_CUBE: + if (ctx->instruction_controls == CONTROL_TEXLDP) + fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? + make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + break; + case TEXTURE_TYPE_VOLUME: + make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + break; + default: + fail(ctx, "unknown texture type"); + return; + } // switch + + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + char swiz_str[6] = { '\0' }; + make_HLSL_swizzle_string(swiz_str, sizeof (swiz_str), + samp_arg->swizzle, ctx->dest_arg.writemask); + + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.%s(%s, %s%s%s)%s", src1, funcname, + src1, src0, biassep, bias, swiz_str); + + output_line(ctx, "%s", code); + } // else +} // emit_HLSL_TEXLD + +void emit_HLSL_TEXBEM(Context *ctx) +{ + // !!! FIXME: this code counts on the register not having swizzles, etc. + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_HLSL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))", + sampler, sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXBEM + +void emit_HLSL_TEXBEML(Context *ctx) +{ + // !!! FIXME: this code counts on the register not having swizzles, etc. + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + char src[64]; get_HLSL_srcarg_varname(ctx, 0, src, sizeof (src)); + char sampler[64]; + char code[512]; + + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "(%s_texture.Sample(%s, float2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y)," + " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *" + " ((%s.z * %s_texbeml.x) + %s_texbem.y)", + sampler, sampler, + dst, sampler, src, sampler, src, + dst, sampler, src, sampler, src, + src, sampler, sampler); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXBEML + +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME + +void emit_HLSL_TEXM3X2PAD(Context *ctx) +{ + // no-op ... work happens in emit_HLSL_TEXM3X2TEX(). +} // emit_HLSL_TEXM3X2PAD + +void emit_HLSL_TEXM3X2TEX(Context *ctx) +{ + if (ctx->texm3x2pad_src0 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char sampler[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0, + src0, sizeof (src0)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0, + src1, sizeof (src1)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src2, sizeof (src2)); + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, float2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))", + sampler, sampler, src0, src1, src2, dst); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXM3X2TEX + +void emit_HLSL_TEXM3X3PAD(Context *ctx) +{ + // no-op ... work happens in emit_HLSL_TEXM3X3*(). +} // emit_HLSL_TEXM3X3PAD + +void emit_HLSL_TEXM3X3TEX(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char sampler[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s," + " float3(dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)," + " dot(%s.xyz, %s.xyz)))", + sampler, sampler, src0, src1, src2, src3, dst, src4); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXM3X3TEX + +void emit_HLSL_TEXM3X3SPEC_helper(Context *ctx) +{ + if (ctx->glsl_generated_texm3x3spec_helper) + return; + + ctx->glsl_generated_texm3x3spec_helper = 1; + + push_output(ctx, &ctx->helpers); + output_line(ctx, "float3 TEXM3X3SPEC_reflection(const float3 normal, const float3 eyeray)"); + output_line(ctx, "{"); ctx->indent++; + output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--; + output_line(ctx, "}"); + output_blank_line(ctx); + pop_output(ctx); +} // emit_HLSL_TEXM3X3SPEC_helper + +void emit_HLSL_TEXM3X3SPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char src5[64]; + char sampler[64]; + char code[512]; + + emit_HLSL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum, + src5, sizeof (src5)); + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, " + "TEXM3X3SPEC_reflection(" + "float3(" + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz)" + ")," + "%s.xyz," + ")" + ")", + sampler, sampler, src0, src1, src2, src3, dst, src4, src5); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXM3X3SPEC + +void emit_HLSL_TEXM3X3VSPEC(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + DestArgInfo *info = &ctx->dest_arg; + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char sampler[64]; + char code[512]; + + emit_HLSL_TEXM3X3SPEC_helper(ctx); + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum, + sampler, sizeof (sampler)); + + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.Sample(%s, " + "TEXM3X3SPEC_reflection(" + "float3(" + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz), " + "dot(%s.xyz, %s.xyz)" + "), " + "float3(%s.w, %s.w, %s.w)" + ")" + ")", + sampler, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXM3X3VSPEC + +void emit_HLSL_EXPP(Context *ctx) +{ + // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation. + emit_HLSL_EXP(ctx); // I guess this is just partial precision EXP? +} // emit_HLSL_EXPP + +void emit_HLSL_LOGP(Context *ctx) +{ + // LOGP is just low-precision LOG, but we'll take the higher precision. + emit_HLSL_LOG(ctx); +} // emit_HLSL_LOGP + +// common code between CMP and CND. +void emit_HLSL_comparison_operations(Context *ctx, const char *cmp) +{ + int i, j; + DestArgInfo *dst = &ctx->dest_arg; + const SourceArgInfo *srcarg0 = &ctx->source_args[0]; + const int origmask = dst->writemask; + int used_swiz[4] = { 0, 0, 0, 0 }; + const int writemask[4] = { dst->writemask0, dst->writemask1, + dst->writemask2, dst->writemask3 }; + const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y, + srcarg0->swizzle_z, srcarg0->swizzle_w }; + + for (i = 0; i < 4; i++) + { + int mask = (1 << i); + + if (!writemask[i]) continue; + if (used_swiz[i]) continue; + + // This is a swizzle we haven't checked yet. + used_swiz[i] = 1; + + // see if there are any other elements swizzled to match (.yyyy) + for (j = i + 1; j < 4; j++) + { + if (!writemask[j]) continue; + if (src0swiz[i] != src0swiz[j]) continue; + mask |= (1 << j); + used_swiz[j] = 1; + } // for + + // okay, (mask) should be the writemask of swizzles we like. + + char src0[64]; + char src1[64]; + char src2[64]; + make_HLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0)); + make_HLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1)); + make_HLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2)); + + set_dstarg_writemask(dst, mask); + + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "((%s %s) ? %s : %s)", + src0, cmp, src1, src2); + output_line(ctx, "%s", code); + } // for + + set_dstarg_writemask(dst, origmask); +} // emit_HLSL_comparison_operations + +void emit_HLSL_CND(Context *ctx) +{ + emit_HLSL_comparison_operations(ctx, "> 0.5"); +} // emit_HLSL_CND + +void emit_HLSL_DEF(Context *ctx) +{ + const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int? + char varname[64]; get_HLSL_destarg_varname(ctx, varname, sizeof (varname)); + char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1); + char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1); + char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1); + char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1); + + push_output(ctx, &ctx->mainline_top); + ctx->indent++; + output_line(ctx, "const float4 %s = float4(%s, %s, %s, %s);", + varname, val0, val1, val2, val3); + ctx->indent--; + pop_output(ctx); +} // emit_HLSL_DEF + +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME + +void emit_HLSL_TEXM3X3(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + char dst[64]; + char src0[64]; + char src1[64]; + char src2[64]; + char src3[64]; + char src4[64]; + char code[512]; + + // !!! FIXME: this code counts on the register not having swizzles, etc. + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0, + src0, sizeof (src0)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0, + src1, sizeof (src1)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1, + src2, sizeof (src2)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1, + src3, sizeof (src3)); + get_HLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum, + src4, sizeof (src4)); + get_HLSL_destarg_varname(ctx, dst, sizeof (dst)); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "float4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)", + src0, src1, src2, src3, dst, src4); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXM3X3 + +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME + +void emit_HLSL_CMP(Context *ctx) +{ + emit_HLSL_comparison_operations(ctx, ">= 0.0"); +} // emit_HLSL_CMP + +EMIT_HLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME + +void emit_HLSL_DP2ADD(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1)); + char src2[64]; make_HLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2)); + char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2); + emit_HLSL_dotprod(ctx, src0, src1, extra); +} // emit_HLSL_DP2ADD + +void emit_HLSL_DSX(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "ddx(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_DSX + +void emit_HLSL_DSY(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), "ddy(%s)", src0); + output_line(ctx, "%s", code); +} // emit_HLSL_DSY + +void emit_HLSL_TEXLDD(Context *ctx) +{ + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + samp_arg->regnum); + char src0[64] = { '\0' }; + char src1[64]; get_HLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + char src2[64] = { '\0' }; + char src3[64] = { '\0' }; + + if (sreg == NULL) + { + fail(ctx, "TEXLDD using undeclared sampler"); + return; + } // if + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + make_HLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0)); + make_HLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2)); + make_HLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3)); + break; + case TEXTURE_TYPE_CUBE: + case TEXTURE_TYPE_VOLUME: + make_HLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0)); + make_HLSL_srcarg_string_vec3(ctx, 2, src2, sizeof (src2)); + make_HLSL_srcarg_string_vec3(ctx, 3, src3, sizeof (src3)); + break; + default: + fail(ctx, "unknown texture type"); + return; + } // switch + + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + char swiz_str[6] = { '\0' }; + make_HLSL_swizzle_string(swiz_str, sizeof (swiz_str), + samp_arg->swizzle, ctx->dest_arg.writemask); + + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "%s_texture.SampleGrad(%s, %s, %s, %s)%s", + src1, src1, src0, src2, src3, swiz_str); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXLDD + +void emit_HLSL_SETP(Context *ctx) +{ + const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask); + char src0[64]; make_HLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0)); + char src1[64]; make_HLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1)); + char code[128]; + + // destination is always predicate register (which is type bvec4). + const char *comp = (vecsize == 1) ? + get_HLSL_comparison_string_scalar(ctx) : + get_HLSL_comparison_string_vector(ctx); + + make_HLSL_destarg_assign(ctx, code, sizeof (code), + "(%s %s %s)", src0, comp, src1); + output_line(ctx, "%s", code); +} // emit_HLSL_SETP + +void emit_HLSL_TEXLDL(Context *ctx) +{ + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, + samp_arg->regnum); + const char *pattern = NULL; + char src0[64]; + char src1[64]; + make_HLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0)); + get_HLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD? + + if (sreg == NULL) + { + fail(ctx, "TEXLDL using undeclared sampler"); + return; + } // if + + switch ((const TextureType) sreg->index) + { + case TEXTURE_TYPE_2D: + pattern = "%s_texture.SampleLevel(%s, %s.xy, %s.w)%s"; + break; + case TEXTURE_TYPE_CUBE: + case TEXTURE_TYPE_VOLUME: + pattern = "%s_texture.SampleLevel(%s, %s.xyz, %s.w)%s"; + break; + default: + fail(ctx, "unknown texture type"); + return; + } // switch + + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + char swiz_str[6] = { '\0' }; + make_HLSL_swizzle_string(swiz_str, sizeof (swiz_str), + samp_arg->swizzle, ctx->dest_arg.writemask); + + char code[128]; + make_HLSL_destarg_assign(ctx, code, sizeof(code), + pattern, src1, src1, src0, src0, swiz_str); + + output_line(ctx, "%s", code); +} // emit_HLSL_TEXLDL + +void emit_HLSL_BREAKP(Context *ctx) +{ + char src0[64]; make_HLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0)); + output_line(ctx, "if (%s) { break; }", src0); +} // emit_HLSL_BREAKP + +void emit_HLSL_RESERVED(Context *ctx) +{ + // do nothing; fails in the state machine. +} // emit_HLSL_RESERVED + +#endif // SUPPORT_PROFILE_HLSL + +#pragma GCC visibility pop