From 801b57d5379e1e7a7ee143ea6a9649cb856c5b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kro=C5=A1l=C3=A1k?= Date: Tue, 31 Dec 2019 12:22:44 -0500 Subject: [PATCH] Add support for emitting SPIR-V shaders. Co-authors include: - Angus Holder - Melker Narikka - Caleb Cornett - Ethan Lee --- CMakeLists.txt | 15 + mojoshader.c | 36 + mojoshader.h | 5 + mojoshader_common.c | 43 + mojoshader_internal.h | 42 + mojoshader_opengl.c | 289 +- profiles/mojoshader_profile.h | 15 + profiles/mojoshader_profile_common.c | 4 + profiles/mojoshader_profile_spirv.c | 4060 ++++++++++++++++++++++++++ profiles/mojoshader_profile_spirv.h | 202 ++ spirv/GLSL.std.450.h | 131 + spirv/spirv.h | 871 ++++++ utils/testparse.c | 97 +- 13 files changed, 5798 insertions(+), 12 deletions(-) create mode 100644 profiles/mojoshader_profile_spirv.c create mode 100644 profiles/mojoshader_profile_spirv.h create mode 100644 spirv/GLSL.std.450.h create mode 100644 spirv/spirv.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 429c969d..466da148 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ OPTION(PROFILE_GLSL "Build MojoShader with support for the GLSL profile" ON) OPTION(PROFILE_ARB1 "Build MojoShader with support for the ARB1 profile" ON) OPTION(PROFILE_ARB1_NV "Build MojoShader with support for the ARB1_NV profile" ON) OPTION(PROFILE_METAL "Build MojoShader with support for the Metal profile" ON) +OPTION(PROFILE_SPIRV "Build MojoShader with support for the SPIR-V profile" ON) OPTION(EFFECT_SUPPORT "Build MojoShader with support for Effect framework files" ON) OPTION(COMPILER_SUPPORT "Build MojoShader with support for HLSL source files" OFF) OPTION(FLIP_VIEWPORT "Build MojoShader with the ability to flip the GL viewport" OFF) @@ -124,6 +125,9 @@ ENDIF(NOT PROFILE_ARB1_NV) IF(NOT PROFILE_METAL) ADD_DEFINITIONS(-DSUPPORT_PROFILE_METAL=0) ENDIF(NOT PROFILE_METAL) +IF(NOT PROFILE_SPIRV) + ADD_DEFINITIONS(-DSUPPORT_PROFILE_SPIRV=0) +ENDIF(NOT PROFILE_SPIRV) IF(EFFECT_SUPPORT) 
IF(UNIX) @@ -159,6 +163,7 @@ ADD_LIBRARY(mojoshader ${LIBRARY_FORMAT} profiles/mojoshader_profile_d3d.c profiles/mojoshader_profile_glsl.c profiles/mojoshader_profile_metal.c + profiles/mojoshader_profile_spirv.c profiles/mojoshader_profile_common.c ) IF(EFFECT_SUPPORT) @@ -219,8 +224,18 @@ IF(COMPILER_SUPPORT) ENDIF(SDL2) ENDIF(COMPILER_SUPPORT) +FIND_PATH(SPIRV_TOOLS_INCLUDE_DIR "spirv-tools/libspirv.h" PATH_SUFFIXES "include") +FIND_LIBRARY(SPIRV_TOOLS_LIBRARY NAMES SPIRV-Tools-shared) +IF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY) + INCLUDE_DIRECTORIES(${SPIRV_TOOLS_INCLUDE_DIR}) + ADD_DEFINITIONS(-DMOJOSHADER_HAS_SPIRV_TOOLS) +ENDIF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY) + ADD_EXECUTABLE(testparse utils/testparse.c) TARGET_LINK_LIBRARIES(testparse mojoshader ${LIBM} ${CARBON_FRAMEWORK}) +IF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY) + TARGET_LINK_LIBRARIES(testparse ${SPIRV_TOOLS_LIBRARY}) +ENDIF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY) ADD_EXECUTABLE(testoutput utils/testoutput.c) TARGET_LINK_LIBRARIES(testoutput mojoshader ${LIBM} ${CARBON_FRAMEWORK}) IF(COMPILER_SUPPORT) diff --git a/mojoshader.c b/mojoshader.c index bee34f20..9491a82e 100644 --- a/mojoshader.c +++ b/mojoshader.c @@ -263,6 +263,15 @@ PREDECLARE_PROFILE(METAL) PREDECLARE_PROFILE(ARB1) #endif +#if !SUPPORT_PROFILE_SPIRV +#define PROFILE_EMITTER_SPIRV(op) +#else +#undef AT_LEAST_ONE_PROFILE +#define AT_LEAST_ONE_PROFILE 1 +#define PROFILE_EMITTER_SPIRV(op) emit_SPIRV_##op, +PREDECLARE_PROFILE(SPIRV) +#endif + #if !AT_LEAST_ONE_PROFILE #error No profiles are supported. Fix your build. 
#endif @@ -300,6 +309,9 @@ static const Profile profiles[] = #if SUPPORT_PROFILE_METAL DEFINE_PROFILE(METAL) #endif +#if SUPPORT_PROFILE_SPIRV + DEFINE_PROFILE(SPIRV) +#endif }; #undef DEFINE_PROFILE @@ -321,6 +333,7 @@ static const struct { const char *from; const char *to; } profileMap[] = PROFILE_EMITTER_GLSL(op) \ PROFILE_EMITTER_ARB1(op) \ PROFILE_EMITTER_METAL(op) \ + PROFILE_EMITTER_SPIRV(op) \ } static int parse_destination_token(Context *ctx, DestArgInfo *info) @@ -3445,6 +3458,28 @@ static MOJOSHADER_parseData *build_parsedata(Context *ctx) retval->preshader = ctx->preshader; retval->mainfn = ctx->mainfn; +#if SUPPORT_PROFILE_SPIRV + if (strcmp(retval->profile, "spirv") == 0) + { + size_t i, max; + int binary_size = retval->output_len - sizeof(SpirvPatchTable); + uint32 *binary = (uint32 *) retval->output; + SpirvPatchTable *table = (SpirvPatchTable *) &retval->output[binary_size]; + + if (table->vpflip.offset) binary[table->vpflip.offset] = table->vpflip.location; + if (table->array_vec4.offset) binary[table->array_vec4.offset] = table->array_vec4.location; + if (table->array_ivec4.offset) binary[table->array_ivec4.offset] = table->array_ivec4.location; + if (table->array_bool.offset) binary[table->array_bool.offset] = table->array_bool.location; + + for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++) + { + SpirvPatchEntry entry = table->samplers[i]; + if (entry.offset) + binary[entry.offset] = entry.location; + } // for + } // if +#endif + // we don't own these now, retval does. ctx->ctab.symbols = NULL; ctx->preshader = NULL; @@ -3828,6 +3863,7 @@ int MOJOSHADER_maxShaderModel(const char *profile) PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV3, 2); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV4, 3); PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_METAL, 3); + PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_SPIRV, 3); #undef PROFILE_SHADER_MODEL return -1; // unknown profile? 
} // MOJOSHADER_maxShaderModel diff --git a/mojoshader.h b/mojoshader.h index a289c7a1..d1e487ee 100644 --- a/mojoshader.h +++ b/mojoshader.h @@ -719,6 +719,11 @@ typedef struct MOJOSHADER_parseData */ #define MOJOSHADER_PROFILE_METAL "metal" +/* + * Profile string for SPIR-V binary output + */ +#define MOJOSHADER_PROFILE_SPIRV "spirv" + /* * Determine the highest supported Shader Model for a profile. */ diff --git a/mojoshader_common.c b/mojoshader_common.c index 807f28d8..9c84697d 100644 --- a/mojoshader_common.c +++ b/mojoshader_common.c @@ -1036,6 +1036,49 @@ ssize_t buffer_find(Buffer *buffer, const size_t start, return -1; // no match found. } // buffer_find +void buffer_patch(Buffer *buffer, const size_t start, + const void *_data, const size_t len) +{ + if (len == 0) + return; // Nothing to do. + + if ((start + len) > buffer->total_bytes) + return; // definitely can't patch. + + // Find the start point somewhere in the center of a buffer. + BufferBlock *item = buffer->head; + size_t pos = 0; + if (start > 0) + { + while (1) + { + assert(item != NULL); + if ((pos + item->bytes) > start) // start is in this block. 
+ break; + + pos += item->bytes; + item = item->next; + } // while + } // if + + const uint8 *data = (const uint8 *) _data; + size_t write_pos = start - pos; + size_t write_remain = len; + size_t written = 0; + while (write_remain) + { + size_t write_end = write_pos + write_remain; + if (write_end > item->bytes) + write_end = item->bytes; + + size_t to_write = write_end - write_pos; + memcpy(item->data + write_pos, data + written, to_write); + write_remain -= to_write; + written += to_write; + write_pos = 0; + item = item->next; + } // while +} // buffer_patch // Based on SDL_string.c's SDL_PrintFloat function size_t MOJOSHADER_printFloat(char *text, size_t maxlen, float arg) diff --git a/mojoshader_internal.h b/mojoshader_internal.h index 9bb03133..07dc9e5d 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -64,6 +64,10 @@ #define SUPPORT_PROFILE_METAL 1 #endif +#ifndef SUPPORT_PROFILE_SPIRV +#define SUPPORT_PROFILE_SPIRV 1 +#endif + #if SUPPORT_PROFILE_ARB1_NV && !SUPPORT_PROFILE_ARB1 #error nv profiles require arb1 profile. Fix your build. #endif @@ -265,6 +269,8 @@ char *buffer_merge(Buffer **buffers, const size_t n, size_t *_len); void buffer_destroy(Buffer *buffer); ssize_t buffer_find(Buffer *buffer, const size_t start, const void *data, const size_t len); +void buffer_patch(Buffer *buffer, const size_t start, + const void *data, const size_t len); @@ -596,6 +602,42 @@ void MOJOSHADER_print_debug_token(const char *subsystem, const char *token, const unsigned int tokenlen, const Token tokenval); + +#if SUPPORT_PROFILE_SPIRV +// Patching SPIR-V binaries before linking is needed to ensure locations do not +// overlap between shader stages. Unfortunately, OpDecorate takes Literal, so we +// can't use Result from OpSpecConstant and leave this up to specialization +// mechanism. 
+// Patch table must be propagated from parsing to program linking, but since +// MOJOSHADER_parseData is public and I'd like to avoid changing ABI and exposing +// this, it is appended to MOJOSHADER_parseData::output using postflight buffer. +typedef struct SpirvPatchEntry +{ + uint32 offset; + int32 location; +} SpirvPatchEntry; + +typedef struct SpirvPatchTable +{ + SpirvPatchEntry vpflip; + SpirvPatchEntry array_vec4; + SpirvPatchEntry array_ivec4; + SpirvPatchEntry array_bool; + SpirvPatchEntry samplers[16]; + int32 location_count; + union + { + // VS only; non-0 when there is PSIZE output + uint32 vs_has_psize; + + // PS only; offset to TEXCOORD0 location part of OpDecorate. + // Used to find OpDecorate and patch it to BuiltIn PointCoord when + // VS outputs PSIZE. + uint32 ps_texcoord0_offset; + }; +} SpirvPatchTable; +#endif + #endif // _INCLUDE_MOJOSHADER_INTERNAL_H_ diff --git a/mojoshader_opengl.c b/mojoshader_opengl.c index 11c8ec3b..a74604e3 100644 --- a/mojoshader_opengl.c +++ b/mojoshader_opengl.c @@ -36,6 +36,10 @@ #include "GL/gl.h" #include "GL/glext.h" +#if SUPPORT_PROFILE_SPIRV +#include "spirv/spirv.h" +#endif + #ifndef GL_HALF_FLOAT_NV #define GL_HALF_FLOAT_NV 0x140B #endif @@ -53,6 +57,19 @@ #define GL_PROGRAM_POINT_SIZE 0x8642 #endif +// FIXME: ARB_gl_spirv in glext.h? -flibit +#ifndef GL_ARB_gl_spirv +#define GL_ARB_gl_spirv 1 +#define GL_SHADER_BINARY_FORMAT_SPIR_V_ARB 0x9551 +typedef void (APIENTRYP PFNGLSPECIALIZESHADERARBPROC) ( + GLuint shader, + const GLchar* pEntryPoint, + GLuint numSpecializationConstants, + const GLuint* pConstantIndex, + const GLuint* pConstantValue +); +#endif + struct MOJOSHADER_glShader { const MOJOSHADER_parseData *parseData; @@ -206,6 +223,8 @@ struct MOJOSHADER_glContext int have_GL_ARB_half_float_vertex; int have_GL_OES_vertex_half_float; int have_GL_ARB_instanced_arrays; + int have_GL_ARB_ES2_compatibility; + int have_GL_ARB_gl_spirv; // Entry points... 
PFNGLGETSTRINGPROC glGetString; @@ -267,6 +286,8 @@ struct MOJOSHADER_glContext PFNGLBINDPROGRAMARBPROC glBindProgramARB; PFNGLPROGRAMSTRINGARBPROC glProgramStringARB; PFNGLVERTEXATTRIBDIVISORARBPROC glVertexAttribDivisorARB; + PFNGLSHADERBINARYPROC glShaderBinary; + PFNGLSPECIALIZESHADERARBPROC glSpecializeShaderARB; // interface for profile-specific things. int (*profileMaxUniforms)(MOJOSHADER_shaderType shader_type); @@ -381,7 +402,7 @@ static inline void toggle_gl_state(GLenum state, int val) // profile-specific implementations... -#if SUPPORT_PROFILE_GLSL +#if SUPPORT_PROFILE_GLSL || SUPPORT_PROFILE_SPIRV static inline GLenum glsl_shader_type(const MOJOSHADER_shaderType t) { // these enums match between core 2.0 and the ARB extensions. @@ -415,7 +436,230 @@ static int impl_GLSL_MaxUniforms(MOJOSHADER_shaderType shader_type) return (int) val; } // impl_GLSL_MaxUniforms +#if SUPPORT_PROFILE_SPIRV +static const SpirvPatchTable* spv_getPatchTable(MOJOSHADER_glShader *shader) +{ + const MOJOSHADER_parseData *pd = shader->parseData; + size_t table_offset = pd->output_len - sizeof(SpirvPatchTable); + return (const SpirvPatchTable *) (pd->output + table_offset); +} // spv_getPatchTable + +static int spv_CompileShader(const MOJOSHADER_parseData *pd, int32 base_location, GLuint *s, int32 patch_pcoord) +{ + GLint ok = 0; + + GLsizei data_len = pd->output_len - sizeof(SpirvPatchTable); + const GLvoid* data = pd->output; + uint32 *patched_data = NULL; + if (base_location || patch_pcoord) + { + size_t i, max; + + patched_data = (uint32 *) Malloc(data_len); + memcpy(patched_data, data, data_len); + const SpirvPatchTable *table = (const SpirvPatchTable *) &pd->output[data_len]; + if (table->vpflip.offset) patched_data[table->vpflip.offset] += base_location; + if (table->array_vec4.offset) patched_data[table->array_vec4.offset] += base_location; + if (table->array_ivec4.offset) patched_data[table->array_ivec4.offset] += base_location; + if (table->array_bool.offset) 
patched_data[table->array_bool.offset] += base_location; + + for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++) + { + SpirvPatchEntry entry = table->samplers[i]; + if (entry.offset) + patched_data[entry.offset] += base_location; + } // for + + if (patch_pcoord && table->ps_texcoord0_offset) + { + // Subtract 3 to get from Location value offset to start of op. + uint32 op_base = table->ps_texcoord0_offset - 3; + assert(patched_data[op_base+0] == (SpvOpDecorate | (4 << 16))); + assert(patched_data[op_base+2] == SpvDecorationLocation); + patched_data[op_base+2] = SpvDecorationBuiltIn; + patched_data[op_base+3] = SpvBuiltInPointCoord; + } // if + + data = patched_data; + } // if + + const GLuint shader = ctx->glCreateShader(glsl_shader_type(pd->shader_type)); + ctx->glShaderBinary(1, &shader, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, data, data_len); + ctx->glSpecializeShaderARB(shader, pd->mainfn, 0, NULL, NULL); // FIXME: Spec Constants? -flibit + ctx->glGetShaderiv(shader, GL_COMPILE_STATUS, &ok); + + if (patched_data) + Free(patched_data); + + if (!ok) + { + GLsizei len = 0; + ctx->glGetShaderInfoLog(shader, sizeof(error_buffer), &len, + (GLchar *) error_buffer); + ctx->glDeleteShader(shader); + *s = 0; + return 0; + } // if + + *s = shader; + + return 1; +} // spv_CompileShader + +static int impl_SPIRV_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s) +{ + // Compilation postponed until linking, but generate dummy shader id so hash table lookups work. + *s = ctx->glCreateShader(glsl_shader_type(pd->shader_type)); + return 1; +} // impl_SPIRV_CompileShader + +static GLuint impl_SPIRV_LinkProgram(MOJOSHADER_glShader *vshader, + MOJOSHADER_glShader *pshader) +{ + GLint ok = 0; + + // Shader compilation postponed until linking due to uniform locations being global in program. + // To avoid overlap between VS and PS, we need to know about other shader stages to assign final + // uniform locations before compilation. 
+ GLuint vs_handle = 0; + int32 base_location = 0; + int32 patch_pcoord = 0; + if (vshader) + { + if (!spv_CompileShader(vshader->parseData, base_location, &vs_handle, patch_pcoord)) + return 0; + + const SpirvPatchTable* patch_table = spv_getPatchTable(vshader); + base_location += patch_table->location_count; + patch_pcoord = patch_table->vs_has_psize; + } // if + + GLuint ps_handle = 0; + if (pshader) + { + if (!spv_CompileShader(pshader->parseData, base_location, &ps_handle, patch_pcoord)) + return 0; + } // if + + if (ctx->have_opengl_2) + { + const GLuint program = ctx->glCreateProgram(); + if (vs_handle) + { + ctx->glAttachShader(program, vs_handle); + ctx->glDeleteShader(vs_handle); + } // if + if (ps_handle) + { + ctx->glAttachShader(program, ps_handle); + ctx->glDeleteShader(ps_handle); + } // if + ctx->glLinkProgram(program); + ctx->glGetProgramiv(program, GL_LINK_STATUS, &ok); + if (!ok) + { + GLsizei len = 0; + ctx->glGetProgramInfoLog(program, sizeof (error_buffer), + &len, (GLchar *) error_buffer); + ctx->glDeleteProgram(program); + return 0; + } // if + + return program; + } // if + else + { + const GLhandleARB program = ctx->glCreateProgramObjectARB(); + assert(sizeof(program) == sizeof(GLuint)); // not always true on OS X! 
+ if (vs_handle) + { + ctx->glAttachObjectARB(program, (GLhandleARB) vs_handle); + ctx->glDeleteObjectARB((GLhandleARB) vs_handle); + } // if + if (ps_handle) + { + ctx->glAttachObjectARB(program, (GLhandleARB) ps_handle); + ctx->glDeleteObjectARB((GLhandleARB) ps_handle); + } // if + ctx->glLinkProgramARB(program); + ctx->glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &ok); + if (!ok) + { + GLsizei len = 0; + ctx->glGetInfoLogARB(program, sizeof (error_buffer), + &len, (GLcharARB *) error_buffer); + ctx->glDeleteObjectARB(program); + return 0; + } // if + + return (GLuint) program; + } // else +} // impl_SPIRV_LinkProgram + +static void impl_SPIRV_DeleteShader(const GLuint shader) +{ + ctx->glDeleteShader(shader); +} // impl_SPIRV_DeleteShader + +static void impl_SPIRV_DeleteProgram(const GLuint program) +{ + if (ctx->have_opengl_2) + ctx->glDeleteProgram(program); + else + ctx->glDeleteObjectARB((GLhandleARB) program); +} // impl_SPIRV_DeleteProgram + +static GLint impl_SPIRV_GetAttribLocation(MOJOSHADER_glProgram *program, int idx) +{ + return idx; +} // impl_SPIRV_GetAttribLocation +static GLint impl_SPIRV_GetUniformLocation(MOJOSHADER_glProgram *program, MOJOSHADER_glShader *shader, int idx) +{ + return 0; // no-op, we push this as one big-ass array now. 
+} // impl_SPIRV_GetUniformLocation
+
+static GLint impl_SPIRV_GetSamplerLocation(MOJOSHADER_glProgram *program, MOJOSHADER_glShader *shader, int idx)
+{
+    const SpirvPatchTable *table = spv_getPatchTable(shader);
+    GLint location = table->samplers[idx].location; // NOTE(review): idx is not range-checked against samplers[16].
+    if (location == -1)
+        return location; // -1 is a sentinel; it is never rebased.
+
+    assert(location >= 0);
+    if ((shader->parseData->shader_type == MOJOSHADER_TYPE_PIXEL) && (program->vertex != NULL))
+        location += spv_getPatchTable(program->vertex)->location_count; // PS locations follow the VS's.
+
+    return location;
+} // impl_SPIRV_GetSamplerLocation
+
+static void impl_SPIRV_FinalInitProgram(MOJOSHADER_glProgram *program)
+{
+    const SpirvPatchTable *vs_table = program->vertex ? spv_getPatchTable(program->vertex) : NULL; // linking accepts a missing stage, so tolerate one here too.
+    const SpirvPatchTable *ps_table = program->fragment ? spv_getPatchTable(program->fragment) : NULL;
+    program->vs_float4_loc = vs_table ? vs_table->array_vec4.location : -1;
+    program->vs_int4_loc = vs_table ? vs_table->array_ivec4.location : -1;
+    program->vs_bool_loc = vs_table ? vs_table->array_bool.location : -1;
+    program->ps_float4_loc = ps_table ? ps_table->array_vec4.location : -1;
+    program->ps_int4_loc = ps_table ? ps_table->array_ivec4.location : -1;
+    program->ps_bool_loc = ps_table ? ps_table->array_bool.location : -1;
+    program->ps_vpos_flip_loc = ps_table ? ps_table->vpflip.location : -1;
+#ifdef MOJOSHADER_FLIP_RENDERTARGET
+    program->vs_flip_loc = vs_table ? vs_table->vpflip.location : -1;
+#endif
+
+    int32 ps_base_location = vs_table ? vs_table->location_count : 0; // same rebase the link step applied to the PS binary.
+    if (ps_base_location)
+    {
+        if (program->ps_float4_loc != -1) program->ps_float4_loc += ps_base_location;
+        if (program->ps_int4_loc != -1) program->ps_int4_loc += ps_base_location;
+        if (program->ps_bool_loc != -1) program->ps_bool_loc += ps_base_location;
+        if (program->ps_vpos_flip_loc != -1) program->ps_vpos_flip_loc += ps_base_location;
+    } // if
+} // impl_SPIRV_FinalInitProgram
+#endif // SUPPORT_PROFILE_SPIRV
+
+#if SUPPORT_PROFILE_GLSL
 static int impl_GLSL_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s)
 {
     GLint ok = 0;
@@ -463,6 +707,7 @@ static int impl_GLSL_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s)
 
     return 1;
 } // impl_GLSL_CompileShader
+#endif // 
SUPPORT_PROFILE_GLSL static void impl_GLSL_DeleteShader(const GLuint shader) @@ -664,7 +909,7 @@ static void impl_GLSL_PushSampler(GLint loc, GLuint sampler) ctx->glUniform1i(loc, sampler); } // impl_GLSL_PushSampler -#endif // SUPPORT_PROFILE_GLSL +#endif // SUPPORT_PROFILE_GLSL || SUPPORT_PROFILE_SPIRV #if SUPPORT_PROFILE_ARB1 @@ -1026,6 +1271,8 @@ static void lookup_entry_points(MOJOSHADER_glGetProcAddress lookup, void *d) DO_LOOKUP(GL_ARB_vertex_program, PFNGLPROGRAMSTRINGARBPROC, glProgramStringARB); DO_LOOKUP(GL_NV_gpu_program4, PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC, glProgramLocalParameterI4ivNV); DO_LOOKUP(GL_ARB_instanced_arrays, PFNGLVERTEXATTRIBDIVISORARBPROC, glVertexAttribDivisorARB); + DO_LOOKUP(GL_ARB_ES2_compatibility, PFNGLSHADERBINARYPROC, glShaderBinary); + DO_LOOKUP(GL_ARB_gl_spirv, PFNGLSPECIALIZESHADERARBPROC, glSpecializeShaderARB); #undef DO_LOOKUP } // lookup_entry_points @@ -1144,6 +1391,8 @@ static void load_extensions(MOJOSHADER_glGetProcAddress lookup, void *d) ctx->have_GL_ARB_half_float_vertex = 1; ctx->have_GL_OES_vertex_half_float = 1; ctx->have_GL_ARB_instanced_arrays = 1; + ctx->have_GL_ARB_ES2_compatibility = 1; + ctx->have_GL_ARB_gl_spirv = 1; lookup_entry_points(lookup, d); @@ -1242,6 +1491,8 @@ static void load_extensions(MOJOSHADER_glGetProcAddress lookup, void *d) VERIFY_EXT(GL_ARB_half_float_vertex, 3, 0); VERIFY_EXT(GL_OES_vertex_half_float, -1, -1); VERIFY_EXT(GL_ARB_instanced_arrays, 3, 3); + VERIFY_EXT(GL_ARB_ES2_compatibility, 4, 1); + VERIFY_EXT(GL_ARB_gl_spirv, -1, -1); #undef VERIFY_EXT @@ -1302,6 +1553,14 @@ static int valid_profile(const char *profile) } // else if #endif + #if SUPPORT_PROFILE_SPIRV + else if (strcmp(profile, MOJOSHADER_PROFILE_SPIRV) == 0) + { + MUST_HAVE(MOJOSHADER_PROFILE_SPIRV, GL_ARB_ES2_compatibility); + MUST_HAVE(MOJOSHADER_PROFILE_SPIRV, GL_ARB_gl_spirv); + } // else if + #endif + #if SUPPORT_PROFILE_GLSLES else if (strcmp(profile, MOJOSHADER_PROFILE_GLSLES) == 0) { @@ -1336,6 +1595,9 @@ 
static int valid_profile(const char *profile) static const char *profile_priorities[] = { +#if SUPPORT_PROFILE_SPIRV + MOJOSHADER_PROFILE_SPIRV, +#endif #if SUPPORT_PROFILE_GLSL120 MOJOSHADER_PROFILE_GLSL120, #endif @@ -1464,6 +1726,29 @@ MOJOSHADER_glContext *MOJOSHADER_glCreateContext(const char *profile, // !!! FIXME: generalize this part. if (profile == NULL) {} + // We don't check SUPPORT_PROFILE_SPIRV here, since valid_profile() does. +#if SUPPORT_PROFILE_SPIRV + else if (strcmp(profile, MOJOSHADER_PROFILE_SPIRV) == 0) + { + ctx->profileMaxUniforms = impl_GLSL_MaxUniforms; + ctx->profileCompileShader = impl_SPIRV_CompileShader; + ctx->profileDeleteShader = impl_SPIRV_DeleteShader; + ctx->profileDeleteProgram = impl_SPIRV_DeleteProgram; + ctx->profileGetAttribLocation = impl_SPIRV_GetAttribLocation; + ctx->profileGetUniformLocation = impl_SPIRV_GetUniformLocation; + ctx->profileGetSamplerLocation = impl_SPIRV_GetSamplerLocation; + ctx->profileLinkProgram = impl_SPIRV_LinkProgram; + ctx->profileFinalInitProgram = impl_SPIRV_FinalInitProgram; + ctx->profileUseProgram = impl_GLSL_UseProgram; + ctx->profilePushConstantArray = impl_GLSL_PushConstantArray; + ctx->profilePushUniforms = impl_GLSL_PushUniforms; + ctx->profilePushSampler = impl_GLSL_PushSampler; + ctx->profileMustPushConstantArrays = impl_GLSL_MustPushConstantArrays; + ctx->profileMustPushSamplers = impl_GLSL_MustPushSamplers; + ctx->profileToggleProgramPointSize = impl_REAL_ToggleProgramPointSize; + } // if +#endif + // We don't check SUPPORT_PROFILE_GLSL120/ES here, since valid_profile() does. 
#if SUPPORT_PROFILE_GLSL else if ( (strcmp(profile, MOJOSHADER_PROFILE_GLSL) == 0) || diff --git a/profiles/mojoshader_profile.h b/profiles/mojoshader_profile.h index 4b13b05a..f9db8a1b 100644 --- a/profiles/mojoshader_profile.h +++ b/profiles/mojoshader_profile.h @@ -12,6 +12,10 @@ #include "../mojoshader_internal.h" +#if SUPPORT_PROFILE_SPIRV +#include "mojoshader_profile_spirv.h" +#endif + typedef struct ConstantsList { MOJOSHADER_constant constant; @@ -38,6 +42,12 @@ typedef struct RegisterList int writemask; int misc; int written; +#if SUPPORT_PROFILE_SPIRV + struct { + uint32 iddecl; + int is_ssa; // FIXME(krolli): Is there an existing way to tell constants and uniforms apart? + } spirv; +#endif const VariableList *array; struct RegisterList *next; } RegisterList; @@ -195,6 +205,11 @@ typedef struct Context int metal_need_header_graphics; int metal_need_header_texture; #endif + +#if SUPPORT_PROFILE_SPIRV + int branch_labels_patch_stack[32]; + SpirvContext spirv; +#endif } Context; // Use these macros so we can remove all bits of these profiles from the build. diff --git a/profiles/mojoshader_profile_common.c b/profiles/mojoshader_profile_common.c index ff7d014e..b0d84138 100644 --- a/profiles/mojoshader_profile_common.c +++ b/profiles/mojoshader_profile_common.c @@ -249,6 +249,10 @@ RegisterList *reglist_insert(Context *ctx, RegisterList *prev, item->writemask = 0; item->misc = 0; item->written = 0; +#if SUPPORT_PROFILE_SPIRV + item->spirv.iddecl = 0; + item->spirv.is_ssa = 0; +#endif item->array = NULL; item->next = prev->next; prev->next = item; diff --git a/profiles/mojoshader_profile_spirv.c b/profiles/mojoshader_profile_spirv.c new file mode 100644 index 00000000..d5f30a0c --- /dev/null +++ b/profiles/mojoshader_profile_spirv.c @@ -0,0 +1,4060 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. 
+ */
+
+#define __MOJOSHADER_INTERNAL__ 1
+#include "mojoshader_profile.h"
+
+#pragma GCC visibility push(hidden)
+
+#if SUPPORT_PROFILE_SPIRV
+#include "spirv/spirv.h"
+#include "spirv/GLSL.std.450.h"
+#include <stdarg.h>
+
+static const int SPV_NO_SWIZZLE = 0xE4; // 0xE4 == 11100100 ... 0 1 2 3. No swizzle.
+
+#define EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(op) \
+    void emit_SPIRV_##op(Context *ctx) { \
+        fail(ctx, #op " unimplemented in spirv profile"); \
+    }
+
+typedef struct SpirvTexm3x3SetupResult
+{
+    // vec4 load results
+    uint32 id_dst_pad0;
+    uint32 id_dst_pad1;
+    uint32 id_dst;
+
+    // float dot results
+    uint32 id_res_x;
+    uint32 id_res_y;
+    uint32 id_res_z;
+} SpirvTexm3x3SetupResult;
+
+static const char *spv_get_uniform_array_varname(Context *ctx,
+                                                 const RegisterType regtype,
+                                                 char *buf, const size_t len)
+{
+    const char *shadertype = ctx->shader_type_str;
+    const char *type = ""; // stays empty (after fail()) for unknown register types.
+    switch (regtype)
+    {
+        case REG_TYPE_CONST: type = "vec4"; break;
+        case REG_TYPE_CONSTINT: type = "ivec4"; break;
+        case REG_TYPE_CONSTBOOL: type = "bool"; break;
+        default: fail(ctx, "BUG: used a uniform we don't know how to define.");
+    } // switch
+    snprintf(buf, len, "%s_uniforms_%s", shadertype, type); // e.g. "<shadertype>_uniforms_vec4"
+    return buf;
+} // spv_get_uniform_array_varname
+
+static uint32 spv_bumpid(Context *ctx)
+{
+    return (ctx->spirv.idmax += 1); // increments first, then returns the new idmax.
+} // spv_bumpid
+
+static RegisterList *spv_getreg(Context *ctx, const RegisterType regtype, const int regnum)
+{
+    RegisterList *r = reglist_find(&ctx->used_registers, regtype, regnum);
+    if (!r)
+    {
+        failf(ctx, "register not found rt=%d, rn=%d", regtype, regnum);
+        return NULL; // callers must cope with NULL after a failure.
+    } // if
+    return r;
+} // spv_getreg
+
+static void spv_componentlist_free(Context *ctx, ComponentList *cl)
+{
+    ComponentList *next;
+    while (cl)
+    {
+        next = cl->next; // grab the link before the node is released.
+        Free(ctx, cl);
+        cl = next;
+    } // while
+} // spv_componentlist_free
+
+static ComponentList *spv_componentlist_alloc(Context *ctx)
+{
+    ComponentList *ret = (ComponentList *) Malloc(ctx, 
sizeof(ComponentList)); + if (!ret) return NULL; + ret->id = 0; + ret->v.i = 0; + ret->next = NULL; + return ret; +} // spv_componentlist_alloc + +static const char *get_SPIRV_varname_in_buf(Context *ctx, const RegisterType rt, + const int regnum, char *buf, + const size_t buflen) +{ + // turns out these are identical at the moment. + return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen); +} // get_SPIRV_varname_in_buf + +const char *get_SPIRV_varname(Context *ctx, const RegisterType rt, + const int regnum) +{ + // turns out these are identical at the moment. + return get_D3D_varname(ctx, rt, regnum); +} // get_SPIRV_varname + + +static inline const char *get_SPIRV_const_array_varname_in_buf(Context *ctx, + const int base, const int size, + char *buf, const size_t buflen) +{ + snprintf(buf, buflen, "c_array_%d_%d", base, size); + return buf; +} // get_SPIRV_const_array_varname_in_buf + + +const char *get_SPIRV_const_array_varname(Context *ctx, int base, int size) +{ + char buf[64]; + get_SPIRV_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf)); + return StrDup(ctx, buf); +} // get_SPIRV_const_array_varname + +static uint32 spv_get_uniform_array_id(Context *ctx, const RegisterType regtype) +{ + uint32 id; + switch (regtype) + { + case REG_TYPE_CONST: + id = ctx->spirv.uniform_arrays.idvec4; + if (id == 0) + { + id = spv_bumpid(ctx); + ctx->spirv.uniform_arrays.idvec4 = id; + } // if + break; + + case REG_TYPE_CONSTINT: + id = ctx->spirv.uniform_arrays.idivec4; + if (id == 0) + { + id = spv_bumpid(ctx); + ctx->spirv.uniform_arrays.idivec4 = id; + } // if + break; + + case REG_TYPE_CONSTBOOL: + id = ctx->spirv.uniform_arrays.idbool; + if (id == 0) + { + id = spv_bumpid(ctx); + ctx->spirv.uniform_arrays.idbool = id; + } // if + break; + + default: + fail(ctx, "Unexpected register type used to access uniform array."); + id = 0; + } // switch + + return id; +} // spv_get_uniform_array_id + +static void spv_emit_part_va(Context* ctx, uint32 word_count, 
uint32 argc, SpvOp op, va_list args)
+{
+    assert(ctx->output != NULL);
+    if (isfail(ctx))
+        return; // we failed previously, don't go on...
+
+    uint32 word = op | (word_count << 16); // first word: opcode in the low half, word count in the high half.
+    buffer_append(ctx->output, &word, sizeof(word));
+    while (--argc) // argc counts the opcode word too, so argc-1 operands are read from args.
+    {
+        word = va_arg(args, uint32);
+        buffer_append(ctx->output, &word, sizeof(word));
+    } // while
+} // spv_emit_part_va
+
+static void spv_emit_part(Context* ctx, uint32 word_count, uint32 argc, SpvOp op, ...)
+{
+    va_list args;
+    va_start(args, op);
+    spv_emit_part_va(ctx, word_count, argc, op, args); // header declares word_count words, only argc are written here.
+    va_end(args);
+} // spv_emit_part
+
+static void spv_emit(Context *ctx, uint32 word_count, SpvOp op, ...)
+{
+    va_list args;
+    va_start(args, op);
+    spv_emit_part_va(ctx, word_count, word_count, op, args); // complete instruction: every declared word is written.
+    va_end(args);
+} // spv_emit
+
+static void spv_emit_word(Context *ctx, uint32 word)
+{
+    assert(ctx->output != NULL);
+    if (isfail(ctx))
+        return; // we failed previously, don't go on...
+
+    buffer_append(ctx->output, &word, sizeof(word));
+} // spv_emit_word
+
+static void spv_emit_str(Context *ctx, const char *str)
+{
+    size_t len;
+    uint32 trail;
+    assert(ctx->output != NULL);
+    if (isfail(ctx))
+        return; // we failed previously, don't go on...
+
+    if (str == NULL)
+        str = ""; // NULL is emitted like "": one all-zero word (terminator plus padding).
+    len = strlen(str) + 1; // include the NUL terminator.
+    buffer_append(ctx->output, str, len);
+    len = len % 4;
+    if (len)
+    {
+        trail = 0;
+        buffer_append(ctx->output, &trail, 4 - len); // zero-pad up to the next 4-byte word.
+    } // if
+} // spv_emit_str
+
+// get the word count of a string, NUL terminator and zero padding included
+static uint32 spv_strlen(const char *str)
+{
+    size_t len = strlen(str);
+    return (uint32) ((len / 4) + 1);
+} // spv_strlen
+
+// emits an OpName straight into ctx->globals
+static void spv_output_name(Context *ctx, uint32 id, const char *str)
+{
+    if (isfail(ctx))
+        return; // we failed previously, don't go on...
+ + push_output(ctx, &ctx->globals); + spv_emit_part(ctx, 2 + spv_strlen(str), 2, SpvOpName, id); + spv_emit_str(ctx, str); + pop_output(ctx); +} // spv_output_name + +// emit an OpName instruction to identify a register +static void spv_output_regname(Context *ctx, uint32 id, RegisterType regtype, int regnum) +{ + char varname[64]; + snprintf(varname, sizeof(varname), "%s_", ctx->shader_type_str); + size_t offset = strlen(varname); + get_SPIRV_varname_in_buf(ctx, regtype, regnum, varname + offset, sizeof(varname) - offset); + spv_output_name(ctx, id, varname); +} // spv_output_regname + +// emits an OpDecorate BuiltIn straight into ctx->helpers +static void spv_output_builtin(Context *ctx, uint32 id, SpvBuiltIn builtin) +{ + if (isfail(ctx)) + return; // we failed previously, don't go on... + + push_output(ctx, &ctx->helpers); + spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationBuiltIn, builtin); + pop_output(ctx); +} // spv_output_builtin + +static uint32 spv_output_location(Context *ctx, uint32 id, uint32 loc) +{ + push_output(ctx, &ctx->helpers); + spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationLocation, loc); + pop_output(ctx); + return (buffer_size(ctx->helpers) >> 2) - 1; +} // spv_output_location + +static void spv_output_set_binding(Context *ctx, uint32 id, uint32 set, uint32 binding) +{ + if (isfail(ctx)) + return; + + push_output(ctx, &ctx->helpers); + spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationDescriptorSet, set); + spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationBinding, binding); + pop_output(ctx); +} // spv_output_set_binding + +static SpirvTypeIdx spv_change_base_type_vec_dim(SpirvTypeIdx sti, uint32 dim) +{ + uint32 dimSub1 = dim - 1; + assert(STI_CORE_START_ <= sti && sti < STI_CORE_END_); + assert(dimSub1 < 4); + + SpirvTypeIdx sti_base = (SpirvTypeIdx)(sti & ~0x3); + SpirvTypeIdx sti_new = (SpirvTypeIdx)(sti_base | dimSub1); + return sti_new; +} // spv_change_base_type_vec_dim + +static uint32 spv_get_type(Context *ctx, SpirvTypeIdx 
tidx) +{ + assert(((uint32)tidx) < ((uint32)STI_LENGTH_)); + + uint32 tid = ctx->spirv.tid[tidx]; + if (tid) + return tid; + + push_output(ctx, &ctx->mainline_intro); + if (STI_CORE_START_ <= tidx && tidx < STI_CORE_END_) + { + uint32 dim = tidx & 0x3; + SpirvType type = (SpirvType)((tidx >> 2) & 0x3); + if (dim) + { + uint32 tid_base = spv_get_type(ctx, (SpirvTypeIdx)(tidx - dim)); + tid = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpTypeVector, tid, tid_base, dim + 1); + } // if + else + { + tid = spv_bumpid(ctx); + switch (type) + { + case ST_FLOAT: spv_emit(ctx, 3, SpvOpTypeFloat, tid, 32); break; + case ST_SINT: spv_emit(ctx, 4, SpvOpTypeInt, tid, 32, 1); break; + case ST_UINT: spv_emit(ctx, 4, SpvOpTypeInt, tid, 32, 0); break; + case ST_BOOL: spv_emit(ctx, 2, SpvOpTypeBool, tid); break; + default: assert(!"Unexpected value of SpirvType."); break; + } // switch + } // else + } // if + else if (STI_IMAGE2D <= tidx && tidx <= STI_IMAGECUBE) + { + static const SpvDim dim_table[] = {SpvDim2D, SpvDim3D, SpvDimCube}; + SpvDim dim = dim_table[tidx - STI_IMAGE2D]; + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 id_image = spv_bumpid(ctx); + tid = spv_bumpid(ctx); + spv_emit(ctx, 9, SpvOpTypeImage, id_image, tid_float, dim, 0, 0, 0, 1, SpvImageFormatUnknown); + spv_emit(ctx, 3, SpvOpTypeSampledImage, tid, id_image); + } // else if + else if (tidx == STI_VOID) + { + tid = spv_bumpid(ctx); + spv_emit(ctx, 2, SpvOpTypeVoid, tid); + } // else if + else if (tidx == STI_FUNC_VOID) + { + uint32 tid_void = spv_get_type(ctx, STI_VOID); + tid = spv_bumpid(ctx); + spv_emit(ctx, 3, SpvOpTypeFunction, tid, tid_void); + } // else if + else if (tidx == STI_FUNC_LIT) + { + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + tid = spv_bumpid(ctx); + spv_emit(ctx, 3 + 1, SpvOpTypeFunction, tid, tid_vec4, tid_vec4); + } // else if + else if (STI_PTR_START_ <= tidx && tidx < STI_PTR_END_) + { + uint32 dim = (tidx & (1 << 4)) ? 
3 : 0; + SpirvType type = (SpirvType)((tidx >> 2) & 0x3); + uint32 tid_base = spv_get_type(ctx, (SpirvTypeIdx)((1 << 4) | (type << 2) | dim)); + static const SpvStorageClass sc_map[] = { + SpvStorageClassInput, + SpvStorageClassOutput, + SpvStorageClassPrivate, + SpvStorageClassUniformConstant, + }; + SpvStorageClass sc = sc_map[tidx & 0x3]; + tid = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpTypePointer, tid, sc, tid_base); + } // else if + else if (STI_PTR_IMAGE2D <= tidx && tidx <= STI_PTR_IMAGECUBE) + { + uint32 tid_image = spv_get_type(ctx, (SpirvTypeIdx)(tidx - (STI_PTR_IMAGE2D - STI_IMAGE2D))); + tid = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpTypePointer, tid, SpvStorageClassUniformConstant, tid_image); + } // else if + else + assert(!"Unexpected value of type index."); + pop_output(ctx); + + ctx->spirv.tid[tidx] = tid; + return tid; +} // spv_get_type + +static uint32 spv_gettrue(Context *ctx) +{ + if (ctx->spirv.idtrue) + return ctx->spirv.idtrue; + + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 3, SpvOpConstantTrue, tid_bool, id); + pop_output(ctx); + return ctx->spirv.idtrue = id; +} // spv_gettrue + +static uint32 spv_getfalse(Context *ctx) +{ + if (ctx->spirv.idfalse) + return ctx->spirv.idfalse; + + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 3, SpvOpConstantFalse, tid_bool, id); + pop_output(ctx); + return ctx->spirv.idfalse = id; +} // spv_getfalse + +static uint32 spv_getext(Context *ctx) +{ + if (ctx->spirv.idext) + return ctx->spirv.idext; + + return ctx->spirv.idext = spv_bumpid(ctx); +} // spv_getext + +static uint32 spv_output_scalar(Context *ctx, ComponentList *cl, + MOJOSHADER_attributeType type) +{ + uint32 idret, idtype; + if (type == MOJOSHADER_ATTRIBUTE_FLOAT) + idtype = spv_get_type(ctx, STI_FLOAT); + else if (type == MOJOSHADER_ATTRIBUTE_INT) + idtype = 
spv_get_type(ctx, STI_INT); + else if (type == MOJOSHADER_ATTRIBUTE_UINT) + idtype = spv_get_type(ctx, STI_UINT); + else + { + failf(ctx, "%s: invalid attribute type %d", __func__, type); + return 0; + } // else + idret = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpConstant, idtype, idret, cl->v.u); + pop_output(ctx); + return idret; +} // spv_output_scalar + +// The spv_getscalar* functions retrieve the result id of an OpConstant +// instruction with the corresponding value v, or generate a new one. +static uint32 spv_getscalarf(Context *ctx, float v) +{ + ComponentList *prev = &(ctx->spirv.cl.f), *cl = ctx->spirv.cl.f.next; + while (cl) + { + if (v == cl->v.f) + return cl->id; + else if (v < cl->v.f) + break; + prev = cl; + cl = cl->next; + } // while + cl = spv_componentlist_alloc(ctx); + cl->next = prev->next; + prev->next = cl; + cl->v.f = v; + cl->id = spv_output_scalar(ctx, cl, MOJOSHADER_ATTRIBUTE_FLOAT); + return cl->id; +} // spv_getscalarf + +static uint32 spv_getscalari(Context *ctx, int v) +{ + ComponentList *prev = &(ctx->spirv.cl.i), *cl = ctx->spirv.cl.i.next; + while (cl) + { + if (v == cl->v.i) + return cl->id; + else if (v < cl->v.i) + break; + prev = cl; + cl = cl->next; + } // while + cl = spv_componentlist_alloc(ctx); + cl->next = prev->next; + prev->next = cl; + cl->v.i = v; + cl->id = spv_output_scalar(ctx, cl, MOJOSHADER_ATTRIBUTE_INT); + return cl->id; +} // spv_getscalari + +static uint32 spv_get_constant_composite(Context *ctx, uint32 tid, uint32* cache, float scalar) +{ + uint32 i; + + assert(tid != 0); + uint32 dim = + (tid == ctx->spirv.tid[STI_VEC4]) ? 4 : + (tid == ctx->spirv.tid[STI_VEC3]) ? 3 : + (tid == ctx->spirv.tid[STI_VEC2]) ? 
2 : 1; + + uint32 id = cache[dim - 1]; + if (id) + return id; + + uint32 sid = spv_getscalarf(ctx, scalar); + if (dim == 1) + { + cache[0] = sid; + return sid; + } // if + + id = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline_intro); + spv_emit_part(ctx, 3 + dim, 3, SpvOpConstantComposite, tid, id); + for (i = 0; i < dim; i++) + spv_emit_word(ctx, sid); + pop_output(ctx); + cache[dim - 1] = id; + return id; +} // spv_get_constant_composite + +static uint32 spv_get_zero(Context *ctx, uint32 tid) +{ + return spv_get_constant_composite(ctx, tid, ctx->spirv.id_0_0, 0.0f); +} // spv_get_zero + +static uint32 spv_get_one(Context *ctx, uint32 tid) +{ + return spv_get_constant_composite(ctx, tid, ctx->spirv.id_1_0, 1.0f); +} // spv_get_one + +static uint32 spv_get_flt_max(Context *ctx, uint32 tid) +{ + return spv_get_constant_composite(ctx, tid, ctx->spirv.id_flt_max, FLT_MAX); +} // spv_get_one + +static uint32 spv_getvec4_zero(Context *ctx) +{ + return spv_get_constant_composite(ctx, spv_get_type(ctx, STI_VEC4), ctx->spirv.id_0_0, 0.0f); +} // spv_getvec4_zero + +static uint32 spv_getvec4_one(Context *ctx) +{ + return spv_get_constant_composite(ctx, spv_get_type(ctx, STI_VEC4), ctx->spirv.id_1_0, 1.0f); +} // spv_getvec4_one + +// Make a 4-channel vector with a value broadcast across all channels. 
Roughly equivalent to `vec4(value)` in GLSL +static uint32 spv_vectorbroadcast(Context *ctx, uint32 tid, uint32 value) +{ + uint32 result = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid, result, value, value, value, value); + pop_output(ctx); + return result; +} // spv_vectorbroadcast + +static void spv_branch_push(Context *ctx, uint32 id_merge, uint32 patch_offset) +{ + assert(((size_t)ctx->branch_labels_stack_index) < STATICARRAYLEN(ctx->branch_labels_stack)); + int pos = ctx->branch_labels_stack_index++; + ctx->branch_labels_stack[pos] = id_merge; + ctx->branch_labels_patch_stack[pos] = patch_offset; +} // spv_branch_push + +static void spv_branch_get(Context *ctx, uint32* out_id_merge, uint32* out_patch_offset) +{ + assert(ctx->branch_labels_stack_index > 0); + int pos = ctx->branch_labels_stack_index - 1; + *out_id_merge = ctx->branch_labels_stack[pos]; + *out_patch_offset = ctx->branch_labels_patch_stack[pos]; +} // spv_branch_get + +static void spv_branch_pop(Context *ctx, uint32* out_id_merge, uint32* out_patch_offset) +{ + spv_branch_get(ctx, out_id_merge, out_patch_offset); + ctx->branch_labels_stack_index--; +} // spv_branch_pop + +static void spv_loop_push(Context *ctx, const SpirvLoopInfo *loop) +{ + assert(((size_t)ctx->spirv.loop_stack_idx) < STATICARRAYLEN(ctx->spirv.loop_stack)); + int pos = ctx->spirv.loop_stack_idx++; + ctx->spirv.loop_stack[pos] = *loop; +} // spv_loop_push + +static void spv_loop_get(Context *ctx, SpirvLoopInfo *loop) +{ + assert(ctx->spirv.loop_stack_idx > 0); + int pos = ctx->spirv.loop_stack_idx - 1; + *loop = ctx->spirv.loop_stack[pos]; +} // spv_loop_get + +static void spv_loop_pop(Context *ctx, SpirvLoopInfo *loop) +{ + spv_loop_get(ctx, loop); + ctx->spirv.loop_stack_idx--; +} // spv_loop_pop + +static uint32 spv_loop_get_aL(Context *ctx) +{ + int i; + + // Find the first enclosing loop..endloop. 
There may be rep..endrep nested inside, so it might + // not be at the top of the stack. + for (i = ctx->spirv.loop_stack_idx - 1; i >= 0; i--) + { + uint32 id_aL = ctx->spirv.loop_stack[i].id_aL; + if (id_aL) + return id_aL; + } // for + + assert(!"Referencing loop counter register aL in code not part of loop..endloop region."); + return 0; +} // spv_loop_get_aL + +static SpvOp spv_get_comparison(Context *ctx) +{ + static const SpvOp spv_cmp_ops[] = { + SpvOpUndef, + SpvOpFOrdGreaterThan, + SpvOpFOrdEqual, + SpvOpFOrdGreaterThanEqual, + SpvOpFOrdLessThan, + SpvOpFOrdNotEqual, + SpvOpFOrdLessThanEqual, + }; + + if (ctx->instruction_controls >= STATICARRAYLEN(spv_cmp_ops)) + { + fail(ctx, "unknown comparison control"); + return SpvOpUndef; + } // if + + return spv_cmp_ops[ctx->instruction_controls]; +} // spv_get_comparison + +static void spv_check_read_reg_id(Context *ctx, RegisterList *r) +{ + if (r->spirv.iddecl == 0) + { + assert(r->regtype != REG_TYPE_SAMPLER || (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 1, 4))); + assert(r->regtype != REG_TYPE_TEXTURE || (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 1, 4))); + switch (r->regtype) + { + case REG_TYPE_SAMPLER: // s# (only ps_1_1) + case REG_TYPE_TEXTURE: // t# (only ps_1_1) + case REG_TYPE_INPUT: // v# + case REG_TYPE_TEMP: // r# + case REG_TYPE_CONST: // c# + case REG_TYPE_CONSTINT: // i# + case REG_TYPE_CONSTBOOL: // b# + case REG_TYPE_LABEL: // l# + case REG_TYPE_PREDICATE: // p0 + r->spirv.iddecl = spv_bumpid(ctx); + break; + + case REG_TYPE_LOOP: // aL + r->spirv.iddecl = spv_loop_get_aL(ctx); + break; + + default: + { + char varname[64]; + get_SPIRV_varname_in_buf(ctx, r->regtype, r->regnum, varname, sizeof(varname)); + failf(ctx, "register type %s is unimplemented\n", varname); + break; + } // default + } // switch + } // if +} // spv_check_read_reg_id + +static void spv_check_write_reg_id(Context *ctx, RegisterList *r) +{ + if (r->spirv.iddecl == 0) + { + switch (r->regtype) + { + // 
These registers require no declarations, so we can just create them as we see them + case REG_TYPE_ADDRESS: + case REG_TYPE_TEMP: + case REG_TYPE_RASTOUT: + case REG_TYPE_COLOROUT: + case REG_TYPE_TEXCRDOUT: + case REG_TYPE_DEPTHOUT: + case REG_TYPE_ATTROUT: + case REG_TYPE_PREDICATE: + r->spirv.iddecl = spv_bumpid(ctx); + break; + + // Other register types should be explicitly declared, so it is an error for them to have iddecl == 0 by now + default: + { + char varname[64]; + get_SPIRV_varname_in_buf(ctx, r->regtype, r->regnum, varname, sizeof(varname)); + failf(ctx, "tried to write to undeclared register %s\n", varname); + break; + } // default + } // switch + } // if +} // spv_check_write_reg_id + +static uint32 spv_ptrimage_from_texturetype(Context *ctx, TextureType ttype) +{ + switch (ttype) + { + case TEXTURE_TYPE_2D: + return spv_get_type(ctx, STI_PTR_IMAGE2D); + case TEXTURE_TYPE_CUBE: + return spv_get_type(ctx, STI_PTR_IMAGECUBE); + case TEXTURE_TYPE_VOLUME: + return spv_get_type(ctx, STI_PTR_IMAGE3D); + default: + fail(ctx, "BUG: used a sampler we don't know how to define."); + return 0; + } // switch +} // spv_ptrimage_from_texturetype + +static uint32 spv_image_from_texturetype(Context *ctx, TextureType ttype) +{ + switch (ttype) + { + case TEXTURE_TYPE_2D: + return spv_get_type(ctx, STI_IMAGE2D); + case TEXTURE_TYPE_CUBE: + return spv_get_type(ctx, STI_IMAGECUBE); + case TEXTURE_TYPE_VOLUME: + return spv_get_type(ctx, STI_IMAGE3D); + default: + fail(ctx, "BUG: used a sampler we don't know how to define."); + return 0; + } // switch +} // spv_ptrimage_from_texturetype + +static uint32 spv_access_uniform(Context *ctx, SpirvTypeIdx sti_ptr, RegisterType regtype, uint32 id_offset) +{ + uint32 tid_ptr = spv_get_type(ctx, sti_ptr); + uint32 id_arr = spv_get_uniform_array_id(ctx, regtype); + uint32 id_access = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpAccessChain, tid_ptr, id_access, id_arr, id_offset); + pop_output(ctx); + 
return id_access; +} // spv_access_uniform + +static SpirvResult spv_loadreg(Context *ctx, RegisterList *r) +{ + const RegisterType regtype = r->regtype; + + spv_check_read_reg_id(ctx, r); + + uint32 id_src = r->spirv.iddecl; + SpirvResult result; + if (regtype == REG_TYPE_SAMPLER) + { + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, r->regnum); + result.tid = spv_image_from_texturetype(ctx, (TextureType)sreg->index); + } // if + else if (regtype == REG_TYPE_CONSTBOOL) + { + if (!r->spirv.is_ssa) + id_src = spv_access_uniform(ctx, STI_PTR_INT_U, regtype, r->spirv.iddecl); + + result.tid = spv_get_type(ctx, STI_INT); + } // else if + else if (regtype == REG_TYPE_CONSTINT) + { + if (!r->spirv.is_ssa) + id_src = spv_access_uniform(ctx, STI_PTR_IVEC4_U, regtype, r->spirv.iddecl); + + result.tid = spv_get_type(ctx, STI_IVEC4); + } // else if + else if (regtype == REG_TYPE_CONST) + { + if (!r->spirv.is_ssa) + id_src = spv_access_uniform(ctx, STI_PTR_VEC4_U, regtype, r->spirv.iddecl); + + result.tid = spv_get_type(ctx, STI_VEC4); + } // else if + else if (regtype == REG_TYPE_LOOP) + result.tid = spv_get_type(ctx, STI_INT); + else if (regtype == REG_TYPE_PREDICATE) + result.tid = spv_get_type(ctx, STI_BVEC4); + else + result.tid = spv_get_type(ctx, STI_VEC4); + + // Constants can be used directly, no need to load them. 
+ assert(r->spirv.is_ssa == 0 || r->spirv.is_ssa == 1); + if (r->spirv.is_ssa) + { + result.id = r->spirv.iddecl; + return result; + } // if + + assert(id_src); + result.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 4, SpvOpLoad, result.tid, result.id, id_src); + pop_output(ctx); + + return result; +} // spv_loadreg + +static uint32 spv_emit_swizzle(Context *ctx, uint32 arg, uint32 rtid, const int swizzle, const int writemask) +{ + uint32 result = spv_bumpid(ctx); + + const int writemask0 = (writemask >> 0) & 0x1; + const int writemask1 = (writemask >> 1) & 0x1; + const int writemask2 = (writemask >> 2) & 0x1; + const int writemask3 = (writemask >> 3) & 0x1; + + const uint32 swizzle_x = (swizzle >> 0) & 0x3; + const uint32 swizzle_y = (swizzle >> 2) & 0x3; + const uint32 swizzle_z = (swizzle >> 4) & 0x3; + const uint32 swizzle_w = (swizzle >> 6) & 0x3; + + push_output(ctx, &ctx->mainline); + // OpVectorShuffle takes two vectors to shuffle, but to do a swizzle + // operation we can just ignore the second argument (meaning it can be + // anything, and I am just making it `arg` for convenience) + uint32 word_count = 5 + writemask0 + writemask1 + writemask2 + writemask3; + spv_emit_part(ctx, word_count, 5, SpvOpVectorShuffle, rtid, result, arg, arg); + if (writemask0) spv_emit_word(ctx, swizzle_x); + if (writemask1) spv_emit_word(ctx, swizzle_y); + if (writemask2) spv_emit_word(ctx, swizzle_z); + if (writemask3) spv_emit_word(ctx, swizzle_w); + pop_output(ctx); + + return result; +} // spv_emit_swizzle + +SpirvResult spv_swizzle(Context *ctx, SpirvResult arg, const int swizzle, const int writemask) +{ + int i; + + // Nothing to do, so return the same SSA value + if (no_swizzle(swizzle) && writemask_xyzw(writemask)) + return arg; + + assert(arg.tid != 0); + assert(writemask == 1 + || writemask == 3 + || writemask == 7 + || writemask == 15 + ); + + SpirvTypeIdx sti_arg = STI_VOID; + for (i = STI_CORE_START_; i < STI_CORE_END_; i++) + { + if 
(ctx->spirv.tid[i] == arg.tid) + { + sti_arg = (SpirvTypeIdx)i; + break; + } // if + } // for + assert(sti_arg != STI_VOID); + + // We should not leave any value undefined, as it may end up used (eg. dot + // product), which will make everything relying on it's result undefined. + // Therefore, we specifically determine true dimensionality of the result. + int resdim = 0; + switch (writemask) + { + case 1: + resdim = 1; + break; + + case 3: + resdim = 2; + break; + + case 7: + resdim = 3; + break; + + case 15: + resdim = 4; + break; + + default: + failf(ctx, "Unexpected write mask in swizzle: 0x%X"); + assert(0); + break; + } // switch + + SpirvTypeIdx sti_result = spv_change_base_type_vec_dim(sti_arg, resdim); + + SpirvResult result = {0}; + result.id = (resdim != 1 || sti_arg != sti_result) ? spv_bumpid(ctx) : arg.id; + result.tid = spv_get_type(ctx, sti_result); + assert(result.tid != 0); + + push_output(ctx, &ctx->mainline); + if (resdim != 1) + { + // OpVectorShuffle takes two vectors to shuffle, but to do a swizzle + // operation we can just ignore the second argument (meaning it can be + // anything, and I am just making it `arg` for convenience) + spv_emit_part(ctx, 5 + resdim, 5, SpvOpVectorShuffle, result.tid, result.id, arg.id, arg.id); + + for (i = 0; i < resdim; i++) + spv_emit_word(ctx, (swizzle >> (2*i)) & 0x3); + } // if + else if (sti_arg != sti_result) + { + // OpVectorShuffle may not produce a scalar. Instead we use OpCompositeExtract. 
+ spv_emit(ctx, 5, SpvOpCompositeExtract, result.tid, result.id, arg.id, swizzle & 0x3); + } // else if + + pop_output(ctx); + + return result; +} // make_GLSL_swizzle_string + +static SpirvResult spv_load_srcarg(Context *ctx, const size_t idx, const int writemask) +{ + SpirvResult result = {0}; + if (idx >= STATICARRAYLEN(ctx->source_args)) + { + fail(ctx, "Too many source args"); + return result; + } // if + + const SourceArgInfo *arg = &ctx->source_args[idx]; + + RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum); + + if (arg->relative) + { + if (arg->regtype == REG_TYPE_INPUT) + fail(ctx, "relative input array access is unimplemented"); + else + { + assert(arg->regtype == REG_TYPE_CONST); + const int arrayidx = arg->relative_array->index; + const int offset = arg->regnum - arrayidx; + assert(offset >= 0); + + int is_constant = (arg->relative_array->constant != NULL); + uint32 id_array = 0; + if (is_constant) + { + id_array = ctx->spirv.constant_arrays.idvec4; + if (id_array == 0) + { + id_array = spv_bumpid(ctx); + ctx->spirv.constant_arrays.idvec4 = id_array; + } // if + } // if + else + id_array = spv_get_uniform_array_id(ctx, arg->regtype); + + RegisterList *reg_rel = spv_getreg(ctx, arg->relative_regtype, arg->relative_regnum); + + spv_check_read_reg_id(ctx, reg_rel); + spv_check_read_reg_id(ctx, reg); + + uint32 id_int = spv_get_type(ctx, STI_INT); + uint32 id_offset; + if (reg_rel->regtype == REG_TYPE_LOOP) + id_offset = reg_rel->spirv.iddecl; + else + { + uint32 id_pint = spv_get_type(ctx, STI_PTR_INT_P); + uint32 id_compidx = spv_getscalari(ctx, arg->relative_component); + uint32 id_pcomp = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpAccessChain, id_pint, id_pcomp, reg_rel->spirv.iddecl, id_compidx); + + id_offset = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpLoad, id_int, id_offset, id_pcomp); + } // else + + if (!is_constant) + { + uint32 id_arraybase = reg->spirv.iddecl; + uint32 id_a = id_offset; + uint32 id_b = id_arraybase; + id_offset = 
spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpIAdd, id_int, id_offset, id_a, id_b); + } // if + + if (offset) + { + uint32 id_a = id_offset; + uint32 id_b = spv_getscalari(ctx, offset); + id_offset = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpIAdd, id_int, id_offset, id_a, id_b); + } // if + + uint32 id_pvec4 = is_constant + ? spv_get_type(ctx, STI_PTR_VEC4_P) + : spv_get_type(ctx, STI_PTR_VEC4_U); + uint32 id_pvalue = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpAccessChain, id_pvec4, id_pvalue, id_array, id_offset); + + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpLoad, result.tid, result.id, id_pvalue); + } // else + } // if + else + result = spv_loadreg(ctx, reg); + + result = spv_swizzle(ctx, result, arg->swizzle, writemask); + + switch (arg->src_mod) + { + case SRCMOD_NEGATE: + { + uint32 id_neg = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_neg, result.id); + result.id = id_neg; + break; + } // case + + case SRCMOD_BIASNEGATE: + { + uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f); + uint32 id_tmp = spv_bumpid(ctx); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp, result.id, id_half); + spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp); + result.id = id_new; + break; + } // case + + case SRCMOD_BIAS: + { + uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFSub, result.tid, id_new, result.id, id_half); + result.id = id_new; + break; + } // case + + case SRCMOD_SIGNNEGATE: + { + uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f); + uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f); + uint32 id_tmp0 = spv_bumpid(ctx); + uint32 id_tmp1 = spv_bumpid(ctx); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp0, result.id, id_half); 
+ spv_emit(ctx, 5, SpvOpFMul, result.tid, id_tmp1, id_tmp0, id_two); + spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp1); + result.id = id_new; + break; + } // case + + case SRCMOD_SIGN: + { + uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f); + uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f); + uint32 id_tmp = spv_bumpid(ctx); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp, result.id, id_half); + spv_emit(ctx, 5, SpvOpFMul, result.tid, id_new, id_tmp, id_two); + result.id = id_new; + break; + } // case + + case SRCMOD_COMPLEMENT: + { + uint32 id_one = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_1_0, 1.0f); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFSub, result.tid, id_new, id_one, result.id); + result.id = id_new; + break; + } // case + + case SRCMOD_X2NEGATE: + { + uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f); + uint32 id_tmp = spv_bumpid(ctx); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFMul, result.tid, id_tmp, result.id, id_two); + spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp); + result.id = id_new; + break; + } // case + + case SRCMOD_X2: + { + uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFMul, result.tid, id_new, result.id, id_two); + result.id = id_new; + break; + } // case + + // case SRCMOD_DZ: + // fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME + // postmod_str = "_dz"; + // break; + + // case SRCMOD_DW: + // fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! 
FIXME + // postmod_str = "_dw"; + // break; + + case SRCMOD_ABSNEGATE: + { + uint32 id_abs = spv_bumpid(ctx); + uint32 id_neg = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, result.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, result.id); + spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_neg, id_abs); + result.id = id_neg; + break; + } // case + + case SRCMOD_ABS: + { + uint32 id_abs = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, result.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, result.id); + result.id = id_abs; + break; + } // case + + case SRCMOD_NOT: + { + uint32 id_not = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpLogicalNot, result.tid, id_not, result.id); + result.id = id_not; + break; + } // case + + case SRCMOD_NONE: + case SRCMOD_TOTAL: + break; // stop compiler whining. + + default: + failf(ctx, "unsupported source modifier %d", arg->src_mod); + return result; + } // switch + + return result; +} // spv_load_srcarg + +static inline SpirvResult spv_load_srcarg_full(Context *ctx, const size_t idx) +{ + return spv_load_srcarg(ctx, idx, 0xF); +} // spv_load_srcarg_full + +static void spv_assign_destarg(Context *ctx, SpirvResult value) +{ + const DestArgInfo *arg = &ctx->dest_arg; + RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum); + + spv_check_write_reg_id(ctx, reg); + + if (arg->writemask == 0) + { + // Return without updating the reg->spirv.iddecl (all-zero writemask = no-op) + return; + } // if + + if (arg->result_mod & MOD_SATURATE) + { + uint32 new_value = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5 + 3, SpvOpExtInst, + value.tid, new_value, spv_getext(ctx), GLSLstd450FClamp, + value.id, spv_get_zero(ctx, value.tid), spv_get_one(ctx, value.tid) + ); + pop_output(ctx); + value.id = new_value; + } // if + + // MSDN says MOD_PP is a hint and many implementations ignore it. So do we. + + // CENTROID only allowed in DCL opcodes, which shouldn't come through here. 
+ assert((arg->result_mod & MOD_CENTROID) == 0); + + if (ctx->predicated) + { + fail(ctx, "predicated destinations unsupported"); // !!! FIXME + return; + } // if + + if (arg->result_shift) + { + float factor = 1.0f; + uint32* cache = ctx->spirv.id_1_0; + switch (arg->result_shift) + { + case 0x1: factor = 2.0f; cache = ctx->spirv.id_2_0; break; + case 0x2: factor = 4.0f; cache = ctx->spirv.id_4_0; break; + case 0x3: factor = 8.0f; cache = ctx->spirv.id_8_0; break; + case 0xD: factor = 0.125f; cache = ctx->spirv.id_0_125; break; + case 0xE: factor = 0.25f; cache = ctx->spirv.id_0_25; break; + case 0xF: factor = 0.5f; cache = ctx->spirv.id_0_5; break; + default: + failf(ctx, "unexpected result shift %d", arg->result_shift); + } // switch + + uint32 id_factor = spv_get_constant_composite(ctx, value.tid, cache, factor); + push_output(ctx, &ctx->mainline); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFMul, value.tid, id_new, value.id, id_factor); + pop_output(ctx); + value.id = id_new; + } // if + + if (reg->regtype == REG_TYPE_DEPTHOUT + || isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum)) + { + assert(arg->writemask == 0x1); + SpirvTypeIdx sti_reg = STI_FLOAT; + uint32 rtid = spv_get_type(ctx, sti_reg); + uint32 new_value = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpCompositeExtract, rtid, new_value, value.id, 0); + pop_output(ctx); + value.tid = rtid; + value.id = new_value; + } // if + else if (!writemask_xyzw(arg->writemask)) + { + SpirvTypeIdx sti_reg; + switch (reg->regtype) + { + case REG_TYPE_ADDRESS: sti_reg = STI_IVEC4; break; + case REG_TYPE_PREDICATE: sti_reg = STI_BVEC4; break; + default: sti_reg = STI_VEC4; break; + } // switch + + uint32 rtid = spv_get_type(ctx, sti_reg); + uint32 new_value = spv_bumpid(ctx); + uint32 current_value = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + + spv_emit(ctx, 4, SpvOpLoad, rtid, current_value, reg->spirv.iddecl); + + // output id is new_value + // 
select between current value and new value based on writemask + // in the shuffle, components [0, 3] are the new value, and components + // [4, 7] are the existing value + spv_emit_part(ctx, 5 + 4, 5, SpvOpVectorShuffle, rtid, new_value, value.id, current_value); + if (arg->writemask0) spv_emit_word(ctx, 0); else spv_emit_word(ctx, 4); + if (arg->writemask1) spv_emit_word(ctx, 1); else spv_emit_word(ctx, 5); + if (arg->writemask2) spv_emit_word(ctx, 2); else spv_emit_word(ctx, 6); + if (arg->writemask3) spv_emit_word(ctx, 3); else spv_emit_word(ctx, 7); + + pop_output(ctx); + + value.tid = rtid; + value.id = new_value; + } // if + + switch (reg->regtype) + { + case REG_TYPE_OUTPUT: + case REG_TYPE_ADDRESS: + case REG_TYPE_TEMP: + case REG_TYPE_DEPTHOUT: + case REG_TYPE_COLOROUT: + case REG_TYPE_RASTOUT: + case REG_TYPE_ATTROUT: + case REG_TYPE_PREDICATE: + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 3, SpvOpStore, reg->spirv.iddecl, value.id); + pop_output(ctx); + break; + + default: + { + char varname[64]; + get_SPIRV_varname_in_buf(ctx, reg->regtype, reg->regnum, varname, sizeof(varname)); + failf(ctx, "register %s is unimplemented for storing", varname); + break; + } // default + } // switch +} // spv_assign_destarg + +static void spv_emit_vs_main_end(Context* ctx) +{ +#if defined(MOJOSHADER_DEPTH_CLIPPING) || defined(MOJOSHADER_FLIP_RENDERTARGET) + if (!shader_is_vertex(ctx)) + return; + + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 tid_func = spv_get_type(ctx, STI_FUNC_VOID); + uint32 id_func = ctx->spirv.id_vs_main_end; + uint32 id_label = spv_bumpid(ctx); + assert(id_func != 0); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpFunction, tid_void, id_func, SpvFunctionControlMaskNone, tid_func); + spv_emit(ctx, 2, SpvOpLabel, id_label); + + RegisterList *reg; + for (reg = ctx->used_registers.next; reg != NULL; reg = reg->next) + { + if (reg->usage == MOJOSHADER_USAGE_POSITION && + (reg->regtype == REG_TYPE_RASTOUT || reg->regtype 
== REG_TYPE_OUTPUT)) + break; + } // for + SpirvResult output = spv_loadreg(ctx, reg); + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 id_new_output; + +#ifdef MOJOSHADER_FLIP_RENDERTARGET + // gl_Position.y = gl_Position.y * vpFlip; + uint32 tid_pvpflip = spv_bumpid(ctx); + uint32 id_old_y = spv_bumpid(ctx); + uint32 id_pvpflip = spv_bumpid(ctx); + uint32 id_vpflip = spv_bumpid(ctx); + uint32 id_new_y = spv_bumpid(ctx); + id_new_output = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_y, output.id, 1); + spv_emit(ctx, 4, SpvOpLoad, tid_float, id_vpflip, id_pvpflip); + spv_emit(ctx, 5, SpvOpFMul, tid_float, id_new_y, id_old_y, id_vpflip); + spv_emit(ctx, 6, SpvOpCompositeInsert, output.tid, id_new_output, id_new_y, output.id, 1); + output.id = id_new_output; + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpTypePointer, tid_pvpflip, SpvStorageClassUniformConstant, tid_float); + spv_emit(ctx, 4, SpvOpVariable, tid_pvpflip, id_pvpflip, SpvStorageClassUniformConstant); + pop_output(ctx); + + spv_output_name(ctx, id_pvpflip, "vpFlip"); + ctx->spirv.patch_table.vpflip.offset = spv_output_location(ctx, id_pvpflip, ~0u); +#endif + +#ifdef MOJOSHADER_DEPTH_CLIPPING + // gl_Position.z = gl_Position.z * 2.0 - gl_Position.w; + uint32 id_2 = spv_getscalarf(ctx, 2.0f); + uint32 id_old_z = spv_bumpid(ctx); + uint32 id_old_w = spv_bumpid(ctx); + uint32 id_2z = spv_bumpid(ctx); + uint32 id_new_z = spv_bumpid(ctx); + id_new_output = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_z, output.id, 2); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_w, output.id, 3); + spv_emit(ctx, 5, SpvOpFMul, tid_float, id_2z, id_old_z, id_2); + spv_emit(ctx, 5, SpvOpFSub, tid_float, id_new_z, id_2z, id_old_w); + spv_emit(ctx, 6, SpvOpCompositeInsert, output.tid, id_new_output, id_new_z, output.id, 2); + output.id = id_new_output; +#endif + + spv_emit(ctx, 3, SpvOpStore, reg->spirv.iddecl, 
output.id); + spv_emit(ctx, 1, SpvOpReturn); + spv_emit(ctx, 1, SpvOpFunctionEnd); + pop_output(ctx); + + spv_output_name(ctx, id_func, "vs_epilogue"); +#endif +} // spv_emit_vs_main_end + +static void spv_emit_func_lit(Context *ctx) +{ + if (!ctx->spirv.id_func_lit) + return; + + // vec4 LIT(const vec4 src) + // { + // float retval_y, retval_z; + // if (src.x > 0.0) { + // retval_y = src.x; + // if (src.y > 0.0) { + // float power = clamp(src.w, -127.9961, 127.9961); + // retval_z = pow(src.y, power); + // } else { + // retval_z = 0.0; + // } + // } else { + // retval_y = 0.0; + // retval_z = 0.0; + // } + // vec4 retval = vec4(1.0, retval_y, retval_z, 1.0); + // return retval; + // } + + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 tid_func = spv_get_type(ctx, STI_FUNC_LIT); + uint32 id_func = ctx->spirv.id_func_lit; + uint32 id_src = spv_bumpid(ctx); + uint32 id_block_start = spv_bumpid(ctx); + uint32 id_src_x = spv_bumpid(ctx); + uint32 id_src_x_pos = spv_bumpid(ctx); + uint32 id_0_0 = spv_get_zero(ctx, tid_float); + uint32 id_branch0_true = spv_bumpid(ctx); + uint32 id_src_y = spv_bumpid(ctx); + uint32 id_src_y_pos = spv_bumpid(ctx); + uint32 id_branch1_true = spv_bumpid(ctx); + uint32 id_src_w = spv_bumpid(ctx); + uint32 id_maxp = spv_getscalarf(ctx, 127.9961f); + uint32 id_maxp_neg = spv_getscalarf(ctx, -127.9961f); + uint32 id_power = spv_bumpid(ctx); + uint32 id_pow_result = spv_bumpid(ctx); + uint32 id_branch1_merge = spv_bumpid(ctx); + uint32 id_branch1_result = spv_bumpid(ctx); + uint32 id_branch0_merge = spv_bumpid(ctx); + uint32 id_result_y = spv_bumpid(ctx); + uint32 id_result_z = spv_bumpid(ctx); + uint32 id_1_0 = spv_get_one(ctx, tid_float); + uint32 id_result = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpFunction, tid_vec4, id_func, SpvFunctionControlMaskNone, tid_func); + spv_emit(ctx, 3, 
SpvOpFunctionParameter, tid_vec4, id_src); + + // id_block_start + spv_emit(ctx, 2, SpvOpLabel, id_block_start); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_x, id_src, 0); + spv_emit(ctx, 5, SpvOpFOrdGreaterThan, tid_bool, id_src_x_pos, id_src_x, id_0_0); + spv_emit(ctx, 3, SpvOpSelectionMerge, id_branch0_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, id_src_x_pos, id_branch0_true, id_branch0_merge); + + // id_branch0_true + spv_emit(ctx, 2, SpvOpLabel, id_branch0_true); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_y, id_src, 1); + spv_emit(ctx, 5, SpvOpFOrdGreaterThan, tid_bool, id_src_y_pos, id_src_y, id_0_0); + spv_emit(ctx, 3, SpvOpSelectionMerge, id_branch1_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, id_src_y_pos, id_branch1_true, id_branch1_merge); + + // id_branch1_true + spv_emit(ctx, 2, SpvOpLabel, id_branch1_true); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_w, id_src, 3); + spv_emit(ctx, 5 + 3, SpvOpExtInst, + tid_float, id_power, spv_getext(ctx), GLSLstd450FClamp, id_src_w, id_maxp_neg, id_maxp + ); + spv_emit(ctx, 5 + 2, SpvOpExtInst, + tid_float, id_pow_result, spv_getext(ctx), GLSLstd450Pow, id_src_y, id_power + ); + spv_emit(ctx, 2, SpvOpBranch, id_branch1_merge); + + // id_branch1_merge + spv_emit(ctx, 2, SpvOpLabel, id_branch1_merge); + spv_emit(ctx, 7, SpvOpPhi, tid_float, id_branch1_result, + id_pow_result, id_branch1_true, + id_0_0, id_branch0_true + ); + spv_emit(ctx, 2, SpvOpBranch, id_branch0_merge); + + // id_branch0_merge + spv_emit(ctx, 2, SpvOpLabel, id_branch0_merge); + spv_emit(ctx, 7, SpvOpPhi, tid_float, id_result_y, + id_src_x, id_branch1_merge, + id_0_0, id_block_start + ); + spv_emit(ctx, 7, SpvOpPhi, tid_float, id_result_z, + id_branch1_result, id_branch1_merge, + id_0_0, id_block_start + ); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_result, + id_1_0, id_result_y, id_result_z, id_1_0 + ); + spv_emit(ctx, 2, SpvOpReturnValue, id_result); + 
spv_emit(ctx, 1, SpvOpFunctionEnd); + + pop_output(ctx); + + spv_output_name(ctx, ctx->spirv.id_func_lit, "LIT"); +} // spv_emit_func_lit + +static void spv_emit_func_end(Context *ctx) +{ + push_output(ctx, &ctx->mainline); + +#if defined(MOJOSHADER_DEPTH_CLIPPING) || defined(MOJOSHADER_FLIP_RENDERTARGET) + if (shader_is_vertex(ctx) && ctx->spirv.id_vs_main_end == 0) + { + ctx->spirv.id_vs_main_end = spv_bumpid(ctx); + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 id_res = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 4, SpvOpFunctionCall, tid_void, id_res, ctx->spirv.id_vs_main_end); + pop_output(ctx); + } // if +#endif + + spv_emit(ctx, 1, SpvOpReturn); + spv_emit(ctx, 1, SpvOpFunctionEnd); + pop_output(ctx); +} // spv_emit_func_end + +static void spv_link_vs_attributes(Context *ctx, uint32 id, MOJOSHADER_usage usage, int index) +{ + // Some usages map to specific ranges. Keep those in sync with spv_link_ps_attributes(). + switch (usage) + { + case MOJOSHADER_USAGE_POSITION: + assert(index == 0); + spv_output_builtin(ctx, id, SpvBuiltInPosition); + break; + case MOJOSHADER_USAGE_POINTSIZE: + spv_output_builtin(ctx, id, SpvBuiltInPointSize); + break; + case MOJOSHADER_USAGE_COLOR: // locations [0,1] + assert(index < 2); + spv_output_location(ctx, id, 0 + index); + break; + case MOJOSHADER_USAGE_TEXCOORD: // locations [2,11] + assert(index < 10); + spv_output_location(ctx, id, 2 + index); + break; + case MOJOSHADER_USAGE_NORMAL: // locations [12,21] + // FIXME: SM_3_0 allows basically any non-built-in semantic to use any index. We can + // either blow up the number of indices and use them sparsely, or patch them when linking + // vertex and pixel shader together. + assert(index < 10); + spv_output_location(ctx, id, 12 + index); + break; + + case MOJOSHADER_USAGE_FOG: // location [12] + // FIXME: Missing PS handling. 
+ spv_output_location(ctx, id, 12); + break; + case MOJOSHADER_USAGE_TANGENT: // location [13] + // FIXME: Missing PS handling. + assert(index == 0); + spv_output_location(ctx, id, 13 + index); + break; + + default: + failf(ctx, "unexpected attribute usage %d in vertex shader", usage); + break; + } // switch +} // spv_link_vs_attributes + +static void spv_link_ps_attributes(Context *ctx, uint32 id, RegisterType regtype, MOJOSHADER_usage usage, int index) +{ + switch (regtype) + { + case REG_TYPE_COLOROUT: + // nothing to do for color, OpenGL should hook it up automatically?? + break; + case REG_TYPE_INPUT: // v# (MOJOSHADER_USAGE_COLOR aka `oC#` in vertex shader) + switch (usage) + { + case MOJOSHADER_USAGE_COLOR: + assert(index < 2); + spv_output_location(ctx, id, 0 + index); + break; + case MOJOSHADER_USAGE_TEXCOORD: + { + assert(index < 10); + uint32 location_offset = spv_output_location(ctx, id, 2 + index); + if (index == 0) + ctx->spirv.patch_table.ps_texcoord0_offset = location_offset; + break; + } // case + case MOJOSHADER_USAGE_NORMAL: + assert(index < 10); + spv_output_location(ctx, id, 12 + index); + break; + default: + failf(ctx, "unexpected attribute usage %d in pixel shader", usage); + break; + } // switch + break; + case REG_TYPE_TEXTURE: // t# (MOJOSHADER_USAGE_TEXCOORD aka `oT#` in vertex shader) + assert(index < 10); + spv_output_location(ctx, id, 2 + index); + break; + case REG_TYPE_DEPTHOUT: + spv_output_builtin(ctx, id, SpvBuiltInFragDepth); + break; + case REG_TYPE_MISCTYPE: + // inputs + switch ((MiscTypeType)index) + { + case MISCTYPE_TYPE_POSITION: // vPos + { + // In SM3.0 vPos only has x and y defined, but we should be fine to leave the z and w attributes in + // that SpvBuiltInFragCoord gives + + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_vec2 = spv_get_type(ctx, STI_VEC2); + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 tid_pvec4i = spv_get_type(ctx, STI_PTR_VEC4_I); + uint32 tid_pvec2u = spv_bumpid(ctx); + 
uint32 tid_pvec4p = spv_get_type(ctx, STI_PTR_VEC4_P); + + uint32 id_var_fragcoord = spv_bumpid(ctx); + uint32 id_var_vposflip = spv_bumpid(ctx); + uint32 id_var_vpos = id; + + uint32 id_fragcoord = spv_bumpid(ctx); + uint32 id_fragcoord_y = spv_bumpid(ctx); + uint32 id_vposflip = spv_bumpid(ctx); + uint32 id_vposflip_x = spv_bumpid(ctx); + uint32 id_vposflip_y = spv_bumpid(ctx); + uint32 id_tmp = spv_bumpid(ctx); + uint32 id_vpos_y = spv_bumpid(ctx); + uint32 id_vpos = spv_bumpid(ctx); + + // vec4 gl_FragCoord = ; + // uniform vec2 vposFlip; + // vec4 ps_vPos = vec4( + // gl_FragCoord.x, + // (gl_FragCoord.y * vposFlip.x) + vposFlip.y, + // gl_FragCoord.z, + // gl_FragCoord.w + // ); + + push_output(ctx, &ctx->mainline_intro); + // Define uniform vec2*. This is the only place that uses it right now. + spv_emit(ctx, 4, SpvOpTypePointer, tid_pvec2u, SpvStorageClassUniformConstant, tid_vec2); + // Define all variables involved. + spv_emit(ctx, 4, SpvOpVariable, tid_pvec4i, id_var_fragcoord, SpvStorageClassInput); + spv_emit(ctx, 4, SpvOpVariable, tid_pvec2u, id_var_vposflip, SpvStorageClassUniformConstant); + spv_emit(ctx, 4, SpvOpVariable, tid_pvec4p, id_var_vpos, SpvStorageClassPrivate); + pop_output(ctx); + + spv_output_builtin(ctx, id_var_fragcoord, SpvBuiltInFragCoord); + spv_output_name(ctx, id_var_vposflip, "vposFlip"); + + // Initialize vPos using vPosFlip and built in FragCoord. 
+ push_output(ctx, &ctx->mainline_top); + spv_emit(ctx, 4, SpvOpLoad, tid_vec4, id_fragcoord, id_var_fragcoord); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_fragcoord_y, id_fragcoord, 1); + spv_emit(ctx, 4, SpvOpLoad, tid_vec2, id_vposflip, id_var_vposflip); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_vposflip_x, id_vposflip, 0); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_vposflip_y, id_vposflip, 1); + spv_emit(ctx, 5, SpvOpFMul, tid_float, id_tmp, id_fragcoord_y, id_vposflip_x); + spv_emit(ctx, 5, SpvOpFAdd, tid_float, id_vpos_y, id_tmp, id_vposflip_y); + spv_emit(ctx, 6, SpvOpCompositeInsert, tid_vec4, id_vpos, id_vpos_y, id_fragcoord, 1); + spv_emit(ctx, 3, SpvOpStore, id_var_vpos, id_vpos); + pop_output(ctx); + + ctx->spirv.id_var_fragcoord = id_var_fragcoord; + ctx->spirv.id_var_vpos = id_var_vpos; + ctx->spirv.patch_table.vpflip.offset = spv_output_location(ctx, id_var_vposflip, ~0u); + break; + } // case + + case MISCTYPE_TYPE_FACE: // vFace + { + // The much more wordy equivalent of: + // bool gl_FrontFacing = ; + // vec4 vFace; + // vFace = vec4(gl_FrontFacing ? 
1.0 : 0.0); + + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 tid_pbooli = spv_get_type(ctx, STI_PTR_BOOL_I); + uint32 tid_pvec4p = spv_get_type(ctx, STI_PTR_VEC4_P); + + uint32 id_1_0 = spv_getscalarf(ctx, 1.0f); + uint32 id_0_0 = spv_getscalarf(ctx, 0.0f); + + uint32 id_var_frontfacing = spv_bumpid(ctx); + uint32 id_var_vface = id; + + uint32 id_frontfacing = spv_bumpid(ctx); + uint32 id_tmp = spv_bumpid(ctx); + uint32 id_vface = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpVariable, tid_pbooli, id_var_frontfacing, SpvStorageClassInput); + spv_emit(ctx, 4, SpvOpVariable, tid_pvec4p, id_var_vface, SpvStorageClassPrivate); + pop_output(ctx); + + spv_output_builtin(ctx, id_var_frontfacing, SpvBuiltInFrontFacing); + + push_output(ctx, &ctx->mainline_top); + spv_emit(ctx, 4, SpvOpLoad, tid_bool, id_frontfacing, id_var_frontfacing); + spv_emit(ctx, 6, SpvOpSelect, tid_float, id_tmp, id_frontfacing, id_1_0, id_0_0); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_vface, id_tmp, id_tmp, id_tmp, id_tmp); + spv_emit(ctx, 3, SpvOpStore, id_var_vface, id_vface); + pop_output(ctx); + + ctx->spirv.id_var_frontfacing = id_var_frontfacing; + ctx->spirv.id_var_vface = id_var_vface; + break; + } // case + } // switch + break; + default: + fail(ctx, "unknown pixel shader attribute register"); + } // switch +} // spv_link_ps_attributes + +static void spv_texbem(Context* ctx, int luminanceCorrection) +{ + DestArgInfo *info = &ctx->dest_arg; + uint32 sampler_idx = info->regnum; + RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, sampler_idx); + RegisterList *pSrc = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum); + RegisterList *pDst = spv_getreg(ctx, info->regtype, sampler_idx); + + push_output(ctx, &ctx->mainline); + + SpirvResult sampler = spv_loadreg(ctx, pSReg); + SpirvResult src0 = 
spv_loadreg(ctx, pSrc); + SpirvResult src1 = spv_loadreg(ctx, pDst); + + // = texture( + // , + // vec2( + // (_texbem.x * .x) + (_texbem.z * .y) + .x, + // (_texbem.y * .x) + (_texbem.w * .y) + .y + // ) + // ); + + // Load 2x2 transform matrix from uniform data (stored as vec4). + uint32 id_array = spv_get_uniform_array_id(ctx, REG_TYPE_CONST); + assert(sampler_idx < 4); + uint32 id_offset = ctx->spirv.sampler_extras[sampler_idx].idtexbem; + if (!id_offset) + { + id_offset = spv_bumpid(ctx); + ctx->spirv.sampler_extras[sampler_idx].idtexbem = id_offset; + } // if + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 tid_pvec4 = spv_get_type(ctx, STI_PTR_VEC4_U); + uint32 id_pmatrix = spv_bumpid(ctx); + SpirvResult matrix; + matrix.tid = tid_vec4; + matrix.id = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpAccessChain, tid_pvec4, id_pmatrix, id_array, id_offset); + spv_emit(ctx, 4, SpvOpLoad, matrix.tid, matrix.id, id_pmatrix); + + // transform src0 using matrix and translate result using src1 + // ie. 
src0 * matrix + src1 + SpirvResult matrix_xy = spv_swizzle(ctx, matrix, 0x4, 0x3); + SpirvResult matrix_zw = spv_swizzle(ctx, matrix, 0xE, 0x3); + SpirvResult src0_xx = spv_swizzle(ctx, src0, 0x0, 0x3); + SpirvResult src0_yy = spv_swizzle(ctx, src0, 0x5, 0x3); + SpirvResult src1_xy = spv_swizzle(ctx, src1, 0x4, 0x3); + uint32 tid_vec2 = src0_xx.tid; + uint32 id_a = spv_bumpid(ctx); + uint32 id_b = spv_bumpid(ctx); + uint32 id_c = spv_bumpid(ctx); + uint32 id_d = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFMul, tid_vec2, id_a, matrix_xy.id, src0_xx.id); + spv_emit(ctx, 5, SpvOpFMul, tid_vec2, id_b, matrix_zw.id, src0_yy.id); + spv_emit(ctx, 5, SpvOpFAdd, tid_vec2, id_c, id_a, id_b); + spv_emit(ctx, 5, SpvOpFAdd, tid_vec2, id_d, id_c, src1_xy.id); + + // sample texture + SpirvResult result; + result.tid = tid_vec4; + result.id = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_d); + if (luminanceCorrection) + { + uint32 id_l_offset = ctx->spirv.sampler_extras[sampler_idx].idtexbeml; + if (!id_l_offset) + { + id_l_offset = spv_bumpid(ctx); + ctx->spirv.sampler_extras[sampler_idx].idtexbeml = id_l_offset; + } // if + + // = * ((.z * _texbeml.x) + _texbeml.y) + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + + SpirvResult src0_z = spv_swizzle(ctx, src0, 0x2, 0x1); + uint32 id_l_ptr = spv_bumpid(ctx); + + SpirvResult l; + l.tid = tid_vec4; + l.id = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpAccessChain, tid_pvec4, id_l_ptr, id_array, id_l_offset); + spv_emit(ctx, 4, SpvOpLoad, l.tid, l.id, id_l_ptr); + + SpirvResult l_x = spv_swizzle(ctx, l, 0x0, 0x1); + SpirvResult l_y = spv_swizzle(ctx, l, 0x1, 0x1); + assert(tid_float == l_x.tid); + assert(tid_float == l_y.tid); + assert(tid_float == src0_z.tid); + + uint32 id_e = spv_bumpid(ctx); + uint32 id_f = spv_bumpid(ctx); + uint32 id_ffff = spv_bumpid(ctx); + uint32 id_new = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFMul, tid_float, id_e, src0_z.id, l_x.id); + spv_emit(ctx, 
5, SpvOpFAdd, tid_float, id_f, id_e, l_y.id); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_ffff, + id_f, id_f, id_f, id_f + ); + spv_emit(ctx, 5, SpvOpFMul, tid_vec4, id_new, result.id, id_ffff); + result.id = id_new; + } // if + + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} + +void emit_SPIRV_start(Context *ctx, const char *profilestr) +{ + if (!(shader_is_vertex(ctx) || shader_is_pixel(ctx))) + { + failf(ctx, "Shader type %u unsupported in this profile.", + (uint) ctx->shader_type); + return; + } // if + + if (strcmp(profilestr, MOJOSHADER_PROFILE_SPIRV) != 0) + failf(ctx, "Profile '%s' unsupported or unknown.", profilestr); + + memset(&(ctx->spirv), '\0', sizeof(ctx->spirv)); + + ctx->spirv.idmain = spv_bumpid(ctx); + + // calls spv_getvoid as well + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 tid_func = spv_get_type(ctx, STI_FUNC_VOID); + + // slap the function declaration itself in mainline_top, so we can do type + // declaration in mainline_intro (= before this in the output) + push_output(ctx, &ctx->mainline_top); + spv_emit(ctx, 5, SpvOpFunction, tid_void, ctx->spirv.idmain, SpvFunctionControlMaskNone, tid_func); + spv_emit(ctx, 2, SpvOpLabel, spv_bumpid(ctx)); + pop_output(ctx); + + // also emit the name for the function + spv_output_name(ctx, ctx->spirv.idmain, ctx->mainfn); + + set_output(ctx, &ctx->mainline); +} // emit_SPIRV_start + +void emit_SPIRV_end(Context *ctx) +{ + if (ctx->previous_opcode != OPCODE_RET) + spv_emit_func_end(ctx); +} // emit_SPIRV_end + +void emit_SPIRV_phase(Context *ctx) +{ + // no-op +} // emit_SPIRV_phase + +void emit_SPIRV_global(Context *ctx, RegisterType regtype, int regnum) +{ + RegisterList *r = reglist_find(&ctx->used_registers, regtype, regnum); + + SpvStorageClass sc = SpvStorageClassPrivate; + uint32 tid = 0; + switch (regtype) + { + case REG_TYPE_LABEL: + failf(ctx, "unimplemented regtype %d", regtype); + return; + + case REG_TYPE_LOOP: + // Using SSA id to represent loop 
counters, instead of a variable. + return; + + case REG_TYPE_PREDICATE: + tid = spv_get_type(ctx, STI_PTR_BVEC4_P); + break; + + case REG_TYPE_ADDRESS: + if (shader_is_vertex(ctx)) + tid = spv_get_type(ctx, STI_PTR_IVEC4_P); + else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE + { + if (!shader_version_atleast(ctx, 1, 4)) + { + // ps_1_1 texture/address registers work like temporaries. They are initialized + // with tex coords and TEX instruction then reads tex coords from it and writes + // sampling result back into it. Because Input storage class is read-only, we + // create private variable that is initialized to value of input. + + uint32 tid_pvec4_i = spv_get_type(ctx, STI_PTR_VEC4_I); + uint32 tid_pvec4_p = spv_get_type(ctx, STI_PTR_VEC4_P); + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 id_input_var = spv_bumpid(ctx); + uint32 id_private_var = r->spirv.iddecl; + uint32 id_tmp = spv_bumpid(ctx); + + // Create one Input and one Private variable. Input variable is linked to prev stage. + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpVariable, tid_pvec4_i, id_input_var, SpvStorageClassInput); + spv_emit(ctx, 4, SpvOpVariable, tid_pvec4_p, id_private_var, SpvStorageClassPrivate); + pop_output(ctx); + spv_link_ps_attributes(ctx, id_input_var, regtype, MOJOSHADER_USAGE_TEXCOORD, regnum); + + // Initialize Private variable with Input variable. + push_output(ctx, &ctx->mainline_top); + spv_emit(ctx, 4, SpvOpLoad, tid_vec4, id_tmp, id_input_var); + spv_emit(ctx, 3, SpvOpStore, id_private_var, id_tmp); + pop_output(ctx); + + // TEX instruction have already been emitted that work with Private variable. + + // Overwrite Private variable with Input variable, so emit_SPIRV_finalize outputs + // OpEntryPoint with correct references to Input and Output variables. 
+ r->spirv.iddecl = id_input_var; + return; + } // if + tid = spv_get_type(ctx, STI_PTR_VEC4_P); + } // else if + break; + + case REG_TYPE_TEMP: + if (regnum == 0 && shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + { + // Value of r0 is at the end of shader execution is color output. + sc = SpvStorageClassOutput; + tid = spv_get_type(ctx, STI_PTR_VEC4_O); + } + else + tid = spv_get_type(ctx, STI_PTR_VEC4_P); + break; + + default: + fail(ctx, "BUG: Unexpected regtype in emit_SPIRV_global"); + return; + } // switch + + // TODO: If the SSA id for this register is still 0 by this point, that + // means no instructions actually loaded from/stored to this variable... + + if (r->spirv.iddecl == 0) + r->spirv.iddecl = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, sc); + pop_output(ctx); + + spv_output_regname(ctx, r->spirv.iddecl, regtype, regnum); +} // emit_SPIRV_global + +void emit_SPIRV_array(Context *ctx, VariableList *var) +{ + var->emit_position = ctx->uniform_float4_count; +} // emit_SPIRV_array + +void emit_SPIRV_const_array(Context *ctx, + const struct ConstantsList *clist, + int base, int size) +{ + int i; + + assert(ctx->spirv.constant_arrays.idvec4 != 0); + + push_output(ctx, &ctx->mainline_intro); + + // FIXME: This code potentially duplicates constants defined using DEF ops. + // FIXME: Multiple constant arrays probably won't work. Are those even possible? + // Maybe it would be better to do this in emit_SPIRV_finalize and use used_registers for it? 
+ uint32 *constituents = (uint32 *)Malloc(ctx, size * sizeof(uint32)); + uint32 tid_constituent = spv_get_type(ctx, STI_VEC4); + for (i = 0; i < size; i++) + { + while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT) + clist = clist->next; + assert(clist->constant.index == (base + i)); + + uint32 id_x = spv_getscalarf(ctx, clist->constant.value.f[0]); + uint32 id_y = spv_getscalarf(ctx, clist->constant.value.f[1]); + uint32 id_z = spv_getscalarf(ctx, clist->constant.value.f[2]); + uint32 id_w = spv_getscalarf(ctx, clist->constant.value.f[3]); + + uint32 id = spv_bumpid(ctx); + spv_emit(ctx, 3 + 4, SpvOpConstantComposite, tid_constituent, id, id_x, id_y, id_z, id_w); + constituents[i] = id; + + clist = clist->next; + } // for + + uint32 id_array_len = spv_getscalari(ctx, size); + + uint32 tid_array = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpTypeArray, tid_array, tid_constituent, id_array_len); + + uint32 id_array = spv_bumpid(ctx); + spv_emit_part(ctx, 3+size, 3, SpvOpConstantComposite, tid_array, id_array); + for (i = 0; i < size; i++) + spv_emit_word(ctx, constituents[i]); + + uint32 tid_parray = spv_bumpid(ctx); + spv_emit(ctx, 4, SpvOpTypePointer, tid_parray, SpvStorageClassPrivate, tid_array); + + uint32 id_array_var = ctx->spirv.constant_arrays.idvec4; + spv_emit(ctx, 5, SpvOpVariable, tid_parray, id_array_var, SpvStorageClassPrivate, id_array); + + Free(ctx, constituents); + pop_output(ctx); +} // emit_SPIRV_const_array + +void emit_SPIRV_uniform(Context *ctx, RegisterType regtype, int regnum, + const VariableList *var) +{ + RegisterList *r = reglist_find(&ctx->uniforms, regtype, regnum); + + // TODO: If the SSA id for this register is still 0 by this point, that means no instructions actually + // loaded from/stored to this variable... 
+ + if (r->spirv.iddecl == 0) + r->spirv.iddecl = spv_bumpid(ctx); + + if (var == NULL) + { + uint32 tid = spv_get_type(ctx, STI_INT); + int offset = 0; + switch (regtype) + { + case REG_TYPE_CONST: + offset = ctx->uniform_float4_count; + break; + + case REG_TYPE_CONSTINT: + offset = ctx->uniform_int4_count; + break; + + case REG_TYPE_CONSTBOOL: + offset = ctx->uniform_bool_count; + break; + + default: + fail(ctx, "BUG: used a uniform we don't know how to define."); + return; + } // switch + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpConstant, tid, r->spirv.iddecl, offset); + pop_output(ctx); + + char varname[64]; + get_SPIRV_varname_in_buf(ctx, regtype, regnum, varname, sizeof(varname)); + spv_output_name(ctx, r->spirv.iddecl, varname); + } // if + else + { + if (var->constant) + fail(ctx, "const array not implemented"); + else + { + // Instructions needed to reference this constant before its value was known, so unique + // id had to be generated. Unfortunately, this prevents reusing already emitted + // constants. + assert(var->emit_position != -1); + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpConstant, spv_get_type(ctx, STI_INT), r->spirv.iddecl, var->emit_position); + pop_output(ctx); + + char varname[64]; + get_SPIRV_varname_in_buf(ctx, regtype, regnum, varname, sizeof(varname)); + spv_output_name(ctx, r->spirv.iddecl, varname); + } // else + } // else +} // emit_SPIRV_uniform + +void emit_SPIRV_sampler(Context *ctx, int stage, TextureType ttype, int texbem) +{ + uint32 type = spv_ptrimage_from_texturetype(ctx, ttype); + + RegisterList *sampler_reg; + // Pre ps_2_0 samplers were not dcl-ed, so we won't find them using spv_getreg(). 
+ if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + sampler_reg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage); + else + sampler_reg = spv_getreg(ctx, REG_TYPE_SAMPLER, stage); + + uint32 result = sampler_reg->spirv.iddecl; + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpVariable, type, result, SpvStorageClassUniformConstant); + if (texbem) // This sampler used a ps_1_1 TEXBEM opcode? + { + uint32 tid_int = spv_get_type(ctx, STI_INT); + uint32 id_texbem = ctx->spirv.sampler_extras[stage].idtexbem; + uint32 id_texbeml = ctx->spirv.sampler_extras[stage].idtexbeml; + const int offset = ctx->uniform_float4_count; + ctx->uniform_float4_count += 2; + if (id_texbem) + spv_emit(ctx, 4, SpvOpConstant, tid_int, id_texbem, offset); + if (id_texbeml) + spv_emit(ctx, 4, SpvOpConstant, tid_int, id_texbeml, offset + 1); + } // if + pop_output(ctx); + + // hnn: specify uniform location for SPIR-V shaders (required per gl_arb_spirv spec) + spv_output_set_binding(ctx, result, 0, sampler_reg->regnum); + uint32 location_offset = spv_output_location(ctx, result, ~0u); + + assert(sampler_reg->regnum < STATICARRAYLEN(ctx->spirv.patch_table.samplers)); + ctx->spirv.patch_table.samplers[sampler_reg->regnum].offset = location_offset; + + spv_output_regname(ctx, result, REG_TYPE_SAMPLER, stage); +} // emit_SPIRV_sampler + +void emit_SPIRV_attribute(Context *ctx, RegisterType regtype, int regnum, + MOJOSHADER_usage usage, int index, int wmask, + int flags) +{ + uint32 tid; + RegisterList *r = spv_getreg(ctx, regtype, regnum); + + ctx->spirv.inoutcount += 1; + + spv_output_regname(ctx, r->spirv.iddecl, regtype, regnum); + + if (shader_is_vertex(ctx)) + { + // pre-vs3 output registers. + // these don't ever happen in DCL opcodes, I think. Map to vs_3_* + // output registers. 
+ if (!shader_version_atleast(ctx, 3, 0)) + { + if (regtype == REG_TYPE_RASTOUT) + { + regtype = REG_TYPE_OUTPUT; + index = regnum; + switch ((const RastOutType) regnum) + { + case RASTOUT_TYPE_POSITION: + usage = MOJOSHADER_USAGE_POSITION; + break; + case RASTOUT_TYPE_FOG: + usage = MOJOSHADER_USAGE_FOG; + break; + case RASTOUT_TYPE_POINT_SIZE: + usage = MOJOSHADER_USAGE_POINTSIZE; + break; + } // switch + } // if + + else if (regtype == REG_TYPE_ATTROUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_COLOR; + index = regnum; + } // else if + + else if (regtype == REG_TYPE_TEXCRDOUT) + { + regtype = REG_TYPE_OUTPUT; + usage = MOJOSHADER_USAGE_TEXCOORD; + index = regnum; + } // else if + } // if + assert(r->usage == MOJOSHADER_USAGE_UNKNOWN); + r->usage = usage; + + switch (regtype) + { + case REG_TYPE_INPUT: + { + push_output(ctx, &ctx->mainline_intro); + SpirvTypeIdx sti = STI_PTR_VEC4_I; + tid = spv_get_type(ctx, sti); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassInput); + pop_output(ctx); + + // hnn: generate location decorators for the input + spv_output_location(ctx, r->spirv.iddecl, regnum); + break; + } + + case REG_TYPE_OUTPUT: + { + push_output(ctx, &ctx->mainline_intro); + SpirvTypeIdx sti = STI_PTR_VEC4_O; + if (usage == MOJOSHADER_USAGE_POINTSIZE) + { + sti = STI_PTR_FLOAT_O; + ctx->spirv.patch_table.vs_has_psize = 1; + } // if + else if (usage == MOJOSHADER_USAGE_FOG) + sti = STI_PTR_FLOAT_O; + + tid = spv_get_type(ctx, sti); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput); + pop_output(ctx); + + spv_link_vs_attributes(ctx, r->spirv.iddecl, usage, index); + break; + } // case + + default: + fail(ctx, "unknown vertex shader attribute register"); + } // switch + } // if + + else if (shader_is_pixel(ctx)) + { + // samplers DCLs get handled in emit_SPIRV_sampler(). + + if (flags & MOD_CENTROID) // !!! 
FIXME + { + failf(ctx, "centroid unsupported in %s profile", ctx->profile->name); + return; + } // if + + switch (regtype) + { + case REG_TYPE_COLOROUT: + spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index); + push_output(ctx, &ctx->mainline_intro); + tid = spv_get_type(ctx, STI_PTR_VEC4_O); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput); + pop_output(ctx); + break; + case REG_TYPE_DEPTHOUT: + // maps to BuiltIn FragDepth + spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index); + push_output(ctx, &ctx->mainline_intro); + tid = spv_get_type(ctx, STI_PTR_FLOAT_O); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput); + pop_output(ctx); + break; + case REG_TYPE_MISCTYPE: + assert((MiscTypeType)regnum == MISCTYPE_TYPE_FACE || (MiscTypeType)regnum == MISCTYPE_TYPE_POSITION); + // SpvBuiltInFrontFacing is a input bool, and for the DX bytecode + // we need to map it to a float that's either -1.0 or 1.0. + // SpvBuiltInFragCoord needs to be modified using vposFlip uniform + // to match vPos. + // Both of these take place in spv_link_ps_attributes() so don't + // create an input variable for it here. + spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, regnum); + break; + + case REG_TYPE_TEXTURE: + case REG_TYPE_INPUT: + // ps_1_1 is dealt with in emit_SPIRV_global(). + if (usage != MOJOSHADER_USAGE_TEXCOORD || shader_version_atleast(ctx, 1, 4)) + { + spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index); + push_output(ctx, &ctx->mainline_intro); + tid = spv_get_type(ctx, STI_PTR_VEC4_I); + spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassInput); + pop_output(ctx); + } // if + break; + default: + fail(ctx, "unknown pixel shader attribute register"); + } // switch + } // else if + + else + fail(ctx, "Unknown shader type"); // state machine should catch this. 
+} // emit_SPIRV_attribute + +static void output_SPIRV_uniform_array(Context *ctx, const RegisterType regtype, + const int size) +{ + if (size <= 0) + return; + + uint32 id_var, id_type_base; + uint32* dst_location_offset; + switch (regtype) + { + case REG_TYPE_CONST: + id_var = ctx->spirv.uniform_arrays.idvec4; + id_type_base = spv_get_type(ctx, STI_VEC4); + dst_location_offset = &ctx->spirv.patch_table.array_vec4.offset; + break; + + case REG_TYPE_CONSTINT: + id_var = ctx->spirv.uniform_arrays.idivec4; + id_type_base = spv_get_type(ctx, STI_IVEC4); + dst_location_offset = &ctx->spirv.patch_table.array_ivec4.offset; + break; + + case REG_TYPE_CONSTBOOL: + id_var = ctx->spirv.uniform_arrays.idbool; + id_type_base = spv_get_type(ctx, STI_INT); + dst_location_offset = &ctx->spirv.patch_table.array_bool.offset; + break; + + default: + fail(ctx, "BUG: used a uniform we don't know how to define."); + return; + } // switch + + if (id_var == 0) + return; // Never used, no need to declare. + + uint32 id_size = spv_getscalari(ctx, size); + uint32 id_type = spv_bumpid(ctx); + uint32 id_type_ptr = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 4, SpvOpTypeArray, id_type, id_type_base, id_size); + spv_emit(ctx, 4, SpvOpTypePointer, id_type_ptr, SpvStorageClassUniformConstant, id_type); + spv_emit(ctx, 4, SpvOpVariable, id_type_ptr, id_var, SpvStorageClassUniformConstant); + pop_output(ctx); + + char buf[64]; + spv_get_uniform_array_varname(ctx, regtype, buf, sizeof(buf)); + spv_output_name(ctx, id_var, buf); + + *dst_location_offset = spv_output_location(ctx, id_var, ~0u); +} // output_SPIRV_uniform_array + +void emit_SPIRV_finalize(Context *ctx) +{ + size_t i, max; + + /* The generator's magic number, this could be registered with Khronos + * if we wanted to. 0 is fine though, so use that for now. */ + uint32 genmagic = 0x00000000; + + /* Vertex shader main() function may need to do some position adjustments. 
However, + position may be written in subroutines, so we can't write position adjust code + at the end of main(), because output register might not be in ctx->used_registers + yet. Instead, we do adjust in a subroutine generated here and called at the + end of main(). */ + spv_emit_vs_main_end(ctx); + spv_emit_func_lit(ctx); + + output_SPIRV_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count); + output_SPIRV_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count); + output_SPIRV_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count); + + push_output(ctx, &ctx->preflight); + + spv_emit_word(ctx, SpvMagicNumber); + spv_emit_word(ctx, SpvVersion); + spv_emit_word(ctx, genmagic); + // "Bound: where all s in this module are guaranteed to satisfy 0 < id < Bound" + // `idmax` holds the last id that was given out, so we need to emit `idmax + 1` + spv_emit_word(ctx, ctx->spirv.idmax + 1); + spv_emit_word(ctx, 0); + + spv_emit(ctx, 2, SpvOpCapability, SpvCapabilityShader); + + // only non-zero when actually needed + if (ctx->spirv.idext) + { + const char *extstr = "GLSL.std.450"; + spv_emit_part(ctx, 2 + spv_strlen(extstr), 2, SpvOpExtInstImport, ctx->spirv.idext); + spv_emit_str(ctx, extstr); + } // if + + spv_emit(ctx, 3, SpvOpMemoryModel, SpvAddressingModelLogical, SpvMemoryModelSimple); + + assert(shader_is_vertex(ctx) || shader_is_pixel(ctx)); + SpvExecutionModel model = SpvExecutionModelVertex; + if (shader_is_pixel(ctx)) + model = SpvExecutionModelFragment; + + /* 3 is for opcode + exec. model + idmain */ + uint32 inoutcount = ctx->spirv.inoutcount; + + if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0)) + inoutcount += 1; + + spv_emit_part(ctx, 3 + spv_strlen(ctx->mainfn) + inoutcount, 3, SpvOpEntryPoint, + model, ctx->spirv.idmain + ); + spv_emit_str(ctx, ctx->mainfn); + + RegisterList *p = &ctx->attributes, *r = NULL; + // !!! FIXME: The first element of the list is always empty and I don't know why! 
+ p = p->next; + while (p) + { + r = spv_getreg(ctx, p->regtype, p->regnum); + if (r) + { + if (r->spirv.iddecl == ctx->spirv.id_var_vpos) + spv_emit_word(ctx, ctx->spirv.id_var_fragcoord); + else if (r->spirv.iddecl == ctx->spirv.id_var_vface) + spv_emit_word(ctx, ctx->spirv.id_var_frontfacing); + else + spv_emit_word(ctx, r->spirv.iddecl); + } // if + else + { + char varname[64]; + get_SPIRV_varname_in_buf(ctx, p->regtype, p->regnum, varname, sizeof (varname)); + failf( + ctx, + "missing attribute register %s (rt=%u, rn=%u, u=%u)", + varname, p->regtype, p->regnum, p->usage + ); + } // else + p = p->next; + } // while + + // only applies to pixel shaders + if (shader_is_pixel(ctx)) + { + if (!shader_version_atleast(ctx, 2, 0)) + { + // r0 is used as color output. + r = spv_getreg(ctx, REG_TYPE_TEMP, 0); + spv_emit_word(ctx, r->spirv.iddecl); + } // if + + spv_emit(ctx, 3, SpvOpExecutionMode, ctx->spirv.idmain, SpvExecutionModeOriginUpperLeft); + } // if + + pop_output(ctx); + + // Generate final patch table. 
+ + uint32 base_offset = 0; + if (ctx->preflight) base_offset += buffer_size(ctx->preflight); + if (ctx->globals) base_offset += buffer_size(ctx->globals); + if (ctx->inputs) base_offset += buffer_size(ctx->inputs); + if (ctx->outputs) base_offset += buffer_size(ctx->outputs); + base_offset >>= 2; + + int32 location_count = 0; + SpirvPatchTable* table = &ctx->spirv.patch_table; + if (table->vpflip.offset) + { + table->vpflip.offset += base_offset; + table->vpflip.location = location_count; + location_count += 1; + } // if + else + table->vpflip.location = -1; + + if (table->array_vec4.offset) + { + table->array_vec4.offset += base_offset; + table->array_vec4.location = location_count; + location_count += ctx->uniform_float4_count; + } // if + else + table->array_vec4.location = -1; + + if (table->array_ivec4.offset) + { + table->array_ivec4.offset += base_offset; + table->array_ivec4.location = location_count; + location_count += ctx->uniform_int4_count; + } // if + else + table->array_ivec4.location = -1; + + if (table->array_bool.offset) + { + table->array_bool.offset += base_offset; + table->array_bool.location = location_count; + location_count += ctx->uniform_bool_count; + } // if + else + table->array_bool.location = -1; + + for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++) + { + SpirvPatchEntry* entry = &table->samplers[i]; + if (entry->offset) + { + entry->offset += base_offset; + entry->location = location_count; + location_count++; + } // if + else + entry->location = -1; + } // for + + if (shader_is_pixel(ctx) && table->ps_texcoord0_offset) + table->ps_texcoord0_offset += base_offset; + + table->location_count = location_count; + + push_output(ctx, &ctx->postflight); + buffer_append(ctx->output, &ctx->spirv.patch_table, sizeof(ctx->spirv.patch_table)); + pop_output(ctx); + + spv_componentlist_free(ctx, ctx->spirv.cl.f.next); + spv_componentlist_free(ctx, ctx->spirv.cl.i.next); + spv_componentlist_free(ctx, ctx->spirv.cl.u.next); +} // 
emit_SPIRV_finalize + +/* NOP: deliberately emits nothing. */ +void emit_SPIRV_NOP(Context *ctx) +{ + // no-op is a no-op. :) + // TODO: (hnn) SPIR-V has OpNop :O +} // emit_SPIRV_NOP + +/* DEF: gives the destination register a fresh id, marks it as SSA, and emits an OpConstantComposite of type STI_VEC4 built from the four float literals at ctx->dwords into the mainline_intro buffer. */ +void emit_SPIRV_DEF(Context *ctx) +{ + RegisterList *rl; + uint32 val0, val1, val2, val3, idv4; + const float *raw = (const float *) ctx->dwords; + + rl = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum); + rl->spirv.iddecl = spv_bumpid(ctx); + rl->spirv.is_ssa = 1; + + val0 = spv_getscalarf(ctx, raw[0]); + val1 = spv_getscalarf(ctx, raw[1]); + val2 = spv_getscalarf(ctx, raw[2]); + val3 = spv_getscalarf(ctx, raw[3]); + + idv4 = spv_get_type(ctx, STI_VEC4); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 3 + 4, SpvOpConstantComposite, idv4, rl->spirv.iddecl, val0, val1, val2, val3); + pop_output(ctx); +} // emit_SPIRV_DEF + +/* DEFI: integer counterpart of DEF; the four literals at ctx->dwords are read as ints and the OpConstantComposite has type STI_IVEC4. */ +void emit_SPIRV_DEFI(Context *ctx) +{ + RegisterList *rl; + uint32 val0, val1, val2, val3, idiv4; + const int *raw = (const int *) ctx->dwords; + + rl = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum); + rl->spirv.iddecl = spv_bumpid(ctx); + rl->spirv.is_ssa = 1; + + val0 = spv_getscalari(ctx, raw[0]); + val1 = spv_getscalari(ctx, raw[1]); + val2 = spv_getscalari(ctx, raw[2]); + val3 = spv_getscalari(ctx, raw[3]); + + idiv4 = spv_get_type(ctx, STI_IVEC4); + + push_output(ctx, &ctx->mainline_intro); + spv_emit(ctx, 3 + 4, SpvOpConstantComposite, idiv4, rl->spirv.iddecl, val0, val1, val2, val3); + pop_output(ctx); +} // emit_SPIRV_DEFI + +/* DEFB: binds the destination register to the id returned by spv_gettrue() or spv_getfalse(), chosen by ctx->dwords[0], and marks it as SSA. */ +void emit_SPIRV_DEFB(Context *ctx) +{ + RegisterList *rl = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum); + rl->spirv.iddecl = ctx->dwords[0] ? 
spv_gettrue(ctx) : spv_getfalse(ctx); + rl->spirv.is_ssa = 1; +} // emit_SPIRV_DEFB + +/* DCL: reserves a declaration id for the destination register so later instructions can refer to it. */ +void emit_SPIRV_DCL(Context *ctx) +{ + // state_DCL handles checking if the registers are valid for this + // instruction, and collecting samplers and attribs + RegisterList *reg = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum); + + // This id will be assigned to in emit_SPIRV_attribute, but + // emit_SPIRV_attribute is called after instructions are emitted, + // so we generate the id here so it can be used in instructions + reg->spirv.iddecl = spv_bumpid(ctx); +} // emit_SPIRV_DCL + +/* Shared tail of DP3/DP4: emits OpDot of src0 and src1 (asserted to share a type id) into mainline, broadcasts the scalar to a vec4 and assigns it to the destination argument. */ +static void emit_SPIRV_dotproduct(Context *ctx, SpirvResult src0, SpirvResult src1) +{ + SpirvResult result; + + assert(src0.tid == src1.tid); + + result.tid = spv_get_type(ctx, STI_FLOAT); + result.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpDot, result.tid, result.id, src0.id, src1.id); + + // Broadcast scalar result across all channels of a vec4 + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_vectorbroadcast(ctx, result.tid, result.id); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_dotproduct + +/* DP4: dot product of the fully loaded source args 0 and 1. */ +void emit_SPIRV_DP4(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg_full(ctx, 0); + SpirvResult src1 = spv_load_srcarg_full(ctx, 1); + + emit_SPIRV_dotproduct(ctx, src0, src1); +} // emit_SPIRV_DP4 + +/* DP3: dot product of source args 0 and 1 loaded with mask 0x7 (presumably the x, y and z components; confirm against spv_load_srcarg). */ +void emit_SPIRV_DP3(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x7); + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x7); + + emit_SPIRV_dotproduct(ctx, src0, src1); +} // emit_SPIRV_DP3 + +/* Prologue for one-source ops: loads source arg 0 in full into *src, gives *dst the STI_VEC4 type and a fresh id, and pushes ctx->mainline as the output. The matching pop is done by spv_emit_end(). */ +static void spv_emit_begin_ds(Context *ctx, SpirvResult* dst, SpirvResult* src) +{ + *src = spv_load_srcarg_full(ctx, 0); + dst->tid = spv_get_type(ctx, STI_VEC4); + dst->id = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); +} // spv_emit_begin_ds + +/* Prologue for two-source ops: same as spv_emit_begin_ds() but loads source args 0 and 1. */ +static void spv_emit_begin_dss(Context *ctx, SpirvResult* dst, SpirvResult* src0, SpirvResult* src1) +{ + *src0 = spv_load_srcarg_full(ctx, 0); + *src1 = 
spv_load_srcarg_full(ctx, 1); + dst->tid = spv_get_type(ctx, STI_VEC4); + dst->id = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); +} // spv_emit_begin_dss + +/* Prologue for three-source ops: loads source args 0, 1 and 2 in full, gives *dst the STI_VEC4 type and a fresh id, and pushes ctx->mainline as the output. */ +static void spv_emit_begin_dsss(Context *ctx, SpirvResult* dst, + SpirvResult* src0, SpirvResult* src1, SpirvResult* src2) +{ + *src0 = spv_load_srcarg_full(ctx, 0); + *src1 = spv_load_srcarg_full(ctx, 1); + *src2 = spv_load_srcarg_full(ctx, 2); + dst->tid = spv_get_type(ctx, STI_VEC4); + dst->id = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); +} // spv_emit_begin_dsss + +/* Epilogue paired with the spv_emit_begin_* helpers: pops the output they pushed, then assigns dst to the destination argument. */ +static void spv_emit_end(Context *ctx, SpirvResult dst) +{ + pop_output(ctx); + spv_assign_destarg(ctx, dst); +} // spv_emit_end + +/* Shared setup for the TEXM3X3 family: loads the texture registers recorded in ctx->texm3x3pad_dst0/src0/dst1/src1, the texture register named by source arg 0 and the destination register, shuffles components 0,1,2 out of each, and emits three OpDot instructions (pad0 pair, pad1 pair, dst with source arg 0). Returns the loaded pad/dst ids together with the three dot-product ids. */ +static SpirvTexm3x3SetupResult spv_texm3x3_setup(Context *ctx) +{ + SpirvTexm3x3SetupResult result; + + DestArgInfo *pDstInfo = &ctx->dest_arg; + + RegisterList *pSrc0 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0); + RegisterList *pSrc1 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0); + RegisterList *pSrc2 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1); + RegisterList *pSrc3 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1); + RegisterList *pSrc4 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum); + RegisterList *pDst = spv_getreg(ctx, pDstInfo->regtype, pDstInfo->regnum); + + SpirvResult src0 = spv_loadreg(ctx, pSrc0); + SpirvResult src1 = spv_loadreg(ctx, pSrc1); + SpirvResult src2 = spv_loadreg(ctx, pSrc2); + SpirvResult src3 = spv_loadreg(ctx, pSrc3); + SpirvResult src4 = spv_loadreg(ctx, pSrc4); + SpirvResult dst = spv_loadreg(ctx, pDst); + + result.id_dst_pad0 = src0.id; + result.id_dst_pad1 = src2.id; + result.id_dst = dst.id; + + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3); + + uint32 id_src0_xyz = spv_bumpid(ctx); + uint32 id_src1_xyz = spv_bumpid(ctx); + uint32 id_src2_xyz = spv_bumpid(ctx); + uint32 id_src3_xyz = spv_bumpid(ctx); + uint32 id_src4_xyz = spv_bumpid(ctx); + uint32 
id_dst_xyz = spv_bumpid(ctx); + uint32 id_res_x = spv_bumpid(ctx); + uint32 id_res_y = spv_bumpid(ctx); + uint32 id_res_z = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src0_xyz, src0.id, src0.id, 0, 1, 2); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src1_xyz, src1.id, src1.id, 0, 1, 2); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src2_xyz, src2.id, src2.id, 0, 1, 2); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src3_xyz, src3.id, src3.id, 0, 1, 2); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src4_xyz, src4.id, src4.id, 0, 1, 2); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_dst_xyz, dst.id, dst.id, 0, 1, 2); + + spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_x, id_src0_xyz, id_src1_xyz); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_y, id_src2_xyz, id_src3_xyz); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_z, id_dst_xyz, id_src4_xyz); + + pop_output(ctx); + + result.id_res_x = id_res_x; + result.id_res_y = id_res_y; + result.id_res_z = id_res_z; + + return result; +} // spv_texm3x3_setup + +/* Emits the reflection arithmetic for vec3 ids id_normal and id_eyeray into whatever output is currently active (no push/pop is done here) and returns the id of the vec3 result. NOTE(review): the N*E and N*N terms are emitted as component-wise OpFMul on vec3 values rather than OpDot; confirm this is the intended formula. */ +static uint32 spv_reflect(Context *ctx, uint32 id_normal, uint32 id_eyeray) +{ + // reflect(E : vec3 = eyeray, N : vec3 = normal) -> vec3 + // 2 * [(N*E) / (N*N)] * N - E + + uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3); + uint32 id_2 = spv_getscalarf(ctx, 2.0f); + uint32 id_2_v3 = spv_bumpid(ctx); + uint32 id_refl_0 = spv_bumpid(ctx); + uint32 id_refl_1 = spv_bumpid(ctx); + uint32 id_refl_2 = spv_bumpid(ctx); + uint32 id_refl_3 = spv_bumpid(ctx); + uint32 id_refl_4 = spv_bumpid(ctx); + uint32 id_reflected = spv_bumpid(ctx); + + spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_2_v3, id_2, id_2, id_2); + spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_refl_0, id_normal, id_eyeray); + spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_refl_1, id_normal, id_normal); + spv_emit(ctx, 5, SpvOpFDiv, tid_vec3, id_refl_2, id_refl_0, id_refl_1); + spv_emit(ctx, 5, 
SpvOpFMul, tid_vec3, id_refl_3, id_refl_2, id_normal); + spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_refl_4, id_refl_3, id_2_v3); + spv_emit(ctx, 5, SpvOpFSub, tid_vec3, id_reflected, id_refl_4, id_eyeray); + + return id_reflected; +} // spv_reflect + +/* ADD: dst = src0 + src1 via OpFAdd. */ +void emit_SPIRV_ADD(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + spv_emit(ctx, 5, SpvOpFAdd, dst.tid, dst.id, src0.id, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_ADD + +/* SUB: dst = src0 - src1 via OpFSub. */ +void emit_SPIRV_SUB(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + spv_emit(ctx, 5, SpvOpFSub, dst.tid, dst.id, src0.id, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_SUB + +/* MUL: dst = src0 * src1 via OpFMul. */ +void emit_SPIRV_MUL(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + spv_emit(ctx, 5, SpvOpFMul, dst.tid, dst.id, src0.id, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_MUL + +/* SLT: compares src0 < src1 with an ordered compare producing a STI_BVEC4 mask, then selects per component between the values from spv_getvec4_one() (true) and spv_getvec4_zero() (false). */ +void emit_SPIRV_SLT(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050(v=vs.85).aspx + // "The comparisons EQ, GT, GE, LT, and LE, when either or both operands is NaN returns FALSE" + uint32 bool_result = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFOrdLessThan, spv_get_type(ctx, STI_BVEC4), bool_result, src0.id, src1.id); + + uint32 ones = spv_getvec4_one(ctx); + uint32 zeros = spv_getvec4_zero(ctx); + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, bool_result, ones, zeros); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_SLT + +/* SGE: same shape as SLT but the ordered compare is src0 >= src1. */ +void emit_SPIRV_SGE(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050(v=vs.85).aspx + // "The comparisons EQ, GT, GE, LT, and LE, when either or both operands is NaN returns FALSE" + uint32 bool_result = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFOrdGreaterThanEqual, spv_get_type(ctx, 
STI_BVEC4), bool_result, src0.id, src1.id); + + uint32 ones = spv_getvec4_one(ctx); + uint32 zeros = spv_getvec4_zero(ctx); + + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, bool_result, ones, zeros); + spv_emit_end(ctx, dst); +} // emit_SPIRV_SGE + +/* MIN: dst = FMin(src0, src1) using the GLSL.std.450 extended instruction set. */ +void emit_SPIRV_MIN(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + spv_emit(ctx, 5 + 2, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450FMin, src0.id, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_MIN + +/* MAX: dst = FMax(src0, src1) using the GLSL.std.450 extended instruction set. */ +void emit_SPIRV_MAX(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + spv_emit(ctx, 5 + 2, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450FMax, src0.id, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_MAX + +/* POW: dst = Pow(FAbs(src0), src1); the absolute value is taken first and gets its own id. */ +void emit_SPIRV_POW(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + uint32 id_abs = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, src0.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, src0.id); + spv_emit(ctx, 5 + 2, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450Pow, id_abs, src1.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_POW + +/* Emits into mainline an OpVectorShuffle that picks components 0,1,2 of input and returns the id of the resulting vec3. */ +static uint32 spv_extract_vec3(Context *ctx, uint32 input) +{ + uint32 vec3 = spv_get_type(ctx, STI_VEC3); + uint32 result = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, vec3, result, input, input, 0, 1, 2); + pop_output(ctx); + + return result; +} // spv_extract_vec3 + +/* CRS: cross product (GLSLstd450Cross) of the vec3 parts of src0 and src1, widened back to dst's vec4 type by a shuffle whose fourth component literal is 0xFFFFFFFF. */ +void emit_SPIRV_CRS(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + + uint32 vec3 = spv_get_type(ctx, STI_VEC3); + uint32 src0_vec3 = spv_extract_vec3(ctx, src0.id); + uint32 src1_vec3 = spv_extract_vec3(ctx, src1.id); + uint32 result_vec3 = spv_bumpid(ctx); + + spv_emit(ctx, 5 + 2, SpvOpExtInst, vec3, result_vec3, spv_getext(ctx), + GLSLstd450Cross, src0_vec3, src1_vec3); + + // According to DirectX docs, CRS doesn't allow 
`w` in its writemask, so we + // can make this component anything and the code generated by + // `spv_assign_destarg()` will just throw it away. + spv_emit(ctx, 5 + 4, SpvOpVectorShuffle, dst.tid, dst.id, + result_vec3, result_vec3, 0, 1, 2, 0xFFFFFFFF); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_CRS + +/* MAD: dst = src0 * src1 + src2, emitted as OpFMul followed by OpFAdd. All three sources are asserted to share one type id, which is also used for the result. */ +void emit_SPIRV_MAD(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg_full(ctx, 0); + SpirvResult src1 = spv_load_srcarg_full(ctx, 1); + SpirvResult src2 = spv_load_srcarg_full(ctx, 2); + assert(src0.tid == src1.tid); + assert(src0.tid == src2.tid); + uint32 mul_result = spv_bumpid(ctx); + SpirvResult result; + result.tid = src0.tid; + result.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpFMul, src0.tid, mul_result, src0.id, src1.id); + spv_emit(ctx, 5, SpvOpFAdd, src0.tid, result.id, mul_result, src2.id); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_MAD + +/* TEXKILL: loads the destination register, swizzles out its first three components, and if any of them is less than zero branches into a block that ends in OpKill. Emission continues in the merge block. */ +void emit_SPIRV_TEXKILL(Context *ctx) +{ + const DestArgInfo *pDstInfo = &ctx->dest_arg; + RegisterList *pDst = spv_getreg(ctx, pDstInfo->regtype, pDstInfo->regnum); + SpirvResult dst = spv_loadreg(ctx, pDst); + + uint32 vec3 = spv_get_type(ctx, STI_VEC3); + uint32 bvec3 = spv_get_type(ctx, STI_BVEC3); + + uint32 zeros = spv_get_zero(ctx, vec3); + + push_output(ctx, &ctx->mainline); + uint32 res_swiz = spv_emit_swizzle(ctx, dst.id, vec3, (0 << 0) | (1 << 2) | (2 << 4), 0x7); + uint32 res_lt = spv_bumpid(ctx); + uint32 res_any = spv_bumpid(ctx); + uint32 label_true = spv_bumpid(ctx); + uint32 label_merge = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFOrdLessThan, bvec3, res_lt, res_swiz, zeros); + spv_emit(ctx, 4, SpvOpAny, spv_get_type(ctx, STI_BOOL), res_any, res_lt); + spv_emit(ctx, 3, SpvOpSelectionMerge, label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, res_any, label_true, label_merge); + spv_emit(ctx, 2, SpvOpLabel, label_true); + spv_emit(ctx, 1, SpvOpKill); + spv_emit(ctx, 2, SpvOpLabel, label_merge); + pop_output(ctx); +} // 
emit_SPIRV_TEXKILL + +/* DP2ADD: OpDot of source args 0 and 1 (each loaded with mask 0x3), plus source arg 2 (loaded with mask 0x1), with the scalar sum broadcast to a vec4 and assigned to the destination. */ +void emit_SPIRV_DP2ADD(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x3); + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x3); + SpirvResult src2 = spv_load_srcarg(ctx, 2, 0x1); + + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 id_dot = spv_bumpid(ctx); + uint32 id_add = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_dot, src0.id, src1.id); + spv_emit(ctx, 5, SpvOpFAdd, tid_float, id_add, id_dot, src2.id); + SpirvResult result; + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_vectorbroadcast(ctx, result.tid, id_add); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_DP2ADD + +/* MOV: assigns the fully loaded source arg 0 to the destination; no arithmetic instruction is emitted directly here. */ +void emit_SPIRV_MOV(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg_full(ctx, 0); + spv_assign_destarg(ctx, src0); +} // emit_SPIRV_MOV + +/* RCP: per-component 1/src, with the value from spv_get_flt_max() selected wherever src compares equal to zero. The bool mask type is picked to match the vector width of src's type id. */ +void emit_SPIRV_RCP(Context *ctx) +{ + /* + if (src != 0.0f) + dst = 1.0f / src; + else + dst = FLT_MAX; + */ + + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + + SpirvTypeIdx sti_bvec = + (src.tid == ctx->spirv.tid[STI_VEC4]) ? STI_BVEC4 : + (src.tid == ctx->spirv.tid[STI_VEC3]) ? STI_BVEC3 : + (src.tid == ctx->spirv.tid[STI_VEC2]) ? 
STI_BVEC2 : STI_BOOL; + + uint32 tid_bvec = spv_get_type(ctx, sti_bvec); + uint32 id_one = spv_get_one(ctx, src.tid); + uint32 id_zero = spv_get_zero(ctx, src.tid); + uint32 id_flt_max = spv_get_flt_max(ctx, src.tid); + uint32 id_mask = spv_bumpid(ctx); + uint32 id_div = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_bvec, id_mask, src.id, id_zero); + spv_emit(ctx, 5, SpvOpFDiv, dst.tid, id_div, id_one, src.id); + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_mask, id_div, id_flt_max); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_RCP + +/* RSQ: per-component InverseSqrt(FAbs(src)), with the value from spv_get_flt_max() selected wherever src compares equal to zero. */ +void emit_SPIRV_RSQ(Context *ctx) +{ + /* + if (src != 0.0f) + dst = 1.0f / abs(src); + else + dst = FLT_MAX; + */ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + + SpirvTypeIdx sti_bvec = + (src.tid == ctx->spirv.tid[STI_VEC4]) ? STI_BVEC4 : + (src.tid == ctx->spirv.tid[STI_VEC3]) ? STI_BVEC3 : + (src.tid == ctx->spirv.tid[STI_VEC2]) ? STI_BVEC2 : STI_BOOL; + + uint32 tid_bvec = spv_get_type(ctx, sti_bvec); + uint32 id_zero = spv_get_zero(ctx, src.tid); + uint32 id_flt_max = spv_get_flt_max(ctx, src.tid); + uint32 id_mask = spv_bumpid(ctx); + uint32 id_abs = spv_bumpid(ctx); + uint32 id_rsq = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_bvec, id_mask, src.id, id_zero); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, src.id); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_rsq, spv_getext(ctx), GLSLstd450InverseSqrt, id_abs); + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_mask, id_rsq, id_flt_max); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_RSQ + +/* EXP: dst = Exp2(src), i.e. the base-2 exponential from GLSL.std.450. */ +void emit_SPIRV_EXP(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450Exp2, src.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_EXP + +/* SGN: dst = FSign(src); only source arg 0 is read. */ +void emit_SPIRV_SGN(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + + // SGN also takes a src1 and src2 to use for 
intermediate results, they are + // left undefined after the instruction executes, and as such it is + // perfectly valid for us to not touch those registers in our implementation + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450FSign, src.id); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_SGN + +/* ABS: dst = FAbs(src). */ +void emit_SPIRV_ABS(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450FAbs, src.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_ABS + +/* NRM: multiplies the whole source by a scalar factor broadcast to four components. The factor is InverseSqrt of the dot product of the source's first three components with themselves, or FLT_MAX when that dot product compares equal to zero. */ +void emit_SPIRV_NRM(Context *ctx) +{ + /* + float dot = dot(src, src); + + float f; + if (dot != 0) + f = (float)(1/sqrt(dot)); + else + f = FLT_MAX; + + dst = src0*f; + */ + + SpirvResult src = spv_load_srcarg_full(ctx, 0); + uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3); + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id_zero = spv_getscalarf(ctx, 0.0f); + uint32 id_flt_max = spv_getscalarf(ctx, FLT_MAX); + uint32 id_src_xyz = spv_bumpid(ctx); + uint32 id_dot = spv_bumpid(ctx); + uint32 id_dot_valid = spv_bumpid(ctx); + uint32 id_f = spv_bumpid(ctx); + uint32 id_f_sane = spv_bumpid(ctx); + uint32 id_f_vec = spv_bumpid(ctx); + + SpirvResult dst; + dst.tid = src.tid; + dst.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src_xyz, src.id, src.id, 0, 1, 2); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_dot, id_src_xyz, id_src_xyz); + spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_bool, id_dot_valid, id_dot, id_zero); + spv_emit(ctx, 5 + 1, SpvOpExtInst, tid_float, id_f, spv_getext(ctx), GLSLstd450InverseSqrt, id_dot); + spv_emit(ctx, 6, SpvOpSelect, tid_float, id_f_sane, id_dot_valid, id_f, id_flt_max); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, dst.tid, id_f_vec, id_f_sane, id_f_sane, id_f_sane, id_f_sane); + spv_emit(ctx, 5, SpvOpFMul, dst.tid, dst.id, src.id, id_f_vec); + 
pop_output(ctx); + spv_assign_destarg(ctx, dst); +} // emit_SPIRV_NRM + +/* FRC: dst = Fract(src). */ +void emit_SPIRV_FRC(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, dst.id, spv_getext(ctx), GLSLstd450Fract, src.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_FRC + +/* LOG: per-component Log2(FAbs(src)), with -FLT_MAX selected wherever the absolute value compares equal to zero. */ +void emit_SPIRV_LOG(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + + // LOG(x) := (x == vec4(0.0)) ? vec4(-FLT_MAX) : log2(abs(x)) + + // abs(x) + uint32 abs_src0 = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, abs_src0, spv_getext(ctx), GLSLstd450FAbs, src.id); + + // vec4(0.0) + uint32 vec4_zero = spv_vectorbroadcast(ctx, dst.tid, spv_getscalarf(ctx, 0.0f)); + + // x == vec4(0.0) + uint32 is_zero = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFOrdEqual, spv_get_type(ctx, STI_BVEC4), is_zero, abs_src0, vec4_zero); + + // log2(abs(x)) + uint32 log2_of_nonzero = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, log2_of_nonzero, spv_getext(ctx), GLSLstd450Log2, abs_src0); + + // vec4(-FLT_MAX) + uint32 vec4_neg_flt_max = spv_vectorbroadcast(ctx, dst.tid, spv_getscalarf(ctx, -FLT_MAX)); + + // (x == vec4(0.0)) ? 
vec4(-FLT_MAX) : log2(abs(x)) + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, is_zero, vec4_neg_flt_max, log2_of_nonzero); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_LOG + +/* SINCOS: builds a vec4 of (cos, sin, zero, zero) from source arg 0 loaded with mask 0x1. Cos is computed only when writemask bit 0 is set and sin only when bit 1 is set; the zero constant is used otherwise. Instructions go to whatever output is currently active (no push/pop is done here). */ +void emit_SPIRV_SINCOS(Context *ctx) +{ + SpirvResult src = spv_load_srcarg(ctx, 0, 0x1); + + // For vs_2_0 and vs_2_x this instruction also has a src1 and src2 which provide a couple of constants + // We just ignore these in any case + + // float V = src0.x; + + int writemask = ctx->dest_arg.writemask; + uint32 id_zero = spv_get_zero(ctx, src.tid); + + uint32 id_cos; + if (writemask & 1) // .x = cos(V) + { + id_cos = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, src.tid, id_cos, spv_getext(ctx), GLSLstd450Cos, src.id); + } // if + else + id_cos = id_zero; + + uint32 id_sin; + if (writemask & 2) // .y = sin(V) + { + id_sin = spv_bumpid(ctx); + spv_emit(ctx, 5 + 1, SpvOpExtInst, src.tid, id_sin, spv_getext(ctx), GLSLstd450Sin, src.id); + } // if + else + id_sin = id_zero; + + SpirvResult dst; + dst.tid = spv_get_type(ctx, STI_VEC4); + dst.id = spv_bumpid(ctx); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, dst.tid, dst.id, id_cos, id_sin, id_zero, id_zero); + + spv_assign_destarg(ctx, dst); +} // emit_SPIRV_SINCOS + +/* MOVA: rounds the vec4 source with GLSLstd450Round, converts it to STI_IVEC4 with OpConvertFToS, and assigns the integer vector to the destination. */ +void emit_SPIRV_MOVA(Context *ctx) +{ + SpirvResult src = spv_load_srcarg_full(ctx, 0); + assert(src.tid == spv_get_type(ctx, STI_VEC4)); + + uint32 id_rounded = spv_bumpid(ctx); + + SpirvResult dst; + dst.tid = spv_get_type(ctx, STI_IVEC4); + dst.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5 + 1, SpvOpExtInst, spv_get_type(ctx, STI_VEC4), id_rounded, + spv_getext(ctx), GLSLstd450Round, src.id); + spv_emit(ctx, 4, SpvOpConvertFToS, dst.tid, dst.id, id_rounded); + pop_output(ctx); + + spv_assign_destarg(ctx, dst); +} // emit_SPIRV_MOVA + +/* CMP: per component selects src1 where src0 >= 0 (OpFUnordGreaterThanEqual against the zero constant) and src2 otherwise. */ +void emit_SPIRV_CMP(Context *ctx) +{ + SpirvResult dst, src0, src1, src2; + spv_emit_begin_dsss(ctx, &dst, &src0, &src1, &src2); + uint32 id_0_0 = spv_get_zero(ctx, src0.tid); + + uint32 id_cmp = 
spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFUnordGreaterThanEqual, spv_get_type(ctx, STI_BVEC4), id_cmp, src0.id, id_0_0); + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_cmp, src1.id, src2.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_CMP + +/* CND: per component selects src1 where src0 > 0.5 (OpFUnordGreaterThan against a 0.5 constant of src0's type) and src2 otherwise. */ +void emit_SPIRV_CND(Context *ctx) +{ + SpirvResult dst, src0, src1, src2; + spv_emit_begin_dsss(ctx, &dst, &src0, &src1, &src2); + uint32 id_0_5 = spv_get_constant_composite(ctx, src0.tid, ctx->spirv.id_0_5, 0.5f); + + uint32 id_cmp = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpFUnordGreaterThan, spv_get_type(ctx, STI_BVEC4), id_cmp, src0.id, id_0_5); + spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_cmp, src1.id, src2.id); + spv_emit_end(ctx, dst); +} // emit_SPIRV_CND + +/* LIT: emits an OpFunctionCall to a shared helper function taking the source vector, allocating ctx->spirv.id_func_lit on first use. The helper body is not emitted here (presumably by spv_emit_func_lit(); verify). */ +void emit_SPIRV_LIT(Context *ctx) +{ + SpirvResult dst, src; + spv_emit_begin_ds(ctx, &dst, &src); + + if (!ctx->spirv.id_func_lit) + ctx->spirv.id_func_lit = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpFunctionCall, dst.tid, dst.id, ctx->spirv.id_func_lit, src.id); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_LIT + +/* DST: constructs vec4(1.0, src0[1] * src1[1], src0[2], src1[3]). The dst.id handed out by spv_emit_begin_dss() is replaced by a newer id allocated after the temporaries. */ +void emit_SPIRV_DST(Context *ctx) +{ + SpirvResult dst, src0, src1; + spv_emit_begin_dss(ctx, &dst, &src0, &src1); + + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + dst.tid = spv_get_type(ctx, STI_VEC4); + uint32 id_1_0 = spv_getscalarf(ctx, 1.0f); + uint32 id_src0_y = spv_bumpid(ctx); + uint32 id_src1_y = spv_bumpid(ctx); + uint32 id_src0_z = spv_bumpid(ctx); + uint32 id_src1_w = spv_bumpid(ctx); + uint32 id_dst_y = spv_bumpid(ctx); + dst.id = spv_bumpid(ctx); + + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src0_y, src0.id, 1); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src1_y, src1.id, 1); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src0_z, src0.id, 2); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src1_w, src1.id, 3); + spv_emit(ctx, 5, SpvOpFMul, tid_float, id_dst_y, id_src0_y, id_src1_y); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, dst.tid, dst.id, id_1_0, id_dst_y, id_src0_z, 
id_src1_w); + + spv_emit_end(ctx, dst); +} // emit_SPIRV_DST + +/* LRP: linear interpolation through GLSLstd450FMix, with source arg 2 as the start value, source arg 1 as the end value and source arg 0 as the blend factor. */ +void emit_SPIRV_LRP(Context *ctx) +{ + // lerp(x, y, a) = x + a*(y - x) + // = x*(1 - a) + y*a + SpirvResult a = spv_load_srcarg_full(ctx, 0); // 'scale' + SpirvResult y = spv_load_srcarg_full(ctx, 1); // 'end' + SpirvResult x = spv_load_srcarg_full(ctx, 2); // 'start' + assert(x.tid == y.tid); + SpirvResult result; + result.id = spv_bumpid(ctx); + result.tid = x.tid; + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5 + 3, SpvOpExtInst, result.tid, result.id, spv_getext(ctx), GLSLstd450FMix, x.id, y.id, a.id); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_LRP + +/* Shared implementation of the MnXm instructions. Source arg 0 is loaded with writemask (0x7 or 0xF); for each of `rows` consecutive registers starting at source arg 1's register number, the row is swizzled with the same mask and dotted with source arg 0. The scalar results are packed into a vec4, with 0.0 filling the components beyond `rows`, and assigned to the destination. */ +static void spv_emit_vecXmatrix(Context *ctx, int rows, int writemask) +{ + int i; + + assert(rows <= 4); + assert(writemask == 0x7 || writemask == 0xF); + + uint32 src0 = spv_load_srcarg(ctx, 0, writemask).id; + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + + RegisterType src1type = ctx->source_args[1].regtype; + int src1num = ctx->source_args[1].regnum; + + uint32 result_components[4]; + for (i = 0; i < rows; i++) + { + SpirvResult row = spv_loadreg(ctx, spv_getreg(ctx, src1type, src1num + i)); + row = spv_swizzle(ctx, row, SPV_NO_SWIZZLE, writemask); + uint32 dot_result = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpDot, tid_float, dot_result, src0, row.id); + pop_output(ctx); + + result_components[i] = dot_result; + } // for + + SpirvResult r; + r.tid = spv_get_type(ctx, STI_VEC4); + r.id = spv_bumpid(ctx); + + uint32 id_zero = 0; + if (rows < 4) + id_zero = spv_getscalarf(ctx, 0.0f); + + push_output(ctx, &ctx->mainline); + spv_emit_part(ctx, 3 + 4, 3, SpvOpCompositeConstruct, r.tid, r.id); + for (i = 0; i < rows; i++) spv_emit_word(ctx, result_components[i]); + for (i = rows; i < 4; i++) spv_emit_word(ctx, id_zero); + pop_output(ctx); + + spv_assign_destarg(ctx, r); +} // spv_emit_vecXmatrix + +/* M4X4: spv_emit_vecXmatrix() with 4 rows and mask 0xF. */ +void emit_SPIRV_M4X4(Context *ctx) +{ + // float4 * (4 columns, 4 rows) -> 
float4 + spv_emit_vecXmatrix(ctx, 4, 0xF); +} // emit_SPIRV_M4X4 + +/* M4X3: spv_emit_vecXmatrix() with 3 rows and mask 0xF. */ +void emit_SPIRV_M4X3(Context *ctx) +{ + // float4 * (4 columns, 3 rows) -> float3 + spv_emit_vecXmatrix(ctx, 3, 0xF); +} // emit_SPIRV_M4X3 + +/* M3X4: spv_emit_vecXmatrix() with 4 rows and mask 0x7. */ +void emit_SPIRV_M3X4(Context *ctx) +{ + // float3 * (3 columns, 4 rows) -> float4 + spv_emit_vecXmatrix(ctx, 4, 0x7); +} // emit_SPIRV_M3X4 + +/* M3X3: spv_emit_vecXmatrix() with 3 rows and mask 0x7. */ +void emit_SPIRV_M3X3(Context *ctx) +{ + // float3 * (3 columns, 3 rows) -> float3 + spv_emit_vecXmatrix(ctx, 3, 0x7); +} // emit_SPIRV_M3X3 + +/* M3X2: spv_emit_vecXmatrix() with 2 rows and mask 0x7. */ +void emit_SPIRV_M3X2(Context *ctx) +{ + // float3 * (3 columns, 2 rows) -> float2 + spv_emit_vecXmatrix(ctx, 2, 0x7); +} // emit_SPIRV_M3X2 + +/* TEXLD: texture sample, with three paths chosen by shader version. Below 1.4 the destination register number is used both to find the sampler and to get the texcoord register, and OpImageSampleImplicitLod is emitted. Version 1.4 (below 2.0) is unimplemented and reports a failure. From 2.0 on, source arg 0 is the texcoord and source arg 1 the sampler, and ctx->instruction_controls selects the projective (TEXLDP) or biased (TEXLDB) variant. */ +void emit_SPIRV_TEXLD(Context *ctx) +{ + if (!shader_version_atleast(ctx, 1, 4)) + { + DestArgInfo *dst_info = &ctx->dest_arg; + + RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, dst_info->regnum); + RegisterList *treg = spv_getreg(ctx, dst_info->regtype, dst_info->regnum); + + // Variables are not declared using dcl opcodes, so handle it in this instruction. + assert(sreg->spirv.iddecl == 0); + assert(treg->spirv.iddecl == 0); + + // Prep the result + SpirvResult result; + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_bumpid(ctx); + SpirvResult sampler = spv_loadreg(ctx, sreg); + // OpImageSampleImplicitLod should ignore the components of this argument that + // it doesn't need, so we don't need to mask it + SpirvResult texcoord = spv_loadreg(ctx, treg); + + // Generate the instruction. + // OpImageSampleImplicitLod should ignore the components of the + // texcoord that it doesn't need, so we don't need to mask it. + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, + sampler.id, texcoord.id); + pop_output(ctx); + + // Emit the result, finally. + assert(!isscalar(ctx, ctx->shader_type, sreg->regtype, sreg->regnum)); + spv_assign_destarg(ctx, result); + } // if + + else if (!shader_version_atleast(ctx, 2, 0)) + { + // ps_1_4 is different, too! 
+ fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME + return; + } // else if + + else + { + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sampler_reg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum); + const SourceArgInfo *texcoord_arg = &ctx->source_args[0]; + RegisterList *texcoord_reg = spv_getreg(ctx, texcoord_arg->regtype, texcoord_arg->regnum); + + if (sampler_reg == NULL) + { + fail(ctx, "TEXLD using undeclared sampler"); + return; + } // if + + // Special case for TEXLDB + // !!! FIXME: does the d3d bias value map directly to GLSL? + /* With TEXLDB the sample instruction is 7 words long (two extra words for the image-operand mask and the bias id); otherwise it is 5 words and bias stays 0. */ + uint32 bias; + uint32 instruction_length; + if (ctx->instruction_controls == CONTROL_TEXLDB) + { + uint32 float_tid = spv_get_type(ctx, STI_FLOAT); + bias = spv_bumpid(ctx); + instruction_length = 7; + + // The w component of texcoord_reg specifies the bias. Extract it from texcoord_reg + /* NOTE(review): the extract reads texcoord_reg->spirv.iddecl directly rather than the loaded texcoord value obtained further down via spv_load_srcarg_full(ctx, 0); confirm iddecl is a value id (not a variable) for every register type that can reach this path. */ + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 4 + 1, SpvOpCompositeExtract, float_tid, bias, texcoord_reg->spirv.iddecl, 3); + pop_output(ctx); + } // if + else + { + bias = 0; + instruction_length = 5; + } // else + + // Determine the opcode + SpvOp opcode; + if (ctx->instruction_controls == CONTROL_TEXLDP) + { + if ((TextureType) sampler_reg->index == TEXTURE_TYPE_CUBE) + fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal? + opcode = SpvOpImageSampleProjImplicitLod; + } // if + else + opcode = SpvOpImageSampleImplicitLod; + + // Prep the result + uint32 vec4_tid = spv_get_type(ctx, STI_VEC4); + uint32 result = spv_bumpid(ctx); + uint32 sampler = spv_load_srcarg_full(ctx, 1).id; + // OpImageSampleImplicitLod should ignore the components of this argument that + // it doesn't need, so we don't need to mask it + uint32 texcoord = spv_load_srcarg_full(ctx, 0).id; + + // Generate the instruction. + // OpImageSampleImplicitLod should ignore the components of the + // texcoord that it doesn't need, so we don't need to mask it. 
+ push_output(ctx, &ctx->mainline); + spv_emit_part(ctx, instruction_length, 5, opcode, vec4_tid, result, + sampler, texcoord); + if (ctx->instruction_controls == CONTROL_TEXLDB) + { + // ... include the bias operand, if applicable + spv_emit_word(ctx, SpvImageOperandsBiasMask); + spv_emit_word(ctx, bias); + } // if + pop_output(ctx); + + // Emit the result, finally. + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + SpirvResult r; + r.id = result; + r.tid = vec4_tid; + spv_assign_destarg(ctx, r); + } // else +} // emit_SPIRV_TEXLD + +/* IF: opens a selection construct on source arg 0 (loaded with mask 0x1). A non-bool condition is first compared against integer 0 with OpINotEqual. After OpSelectionMerge and OpBranchConditional, the merge label is pushed together with the byte offset of the last word just written (the false-target operand, which initially points at the merge label), and the 'then' block is opened. */ +void emit_SPIRV_IF(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id_cond = src0.id; + + // Predicate register is already boolean so no need to convert. + if (src0.tid != tid_bool) + { + uint32 id_zero = spv_getscalari(ctx, 0); + id_cond = spv_bumpid(ctx); + spv_emit(ctx, 5, SpvOpINotEqual, tid_bool, id_cond, src0.id, id_zero); + } // if + + uint32 id_label_branch = spv_bumpid(ctx); + uint32 id_label_merge = spv_bumpid(ctx); + spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_branch, id_label_merge); + spv_branch_push(ctx, id_label_merge, buffer_size(ctx->output) - 4); + spv_emit(ctx, 2, SpvOpLabel, id_label_branch); +} // emit_SPIRV_IF + +/* IFC: like IF, but the condition is produced by the comparison opcode from spv_get_comparison() applied to source args 0 and 1 (each loaded with mask 0x1). */ +void emit_SPIRV_IFC(Context *ctx) +{ + SpvOp cmp_op = spv_get_comparison(ctx); + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1); + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x1); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id_cond = spv_bumpid(ctx); + uint32 id_label_branch = spv_bumpid(ctx); + uint32 id_label_merge = spv_bumpid(ctx); + + spv_emit(ctx, 5, cmp_op, tid_bool, id_cond, src0.id, src1.id); + spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_branch, id_label_merge); + spv_branch_push(ctx, id_label_merge, buffer_size(ctx->output) 
- 4); + spv_emit(ctx, 2, SpvOpLabel, id_label_branch); +} // emit_SPIRV_IFC + +void emit_SPIRV_ELSE(Context *ctx) +{ + uint32 id_label_merge, patch_offset; + spv_branch_get(ctx, &id_label_merge, &patch_offset); + uint32 id_label_else = spv_bumpid(ctx); + + buffer_patch(ctx->output, patch_offset, &id_label_else, sizeof(id_label_else)); + spv_emit(ctx, 2, SpvOpBranch, id_label_merge); + spv_emit(ctx, 2, SpvOpLabel, id_label_else); +} // emit_SPIRV_ELSE + +void emit_SPIRV_ENDIF(Context *ctx) +{ + uint32 id_label_merge, patch_offset; + spv_branch_pop(ctx, &id_label_merge, &patch_offset); + + spv_emit(ctx, 2, SpvOpBranch, id_label_merge); + spv_emit(ctx, 2, SpvOpLabel, id_label_merge); +} // emit_SPIRV_ENDIF + +void emit_SPIRV_REP(Context *ctx) +{ + SpirvLoopInfo loop = {0}; + uint32 id_label_init = spv_bumpid(ctx); + loop.id_label_header = spv_bumpid(ctx); + uint32 id_label_cond = spv_bumpid(ctx); + uint32 id_label_body = spv_bumpid(ctx); + loop.id_label_continue = spv_bumpid(ctx); + loop.id_label_merge = spv_bumpid(ctx); + + // emit end of previous block + spv_emit(ctx, 2, SpvOpBranch, id_label_init); + + // emit loop init block + spv_emit(ctx, 2, SpvOpLabel, id_label_init); + // This block only exists to allow use of SpvOpPhi in loop header block. + // SpvOpPhi needs to refer to predecessor by it's label ID, so insert dummy + // block just so we know what the ID is. 
+ SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1); + + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + loop.tid_counter = src0.tid; + loop.id_counter = spv_bumpid(ctx); + loop.id_counter_next = spv_bumpid(ctx); + + uint32 id_cond = spv_bumpid(ctx); + uint32 id_zero = spv_getscalari(ctx, 0); + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header); + + // emit loop header block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_header); + spv_emit(ctx, 7, SpvOpPhi, loop.tid_counter, loop.id_counter, + src0.id, id_label_init, + loop.id_counter_next, loop.id_label_continue + ); + spv_emit(ctx, 4, SpvOpLoopMerge, loop.id_label_merge, loop.id_label_continue, 0); + spv_emit(ctx, 2, SpvOpBranch, id_label_cond); + + // emit loop condition block + spv_emit(ctx, 2, SpvOpLabel, id_label_cond); + spv_emit(ctx, 5, SpvOpINotEqual, tid_bool, id_cond, loop.id_counter, id_zero); + spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_body, loop.id_label_merge); + + // emit start of loop body block + spv_emit(ctx, 2, SpvOpLabel, id_label_body); + + spv_loop_push(ctx, &loop); +} // emit_SPIRV_REP + +void emit_SPIRV_ENDREP(Context *ctx) +{ + uint32 id_one = spv_getscalari(ctx, 1); + SpirvLoopInfo loop; + spv_loop_pop(ctx, &loop); + + // emit end of loop body block + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_continue); + + // emit loop continue block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_continue); + spv_emit(ctx, 5, SpvOpISub, loop.tid_counter, loop.id_counter_next, loop.id_counter, id_one); + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header); + + // emit start of next block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_merge); +} // emit_SPIRV_ENDREP + +void emit_SPIRV_LOOP(Context *ctx) +{ + SpirvLoopInfo loop = {0}; + uint32 id_label_init = spv_bumpid(ctx); + loop.id_label_header = spv_bumpid(ctx); + uint32 id_label_cond = spv_bumpid(ctx); + uint32 id_label_body = spv_bumpid(ctx); + loop.id_label_continue = spv_bumpid(ctx); + loop.id_label_merge = spv_bumpid(ctx); + + /* + 
i#.x = iteration count; every round we decrement it and terminate on 0. + i#.y = aL initial value; every round we subtract aL step from it. + i#.z = aL step value; + + We use copy of i# as iteration variable. Compared to rep loop, we only + need to add single instruction for extracting current aL value as single + int. + + rep i0 + for (int i = i0.x; i; i--) + + loop aL, i0 + for (int3 i = i0, int aL = i.y; i.x; i.x--, aL += i.z) + */ + + // emit end of previous block + spv_emit(ctx, 2, SpvOpBranch, id_label_init); + + // emit loop init block + spv_emit(ctx, 2, SpvOpLabel, id_label_init); + // This block only exists to allow use of SpvOpPhi in loop header block. + // SpvOpPhi needs to refer to predecessor by it's label ID, so insert dummy block just so we + // know what the ID is. + + // src0 has aL register. Does it hold any interesting information? + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x7); + uint32 tid_int = spv_get_type(ctx, STI_INT); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + + loop.tid_counter = src1.tid; + loop.id_counter = spv_bumpid(ctx); + loop.id_counter_next = spv_bumpid(ctx); + loop.id_aL = spv_bumpid(ctx); + uint32 id_counter_x = spv_bumpid(ctx); + + uint32 id_cond = spv_bumpid(ctx); + uint32 id_zero = spv_getscalari(ctx, 0); + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header); + + // emit loop header block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_header); + spv_emit(ctx, 7, SpvOpPhi, loop.tid_counter, loop.id_counter, + src1.id, id_label_init, + loop.id_counter_next, loop.id_label_continue + ); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_int, loop.id_aL, loop.id_counter, 1); + spv_emit(ctx, 4, SpvOpLoopMerge, loop.id_label_merge, loop.id_label_continue, 0); + spv_emit(ctx, 2, SpvOpBranch, id_label_cond); + + // emit loop condition block + spv_emit(ctx, 2, SpvOpLabel, id_label_cond); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_int, id_counter_x, loop.id_counter, 0); + spv_emit(ctx, 5, SpvOpINotEqual, tid_bool, id_cond, 
id_counter_x, id_zero); + spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_body, loop.id_label_merge); + + // emit start of loop body block + spv_emit(ctx, 2, SpvOpLabel, id_label_body); + + spv_loop_push(ctx, &loop); +} // emit_SPIRV_LOOP + +void emit_SPIRV_ENDLOOP(Context *ctx) +{ + uint32 tid_int = spv_get_type(ctx, STI_INT); + uint32 tid_ivec2 = spv_get_type(ctx, STI_IVEC2); + + uint32 id_minus_one = spv_getscalari(ctx, -1); + uint32 id_counter_z = spv_bumpid(ctx); + uint32 id_inc = spv_bumpid(ctx); + uint32 id_counter_xy = spv_bumpid(ctx); + uint32 id_counter_next_xy = spv_bumpid(ctx); + + SpirvLoopInfo loop; + spv_loop_pop(ctx, &loop); + + // emit end of loop body block + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_continue); + + // emit loop continue block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_continue); + spv_emit(ctx, 5, SpvOpCompositeExtract, tid_int, id_counter_z, loop.id_counter, 2); + spv_emit(ctx, 5, SpvOpCompositeConstruct, tid_ivec2, id_inc, id_minus_one, id_counter_z); + spv_emit(ctx, 7, SpvOpVectorShuffle, tid_ivec2, id_counter_xy, loop.id_counter, loop.id_counter, 0, 1); + spv_emit(ctx, 5, SpvOpIAdd, tid_ivec2, id_counter_next_xy, id_counter_xy, id_inc); + spv_emit(ctx, 5, SpvOpCompositeConstruct, loop.tid_counter, loop.id_counter_next, id_counter_next_xy, id_counter_z); + spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header); + + // emit start of next block + spv_emit(ctx, 2, SpvOpLabel, loop.id_label_merge); +} // emit_SPIRV_ENDLOOP + +void emit_SPIRV_BREAKC(Context *ctx) +{ + SpirvLoopInfo loop; + spv_loop_get(ctx, &loop); + + SpvOp cmp_op = spv_get_comparison(ctx); + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1); + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x1); + uint32 tid_bool = spv_get_type(ctx, STI_BOOL); + uint32 id_cond = spv_bumpid(ctx); + uint32 id_label_merge = spv_bumpid(ctx); + + // emit branch to merge target + spv_emit(ctx, 5, cmp_op, tid_bool, id_cond, src0.id, src1.id); + spv_emit(ctx, 3, SpvOpSelectionMerge, 
id_label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, loop.id_label_merge, id_label_merge); + spv_emit(ctx, 2, SpvOpLabel, id_label_merge); +} // emit_SPIRV_BREAKC + +void emit_SPIRV_BREAKP(Context *ctx) +{ + SpirvLoopInfo loop; + spv_loop_get(ctx, &loop); + + SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1); + + uint32 id_label_merge = spv_bumpid(ctx); + + spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, src0.id, loop.id_label_merge, id_label_merge); + spv_emit(ctx, 2, SpvOpLabel, id_label_merge); +} // emit_SPIRV_BREAKP + +void emit_SPIRV_LABEL(Context *ctx) +{ + const SourceArgInfo* arg = &ctx->source_args[0]; + RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum); + spv_check_read_reg_id(ctx, reg); + + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 tid_func = spv_get_type(ctx, STI_FUNC_VOID); + uint32 id_func = reg->spirv.iddecl; + uint32 id_label = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpFunction, tid_void, id_func, 0, tid_func); + spv_emit(ctx, 2, SpvOpLabel, id_label); + pop_output(ctx); +} // emit_SPIRV_LABEL + +void emit_SPIRV_RET(Context *ctx) +{ + spv_emit_func_end(ctx); +} // emit_SPIRV_RET + +void emit_SPIRV_CALL(Context *ctx) +{ + const SourceArgInfo* arg = &ctx->source_args[0]; + RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum); + spv_check_read_reg_id(ctx, reg); + + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 id_res = spv_bumpid(ctx); + uint32 id_func = reg->spirv.iddecl; + + push_output(ctx, &ctx->mainline); + if (ctx->loops > 0) + failf(ctx, "Function calls referencing aL not implemented."); + else + spv_emit(ctx, 4, SpvOpFunctionCall, tid_void, id_res, id_func); + + pop_output(ctx); +} // emit_SPIRV_CALL + +void emit_SPIRV_CALLNZ(Context *ctx) +{ + const SourceArgInfo* arg = &ctx->source_args[0]; + RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum); + spv_check_read_reg_id(ctx, reg); 
+ + SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x1); + + uint32 tid_void = spv_get_type(ctx, STI_VOID); + uint32 id_label_then = spv_bumpid(ctx); + uint32 id_func = reg->spirv.iddecl; + uint32 id_call_res = spv_bumpid(ctx); + uint32 id_label_merge = spv_bumpid(ctx); + + spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_merge, 0); + spv_emit(ctx, 4, SpvOpBranchConditional, src1.id, id_label_then, id_label_merge); + + spv_emit(ctx, 2, SpvOpLabel, id_label_then); + if (ctx->loops > 0) + failf(ctx, "Function calls referencing aL not implemented."); + else + spv_emit(ctx, 4, SpvOpFunctionCall, tid_void, id_call_res, id_func); + spv_emit(ctx, 2, SpvOpBranch, id_label_merge); + + spv_emit(ctx, 2, SpvOpLabel, id_label_merge); +} // emit_SPIRV_CALLNZ + +void emit_SPIRV_TEXLDD(Context *ctx) +{ + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + if (!reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum)) + { + fail(ctx, "TEXLDD using undeclared sampler"); + return; + } // if + + // Prep the result + SpirvResult result; + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_bumpid(ctx); + + SpirvResult texcoord = spv_load_srcarg_full(ctx, 0); + SpirvResult sampler = spv_load_srcarg_full(ctx, 1); + SpirvResult grad_x = spv_load_srcarg_full(ctx, 2); + SpirvResult grad_y = spv_load_srcarg_full(ctx, 3); + + // Generate the instruction. + // SpvOpImageSampleExplicitLod should ignore the components of the + // texcoord that it doesn't need, so we don't need to mask it. + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 8, SpvOpImageSampleExplicitLod, result.tid, result.id, sampler.id, + texcoord.id, SpvImageOperandsGradMask, grad_x.id, grad_y.id); + pop_output(ctx); + + // Emit the result, finally. 
+ assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + spv_assign_destarg(ctx, result); +} // emit_SPIRV_TEXLDD + +void emit_SPIRV_SETP(Context *ctx) +{ + SpirvResult src0 = spv_load_srcarg_full(ctx, 0); + SpirvResult src1 = spv_load_srcarg_full(ctx, 1); + + SpirvResult dst; + dst.tid = spv_get_type(ctx, STI_BVEC4); + dst.id = spv_bumpid(ctx); + + SpvOp cmp_op = spv_get_comparison(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, cmp_op, dst.tid, dst.id, src0.id, src1.id); + pop_output(ctx); + + spv_assign_destarg(ctx, dst); +} // emit_SPIRV_SETP + +void emit_SPIRV_TEXLDL(Context *ctx) +{ + const SourceArgInfo *samp_arg = &ctx->source_args[1]; + RegisterList *sampler_reg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum); + if (sampler_reg == NULL) + { + fail(ctx, "TEXLDL using undeclared sampler"); + return; + } // if + assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum)); + + // Prep the result + SpirvResult result; + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_bumpid(ctx); + + SpirvResult sampler = spv_load_srcarg_full(ctx, 1); + SpirvResult texcoord = spv_load_srcarg_full(ctx, 0); + + // The w component of texcoord_reg specifies the LOD. Extract it from texcoord_reg + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 id_lod = spv_bumpid(ctx); + + // Generate the instruction. + // SpvOpImageSampleExplicitLod should ignore the components of the + // texcoord that it doesn't need, so we don't need to mask it. + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 4 + 1, SpvOpCompositeExtract, tid_float, id_lod, texcoord.id, 3); + spv_emit(ctx, 7, SpvOpImageSampleExplicitLod, result.tid, result.id, sampler.id, + texcoord.id, SpvImageOperandsLodMask, id_lod); + pop_output(ctx); + + // Emit the result, finally. 
+ spv_assign_destarg(ctx, result); +} // emit_SPIRV_TEXLDL + +void emit_SPIRV_BREAK(Context *ctx) +{ + uint32 id_label_merge = spv_bumpid(ctx); + spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_merge, 0); + spv_emit(ctx, 2, SpvOpBranch, id_label_merge); + spv_emit(ctx, 2, SpvOpLabel, id_label_merge); +} // emit_SPIRV_BREAK + +void emit_SPIRV_TEXM3X2PAD(Context *ctx) +{ + // no-op ... work happens in emit_SPIRV_TEXM3X2TEX(). +} // emit_SPIRV_TEXM3X2PAD + +void emit_SPIRV_TEXM3X2TEX(Context *ctx) +{ + if (ctx->texm3x2pad_src0 == -1) + return; + + DestArgInfo *pDstInfo = &ctx->dest_arg; + + RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, pDstInfo->regnum); + RegisterList *pSrc0 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0); + RegisterList *pSrc1 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0); + RegisterList *pSrc2 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum); + RegisterList *pDst = spv_getreg(ctx, pDstInfo->regtype, pDstInfo->regnum); + + SpirvResult sampler = spv_loadreg(ctx, pSReg); + SpirvResult src0 = spv_loadreg(ctx, pSrc0); + SpirvResult src1 = spv_loadreg(ctx, pSrc1); + SpirvResult src2 = spv_loadreg(ctx, pSrc2); + SpirvResult src3 = spv_loadreg(ctx, pDst); + + src0 = spv_swizzle(ctx, src0, SPV_NO_SWIZZLE, 0x7); + src1 = spv_swizzle(ctx, src1, SPV_NO_SWIZZLE, 0x7); + src2 = spv_swizzle(ctx, src2, SPV_NO_SWIZZLE, 0x7); + src3 = spv_swizzle(ctx, src3, SPV_NO_SWIZZLE, 0x7); + + SpirvResult result; + uint32 tid_float = spv_get_type(ctx, STI_FLOAT); + uint32 tid_vec2 = spv_get_type(ctx, STI_VEC2); + result.tid = spv_get_type(ctx, STI_VEC4); + uint32 id_x = spv_bumpid(ctx); + uint32 id_y = spv_bumpid(ctx); + uint32 id_texcoord = spv_bumpid(ctx); + result.id = spv_bumpid(ctx); + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_x, src0.id, src1.id); + spv_emit(ctx, 5, SpvOpDot, tid_float, id_y, src2.id, src3.id); + spv_emit(ctx, 3+2, SpvOpCompositeConstruct, tid_vec2, id_texcoord, 
id_x, id_y); + spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_texcoord); + pop_output(ctx); + spv_assign_destarg(ctx, result); +} // emit_SPIRV_TEXM3X2TEX + +void emit_SPIRV_TEXM3X3PAD(Context *ctx) +{ + // no-op ... work happens in emit_SPIRV_TEXM3X3*(). +} // emit_SPIRV_TEXM3X3PAD + +void emit_SPIRV_TEXM3X3(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + // vec4( + // dot({src0}.xyz, {src1}.xyz), + // dot({src2}.xyz, {src3}.xyz), + // dot({dst}.xyz, {src4}.xyz), + // 1 + // ) + + uint32 id_1 = spv_getscalarf(ctx, 1.0f); + + SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx); + + SpirvResult result; + result.tid = spv_get_type(ctx, STI_VEC4); + result.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, result.tid, result.id, + setup.id_res_x, setup.id_res_y, setup.id_res_z, id_1 + ); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_TEXM3X3 + +void emit_SPIRV_TEXM3X3TEX(Context *ctx) +{ + if (ctx->texm3x3pad_src1 == -1) + return; + + RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum); + + // texture{ttypestr}({sampler}, + // vec3( + // dot({src0}.xyz, {src1}.xyz), + // dot({src2}.xyz, {src3}.xyz), + // dot({dst}.xyz, {src4}.xyz) + // ), + // ) + + SpirvResult sampler = spv_loadreg(ctx, pSReg); + + SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx); + + uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3); + uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4); + uint32 id_tc = spv_bumpid(ctx); + + SpirvResult result; + result.tid = tid_vec4; + result.id = spv_bumpid(ctx); + + push_output(ctx, &ctx->mainline); + spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_tc, + setup.id_res_x, setup.id_res_y, setup.id_res_z + ); + spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_tc); + pop_output(ctx); + + spv_assign_destarg(ctx, result); +} // emit_SPIRV_TEXM3X3TEX + 
+// TEXM3X3SPEC: build a normal from the three dot products computed by
+// spv_texm3x3_setup(), reflect the eye ray (src1.xyz) about it via
+// spv_reflect() and sample the destination register's sampler with the
+// reflected vector.
+// Does nothing when no TEXM3X3PAD pair was recorded (texm3x3pad_src1 == -1).
+void emit_SPIRV_TEXM3X3SPEC(Context *ctx)
+{
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum);
+    if (pSReg == NULL)
+    {
+        // Same policy as TEXLDD/TEXLDL: don't hand a NULL register to spv_loadreg().
+        fail(ctx, "TEXM3X3SPEC using undeclared sampler");
+        return;
+    } // if
+
+    RegisterList *pSrc5 = spv_getreg(ctx, ctx->source_args[1].regtype, ctx->source_args[1].regnum);
+
+    SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx);
+
+    uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+
+    push_output(ctx, &ctx->mainline);
+
+    uint32 id_normal = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_normal,
+        setup.id_res_x, setup.id_res_y, setup.id_res_z
+    );
+
+    SpirvResult src5 = spv_loadreg(ctx, pSrc5);
+
+    // eyeray = src5.xyz
+    uint32 id_eyeray = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_eyeray, src5.id, src5.id, 0, 1, 2);
+
+    uint32 id_reflected = spv_reflect(ctx, id_normal, id_eyeray);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+
+    SpirvResult result;
+    result.tid = spv_get_type(ctx, STI_VEC4);
+    result.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_reflected);
+
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, result);
+} // emit_SPIRV_TEXM3X3SPEC
+
+// TEXM3X3VSPEC: like TEXM3X3SPEC, but the eye ray is assembled from the .w
+// components of the values spv_texm3x3_setup() reports as id_dst_pad0,
+// id_dst_pad1 and id_dst instead of coming from a source register.
+// Does nothing when no TEXM3X3PAD pair was recorded (texm3x3pad_src1 == -1).
+void emit_SPIRV_TEXM3X3VSPEC(Context *ctx)
+{
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum);
+    if (pSReg == NULL)
+    {
+        // Same policy as TEXLDD/TEXLDL: don't hand a NULL register to spv_loadreg().
+        fail(ctx, "TEXM3X3VSPEC using undeclared sampler");
+        return;
+    } // if
+
+    SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx);
+
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+
+    push_output(ctx, &ctx->mainline);
+
+    uint32 id_normal = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_normal,
+        setup.id_res_x, setup.id_res_y, setup.id_res_z
+    );
+
+    // eyeray = vec3(dst_pad0.w, dst_pad1.w, dst.w)
+    uint32 id_eyeray_x = spv_bumpid(ctx);
+    uint32 id_eyeray_y = spv_bumpid(ctx);
+    uint32 id_eyeray_z = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_x, setup.id_dst_pad0, 3);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_y, setup.id_dst_pad1, 3);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_z, setup.id_dst, 3);
+
+    uint32 id_eyeray = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_eyeray,
+        id_eyeray_x, id_eyeray_y, id_eyeray_z
+    );
+
+    uint32 id_reflected = spv_reflect(ctx, id_normal, id_eyeray);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+
+    SpirvResult result;
+    result.tid = spv_get_type(ctx, STI_VEC4);
+    result.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_reflected);
+
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, result);
+} // emit_SPIRV_TEXM3X3VSPEC
+
+// TEXBEM / TEXBEML: both are handled by spv_texbem(); the second argument
+// is 0 for TEXBEM and 1 for TEXBEML.
+void emit_SPIRV_TEXBEM(Context *ctx)
+{
+    spv_texbem(ctx, 0);
+} // emit_SPIRV_TEXBEM
+
+void emit_SPIRV_TEXBEML(Context *ctx)
+{
+    spv_texbem(ctx, 1);
+} // emit_SPIRV_TEXBEML
+
+// EXPP: emitted exactly like EXP.
+void emit_SPIRV_EXPP(Context *ctx)
+{
+    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
+    emit_SPIRV_EXP(ctx); // I guess this is just partial precision EXP?
+} // emit_SPIRV_EXPP
+
+// LOGP: emitted exactly like LOG.
+void emit_SPIRV_LOGP(Context *ctx)
+{
+    // LOGP is just low-precision LOG, but we'll take the higher precision.
+    emit_SPIRV_LOG(ctx);
+} // emit_SPIRV_LOGP
+
+// DSX: dst = OpDPdx(src); operands are set up by spv_emit_begin_ds() and the
+// result is stored by spv_emit_end().
+void emit_SPIRV_DSX(Context *ctx)
+{
+    SpirvResult dst, src;
+    spv_emit_begin_ds(ctx, &dst, &src);
+    spv_emit(ctx, 4, SpvOpDPdx, dst.tid, dst.id, src.id);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_DSX
+
+// DSY: dst = OpDPdy(src); same structure as emit_SPIRV_DSX.
+void emit_SPIRV_DSY(Context *ctx)
+{
+    SpirvResult dst, src;
+    spv_emit_begin_ds(ctx, &dst, &src);
+    spv_emit(ctx, 4, SpvOpDPdy, dst.tid, dst.id, src.id);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_DSY
+
+void emit_SPIRV_RESERVED(Context *ctx)
+{
+    // do nothing; fails in the state machine.
+} // emit_SPIRV_RESERVED
+
+// !!! FIXME: The following are unimplemented even in the GLSL emitter.
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) +EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(BEM) + +#endif // SUPPORT_PROFILE_SPIRV + +#pragma GCC visibility pop diff --git a/profiles/mojoshader_profile_spirv.h b/profiles/mojoshader_profile_spirv.h new file mode 100644 index 00000000..ceea039f --- /dev/null +++ b/profiles/mojoshader_profile_spirv.h @@ -0,0 +1,202 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +#ifndef MOJOSHADER_PROFILE_SPIRV_H +#define MOJOSHADER_PROFILE_SPIRV_H + +#if SUPPORT_PROFILE_SPIRV + +// For baked-in constants in SPIR-V we want to store scalar values that we can +// use in composites, since OpConstantComposite takes result IDs as its constituents +// rather than value literals. +// We'll store these lists grouped by type and have the lists themselves +// ordered by value in the ctx.spirv struct.
+typedef struct ComponentList // node of a singly linked list of scalar constants
+{
+    // result id from OpConstant
+    uint32 id;
+    union { // the constant's value; which member is valid depends on the list it is stored in
+        float f;
+        int i;
+        uint32 u;
+    } v;
+    struct ComponentList *next; // next node
+} ComponentList;
+
+typedef struct SpirvLoopInfo // bookkeeping shared between REP/LOOP, BREAK* and ENDREP/ENDLOOP
+{
+    uint32 tid_counter; // type id of the loop counter value
+    uint32 id_counter; // id of the OpPhi result holding the counter in the header block
+    uint32 id_counter_next; // id of the updated counter computed in the continue block
+    uint32 id_aL; // id of the current aL value (set by LOOP only; zero for REP)
+    uint32 id_label_header; // label of the loop header block
+    uint32 id_label_continue; // label of the continue block
+    uint32 id_label_merge; // label of the block following the loop (break target)
+} SpirvLoopInfo;
+
+typedef enum SpirvType // base scalar type; occupies bits 2-3 of a SpirvTypeIdx
+{
+    ST_FLOAT = 0,
+    ST_SINT = 1,
+    ST_UINT = 2,
+    ST_BOOL = 3,
+} SpirvType;
+
+typedef enum SpirvStorageClass // occupies bits 0-1 of a pointer SpirvTypeIdx
+{
+    SC_INPUT = 0,
+    SC_OUTPUT = 1,
+    SC_PRIVATE = 2,
+    SC_UNIFORM_CONSTANT = 3,
+} SpirvStorageClass;
+
+/* Index into SpirvContext.tid[]. Not all combinations are used, but it's all rounded up to 64 so
+ * it's easier to work with. Core: (1 << 4) | (SpirvType << 2) | (components - 1).
+ * Pointer: (1 << 5) | (is_vec4 << 4) | (SpirvType << 2) | SpirvStorageClass. */
+typedef enum SpirvTypeIdx
+{
+    STI_VOID = 0,
+    STI_FUNC_VOID = 1,
+    STI_FUNC_LIT = 2,
+    STI_IMAGE2D = 3,
+    STI_IMAGE3D = 4,
+    STI_IMAGECUBE = 5,
+    STI_PTR_IMAGE2D = 6,
+    STI_PTR_IMAGE3D = 7,
+    STI_PTR_IMAGECUBE = 8,
+
+    // 7 unused entries (9..15)
+
+    // 4 base types * 4 vector sizes = 16 entries (16..31)
+    STI_FLOAT = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 0,
+    STI_VEC2 = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 1,
+    STI_VEC3 = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 2,
+    STI_VEC4 = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 3,
+    STI_INT = (0 << 5) | (1 << 4) | (ST_SINT << 2) | 0,
+    STI_IVEC2 = (0 << 5) | (1 << 4) | (ST_SINT << 2) | 1,
+    STI_IVEC3 = (0 << 5) | (1 << 4) | (ST_SINT << 2) | 2,
+    STI_IVEC4 = (0 << 5) | (1 << 4) | (ST_SINT << 2) | 3,
+    STI_UINT = (0 << 5) | (1 << 4) | (ST_UINT << 2) | 0,
+    STI_UVEC2 = (0 << 5) | (1 << 4) | (ST_UINT << 2) | 1,
+    STI_UVEC3 = (0 << 5) | (1 << 4) | (ST_UINT << 2) | 2,
+    STI_UVEC4 = (0 << 5) | (1 << 4) | (ST_UINT << 2) | 3,
+    STI_BOOL = (0 << 5) | (1 << 4) | (ST_BOOL << 2) | 0,
+    STI_BVEC2 = (0 << 5) | (1 << 4) | (ST_BOOL << 2) | 1,
+    STI_BVEC3 = (0 << 5) | (1 << 4) | (ST_BOOL << 2) | 2,
+    STI_BVEC4 = (0 << 5) | (1 << 4) | (ST_BOOL << 2) | 3,
+
+    // 2 dims (vec4 + scalar) * 4 base types * 4 storage classes = 32 entries (32..63)
+    STI_PTR_FLOAT_I = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_INPUT,
+    STI_PTR_FLOAT_O = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_OUTPUT,
+    STI_PTR_FLOAT_P = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_PRIVATE,
+    STI_PTR_FLOAT_U = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_INT_I = (1 << 5) | (0 << 4) | (ST_SINT << 2) | SC_INPUT,
+    STI_PTR_INT_O = (1 << 5) | (0 << 4) | (ST_SINT << 2) | SC_OUTPUT,
+    STI_PTR_INT_P = (1 << 5) | (0 << 4) | (ST_SINT << 2) | SC_PRIVATE,
+    STI_PTR_INT_U = (1 << 5) | (0 << 4) | (ST_SINT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_UINT_I = (1 << 5) | (0 << 4) | (ST_UINT << 2) | SC_INPUT,
+    STI_PTR_UINT_O = (1 << 5) | (0 << 4) | (ST_UINT << 2) | SC_OUTPUT,
+    STI_PTR_UINT_P = (1 << 5) | (0 << 4) | (ST_UINT << 2) | SC_PRIVATE,
+    STI_PTR_UINT_U = (1 << 5) | (0 << 4) | (ST_UINT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_BOOL_I = (1 << 5) | (0 << 4) | (ST_BOOL << 2) | SC_INPUT,
+    STI_PTR_BOOL_O = (1 << 5) | (0 << 4) | (ST_BOOL << 2) | SC_OUTPUT,
+    STI_PTR_BOOL_P = (1 << 5) | (0 << 4) | (ST_BOOL << 2) | SC_PRIVATE,
+    STI_PTR_BOOL_U = (1 << 5) | (0 << 4) | (ST_BOOL << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_VEC4_I = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_INPUT,
+    STI_PTR_VEC4_O = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_OUTPUT,
+    STI_PTR_VEC4_P = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_PRIVATE,
+    STI_PTR_VEC4_U = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_IVEC4_I = (1 << 5) | (1 << 4) | (ST_SINT << 2) | SC_INPUT,
+    STI_PTR_IVEC4_O = (1 << 5) | (1 << 4) | (ST_SINT << 2) | SC_OUTPUT,
+    STI_PTR_IVEC4_P = (1 << 5) | (1 << 4) | (ST_SINT << 2) | SC_PRIVATE,
+    STI_PTR_IVEC4_U = (1 << 5) | (1 << 4) | (ST_SINT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_UVEC4_I = (1 << 5) | (1 << 4) | (ST_UINT << 2) | SC_INPUT,
+    STI_PTR_UVEC4_O = (1 << 5) | (1 << 4) | (ST_UINT << 2) | SC_OUTPUT,
+    STI_PTR_UVEC4_P = (1 << 5) | (1 << 4) | (ST_UINT << 2) | SC_PRIVATE,
+    STI_PTR_UVEC4_U = (1 << 5) | (1 << 4) | (ST_UINT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_BVEC4_I = (1 << 5) | (1 << 4) | (ST_BOOL << 2) | SC_INPUT,
+    STI_PTR_BVEC4_O = (1 << 5) | (1 << 4) | (ST_BOOL << 2) | SC_OUTPUT,
+    STI_PTR_BVEC4_P = (1 << 5) | (1 << 4) | (ST_BOOL << 2) | SC_PRIVATE,
+    STI_PTR_BVEC4_U = (1 << 5) | (1 << 4) | (ST_BOOL << 2) | SC_UNIFORM_CONSTANT,
+
+    // 9 + 16 + 32 = 57 entries (+ 7 unused) = 64 slots
+
+    // Helpers
+    STI_LENGTH_, // == 64; one past the last pointer entry
+
+    STI_MISC_START_ = 0,
+    STI_MISC_END_ = 8, // NOTE(review): equals STI_PTR_IMAGECUBE, i.e. inclusive, unlike the exclusive *_END_ below - confirm against users
+    STI_CORE_START_ = (0 << 5) | (1 << 4),
+    STI_PTR_START_ = (1 << 5) | (0 << 4),
+    STI_CORE_END_ = STI_PTR_START_, // exclusive
+    STI_PTR_END_ = STI_LENGTH_, // exclusive
+} SpirvTypeIdx;
+
+// In addition to result ID we also need type ID (can't assume everything is vec4).
+typedef struct SpirvResult
+{
+    uint32 tid; // id of the value's type
+    uint32 id; // result id of the value
+} SpirvResult;
+
+typedef struct SpirvContext // per-shader state of the SPIR-V emitter
+{
+    uint32 id_vs_main_end;
+    // ext. glsl instructions have been imported
+    uint32 idext;
+    uint32 idmax; // presumably the id counter advanced by spv_bumpid() - TODO confirm
+    uint32 idmain;
+    uint32 id_func_lit;
+    uint32 inoutcount;
+    uint32 id_var_fragcoord;
+    uint32 id_var_vpos;
+    uint32 id_var_frontfacing;
+    uint32 id_var_vface;
+    // ids for types so we can reuse them after they're declared
+    uint32 tid[STI_LENGTH_]; // indexed by SpirvTypeIdx
+    uint32 idtrue;
+    uint32 idfalse;
+    uint32 id_0_0[4]; // id_<value>[4]: presumably one constant id per vector width (1..4) - TODO confirm
+    uint32 id_0_125[4];
+    uint32 id_0_25[4];
+    uint32 id_0_5[4];
+    uint32 id_1_0[4];
+    uint32 id_2_0[4];
+    uint32 id_4_0[4];
+    uint32 id_8_0[4];
+    uint32 id_flt_max[4];
+    struct {
+        uint32 idvec4;
+        uint32 idivec4;
+        uint32 idbool;
+    } uniform_arrays;
+    struct {
+        uint32 idvec4;
+    } constant_arrays;
+    struct { // heads of the scalar constant lists, grouped by type (float, int, uint)
+        ComponentList f;
+        ComponentList i;
+        ComponentList u;
+    } cl;
+
+    SpirvPatchTable patch_table;
+
+    // Required only on ps_1_3 and below, which only has 4 registers for this purpose.
+    struct {
+        uint32 idtexbem;
+        uint32 idtexbeml;
+    } sampler_extras[4];
+
+    int loop_stack_idx; // presumably the current depth of loop_stack - TODO confirm
+    SpirvLoopInfo loop_stack[32]; // records for the open REP/LOOP nest
+} SpirvContext;
+
+#endif // if SUPPORT_PROFILE_SPIRV
+
+#endif
diff --git a/spirv/GLSL.std.450.h b/spirv/GLSL.std.450.h
new file mode 100644
index 00000000..df31092b
--- /dev/null
+++ b/spirv/GLSL.std.450.h
@@ -0,0 +1,131 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, 
+ GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/spirv/spirv.h b/spirv/spirv.h new file mode 100644 index 00000000..d48488e9 --- /dev/null +++ b/spirv/spirv.h @@ -0,0 +1,871 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 3 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 3; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 
20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + 
SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + 
SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} 
SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + 
SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + 
SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, 
+ SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + 
SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + 
SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 
124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + 
SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + 
SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/utils/testparse.c b/utils/testparse.c index 2de9880e..bf7ce96e 100644 --- a/utils/testparse.c +++ b/utils/testparse.c @@ -9,7 +9,14 @@ #include #include -#include "mojoshader.h" +#include +#include +#include "../mojoshader.h" +#define __MOJOSHADER_INTERNAL__ 1 +#include "../mojoshader_internal.h" +#ifdef MOJOSHADER_HAS_SPIRV_TOOLS +#include "spirv-tools/libspirv.h" +#endif #ifdef _MSC_VER #define snprintf _snprintf @@ -257,6 +264,7 @@ static void print_attrs(const char *category, const int count, for (i = 0; i < count; i++) { static const char *usagenames[] = { + "", "position", "blendweight", "blendindices", "normal", "psize", "texcoord", "tangent", "binormal", "tessfactor", "positiont", "color", "fog", "depth", "sample" @@ -266,7 +274,7 @@ static void print_attrs(const char *category, const int count, if (a->index != 0) snprintf(numstr, sizeof (numstr), "%d", a->index); INDENT(); 
- printf(" * %s%s", usagenames[(int) a->usage], numstr); + printf(" * %s%s", usagenames[1 + (int) a->usage], numstr); if (a->name != NULL) printf(" (\"%s\")", a->name); printf("\n"); @@ -287,8 +295,8 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd, const MOJOSHADER_error *err = &pd->errors[i]; INDENT(); printf("%s:%d: ERROR: %s\n", - err->filename ? err->filename : fname, - err->error_position, err->error); + err->filename ? err->filename : fname, + err->error_position, err->error); } // for } // if else @@ -311,7 +319,7 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd, { static const char *typenames[] = { "float", "int", "bool" }; const MOJOSHADER_constant *c = &pd->constants[i]; - INDENT(); + INDENT(); printf(" * %d: %s (", c->index, typenames[(int) c->type]); if (c->type == MOJOSHADER_UNIFORM_FLOAT) { @@ -358,7 +366,7 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd, INDENT(); printf(" * %d: %s%s%s%s", u->index, constant, arrayof, - arrayrange, typenames[(int) u->type]); + arrayrange, typenames[(int) u->type]); if (u->name != NULL) printf(" (\"%s\")", u->name); printf("\n");
@@ -393,15 +401,74 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd,
 
         if (pd->output != NULL)
         {
+            const char *output;
+            int output_len;
             int i;
+
+            if (strcmp(pd->profile, "spirv") == 0)
+            {
+#if SUPPORT_PROFILE_SPIRV && defined(MOJOSHADER_HAS_SPIRV_TOOLS)
+                int binary_len = pd->output_len - sizeof(SpirvPatchTable);
+
+                uint32_t *words = (uint32_t *) pd->output;
+                size_t word_count = binary_len / 4;
+
+                // NULL-init so the checks and cleanup below never touch garbage.
+                spv_text text = NULL;
+                spv_diagnostic diagnostic = NULL;
+                spv_context ctx = spvContextCreate(SPV_ENV_UNIVERSAL_1_0);
+                int options = /*SPV_BINARY_TO_TEXT_OPTION_COLOR |*/ SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES;
+                spv_result_t disResult = spvBinaryToText(ctx, words, word_count, options, &text, &diagnostic);
+                if (disResult == SPV_SUCCESS)
+                {
+                    output = text->str;
+                    output_len = text->length;
+                } // if
+                else
+                {
+                    fprintf(stderr, "\nERROR DIAGNOSTIC: %s\n\n",
+                            diagnostic ? diagnostic->error : "(none)");
+                } // else
+
+                // Release the disassembler's diagnostic before it gets overwritten.
+                spvDiagnosticDestroy(diagnostic);
+                diagnostic = NULL;
+                spv_result_t validateResult = spvValidateBinary(ctx, words, word_count, &diagnostic);
+                if (validateResult != SPV_SUCCESS)
+                {
+                    fprintf(stderr, "\nVALIDATION FAILURE: %s\n\n",
+                            diagnostic ? diagnostic->error : "(none)");
+                } // if
+
+                if (disResult != SPV_SUCCESS || validateResult != SPV_SUCCESS)
+                {
+                    exit(EXIT_FAILURE);
+                } // if
+
+                // Done with the context; "text" stays alive for the print loop below.
+                spvDiagnosticDestroy(diagnostic);
+                spvContextDestroy(ctx);
+
+                // FIXME: we're currently just leaking this disassembly...
+#else
+                output = pd->output;
+                output_len = pd->output_len;
+#endif
+            } // if
+            else
+            {
+                output = pd->output;
+                output_len = pd->output_len;
+            } // else
+
             INDENT();
             printf("OUTPUT:\n");
             indent++;
             INDENT();
-            for (i = 0; i < pd->output_len; i++)
+            for (i = 0; i < output_len; i++)
             {
-                putchar((int) pd->output[i]);
-                if (pd->output[i] == '\n')
+                putchar((int) output[i]);
+                if (output[i] == '\n')
                     INDENT();
             } // for
             printf("\n");
@@ -674,6 +741,7 @@ static void print_effect(const char *fname, const MOJOSHADER_effect *effect, static int do_parse(const char *fname, const unsigned char *buf, const int len, const char *prof) { + int i; int retval = 0; // magic for an effects file (!!! FIXME: I _think_).
@@ -686,7 +754,26 @@ static int do_parse(const char *fname, const unsigned char *buf, const MOJOSHADER_effect *effect; effect = MOJOSHADER_parseEffect(prof, buf, len, NULL, 0, NULL, 0, Malloc, Free, 0); - retval = (effect->error_count == 0); + int error_count = effect->error_count; + for (i = 0; i < effect->object_count; i++) + { + MOJOSHADER_effectObject *object = &effect->objects[i]; + switch (object->type) + { + case MOJOSHADER_SYMTYPE_VERTEXSHADER: + case MOJOSHADER_SYMTYPE_PIXELSHADER: + if (!object->shader.is_preshader) + { + const MOJOSHADER_parseData *shader = object->shader.shader; + if (shader) + error_count += shader->error_count; + } // if + break; + default: + break; + } + } + retval = (error_count == 0); printf("EFFECT: %s\n", fname); print_effect(fname, effect, 1); MOJOSHADER_freeEffect(effect);