From 801b57d5379e1e7a7ee143ea6a9649cb856c5b6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kro=C5=A1l=C3=A1k?= <kroslakma@gmail.com>
Date: Tue, 31 Dec 2019 12:22:44 -0500
Subject: [PATCH] Add support for emitting SPIR-V shaders.

Co-authors include:
- Angus Holder <aholder97@gmail.com>
- Melker Narikka <meklu@meklu.org>
- Caleb Cornett <caleb.cornett@outlook.com>
- Ethan Lee <flibitijibibo@flibitijibibo.com>
---
 CMakeLists.txt                       |   15 +
 mojoshader.c                         |   36 +
 mojoshader.h                         |    5 +
 mojoshader_common.c                  |   43 +
 mojoshader_internal.h                |   42 +
 mojoshader_opengl.c                  |  289 +-
 profiles/mojoshader_profile.h        |   15 +
 profiles/mojoshader_profile_common.c |    4 +
 profiles/mojoshader_profile_spirv.c  | 4060 ++++++++++++++++++++++++++
 profiles/mojoshader_profile_spirv.h  |  202 ++
 spirv/GLSL.std.450.h                 |  131 +
 spirv/spirv.h                        |  871 ++++++
 utils/testparse.c                    |   97 +-
 13 files changed, 5798 insertions(+), 12 deletions(-)
 create mode 100644 profiles/mojoshader_profile_spirv.c
 create mode 100644 profiles/mojoshader_profile_spirv.h
 create mode 100644 spirv/GLSL.std.450.h
 create mode 100644 spirv/spirv.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 429c969d..466da148 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ OPTION(PROFILE_GLSL "Build MojoShader with support for the GLSL profile" ON)
 OPTION(PROFILE_ARB1 "Build MojoShader with support for the ARB1 profile" ON)
 OPTION(PROFILE_ARB1_NV "Build MojoShader with support for the ARB1_NV profile" ON)
 OPTION(PROFILE_METAL "Build MojoShader with support for the Metal profile" ON)
+OPTION(PROFILE_SPIRV "Build MojoShader with support for the SPIR-V profile" ON)
 OPTION(EFFECT_SUPPORT "Build MojoShader with support for Effect framework files" ON)
 OPTION(COMPILER_SUPPORT "Build MojoShader with support for HLSL source files" OFF)
 OPTION(FLIP_VIEWPORT "Build MojoShader with the ability to flip the GL viewport" OFF)
@@ -124,6 +125,9 @@ ENDIF(NOT PROFILE_ARB1_NV)
 IF(NOT PROFILE_METAL)
     ADD_DEFINITIONS(-DSUPPORT_PROFILE_METAL=0)
 ENDIF(NOT PROFILE_METAL)
+IF(NOT PROFILE_SPIRV)
+    ADD_DEFINITIONS(-DSUPPORT_PROFILE_SPIRV=0)
+ENDIF(NOT PROFILE_SPIRV)
 
 IF(EFFECT_SUPPORT)
     IF(UNIX)
@@ -159,6 +163,7 @@ ADD_LIBRARY(mojoshader ${LIBRARY_FORMAT}
     profiles/mojoshader_profile_d3d.c
     profiles/mojoshader_profile_glsl.c
     profiles/mojoshader_profile_metal.c
+    profiles/mojoshader_profile_spirv.c
     profiles/mojoshader_profile_common.c
 )
 IF(EFFECT_SUPPORT)
@@ -219,8 +224,18 @@ IF(COMPILER_SUPPORT)
     ENDIF(SDL2)
 ENDIF(COMPILER_SUPPORT)
 
+FIND_PATH(SPIRV_TOOLS_INCLUDE_DIR "spirv-tools/libspirv.h" PATH_SUFFIXES "include")
+FIND_LIBRARY(SPIRV_TOOLS_LIBRARY NAMES SPIRV-Tools-shared)
+IF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY)
+    INCLUDE_DIRECTORIES(${SPIRV_TOOLS_INCLUDE_DIR})
+    ADD_DEFINITIONS(-DMOJOSHADER_HAS_SPIRV_TOOLS)
+ENDIF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY)
+
 ADD_EXECUTABLE(testparse utils/testparse.c)
 TARGET_LINK_LIBRARIES(testparse mojoshader ${LIBM} ${CARBON_FRAMEWORK})
+IF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY)
+    TARGET_LINK_LIBRARIES(testparse ${SPIRV_TOOLS_LIBRARY})
+ENDIF(SPIRV_TOOLS_INCLUDE_DIR AND SPIRV_TOOLS_LIBRARY)
 ADD_EXECUTABLE(testoutput utils/testoutput.c)
 TARGET_LINK_LIBRARIES(testoutput mojoshader ${LIBM} ${CARBON_FRAMEWORK})
 IF(COMPILER_SUPPORT)
diff --git a/mojoshader.c b/mojoshader.c
index bee34f20..9491a82e 100644
--- a/mojoshader.c
+++ b/mojoshader.c
@@ -263,6 +263,15 @@ PREDECLARE_PROFILE(METAL)
 PREDECLARE_PROFILE(ARB1)
 #endif
 
+#if !SUPPORT_PROFILE_SPIRV
+#define PROFILE_EMITTER_SPIRV(op)
+#else
+#undef AT_LEAST_ONE_PROFILE
+#define AT_LEAST_ONE_PROFILE 1
+#define PROFILE_EMITTER_SPIRV(op) emit_SPIRV_##op,
+PREDECLARE_PROFILE(SPIRV)
+#endif
+
 #if !AT_LEAST_ONE_PROFILE
 #error No profiles are supported. Fix your build.
 #endif
@@ -300,6 +309,9 @@ static const Profile profiles[] =
 #if SUPPORT_PROFILE_METAL
     DEFINE_PROFILE(METAL)
 #endif
+#if SUPPORT_PROFILE_SPIRV
+    DEFINE_PROFILE(SPIRV)
+#endif
 };
 
 #undef DEFINE_PROFILE
@@ -321,6 +333,7 @@ static const struct { const char *from; const char *to; } profileMap[] =
      PROFILE_EMITTER_GLSL(op) \
      PROFILE_EMITTER_ARB1(op) \
      PROFILE_EMITTER_METAL(op) \
+     PROFILE_EMITTER_SPIRV(op) \
 }
 
 static int parse_destination_token(Context *ctx, DestArgInfo *info)
@@ -3445,6 +3458,28 @@ static MOJOSHADER_parseData *build_parsedata(Context *ctx)
         retval->preshader = ctx->preshader;
         retval->mainfn = ctx->mainfn;
 
+#if SUPPORT_PROFILE_SPIRV
+        if (strcmp(retval->profile, "spirv") == 0)
+        {
+            size_t i, max;
+            int binary_size = retval->output_len - sizeof(SpirvPatchTable);
+            uint32 *binary = (uint32 *) retval->output;
+            SpirvPatchTable *table = (SpirvPatchTable *) &retval->output[binary_size];
+
+            if (table->vpflip.offset)      binary[table->vpflip.offset]      = table->vpflip.location;
+            if (table->array_vec4.offset)  binary[table->array_vec4.offset]  = table->array_vec4.location;
+            if (table->array_ivec4.offset) binary[table->array_ivec4.offset] = table->array_ivec4.location;
+            if (table->array_bool.offset)  binary[table->array_bool.offset]  = table->array_bool.location;
+
+            for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++)
+            {
+                SpirvPatchEntry entry = table->samplers[i];
+                if (entry.offset)
+                    binary[entry.offset] = entry.location;
+            } // for
+        } // if
+#endif
+
         // we don't own these now, retval does.
         ctx->ctab.symbols = NULL;
         ctx->preshader = NULL;
@@ -3828,6 +3863,7 @@ int MOJOSHADER_maxShaderModel(const char *profile)
     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV3, 2);
     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV4, 3);
     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_METAL, 3);
+    PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_SPIRV, 3);
     #undef PROFILE_SHADER_MODEL
     return -1;  // unknown profile?
 } // MOJOSHADER_maxShaderModel
diff --git a/mojoshader.h b/mojoshader.h
index a289c7a1..d1e487ee 100644
--- a/mojoshader.h
+++ b/mojoshader.h
@@ -719,6 +719,11 @@ typedef struct MOJOSHADER_parseData
  */
 #define MOJOSHADER_PROFILE_METAL "metal"
 
+/*
+ * Profile string for SPIR-V binary output
+ */
+#define MOJOSHADER_PROFILE_SPIRV "spirv"
+
 /*
  * Determine the highest supported Shader Model for a profile.
  */
diff --git a/mojoshader_common.c b/mojoshader_common.c
index 807f28d8..9c84697d 100644
--- a/mojoshader_common.c
+++ b/mojoshader_common.c
@@ -1036,6 +1036,49 @@ ssize_t buffer_find(Buffer *buffer, const size_t start,
     return -1;  // no match found.
 } // buffer_find
 
+void buffer_patch(Buffer *buffer, const size_t start,
+                  const void *_data, const size_t len)
+{
+    // Overwrite `len` already-buffered bytes, `start` bytes in, with `_data`.
+    // No-op for an empty or out-of-range patch (checked without overflowing).
+    if ((len == 0) || (len > buffer->total_bytes) ||
+        (start > (buffer->total_bytes - len)))
+        return;
+
+    // Find the start point somewhere in the center of a buffer.
+    BufferBlock *item = buffer->head;
+    size_t pos = 0;
+    if (start > 0)
+    {
+        while (1)
+        {
+            assert(item != NULL);
+            if ((pos + item->bytes) > start)  // start is in this block.
+                break;
+
+            pos += item->bytes;
+            item = item->next;
+        } // while
+    } // if
+
+    const uint8 *data = (const uint8 *) _data;
+    size_t write_pos = start - pos;  // offset of the first byte inside `item`.
+    size_t write_remain = len;
+    size_t written = 0;
+    while (write_remain)
+    {
+        assert(item != NULL);  // should hold if total_bytes matches the block list.
+        size_t write_end = write_pos + write_remain;
+        if (write_end > item->bytes)
+            write_end = item->bytes;  // clamp to this block; rest goes to the next.
+        size_t to_write = write_end - write_pos;
+        memcpy(item->data + write_pos, data + written, to_write);
+        write_remain -= to_write;
+        written      += to_write;
+        write_pos     = 0;
+        item          = item->next;
+    } // while
+} // buffer_patch
 
 // Based on SDL_string.c's SDL_PrintFloat function
 size_t MOJOSHADER_printFloat(char *text, size_t maxlen, float arg)
diff --git a/mojoshader_internal.h b/mojoshader_internal.h
index 9bb03133..07dc9e5d 100644
--- a/mojoshader_internal.h
+++ b/mojoshader_internal.h
@@ -64,6 +64,10 @@
 #define SUPPORT_PROFILE_METAL 1
 #endif
 
+#ifndef SUPPORT_PROFILE_SPIRV
+#define SUPPORT_PROFILE_SPIRV 1
+#endif
+
 #if SUPPORT_PROFILE_ARB1_NV && !SUPPORT_PROFILE_ARB1
 #error nv profiles require arb1 profile. Fix your build.
 #endif
@@ -265,6 +269,8 @@ char *buffer_merge(Buffer **buffers, const size_t n, size_t *_len);
 void buffer_destroy(Buffer *buffer);
 ssize_t buffer_find(Buffer *buffer, const size_t start,
                     const void *data, const size_t len);
+void buffer_patch(Buffer *buffer, const size_t start,
+                  const void *data, const size_t len);
 
 
 
@@ -596,6 +602,42 @@ void MOJOSHADER_print_debug_token(const char *subsystem, const char *token,
                                   const unsigned int tokenlen,
                                   const Token tokenval);
 
+
+#if SUPPORT_PROFILE_SPIRV
+// Patching SPIR-V binaries before linking is needed to ensure locations do not
+// overlap between shader stages. Unfortunately, OpDecorate takes a Literal, so
+// we can't use the Result <id> of an OpSpecConstant and leave this up to the
+// specialization mechanism.
+// The patch table must be propagated from parsing to program linking. Since
+// MOJOSHADER_parseData is public and changing its ABI to expose the table is
+// undesirable, it is appended to MOJOSHADER_parseData::output (postflight buffer).
+typedef struct SpirvPatchEntry  // one patchable location word in the SPIR-V binary.
+{
+    uint32 offset;   // index (in uint32 words) into the binary; 0 is treated as "no entry".
+    int32 location;  // value stored at that word; consumers compare against -1 (presumably "unused" -- confirm).
+} SpirvPatchEntry;
+
+typedef struct SpirvPatchTable  // trailer appended after the SPIR-V words in parseData output.
+{
+    SpirvPatchEntry vpflip;
+    SpirvPatchEntry array_vec4;
+    SpirvPatchEntry array_ivec4;
+    SpirvPatchEntry array_bool;
+    SpirvPatchEntry samplers[16];
+    int32 location_count;  // added to the next stage's locations when linking.
+    union  // anonymous; which member is meaningful depends on the shader stage.
+    {
+        // VS only; non-0 when there is PSIZE output
+        uint32 vs_has_psize;
+
+        // PS only; offset to TEXCOORD0 location part of OpDecorate.
+        // Used to find OpDecorate and patch it to BuiltIn PointCoord when
+        // VS outputs PSIZE.
+        uint32 ps_texcoord0_offset;
+    };
+} SpirvPatchTable;
+#endif
+
 #endif  // _INCLUDE_MOJOSHADER_INTERNAL_H_
 
 
diff --git a/mojoshader_opengl.c b/mojoshader_opengl.c
index 11c8ec3b..a74604e3 100644
--- a/mojoshader_opengl.c
+++ b/mojoshader_opengl.c
@@ -36,6 +36,10 @@
 #include "GL/gl.h"
 #include "GL/glext.h"
 
+#if SUPPORT_PROFILE_SPIRV
+#include "spirv/spirv.h"
+#endif
+
 #ifndef GL_HALF_FLOAT_NV
 #define GL_HALF_FLOAT_NV 0x140B
 #endif
@@ -53,6 +57,19 @@
 #define GL_PROGRAM_POINT_SIZE 0x8642
 #endif
 
+// FIXME: ARB_gl_spirv in glext.h? -flibit
+#ifndef GL_ARB_gl_spirv
+#define GL_ARB_gl_spirv 1
+#define GL_SHADER_BINARY_FORMAT_SPIR_V_ARB 0x9551
+typedef void (APIENTRYP PFNGLSPECIALIZESHADERARBPROC) (
+    GLuint shader,
+    const GLchar* pEntryPoint,
+    GLuint numSpecializationConstants,
+    const GLuint* pConstantIndex,
+    const GLuint* pConstantValue
+);
+#endif
+
 struct MOJOSHADER_glShader
 {
     const MOJOSHADER_parseData *parseData;
@@ -206,6 +223,8 @@ struct MOJOSHADER_glContext
     int have_GL_ARB_half_float_vertex;
     int have_GL_OES_vertex_half_float;
     int have_GL_ARB_instanced_arrays;
+    int have_GL_ARB_ES2_compatibility;
+    int have_GL_ARB_gl_spirv;
 
     // Entry points...
     PFNGLGETSTRINGPROC glGetString;
@@ -267,6 +286,8 @@ struct MOJOSHADER_glContext
     PFNGLBINDPROGRAMARBPROC glBindProgramARB;
     PFNGLPROGRAMSTRINGARBPROC glProgramStringARB;
     PFNGLVERTEXATTRIBDIVISORARBPROC glVertexAttribDivisorARB;
+    PFNGLSHADERBINARYPROC glShaderBinary;
+    PFNGLSPECIALIZESHADERARBPROC glSpecializeShaderARB;
 
     // interface for profile-specific things.
     int (*profileMaxUniforms)(MOJOSHADER_shaderType shader_type);
@@ -381,7 +402,7 @@ static inline void toggle_gl_state(GLenum state, int val)
 
 // profile-specific implementations...
 
-#if SUPPORT_PROFILE_GLSL
+#if SUPPORT_PROFILE_GLSL || SUPPORT_PROFILE_SPIRV
 static inline GLenum glsl_shader_type(const MOJOSHADER_shaderType t)
 {
     // these enums match between core 2.0 and the ARB extensions.
@@ -415,7 +436,230 @@ static int impl_GLSL_MaxUniforms(MOJOSHADER_shaderType shader_type)
     return (int) val;
 } // impl_GLSL_MaxUniforms
 
+#if SUPPORT_PROFILE_SPIRV
+static const SpirvPatchTable* spv_getPatchTable(MOJOSHADER_glShader *shader)  // shader must be non-NULL.
+{
+    const MOJOSHADER_parseData *pd = shader->parseData;
+    size_t table_offset = pd->output_len - sizeof(SpirvPatchTable);  // table is read from the last sizeof(SpirvPatchTable) bytes; assumes output_len is at least that large.
+    return (const SpirvPatchTable *) (pd->output + table_offset);  // points into pd->output; not a copy.
+} // spv_getPatchTable
+
+static int spv_CompileShader(const MOJOSHADER_parseData *pd, int32 base_location, GLuint *s, int32 patch_pcoord)
+{
+    // Builds a GL shader from pd's SPIR-V, shifting patch-table locations by
+    //  base_location. Returns 1 with *s set, or 0 with *s zeroed on failure.
+    GLint ok = 0;
+    GLsizei data_len = pd->output_len - sizeof(SpirvPatchTable);
+    const GLvoid* data = pd->output;
+    uint32 *patched_data = NULL;
+    *s = 0;  // stays zero on every failure path.
+    if (base_location || patch_pcoord)
+    {
+        size_t i, max;
+        patched_data = (uint32 *) Malloc(data_len);
+        if (patched_data == NULL)
+            return 0;  // no scratch copy to patch; nothing to clean up yet.
+        memcpy(patched_data, data, data_len);
+        const SpirvPatchTable *table = (const SpirvPatchTable *) &pd->output[data_len];
+        if (table->vpflip.offset)      patched_data[table->vpflip.offset]      += base_location;
+        if (table->array_vec4.offset)  patched_data[table->array_vec4.offset]  += base_location;
+        if (table->array_ivec4.offset) patched_data[table->array_ivec4.offset] += base_location;
+        if (table->array_bool.offset)  patched_data[table->array_bool.offset]  += base_location;
+
+        for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++)
+        {
+            SpirvPatchEntry entry = table->samplers[i];
+            if (entry.offset)
+                patched_data[entry.offset] += base_location;
+        } // for
+
+        if (patch_pcoord && table->ps_texcoord0_offset)
+        {
+            // Subtract 3 to get from Location value offset to start of op.
+            uint32 op_base = table->ps_texcoord0_offset - 3;
+            assert(patched_data[op_base+0] == (SpvOpDecorate | (4 << 16)));
+            assert(patched_data[op_base+2] == SpvDecorationLocation);
+            patched_data[op_base+2] = SpvDecorationBuiltIn;
+            patched_data[op_base+3] = SpvBuiltInPointCoord;
+        } // if
+
+        data = patched_data;
+    } // if
+
+    const GLuint shader = ctx->glCreateShader(glsl_shader_type(pd->shader_type));
+    ctx->glShaderBinary(1, &shader, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, data, data_len);
+    ctx->glSpecializeShaderARB(shader, pd->mainfn, 0, NULL, NULL); // FIXME: Spec Constants? -flibit
+    ctx->glGetShaderiv(shader, GL_COMPILE_STATUS, &ok);
+    if (patched_data)
+        Free(patched_data);
+
+    if (!ok)
+    {
+        GLsizei len = 0;
+        ctx->glGetShaderInfoLog(shader, sizeof(error_buffer), &len,
+                             (GLchar *) error_buffer);
+        ctx->glDeleteShader(shader);
+        return 0;
+    } // if
+
+    *s = shader;
+    return 1;
+} // spv_CompileShader
+
+static int impl_SPIRV_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s)
+{
+    // Compilation postponed until linking, but generate dummy shader id so hash table lookups work.
+    *s = ctx->glCreateShader(glsl_shader_type(pd->shader_type));
+    return 1;
+} // impl_SPIRV_CompileShader
+
+static GLuint impl_SPIRV_LinkProgram(MOJOSHADER_glShader *vshader,
+                                     MOJOSHADER_glShader *pshader)
+{
+    GLint ok = 0;
+
+    // Compiles both stages and links them; returns the program, or 0 on failure.
+    // Compilation is postponed until linking because uniform locations are global to the
+    // program: the PS's locations are based after the VS's so the two stages never overlap.
+    GLuint vs_handle = 0;
+    int32 base_location = 0;
+    int32 patch_pcoord = 0;
+    if (vshader)
+    {
+        if (!spv_CompileShader(vshader->parseData, base_location, &vs_handle, patch_pcoord))
+            return 0;
+
+        const SpirvPatchTable* patch_table = spv_getPatchTable(vshader);
+        base_location += patch_table->location_count;
+        patch_pcoord = patch_table->vs_has_psize;
+    } // if
+
+    GLuint ps_handle = 0;
+    if (pshader && !spv_CompileShader(pshader->parseData, base_location, &ps_handle, patch_pcoord))
+    {
+        if (vs_handle) ctx->glDeleteShader(vs_handle);  // don't leak the compiled VS.
+        return 0;
+    } // if
+
+    if (ctx->have_opengl_2)
+    {
+        const GLuint program = ctx->glCreateProgram();
+        if (vs_handle)
+        {
+            ctx->glAttachShader(program, vs_handle);
+            ctx->glDeleteShader(vs_handle);
+        } // if
+        if (ps_handle)
+        {
+            ctx->glAttachShader(program, ps_handle);
+            ctx->glDeleteShader(ps_handle);
+        } // if
+        ctx->glLinkProgram(program);
+        ctx->glGetProgramiv(program, GL_LINK_STATUS, &ok);
+        if (!ok)
+        {
+            GLsizei len = 0;
+            ctx->glGetProgramInfoLog(program, sizeof (error_buffer),
+                                     &len, (GLchar *) error_buffer);
+            ctx->glDeleteProgram(program);
+            return 0;
+        } // if
+
+        return program;
+    } // if
+    else
+    {
+        const GLhandleARB program = ctx->glCreateProgramObjectARB();
+        assert(sizeof(program) == sizeof(GLuint));  // not always true on OS X!
+        if (vs_handle)
+        {
+            ctx->glAttachObjectARB(program, (GLhandleARB) vs_handle);
+            ctx->glDeleteObjectARB((GLhandleARB) vs_handle);
+        } // if
+        if (ps_handle)
+        {
+            ctx->glAttachObjectARB(program, (GLhandleARB) ps_handle);
+            ctx->glDeleteObjectARB((GLhandleARB) ps_handle);
+        } // if
+        ctx->glLinkProgramARB(program);
+        ctx->glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &ok);
+        if (!ok)
+        {
+            GLsizei len = 0;
+            ctx->glGetInfoLogARB(program, sizeof (error_buffer),
+                                 &len, (GLcharARB *) error_buffer);
+            ctx->glDeleteObjectARB(program);
+            return 0;
+        } // if
+
+        return (GLuint) program;
+    } // else
+} // impl_SPIRV_LinkProgram
+
+static void impl_SPIRV_DeleteShader(const GLuint shader)
+{
+    ctx->glDeleteShader(shader);
+} // impl_SPIRV_DeleteShader
+
+static void impl_SPIRV_DeleteProgram(const GLuint program)
+{
+    if (ctx->have_opengl_2)
+        ctx->glDeleteProgram(program);
+    else
+        ctx->glDeleteObjectARB((GLhandleARB) program);
+} // impl_SPIRV_DeleteProgram
+
+static GLint impl_SPIRV_GetAttribLocation(MOJOSHADER_glProgram *program, int idx)
+{
+    return idx;
+} // impl_SPIRV_GetAttribLocation
 
+static GLint impl_SPIRV_GetUniformLocation(MOJOSHADER_glProgram *program, MOJOSHADER_glShader *shader, int idx)
+{
+    return 0;  // no-op, we push this as one big-ass array now.
+} // impl_SPIRV_GetUniformLocation
+
+static GLint impl_SPIRV_GetSamplerLocation(MOJOSHADER_glProgram *program, MOJOSHADER_glShader *shader, int idx)
+{
+    const SpirvPatchTable *table = spv_getPatchTable(shader);
+    GLint location = table->samplers[idx].location;  // idx is not range-checked here; samplers[] has 16 entries.
+    if (location == -1)  // -1 is returned unchanged (presumably "sampler unused" -- confirm).
+        return location;
+
+    assert(location >= 0);
+    if (shader->parseData->shader_type == MOJOSHADER_TYPE_PIXEL)
+        location += spv_getPatchTable(program->vertex)->location_count;  // PS locations are offset by the VS location_count. NOTE(review): program->vertex is dereferenced unconditionally -- confirm it can't be NULL here.
+
+    return location;
+} // impl_SPIRV_GetSamplerLocation
+
+static void impl_SPIRV_FinalInitProgram(MOJOSHADER_glProgram *program)  // caches uniform-array locations from both patch tables on the program.
+{
+    const SpirvPatchTable *vs_table = spv_getPatchTable(program->vertex);  // NOTE(review): both shaders are dereferenced unconditionally,
+    const SpirvPatchTable *ps_table = spv_getPatchTable(program->fragment);  //  while impl_SPIRV_LinkProgram tolerates NULL -- confirm callers.
+    program->vs_float4_loc = vs_table->array_vec4.location;
+    program->vs_int4_loc   = vs_table->array_ivec4.location;
+    program->vs_bool_loc   = vs_table->array_bool.location;
+    program->ps_float4_loc = ps_table->array_vec4.location;
+    program->ps_int4_loc   = ps_table->array_ivec4.location;
+    program->ps_bool_loc   = ps_table->array_bool.location;
+    program->ps_vpos_flip_loc = ps_table->vpflip.location;
+#ifdef MOJOSHADER_FLIP_RENDERTARGET
+    program->vs_flip_loc   = vs_table->vpflip.location;
+#endif
+
+    int32 ps_base_location = vs_table->location_count;  // same offset impl_SPIRV_LinkProgram passes when compiling the PS.
+    if (ps_base_location)
+    {
+        if (program->ps_float4_loc != -1) program->ps_float4_loc += ps_base_location;  // -1 entries are left as-is.
+        if (program->ps_int4_loc   != -1) program->ps_int4_loc   += ps_base_location;
+        if (program->ps_bool_loc   != -1) program->ps_bool_loc   += ps_base_location;
+        if (program->ps_vpos_flip_loc != -1) program->ps_vpos_flip_loc += ps_base_location;
+    } // if
+} // impl_SPIRV_FinalInitProgram
+#endif // SUPPORT_PROFILE_SPIRV
+
+#if SUPPORT_PROFILE_GLSL
 static int impl_GLSL_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s)
 {
     GLint ok = 0;
@@ -463,6 +707,7 @@ static int impl_GLSL_CompileShader(const MOJOSHADER_parseData *pd, GLuint *s)
 
     return 1;
 } // impl_GLSL_CompileShader
+#endif // SUPPORT_PROFILE_GLSL
 
 
 static void impl_GLSL_DeleteShader(const GLuint shader)
@@ -664,7 +909,7 @@ static void impl_GLSL_PushSampler(GLint loc, GLuint sampler)
     ctx->glUniform1i(loc, sampler);
 } // impl_GLSL_PushSampler
 
-#endif  // SUPPORT_PROFILE_GLSL
+#endif // SUPPORT_PROFILE_GLSL || SUPPORT_PROFILE_SPIRV
 
 
 #if SUPPORT_PROFILE_ARB1
@@ -1026,6 +1271,8 @@ static void lookup_entry_points(MOJOSHADER_glGetProcAddress lookup, void *d)
     DO_LOOKUP(GL_ARB_vertex_program, PFNGLPROGRAMSTRINGARBPROC, glProgramStringARB);
     DO_LOOKUP(GL_NV_gpu_program4, PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC, glProgramLocalParameterI4ivNV);
     DO_LOOKUP(GL_ARB_instanced_arrays, PFNGLVERTEXATTRIBDIVISORARBPROC, glVertexAttribDivisorARB);
+    DO_LOOKUP(GL_ARB_ES2_compatibility, PFNGLSHADERBINARYPROC, glShaderBinary);
+    DO_LOOKUP(GL_ARB_gl_spirv, PFNGLSPECIALIZESHADERARBPROC, glSpecializeShaderARB);
 
     #undef DO_LOOKUP
 } // lookup_entry_points
@@ -1144,6 +1391,8 @@ static void load_extensions(MOJOSHADER_glGetProcAddress lookup, void *d)
     ctx->have_GL_ARB_half_float_vertex = 1;
     ctx->have_GL_OES_vertex_half_float = 1;
     ctx->have_GL_ARB_instanced_arrays = 1;
+    ctx->have_GL_ARB_ES2_compatibility = 1;
+    ctx->have_GL_ARB_gl_spirv = 1;
 
     lookup_entry_points(lookup, d);
 
@@ -1242,6 +1491,8 @@ static void load_extensions(MOJOSHADER_glGetProcAddress lookup, void *d)
     VERIFY_EXT(GL_ARB_half_float_vertex, 3, 0);
     VERIFY_EXT(GL_OES_vertex_half_float, -1, -1);
     VERIFY_EXT(GL_ARB_instanced_arrays, 3, 3);
+    VERIFY_EXT(GL_ARB_ES2_compatibility, 4, 1);
+    VERIFY_EXT(GL_ARB_gl_spirv, -1, -1);
 
     #undef VERIFY_EXT
 
@@ -1302,6 +1553,14 @@ static int valid_profile(const char *profile)
     } // else if
     #endif
 
+    #if SUPPORT_PROFILE_SPIRV
+    else if (strcmp(profile, MOJOSHADER_PROFILE_SPIRV) == 0)
+    {
+        MUST_HAVE(MOJOSHADER_PROFILE_SPIRV, GL_ARB_ES2_compatibility);
+        MUST_HAVE(MOJOSHADER_PROFILE_SPIRV, GL_ARB_gl_spirv);
+    } // else if
+    #endif
+
     #if SUPPORT_PROFILE_GLSLES
     else if (strcmp(profile, MOJOSHADER_PROFILE_GLSLES) == 0)
     {
@@ -1336,6 +1595,9 @@ static int valid_profile(const char *profile)
 
 
 static const char *profile_priorities[] = {
+#if SUPPORT_PROFILE_SPIRV
+    MOJOSHADER_PROFILE_SPIRV,
+#endif
 #if SUPPORT_PROFILE_GLSL120
     MOJOSHADER_PROFILE_GLSL120,
 #endif
@@ -1464,6 +1726,29 @@ MOJOSHADER_glContext *MOJOSHADER_glCreateContext(const char *profile,
     // !!! FIXME: generalize this part.
     if (profile == NULL) {}
 
+    // We don't check SUPPORT_PROFILE_SPIRV here, since valid_profile() does.
+#if SUPPORT_PROFILE_SPIRV
+    else if (strcmp(profile, MOJOSHADER_PROFILE_SPIRV) == 0)
+    {
+        ctx->profileMaxUniforms = impl_GLSL_MaxUniforms;
+        ctx->profileCompileShader = impl_SPIRV_CompileShader;
+        ctx->profileDeleteShader = impl_SPIRV_DeleteShader;
+        ctx->profileDeleteProgram = impl_SPIRV_DeleteProgram;
+        ctx->profileGetAttribLocation = impl_SPIRV_GetAttribLocation;
+        ctx->profileGetUniformLocation = impl_SPIRV_GetUniformLocation;
+        ctx->profileGetSamplerLocation = impl_SPIRV_GetSamplerLocation;
+        ctx->profileLinkProgram = impl_SPIRV_LinkProgram;
+        ctx->profileFinalInitProgram = impl_SPIRV_FinalInitProgram;
+        ctx->profileUseProgram = impl_GLSL_UseProgram;
+        ctx->profilePushConstantArray = impl_GLSL_PushConstantArray;
+        ctx->profilePushUniforms = impl_GLSL_PushUniforms;
+        ctx->profilePushSampler = impl_GLSL_PushSampler;
+        ctx->profileMustPushConstantArrays = impl_GLSL_MustPushConstantArrays;
+        ctx->profileMustPushSamplers = impl_GLSL_MustPushSamplers;
+        ctx->profileToggleProgramPointSize = impl_REAL_ToggleProgramPointSize;
+    } // if
+#endif
+
     // We don't check SUPPORT_PROFILE_GLSL120/ES here, since valid_profile() does.
 #if SUPPORT_PROFILE_GLSL
     else if ( (strcmp(profile, MOJOSHADER_PROFILE_GLSL) == 0) ||
diff --git a/profiles/mojoshader_profile.h b/profiles/mojoshader_profile.h
index 4b13b05a..f9db8a1b 100644
--- a/profiles/mojoshader_profile.h
+++ b/profiles/mojoshader_profile.h
@@ -12,6 +12,10 @@
 
 #include "../mojoshader_internal.h"
 
+#if SUPPORT_PROFILE_SPIRV
+#include "mojoshader_profile_spirv.h"
+#endif
+
 typedef struct ConstantsList
 {
     MOJOSHADER_constant constant;
@@ -38,6 +42,12 @@ typedef struct RegisterList
     int writemask;
     int misc;
     int written;
+#if SUPPORT_PROFILE_SPIRV
+    struct {
+        uint32 iddecl;
+        int is_ssa; // FIXME(krolli): Is there an existing way to tell constants and uniforms apart?
+    } spirv;
+#endif
     const VariableList *array;
     struct RegisterList *next;
 } RegisterList;
@@ -195,6 +205,11 @@ typedef struct Context
     int metal_need_header_graphics;
     int metal_need_header_texture;
 #endif
+
+#if SUPPORT_PROFILE_SPIRV
+    int branch_labels_patch_stack[32];
+    SpirvContext spirv;
+#endif
 } Context;
 
 // Use these macros so we can remove all bits of these profiles from the build.
diff --git a/profiles/mojoshader_profile_common.c b/profiles/mojoshader_profile_common.c
index ff7d014e..b0d84138 100644
--- a/profiles/mojoshader_profile_common.c
+++ b/profiles/mojoshader_profile_common.c
@@ -249,6 +249,10 @@ RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
         item->writemask = 0;
         item->misc = 0;
         item->written = 0;
+#if SUPPORT_PROFILE_SPIRV
+        item->spirv.iddecl = 0;
+        item->spirv.is_ssa = 0;
+#endif
         item->array = NULL;
         item->next = prev->next;
         prev->next = item;
diff --git a/profiles/mojoshader_profile_spirv.c b/profiles/mojoshader_profile_spirv.c
new file mode 100644
index 00000000..d5f30a0c
--- /dev/null
+++ b/profiles/mojoshader_profile_spirv.c
@@ -0,0 +1,4060 @@
+/**
+ * MojoShader; generate shader programs from bytecode of compiled
+ *  Direct3D shaders.
+ *
+ * Please see the file LICENSE.txt in the source's root directory.
+ *
+ *  This file written by Ryan C. Gordon.
+ */
+
+#define __MOJOSHADER_INTERNAL__ 1
+#include "mojoshader_profile.h"
+
+#pragma GCC visibility push(hidden)
+
+#if SUPPORT_PROFILE_SPIRV
+#include "spirv/spirv.h"
+#include "spirv/GLSL.std.450.h"
+#include <float.h>
+
+static const int SPV_NO_SWIZZLE = 0xE4; // 0xE4 == 11100100 ... 0 1 2 3. No swizzle.
+
+#define EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(op) \
+    void emit_SPIRV_##op(Context *ctx) { \
+        fail(ctx, #op " unimplemented in spirv profile"); \
+    } // defines emit_SPIRV_<op> as a stub that only reports the opcode as unimplemented.
+
+typedef struct SpirvTexm3x3SetupResult
+{
+    // ids of the vec4 load results (two "pad" values plus dst, going by the names -- TODO confirm against the setup code)
+    uint32 id_dst_pad0;
+    uint32 id_dst_pad1;
+    uint32 id_dst;
+
+    // ids of the float dot-product results, presumably one per output component (x, y, z)
+    uint32 id_res_x;
+    uint32 id_res_y;
+    uint32 id_res_z;
+} SpirvTexm3x3SetupResult;
+
+static const char *spv_get_uniform_array_varname(Context *ctx,
+        const RegisterType regtype, char *buf, const size_t len)
+{
+    const char *suffix = "";  // left empty when the register type is unknown.
+    if (regtype == REG_TYPE_CONST)
+        suffix = "vec4";
+    else if (regtype == REG_TYPE_CONSTINT)
+        suffix = "ivec4";
+    else if (regtype == REG_TYPE_CONSTBOOL)
+        suffix = "bool";
+    else
+        fail(ctx, "BUG: used a uniform we don't know how to define.");
+    // Name layout: "<shader type>_uniforms_<element type>", written to buf.
+    snprintf(buf, len, "%s_uniforms_%s", ctx->shader_type_str, suffix);
+    return buf;
+} // spv_get_uniform_array_varname
+
+static uint32 spv_bumpid(Context *ctx)
+{
+    return ++ctx->spirv.idmax;  // advance the id counter and hand out the new value.
+} // spv_bumpid
+
+static RegisterList *spv_getreg(Context *ctx, const RegisterType regtype, const int regnum)
+{
+    // Looks the register up in ctx->used_registers; a miss is reported as
+    // a failure and NULL is handed back.
+    RegisterList *found = reglist_find(&ctx->used_registers, regtype, regnum);
+    if (found != NULL)
+        return found;
+    failf(ctx, "register not found rt=%d, rn=%d", regtype, regnum);
+    return NULL;
+} // spv_getreg
+
+static void spv_componentlist_free(Context *ctx, ComponentList *cl)
+{
+    // Release every node, remembering the successor before the node goes away.
+    while (cl != NULL)
+    {
+        ComponentList *successor = cl->next;
+        Free(ctx, cl);
+        cl = successor;
+    } // while
+} // spv_componentlist_free
+
+static ComponentList *spv_componentlist_alloc(Context *ctx)
+{
+    ComponentList *node = (ComponentList *) Malloc(ctx, sizeof (*node));
+    if (node == NULL) return NULL;  // allocation failed, nothing to set up.
+    node->next = NULL;  // a fresh node is unlinked...
+    node->v.i = 0;      // ...and carries a zero value and a zero id.
+    node->id = 0;
+    return node;
+} // spv_componentlist_alloc
+
+static const char *get_SPIRV_varname_in_buf(Context *ctx, const RegisterType rt,
+                                           const int regnum, char *buf,
+                                           const size_t buflen)
+{
+    // SPIR-V currently uses the same register names as the D3D profile, so this just forwards to that helper.
+    return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
+} // get_SPIRV_varname_in_buf
+
+const char *get_SPIRV_varname(Context *ctx, const RegisterType rt,
+                                    const int regnum)
+{
+    // SPIR-V currently uses the same register names as the D3D profile, so this just forwards to that helper.
+    return get_D3D_varname(ctx, rt, regnum);
+} // get_SPIRV_varname
+
+
+static inline const char *get_SPIRV_const_array_varname_in_buf(Context *ctx,
+                                                const int base, const int size,
+                                                char *buf, const size_t buflen)
+{
+    snprintf(buf, buflen, "c_array_%d_%d", base, size);  // name is "c_array_<base>_<size>"; ctx is not used here.
+    return buf;  // same pointer the caller passed in.
+} // get_SPIRV_const_array_varname_in_buf
+
+
+const char *get_SPIRV_const_array_varname(Context *ctx, int base, int size)
+{
+    char buf[64];  // scratch space for the formatted "c_array_<base>_<size>" name.
+    get_SPIRV_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
+    return StrDup(ctx, buf);  // hands back StrDup's copy (presumably heap-allocated -- confirm who frees it).
+} // get_SPIRV_const_array_varname
+
+static uint32 spv_get_uniform_array_id(Context *ctx, const RegisterType regtype)
+{
+    // Result ids for the three uniform arrays are handed out lazily: the
+    // first request for a register class bumps the id counter and caches
+    // the value in ctx->spirv.uniform_arrays; later requests reuse it.
+    uint32 array_id = 0;
+
+    if (regtype == REG_TYPE_CONST)
+    {
+        array_id = ctx->spirv.uniform_arrays.idvec4;
+        if (!array_id)
+        {
+            array_id = spv_bumpid(ctx);
+            ctx->spirv.uniform_arrays.idvec4 = array_id;
+        } // if
+    } // if
+    else if (regtype == REG_TYPE_CONSTINT)
+    {
+        array_id = ctx->spirv.uniform_arrays.idivec4;
+        if (!array_id)
+        {
+            array_id = spv_bumpid(ctx);
+            ctx->spirv.uniform_arrays.idivec4 = array_id;
+        } // if
+    } // else if
+    else if (regtype == REG_TYPE_CONSTBOOL)
+    {
+        array_id = ctx->spirv.uniform_arrays.idbool;
+        if (!array_id)
+        {
+            array_id = spv_bumpid(ctx);
+            ctx->spirv.uniform_arrays.idbool = array_id;
+        } // if
+    } // else if
+    else
+        fail(ctx, "Unexpected register type used to access uniform array.");
+
+    return array_id;
+} // spv_get_uniform_array_id
+
+static void spv_emit_part_va(Context* ctx, uint32 word_count, uint32 argc, SpvOp op, va_list args)
+{
+    assert(ctx->output != NULL);
+    if (isfail(ctx))
+        return;  // a previous failure makes further output pointless.
+    // First word: word count shifted into the upper bits, opcode in the lower ones.
+    uint32 word = op | (word_count << 16);
+    buffer_append(ctx->output, &word, sizeof (word));
+    for (--argc; argc != 0; --argc)  // argc counts the opcode word as well.
+    {
+        word = va_arg(args, uint32);
+        buffer_append(ctx->output, &word, sizeof (word));
+    } // for
+} // spv_emit_part_va
+
+static void spv_emit_part(Context* ctx, uint32 word_count, uint32 argc, SpvOp op, ...)
+{
+    va_list args;
+    va_start(args, op);
+    spv_emit_part_va(ctx, word_count, argc, op, args);  // header announces word_count words, but only argc of them (opcode word included) are written here.
+    va_end(args);
+} // spv_emit_part
+
+static void spv_emit(Context *ctx, uint32 word_count, SpvOp op, ...)
+{
+    va_list args;
+    va_start(args, op);
+    spv_emit_part_va(ctx, word_count, word_count, op, args);  // complete instruction: argc equals word_count.
+    va_end(args);
+} // spv_emit
+
+// Appends a single raw word to the current output buffer.
+static void spv_emit_word(Context *ctx, uint32 word)
+{
+    assert(ctx->output != NULL);
+    // Once the context has failed, no further output is produced.
+    if (!isfail(ctx))
+        buffer_append(ctx->output, &word, sizeof (word));
+} // spv_emit_word
+
+// Appends str plus its NUL terminator, zero-padded up to a whole word.
+static void spv_emit_str(Context *ctx, const char *str)
+{
+    size_t len;
+    uint32 trail = 0;
+    assert(ctx->output != NULL);
+    if (isfail(ctx))
+        return;  // we failed previously, don't go on...
+
+    if (str == NULL)
+    {
+        spv_emit_word(ctx, 0);  // a NULL string is emitted as one zero word.
+        return;  // (a void function may not `return` an expression in ISO C.)
+    } // if
+    len = strlen(str) + 1;
+    buffer_append(ctx->output, str, len);
+    if (len % 4)
+        buffer_append(ctx->output, &trail, 4 - (len % 4));
+} // spv_emit_str
+
+// get the word count of a string as spv_emit_str() writes it (terminator and padding included)
+static uint32 spv_strlen(const char *str)
+{
+    const size_t len = (str != NULL) ? strlen(str) : 0;  // NULL is emitted as one zero word.
+    return (uint32) ((len / 4) + 1);
+} // spv_strlen
+
+// Writes an OpName (opcode word, target id, padded string) into ctx->globals.
+static void spv_output_name(Context *ctx, uint32 id, const char *str)
+{
+    if (!isfail(ctx))  // nothing is emitted once the context has failed.
+    {
+        push_output(ctx, &ctx->globals);
+        spv_emit_part(ctx, 2 + spv_strlen(str), 2, SpvOpName, id);
+        spv_emit_str(ctx, str);
+        pop_output(ctx);
+    } // if
+} // spv_output_name
+
+// Names a register "<shader type>_<register name>" through an OpName.
+static void spv_output_regname(Context *ctx, uint32 id, RegisterType regtype, int regnum)
+{
+    char name[64];
+    snprintf(name, sizeof (name), "%s_", ctx->shader_type_str);
+    const size_t prefix_len = strlen(name);  // the register name goes right after the prefix.
+    get_SPIRV_varname_in_buf(ctx, regtype, regnum, &name[prefix_len], sizeof (name) - prefix_len);
+    spv_output_name(ctx, id, name);
+} // spv_output_regname
+
+// Decorates `id` with the given BuiltIn; the OpDecorate lands in ctx->helpers.
+static void spv_output_builtin(Context *ctx, uint32 id, SpvBuiltIn builtin)
+{
+    if (!isfail(ctx))  // nothing is emitted once the context has failed.
+    {
+        push_output(ctx, &ctx->helpers);
+        spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationBuiltIn, builtin);
+        pop_output(ctx);
+    } // if
+} // spv_output_builtin
+
+static uint32 spv_output_location(Context *ctx, uint32 id, uint32 loc)
+{
+    push_output(ctx, &ctx->helpers);  // the Location decoration is written into ctx->helpers.
+    spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationLocation, loc);
+    pop_output(ctx);
+    return (buffer_size(ctx->helpers) >> 2) - 1;  // assuming buffer_size() is in bytes: index of the last word in helpers, i.e. the `loc` operand just written.
+} // spv_output_location
+
+static void spv_output_set_binding(Context *ctx, uint32 id, uint32 set, uint32 binding)
+{
+    if (!isfail(ctx))  // two decorations on one id: descriptor set, then binding.
+    {
+        push_output(ctx, &ctx->helpers);
+        spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationDescriptorSet, set);
+        spv_emit(ctx, 4, SpvOpDecorate, id, SpvDecorationBinding, binding);
+        pop_output(ctx);
+    } // if
+} // spv_output_set_binding
+
+static SpirvTypeIdx spv_change_base_type_vec_dim(SpirvTypeIdx sti, uint32 dim)
+{
+    // The low two bits of a core type index hold (dimension - 1); replace
+    // them with the requested dimension and keep the remaining bits.
+    const uint32 dim_bits = dim - 1;
+    assert(STI_CORE_START_ <= sti && sti < STI_CORE_END_);
+    assert(dim_bits < 4);
+
+    return (SpirvTypeIdx)(((SpirvTypeIdx)(sti & ~0x3)) | dim_bits);
+} // spv_change_base_type_vec_dim
+
+static uint32 spv_get_type(Context *ctx, SpirvTypeIdx tidx)  // returns the type id for tidx, emitting the type declaration on first use.
+{
+    assert(((uint32)tidx) < ((uint32)STI_LENGTH_));
+
+    uint32 tid = ctx->spirv.tid[tidx];
+    if (tid)
+        return tid;  // already declared earlier; reuse the cached id.
+
+    push_output(ctx, &ctx->mainline_intro);  // type declarations are written to mainline_intro.
+    if (STI_CORE_START_ <= tidx && tidx < STI_CORE_END_)
+    {
+        uint32 dim = tidx & 0x3;  // low two bits: vector dimension minus one.
+        SpirvType type = (SpirvType)((tidx >> 2) & 0x3);  // next two bits: scalar kind.
+        if (dim)
+        {
+            uint32 tid_base = spv_get_type(ctx, (SpirvTypeIdx)(tidx - dim));  // scalar type with the same kind.
+            tid = spv_bumpid(ctx);
+            spv_emit(ctx, 4, SpvOpTypeVector, tid, tid_base, dim + 1);
+        } // if
+        else
+        {
+            tid = spv_bumpid(ctx);
+            switch (type)
+            {
+                case ST_FLOAT: spv_emit(ctx, 3, SpvOpTypeFloat, tid, 32); break;
+                case ST_SINT: spv_emit(ctx, 4, SpvOpTypeInt, tid, 32, 1); break;
+                case ST_UINT: spv_emit(ctx, 4, SpvOpTypeInt, tid, 32, 0); break;
+                case ST_BOOL: spv_emit(ctx, 2, SpvOpTypeBool, tid); break;
+                default: assert(!"Unexpected value of SpirvType."); break;
+            } // switch
+        } // else
+    } // if
+    else if (STI_IMAGE2D <= tidx && tidx <= STI_IMAGECUBE)
+    {
+        static const SpvDim dim_table[] = {SpvDim2D, SpvDim3D, SpvDimCube};
+        SpvDim dim = dim_table[tidx - STI_IMAGE2D];
+        uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+        uint32 id_image = spv_bumpid(ctx);
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 9, SpvOpTypeImage, id_image, tid_float, dim, 0, 0, 0, 1, SpvImageFormatUnknown);
+        spv_emit(ctx, 3, SpvOpTypeSampledImage, tid, id_image);  // the cached/returned id is the sampled-image type, not the raw image type.
+    } // else if
+    else if (tidx == STI_VOID)
+    {
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 2, SpvOpTypeVoid, tid);
+    } // else if
+    else if (tidx == STI_FUNC_VOID)
+    {
+        uint32 tid_void = spv_get_type(ctx, STI_VOID);
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 3, SpvOpTypeFunction, tid, tid_void);  // function type: returns void, no parameters.
+    } // else if
+    else if (tidx == STI_FUNC_LIT)
+    {
+        uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 3 + 1, SpvOpTypeFunction, tid, tid_vec4, tid_vec4);  // function type: returns vec4, takes one vec4.
+    } // else if
+    else if (STI_PTR_START_ <= tidx && tidx < STI_PTR_END_)
+    {
+        uint32 dim = (tidx & (1 << 4)) ? 3 : 0;  // bit 4 picks a 4-component pointee over a scalar one.
+        SpirvType type = (SpirvType)((tidx >> 2) & 0x3);
+        uint32 tid_base = spv_get_type(ctx, (SpirvTypeIdx)((1 << 4) | (type << 2) | dim));  // NOTE(review): relies on the SpirvTypeIdx bit layout declared elsewhere -- confirm.
+        static const SpvStorageClass sc_map[] = {
+            SpvStorageClassInput,
+            SpvStorageClassOutput,
+            SpvStorageClassPrivate,
+            SpvStorageClassUniformConstant,
+        };
+        SpvStorageClass sc = sc_map[tidx & 0x3];  // low two bits of a pointer index select the storage class.
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 4, SpvOpTypePointer, tid, sc, tid_base);
+    } // else if
+    else if (STI_PTR_IMAGE2D <= tidx && tidx <= STI_PTR_IMAGECUBE)
+    {
+        uint32 tid_image = spv_get_type(ctx, (SpirvTypeIdx)(tidx - (STI_PTR_IMAGE2D - STI_IMAGE2D)));  // matching (sampled) image type.
+        tid = spv_bumpid(ctx);
+        spv_emit(ctx, 4, SpvOpTypePointer, tid, SpvStorageClassUniformConstant, tid_image);
+    } // else if
+    else
+        assert(!"Unexpected value of type index.");
+    pop_output(ctx);
+
+    ctx->spirv.tid[tidx] = tid;  // remember the id so the type is only declared once.
+    return tid;
+} // spv_get_type
+
+static uint32 spv_gettrue(Context *ctx)
+{
+    if (ctx->spirv.idtrue == 0)  // first use: declare the constant once.
+    {
+        const uint32 bool_type = spv_get_type(ctx, STI_BOOL);
+        const uint32 new_id = spv_bumpid(ctx);
+        push_output(ctx, &ctx->mainline_intro);
+        spv_emit(ctx, 3, SpvOpConstantTrue, bool_type, new_id);
+        pop_output(ctx);
+        ctx->spirv.idtrue = new_id;
+    } // if
+    return ctx->spirv.idtrue;
+} // spv_gettrue
+
+static uint32 spv_getfalse(Context *ctx)
+{
+    if (ctx->spirv.idfalse == 0)  // first use: declare the constant once.
+    {
+        const uint32 bool_type = spv_get_type(ctx, STI_BOOL);
+        const uint32 new_id = spv_bumpid(ctx);
+        push_output(ctx, &ctx->mainline_intro);
+        spv_emit(ctx, 3, SpvOpConstantFalse, bool_type, new_id);
+        pop_output(ctx);
+        ctx->spirv.idfalse = new_id;
+    } // if
+    return ctx->spirv.idfalse;
+} // spv_getfalse
+
+static uint32 spv_getext(Context *ctx)
+{
+    // Reserve the id on first request, then keep handing out the same one.
+    if (ctx->spirv.idext == 0)
+        ctx->spirv.idext = spv_bumpid(ctx);
+    return ctx->spirv.idext;
+} // spv_getext
+
+// Emits an OpConstant for the value held in cl->v into ctx->mainline_intro
+// and returns its new result id (0, with a failure set, for a bad type).
+static uint32 spv_output_scalar(Context *ctx, ComponentList *cl,
+                             MOJOSHADER_attributeType type)
+{
+    uint32 id_type, id_const;
+    switch (type)
+    {
+        case MOJOSHADER_ATTRIBUTE_FLOAT: id_type = spv_get_type(ctx, STI_FLOAT); break;
+        case MOJOSHADER_ATTRIBUTE_INT: id_type = spv_get_type(ctx, STI_INT); break;
+        case MOJOSHADER_ATTRIBUTE_UINT: id_type = spv_get_type(ctx, STI_UINT); break;
+        default:
+            failf(ctx, "%s: invalid attribute type %d", __func__, type);
+            return 0;
+    } // switch
+
+    id_const = spv_bumpid(ctx);
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 4, SpvOpConstant, id_type, id_const, cl->v.u);
+    pop_output(ctx);
+    return id_const;
+} // spv_output_scalar
+
+// The spv_getscalar* functions retrieve the result id of an OpConstant
+// instruction with the corresponding value v, or generate a new one.
+static uint32 spv_getscalarf(Context *ctx, float v)
+{
+    ComponentList *prev = &(ctx->spirv.cl.f), *cl = ctx->spirv.cl.f.next;
+    for (; cl != NULL; prev = cl, cl = cl->next)
+    {
+        if (v == cl->v.f)
+            return cl->id;  // already emitted, reuse it.
+        else if (v < cl->v.f)
+            break;  // list is kept in ascending order; insert here.
+    } // for
+    cl = spv_componentlist_alloc(ctx);
+    if (cl == NULL)
+        return 0;  // allocation failed; don't touch the missing node.
+    cl->next = prev->next;
+    prev->next = cl;
+    cl->v.f = v;
+    cl->id = spv_output_scalar(ctx, cl, MOJOSHADER_ATTRIBUTE_FLOAT);
+    return cl->id;
+} // spv_getscalarf
+
+// Integer counterpart of spv_getscalarf: reuse or create the OpConstant id for v.
+static uint32 spv_getscalari(Context *ctx, int v)
+{
+    ComponentList *prev = &(ctx->spirv.cl.i), *cl = ctx->spirv.cl.i.next;
+    for (; cl != NULL; prev = cl, cl = cl->next)
+    {
+        if (v == cl->v.i)
+            return cl->id;  // already emitted, reuse it.
+        else if (v < cl->v.i)
+            break;  // list is kept in ascending order; insert here.
+    } // for
+    cl = spv_componentlist_alloc(ctx);
+    if (!cl) return 0;  // allocation failed; nothing to link or emit.
+    cl->next = prev->next;
+    prev->next = cl;
+    cl->v.i = v;
+    cl->id = spv_output_scalar(ctx, cl, MOJOSHADER_ATTRIBUTE_INT);
+    return cl->id;
+} // spv_getscalari
+
+static uint32 spv_get_constant_composite(Context *ctx, uint32 tid, uint32* cache, float scalar)
+{
+    uint32 i, id, sid, dim = 1;  // width 1 unless tid is a known vector type.
+
+    assert(tid != 0);
+    if (tid == ctx->spirv.tid[STI_VEC4])
+        dim = 4;
+    else if (tid == ctx->spirv.tid[STI_VEC3])
+        dim = 3;
+    else if (tid == ctx->spirv.tid[STI_VEC2])
+        dim = 2;
+
+    id = cache[dim - 1];  // cache holds one id per width.
+    if (id != 0)
+        return id;
+
+    sid = spv_getscalarf(ctx, scalar);
+    id = sid;  // a lone scalar constant is its own result.
+    if (dim > 1)
+    {
+        id = spv_bumpid(ctx);
+        push_output(ctx, &ctx->mainline_intro);
+        spv_emit_part(ctx, 3 + dim, 3, SpvOpConstantComposite, tid, id);
+        for (i = 0; i < dim; i++)
+            spv_emit_word(ctx, sid);  // same scalar in every component.
+        pop_output(ctx);
+    } // if
+    cache[dim - 1] = id;
+    return id;
+} // spv_get_constant_composite
+
+static uint32 spv_get_zero(Context *ctx, uint32 tid)
+{
+    return spv_get_constant_composite(ctx, tid, ctx->spirv.id_0_0, 0.0f);  // constant of type tid with every component 0.0, cached in id_0_0.
+} // spv_get_zero
+
+static uint32 spv_get_one(Context *ctx, uint32 tid)
+{
+    return spv_get_constant_composite(ctx, tid, ctx->spirv.id_1_0, 1.0f);  // constant of type tid with every component 1.0, cached in id_1_0.
+} // spv_get_one
+
+static uint32 spv_get_flt_max(Context *ctx, uint32 tid)
+{
+    return spv_get_constant_composite(ctx, tid, ctx->spirv.id_flt_max, FLT_MAX);  // constant of type tid with every component FLT_MAX, cached in id_flt_max.
+} // spv_get_flt_max
+
+static uint32 spv_getvec4_zero(Context *ctx)
+{
+    return spv_get_zero(ctx, spv_get_type(ctx, STI_VEC4));  // vec4 with every component 0.0.
+} // spv_getvec4_zero
+
+static uint32 spv_getvec4_one(Context *ctx)
+{
+    return spv_get_one(ctx, spv_get_type(ctx, STI_VEC4));  // vec4 with every component 1.0.
+} // spv_getvec4_one
+
+// Builds a 4-component value of type `tid` with `value` repeated in each slot (GLSL: `vec4(value)`).
+static uint32 spv_vectorbroadcast(Context *ctx, uint32 tid, uint32 value)
+{
+    const uint32 id_vec = spv_bumpid(ctx);
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid, id_vec, value, value, value, value);
+    pop_output(ctx);
+    return id_vec;
+} // spv_vectorbroadcast
+
+static void spv_branch_push(Context *ctx, uint32 id_merge, uint32 patch_offset)
+{
+    assert(((size_t)ctx->branch_labels_stack_index) < STATICARRAYLEN(ctx->branch_labels_stack));  // only checked against branch_labels_stack's length.
+    int pos = ctx->branch_labels_stack_index++;
+    ctx->branch_labels_stack[pos] = id_merge;  // the two arrays are parallel: same slot holds the merge label id...
+    ctx->branch_labels_patch_stack[pos] = patch_offset;  // ...and the caller's patch offset.
+} // spv_branch_push
+
+static void spv_branch_get(Context *ctx, uint32* out_id_merge, uint32* out_patch_offset)
+{
+    assert(ctx->branch_labels_stack_index > 0);  // there must be an open branch to look at.
+    int pos = ctx->branch_labels_stack_index - 1;  // top of the stack; nothing is removed here.
+    *out_id_merge = ctx->branch_labels_stack[pos];
+    *out_patch_offset = ctx->branch_labels_patch_stack[pos];
+} // spv_branch_get
+
+static void spv_branch_pop(Context *ctx, uint32* out_id_merge, uint32* out_patch_offset)
+{
+    spv_branch_get(ctx, out_id_merge, out_patch_offset);  // report the top entry to the caller...
+    ctx->branch_labels_stack_index--;  // ...then drop it.
+} // spv_branch_pop
+
+static void spv_loop_push(Context *ctx, const SpirvLoopInfo *loop)
+{
+    assert(((size_t)ctx->spirv.loop_stack_idx) < STATICARRAYLEN(ctx->spirv.loop_stack));  // the stack must still have a free slot.
+    int pos = ctx->spirv.loop_stack_idx++;
+    ctx->spirv.loop_stack[pos] = *loop;  // stored by value; the caller's struct is not referenced afterwards.
+} // spv_loop_push
+
+static void spv_loop_get(Context *ctx, SpirvLoopInfo *loop)
+{
+    assert(ctx->spirv.loop_stack_idx > 0);  // there must be an open loop to look at.
+    int pos = ctx->spirv.loop_stack_idx - 1;  // top of the stack; nothing is removed here.
+    *loop = ctx->spirv.loop_stack[pos];  // copied out by value.
+} // spv_loop_get
+
+static void spv_loop_pop(Context *ctx, SpirvLoopInfo *loop)
+{
+    spv_loop_get(ctx, loop);  // report the top entry to the caller...
+    ctx->spirv.loop_stack_idx--;  // ...then drop it.
+} // spv_loop_pop
+
+static uint32 spv_loop_get_aL(Context *ctx)
+{
+    // Walk outward from the innermost open loop. Entries without an aL id
+    // (rep..endrep may be nested inside) are skipped until one has it.
+    int depth = ctx->spirv.loop_stack_idx;
+
+    while (--depth >= 0)
+    {
+        const uint32 candidate = ctx->spirv.loop_stack[depth].id_aL;
+        if (candidate != 0)
+            return candidate;
+    } // while
+
+    assert(!"Referencing loop counter register aL in code not part of loop..endloop region.");
+    return 0;
+} // spv_loop_get_aL
+
+static SpvOp spv_get_comparison(Context *ctx)
+{
+    // Lookup table indexed by ctx->instruction_controls; index 0 maps to
+    // SpvOpUndef, the remaining slots to ordered float comparisons.
+    static const SpvOp spv_cmp_ops[] = {
+        SpvOpUndef,
+        SpvOpFOrdGreaterThan,
+        SpvOpFOrdEqual,
+        SpvOpFOrdGreaterThanEqual,
+        SpvOpFOrdLessThan,
+        SpvOpFOrdNotEqual,
+        SpvOpFOrdLessThanEqual,
+    };
+
+    if (ctx->instruction_controls < STATICARRAYLEN(spv_cmp_ops))
+        return spv_cmp_ops[ctx->instruction_controls];
+
+    fail(ctx, "unknown comparison control");
+    return SpvOpUndef;
+} // spv_get_comparison
+
+static void spv_check_read_reg_id(Context *ctx, RegisterList *r)
+{
+    if (r->spirv.iddecl != 0)
+        return;  // the register already has an id; nothing to assign.
+
+    assert(r->regtype != REG_TYPE_SAMPLER || (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 1, 4)));
+    assert(r->regtype != REG_TYPE_TEXTURE || (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 1, 4)));
+    switch (r->regtype)
+    {
+        case REG_TYPE_SAMPLER: // s# (only ps_1_1)
+        case REG_TYPE_TEXTURE: // t# (only ps_1_1)
+        case REG_TYPE_INPUT: // v#
+        case REG_TYPE_TEMP: // r#
+        case REG_TYPE_CONST: // c#
+        case REG_TYPE_CONSTINT: // i#
+        case REG_TYPE_CONSTBOOL: // b#
+        case REG_TYPE_LABEL: // l#
+        case REG_TYPE_PREDICATE: // p0
+            r->spirv.iddecl = spv_bumpid(ctx);
+            break;
+
+        case REG_TYPE_LOOP: // aL
+            r->spirv.iddecl = spv_loop_get_aL(ctx);
+            break;
+
+        default:
+        {
+            char regname[64];
+            get_SPIRV_varname_in_buf(ctx, r->regtype, r->regnum, regname, sizeof (regname));
+            failf(ctx, "register type %s is unimplemented\n", regname);
+            break;
+        } // default
+    } // switch
+} // spv_check_read_reg_id
+
+static void spv_check_write_reg_id(Context *ctx, RegisterList *r)
+{
+    if (r->spirv.iddecl != 0)
+        return;  // the register already has an id; nothing to assign.
+
+    switch (r->regtype)
+    {
+        // No declaration is needed for these, so an id is made on first sight
+        case REG_TYPE_ADDRESS:
+        case REG_TYPE_TEMP:
+        case REG_TYPE_RASTOUT:
+        case REG_TYPE_COLOROUT:
+        case REG_TYPE_TEXCRDOUT:
+        case REG_TYPE_DEPTHOUT:
+        case REG_TYPE_ATTROUT:
+        case REG_TYPE_PREDICATE:
+            r->spirv.iddecl = spv_bumpid(ctx);
+            break;
+
+        // Anything else must have been declared explicitly, so a zero iddecl at this point is an error
+        default:
+        {
+            char regname[64];
+            get_SPIRV_varname_in_buf(ctx, r->regtype, r->regnum, regname, sizeof (regname));
+            failf(ctx, "tried to write to undeclared register %s\n", regname);
+            break;
+        } // default
+    } // switch
+} // spv_check_write_reg_id
+
+static uint32 spv_ptrimage_from_texturetype(Context *ctx, TextureType ttype)
+{
+    // Translate a sampler's texture type into the id of the matching
+    // image pointer type; unknown texture types flag a failure and
+    // yield 0.
+    if (ttype == TEXTURE_TYPE_2D)
+        return spv_get_type(ctx, STI_PTR_IMAGE2D);
+    if (ttype == TEXTURE_TYPE_CUBE)
+        return spv_get_type(ctx, STI_PTR_IMAGECUBE);
+    if (ttype == TEXTURE_TYPE_VOLUME)
+        return spv_get_type(ctx, STI_PTR_IMAGE3D);
+
+    fail(ctx, "BUG: used a sampler we don't know how to define.");
+    return 0;
+} // spv_ptrimage_from_texturetype
+
+static uint32 spv_image_from_texturetype(Context *ctx, TextureType ttype)
+{
+    // Translate a sampler's texture type into the id of the matching
+    // (sampled) image type; unknown texture types flag a failure and
+    // yield 0.
+    if (ttype == TEXTURE_TYPE_2D)
+        return spv_get_type(ctx, STI_IMAGE2D);
+    if (ttype == TEXTURE_TYPE_CUBE)
+        return spv_get_type(ctx, STI_IMAGECUBE);
+    if (ttype == TEXTURE_TYPE_VOLUME)
+        return spv_get_type(ctx, STI_IMAGE3D);
+
+    fail(ctx, "BUG: used a sampler we don't know how to define.");
+    return 0;
+} // spv_image_from_texturetype
+
+static uint32 spv_access_uniform(Context *ctx, SpirvTypeIdx sti_ptr, RegisterType regtype, uint32 id_offset)
+{
+    uint32 tid_ptr = spv_get_type(ctx, sti_ptr);  // pointer type of the element being addressed.
+    uint32 id_arr = spv_get_uniform_array_id(ctx, regtype);  // id of the uniform array for this register class.
+    uint32 id_access = spv_bumpid(ctx);
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpAccessChain, tid_ptr, id_access, id_arr, id_offset);  // id_offset is the id used as the index operand.
+    pop_output(ctx);
+    return id_access;  // id of the resulting pointer; the caller still has to load through it.
+} // spv_access_uniform
+
+static SpirvResult spv_loadreg(Context *ctx, RegisterList *r)  // returns the value id and type id for reading register r.
+{
+    const RegisterType regtype = r->regtype;
+
+    spv_check_read_reg_id(ctx, r);  // assigns r->spirv.iddecl if it is still 0 (for supported register types).
+
+    uint32 id_src = r->spirv.iddecl;
+    SpirvResult result;
+    if (regtype == REG_TYPE_SAMPLER)
+    {
+        RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, r->regnum);  // NOTE(review): result is not NULL-checked before use.
+        result.tid = spv_image_from_texturetype(ctx, (TextureType)sreg->index);  // the sampler entry's index field is read as its texture type.
+    } // if
+    else if (regtype == REG_TYPE_CONSTBOOL)
+    {
+        if (!r->spirv.is_ssa)
+            id_src = spv_access_uniform(ctx, STI_PTR_INT_U, regtype, r->spirv.iddecl);  // iddecl is passed as the index into the uniform array.
+
+        result.tid = spv_get_type(ctx, STI_INT);  // bool registers are read with the int type here.
+    } // else if
+    else if (regtype == REG_TYPE_CONSTINT)
+    {
+        if (!r->spirv.is_ssa)
+            id_src = spv_access_uniform(ctx, STI_PTR_IVEC4_U, regtype, r->spirv.iddecl);  // iddecl is passed as the index into the uniform array.
+
+        result.tid = spv_get_type(ctx, STI_IVEC4);
+    } // else if
+    else if (regtype == REG_TYPE_CONST)
+    {
+        if (!r->spirv.is_ssa)
+            id_src = spv_access_uniform(ctx, STI_PTR_VEC4_U, regtype, r->spirv.iddecl);  // iddecl is passed as the index into the uniform array.
+
+        result.tid = spv_get_type(ctx, STI_VEC4);
+    } // else if
+    else if (regtype == REG_TYPE_LOOP)
+        result.tid = spv_get_type(ctx, STI_INT);
+    else if (regtype == REG_TYPE_PREDICATE)
+        result.tid = spv_get_type(ctx, STI_BVEC4);
+    else
+        result.tid = spv_get_type(ctx, STI_VEC4);  // every other register type is read as a vec4.
+
+    // Registers flagged is_ssa already name a value, so their id is returned as-is without a load.
+    assert(r->spirv.is_ssa == 0 || r->spirv.is_ssa == 1);
+    if (r->spirv.is_ssa)
+    {
+        result.id = r->spirv.iddecl;
+        return result;
+    } // if
+
+    assert(id_src);
+    result.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 4, SpvOpLoad, result.tid, result.id, id_src);  // load through the pointer id computed above.
+    pop_output(ctx);
+
+    return result;
+} // spv_loadreg
+
+// Emits an OpVectorShuffle that picks, for every channel enabled in
+// `writemask`, the component of `arg` that `swizzle` assigns to that
+// channel. `rtid` is used as the result type; the id of the new value is
+// returned.
+static uint32 spv_emit_swizzle(Context *ctx, uint32 arg, uint32 rtid, const int swizzle, const int writemask)
+{
+    const uint32 result = spv_bumpid(ctx);
+    uint32 word_count = 5;  // opcode word, result type, result id, two vectors.
+    int chan;
+
+    // Each enabled channel contributes one component literal.
+    for (chan = 0; chan < 4; chan++)
+        word_count += (writemask >> chan) & 0x1;
+
+    push_output(ctx, &ctx->mainline);
+    // OpVectorShuffle wants two source vectors; a plain swizzle only reads
+    // one of them, so `arg` is simply supplied for both.
+    spv_emit_part(ctx, word_count, 5, SpvOpVectorShuffle, rtid, result, arg, arg);
+    for (chan = 0; chan < 4; chan++)
+    {
+        // swizzle stores two selector bits per channel, x in the low bits.
+        if ((writemask >> chan) & 0x1)
+            spv_emit_word(ctx, (swizzle >> (2 * chan)) & 0x3);
+    } // for
+    pop_output(ctx);
+
+    return result;
+} // spv_emit_swizzle
+
+// Applies a source swizzle to `arg`, yielding a value with as many components
+// as `writemask` selects (only the contiguous masks x, xy, xyz, xyzw are
+// handled). Returns `arg` untouched when there is nothing to do, or when the
+// write mask is unsupported (in which case the context is flagged as failed).
+SpirvResult spv_swizzle(Context *ctx, SpirvResult arg, const int swizzle, const int writemask)
+{
+    int i;
+
+    // Nothing to do, so return the same SSA value
+    if (no_swizzle(swizzle) && writemask_xyzw(writemask))
+        return arg;
+
+    assert(arg.tid != 0);
+    assert(writemask == 1 || writemask == 3 || writemask == 7 || writemask == 15);
+
+    SpirvTypeIdx sti_arg = STI_VOID;
+    for (i = STI_CORE_START_; i < STI_CORE_END_; i++)
+    {
+        if (ctx->spirv.tid[i] == arg.tid)
+        {
+            sti_arg = (SpirvTypeIdx)i;
+            break;
+        } // if
+    } // for
+    assert(sti_arg != STI_VOID);
+
+    // We should not leave any value undefined, as it may end up used (eg. dot
+    // product), which will make everything relying on its result undefined.
+    // Therefore, we specifically determine true dimensionality of the result.
+    int resdim = 0;
+    switch (writemask)
+    {
+        case 1:
+            resdim = 1;
+            break;
+
+        case 3:
+            resdim = 2;
+            break;
+
+        case 7:
+            resdim = 3;
+            break;
+
+        case 15:
+            resdim = 4;
+            break;
+
+        default:
+            failf(ctx, "Unexpected write mask in swizzle: 0x%X", writemask);
+            assert(0);
+            return arg;  // don't compute a type index from resdim == 0.
+    } // switch
+
+    SpirvTypeIdx sti_result = spv_change_base_type_vec_dim(sti_arg, resdim);
+
+    SpirvResult result = {0};
+    result.id = (resdim != 1 || sti_arg != sti_result) ? spv_bumpid(ctx) : arg.id;
+    result.tid = spv_get_type(ctx, sti_result);
+    assert(result.tid != 0);
+
+    push_output(ctx, &ctx->mainline);
+    if (resdim != 1)
+    {
+        // OpVectorShuffle takes two vectors to shuffle, but to do a swizzle
+        // operation we can just ignore the second argument (meaning it can be
+        // anything, and I am just making it `arg` for convenience)
+        spv_emit_part(ctx, 5 + resdim, 5, SpvOpVectorShuffle, result.tid, result.id, arg.id, arg.id);
+
+        for (i = 0; i < resdim; i++)
+            spv_emit_word(ctx, (swizzle >> (2*i)) & 0x3);
+    } // if
+    else if (sti_arg != sti_result)
+    {
+        // OpVectorShuffle may not produce a scalar. Instead we use OpCompositeExtract.
+        spv_emit(ctx, 5, SpvOpCompositeExtract, result.tid, result.id, arg.id, swizzle & 0x3);
+    } // else if
+
+    pop_output(ctx);
+
+    return result;
+} // spv_swizzle
+
+static SpirvResult spv_load_srcarg(Context *ctx, const size_t idx, const int writemask)
+{
+    SpirvResult result = {0};
+    if (idx >= STATICARRAYLEN(ctx->source_args))
+    {
+        fail(ctx, "Too many source args");
+        return result;
+    } // if
+
+    const SourceArgInfo *arg = &ctx->source_args[idx];
+
+    RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum);
+
+    if (arg->relative)
+    {
+        if (arg->regtype == REG_TYPE_INPUT)
+            fail(ctx, "relative input array access is unimplemented");
+        else
+        {
+            assert(arg->regtype == REG_TYPE_CONST);
+            const int arrayidx = arg->relative_array->index;
+            const int offset = arg->regnum - arrayidx;
+            assert(offset >= 0);
+
+            int is_constant = (arg->relative_array->constant != NULL);
+            uint32 id_array = 0;
+            if (is_constant)
+            {
+                id_array = ctx->spirv.constant_arrays.idvec4;
+                if (id_array == 0)
+                {
+                    id_array = spv_bumpid(ctx);
+                    ctx->spirv.constant_arrays.idvec4 = id_array;
+                } // if
+            } // if
+            else
+                id_array = spv_get_uniform_array_id(ctx, arg->regtype);
+
+            RegisterList *reg_rel = spv_getreg(ctx, arg->relative_regtype, arg->relative_regnum);
+
+            spv_check_read_reg_id(ctx, reg_rel);
+            spv_check_read_reg_id(ctx, reg);
+
+            uint32 id_int = spv_get_type(ctx, STI_INT);
+            uint32 id_offset;
+            if (reg_rel->regtype == REG_TYPE_LOOP)
+                id_offset = reg_rel->spirv.iddecl;
+            else
+            {
+                uint32 id_pint = spv_get_type(ctx, STI_PTR_INT_P);
+                uint32 id_compidx = spv_getscalari(ctx, arg->relative_component);
+                uint32 id_pcomp = spv_bumpid(ctx);
+                spv_emit(ctx, 5, SpvOpAccessChain, id_pint, id_pcomp, reg_rel->spirv.iddecl, id_compidx);
+
+                id_offset = spv_bumpid(ctx);
+                spv_emit(ctx, 4, SpvOpLoad, id_int, id_offset, id_pcomp);
+            } // else
+
+            if (!is_constant)
+            {
+                uint32 id_arraybase = reg->spirv.iddecl;
+                uint32 id_a = id_offset;
+                uint32 id_b = id_arraybase;
+                id_offset = spv_bumpid(ctx);
+                spv_emit(ctx, 5, SpvOpIAdd, id_int, id_offset, id_a, id_b);
+            } // if
+
+            if (offset)
+            {
+                uint32 id_a = id_offset;
+                uint32 id_b = spv_getscalari(ctx, offset);
+                id_offset = spv_bumpid(ctx);
+                spv_emit(ctx, 5, SpvOpIAdd, id_int, id_offset, id_a, id_b);
+            } // if
+
+            uint32 id_pvec4 = is_constant
+                ? spv_get_type(ctx, STI_PTR_VEC4_P)
+                : spv_get_type(ctx, STI_PTR_VEC4_U);
+            uint32 id_pvalue = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpAccessChain, id_pvec4, id_pvalue, id_array, id_offset);
+
+            result.tid = spv_get_type(ctx, STI_VEC4);
+            result.id = spv_bumpid(ctx);
+            spv_emit(ctx, 4, SpvOpLoad, result.tid, result.id, id_pvalue);
+        } // else
+    } // if
+    else
+        result = spv_loadreg(ctx, reg);
+
+    result = spv_swizzle(ctx, result, arg->swizzle, writemask);
+
+    switch (arg->src_mod)
+    {
+        case SRCMOD_NEGATE:
+        {
+            uint32 id_neg = spv_bumpid(ctx);
+            spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_neg, result.id);
+            result.id = id_neg;
+            break;
+        } // case
+
+        case SRCMOD_BIASNEGATE:
+        {
+            uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f);
+            uint32 id_tmp  = spv_bumpid(ctx);
+            uint32 id_new  = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp, result.id, id_half);
+            spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_BIAS:
+        {
+            uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f);
+            uint32 id_new  = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFSub, result.tid, id_new, result.id, id_half);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_SIGNNEGATE:
+        {
+            uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f);
+            uint32 id_two  = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f);
+            uint32 id_tmp0 = spv_bumpid(ctx);
+            uint32 id_tmp1 = spv_bumpid(ctx);
+            uint32 id_new  = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp0, result.id, id_half);
+            spv_emit(ctx, 5, SpvOpFMul, result.tid, id_tmp1, id_tmp0, id_two);
+            spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp1);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_SIGN:
+        {
+            uint32 id_half = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_0_5, 0.5f);
+            uint32 id_two  = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f);
+            uint32 id_tmp  = spv_bumpid(ctx);
+            uint32 id_new  = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFSub, result.tid, id_tmp, result.id, id_half);
+            spv_emit(ctx, 5, SpvOpFMul, result.tid, id_new, id_tmp, id_two);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_COMPLEMENT:
+        {
+            uint32 id_one = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_1_0, 1.0f);
+            uint32 id_new = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFSub, result.tid, id_new, id_one, result.id);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_X2NEGATE:
+        {
+            uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f);
+            uint32 id_tmp = spv_bumpid(ctx);
+            uint32 id_new = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFMul, result.tid, id_tmp, result.id, id_two);
+            spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_new, id_tmp);
+            result.id = id_new;
+            break;
+        } // case
+
+        case SRCMOD_X2:
+        {
+            uint32 id_two = spv_get_constant_composite(ctx, result.tid, ctx->spirv.id_2_0, 2.0f);
+            uint32 id_new = spv_bumpid(ctx);
+            spv_emit(ctx, 5, SpvOpFMul, result.tid, id_new, result.id, id_two);
+            result.id = id_new;
+            break;
+        } // case
+
+        // case SRCMOD_DZ:
+        //     fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
+        //     postmod_str = "_dz";
+        //     break;
+
+        // case SRCMOD_DW:
+        //     fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
+        //     postmod_str = "_dw";
+        //     break;
+
+        case SRCMOD_ABSNEGATE:
+        {
+            uint32 id_abs = spv_bumpid(ctx);
+            uint32 id_neg = spv_bumpid(ctx);
+            spv_emit(ctx, 5 + 1, SpvOpExtInst, result.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, result.id);
+            spv_emit(ctx, 4, SpvOpFNegate, result.tid, id_neg, id_abs);
+            result.id = id_neg;
+            break;
+        } // case
+
+        case SRCMOD_ABS:
+        {
+            uint32 id_abs = spv_bumpid(ctx);
+            spv_emit(ctx, 5 + 1, SpvOpExtInst, result.tid, id_abs, spv_getext(ctx), GLSLstd450FAbs, result.id);
+            result.id = id_abs;
+            break;
+        } // case
+
+        case SRCMOD_NOT:
+        {
+            uint32 id_not = spv_bumpid(ctx);
+            spv_emit(ctx, 4, SpvOpLogicalNot, result.tid, id_not, result.id);
+            result.id = id_not;
+            break;
+        } // case
+
+        case SRCMOD_NONE:
+        case SRCMOD_TOTAL:
+            break;  // stop compiler whining.
+
+        default:
+            failf(ctx, "unsupported source modifier %d", arg->src_mod);
+            return result;
+    } // switch
+
+    return result;
+} // spv_load_srcarg
+// Load source argument `idx` through spv_load_srcarg() with the full 0xF mask.
+static inline SpirvResult spv_load_srcarg_full(Context *ctx, const size_t idx)
+{
+    return spv_load_srcarg(ctx, idx, 0xF);
+} // spv_load_srcarg_full
+// Store `value` into the register named by ctx->dest_arg, applying result modifiers and the writemask.
+static void spv_assign_destarg(Context *ctx, SpirvResult value)
+{
+    const DestArgInfo *arg = &ctx->dest_arg;
+    RegisterList *reg = spv_getreg(ctx, arg->regtype, arg->regnum);
+
+    spv_check_write_reg_id(ctx, reg);
+
+    if (arg->writemask == 0)
+    {
+        // Return without updating the reg->spirv.iddecl (all-zero writemask = no-op)
+        return;
+    } // if
+    // Saturate: FClamp the value between the zero and one constants of its type.
+    if (arg->result_mod & MOD_SATURATE)
+    {
+        uint32 new_value = spv_bumpid(ctx);
+        push_output(ctx, &ctx->mainline);
+        spv_emit(ctx, 5 + 3, SpvOpExtInst,
+            value.tid, new_value, spv_getext(ctx), GLSLstd450FClamp,
+            value.id, spv_get_zero(ctx, value.tid), spv_get_one(ctx, value.tid)
+        );
+        pop_output(ctx);
+        value.id = new_value;
+    } // if
+
+    // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
+
+    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
+    assert((arg->result_mod & MOD_CENTROID) == 0);
+    // NOTE(review): this check runs after the saturate clamp above has already been emitted.
+    if (ctx->predicated)
+    {
+        fail(ctx, "predicated destinations unsupported");  // !!! FIXME
+        return;
+    } // if
+    // Result shift: scale the value by the constant factor selected by the shift code.
+    if (arg->result_shift)
+    {
+        float factor = 1.0f;
+        uint32* cache = ctx->spirv.id_1_0;
+        switch (arg->result_shift)
+        {
+            case 0x1: factor = 2.0f;   cache = ctx->spirv.id_2_0;   break;
+            case 0x2: factor = 4.0f;   cache = ctx->spirv.id_4_0;   break;
+            case 0x3: factor = 8.0f;   cache = ctx->spirv.id_8_0;   break;
+            case 0xD: factor = 0.125f; cache = ctx->spirv.id_0_125; break;
+            case 0xE: factor = 0.25f;  cache = ctx->spirv.id_0_25;  break;
+            case 0xF: factor = 0.5f;   cache = ctx->spirv.id_0_5;   break;
+            default:  // no return here: the multiply below is still emitted with the 1.0 defaults
+                failf(ctx, "unexpected result shift %d", arg->result_shift);
+        } // switch
+
+        uint32 id_factor = spv_get_constant_composite(ctx, value.tid, cache, factor);
+        push_output(ctx, &ctx->mainline);
+        uint32 id_new = spv_bumpid(ctx);
+        spv_emit(ctx, 5, SpvOpFMul, value.tid, id_new, value.id, id_factor);
+        pop_output(ctx);
+        value.id = id_new;
+    } // if
+    // Scalar destinations keep only component 0; partial writemasks are merged with the current value.
+    if (reg->regtype == REG_TYPE_DEPTHOUT
+     || isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
+    {
+        assert(arg->writemask == 0x1);
+        SpirvTypeIdx sti_reg = STI_FLOAT;
+        uint32 rtid = spv_get_type(ctx, sti_reg);
+        uint32 new_value = spv_bumpid(ctx);
+        push_output(ctx, &ctx->mainline);
+        spv_emit(ctx, 5, SpvOpCompositeExtract, rtid, new_value, value.id, 0);
+        pop_output(ctx);
+        value.tid = rtid;
+        value.id = new_value;
+    } // if
+    else if (!writemask_xyzw(arg->writemask))
+    {
+        SpirvTypeIdx sti_reg;
+        switch (reg->regtype)
+        {
+            case REG_TYPE_ADDRESS: sti_reg = STI_IVEC4; break;
+            case REG_TYPE_PREDICATE: sti_reg = STI_BVEC4; break;
+            default: sti_reg = STI_VEC4; break;
+        } // switch
+
+        uint32 rtid = spv_get_type(ctx, sti_reg);
+        uint32 new_value = spv_bumpid(ctx);
+        uint32 current_value = spv_bumpid(ctx);
+
+        push_output(ctx, &ctx->mainline);
+
+        spv_emit(ctx, 4, SpvOpLoad, rtid, current_value, reg->spirv.iddecl);
+
+        // output id is new_value
+        // select between current value and new value based on writemask
+        // in the shuffle, components [0, 3] are the new value, and components
+        // [4, 7] are the existing value
+        spv_emit_part(ctx, 5 + 4, 5, SpvOpVectorShuffle, rtid, new_value, value.id, current_value);
+        if (arg->writemask0) spv_emit_word(ctx, 0); else spv_emit_word(ctx, 4);
+        if (arg->writemask1) spv_emit_word(ctx, 1); else spv_emit_word(ctx, 5);
+        if (arg->writemask2) spv_emit_word(ctx, 2); else spv_emit_word(ctx, 6);
+        if (arg->writemask3) spv_emit_word(ctx, 3); else spv_emit_word(ctx, 7);
+
+        pop_output(ctx);
+
+        value.tid = rtid;
+        value.id = new_value;
+    } // if
+    // Only the register types listed here are stored to; anything else is reported as unimplemented.
+    switch (reg->regtype)
+    {
+        case REG_TYPE_OUTPUT:
+        case REG_TYPE_ADDRESS:
+        case REG_TYPE_TEMP:
+        case REG_TYPE_DEPTHOUT:
+        case REG_TYPE_COLOROUT:
+        case REG_TYPE_RASTOUT:
+        case REG_TYPE_ATTROUT:
+        case REG_TYPE_PREDICATE:
+            push_output(ctx, &ctx->mainline);
+            spv_emit(ctx, 3, SpvOpStore, reg->spirv.iddecl, value.id);
+            pop_output(ctx);
+            break;
+
+        default:
+        {
+            char varname[64];
+            get_SPIRV_varname_in_buf(ctx, reg->regtype, reg->regnum, varname, sizeof(varname));
+            failf(ctx, "register %s is unimplemented for storing", varname);
+            break;
+        } // default
+    } // switch
+} // spv_assign_destarg
+// Emit the vertex shader epilogue function that applies the compiled-in position fixups.
+static void spv_emit_vs_main_end(Context* ctx)
+{
+#if defined(MOJOSHADER_DEPTH_CLIPPING) || defined(MOJOSHADER_FLIP_RENDERTARGET)
+    if (!shader_is_vertex(ctx))
+        return;
+
+    RegisterList *reg;
+    for (reg = ctx->used_registers.next; reg != NULL; reg = reg->next)
+    {
+        if (reg->usage == MOJOSHADER_USAGE_POSITION &&
+            (reg->regtype == REG_TYPE_RASTOUT || reg->regtype == REG_TYPE_OUTPUT))
+            break;
+    } // for
+    if (reg == NULL)  // no position output: nothing to load or store below, so report it.
+    {
+        fail(ctx, "vertex shader has no position output register");
+        return;
+    } // if
+
+    uint32 tid_void = spv_get_type(ctx, STI_VOID);
+    uint32 tid_func = spv_get_type(ctx, STI_FUNC_VOID);
+    uint32 id_func = ctx->spirv.id_vs_main_end;
+    uint32 id_label = spv_bumpid(ctx);
+    assert(id_func != 0);
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpFunction, tid_void, id_func, SpvFunctionControlMaskNone, tid_func);
+    spv_emit(ctx, 2, SpvOpLabel, id_label);
+    SpirvResult output = spv_loadreg(ctx, reg);
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 id_new_output;
+
+#ifdef MOJOSHADER_FLIP_RENDERTARGET
+    // gl_Position.y = gl_Position.y * vpFlip;
+    uint32 tid_pvpflip = spv_bumpid(ctx);
+    uint32 id_old_y = spv_bumpid(ctx);
+    uint32 id_pvpflip = spv_bumpid(ctx);
+    uint32 id_vpflip = spv_bumpid(ctx);
+    uint32 id_new_y = spv_bumpid(ctx);
+    id_new_output = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_y, output.id, 1);
+    spv_emit(ctx, 4, SpvOpLoad, tid_float, id_vpflip, id_pvpflip);
+    spv_emit(ctx, 5, SpvOpFMul, tid_float, id_new_y, id_old_y, id_vpflip);
+    spv_emit(ctx, 6, SpvOpCompositeInsert, output.tid, id_new_output, id_new_y, output.id, 1);
+    output.id = id_new_output;
+
+    push_output(ctx, &ctx->mainline_intro);  // the vpFlip pointer type and variable go in the intro section
+    spv_emit(ctx, 4, SpvOpTypePointer, tid_pvpflip, SpvStorageClassUniformConstant, tid_float);
+    spv_emit(ctx, 4, SpvOpVariable, tid_pvpflip, id_pvpflip, SpvStorageClassUniformConstant);
+    pop_output(ctx);
+    spv_output_name(ctx, id_pvpflip, "vpFlip");
+    ctx->spirv.patch_table.vpflip.offset = spv_output_location(ctx, id_pvpflip, ~0u);
+#endif
+
+#ifdef MOJOSHADER_DEPTH_CLIPPING
+    // gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;
+    uint32 id_2 = spv_getscalarf(ctx, 2.0f);
+    uint32 id_old_z = spv_bumpid(ctx);
+    uint32 id_old_w = spv_bumpid(ctx);
+    uint32 id_2z = spv_bumpid(ctx);
+    uint32 id_new_z = spv_bumpid(ctx);
+    id_new_output = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_z, output.id, 2);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_old_w, output.id, 3);
+    spv_emit(ctx, 5, SpvOpFMul, tid_float, id_2z, id_old_z, id_2);
+    spv_emit(ctx, 5, SpvOpFSub, tid_float, id_new_z, id_2z, id_old_w);
+    spv_emit(ctx, 6, SpvOpCompositeInsert, output.tid, id_new_output, id_new_z, output.id, 2);
+    output.id = id_new_output;
+#endif
+
+    spv_emit(ctx, 3, SpvOpStore, reg->spirv.iddecl, output.id);
+    spv_emit(ctx, 1, SpvOpReturn);
+    spv_emit(ctx, 1, SpvOpFunctionEnd);
+    pop_output(ctx);
+    spv_output_name(ctx, id_func, "vs_epilogue");
+#endif
+} // spv_emit_vs_main_end
+// Emit the LIT helper function definition, but only if an ID was reserved for it (id_func_lit != 0).
+static void spv_emit_func_lit(Context *ctx)
+{
+    if (!ctx->spirv.id_func_lit)
+        return;
+
+    // vec4 LIT(const vec4 src)
+    // {
+    //     float retval_y, retval_z;
+    //     if (src.x > 0.0) {
+    //         retval_y = src.x;
+    //         if (src.y > 0.0) {
+    //             float power = clamp(src.w, -127.9961, 127.9961);
+    //             retval_z = pow(src.y, power);
+    //         } else {
+    //             retval_z = 0.0;
+    //         }
+    //     } else {
+    //         retval_y = 0.0;
+    //         retval_z = 0.0;
+    //     }
+    //     vec4 retval = vec4(1.0, retval_y, retval_z, 1.0);
+    //     return retval;
+    // }
+    // All IDs are allocated up front; the merge labels are referenced by branches before they are emitted.
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 tid_bool = spv_get_type(ctx, STI_BOOL);
+    uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+    uint32 tid_func = spv_get_type(ctx, STI_FUNC_LIT);
+    uint32 id_func = ctx->spirv.id_func_lit;
+    uint32 id_src = spv_bumpid(ctx);
+    uint32 id_block_start = spv_bumpid(ctx);
+    uint32 id_src_x = spv_bumpid(ctx);
+    uint32 id_src_x_pos = spv_bumpid(ctx);
+    uint32 id_0_0 = spv_get_zero(ctx, tid_float);
+    uint32 id_branch0_true = spv_bumpid(ctx);
+    uint32 id_src_y = spv_bumpid(ctx);
+    uint32 id_src_y_pos = spv_bumpid(ctx);
+    uint32 id_branch1_true = spv_bumpid(ctx);
+    uint32 id_src_w = spv_bumpid(ctx);
+    uint32 id_maxp = spv_getscalarf(ctx, 127.9961f);
+    uint32 id_maxp_neg = spv_getscalarf(ctx, -127.9961f);
+    uint32 id_power = spv_bumpid(ctx);
+    uint32 id_pow_result = spv_bumpid(ctx);
+    uint32 id_branch1_merge = spv_bumpid(ctx);
+    uint32 id_branch1_result = spv_bumpid(ctx);
+    uint32 id_branch0_merge = spv_bumpid(ctx);
+    uint32 id_result_y = spv_bumpid(ctx);
+    uint32 id_result_z = spv_bumpid(ctx);
+    uint32 id_1_0 = spv_get_one(ctx, tid_float);
+    uint32 id_result = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpFunction, tid_vec4, id_func, SpvFunctionControlMaskNone, tid_func);
+    spv_emit(ctx, 3, SpvOpFunctionParameter, tid_vec4, id_src);
+
+    // id_block_start: test src.x > 0.0, branching straight to the outer merge when it fails
+    spv_emit(ctx, 2, SpvOpLabel, id_block_start);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_x, id_src, 0);
+    spv_emit(ctx, 5, SpvOpFOrdGreaterThan, tid_bool, id_src_x_pos, id_src_x, id_0_0);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, id_branch0_merge, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_src_x_pos, id_branch0_true, id_branch0_merge);
+
+    // id_branch0_true: test src.y > 0.0, branching straight to the inner merge when it fails
+    spv_emit(ctx, 2, SpvOpLabel, id_branch0_true);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_y, id_src, 1);
+    spv_emit(ctx, 5, SpvOpFOrdGreaterThan, tid_bool, id_src_y_pos, id_src_y, id_0_0);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, id_branch1_merge, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_src_y_pos, id_branch1_true, id_branch1_merge);
+
+    // id_branch1_true: pow(src.y, clamp(src.w, -127.9961, 127.9961))
+    spv_emit(ctx, 2, SpvOpLabel, id_branch1_true);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src_w, id_src, 3);
+    spv_emit(ctx, 5 + 3, SpvOpExtInst,
+        tid_float, id_power, spv_getext(ctx), GLSLstd450FClamp, id_src_w, id_maxp_neg, id_maxp
+    );
+    spv_emit(ctx, 5 + 2, SpvOpExtInst,
+        tid_float, id_pow_result, spv_getext(ctx), GLSLstd450Pow, id_src_y, id_power
+    );
+    spv_emit(ctx, 2, SpvOpBranch, id_branch1_merge);
+
+    // id_branch1_merge: the pow() result, or 0.0 when arriving directly from id_branch0_true
+    spv_emit(ctx, 2, SpvOpLabel, id_branch1_merge);
+    spv_emit(ctx, 7, SpvOpPhi, tid_float, id_branch1_result,
+        id_pow_result, id_branch1_true,
+        id_0_0, id_branch0_true
+    );
+    spv_emit(ctx, 2, SpvOpBranch, id_branch0_merge);
+
+    // id_branch0_merge: pick retval_y/retval_z (both 0.0 when arriving from id_block_start), build the vec4
+    spv_emit(ctx, 2, SpvOpLabel, id_branch0_merge);
+    spv_emit(ctx, 7, SpvOpPhi, tid_float, id_result_y,
+        id_src_x, id_branch1_merge,
+        id_0_0, id_block_start
+    );
+    spv_emit(ctx, 7, SpvOpPhi, tid_float, id_result_z,
+        id_branch1_result, id_branch1_merge,
+        id_0_0, id_block_start
+    );
+    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_result,
+        id_1_0, id_result_y, id_result_z, id_1_0
+    );
+    spv_emit(ctx, 2, SpvOpReturnValue, id_result);
+    spv_emit(ctx, 1, SpvOpFunctionEnd);
+
+    pop_output(ctx);
+
+    spv_output_name(ctx, ctx->spirv.id_func_lit, "LIT");
+} // spv_emit_func_lit
+// Close the function being emitted with OpReturn/OpFunctionEnd, calling the vs epilogue first if needed.
+static void spv_emit_func_end(Context *ctx)
+{
+    push_output(ctx, &ctx->mainline);
+    // Only the first function closed in a vertex shader (id_vs_main_end still 0) calls the epilogue.
+#if defined(MOJOSHADER_DEPTH_CLIPPING) || defined(MOJOSHADER_FLIP_RENDERTARGET)
+    if (shader_is_vertex(ctx) && ctx->spirv.id_vs_main_end == 0)
+    {
+        ctx->spirv.id_vs_main_end = spv_bumpid(ctx);
+        uint32 tid_void = spv_get_type(ctx, STI_VOID);
+        uint32 id_res = spv_bumpid(ctx);
+        // NOTE(review): mainline is already the active output here, so this nested push looks redundant.
+        push_output(ctx, &ctx->mainline);
+        spv_emit(ctx, 4, SpvOpFunctionCall, tid_void, id_res, ctx->spirv.id_vs_main_end);
+        pop_output(ctx);
+    } // if
+#endif
+
+    spv_emit(ctx, 1, SpvOpReturn);
+    spv_emit(ctx, 1, SpvOpFunctionEnd);
+    pop_output(ctx);
+} // spv_emit_func_end
+// Tag vertex shader attribute `id` with the built-in or location that `usage`/`index` maps to.
+static void spv_link_vs_attributes(Context *ctx, uint32 id, MOJOSHADER_usage usage, int index)
+{
+    // Some usages map to specific ranges. Keep those in sync with spv_link_ps_attributes().
+    int location = 0;
+    switch (usage)
+    {
+        case MOJOSHADER_USAGE_POSITION: // built-in: finished here, no location
+            assert(index == 0);
+            spv_output_builtin(ctx, id, SpvBuiltInPosition);
+            return;
+        case MOJOSHADER_USAGE_POINTSIZE: // built-in: finished here, no location
+            spv_output_builtin(ctx, id, SpvBuiltInPointSize);
+            return;
+        case MOJOSHADER_USAGE_COLOR: // locations [0,1]
+            assert(index < 2);
+            location = 0 + index;
+            break;
+        case MOJOSHADER_USAGE_TEXCOORD: // locations [2,11]
+            assert(index < 10);
+            location = 2 + index;
+            break;
+        case MOJOSHADER_USAGE_NORMAL: // locations [12,21]
+            // FIXME: SM_3_0 allows basically any non-built-in semantic to use any index. We can
+            // either blow up the number of indices and use them sparsely, or patch them when linking
+            // vertex and pixel shader together.
+            assert(index < 10);
+            location = 12 + index;
+            break;
+        case MOJOSHADER_USAGE_FOG: // location [12]
+            // FIXME: Missing PS handling.
+            location = 12;
+            break;
+        case MOJOSHADER_USAGE_TANGENT: // location [13]
+            // FIXME: Missing PS handling.
+            assert(index == 0);
+            location = 13 + index;
+            break;
+        default: // nothing is written for usages this profile does not know
+            failf(ctx, "unexpected attribute usage %d in vertex shader", usage);
+            return;
+    } // switch
+    spv_output_location(ctx, id, location);
+} // spv_link_vs_attributes
+// Declare and/or decorate pixel shader attribute `id` according to its register type, usage and index.
+static void spv_link_ps_attributes(Context *ctx, uint32 id, RegisterType regtype, MOJOSHADER_usage usage, int index)
+{
+    switch (regtype)
+    {
+        case REG_TYPE_COLOROUT:
+            // nothing to do for color, OpenGL should hook it up automatically??
+            break;
+        case REG_TYPE_INPUT: // v# (MOJOSHADER_USAGE_COLOR aka `oC#` in vertex shader)
+            switch (usage)
+            {
+                case MOJOSHADER_USAGE_COLOR:
+                    assert(index < 2);
+                    spv_output_location(ctx, id, 0 + index);
+                    break;
+                case MOJOSHADER_USAGE_TEXCOORD:
+                {
+                    assert(index < 10);
+                    uint32 location_offset = spv_output_location(ctx, id, 2 + index);
+                    if (index == 0)
+                        ctx->spirv.patch_table.ps_texcoord0_offset = location_offset;
+                    break;
+                } // case
+                case MOJOSHADER_USAGE_NORMAL:
+                    assert(index < 10);
+                    spv_output_location(ctx, id, 12 + index);
+                    break;
+                default:
+                    failf(ctx, "unexpected attribute usage %d in pixel shader", usage);
+                    break;
+            } // switch
+            break;
+        case REG_TYPE_TEXTURE: // t# (MOJOSHADER_USAGE_TEXCOORD aka `oT#` in vertex shader)
+            assert(index < 10);
+            spv_output_location(ctx, id, 2 + index);
+            break;
+        case REG_TYPE_DEPTHOUT:
+            spv_output_builtin(ctx, id, SpvBuiltInFragDepth);
+            break;
+        case REG_TYPE_MISCTYPE:
+            // inputs
+            switch ((MiscTypeType)index)
+            {
+                case MISCTYPE_TYPE_POSITION: // vPos
+                {
+                    // In SM3.0 vPos only has x and y defined, but we should be fine to leave the z and w attributes in
+                    // that SpvBuiltInFragCoord gives
+                    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+                    uint32 tid_vec2 = spv_get_type(ctx, STI_VEC2);
+                    uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+                    uint32 tid_pvec4i = spv_get_type(ctx, STI_PTR_VEC4_I);
+                    uint32 tid_pvec2u = spv_bumpid(ctx);
+                    uint32 tid_pvec4p = spv_get_type(ctx, STI_PTR_VEC4_P);
+
+                    uint32 id_var_fragcoord = spv_bumpid(ctx);
+                    uint32 id_var_vposflip = spv_bumpid(ctx);
+                    uint32 id_var_vpos = id;
+
+                    uint32 id_fragcoord = spv_bumpid(ctx);
+                    uint32 id_fragcoord_y = spv_bumpid(ctx);
+                    uint32 id_vposflip = spv_bumpid(ctx);
+                    uint32 id_vposflip_x = spv_bumpid(ctx);
+                    uint32 id_vposflip_y = spv_bumpid(ctx);
+                    uint32 id_tmp = spv_bumpid(ctx);
+                    uint32 id_vpos_y = spv_bumpid(ctx);
+                    uint32 id_vpos = spv_bumpid(ctx);
+
+                    // vec4 gl_FragCoord = <compiler magic builtin>;
+                    // uniform vec2 vposFlip;
+                    // vec4 ps_vPos = vec4(
+                    //     gl_FragCoord.x,
+                    //     (gl_FragCoord.y * vposFlip.x) + vposFlip.y,
+                    //     gl_FragCoord.z,
+                    //     gl_FragCoord.w
+                    // );
+
+                    push_output(ctx, &ctx->mainline_intro);
+                    // Define uniform vec2*. This is the only place that uses it right now.
+                    spv_emit(ctx, 4, SpvOpTypePointer, tid_pvec2u, SpvStorageClassUniformConstant, tid_vec2);
+                    // Define all variables involved.
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec4i, id_var_fragcoord, SpvStorageClassInput);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec2u, id_var_vposflip, SpvStorageClassUniformConstant);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec4p, id_var_vpos, SpvStorageClassPrivate);
+                    pop_output(ctx);
+
+                    spv_output_builtin(ctx, id_var_fragcoord, SpvBuiltInFragCoord);
+                    spv_output_name(ctx, id_var_vposflip, "vposFlip");
+
+                    // Initialize vPos using vposFlip and built in FragCoord.
+                    push_output(ctx, &ctx->mainline_top);
+                    spv_emit(ctx, 4, SpvOpLoad, tid_vec4, id_fragcoord, id_var_fragcoord);
+                    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_fragcoord_y, id_fragcoord, 1);
+                    spv_emit(ctx, 4, SpvOpLoad, tid_vec2, id_vposflip, id_var_vposflip);
+                    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_vposflip_x, id_vposflip, 0);
+                    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_vposflip_y, id_vposflip, 1);
+                    spv_emit(ctx, 5, SpvOpFMul, tid_float, id_tmp, id_fragcoord_y, id_vposflip_x);
+                    spv_emit(ctx, 5, SpvOpFAdd, tid_float, id_vpos_y, id_tmp, id_vposflip_y);
+                    spv_emit(ctx, 6, SpvOpCompositeInsert, tid_vec4, id_vpos, id_vpos_y, id_fragcoord, 1);
+                    spv_emit(ctx, 3, SpvOpStore, id_var_vpos, id_vpos);
+                    pop_output(ctx);
+
+                    ctx->spirv.id_var_fragcoord = id_var_fragcoord;
+                    ctx->spirv.id_var_vpos = id_var_vpos;
+                    ctx->spirv.patch_table.vpflip.offset = spv_output_location(ctx, id_var_vposflip, ~0u);
+                    break;
+                } // case
+
+                case MISCTYPE_TYPE_FACE: // vFace
+                {
+                    // The much more wordy equivalent of:
+                    // bool gl_FrontFacing = <compiler magic builtin>;
+                    // vec4 vFace;
+                    // vFace = vec4(gl_FrontFacing ? 1.0 : 0.0);
+                    uint32 tid_bool = spv_get_type(ctx, STI_BOOL);
+                    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+                    uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+                    uint32 tid_pbooli = spv_get_type(ctx, STI_PTR_BOOL_I);
+                    uint32 tid_pvec4p = spv_get_type(ctx, STI_PTR_VEC4_P);
+                    uint32 id_1_0 = spv_getscalarf(ctx, 1.0f);
+                    uint32 id_0_0 = spv_getscalarf(ctx, 0.0f);
+
+                    uint32 id_var_frontfacing = spv_bumpid(ctx);
+                    uint32 id_var_vface = id;
+
+                    uint32 id_frontfacing = spv_bumpid(ctx);
+                    uint32 id_tmp = spv_bumpid(ctx);
+                    uint32 id_vface = spv_bumpid(ctx);
+
+                    push_output(ctx, &ctx->mainline_intro);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pbooli, id_var_frontfacing, SpvStorageClassInput);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec4p, id_var_vface, SpvStorageClassPrivate);
+                    pop_output(ctx);
+
+                    spv_output_builtin(ctx, id_var_frontfacing, SpvBuiltInFrontFacing);
+
+                    push_output(ctx, &ctx->mainline_top);
+                    spv_emit(ctx, 4, SpvOpLoad, tid_bool, id_frontfacing, id_var_frontfacing);
+                    spv_emit(ctx, 6, SpvOpSelect, tid_float, id_tmp, id_frontfacing, id_1_0, id_0_0);
+                    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_vface, id_tmp, id_tmp, id_tmp, id_tmp);
+                    spv_emit(ctx, 3, SpvOpStore, id_var_vface, id_vface);
+                    pop_output(ctx);
+
+                    ctx->spirv.id_var_frontfacing = id_var_frontfacing;
+                    ctx->spirv.id_var_vface = id_var_vface;
+                    break;
+                } // case
+                default: // report it rather than silently leaving `id` without a variable declaration
+                    failf(ctx, "unexpected misc register type %d in pixel shader", index);
+                    break;
+            } // switch
+            break;
+        default:
+            fail(ctx, "unknown pixel shader attribute register");
+    } // switch
+} // spv_link_ps_attributes
+// Emit the texbem lookup (texbeml when luminanceCorrection is non-zero) and assign it to ctx->dest_arg.
+static void spv_texbem(Context* ctx, int luminanceCorrection)
+{
+    DestArgInfo *info = &ctx->dest_arg;
+    uint32 sampler_idx = info->regnum;
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, sampler_idx);
+    RegisterList *pSrc = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum);
+    RegisterList *pDst = spv_getreg(ctx, info->regtype, sampler_idx);
+    // NOTE(review): the reglist_find() result is used unchecked; presumably the sampler always exists - confirm.
+    push_output(ctx, &ctx->mainline);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+    SpirvResult src0 = spv_loadreg(ctx, pSrc);
+    SpirvResult src1 = spv_loadreg(ctx, pDst);
+
+    // <dst> = texture(
+    //     <sampler>,
+    //     vec2(
+    //         (<sampler>_texbem.x * <src0>.x) + (<sampler>_texbem.z * <src0>.y) + <src1>.x,
+    //         (<sampler>_texbem.y * <src0>.x) + (<sampler>_texbem.w * <src0>.y) + <src1>.y
+    //     )
+    // );
+    // The array index IDs (idtexbem/idtexbeml) are only reserved here; nothing in this function defines them.
+    // Load 2x2 transform matrix from uniform data (stored as vec4).
+    uint32 id_array = spv_get_uniform_array_id(ctx, REG_TYPE_CONST);
+    assert(sampler_idx < 4);
+    uint32 id_offset = ctx->spirv.sampler_extras[sampler_idx].idtexbem;
+    if (!id_offset)
+    {
+        id_offset = spv_bumpid(ctx);
+        ctx->spirv.sampler_extras[sampler_idx].idtexbem = id_offset;
+    } // if
+    uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+    uint32 tid_pvec4 = spv_get_type(ctx, STI_PTR_VEC4_U);
+    uint32 id_pmatrix = spv_bumpid(ctx);
+    SpirvResult matrix;
+    matrix.tid = tid_vec4;
+    matrix.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpAccessChain, tid_pvec4, id_pmatrix, id_array, id_offset);
+    spv_emit(ctx, 4, SpvOpLoad, matrix.tid, matrix.id, id_pmatrix);
+
+    // transform src0 using matrix and translate result using src1
+    // ie. src0 * matrix + src1
+    SpirvResult matrix_xy = spv_swizzle(ctx, matrix, 0x4, 0x3);
+    SpirvResult matrix_zw = spv_swizzle(ctx, matrix, 0xE, 0x3);
+    SpirvResult src0_xx = spv_swizzle(ctx, src0, 0x0, 0x3);
+    SpirvResult src0_yy = spv_swizzle(ctx, src0, 0x5, 0x3);
+    SpirvResult src1_xy = spv_swizzle(ctx, src1, 0x4, 0x3);
+    uint32 tid_vec2 = src0_xx.tid;
+    uint32 id_a = spv_bumpid(ctx);
+    uint32 id_b = spv_bumpid(ctx);
+    uint32 id_c = spv_bumpid(ctx);
+    uint32 id_d = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec2, id_a, matrix_xy.id, src0_xx.id);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec2, id_b, matrix_zw.id, src0_yy.id);
+    spv_emit(ctx, 5, SpvOpFAdd, tid_vec2, id_c, id_a, id_b);
+    spv_emit(ctx, 5, SpvOpFAdd, tid_vec2, id_d, id_c, src1_xy.id);
+
+    // sample texture
+    SpirvResult result;
+    result.tid = tid_vec4;
+    result.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_d);
+    if (luminanceCorrection)
+    {
+        uint32 id_l_offset = ctx->spirv.sampler_extras[sampler_idx].idtexbeml;
+        if (!id_l_offset)
+        {
+            id_l_offset = spv_bumpid(ctx);
+            ctx->spirv.sampler_extras[sampler_idx].idtexbeml = id_l_offset;
+        } // if
+
+        // <dst> = <dst> * ((<src0>.z * <sampler>_texbeml.x) + <sampler>_texbeml.y)
+        uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+
+        SpirvResult src0_z = spv_swizzle(ctx, src0, 0x2, 0x1);
+        uint32 id_l_ptr = spv_bumpid(ctx);
+
+        SpirvResult l;
+        l.tid = tid_vec4;
+        l.id = spv_bumpid(ctx);
+
+        spv_emit(ctx, 5, SpvOpAccessChain, tid_pvec4, id_l_ptr, id_array, id_l_offset);
+        spv_emit(ctx, 4, SpvOpLoad, l.tid, l.id, id_l_ptr);
+
+        SpirvResult l_x = spv_swizzle(ctx, l, 0x0, 0x1);
+        SpirvResult l_y = spv_swizzle(ctx, l, 0x1, 0x1);
+        assert(tid_float == l_x.tid);
+        assert(tid_float == l_y.tid);
+        assert(tid_float == src0_z.tid);
+
+        uint32 id_e = spv_bumpid(ctx);
+        uint32 id_f = spv_bumpid(ctx);
+        uint32 id_ffff = spv_bumpid(ctx);
+        uint32 id_new = spv_bumpid(ctx);
+        spv_emit(ctx, 5, SpvOpFMul, tid_float, id_e, src0_z.id, l_x.id);
+        spv_emit(ctx, 5, SpvOpFAdd, tid_float, id_f, id_e, l_y.id);
+        spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, tid_vec4, id_ffff,
+            id_f, id_f, id_f, id_f
+        );
+        spv_emit(ctx, 5, SpvOpFMul, tid_vec4, id_new, result.id, id_ffff);
+        result.id = id_new;
+    } // if
+
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, result);
+} // spv_texbem
+
+void emit_SPIRV_start(Context *ctx, const char *profilestr)
+{
+    // This profile only knows how to translate vertex and pixel shaders.
+    if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
+    {
+        failf(ctx, "Shader type %u unsupported in this profile.",
+              (uint) ctx->shader_type);
+        return;
+    } // if
+
+    if (strcmp(profilestr, MOJOSHADER_PROFILE_SPIRV) != 0)
+        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
+
+    // Start from a zeroed per-shader SPIR-V state before handing out any ids.
+    memset(&(ctx->spirv), '\0', sizeof(ctx->spirv));
+    ctx->spirv.idmain = spv_bumpid(ctx);
+
+    const uint32 void_tid = spv_get_type(ctx, STI_VOID);
+    const uint32 fn_tid = spv_get_type(ctx, STI_FUNC_VOID);
+
+    // The function header lives in mainline_top so that type declarations,
+    // which go to mainline_intro, end up before it in the output.
+    push_output(ctx, &ctx->mainline_top);
+    spv_emit(ctx, 5, SpvOpFunction, void_tid, ctx->spirv.idmain, SpvFunctionControlMaskNone, fn_tid);
+    const uint32 id_entry_label = spv_bumpid(ctx);
+    spv_emit(ctx, 2, SpvOpLabel, id_entry_label);
+    pop_output(ctx);
+
+    // Emit the name for the main function, then direct further output to mainline.
+    spv_output_name(ctx, ctx->spirv.idmain, ctx->mainfn);
+    set_output(ctx, &ctx->mainline);
+} // emit_SPIRV_start
+
+void emit_SPIRV_end(Context *ctx)
+{
+    if (ctx->previous_opcode != OPCODE_RET)  // otherwise presumably RET already ended the function (TODO confirm)
+        spv_emit_func_end(ctx);
+} // emit_SPIRV_end
+
+void emit_SPIRV_phase(Context *ctx)
+{
+    // no-op: nothing is emitted for PHASE in this profile.
+} // emit_SPIRV_phase
+
+void emit_SPIRV_global(Context *ctx, RegisterType regtype, int regnum)
+{
+    RegisterList *r = reglist_find(&ctx->used_registers, regtype, regnum);
+    // Declares the variable backing one used register; `sc`/`tid` select its storage class and pointer type.
+    SpvStorageClass sc = SpvStorageClassPrivate;
+    uint32 tid = 0;
+    switch (regtype)
+    {
+        case REG_TYPE_LABEL:
+            failf(ctx, "unimplemented regtype %d", regtype);
+            return;
+
+        case REG_TYPE_LOOP:
+            // Using SSA id to represent loop counters, instead of a variable.
+            return;
+
+        case REG_TYPE_PREDICATE:
+            tid = spv_get_type(ctx, STI_PTR_BVEC4_P);
+            break;
+
+        case REG_TYPE_ADDRESS:
+            if (shader_is_vertex(ctx))
+                tid = spv_get_type(ctx, STI_PTR_IVEC4_P);
+            else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE
+            {
+                if (!shader_version_atleast(ctx, 1, 4))
+                {
+                    // ps_1_1 texture/address registers work like temporaries. They are initialized
+                    // with tex coords and TEX instruction then reads tex coords from it and writes
+                    // sampling result back into it. Because Input storage class is read-only, we
+                    // create a private variable that is initialized to the value of the input.
+                    // If nothing gave the register an id yet, take a fresh one (same guard as the generic path below).
+                    uint32 tid_pvec4_i = spv_get_type(ctx, STI_PTR_VEC4_I);
+                    uint32 tid_pvec4_p = spv_get_type(ctx, STI_PTR_VEC4_P);
+                    uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+                    uint32 id_input_var = spv_bumpid(ctx);
+                    uint32 id_private_var = r->spirv.iddecl ? r->spirv.iddecl : spv_bumpid(ctx);
+                    uint32 id_tmp = spv_bumpid(ctx);
+
+                    // Create one Input and one Private variable. Input variable is linked to prev stage.
+                    push_output(ctx, &ctx->mainline_intro);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec4_i, id_input_var, SpvStorageClassInput);
+                    spv_emit(ctx, 4, SpvOpVariable, tid_pvec4_p, id_private_var, SpvStorageClassPrivate);
+                    pop_output(ctx);
+                    spv_link_ps_attributes(ctx, id_input_var, regtype, MOJOSHADER_USAGE_TEXCOORD, regnum);
+
+                    // Initialize Private variable with Input variable.
+                    push_output(ctx, &ctx->mainline_top);
+                    spv_emit(ctx, 4, SpvOpLoad, tid_vec4, id_tmp, id_input_var);
+                    spv_emit(ctx, 3, SpvOpStore, id_private_var, id_tmp);
+                    pop_output(ctx);
+
+                    // TEX instructions that work with the Private variable have already been emitted.
+
+                    // Overwrite Private variable with Input variable, so emit_SPIRV_finalize outputs
+                    // OpEntryPoint with correct references to Input and Output variables.
+                    r->spirv.iddecl = id_input_var;
+                    return;
+                } // if
+                tid = spv_get_type(ctx, STI_PTR_VEC4_P);
+            } // else if
+            break;
+
+        case REG_TYPE_TEMP:
+            if (regnum == 0 && shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
+            {
+                // The value of r0 at the end of shader execution is the color output.
+                sc = SpvStorageClassOutput;
+                tid = spv_get_type(ctx, STI_PTR_VEC4_O);
+            }
+            else
+                tid = spv_get_type(ctx, STI_PTR_VEC4_P);
+            break;
+
+        default:
+            fail(ctx, "BUG: Unexpected regtype in emit_SPIRV_global");
+            return;
+    } // switch
+
+    // TODO: If the SSA id for this register is still 0 by this point, that
+    // means no instructions actually loaded from/stored to this variable...
+
+    if (r->spirv.iddecl == 0)
+        r->spirv.iddecl = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, sc);
+    pop_output(ctx);
+
+    spv_output_regname(ctx, r->spirv.iddecl, regtype, regnum);
+} // emit_SPIRV_global
+
+void emit_SPIRV_array(Context *ctx, VariableList *var)
+{
+    var->emit_position = ctx->uniform_float4_count;  // emit_SPIRV_uniform() later emits this value as an int constant
+} // emit_SPIRV_array
+
+void emit_SPIRV_const_array(Context *ctx,
+                            const struct ConstantsList *clist,
+                            int base, int size)
+{
+    int i;
+    // Emits the Private vec4 array variable (id constant_arrays.idvec4) built from `size` float constants in clist.
+    assert(ctx->spirv.constant_arrays.idvec4 != 0);
+    uint32 *constituents = (uint32 *)Malloc(ctx, size * sizeof(uint32));
+    if (constituents == NULL)
+        return;  // allocation failed: bail out before the output stack is touched.
+    push_output(ctx, &ctx->mainline_intro);
+    // FIXME: This code potentially duplicates constants defined using DEF ops.
+    // FIXME: Multiple constant arrays probably won't work. Are those even possible?
+    // Maybe it would be better to do this in emit_SPIRV_finalize and use used_registers for it?
+    uint32 tid_constituent = spv_get_type(ctx, STI_VEC4);
+    for (i = 0; i < size; i++)
+    {
+        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
+            clist = clist->next;
+        assert(clist->constant.index == (base + i));
+
+        uint32 id_x = spv_getscalarf(ctx, clist->constant.value.f[0]);
+        uint32 id_y = spv_getscalarf(ctx, clist->constant.value.f[1]);
+        uint32 id_z = spv_getscalarf(ctx, clist->constant.value.f[2]);
+        uint32 id_w = spv_getscalarf(ctx, clist->constant.value.f[3]);
+
+        uint32 id = spv_bumpid(ctx);
+        spv_emit(ctx, 3 + 4, SpvOpConstantComposite, tid_constituent, id, id_x, id_y, id_z, id_w);
+        constituents[i] = id;
+
+        clist = clist->next;
+    } // for
+
+    uint32 id_array_len = spv_getscalari(ctx, size);
+
+    uint32 tid_array = spv_bumpid(ctx);
+    spv_emit(ctx, 4, SpvOpTypeArray, tid_array, tid_constituent, id_array_len);
+
+    uint32 id_array = spv_bumpid(ctx);
+    spv_emit_part(ctx, 3+size, 3, SpvOpConstantComposite, tid_array, id_array);
+    for (i = 0; i < size; i++)
+        spv_emit_word(ctx, constituents[i]);
+
+    uint32 tid_parray = spv_bumpid(ctx);
+    spv_emit(ctx, 4, SpvOpTypePointer, tid_parray, SpvStorageClassPrivate, tid_array);
+
+    uint32 id_array_var = ctx->spirv.constant_arrays.idvec4;
+    spv_emit(ctx, 5, SpvOpVariable, tid_parray, id_array_var, SpvStorageClassPrivate, id_array);
+
+    Free(ctx, constituents);
+    pop_output(ctx);
+} // emit_SPIRV_const_array
+
+void emit_SPIRV_uniform(Context *ctx, RegisterType regtype, int regnum,
+                        const VariableList *var)
+{
+    // Defines the register's id as an int OpConstant. The value is the
+    // running uniform count of the register's kind, or var->emit_position
+    // (set by emit_SPIRV_array) when a non-constant array `var` is given.
+    // The constant is then named after the register.
+
+    RegisterList *reg = reglist_find(&ctx->uniforms, regtype, regnum);
+    char name[64];
+
+    // TODO: If the SSA id for this register is still 0 by this point, that means no instructions actually
+    // loaded from/stored to this variable...
+
+    if (reg->spirv.iddecl == 0)
+        reg->spirv.iddecl = spv_bumpid(ctx);
+
+    if (var != NULL)
+    {
+        if (var->constant)
+        {
+            fail(ctx, "const array not implemented");
+            return;
+        } // if
+
+        // Instructions needed to reference this constant before its value was known, so unique
+        // id had to be generated. Unfortunately, this prevents reusing already emitted
+        // constants.
+        assert(var->emit_position != -1);
+
+        push_output(ctx, &ctx->mainline_intro);
+        spv_emit(ctx, 4, SpvOpConstant, spv_get_type(ctx, STI_INT), reg->spirv.iddecl, var->emit_position);
+        pop_output(ctx);
+    } // if
+    else
+    {
+        // Lone register. The int type id is fetched first, before the
+        // register kind is looked at, so it is requested even on failure.
+        const uint32 tid_int = spv_get_type(ctx, STI_INT);
+        int count = 0;
+
+        if (regtype == REG_TYPE_CONST)
+            count = ctx->uniform_float4_count;
+        else if (regtype == REG_TYPE_CONSTINT)
+            count = ctx->uniform_int4_count;
+        else if (regtype == REG_TYPE_CONSTBOOL)
+            count = ctx->uniform_bool_count;
+        else
+        {
+            fail(ctx, "BUG: used a uniform we don't know how to define.");
+            return;
+        } // else
+
+        push_output(ctx, &ctx->mainline_intro);
+        spv_emit(ctx, 4, SpvOpConstant, tid_int, reg->spirv.iddecl, count);
+        pop_output(ctx);
+    } // else
+
+    // Both successful paths end by naming the constant after the register.
+    get_SPIRV_varname_in_buf(ctx, regtype, regnum, name, sizeof(name));
+    spv_output_name(ctx, reg->spirv.iddecl, name);
+} // emit_SPIRV_uniform
+
+void emit_SPIRV_sampler(Context *ctx, int stage, TextureType ttype, int texbem)
+{
+    const uint32 tid_ptr_image = spv_ptrimage_from_texturetype(ctx, ttype);
+    const int pre_ps20 = shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0);
+
+    // Pre ps_2_0 samplers were not dcl-ed, so we won't find them using spv_getreg().
+    RegisterList *reg = pre_ps20
+        ? reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage)
+        : spv_getreg(ctx, REG_TYPE_SAMPLER, stage);
+    const uint32 id_sampler = reg->spirv.iddecl;
+
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 4, SpvOpVariable, tid_ptr_image, id_sampler, SpvStorageClassUniformConstant);
+    if (texbem)  // This sampler used a ps_1_1 TEXBEM opcode?
+    {
+        // Two vec4 uniform slots are reserved: the first is indexed by the
+        // idtexbem constant, the second by the idtexbeml constant.
+        const uint32 tid_int = spv_get_type(ctx, STI_INT);
+        const uint32 id_matrix_slot = ctx->spirv.sampler_extras[stage].idtexbem;
+        const uint32 id_lum_slot = ctx->spirv.sampler_extras[stage].idtexbeml;
+        const int first_slot = ctx->uniform_float4_count;
+        ctx->uniform_float4_count += 2;
+        if (id_matrix_slot)
+            spv_emit(ctx, 4, SpvOpConstant, tid_int, id_matrix_slot, first_slot);
+        if (id_lum_slot)
+            spv_emit(ctx, 4, SpvOpConstant, tid_int, id_lum_slot, first_slot + 1);
+    } // if
+    pop_output(ctx);
+
+    // Specify binding and uniform location (required per gl_arb_spirv spec); the returned offset goes in the patch table.
+    spv_output_set_binding(ctx, id_sampler, 0, reg->regnum);
+    const uint32 location_word = spv_output_location(ctx, id_sampler, ~0u);
+
+    assert(reg->regnum < STATICARRAYLEN(ctx->spirv.patch_table.samplers));
+    ctx->spirv.patch_table.samplers[reg->regnum].offset = location_word;
+
+    spv_output_regname(ctx, id_sampler, REG_TYPE_SAMPLER, stage);
+} // emit_SPIRV_sampler
+
+void emit_SPIRV_attribute(Context *ctx, RegisterType regtype, int regnum,
+                          MOJOSHADER_usage usage, int index, int wmask,
+                          int flags)
+{
+    uint32 tid;
+    RegisterList *r = spv_getreg(ctx, regtype, regnum);
+    // Names one attribute register, declares its Input/Output variable where needed, and links it to its usage.
+    ctx->spirv.inoutcount += 1;  // emit_SPIRV_finalize sizes the OpEntryPoint instruction with this count
+
+    spv_output_regname(ctx, r->spirv.iddecl, regtype, regnum);
+
+    if (shader_is_vertex(ctx))
+    {
+        // pre-vs3 output registers.
+        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
+        //  output registers.
+        if (!shader_version_atleast(ctx, 3, 0))
+        {
+            if (regtype == REG_TYPE_RASTOUT)
+            {
+                regtype = REG_TYPE_OUTPUT;
+                index = regnum;
+                switch ((const RastOutType) regnum)
+                {
+                    case RASTOUT_TYPE_POSITION:
+                        usage = MOJOSHADER_USAGE_POSITION;
+                        break;
+                    case RASTOUT_TYPE_FOG:
+                        usage = MOJOSHADER_USAGE_FOG;
+                        break;
+                    case RASTOUT_TYPE_POINT_SIZE:
+                        usage = MOJOSHADER_USAGE_POINTSIZE;
+                        break;
+                } // switch
+            } // if
+
+            else if (regtype == REG_TYPE_ATTROUT)
+            {
+                regtype = REG_TYPE_OUTPUT;
+                usage = MOJOSHADER_USAGE_COLOR;
+                index = regnum;
+            } // else if
+
+            else if (regtype == REG_TYPE_TEXCRDOUT)
+            {
+                regtype = REG_TYPE_OUTPUT;
+                usage = MOJOSHADER_USAGE_TEXCOORD;
+                index = regnum;
+            } // else if
+        } // if
+        assert(r->usage == MOJOSHADER_USAGE_UNKNOWN);
+        r->usage = usage;
+
+        switch (regtype)
+        {
+            case REG_TYPE_INPUT:
+            {
+                push_output(ctx, &ctx->mainline_intro);
+                SpirvTypeIdx sti = STI_PTR_VEC4_I;
+                tid = spv_get_type(ctx, sti);
+                spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassInput);
+                pop_output(ctx);
+
+                // hnn: generate the location decoration for the input, using the register number
+                spv_output_location(ctx, r->spirv.iddecl, regnum);
+                break;
+            }
+
+            case REG_TYPE_OUTPUT:
+            {
+                push_output(ctx, &ctx->mainline_intro);
+                SpirvTypeIdx sti = STI_PTR_VEC4_O;
+                if (usage == MOJOSHADER_USAGE_POINTSIZE)
+                {
+                    sti = STI_PTR_FLOAT_O;
+                    ctx->spirv.patch_table.vs_has_psize = 1;
+                } // if
+                else if (usage == MOJOSHADER_USAGE_FOG)
+                    sti = STI_PTR_FLOAT_O;
+
+                tid = spv_get_type(ctx, sti);
+                spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput);
+                pop_output(ctx);
+
+                spv_link_vs_attributes(ctx, r->spirv.iddecl, usage, index);
+                break;
+            } // case
+
+            default:
+                fail(ctx, "unknown vertex shader attribute register");
+        } // switch
+    } // if
+
+    else if (shader_is_pixel(ctx))
+    {
+        // samplers DCLs get handled in emit_SPIRV_sampler().
+
+        if (flags & MOD_CENTROID)  // !!! FIXME
+        {
+            failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
+            return;
+        } // if
+
+        switch (regtype)
+        {
+            case REG_TYPE_COLOROUT:
+                spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index);
+                push_output(ctx, &ctx->mainline_intro);
+                tid = spv_get_type(ctx, STI_PTR_VEC4_O);
+                spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput);
+                pop_output(ctx);
+                break;
+            case REG_TYPE_DEPTHOUT:
+                // maps to BuiltIn FragDepth
+                spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index);
+                push_output(ctx, &ctx->mainline_intro);
+                tid = spv_get_type(ctx, STI_PTR_FLOAT_O);
+                spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassOutput);
+                pop_output(ctx);
+                break;
+            case REG_TYPE_MISCTYPE:
+                assert((MiscTypeType)regnum == MISCTYPE_TYPE_FACE || (MiscTypeType)regnum == MISCTYPE_TYPE_POSITION);
+                // SpvBuiltInFrontFacing is an input bool, and for the DX bytecode
+                // we need to map it to a float that's either -1.0 or 1.0.
+                // SpvBuiltInFragCoord needs to be modified using vposFlip uniform
+                // to match vPos.
+                // Both of these take place in spv_link_ps_attributes() so don't
+                // create an input variable for it here.
+                spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, regnum);
+                break;
+
+            case REG_TYPE_TEXTURE:
+            case REG_TYPE_INPUT:
+                // ps_1_1 texcoord registers are dealt with in emit_SPIRV_global().
+                if (usage != MOJOSHADER_USAGE_TEXCOORD || shader_version_atleast(ctx, 1, 4))
+                {
+                    spv_link_ps_attributes(ctx, r->spirv.iddecl, regtype, usage, index);
+                    push_output(ctx, &ctx->mainline_intro);
+                    tid = spv_get_type(ctx, STI_PTR_VEC4_I);
+                    spv_emit(ctx, 4, SpvOpVariable, tid, r->spirv.iddecl, SpvStorageClassInput);
+                    pop_output(ctx);
+                } // if
+                break;
+            default:
+                fail(ctx, "unknown pixel shader attribute register");
+        } // switch
+    } // else if
+
+    else
+        fail(ctx, "Unknown shader type");  // state machine should catch this.
+} // emit_SPIRV_attribute
+
+static void output_SPIRV_uniform_array(Context *ctx, const RegisterType regtype,
+                                       const int size)
+{
+    // Declares the UniformConstant array for one uniform kind and records where its location was written.
+    uint32 id_array_var = 0, tid_element = 0;
+    uint32 *patch_offset = NULL;
+    if (size < 1)
+        return;
+    switch (regtype)
+    {
+        case REG_TYPE_CONST:
+            id_array_var = ctx->spirv.uniform_arrays.idvec4;
+            tid_element = spv_get_type(ctx, STI_VEC4);
+            patch_offset = &ctx->spirv.patch_table.array_vec4.offset;
+            break;
+
+        case REG_TYPE_CONSTINT:
+            id_array_var = ctx->spirv.uniform_arrays.idivec4;
+            tid_element = spv_get_type(ctx, STI_IVEC4);
+            patch_offset = &ctx->spirv.patch_table.array_ivec4.offset;
+            break;
+
+        case REG_TYPE_CONSTBOOL:
+            id_array_var = ctx->spirv.uniform_arrays.idbool;
+            tid_element = spv_get_type(ctx, STI_INT);
+            patch_offset = &ctx->spirv.patch_table.array_bool.offset;
+            break;
+
+        default:
+            fail(ctx, "BUG: used a uniform we don't know how to define.");
+            return;
+    } // switch
+
+    if (id_array_var == 0)
+        return; // Never used, no need to declare.
+
+    const uint32 id_length = spv_getscalari(ctx, size);
+    const uint32 tid_array = spv_bumpid(ctx);
+    const uint32 tid_array_ptr = spv_bumpid(ctx);
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 4, SpvOpTypeArray, tid_array, tid_element, id_length);
+    spv_emit(ctx, 4, SpvOpTypePointer, tid_array_ptr, SpvStorageClassUniformConstant, tid_array);
+    spv_emit(ctx, 4, SpvOpVariable, tid_array_ptr, id_array_var, SpvStorageClassUniformConstant);
+    pop_output(ctx);
+
+    char name[64];
+    spv_get_uniform_array_varname(ctx, regtype, name, sizeof(name));
+    spv_output_name(ctx, id_array_var, name);
+
+    *patch_offset = spv_output_location(ctx, id_array_var, ~0u);
+} // output_SPIRV_uniform_array
+
+void emit_SPIRV_finalize(Context *ctx)
+{
+    size_t i, max;
+    // Writes the module header and OpEntryPoint into preflight, then completes the patch table and appends it to postflight.
+    /* The generator's magic number, this could be registered with Khronos
+     * if we wanted to. 0 is fine though, so use that for now. */
+    uint32 genmagic = 0x00000000;
+
+    /* Vertex shader main() function may need to do some position adjustments. However,
+    position may be written in subroutines, so we can't write position adjust code
+    at the end of main(), because output register might not be in ctx->used_registers
+    yet. Instead, we do adjust in a subroutine generated here and called at the
+    end of main(). */
+    spv_emit_vs_main_end(ctx);
+    spv_emit_func_lit(ctx);
+
+    output_SPIRV_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
+    output_SPIRV_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
+    output_SPIRV_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);
+
+    push_output(ctx, &ctx->preflight);
+
+    spv_emit_word(ctx, SpvMagicNumber);
+    spv_emit_word(ctx, SpvVersion);
+    spv_emit_word(ctx, genmagic);
+    // "Bound: where all <id>s in this module are guaranteed to satisfy 0 < id < Bound"
+    // `idmax` holds the last id that was given out, so we need to emit `idmax + 1`
+    spv_emit_word(ctx, ctx->spirv.idmax + 1);
+    spv_emit_word(ctx, 0);
+
+    spv_emit(ctx, 2, SpvOpCapability, SpvCapabilityShader);
+
+    // only non-zero when actually needed
+    if (ctx->spirv.idext)
+    {
+        const char *extstr = "GLSL.std.450";
+        spv_emit_part(ctx, 2 + spv_strlen(extstr), 2, SpvOpExtInstImport, ctx->spirv.idext);
+        spv_emit_str(ctx, extstr);
+    } // if
+
+    spv_emit(ctx, 3, SpvOpMemoryModel, SpvAddressingModelLogical, SpvMemoryModelSimple);
+
+    assert(shader_is_vertex(ctx) || shader_is_pixel(ctx));
+    SpvExecutionModel model = SpvExecutionModelVertex;
+    if (shader_is_pixel(ctx))
+        model = SpvExecutionModelFragment;
+
+    /* 3 is for opcode + exec. model + idmain */
+    uint32 inoutcount = ctx->spirv.inoutcount;
+
+    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
+        inoutcount += 1;
+
+    spv_emit_part(ctx, 3 + spv_strlen(ctx->mainfn) + inoutcount, 3, SpvOpEntryPoint,
+        model, ctx->spirv.idmain
+    );
+    spv_emit_str(ctx, ctx->mainfn);
+
+    RegisterList *p = &ctx->attributes, *r = NULL;
+    // !!! FIXME: The first element of the list is always empty and I don't know why!
+    p = p->next;
+    while (p)
+    {
+        r = spv_getreg(ctx, p->regtype, p->regnum);
+        if (r)
+        {
+            if (r->spirv.iddecl == ctx->spirv.id_var_vpos)
+                spv_emit_word(ctx, ctx->spirv.id_var_fragcoord);
+            else if (r->spirv.iddecl == ctx->spirv.id_var_vface)
+                spv_emit_word(ctx, ctx->spirv.id_var_frontfacing);
+            else
+                spv_emit_word(ctx, r->spirv.iddecl);
+        } // if
+        else
+        {
+            char varname[64];
+            get_SPIRV_varname_in_buf(ctx, p->regtype, p->regnum, varname, sizeof (varname));
+            failf(
+                ctx,
+                "missing attribute register %s (rt=%u, rn=%u, u=%u)",
+                varname, p->regtype, p->regnum, p->usage
+            );
+        } // else
+        p = p->next;
+    } // while
+
+    // only applies to pixel shaders
+    if (shader_is_pixel(ctx))
+    {
+        if (!shader_version_atleast(ctx, 2, 0))
+        {
+            r = spv_getreg(ctx, REG_TYPE_TEMP, 0);  // r0 is used as color output.
+            if (!r) fail(ctx, "pixel shader never uses r0, so there is no color output");
+            spv_emit_word(ctx, r ? r->spirv.iddecl : 0);  // placeholder keeps the word count consistent
+        } // if
+
+        spv_emit(ctx, 3, SpvOpExecutionMode, ctx->spirv.idmain, SpvExecutionModeOriginUpperLeft);
+    } // if
+
+    pop_output(ctx);
+
+    // Generate final patch table.
+
+    uint32 base_offset = 0;
+    if (ctx->preflight) base_offset += buffer_size(ctx->preflight);
+    if (ctx->globals)   base_offset += buffer_size(ctx->globals);
+    if (ctx->inputs)    base_offset += buffer_size(ctx->inputs);
+    if (ctx->outputs)   base_offset += buffer_size(ctx->outputs);
+    base_offset >>= 2;
+
+    int32 location_count = 0;
+    SpirvPatchTable* table = &ctx->spirv.patch_table;
+    if (table->vpflip.offset)
+    {
+        table->vpflip.offset += base_offset;
+        table->vpflip.location = location_count;
+        location_count += 1;
+    } // if
+    else
+        table->vpflip.location = -1;
+
+    if (table->array_vec4.offset)
+    {
+        table->array_vec4.offset += base_offset;
+        table->array_vec4.location = location_count;
+        location_count += ctx->uniform_float4_count;
+    } // if
+    else
+        table->array_vec4.location = -1;
+
+    if (table->array_ivec4.offset)
+    {
+        table->array_ivec4.offset += base_offset;
+        table->array_ivec4.location = location_count;
+        location_count += ctx->uniform_int4_count;
+    } // if
+    else
+        table->array_ivec4.location = -1;
+
+    if (table->array_bool.offset)
+    {
+        table->array_bool.offset += base_offset;
+        table->array_bool.location = location_count;
+        location_count += ctx->uniform_bool_count;
+    } // if
+    else
+        table->array_bool.location = -1;
+
+    for (i = 0, max = STATICARRAYLEN(table->samplers); i < max; i++)
+    {
+        SpirvPatchEntry* entry = &table->samplers[i];
+        if (entry->offset)
+        {
+            entry->offset += base_offset;
+            entry->location = location_count;
+            location_count++;
+        } // if
+        else
+            entry->location = -1;
+    } // for
+
+    if (shader_is_pixel(ctx) && table->ps_texcoord0_offset)
+        table->ps_texcoord0_offset += base_offset;
+
+    table->location_count = location_count;
+
+    push_output(ctx, &ctx->postflight);
+    buffer_append(ctx->output, &ctx->spirv.patch_table, sizeof(ctx->spirv.patch_table));
+    pop_output(ctx);
+
+    spv_componentlist_free(ctx, ctx->spirv.cl.f.next);
+    spv_componentlist_free(ctx, ctx->spirv.cl.i.next);
+    spv_componentlist_free(ctx, ctx->spirv.cl.u.next);
+} // emit_SPIRV_finalize
+
+void emit_SPIRV_NOP(Context *ctx)
+{
+    // no-op is a no-op.  :)
+    // TODO: (hnn) SPIR-V has OpNop, which could be emitted here instead.
+} // emit_SPIRV_NOP
+
+void emit_SPIRV_DEF(Context *ctx)
+{
+    // DEF: give the destination register a fresh id bound to a vec4 constant.
+    const float *values = (const float *) ctx->dwords;
+    uint32 comp[4];
+    int i;
+
+    RegisterList *reg = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum);
+    reg->spirv.iddecl = spv_bumpid(ctx);
+    reg->spirv.is_ssa = 1;
+
+    for (i = 0; i < 4; i++)
+        comp[i] = spv_getscalarf(ctx, values[i]);
+
+    const uint32 tid_vec4 = spv_get_type(ctx, STI_VEC4);
+
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 3 + 4, SpvOpConstantComposite, tid_vec4, reg->spirv.iddecl,
+             comp[0], comp[1], comp[2], comp[3]);
+    pop_output(ctx);
+} // emit_SPIRV_DEF
+
+void emit_SPIRV_DEFI(Context *ctx)
+{
+    // DEFI: give the destination register a fresh id bound to an ivec4 constant.
+    const int *values = (const int *) ctx->dwords;
+    uint32 comp[4];
+    int i;
+
+    RegisterList *reg = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum);
+    reg->spirv.iddecl = spv_bumpid(ctx);
+    reg->spirv.is_ssa = 1;
+
+    for (i = 0; i < 4; i++)
+        comp[i] = spv_getscalari(ctx, values[i]);
+
+    const uint32 tid_ivec4 = spv_get_type(ctx, STI_IVEC4);
+
+    push_output(ctx, &ctx->mainline_intro);
+    spv_emit(ctx, 3 + 4, SpvOpConstantComposite, tid_ivec4, reg->spirv.iddecl,
+             comp[0], comp[1], comp[2], comp[3]);
+    pop_output(ctx);
+} // emit_SPIRV_DEFI
+
+void emit_SPIRV_DEFB(Context *ctx)
+{
+    RegisterList *rl = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum);
+    rl->spirv.iddecl = ctx->dwords[0] ? spv_gettrue(ctx) : spv_getfalse(ctx);  // any nonzero dword picks the spv_gettrue() id
+    rl->spirv.is_ssa = 1;  // same flag emit_SPIRV_DEF/DEFI set on their registers
+} // emit_SPIRV_DEFB
+
+void emit_SPIRV_DCL(Context *ctx)
+{
+    // state_DCL handles checking if the registers are valid for this
+    // instruction, and collecting samplers and attribs
+    RegisterList *reg = spv_getreg(ctx, ctx->dest_arg.regtype, ctx->dest_arg.regnum);
+
+    // The variable carrying this id is declared by emit_SPIRV_attribute, but
+    // emit_SPIRV_attribute is called after instructions are emitted, so the
+    // id is generated here where instructions can already refer to it.
+    reg->spirv.iddecl = spv_bumpid(ctx);
+} // emit_SPIRV_DCL
+
+static void emit_SPIRV_dotproduct(Context *ctx, SpirvResult src0, SpirvResult src1)
+{
+    // Both operands must already share one type id.
+    assert(src0.tid == src1.tid);
+
+    const uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    const uint32 id_dot = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_dot, src0.id, src1.id);
+
+    // The dot product is a scalar; broadcast it across all channels of a vec4.
+    SpirvResult splat;
+    splat.tid = spv_get_type(ctx, STI_VEC4);
+    splat.id = spv_vectorbroadcast(ctx, splat.tid, id_dot);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, splat);
+} // emit_SPIRV_dotproduct
+
+void emit_SPIRV_DP4(Context *ctx)
+{
+    // Dot product of the fully loaded source arguments 0 and 1.
+    const SpirvResult lhs = spv_load_srcarg_full(ctx, 0);
+    const SpirvResult rhs = spv_load_srcarg_full(ctx, 1);
+    emit_SPIRV_dotproduct(ctx, lhs, rhs);
+} // emit_SPIRV_DP4
+
+void emit_SPIRV_DP3(Context *ctx)
+{
+    // Dot product of source arguments 0 and 1, each loaded with mask 0x7 (presumably x, y, z).
+    const SpirvResult lhs = spv_load_srcarg(ctx, 0, 0x7);
+    const SpirvResult rhs = spv_load_srcarg(ctx, 1, 0x7);
+    emit_SPIRV_dotproduct(ctx, lhs, rhs);
+} // emit_SPIRV_DP3
+
+static void spv_emit_begin_ds(Context *ctx, SpirvResult* dst, SpirvResult* src)
+{
+    *src = spv_load_srcarg_full(ctx, 0);  // load source argument 0
+    dst->tid = spv_get_type(ctx, STI_VEC4);  // the result is typed vec4
+    dst->id = spv_bumpid(ctx);  // fresh id for the caller's result
+    push_output(ctx, &ctx->mainline);  // left pushed on return; spv_emit_end() pops it
+} // spv_emit_begin_ds
+
+static void spv_emit_begin_dss(Context *ctx, SpirvResult* dst, SpirvResult* src0, SpirvResult* src1)
+{
+    *src0 = spv_load_srcarg_full(ctx, 0);  // load source argument 0
+    *src1 = spv_load_srcarg_full(ctx, 1);  // load source argument 1
+    dst->tid = spv_get_type(ctx, STI_VEC4);  // the result is typed vec4
+    dst->id = spv_bumpid(ctx);  // fresh id for the caller's result
+    push_output(ctx, &ctx->mainline);  // left pushed on return; spv_emit_end() pops it
+} // spv_emit_begin_dss
+
+static void spv_emit_begin_dsss(Context *ctx, SpirvResult* dst,
+                                SpirvResult* src0, SpirvResult* src1, SpirvResult* src2)
+{
+    *src0 = spv_load_srcarg_full(ctx, 0);  // load source argument 0
+    *src1 = spv_load_srcarg_full(ctx, 1);  // load source argument 1
+    *src2 = spv_load_srcarg_full(ctx, 2);  // load source argument 2
+    dst->tid = spv_get_type(ctx, STI_VEC4);  // the result is typed vec4
+    dst->id = spv_bumpid(ctx);  // fresh id for the caller's result
+    push_output(ctx, &ctx->mainline);  // left pushed on return; spv_emit_end() pops it
+} // spv_emit_begin_dsss
+
+static void spv_emit_end(Context *ctx, SpirvResult dst)
+{
+    pop_output(ctx);  // undoes the push_output() done by the spv_emit_begin_* helpers
+    spv_assign_destarg(ctx, dst);  // hand the computed result to the instruction's destination
+} // spv_emit_end
+
+static SpirvTexm3x3SetupResult spv_texm3x3_setup(Context *ctx)
+{
+    SpirvTexm3x3SetupResult result;
+    // Emits three xyz dot products and returns their ids plus the loaded pad/dst ids.
+    DestArgInfo *pDstInfo = &ctx->dest_arg;
+    // Register numbers come from ctx->texm3x3pad_* plus this instruction's own args.
+    RegisterList *pSrc0 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0);
+    RegisterList *pSrc1 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0);
+    RegisterList *pSrc2 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1);
+    RegisterList *pSrc3 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1);
+    RegisterList *pSrc4 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum);
+    RegisterList *pDst  = spv_getreg(ctx, pDstInfo->regtype, pDstInfo->regnum);
+    // Load every register involved.
+    SpirvResult src0 = spv_loadreg(ctx, pSrc0);
+    SpirvResult src1 = spv_loadreg(ctx, pSrc1);
+    SpirvResult src2 = spv_loadreg(ctx, pSrc2);
+    SpirvResult src3 = spv_loadreg(ctx, pSrc3);
+    SpirvResult src4 = spv_loadreg(ctx, pSrc4);
+    SpirvResult dst  = spv_loadreg(ctx, pDst);
+    // Hand the loaded ids of the two pad destinations and of dst back to the caller.
+    result.id_dst_pad0 = src0.id;
+    result.id_dst_pad1 = src2.id;
+    result.id_dst      = dst.id;
+
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 tid_vec3  = spv_get_type(ctx, STI_VEC3);
+    // All result ids are reserved before anything is written to the mainline buffer.
+    uint32 id_src0_xyz = spv_bumpid(ctx);
+    uint32 id_src1_xyz = spv_bumpid(ctx);
+    uint32 id_src2_xyz = spv_bumpid(ctx);
+    uint32 id_src3_xyz = spv_bumpid(ctx);
+    uint32 id_src4_xyz = spv_bumpid(ctx);
+    uint32 id_dst_xyz  = spv_bumpid(ctx);
+    uint32 id_res_x    = spv_bumpid(ctx);
+    uint32 id_res_y    = spv_bumpid(ctx);
+    uint32 id_res_z    = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    // Keep only components 0..2 of each operand.
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src0_xyz, src0.id, src0.id, 0, 1, 2);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src1_xyz, src1.id, src1.id, 0, 1, 2);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src2_xyz, src2.id, src2.id, 0, 1, 2);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src3_xyz, src3.id, src3.id, 0, 1, 2);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_src4_xyz, src4.id, src4.id, 0, 1, 2);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_dst_xyz,  dst.id,  dst.id,  0, 1, 2);
+    // res_x = src0 . src1, res_y = src2 . src3, res_z = dst . src4 (xyz parts only).
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_x, id_src0_xyz, id_src1_xyz);
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_y, id_src2_xyz, id_src3_xyz);
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_res_z, id_dst_xyz,  id_src4_xyz);
+
+    pop_output(ctx);
+
+    result.id_res_x = id_res_x;
+    result.id_res_y = id_res_y;
+    result.id_res_z = id_res_z;
+
+    return result;
+} // spv_texm3x3_setup
+
+static uint32 spv_reflect(Context *ctx, uint32 id_normal, uint32 id_eyeray)
+{
+    // Emits R = 2 * [(N*E) / (N*N)] * N - E on vec3 operands, with N = id_normal
+    // and E = id_eyeray; every product is an OpFMul. Returns the id of R.
+    // There is no push_output() here, so output selection is left to the caller.
+    const uint32 tid_vec3      = spv_get_type(ctx, STI_VEC3);
+    const uint32 id_two        = spv_getscalarf(ctx, 2.0f);
+    const uint32 id_two_vec    = spv_bumpid(ctx);
+    const uint32 id_n_times_e  = spv_bumpid(ctx);
+    const uint32 id_n_times_n  = spv_bumpid(ctx);
+    const uint32 id_quotient   = spv_bumpid(ctx);
+    const uint32 id_scaled_n   = spv_bumpid(ctx);
+    const uint32 id_doubled    = spv_bumpid(ctx);
+    const uint32 id_reflection = spv_bumpid(ctx);
+
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_two_vec, id_two, id_two, id_two);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_n_times_e, id_normal, id_eyeray);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_n_times_n, id_normal, id_normal);
+    spv_emit(ctx, 5, SpvOpFDiv, tid_vec3, id_quotient, id_n_times_e, id_n_times_n);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_scaled_n, id_quotient, id_normal);
+    spv_emit(ctx, 5, SpvOpFMul, tid_vec3, id_doubled, id_scaled_n, id_two_vec);
+    spv_emit(ctx, 5, SpvOpFSub, tid_vec3, id_reflection, id_doubled, id_eyeray);
+
+    return id_reflection;
+} // spv_reflect
+
+void emit_SPIRV_ADD(Context *ctx)
+{
+    SpirvResult sum, lhs, rhs;  // ADD: sum = lhs + rhs, as a single OpFAdd
+    spv_emit_begin_dss(ctx, &sum, &lhs, &rhs);
+    spv_emit(ctx, 5, SpvOpFAdd, sum.tid, sum.id, lhs.id, rhs.id);
+    spv_emit_end(ctx, sum);
+} // emit_SPIRV_ADD
+
+void emit_SPIRV_SUB(Context *ctx)
+{
+    SpirvResult diff, lhs, rhs;  // SUB: diff = lhs - rhs, as a single OpFSub
+    spv_emit_begin_dss(ctx, &diff, &lhs, &rhs);
+    spv_emit(ctx, 5, SpvOpFSub, diff.tid, diff.id, lhs.id, rhs.id);
+    spv_emit_end(ctx, diff);
+} // emit_SPIRV_SUB
+
+void emit_SPIRV_MUL(Context *ctx)
+{
+    SpirvResult product, lhs, rhs;  // MUL: product = lhs * rhs, as a single OpFMul
+    spv_emit_begin_dss(ctx, &product, &lhs, &rhs);
+    spv_emit(ctx, 5, SpvOpFMul, product.tid, product.id, lhs.id, rhs.id);
+    spv_emit_end(ctx, product);
+} // emit_SPIRV_MUL
+
+void emit_SPIRV_SLT(Context *ctx)
+{
+    SpirvResult dst, lhs, rhs;
+    spv_emit_begin_dss(ctx, &dst, &lhs, &rhs);
+
+    // SLT: dst = (lhs < rhs) ? one : zero. The ordered compare makes a NaN operand
+    // produce false, matching this note in the D3D docs:
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050(v=vs.85).aspx
+    // "The comparisons EQ, GT, GE, LT, and LE, when either or both operands is NaN returns FALSE"
+    const uint32 id_is_less = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFOrdLessThan, spv_get_type(ctx, STI_BVEC4), id_is_less, lhs.id, rhs.id);
+    const uint32 id_ones  = spv_getvec4_one(ctx);
+    const uint32 id_zeros = spv_getvec4_zero(ctx);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_is_less, id_ones, id_zeros);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_SLT
+
+void emit_SPIRV_SGE(Context *ctx)
+{
+    SpirvResult dst, lhs, rhs;
+    spv_emit_begin_dss(ctx, &dst, &lhs, &rhs);
+
+    // SGE: dst = (lhs >= rhs) ? one : zero. The ordered compare makes a NaN operand
+    // produce false, matching this note in the D3D docs:
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050(v=vs.85).aspx
+    // "The comparisons EQ, GT, GE, LT, and LE, when either or both operands is NaN returns FALSE"
+    const uint32 id_is_ge = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFOrdGreaterThanEqual, spv_get_type(ctx, STI_BVEC4), id_is_ge, lhs.id, rhs.id);
+    const uint32 id_ones  = spv_getvec4_one(ctx);
+    const uint32 id_zeros = spv_getvec4_zero(ctx);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_is_ge, id_ones, id_zeros);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_SGE
+
+void emit_SPIRV_MIN(Context *ctx)
+{
+    SpirvResult lesser, lhs, rhs;  // MIN: GLSL.std.450 FMin of the two sources
+    spv_emit_begin_dss(ctx, &lesser, &lhs, &rhs);
+    spv_emit(ctx, 5 + 2, SpvOpExtInst, lesser.tid, lesser.id, spv_getext(ctx), GLSLstd450FMin, lhs.id, rhs.id);
+    spv_emit_end(ctx, lesser);
+} // emit_SPIRV_MIN
+
+void emit_SPIRV_MAX(Context *ctx)
+{
+    SpirvResult greater, lhs, rhs;  // MAX: GLSL.std.450 FMax of the two sources
+    spv_emit_begin_dss(ctx, &greater, &lhs, &rhs);
+    spv_emit(ctx, 5 + 2, SpvOpExtInst, greater.tid, greater.id, spv_getext(ctx), GLSLstd450FMax, lhs.id, rhs.id);
+    spv_emit_end(ctx, greater);
+} // emit_SPIRV_MAX
+
+void emit_SPIRV_POW(Context *ctx)
+{
+    SpirvResult power, base, exponent;  // POW: power = pow(abs(base), exponent)
+    spv_emit_begin_dss(ctx, &power, &base, &exponent);
+    const uint32 id_abs_base = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, base.tid, id_abs_base, spv_getext(ctx), GLSLstd450FAbs, base.id);
+    spv_emit(ctx, 5 + 2, SpvOpExtInst, power.tid, power.id, spv_getext(ctx), GLSLstd450Pow, id_abs_base, exponent.id);
+    spv_emit_end(ctx, power);
+} // emit_SPIRV_POW
+
+static uint32 spv_extract_vec3(Context *ctx, uint32 input)
+{
+    // Shuffles components 0, 1, 2 of `input` into a new vec3 and returns its id.
+    const uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+    const uint32 id_xyz = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_xyz, input, input, 0, 1, 2);
+    pop_output(ctx);
+    return id_xyz;
+} // spv_extract_vec3
+
+void emit_SPIRV_CRS(Context *ctx)
+{
+    SpirvResult dst, lhs, rhs;
+    spv_emit_begin_dss(ctx, &dst, &lhs, &rhs);
+
+    // CRS: GLSL.std.450 Cross of the xyz parts of the two sources.
+    const uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+    const uint32 id_lhs_xyz = spv_extract_vec3(ctx, lhs.id);
+    const uint32 id_rhs_xyz = spv_extract_vec3(ctx, rhs.id);
+    const uint32 id_cross = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 2, SpvOpExtInst, tid_vec3, id_cross, spv_getext(ctx),
+             GLSLstd450Cross, id_lhs_xyz, id_rhs_xyz);
+
+    // Widen back to dst's type. According to the DirectX docs, CRS doesn't
+    // allow `w` in its writemask, so the fourth component can be anything:
+    // the code generated by `spv_assign_destarg()` will just throw it away.
+    spv_emit(ctx, 5 + 4, SpvOpVectorShuffle, dst.tid, dst.id,
+             id_cross, id_cross, 0, 1, 2, 0xFFFFFFFF);
+
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_CRS
+
+void emit_SPIRV_MAD(Context *ctx)
+{
+    // MAD: dst = (a * b) + c, emitted as an OpFMul followed by an OpFAdd.
+    const SpirvResult a = spv_load_srcarg_full(ctx, 0);
+    const SpirvResult b = spv_load_srcarg_full(ctx, 1);
+    const SpirvResult c = spv_load_srcarg_full(ctx, 2);
+    assert(a.tid == b.tid);
+    assert(a.tid == c.tid);
+    const uint32 id_product = spv_bumpid(ctx);
+    SpirvResult sum;
+    sum.tid = a.tid;
+    sum.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpFMul, a.tid, id_product, a.id, b.id);
+    spv_emit(ctx, 5, SpvOpFAdd, a.tid, sum.id, id_product, c.id);
+    pop_output(ctx);
+    spv_assign_destarg(ctx, sum);
+} // emit_SPIRV_MAD
+
+void emit_SPIRV_TEXKILL(Context *ctx)
+{
+    // TEXKILL: execute OpKill when any of the first three components of the
+    // destination register is less than zero; otherwise fall through.
+    const DestArgInfo *info = &ctx->dest_arg;
+    RegisterList *reg = spv_getreg(ctx, info->regtype, info->regnum);
+    SpirvResult value = spv_loadreg(ctx, reg);
+    const uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+    const uint32 tid_bvec3 = spv_get_type(ctx, STI_BVEC3);
+    const uint32 id_zero3 = spv_get_zero(ctx, tid_vec3);
+
+    push_output(ctx, &ctx->mainline);
+    const uint32 id_xyz = spv_emit_swizzle(ctx, value.id, tid_vec3, (0 << 0) | (1 << 2) | (2 << 4), 0x7);
+    const uint32 id_negative = spv_bumpid(ctx);
+    const uint32 id_any_negative = spv_bumpid(ctx);
+    const uint32 id_label_kill = spv_bumpid(ctx);
+    const uint32 id_label_after = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFOrdLessThan, tid_bvec3, id_negative, id_xyz, id_zero3);
+    spv_emit(ctx, 4, SpvOpAny, spv_get_type(ctx, STI_BOOL), id_any_negative, id_negative);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_after, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_any_negative, id_label_kill, id_label_after);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_kill);
+    spv_emit(ctx, 1, SpvOpKill);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_after);
+    pop_output(ctx);
+} // emit_SPIRV_TEXKILL
+
+void emit_SPIRV_DP2ADD(Context *ctx)
+{
+    // DP2ADD: dot of args 0 and 1 (mask 0x3) plus arg 2 (mask 0x1), broadcast to vec4.
+    const SpirvResult a = spv_load_srcarg(ctx, 0, 0x3);
+    const SpirvResult b = spv_load_srcarg(ctx, 1, 0x3);
+    const SpirvResult addend = spv_load_srcarg(ctx, 2, 0x1);
+    const uint32 tid_scalar = spv_get_type(ctx, STI_FLOAT);
+    const uint32 id_dot2 = spv_bumpid(ctx);
+    const uint32 id_sum = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpDot, tid_scalar, id_dot2, a.id, b.id);
+    spv_emit(ctx, 5, SpvOpFAdd, tid_scalar, id_sum, id_dot2, addend.id);
+    SpirvResult splat;
+    splat.tid = spv_get_type(ctx, STI_VEC4);
+    splat.id = spv_vectorbroadcast(ctx, splat.tid, id_sum);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, splat);
+} // emit_SPIRV_DP2ADD
+
+void emit_SPIRV_MOV(Context *ctx)
+{
+    const SpirvResult value = spv_load_srcarg_full(ctx, 0);  // MOV: dst = src0
+    spv_assign_destarg(ctx, value);
+} // emit_SPIRV_MOV
+
+void emit_SPIRV_RCP(Context *ctx)
+{
+    /*
+    Per component:
+        dst = (src != 0.0f) ? 1.0f / src : FLT_MAX;
+    The division is always emitted; OpSelect then picks between the
+    quotient and FLT_MAX using the comparison mask.
+    */
+
+    SpirvResult dst, src;
+    spv_emit_begin_ds(ctx, &dst, &src);
+
+    const SpirvTypeIdx mask_type_idx =
+       (src.tid == ctx->spirv.tid[STI_VEC4]) ? STI_BVEC4 :
+       (src.tid == ctx->spirv.tid[STI_VEC3]) ? STI_BVEC3 :
+       (src.tid == ctx->spirv.tid[STI_VEC2]) ? STI_BVEC2 : STI_BOOL;
+
+    const uint32 tid_mask = spv_get_type(ctx, mask_type_idx);
+    const uint32 id_ones = spv_get_one(ctx, src.tid);
+    const uint32 id_zeros = spv_get_zero(ctx, src.tid);
+    const uint32 id_huge = spv_get_flt_max(ctx, src.tid);
+    const uint32 id_nonzero = spv_bumpid(ctx);
+    const uint32 id_quotient = spv_bumpid(ctx);
+
+    spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_mask, id_nonzero, src.id, id_zeros);
+    spv_emit(ctx, 5, SpvOpFDiv, dst.tid, id_quotient, id_ones, src.id);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_nonzero, id_quotient, id_huge);
+
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_RCP
+
+void emit_SPIRV_RSQ(Context *ctx)
+{
+    /*
+    Per component:
+        dst = (src != 0.0f) ? 1.0f / sqrt(abs(src)) : FLT_MAX;
+    FAbs and InverseSqrt are always emitted; OpSelect then picks
+    between their result and FLT_MAX using the comparison mask.
+    */
+    SpirvResult dst, src;
+    spv_emit_begin_ds(ctx, &dst, &src);
+
+    const SpirvTypeIdx mask_type_idx =
+       (src.tid == ctx->spirv.tid[STI_VEC4]) ? STI_BVEC4 :
+       (src.tid == ctx->spirv.tid[STI_VEC3]) ? STI_BVEC3 :
+       (src.tid == ctx->spirv.tid[STI_VEC2]) ? STI_BVEC2 : STI_BOOL;
+
+    const uint32 tid_mask = spv_get_type(ctx, mask_type_idx);
+    const uint32 id_zeros = spv_get_zero(ctx, src.tid);
+    const uint32 id_huge = spv_get_flt_max(ctx, src.tid);
+    const uint32 id_nonzero = spv_bumpid(ctx);
+    const uint32 id_magnitude = spv_bumpid(ctx);
+    const uint32 id_inv_sqrt = spv_bumpid(ctx);
+
+    spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_mask, id_nonzero, src.id, id_zeros);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_magnitude, spv_getext(ctx), GLSLstd450FAbs, src.id);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_inv_sqrt, spv_getext(ctx), GLSLstd450InverseSqrt, id_magnitude);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_nonzero, id_inv_sqrt, id_huge);
+
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_RSQ
+
+void emit_SPIRV_EXP(Context *ctx)
+{
+    SpirvResult power, exponent;  // EXP: GLSL.std.450 Exp2 of the source
+    spv_emit_begin_ds(ctx, &power, &exponent);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, power.tid, power.id, spv_getext(ctx), GLSLstd450Exp2, exponent.id);
+    spv_emit_end(ctx, power);
+} // emit_SPIRV_EXP
+
+void emit_SPIRV_SGN(Context *ctx)
+{
+    SpirvResult sign, value;
+    spv_emit_begin_ds(ctx, &sign, &value);
+
+    // SGN: GLSL.std.450 FSign of the source. The instruction's src1 and src2
+    // are scratch registers left undefined afterwards, so it is perfectly
+    // valid for this implementation never to touch them.
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, sign.tid, sign.id, spv_getext(ctx), GLSLstd450FSign, value.id);
+
+    spv_emit_end(ctx, sign);
+} // emit_SPIRV_SGN
+
+void emit_SPIRV_ABS(Context *ctx)
+{
+    SpirvResult magnitude, value;  // ABS: GLSL.std.450 FAbs of the source
+    spv_emit_begin_ds(ctx, &magnitude, &value);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, magnitude.tid, magnitude.id, spv_getext(ctx), GLSLstd450FAbs, value.id);
+    spv_emit_end(ctx, magnitude);
+} // emit_SPIRV_ABS
+
+void emit_SPIRV_NRM(Context *ctx)
+{
+    /*
+        Pseudo-code for what gets emitted:
+        float len2 = dot(src.xyz, src.xyz);
+        float scale = (len2 != 0) ? inversesqrt(len2)
+                                  : FLT_MAX;
+
+        dst = src * scale;   // every component of src is scaled
+
+        The inverse square root is emitted unconditionally; OpSelect
+        then chooses between it and FLT_MAX.
+    */
+
+    const SpirvResult value = spv_load_srcarg_full(ctx, 0);
+    const uint32 tid_xyz = spv_get_type(ctx, STI_VEC3);
+    const uint32 tid_scalar = spv_get_type(ctx, STI_FLOAT);
+    const uint32 tid_flag = spv_get_type(ctx, STI_BOOL);
+    const uint32 id_zero = spv_getscalarf(ctx, 0.0f);
+    const uint32 id_huge = spv_getscalarf(ctx, FLT_MAX);
+    const uint32 id_xyz = spv_bumpid(ctx);
+    const uint32 id_len2 = spv_bumpid(ctx);
+    const uint32 id_len2_nonzero = spv_bumpid(ctx);
+    const uint32 id_inv_len = spv_bumpid(ctx);
+    const uint32 id_scale = spv_bumpid(ctx);
+    const uint32 id_scale_vec = spv_bumpid(ctx);
+
+    SpirvResult scaled;
+    scaled.tid = value.tid;
+    scaled.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_xyz, id_xyz, value.id, value.id, 0, 1, 2);
+    spv_emit(ctx, 5, SpvOpDot, tid_scalar, id_len2, id_xyz, id_xyz);
+    spv_emit(ctx, 5, SpvOpFOrdNotEqual, tid_flag, id_len2_nonzero, id_len2, id_zero);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, tid_scalar, id_inv_len, spv_getext(ctx), GLSLstd450InverseSqrt, id_len2);
+    spv_emit(ctx, 6, SpvOpSelect, tid_scalar, id_scale, id_len2_nonzero, id_inv_len, id_huge);
+    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, scaled.tid, id_scale_vec, id_scale, id_scale, id_scale, id_scale);
+    spv_emit(ctx, 5, SpvOpFMul, scaled.tid, scaled.id, value.id, id_scale_vec);
+    pop_output(ctx);
+    spv_assign_destarg(ctx, scaled);
+} // emit_SPIRV_NRM
+
+void emit_SPIRV_FRC(Context *ctx)
+{
+    SpirvResult fraction, value;  // FRC: GLSL.std.450 Fract of the source
+    spv_emit_begin_ds(ctx, &fraction, &value);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, fraction.tid, fraction.id, spv_getext(ctx), GLSLstd450Fract, value.id);
+    spv_emit_end(ctx, fraction);
+} // emit_SPIRV_FRC
+
+void emit_SPIRV_LOG(Context *ctx)
+{
+    SpirvResult dst, value;
+    spv_emit_begin_ds(ctx, &dst, &value);
+
+    // LOG(x) := (abs(x) == vec4(0.0)) ? vec4(-FLT_MAX) : log2(abs(x))
+
+    // magnitude = abs(x)
+    const uint32 id_magnitude = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_magnitude, spv_getext(ctx), GLSLstd450FAbs, value.id);
+
+    // scalar 0.0 broadcast to dst's type
+    const uint32 id_zero_vec = spv_vectorbroadcast(ctx, dst.tid, spv_getscalarf(ctx, 0.0f));
+
+    // per-component mask: magnitude == 0.0
+    const uint32 id_is_zero = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFOrdEqual, spv_get_type(ctx, STI_BVEC4), id_is_zero, id_magnitude, id_zero_vec);
+
+    // log2(magnitude), emitted for every component
+    const uint32 id_log2 = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, dst.tid, id_log2, spv_getext(ctx), GLSLstd450Log2, id_magnitude);
+
+    // scalar -FLT_MAX broadcast to dst's type
+    const uint32 id_neg_huge_vec = spv_vectorbroadcast(ctx, dst.tid, spv_getscalarf(ctx, -FLT_MAX));
+
+    // where the mask is set take -FLT_MAX, elsewhere the log2 result
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_is_zero, id_neg_huge_vec, id_log2);
+
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_LOG
+
+void emit_SPIRV_SINCOS(Context *ctx)
+{
+    // SINCOS: dst.x = cos(V) and dst.y = sin(V), where V is source arg 0 loaded
+    // with mask 0x1. A component is only computed when the writemask selects
+    // it; everything else in the constructed vec4 is zero.
+    //
+    // For vs_2_0 and vs_2_x this instruction also has a src1 and src2 which
+    // provide a couple of constants. We just ignore these in any case.
+    //
+    // Note there is no push_output() here: the instructions go to whatever
+    // output is current.
+    const SpirvResult angle = spv_load_srcarg(ctx, 0, 0x1);
+    const int mask = ctx->dest_arg.writemask;
+    const uint32 id_zero = spv_get_zero(ctx, angle.tid);
+
+    uint32 id_cos = id_zero;
+    if (mask & 1) // .x = cos(V)
+    {
+        id_cos = spv_bumpid(ctx);
+        spv_emit(ctx, 5 + 1, SpvOpExtInst, angle.tid, id_cos, spv_getext(ctx), GLSLstd450Cos, angle.id);
+    } // if
+
+    uint32 id_sin = id_zero;
+    if (mask & 2) // .y = sin(V)
+    {
+        id_sin = spv_bumpid(ctx);
+        spv_emit(ctx, 5 + 1, SpvOpExtInst, angle.tid, id_sin, spv_getext(ctx), GLSLstd450Sin, angle.id);
+    } // if
+
+    // Assemble (cos, sin, 0, 0).
+    SpirvResult packed;
+    packed.tid = spv_get_type(ctx, STI_VEC4);
+    packed.id = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, packed.tid, packed.id, id_cos, id_sin, id_zero, id_zero);
+
+    spv_assign_destarg(ctx, packed);
+} // emit_SPIRV_SINCOS
+
+void emit_SPIRV_MOVA(Context *ctx)
+{
+    // MOVA: apply GLSL.std.450 Round to the source, then OpConvertFToS to an ivec4.
+    const SpirvResult value = spv_load_srcarg_full(ctx, 0);
+    assert(value.tid == spv_get_type(ctx, STI_VEC4));
+
+    const uint32 id_round = spv_bumpid(ctx);
+    SpirvResult address;
+    address.tid = spv_get_type(ctx, STI_IVEC4);
+    address.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5 + 1, SpvOpExtInst, spv_get_type(ctx, STI_VEC4), id_round,
+             spv_getext(ctx), GLSLstd450Round, value.id);
+    spv_emit(ctx, 4, SpvOpConvertFToS, address.tid, address.id, id_round);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, address);
+} // emit_SPIRV_MOVA
+
+void emit_SPIRV_CMP(Context *ctx)
+{
+    SpirvResult dst, probe, if_true, if_false;
+    spv_emit_begin_dsss(ctx, &dst, &probe, &if_true, &if_false);
+    const uint32 id_zeros = spv_get_zero(ctx, probe.tid);
+    // CMP: per component, dst = (probe >= 0) ? if_true : if_false (unordered compare).
+    const uint32 id_mask = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFUnordGreaterThanEqual, spv_get_type(ctx, STI_BVEC4), id_mask, probe.id, id_zeros);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_mask, if_true.id, if_false.id);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_CMP
+
+void emit_SPIRV_CND(Context *ctx)
+{
+    SpirvResult dst, probe, if_true, if_false;
+    spv_emit_begin_dsss(ctx, &dst, &probe, &if_true, &if_false);
+    const uint32 id_halves = spv_get_constant_composite(ctx, probe.tid, ctx->spirv.id_0_5, 0.5f);
+    // CND: per component, dst = (probe > 0.5) ? if_true : if_false (unordered compare).
+    const uint32 id_mask = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpFUnordGreaterThan, spv_get_type(ctx, STI_BVEC4), id_mask, probe.id, id_halves);
+    spv_emit(ctx, 6, SpvOpSelect, dst.tid, dst.id, id_mask, if_true.id, if_false.id);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_CND
+
+void emit_SPIRV_LIT(Context *ctx)
+{
+    SpirvResult lit, coeffs;
+    spv_emit_begin_ds(ctx, &lit, &coeffs);
+    // LIT is emitted as an OpFunctionCall; the callee's id is reserved lazily
+    // here on first use (the function body itself is not emitted by this routine).
+    if (ctx->spirv.id_func_lit == 0)
+        ctx->spirv.id_func_lit = spv_bumpid(ctx);
+
+    spv_emit(ctx, 5, SpvOpFunctionCall, lit.tid, lit.id, ctx->spirv.id_func_lit, coeffs.id);
+    spv_emit_end(ctx, lit);
+} // emit_SPIRV_LIT
+
+void emit_SPIRV_DST(Context *ctx)
+{
+    // DST: dst = (1.0, src0.y * src1.y, src0.z, src1.w).
+    SpirvResult dst, src0, src1;
+    spv_emit_begin_dss(ctx, &dst, &src0, &src1);
+
+    // spv_emit_begin_dss() has already given dst its vec4 type and a fresh id,
+    // so neither is assigned again here (a second spv_bumpid() would waste an id).
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 id_1_0 = spv_getscalarf(ctx, 1.0f);
+    uint32 id_src0_y = spv_bumpid(ctx);
+    uint32 id_src1_y = spv_bumpid(ctx);
+    uint32 id_src0_z = spv_bumpid(ctx);
+    uint32 id_src1_w = spv_bumpid(ctx);
+    uint32 id_dst_y = spv_bumpid(ctx);
+
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src0_y, src0.id, 1);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src1_y, src1.id, 1);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src0_z, src0.id, 2);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_src1_w, src1.id, 3);
+    spv_emit(ctx, 5, SpvOpFMul, tid_float, id_dst_y, id_src0_y, id_src1_y);
+    spv_emit(ctx, 3 + 4, SpvOpCompositeConstruct, dst.tid, dst.id, id_1_0, id_dst_y, id_src0_z, id_src1_w);
+    spv_emit_end(ctx, dst);
+} // emit_SPIRV_DST
+
+void emit_SPIRV_LRP(Context *ctx)
+{
+    // LRP: dst = mix(start, end, scale) = start + scale * (end - start),
+    // emitted as a single GLSL.std.450 FMix.
+    const SpirvResult scale = spv_load_srcarg_full(ctx, 0);
+    const SpirvResult end = spv_load_srcarg_full(ctx, 1);
+    const SpirvResult start = spv_load_srcarg_full(ctx, 2);
+    assert(start.tid == end.tid);
+    SpirvResult mixed;
+    mixed.id = spv_bumpid(ctx);
+    mixed.tid = start.tid;
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5 + 3, SpvOpExtInst, mixed.tid, mixed.id, spv_getext(ctx), GLSLstd450FMix, start.id, end.id, scale.id);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, mixed);
+} // emit_SPIRV_LRP
+
+static void spv_emit_vecXmatrix(Context *ctx, int rows, int writemask)
+{
+    int row;
+    // Dots source arg 0 with `rows` consecutive registers starting at source
+    // arg 1; components of the vec4 result beyond `rows` are set to zero.
+    assert(rows <= 4);
+    assert(writemask == 0x7 || writemask == 0xF);
+    const uint32 id_vector = spv_load_srcarg(ctx, 0, writemask).id;
+    const uint32 tid_scalar = spv_get_type(ctx, STI_FLOAT);
+
+    const RegisterType matrix_regtype = ctx->source_args[1].regtype;
+    const int matrix_regnum = ctx->source_args[1].regnum;
+
+    uint32 id_dots[4];
+    for (row = 0; row < rows; row++)
+    {
+        SpirvResult operand = spv_loadreg(ctx, spv_getreg(ctx, matrix_regtype, matrix_regnum + row));
+        operand = spv_swizzle(ctx, operand, SPV_NO_SWIZZLE, writemask);
+        const uint32 id_dot = spv_bumpid(ctx);
+
+        push_output(ctx, &ctx->mainline);
+        spv_emit(ctx, 5, SpvOpDot, tid_scalar, id_dot, id_vector, operand.id);
+        pop_output(ctx);
+
+        id_dots[row] = id_dot;
+    } // for
+
+    SpirvResult packed;
+    packed.tid = spv_get_type(ctx, STI_VEC4);
+    packed.id = spv_bumpid(ctx);
+
+    uint32 id_padding = 0;
+    if (rows < 4)
+        id_padding = spv_getscalarf(ctx, 0.0f);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit_part(ctx, 3 + 4, 3, SpvOpCompositeConstruct, packed.tid, packed.id);
+    for (row = 0; row < rows; row++) spv_emit_word(ctx, id_dots[row]);
+    for (row = rows; row < 4; row++) spv_emit_word(ctx, id_padding);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, packed);
+} // spv_emit_vecXmatrix
+
+void emit_SPIRV_M4X4(Context *ctx)
+{
+    // float4 * (4 columns, 4 rows) -> float4: rows = 4, source mask = 0xF
+    spv_emit_vecXmatrix(ctx, 4, 0xF);
+} // emit_SPIRV_M4X4
+
+void emit_SPIRV_M4X3(Context *ctx)
+{
+    // float4 * (4 columns, 3 rows) -> float3: rows = 3, source mask = 0xF
+    spv_emit_vecXmatrix(ctx, 3, 0xF);
+} // emit_SPIRV_M4X3
+
+void emit_SPIRV_M3X4(Context *ctx)
+{
+    // float3 * (3 columns, 4 rows) -> float4: rows = 4, source mask = 0x7
+    spv_emit_vecXmatrix(ctx, 4, 0x7);
+} // emit_SPIRV_M3X4
+
+void emit_SPIRV_M3X3(Context *ctx)
+{
+    // float3 * (3 columns, 3 rows) -> float3: rows = 3, source mask = 0x7
+    spv_emit_vecXmatrix(ctx, 3, 0x7);
+} // emit_SPIRV_M3X3
+
+void emit_SPIRV_M3X2(Context *ctx)
+{
+    // float3 * (3 columns, 2 rows) -> float2: rows = 2, source mask = 0x7
+    spv_emit_vecXmatrix(ctx, 2, 0x7);
+} // emit_SPIRV_M3X2
+
+void emit_SPIRV_TEXLD(Context *ctx)
+{
+    // Emits the image-sample instruction for TEXLD and its TEXLDB/TEXLDP
+    // variants; how the operands are found depends on the shader version.
+    if (!shader_version_atleast(ctx, 1, 4))
+    {
+        // Here the destination register number selects both the sampler and
+        // the texture-coordinate register.
+        DestArgInfo *dst_info = &ctx->dest_arg;
+        RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, dst_info->regnum);
+        RegisterList *treg = spv_getreg(ctx, dst_info->regtype, dst_info->regnum);
+
+        // Bail out like the SM2+ path does rather than dereference NULL below.
+        if (sreg == NULL)
+        {
+            fail(ctx, "TEXLD using undeclared sampler");
+            return;
+        } // if
+
+        // Variables are not declared using dcl opcodes, so handle it in this instruction.
+        assert(sreg->spirv.iddecl == 0);
+        assert(treg->spirv.iddecl == 0);
+
+        // Prep the result
+        SpirvResult result;
+        result.tid = spv_get_type(ctx, STI_VEC4);
+        result.id = spv_bumpid(ctx);
+        SpirvResult sampler = spv_loadreg(ctx, sreg);
+        // OpImageSampleImplicitLod should ignore the components of the
+        // texcoord that it doesn't need, so we don't need to mask it.
+        SpirvResult texcoord = spv_loadreg(ctx, treg);
+
+        // Generate the instruction.
+        push_output(ctx, &ctx->mainline);
+        spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id,
+                 sampler.id, texcoord.id);
+        pop_output(ctx);
+
+        // Emit the result, finally.
+        assert(!isscalar(ctx, ctx->shader_type, sreg->regtype, sreg->regnum));
+        spv_assign_destarg(ctx, result);
+    } // if
+
+    else if (!shader_version_atleast(ctx, 2, 0))
+    {
+        // ps_1_4 is different, too!
+        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
+        return;
+    } // else if
+
+    else
+    {
+        const SourceArgInfo *samp_arg = &ctx->source_args[1];
+        RegisterList *sampler_reg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum);
+        if (sampler_reg == NULL)
+        {
+            fail(ctx, "TEXLD using undeclared sampler");
+            return;
+        } // if
+
+        // Prep the result and load both operands up front: the TEXLDB bias
+        // below is extracted from the loaded texcoord value.
+        uint32 vec4_tid = spv_get_type(ctx, STI_VEC4);
+        uint32 result = spv_bumpid(ctx);
+        uint32 sampler = spv_load_srcarg_full(ctx, 1).id;
+        // OpImageSampleImplicitLod should ignore the components of the
+        // texcoord that it doesn't need, so we don't need to mask it.
+        uint32 texcoord = spv_load_srcarg_full(ctx, 0).id;
+
+        // Special case for TEXLDB
+        // !!! FIXME: does the d3d bias value map directly to GLSL?
+        uint32 bias = 0;
+        uint32 instruction_length = 5;
+        if (ctx->instruction_controls == CONTROL_TEXLDB)
+        {
+            uint32 float_tid = spv_get_type(ctx, STI_FLOAT);
+            bias = spv_bumpid(ctx);
+            instruction_length = 7;
+
+            // The w component of the texcoord specifies the bias. Extract it from the
+            // loaded value (OpCompositeExtract needs a value id, not a declaration id).
+            push_output(ctx, &ctx->mainline);
+            spv_emit(ctx, 4 + 1, SpvOpCompositeExtract, float_tid, bias, texcoord, 3);
+            pop_output(ctx);
+        } // if
+
+        // Determine the opcode
+        SpvOp opcode;
+        if (ctx->instruction_controls == CONTROL_TEXLDP)
+        {
+            if ((TextureType) sampler_reg->index == TEXTURE_TYPE_CUBE)
+                fail(ctx, "TEXLDP on a cubemap");  // !!! FIXME: is this legal?
+            opcode = SpvOpImageSampleProjImplicitLod;
+        } // if
+        else
+            opcode = SpvOpImageSampleImplicitLod;
+
+        // Generate the instruction.
+        push_output(ctx, &ctx->mainline);
+        spv_emit_part(ctx, instruction_length, 5, opcode, vec4_tid, result,
+                      sampler, texcoord);
+        if (ctx->instruction_controls == CONTROL_TEXLDB)
+        {
+            // ... include the bias operand, if applicable
+            spv_emit_word(ctx, SpvImageOperandsBiasMask);
+            spv_emit_word(ctx, bias);
+        } // if
+        pop_output(ctx);
+
+        // Emit the result, finally.
+        assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
+        SpirvResult r;
+        r.id = result;
+        r.tid = vec4_tid;
+        spv_assign_destarg(ctx, r);
+    } // else
+} // emit_SPIRV_TEXLD
+
+void emit_SPIRV_IF(Context *ctx)
+{
+    // IF: open a selection construct on source arg 0 (loaded with mask 0x1).
+    const SpirvResult flag = spv_load_srcarg(ctx, 0, 0x1);
+    const uint32 tid_flag = spv_get_type(ctx, STI_BOOL);
+    uint32 id_test = flag.id;
+    // Predicate register is already boolean so no need to convert.
+    if (flag.tid != tid_flag)
+    {
+        const uint32 id_int_zero = spv_getscalari(ctx, 0);
+        id_test = spv_bumpid(ctx);
+        spv_emit(ctx, 5, SpvOpINotEqual, tid_flag, id_test, flag.id, id_int_zero);
+    } // if
+    const uint32 id_label_then = spv_bumpid(ctx);
+    const uint32 id_label_end = spv_bumpid(ctx);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_end, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_test, id_label_then, id_label_end);
+    // Remember where the false target just written sits so ELSE can patch it.
+    spv_branch_push(ctx, id_label_end, buffer_size(ctx->output) - 4);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_then);
+} // emit_SPIRV_IF
+
+void emit_SPIRV_IFC(Context *ctx)
+{
+    const SpvOp compare = spv_get_comparison(ctx);
+    const SpirvResult lhs = spv_load_srcarg(ctx, 0, 0x1);
+    const SpirvResult rhs = spv_load_srcarg(ctx, 1, 0x1);
+    const uint32 tid_flag = spv_get_type(ctx, STI_BOOL);
+    const uint32 id_test = spv_bumpid(ctx);
+    const uint32 id_label_then = spv_bumpid(ctx);
+    const uint32 id_label_end = spv_bumpid(ctx);
+    // IFC: compare the two mask-0x1 sources, then open the selection as IF does.
+    spv_emit(ctx, 5, compare, tid_flag, id_test, lhs.id, rhs.id);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, id_label_end, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_test, id_label_then, id_label_end);
+    spv_branch_push(ctx, id_label_end, buffer_size(ctx->output) - 4);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_then);
+} // emit_SPIRV_IFC
+
+void emit_SPIRV_ELSE(Context *ctx)
+{
+    uint32 id_label_end, false_target_offset;
+    spv_branch_get(ctx, &id_label_end, &false_target_offset);
+    const uint32 id_label_otherwise = spv_bumpid(ctx);
+    // Patch the recorded branch word to the else label, then end the 'then' block.
+    buffer_patch(ctx->output, false_target_offset, &id_label_otherwise, sizeof(id_label_otherwise));
+    spv_emit(ctx, 2, SpvOpBranch, id_label_end);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_otherwise);
+} // emit_SPIRV_ELSE
+
+void emit_SPIRV_ENDIF(Context *ctx)
+{
+    uint32 id_label_end, unused_offset;
+    spv_branch_pop(ctx, &id_label_end, &unused_offset);
+    // ENDIF: branch to the merge label, then start the merge block itself.
+    spv_emit(ctx, 2, SpvOpBranch, id_label_end);
+    spv_emit(ctx, 2, SpvOpLabel, id_label_end);
+} // emit_SPIRV_ENDIF
+
+void emit_SPIRV_REP(Context *ctx)
+{
+    SpirvLoopInfo loop = {0};
+    uint32 id_label_init = spv_bumpid(ctx);
+    loop.id_label_header = spv_bumpid(ctx);
+    uint32 id_label_cond = spv_bumpid(ctx);
+    uint32 id_label_body = spv_bumpid(ctx);
+    loop.id_label_continue = spv_bumpid(ctx);
+    loop.id_label_merge = spv_bumpid(ctx);
+    // REP: opens a counted loop; emit_SPIRV_ENDREP pops `loop` and closes it.
+    // emit end of previous block
+    spv_emit(ctx, 2, SpvOpBranch, id_label_init);
+
+    // emit loop init block
+    spv_emit(ctx, 2, SpvOpLabel, id_label_init);
+    // This block only exists to allow use of SpvOpPhi in loop header block.
+    // SpvOpPhi needs to refer to predecessor by its label ID, so insert dummy
+    // block just so we know what the ID is.
+    SpirvResult src0 = spv_load_srcarg(ctx, 0, 0x1);
+    // src0 is the counter's initial value; the loop body runs while the counter != 0.
+    uint32 tid_bool = spv_get_type(ctx, STI_BOOL);
+    loop.tid_counter = src0.tid;
+    loop.id_counter = spv_bumpid(ctx);
+    loop.id_counter_next = spv_bumpid(ctx);
+
+    uint32 id_cond = spv_bumpid(ctx);
+    uint32 id_zero = spv_getscalari(ctx, 0);
+    spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header);
+
+    // emit loop header block
+    spv_emit(ctx, 2, SpvOpLabel, loop.id_label_header);
+    spv_emit(ctx, 7, SpvOpPhi, loop.tid_counter, loop.id_counter,
+        src0.id, id_label_init,
+        loop.id_counter_next, loop.id_label_continue
+    );
+    spv_emit(ctx, 4, SpvOpLoopMerge, loop.id_label_merge, loop.id_label_continue, 0);
+    spv_emit(ctx, 2, SpvOpBranch, id_label_cond);
+
+    // emit loop condition block
+    spv_emit(ctx, 2, SpvOpLabel, id_label_cond);
+    spv_emit(ctx, 5, SpvOpINotEqual, tid_bool, id_cond, loop.id_counter, id_zero);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_body, loop.id_label_merge);
+
+    // emit start of loop body block
+    spv_emit(ctx, 2, SpvOpLabel, id_label_body);
+    // Save the loop's ids so the matching ENDREP can emit the continue and merge blocks.
+    spv_loop_push(ctx, &loop);
+} // emit_SPIRV_REP
+
+void emit_SPIRV_ENDREP(Context *ctx)
+{
+    // ENDREP: finish the loop body, emit the continue block, then open the merge block.
+    uint32 const_one = spv_getscalari(ctx, 1);
+    SpirvLoopInfo rep;
+    spv_loop_pop(ctx, &rep);
+
+    // The end of the body jumps to the continue block.
+    spv_emit(ctx, 2, SpvOpBranch, rep.id_label_continue);
+
+    // Continue block: counter_next = counter - 1, then take the back edge to the header.
+    spv_emit(ctx, 2, SpvOpLabel, rep.id_label_continue);
+    spv_emit(ctx, 5, SpvOpISub, rep.tid_counter, rep.id_counter_next, rep.id_counter, const_one);
+    spv_emit(ctx, 2, SpvOpBranch, rep.id_label_header);
+    // Whatever is emitted after the loop lands in the merge block.
+    spv_emit(ctx, 2, SpvOpLabel, rep.id_label_merge);
+} // emit_SPIRV_ENDREP
+
+void emit_SPIRV_LOOP(Context *ctx)
+{
+    // LOOP: open a loop driven by the three-component counter in source arg 1; emit_SPIRV_ENDLOOP closes it.
+    SpirvLoopInfo loop = {0};
+    uint32 id_label_init = spv_bumpid(ctx);
+    loop.id_label_header = spv_bumpid(ctx);
+    uint32 id_label_cond = spv_bumpid(ctx);
+    uint32 id_label_body = spv_bumpid(ctx);
+    loop.id_label_continue = spv_bumpid(ctx);
+    loop.id_label_merge = spv_bumpid(ctx);
+    /*
+        i#.x = iteration count; every round we decrement it and terminate on 0.
+        i#.y = aL initial value; every round we add the aL step to it.
+        i#.z = aL step value;
+
+        We use a copy of i# as the iteration variable. Compared to the rep loop,
+        we only need one extra instruction to extract the current aL value as a
+        single int.
+
+        rep i0
+            for (int i = i0.x; i; i--)
+
+        loop aL, i0
+            for (int3 i = i0, int aL = i.y; i.x; i.x--, aL += i.z)
+    */
+
+    // emit end of previous block
+    spv_emit(ctx, 2, SpvOpBranch, id_label_init);
+
+    // emit loop init block
+    spv_emit(ctx, 2, SpvOpLabel, id_label_init);
+    // This block only exists to allow use of SpvOpPhi in loop header block.
+    // SpvOpPhi needs to refer to predecessor by its label ID, so insert dummy block just so we
+    // know what the ID is.
+
+    // src0 has aL register. Does it hold any interesting information?
+    SpirvResult src1 = spv_load_srcarg(ctx, 1, 0x7);  // the i# register, mask 0x7
+    uint32 tid_int = spv_get_type(ctx, STI_INT);
+    uint32 tid_bool = spv_get_type(ctx, STI_BOOL);
+
+    loop.tid_counter = src1.tid;
+    loop.id_counter = spv_bumpid(ctx);
+    loop.id_counter_next = spv_bumpid(ctx);  // only reserved here; emit_SPIRV_ENDLOOP defines it
+    loop.id_aL = spv_bumpid(ctx);
+    uint32 id_counter_x = spv_bumpid(ctx);
+
+    uint32 id_cond = spv_bumpid(ctx);
+    uint32 id_zero = spv_getscalari(ctx, 0);
+    spv_emit(ctx, 2, SpvOpBranch, loop.id_label_header);
+
+    // emit loop header block: counter = phi(src1 from init, counter_next from continue); aL = counter[1]
+    spv_emit(ctx, 2, SpvOpLabel, loop.id_label_header);
+    spv_emit(ctx, 7, SpvOpPhi, loop.tid_counter, loop.id_counter,
+        src1.id, id_label_init,
+        loop.id_counter_next, loop.id_label_continue
+    );
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_int, loop.id_aL, loop.id_counter, 1);
+    spv_emit(ctx, 4, SpvOpLoopMerge, loop.id_label_merge, loop.id_label_continue, 0);
+    spv_emit(ctx, 2, SpvOpBranch, id_label_cond);
+
+    // emit loop condition block: enter the body while counter[0] != 0, else go to merge
+    spv_emit(ctx, 2, SpvOpLabel, id_label_cond);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_int, id_counter_x, loop.id_counter, 0);
+    spv_emit(ctx, 5, SpvOpINotEqual, tid_bool, id_cond, id_counter_x, id_zero);
+    spv_emit(ctx, 4, SpvOpBranchConditional, id_cond, id_label_body, loop.id_label_merge);
+
+    // emit start of loop body block
+    spv_emit(ctx, 2, SpvOpLabel, id_label_body);
+
+    spv_loop_push(ctx, &loop);  // recorded for later spv_loop_get()/spv_loop_pop() callers
+} // emit_SPIRV_LOOP
+
+void emit_SPIRV_ENDLOOP(Context *ctx)
+{
+    // ENDLOOP: step the three-component counter (x - 1, y + z, z unchanged) and loop back.
+    uint32 int_tid = spv_get_type(ctx, STI_INT);
+    uint32 ivec2_tid = spv_get_type(ctx, STI_IVEC2);
+    uint32 neg_one = spv_getscalari(ctx, -1);
+    uint32 step_z = spv_bumpid(ctx);
+    uint32 delta_xy = spv_bumpid(ctx);
+    uint32 cur_xy = spv_bumpid(ctx);
+    uint32 next_xy = spv_bumpid(ctx);
+
+    SpirvLoopInfo lp;
+    spv_loop_pop(ctx, &lp);
+
+    // The end of the body jumps to the continue block.
+    spv_emit(ctx, 2, SpvOpBranch, lp.id_label_continue);
+
+    // Continue block: delta = (-1, z); counter_next = (counter.xy + delta, z).
+    spv_emit(ctx, 2, SpvOpLabel, lp.id_label_continue);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, int_tid, step_z, lp.id_counter, 2);
+    spv_emit(ctx, 5, SpvOpCompositeConstruct, ivec2_tid, delta_xy, neg_one, step_z);
+    spv_emit(ctx, 7, SpvOpVectorShuffle, ivec2_tid, cur_xy, lp.id_counter, lp.id_counter, 0, 1);
+    spv_emit(ctx, 5, SpvOpIAdd, ivec2_tid, next_xy, cur_xy, delta_xy);
+    spv_emit(ctx, 5, SpvOpCompositeConstruct, lp.tid_counter, lp.id_counter_next, next_xy, step_z);
+    spv_emit(ctx, 2, SpvOpBranch, lp.id_label_header);
+
+    // Whatever is emitted after the loop lands in the merge block.
+    spv_emit(ctx, 2, SpvOpLabel, lp.id_label_merge);
+} // emit_SPIRV_ENDLOOP
+
+void emit_SPIRV_BREAKC(Context *ctx)
+{
+    // BREAKC: compare src0 with src1 (mask 0x1 each) and leave the current loop when the test holds.
+    SpirvLoopInfo enclosing;
+    spv_loop_get(ctx, &enclosing);
+    SpvOp compare = spv_get_comparison(ctx);
+    SpirvResult lhs = spv_load_srcarg(ctx, 0, 0x1);
+    SpirvResult rhs = spv_load_srcarg(ctx, 1, 0x1);
+    uint32 bool_tid = spv_get_type(ctx, STI_BOOL);
+    uint32 test_id = spv_bumpid(ctx);
+    uint32 resume_label = spv_bumpid(ctx);
+    // True -> the loop's merge label; false -> a fresh block that is also
+    // the merge target of this selection.
+    spv_emit(ctx, 5, compare, bool_tid, test_id, lhs.id, rhs.id);
+    spv_emit(ctx, 3, SpvOpSelectionMerge, resume_label, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, test_id, enclosing.id_label_merge, resume_label);
+    spv_emit(ctx, 2, SpvOpLabel, resume_label);
+} // emit_SPIRV_BREAKC
+
+void emit_SPIRV_BREAKP(Context *ctx)
+{
+    // BREAKP: leave the current loop when the value loaded from src0 (mask 0x1) is true.
+    SpirvLoopInfo enclosing;
+    spv_loop_get(ctx, &enclosing);
+    SpirvResult predicate = spv_load_srcarg(ctx, 0, 0x1);
+    uint32 resume_label = spv_bumpid(ctx);
+
+    // True -> the loop's merge label; false -> carry on in a fresh block.
+    spv_emit(ctx, 3, SpvOpSelectionMerge, resume_label, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, predicate.id, enclosing.id_label_merge, resume_label);
+    spv_emit(ctx, 2, SpvOpLabel, resume_label);
+} // emit_SPIRV_BREAKP
+
+void emit_SPIRV_LABEL(Context *ctx)
+{
+    // LABEL: begin a void function whose result id is the id declared for the label register.
+    const SourceArgInfo *label_arg = &ctx->source_args[0];
+    RegisterList *label_reg = spv_getreg(ctx, label_arg->regtype, label_arg->regnum);
+    spv_check_read_reg_id(ctx, label_reg);
+    uint32 void_tid = spv_get_type(ctx, STI_VOID);
+    uint32 fn_tid = spv_get_type(ctx, STI_FUNC_VOID);
+    uint32 fn_id = label_reg->spirv.iddecl;
+    uint32 entry_label = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpFunction, void_tid, fn_id, 0, fn_tid);  // 0: no function-control bits
+    spv_emit(ctx, 2, SpvOpLabel, entry_label);
+    pop_output(ctx);
+} // emit_SPIRV_LABEL
+
+void emit_SPIRV_RET(Context *ctx)
+{
+    spv_emit_func_end(ctx);  // RET: all of the work is delegated to spv_emit_func_end()
+} // emit_SPIRV_RET
+
+void emit_SPIRV_CALL(Context *ctx)
+{
+    // CALL: invoke the void function whose id was declared for the label register in source arg 0.
+    const SourceArgInfo *label_arg = &ctx->source_args[0];
+    RegisterList *label_reg = spv_getreg(ctx, label_arg->regtype, label_arg->regnum);
+    spv_check_read_reg_id(ctx, label_reg);
+    uint32 void_tid = spv_get_type(ctx, STI_VOID);
+    uint32 call_id = spv_bumpid(ctx);
+    uint32 callee_id = label_reg->spirv.iddecl;
+
+    push_output(ctx, &ctx->mainline);
+    if (ctx->loops <= 0)
+        spv_emit(ctx, 4, SpvOpFunctionCall, void_tid, call_id, callee_id);
+    else  // a call made while ctx->loops is non-zero is reported as unsupported
+        failf(ctx, "Function calls referencing aL not implemented.");
+
+    pop_output(ctx);
+} // emit_SPIRV_CALL
+
+void emit_SPIRV_CALLNZ(Context *ctx)
+{
+    // CALLNZ: call the function named by source arg 0 only when source arg 1 (mask 0x1) is true.
+    const SourceArgInfo *label_arg = &ctx->source_args[0];
+    RegisterList *label_reg = spv_getreg(ctx, label_arg->regtype, label_arg->regnum);
+    spv_check_read_reg_id(ctx, label_reg);
+    SpirvResult predicate = spv_load_srcarg(ctx, 1, 0x1);
+    uint32 void_tid = spv_get_type(ctx, STI_VOID);
+    uint32 call_label = spv_bumpid(ctx);
+    uint32 callee_id = label_reg->spirv.iddecl;
+    uint32 call_id = spv_bumpid(ctx);
+    uint32 resume_label = spv_bumpid(ctx);
+
+    // if (predicate) goto call_label; else goto resume_label;
+    spv_emit(ctx, 3, SpvOpSelectionMerge, resume_label, 0);
+    spv_emit(ctx, 4, SpvOpBranchConditional, predicate.id, call_label, resume_label);
+
+    spv_emit(ctx, 2, SpvOpLabel, call_label);
+    if (ctx->loops <= 0)
+        spv_emit(ctx, 4, SpvOpFunctionCall, void_tid, call_id, callee_id);
+    else  // a call made while ctx->loops is non-zero is reported as unsupported
+        failf(ctx, "Function calls referencing aL not implemented.");
+    spv_emit(ctx, 2, SpvOpBranch, resume_label);
+    // Both paths rejoin here.
+    spv_emit(ctx, 2, SpvOpLabel, resume_label);
+} // emit_SPIRV_CALLNZ
+
+void emit_SPIRV_TEXLDD(Context *ctx)
+{
+    // TEXLDD: sample with explicit gradients (source args: texcoord, sampler, grad x, grad y).
+    const SourceArgInfo *samp_arg = &ctx->source_args[1];
+    if (reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum) == NULL)
+    {
+        fail(ctx, "TEXLDD using undeclared sampler");
+        return;
+    } // if
+
+    // Reserve the vec4 result first, then load the four operands in full.
+    SpirvResult sampled;
+    sampled.tid = spv_get_type(ctx, STI_VEC4);
+    sampled.id = spv_bumpid(ctx);
+    SpirvResult coord = spv_load_srcarg_full(ctx, 0);
+    SpirvResult image = spv_load_srcarg_full(ctx, 1);
+    SpirvResult ddx = spv_load_srcarg_full(ctx, 2);
+    SpirvResult ddy = spv_load_srcarg_full(ctx, 3);
+
+    // The texcoord is passed unmasked: per the original author's note,
+    // SpvOpImageSampleExplicitLod should ignore the components it doesn't
+    // need.
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 8, SpvOpImageSampleExplicitLod, sampled.tid, sampled.id, image.id,
+             coord.id, SpvImageOperandsGradMask, ddx.id, ddy.id);
+    pop_output(ctx);
+
+    // Hand the sample to the destination register.
+    assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
+    spv_assign_destarg(ctx, sampled);
+} // emit_SPIRV_TEXLDD
+
+void emit_SPIRV_SETP(Context *ctx)
+{
+    // SETP: compare the fully loaded src0 and src1; the result is typed STI_BVEC4.
+    SpirvResult lhs = spv_load_srcarg_full(ctx, 0);
+    SpirvResult rhs = spv_load_srcarg_full(ctx, 1);
+
+    SpirvResult flags;
+    flags.tid = spv_get_type(ctx, STI_BVEC4);
+    flags.id = spv_bumpid(ctx);
+    SpvOp compare = spv_get_comparison(ctx);
+
+    // The comparison opcode itself comes from spv_get_comparison().
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, compare, flags.tid, flags.id, lhs.id, rhs.id);
+    pop_output(ctx);
+    spv_assign_destarg(ctx, flags);
+} // emit_SPIRV_SETP
+
+void emit_SPIRV_TEXLDL(Context *ctx)
+{
+    // TEXLDL: sample at an explicit level of detail read from component 3 of the texcoord.
+    const SourceArgInfo *samp_arg = &ctx->source_args[1];
+    RegisterList *declared = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, samp_arg->regnum);
+    if (declared == NULL)
+    {
+        fail(ctx, "TEXLDL using undeclared sampler");
+        return;
+    } // if
+    assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
+
+    // Reserve the vec4 result, then load the sampler (arg 1) before the texcoord (arg 0).
+    SpirvResult sampled;
+    sampled.tid = spv_get_type(ctx, STI_VEC4);
+    sampled.id = spv_bumpid(ctx);
+    SpirvResult image = spv_load_srcarg_full(ctx, 1);
+    SpirvResult coord = spv_load_srcarg_full(ctx, 0);
+
+    // The w component of the texcoord specifies the LOD; it is extracted below.
+    uint32 float_tid = spv_get_type(ctx, STI_FLOAT);
+    uint32 lod_id = spv_bumpid(ctx);
+
+    // The texcoord is passed unmasked: per the original author's note,
+    // SpvOpImageSampleExplicitLod should ignore the components it doesn't
+    // need.
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, float_tid, lod_id, coord.id, 3);
+    spv_emit(ctx, 7, SpvOpImageSampleExplicitLod, sampled.tid, sampled.id, image.id,
+             coord.id, SpvImageOperandsLodMask, lod_id);
+    pop_output(ctx);
+
+    // Hand the sample to the destination register.
+    spv_assign_destarg(ctx, sampled);
+} // emit_SPIRV_TEXLDL
+
+void emit_SPIRV_BREAK(Context *ctx)
+{
+    SpirvLoopInfo loop;  // BREAK is unconditional, so no OpSelectionMerge is emitted for it
+    spv_loop_get(ctx, &loop);
+    spv_emit(ctx, 2, SpvOpBranch, loop.id_label_merge);  // leave the loop via its merge label
+    spv_emit(ctx, 2, SpvOpLabel, spv_bumpid(ctx));  // fresh block so later instructions stay inside a block
+} // emit_SPIRV_BREAK
+
+void emit_SPIRV_TEXM3X2PAD(Context *ctx)
+{
+    // Intentionally a no-op: the work happens in emit_SPIRV_TEXM3X2TEX(), which reads ctx->texm3x2pad_src0/dst0.
+} // emit_SPIRV_TEXM3X2PAD
+
+void emit_SPIRV_TEXM3X2TEX(Context *ctx)
+{
+    if (ctx->texm3x2pad_src0 == -1)
+        return;
+    // Samples the destination-numbered sampler at vec2(dot(src0, src1), dot(src2, dst)).
+    DestArgInfo *pDstInfo = &ctx->dest_arg;
+    // NOTE(review): pSReg is not NULL-checked, unlike emit_SPIRV_TEXLDL; confirm spv_loadreg() copes.
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, pDstInfo->regnum);
+    RegisterList *pSrc0 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0);
+    RegisterList *pSrc1 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0);
+    RegisterList *pSrc2 = spv_getreg(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum);
+    RegisterList *pDst  = spv_getreg(ctx, pDstInfo->regtype, pDstInfo->regnum);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+    SpirvResult src0 = spv_loadreg(ctx, pSrc0);
+    SpirvResult src1 = spv_loadreg(ctx, pSrc1);
+    SpirvResult src2 = spv_loadreg(ctx, pSrc2);
+    SpirvResult src3 = spv_loadreg(ctx, pDst);
+    // Apply mask 0x7 with no swizzle to every operand before the dot products.
+    src0 = spv_swizzle(ctx, src0, SPV_NO_SWIZZLE, 0x7);
+    src1 = spv_swizzle(ctx, src1, SPV_NO_SWIZZLE, 0x7);
+    src2 = spv_swizzle(ctx, src2, SPV_NO_SWIZZLE, 0x7);
+    src3 = spv_swizzle(ctx, src3, SPV_NO_SWIZZLE, 0x7);
+
+    SpirvResult result;
+    uint32 tid_float = spv_get_type(ctx, STI_FLOAT);
+    uint32 tid_vec2  = spv_get_type(ctx, STI_VEC2);
+    result.tid  = spv_get_type(ctx, STI_VEC4);
+    uint32 id_x = spv_bumpid(ctx);
+    uint32 id_y = spv_bumpid(ctx);
+    uint32 id_texcoord = spv_bumpid(ctx);
+    result.id = spv_bumpid(ctx);
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_x, src0.id, src1.id);
+    spv_emit(ctx, 5, SpvOpDot, tid_float, id_y, src2.id, src3.id);
+    spv_emit(ctx, 3+2, SpvOpCompositeConstruct, tid_vec2, id_texcoord, id_x, id_y);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_texcoord);
+    pop_output(ctx);
+    spv_assign_destarg(ctx, result);
+} // emit_SPIRV_TEXM3X2TEX
+
+void emit_SPIRV_TEXM3X3PAD(Context *ctx)
+{
+    // Intentionally a no-op: the work happens in the emit_SPIRV_TEXM3X3*() function that ends the sequence.
+} // emit_SPIRV_TEXM3X3PAD
+
+void emit_SPIRV_TEXM3X3(Context *ctx)
+{
+    // Nothing is emitted while ctx->texm3x3pad_src1 still holds -1.
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+
+    // The destination receives
+    //   vec4(
+    //      dot({src0}.xyz, {src1}.xyz),
+    //      dot({src2}.xyz, {src3}.xyz),
+    //      dot({dst}.xyz,  {src4}.xyz),
+    //      1
+    //   )
+    // with the first three components taken from spv_texm3x3_setup().
+    uint32 one_id = spv_getscalarf(ctx, 1.0f);
+    SpirvTexm3x3SetupResult rows = spv_texm3x3_setup(ctx);
+
+    SpirvResult packed;
+    packed.tid = spv_get_type(ctx, STI_VEC4);
+    packed.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 7, SpvOpCompositeConstruct, packed.tid, packed.id,
+             rows.id_res_x, rows.id_res_y, rows.id_res_z, one_id);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, packed);
+} // emit_SPIRV_TEXM3X3
+
+void emit_SPIRV_TEXM3X3TEX(Context *ctx)
+{
+    // Nothing is emitted while ctx->texm3x3pad_src1 still holds -1.
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+
+    // Emits the equivalent of
+    //   texture{ttypestr}({sampler},
+    //       vec3(
+    //           dot({src0}.xyz, {src1}.xyz),
+    //           dot({src2}.xyz, {src3}.xyz),
+    //           dot({dst}.xyz,  {src4}.xyz)
+    //       ),
+    //   )
+    // where the sampler is looked up by the destination register number.
+    RegisterList *sampler_reg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum);
+    SpirvResult image = spv_loadreg(ctx, sampler_reg);
+
+    SpirvTexm3x3SetupResult rows = spv_texm3x3_setup(ctx);
+
+    uint32 vec3_tid = spv_get_type(ctx, STI_VEC3);
+    uint32 vec4_tid = spv_get_type(ctx, STI_VEC4);
+    uint32 coord_id = spv_bumpid(ctx);
+
+    SpirvResult sampled;
+    sampled.tid = vec4_tid;
+    sampled.id = spv_bumpid(ctx);
+
+    push_output(ctx, &ctx->mainline);
+    spv_emit(ctx, 6, SpvOpCompositeConstruct, vec3_tid, coord_id,
+             rows.id_res_x, rows.id_res_y, rows.id_res_z);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, sampled.tid, sampled.id, image.id, coord_id);
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, sampled);
+} // emit_SPIRV_TEXM3X3TEX
+
+void emit_SPIRV_TEXM3X3SPEC(Context *ctx)
+{
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+    // Samples the destination-numbered sampler at spv_reflect(normal, eye ray); the eye ray is source arg 1.
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum);
+    RegisterList *pSrc5 = spv_getreg(ctx, ctx->source_args[1].regtype, ctx->source_args[1].regnum);
+
+    SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx);
+
+    uint32 tid_vec3 = spv_get_type(ctx, STI_VEC3);
+
+    push_output(ctx, &ctx->mainline);
+    // normal = vec3 built from the three results of spv_texm3x3_setup()
+    uint32 id_normal = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_normal,
+        setup.id_res_x, setup.id_res_y, setup.id_res_z
+    );
+
+    SpirvResult src5 = spv_loadreg(ctx, pSrc5);
+    // eye ray = components 0..2 of the loaded register
+    uint32 id_eyeray  = spv_bumpid(ctx);
+    spv_emit(ctx, 5 + 3, SpvOpVectorShuffle, tid_vec3, id_eyeray, src5.id, src5.id, 0, 1, 2);
+
+    uint32 id_reflected = spv_reflect(ctx, id_normal, id_eyeray);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+
+    SpirvResult result;
+    result.tid = spv_get_type(ctx, STI_VEC4);
+    result.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_reflected);
+
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, result);
+} // emit_SPIRV_TEXM3X3SPEC
+
+void emit_SPIRV_TEXM3X3VSPEC(Context *ctx)
+{
+    if (ctx->texm3x3pad_src1 == -1)
+        return;
+    // Like emit_SPIRV_TEXM3X3SPEC, but the eye ray is assembled from values returned by spv_texm3x3_setup().
+    RegisterList *pSReg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, ctx->dest_arg.regnum);
+
+    SpirvTexm3x3SetupResult setup = spv_texm3x3_setup(ctx);
+
+    uint32 tid_float    = spv_get_type(ctx, STI_FLOAT);
+    uint32 tid_vec3     = spv_get_type(ctx, STI_VEC3);
+
+    push_output(ctx, &ctx->mainline);
+    // normal = vec3 built from the three results of spv_texm3x3_setup()
+    uint32 id_normal = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_normal,
+        setup.id_res_x, setup.id_res_y, setup.id_res_z
+    );
+    // eye ray = component 3 of setup.id_dst_pad0, setup.id_dst_pad1 and setup.id_dst, in that order
+    uint32 id_eyeray_x = spv_bumpid(ctx);
+    uint32 id_eyeray_y = spv_bumpid(ctx);
+    uint32 id_eyeray_z = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_x, setup.id_dst_pad0, 3);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_y, setup.id_dst_pad1, 3);
+    spv_emit(ctx, 5, SpvOpCompositeExtract, tid_float, id_eyeray_z, setup.id_dst,      3);
+
+    uint32 id_eyeray = spv_bumpid(ctx);
+    spv_emit(ctx, 3 + 3, SpvOpCompositeConstruct, tid_vec3, id_eyeray,
+        id_eyeray_x, id_eyeray_y, id_eyeray_z
+    );
+
+    uint32 id_reflected = spv_reflect(ctx, id_normal, id_eyeray);
+
+    SpirvResult sampler = spv_loadreg(ctx, pSReg);
+
+    SpirvResult result;
+    result.tid = spv_get_type(ctx, STI_VEC4);
+    result.id = spv_bumpid(ctx);
+    spv_emit(ctx, 5, SpvOpImageSampleImplicitLod, result.tid, result.id, sampler.id, id_reflected);
+
+    pop_output(ctx);
+
+    spv_assign_destarg(ctx, result);
+} // emit_SPIRV_TEXM3X3VSPEC
+
+void emit_SPIRV_TEXBEM(Context *ctx)
+{
+    spv_texbem(ctx, 0);  // shared helper; TEXBEML passes 1 here (presumably the luminance flag -- confirm)
+} // emit_SPIRV_TEXBEM
+
+void emit_SPIRV_TEXBEML(Context *ctx)
+{
+    spv_texbem(ctx, 1);  // shared helper; TEXBEM passes 0 here (1 presumably enables luminance -- confirm)
+} // emit_SPIRV_TEXBEML
+
+void emit_SPIRV_EXPP(Context *ctx)
+{
+    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
+    emit_SPIRV_EXP(ctx);  // I guess this is just partial precision EXP? It is emitted exactly like EXP.
+} // emit_SPIRV_EXPP
+
+void emit_SPIRV_LOGP(Context *ctx)
+{
+    // LOGP is just low-precision LOG, but we'll take the higher precision.
+    emit_SPIRV_LOG(ctx);  // emitted exactly like LOG
+} // emit_SPIRV_LOGP
+
+void emit_SPIRV_DSX(Context *ctx)
+{
+    SpirvResult deriv, operand;  // DSX: both are filled in by spv_emit_begin_ds()
+    spv_emit_begin_ds(ctx, &deriv, &operand);
+    spv_emit(ctx, 4, SpvOpDPdx, deriv.tid, deriv.id, operand.id);
+    spv_emit_end(ctx, deriv);
+} // emit_SPIRV_DSX
+
+void emit_SPIRV_DSY(Context *ctx)
+{
+    SpirvResult deriv, operand;  // DSY: both are filled in by spv_emit_begin_ds()
+    spv_emit_begin_ds(ctx, &deriv, &operand);
+    spv_emit(ctx, 4, SpvOpDPdy, deriv.tid, deriv.id, operand.id);
+    spv_emit_end(ctx, deriv);
+} // emit_SPIRV_DSY
+
+void emit_SPIRV_RESERVED(Context *ctx)
+{
+    // Intentionally emits nothing; per the original note, this opcode fails in the state machine.
+} // emit_SPIRV_RESERVED
+
+// !!! FIXME: The following are unimplemented even in the GLSL emitter; each line instantiates a stub via the macro.
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
+EMIT_SPIRV_OPCODE_UNIMPLEMENTED_FUNC(BEM)
+
+#endif  // SUPPORT_PROFILE_SPIRV
+
+#pragma GCC visibility pop
diff --git a/profiles/mojoshader_profile_spirv.h b/profiles/mojoshader_profile_spirv.h
new file mode 100644
index 00000000..ceea039f
--- /dev/null
+++ b/profiles/mojoshader_profile_spirv.h
@@ -0,0 +1,202 @@
+/**
+ * MojoShader; generate shader programs from bytecode of compiled
+ *  Direct3D shaders.
+ *
+ * Please see the file LICENSE.txt in the source's root directory.
+ *
+ *  This file written by Ryan C. Gordon.
+ */
+
+#ifndef MOJOSHADER_PROFILE_SPIRV_H
+#define MOJOSHADER_PROFILE_SPIRV_H
+
+#if SUPPORT_PROFILE_SPIRV
+
+// For baked-in constants in SPIR-V we want to store scalar values that we can
+// use in composites, since OpConstantComposite takes result ids as its
+// constituents rather than value literals.
+// We'll store these lists grouped by type and have the lists themselves
+// ordered by value in the ctx.spirv struct.
+typedef struct ComponentList
+{
+    // result id from OpConstant
+    uint32 id;
+    union {
+        float f;
+        int i;
+        uint32 u;
+    } v;  // the constant's value; which member applies depends on the list it is stored in
+    struct ComponentList *next;  // next node of the singly linked list
+} ComponentList;
+
+typedef struct SpirvLoopInfo  // state shared between a REP/LOOP opener and its ENDREP/ENDLOOP/BREAK* handlers
+{
+    uint32 tid_counter;        // type id of the loop counter (taken from the loaded source argument)
+    uint32 id_counter;         // result id of the OpPhi in the header block
+    uint32 id_counter_next;    // id of the stepped counter, defined in the continue block
+    uint32 id_aL;              // current aL value; written by emit_SPIRV_LOOP only (0 for REP)
+    uint32 id_label_header;    // label of the header block (target of the back edge)
+    uint32 id_label_continue;  // label of the continue block
+    uint32 id_label_merge;     // label of the block that follows the loop
+} SpirvLoopInfo;
+
+typedef enum SpirvType  // base type; shifted left by 2 when packed into a SpirvTypeIdx value
+{
+    ST_FLOAT = 0,
+    ST_SINT = 1,
+    ST_UINT = 2,
+    ST_BOOL = 3,
+} SpirvType;
+
+typedef enum SpirvStorageClass  // storage class; the low two bits of the STI_PTR_* SpirvTypeIdx values
+{
+    SC_INPUT = 0,
+    SC_OUTPUT = 1,
+    SC_PRIVATE = 2,
+    SC_UNIFORM_CONSTANT = 3,
+} SpirvStorageClass;
+
+/* Slot numbers for the cached type ids. Not all type parameter combinations are actually
+ * used, but it's all rounded up to 64 so it's easier to work with.
+ */
+typedef enum SpirvTypeIdx
+{
+    STI_VOID          = 0,
+    STI_FUNC_VOID     = 1,
+    STI_FUNC_LIT      = 2,
+    STI_IMAGE2D       = 3,
+    STI_IMAGE3D       = 4,
+    STI_IMAGECUBE     = 5,
+    STI_PTR_IMAGE2D   = 6,
+    STI_PTR_IMAGE3D   = 7,
+    STI_PTR_IMAGECUBE = 8,
+
+    // 7 unused entries (9..15)
+
+    // 4 base types * 4 vector sizes = 16 entries: bit 4 set, bits 3-2 = SpirvType, bits 1-0 = component count - 1
+    STI_FLOAT = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 0,
+    STI_VEC2  = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 1,
+    STI_VEC3  = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 2,
+    STI_VEC4  = (0 << 5) | (1 << 4) | (ST_FLOAT << 2) | 3,
+    STI_INT   = (0 << 5) | (1 << 4) | (ST_SINT  << 2) | 0,
+    STI_IVEC2 = (0 << 5) | (1 << 4) | (ST_SINT  << 2) | 1,
+    STI_IVEC3 = (0 << 5) | (1 << 4) | (ST_SINT  << 2) | 2,
+    STI_IVEC4 = (0 << 5) | (1 << 4) | (ST_SINT  << 2) | 3,
+    STI_UINT  = (0 << 5) | (1 << 4) | (ST_UINT  << 2) | 0,
+    STI_UVEC2 = (0 << 5) | (1 << 4) | (ST_UINT  << 2) | 1,
+    STI_UVEC3 = (0 << 5) | (1 << 4) | (ST_UINT  << 2) | 2,
+    STI_UVEC4 = (0 << 5) | (1 << 4) | (ST_UINT  << 2) | 3,
+    STI_BOOL  = (0 << 5) | (1 << 4) | (ST_BOOL  << 2) | 0,
+    STI_BVEC2 = (0 << 5) | (1 << 4) | (ST_BOOL  << 2) | 1,
+    STI_BVEC3 = (0 << 5) | (1 << 4) | (ST_BOOL  << 2) | 2,
+    STI_BVEC4 = (0 << 5) | (1 << 4) | (ST_BOOL  << 2) | 3,
+
+    // 2 dims (vec4 + scalar) * 4 base types * 4 storage classes: bit 5 set, bit 4 = vec4, bits 3-2 = SpirvType, bits 1-0 = SpirvStorageClass
+    STI_PTR_FLOAT_I = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_INPUT,
+    STI_PTR_FLOAT_O = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_OUTPUT,
+    STI_PTR_FLOAT_P = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_PRIVATE,
+    STI_PTR_FLOAT_U = (1 << 5) | (0 << 4) | (ST_FLOAT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_INT_I   = (1 << 5) | (0 << 4) | (ST_SINT  << 2) | SC_INPUT,
+    STI_PTR_INT_O   = (1 << 5) | (0 << 4) | (ST_SINT  << 2) | SC_OUTPUT,
+    STI_PTR_INT_P   = (1 << 5) | (0 << 4) | (ST_SINT  << 2) | SC_PRIVATE,
+    STI_PTR_INT_U   = (1 << 5) | (0 << 4) | (ST_SINT  << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_UINT_I  = (1 << 5) | (0 << 4) | (ST_UINT  << 2) | SC_INPUT,
+    STI_PTR_UINT_O  = (1 << 5) | (0 << 4) | (ST_UINT  << 2) | SC_OUTPUT,
+    STI_PTR_UINT_P  = (1 << 5) | (0 << 4) | (ST_UINT  << 2) | SC_PRIVATE,
+    STI_PTR_UINT_U  = (1 << 5) | (0 << 4) | (ST_UINT  << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_BOOL_I  = (1 << 5) | (0 << 4) | (ST_BOOL  << 2) | SC_INPUT,
+    STI_PTR_BOOL_O  = (1 << 5) | (0 << 4) | (ST_BOOL  << 2) | SC_OUTPUT,
+    STI_PTR_BOOL_P  = (1 << 5) | (0 << 4) | (ST_BOOL  << 2) | SC_PRIVATE,
+    STI_PTR_BOOL_U  = (1 << 5) | (0 << 4) | (ST_BOOL  << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_VEC4_I  = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_INPUT,
+    STI_PTR_VEC4_O  = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_OUTPUT,
+    STI_PTR_VEC4_P  = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_PRIVATE,
+    STI_PTR_VEC4_U  = (1 << 5) | (1 << 4) | (ST_FLOAT << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_IVEC4_I = (1 << 5) | (1 << 4) | (ST_SINT  << 2) | SC_INPUT,
+    STI_PTR_IVEC4_O = (1 << 5) | (1 << 4) | (ST_SINT  << 2) | SC_OUTPUT,
+    STI_PTR_IVEC4_P = (1 << 5) | (1 << 4) | (ST_SINT  << 2) | SC_PRIVATE,
+    STI_PTR_IVEC4_U = (1 << 5) | (1 << 4) | (ST_SINT  << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_UVEC4_I = (1 << 5) | (1 << 4) | (ST_UINT  << 2) | SC_INPUT,
+    STI_PTR_UVEC4_O = (1 << 5) | (1 << 4) | (ST_UINT  << 2) | SC_OUTPUT,
+    STI_PTR_UVEC4_P = (1 << 5) | (1 << 4) | (ST_UINT  << 2) | SC_PRIVATE,
+    STI_PTR_UVEC4_U = (1 << 5) | (1 << 4) | (ST_UINT  << 2) | SC_UNIFORM_CONSTANT,
+    STI_PTR_BVEC4_I = (1 << 5) | (1 << 4) | (ST_BOOL  << 2) | SC_INPUT,
+    STI_PTR_BVEC4_O = (1 << 5) | (1 << 4) | (ST_BOOL  << 2) | SC_OUTPUT,
+    STI_PTR_BVEC4_P = (1 << 5) | (1 << 4) | (ST_BOOL  << 2) | SC_PRIVATE,
+    STI_PTR_BVEC4_U = (1 << 5) | (1 << 4) | (ST_BOOL  << 2) | SC_UNIFORM_CONSTANT,
+
+    // 3 + 6 + 16 + 32 = 57 entries (+ 7 unused) = 64
+
+    // Helpers
+    STI_LENGTH_,  // 64: one past STI_PTR_BVEC4_U (63)
+
+    STI_MISC_START_ = 0,
+    STI_MISC_END_   = 8,  // NOTE(review): equals STI_PTR_IMAGECUBE, while the CORE/PTR ends below are one-past-the-end -- confirm users treat it as inclusive
+    STI_CORE_START_ = (0 << 5) | (1 << 4),
+    STI_PTR_START_  = (1 << 5) | (0 << 4),
+    STI_CORE_END_   = STI_PTR_START_,
+    STI_PTR_END_    = STI_LENGTH_,
+} SpirvTypeIdx;
+
+// In addition to the result ID we also need the type ID (can't assume everything is vec4).
+typedef struct SpirvResult
+{
+    uint32 tid;  // id of the value's type
+    uint32 id;   // result id of the value itself
+} SpirvResult;
+
+typedef struct SpirvContext  // SPIR-V specific emitter state
+{
+    uint32 id_vs_main_end;
+    // ext. glsl instructions have been imported
+    uint32 idext;
+    uint32 idmax;
+    uint32 idmain;
+    uint32 id_func_lit;
+    uint32 inoutcount;
+    uint32 id_var_fragcoord;
+    uint32 id_var_vpos;
+    uint32 id_var_frontfacing;
+    uint32 id_var_vface;
+    // ids for types so we can reuse them after they're declared
+    uint32 tid[STI_LENGTH_];  // one slot per SpirvTypeIdx value
+    uint32 idtrue;
+    uint32 idfalse;
+    uint32 id_0_0[4];  // NOTE(review): the [4] arrays below presumably hold one id per vector width -- confirm
+    uint32 id_0_125[4];
+    uint32 id_0_25[4];
+    uint32 id_0_5[4];
+    uint32 id_1_0[4];
+    uint32 id_2_0[4];
+    uint32 id_4_0[4];
+    uint32 id_8_0[4];
+    uint32 id_flt_max[4];
+    struct {
+        uint32 idvec4;
+        uint32 idivec4;
+        uint32 idbool;
+    } uniform_arrays;
+    struct {
+        uint32 idvec4;
+    } constant_arrays;
+    struct {
+        ComponentList f;
+        ComponentList i;
+        ComponentList u;
+    } cl;  // scalar-constant lists, one per component type (see ComponentList)
+
+    SpirvPatchTable patch_table;
+
+    // Required only on ps_1_3 and below, which only has 4 registers for this purpose.
+    struct {
+        uint32 idtexbem;
+        uint32 idtexbeml;
+    } sampler_extras[4];
+
+    int loop_stack_idx;  // NOTE(review): presumably the current depth of loop_stack -- confirm in spv_loop_push/pop
+    SpirvLoopInfo loop_stack[32];  // fixed capacity: at most 32 entries
+} SpirvContext;
+
+#endif // if SUPPORT_PROFILE_SPIRV
+
+#endif
diff --git a/spirv/GLSL.std.450.h b/spirv/GLSL.std.450.h
new file mode 100644
index 00000000..df31092b
--- /dev/null
+++ b/spirv/GLSL.std.450.h
@@ -0,0 +1,131 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+#ifndef GLSLstd450_H
+#define GLSLstd450_H
+
+static const int GLSLstd450Version = 100;  // not referenced anywhere else in this header
+static const int GLSLstd450Revision = 1;   // not referenced anywhere else in this header
+
+enum GLSLstd450 {
+    GLSLstd450Bad = 0,              // Don't use
+
+    GLSLstd450Round = 1,
+    GLSLstd450RoundEven = 2,
+    GLSLstd450Trunc = 3,
+    GLSLstd450FAbs = 4,
+    GLSLstd450SAbs = 5,
+    GLSLstd450FSign = 6,
+    GLSLstd450SSign = 7,
+    GLSLstd450Floor = 8,
+    GLSLstd450Ceil = 9,
+    GLSLstd450Fract = 10,
+
+    GLSLstd450Radians = 11,
+    GLSLstd450Degrees = 12,
+    GLSLstd450Sin = 13,
+    GLSLstd450Cos = 14,
+    GLSLstd450Tan = 15,
+    GLSLstd450Asin = 16,
+    GLSLstd450Acos = 17,
+    GLSLstd450Atan = 18,
+    GLSLstd450Sinh = 19,
+    GLSLstd450Cosh = 20,
+    GLSLstd450Tanh = 21,
+    GLSLstd450Asinh = 22,
+    GLSLstd450Acosh = 23,
+    GLSLstd450Atanh = 24,
+    GLSLstd450Atan2 = 25,
+
+    GLSLstd450Pow = 26,
+    GLSLstd450Exp = 27,
+    GLSLstd450Log = 28,
+    GLSLstd450Exp2 = 29,
+    GLSLstd450Log2 = 30,
+    GLSLstd450Sqrt = 31,
+    GLSLstd450InverseSqrt = 32,
+
+    GLSLstd450Determinant = 33,
+    GLSLstd450MatrixInverse = 34,
+
+    GLSLstd450Modf = 35,            // second operand needs an OpVariable to write to
+    GLSLstd450ModfStruct = 36,      // no OpVariable operand
+    GLSLstd450FMin = 37,
+    GLSLstd450UMin = 38,
+    GLSLstd450SMin = 39,
+    GLSLstd450FMax = 40,
+    GLSLstd450UMax = 41,
+    GLSLstd450SMax = 42,
+    GLSLstd450FClamp = 43,
+    GLSLstd450UClamp = 44,
+    GLSLstd450SClamp = 45,
+    GLSLstd450FMix = 46,
+    GLSLstd450IMix = 47,            // Reserved
+    GLSLstd450Step = 48,
+    GLSLstd450SmoothStep = 49,
+
+    GLSLstd450Fma = 50,
+    GLSLstd450Frexp = 51,            // second operand needs an OpVariable to write to
+    GLSLstd450FrexpStruct = 52,      // no OpVariable operand
+    GLSLstd450Ldexp = 53,
+
+    GLSLstd450PackSnorm4x8 = 54,
+    GLSLstd450PackUnorm4x8 = 55,
+    GLSLstd450PackSnorm2x16 = 56,
+    GLSLstd450PackUnorm2x16 = 57,
+    GLSLstd450PackHalf2x16 = 58,
+    GLSLstd450PackDouble2x32 = 59,
+    GLSLstd450UnpackSnorm2x16 = 60,
+    GLSLstd450UnpackUnorm2x16 = 61,
+    GLSLstd450UnpackHalf2x16 = 62,
+    GLSLstd450UnpackSnorm4x8 = 63,
+    GLSLstd450UnpackUnorm4x8 = 64,
+    GLSLstd450UnpackDouble2x32 = 65,
+
+    GLSLstd450Length = 66,
+    GLSLstd450Distance = 67,
+    GLSLstd450Cross = 68,
+    GLSLstd450Normalize = 69,
+    GLSLstd450FaceForward = 70,
+    GLSLstd450Reflect = 71,
+    GLSLstd450Refract = 72,
+
+    GLSLstd450FindILsb = 73,
+    GLSLstd450FindSMsb = 74,
+    GLSLstd450FindUMsb = 75,
+
+    GLSLstd450InterpolateAtCentroid = 76,
+    GLSLstd450InterpolateAtSample = 77,
+    GLSLstd450InterpolateAtOffset = 78,
+
+    GLSLstd450NMin = 79,
+    GLSLstd450NMax = 80,
+    GLSLstd450NClamp = 81,
+
+    GLSLstd450Count                 // 82: one more than the last value listed above
+};
+
+#endif  // #ifndef GLSLstd450_H
diff --git a/spirv/spirv.h b/spirv/spirv.h
new file mode 100644
index 00000000..d48488e9
--- /dev/null
+++ b/spirv/spirv.h
@@ -0,0 +1,871 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+** 
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+** 
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+** 
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 
+** 
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+/*
+** This header is automatically generated by the same tool that creates
+** the Binary Section of the SPIR-V specification.
+*/
+
+/*
+** Enumeration tokens for SPIR-V, in various styles:
+**   C, C++, C++11, JSON, Lua, Python
+** 
+** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
+** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
+** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
+** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
+** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
+** 
+** Some tokens act like mask values, which can be OR'd together,
+** while others are mutually exclusive.  The mask-like ones have
+** "Mask" in their name, and a parallel "Shift" enum that holds the
+** shift amount x (so that mask == 1 << x) for each corresponding enumerant.
+*/
+
+#ifndef spirv_H
+#define spirv_H
+
+typedef unsigned int SpvId;  // Id type: a plain unsigned int (not referenced again in this header).
+
+#define SPV_VERSION 0x10000
+#define SPV_REVISION 3
+
+static const unsigned int SpvMagicNumber = 0x07230203;  // presumably the first word of a SPIR-V module -- TODO confirm against the spec
+static const unsigned int SpvVersion = 0x00010000;  // same value as the SPV_VERSION macro above
+static const unsigned int SpvRevision = 3;  // same value as the SPV_REVISION macro above
+static const unsigned int SpvOpCodeMask = 0xffff;  // low 16 bits; presumably isolates a SpvOp value from an instruction's first word -- TODO confirm
+static const unsigned int SpvWordCountShift = 16;  // presumably the instruction word count sits above the 16 opcode bits -- TODO confirm
+
+typedef enum SpvSourceLanguage_ {  // Source-language tokens; mutually exclusive values, contiguous 0-4.
+    SpvSourceLanguageUnknown = 0,
+    SpvSourceLanguageESSL = 1,
+    SpvSourceLanguageGLSL = 2,
+    SpvSourceLanguageOpenCL_C = 3,
+    SpvSourceLanguageOpenCL_CPP = 4,
+} SpvSourceLanguage;
+
+typedef enum SpvExecutionModel_ {  // Execution-model tokens (Vertex through Kernel); mutually exclusive values, contiguous 0-6.
+    SpvExecutionModelVertex = 0,
+    SpvExecutionModelTessellationControl = 1,
+    SpvExecutionModelTessellationEvaluation = 2,
+    SpvExecutionModelGeometry = 3,
+    SpvExecutionModelFragment = 4,
+    SpvExecutionModelGLCompute = 5,
+    SpvExecutionModelKernel = 6,
+} SpvExecutionModel;
+
+typedef enum SpvAddressingModel_ {  // Addressing-model tokens; mutually exclusive values, contiguous 0-2.
+    SpvAddressingModelLogical = 0,
+    SpvAddressingModelPhysical32 = 1,
+    SpvAddressingModelPhysical64 = 2,
+} SpvAddressingModel;
+
+typedef enum SpvMemoryModel_ {  // Memory-model tokens; mutually exclusive values, contiguous 0-2.
+    SpvMemoryModelSimple = 0,
+    SpvMemoryModelGLSL450 = 1,
+    SpvMemoryModelOpenCL = 2,
+} SpvMemoryModel;
+
+typedef enum SpvExecutionMode_ {  // Execution-mode tokens; mutually exclusive values in 0-31, not contiguous (see the note at 14).
+    SpvExecutionModeInvocations = 0,
+    SpvExecutionModeSpacingEqual = 1,
+    SpvExecutionModeSpacingFractionalEven = 2,
+    SpvExecutionModeSpacingFractionalOdd = 3,
+    SpvExecutionModeVertexOrderCw = 4,
+    SpvExecutionModeVertexOrderCcw = 5,
+    SpvExecutionModePixelCenterInteger = 6,
+    SpvExecutionModeOriginUpperLeft = 7,
+    SpvExecutionModeOriginLowerLeft = 8,
+    SpvExecutionModeEarlyFragmentTests = 9,
+    SpvExecutionModePointMode = 10,
+    SpvExecutionModeXfb = 11,
+    SpvExecutionModeDepthReplacing = 12,
+    SpvExecutionModeDepthGreater = 14,  // this header defines no enumerant for 13
+    SpvExecutionModeDepthLess = 15,
+    SpvExecutionModeDepthUnchanged = 16,
+    SpvExecutionModeLocalSize = 17,
+    SpvExecutionModeLocalSizeHint = 18,
+    SpvExecutionModeInputPoints = 19,
+    SpvExecutionModeInputLines = 20,
+    SpvExecutionModeInputLinesAdjacency = 21,
+    SpvExecutionModeTriangles = 22,
+    SpvExecutionModeInputTrianglesAdjacency = 23,
+    SpvExecutionModeQuads = 24,
+    SpvExecutionModeIsolines = 25,
+    SpvExecutionModeOutputVertices = 26,
+    SpvExecutionModeOutputPoints = 27,
+    SpvExecutionModeOutputLineStrip = 28,
+    SpvExecutionModeOutputTriangleStrip = 29,
+    SpvExecutionModeVecTypeHint = 30,
+    SpvExecutionModeContractionOff = 31,
+} SpvExecutionMode;
+
+typedef enum SpvStorageClass_ {  // Storage-class tokens; mutually exclusive values, contiguous 0-11.
+    SpvStorageClassUniformConstant = 0,
+    SpvStorageClassInput = 1,
+    SpvStorageClassUniform = 2,
+    SpvStorageClassOutput = 3,
+    SpvStorageClassWorkgroup = 4,
+    SpvStorageClassCrossWorkgroup = 5,
+    SpvStorageClassPrivate = 6,
+    SpvStorageClassFunction = 7,
+    SpvStorageClassGeneric = 8,
+    SpvStorageClassPushConstant = 9,
+    SpvStorageClassAtomicCounter = 10,
+    SpvStorageClassImage = 11,
+} SpvStorageClass;
+
+typedef enum SpvDim_ {  // Dim tokens (1D through SubpassData); mutually exclusive values, contiguous 0-6.
+    SpvDim1D = 0,
+    SpvDim2D = 1,
+    SpvDim3D = 2,
+    SpvDimCube = 3,
+    SpvDimRect = 4,
+    SpvDimBuffer = 5,
+    SpvDimSubpassData = 6,
+} SpvDim;
+
+typedef enum SpvSamplerAddressingMode_ {  // Sampler addressing-mode tokens; mutually exclusive values, contiguous 0-4.
+    SpvSamplerAddressingModeNone = 0,
+    SpvSamplerAddressingModeClampToEdge = 1,
+    SpvSamplerAddressingModeClamp = 2,
+    SpvSamplerAddressingModeRepeat = 3,
+    SpvSamplerAddressingModeRepeatMirrored = 4,
+} SpvSamplerAddressingMode;
+
+typedef enum SpvSamplerFilterMode_ {  // Sampler filter-mode tokens; two mutually exclusive values.
+    SpvSamplerFilterModeNearest = 0,
+    SpvSamplerFilterModeLinear = 1,
+} SpvSamplerFilterMode;
+
+typedef enum SpvImageFormat_ {  // Image-format tokens; mutually exclusive values, contiguous 0-39.
+    SpvImageFormatUnknown = 0,  // the zero value is the "Unknown" format
+    SpvImageFormatRgba32f = 1,
+    SpvImageFormatRgba16f = 2,
+    SpvImageFormatR32f = 3,
+    SpvImageFormatRgba8 = 4,
+    SpvImageFormatRgba8Snorm = 5,
+    SpvImageFormatRg32f = 6,
+    SpvImageFormatRg16f = 7,
+    SpvImageFormatR11fG11fB10f = 8,
+    SpvImageFormatR16f = 9,
+    SpvImageFormatRgba16 = 10,
+    SpvImageFormatRgb10A2 = 11,
+    SpvImageFormatRg16 = 12,
+    SpvImageFormatRg8 = 13,
+    SpvImageFormatR16 = 14,
+    SpvImageFormatR8 = 15,
+    SpvImageFormatRgba16Snorm = 16,
+    SpvImageFormatRg16Snorm = 17,
+    SpvImageFormatRg8Snorm = 18,
+    SpvImageFormatR16Snorm = 19,
+    SpvImageFormatR8Snorm = 20,
+    SpvImageFormatRgba32i = 21,
+    SpvImageFormatRgba16i = 22,
+    SpvImageFormatRgba8i = 23,
+    SpvImageFormatR32i = 24,
+    SpvImageFormatRg32i = 25,
+    SpvImageFormatRg16i = 26,
+    SpvImageFormatRg8i = 27,
+    SpvImageFormatR16i = 28,
+    SpvImageFormatR8i = 29,
+    SpvImageFormatRgba32ui = 30,
+    SpvImageFormatRgba16ui = 31,
+    SpvImageFormatRgba8ui = 32,
+    SpvImageFormatR32ui = 33,
+    SpvImageFormatRgb10a2ui = 34,
+    SpvImageFormatRg32ui = 35,
+    SpvImageFormatRg16ui = 36,
+    SpvImageFormatRg8ui = 37,
+    SpvImageFormatR16ui = 38,
+    SpvImageFormatR8ui = 39,
+} SpvImageFormat;
+
+typedef enum SpvImageChannelOrder_ {  // Image channel-order tokens; mutually exclusive values, contiguous 0-18.
+    SpvImageChannelOrderR = 0,
+    SpvImageChannelOrderA = 1,
+    SpvImageChannelOrderRG = 2,
+    SpvImageChannelOrderRA = 3,
+    SpvImageChannelOrderRGB = 4,
+    SpvImageChannelOrderRGBA = 5,
+    SpvImageChannelOrderBGRA = 6,
+    SpvImageChannelOrderARGB = 7,
+    SpvImageChannelOrderIntensity = 8,
+    SpvImageChannelOrderLuminance = 9,
+    SpvImageChannelOrderRx = 10,
+    SpvImageChannelOrderRGx = 11,
+    SpvImageChannelOrderRGBx = 12,
+    SpvImageChannelOrderDepth = 13,
+    SpvImageChannelOrderDepthStencil = 14,
+    SpvImageChannelOrdersRGB = 15,  // parses as prefix "SpvImageChannelOrder" + "sRGB" (lowercase s); distinct from ...OrderRGB = 4
+    SpvImageChannelOrdersRGBx = 16,  // likewise "sRGBx"; distinct from ...OrderRGBx = 12
+    SpvImageChannelOrdersRGBA = 17,  // likewise "sRGBA"; distinct from ...OrderRGBA = 5
+    SpvImageChannelOrdersBGRA = 18,  // likewise "sBGRA"; distinct from ...OrderBGRA = 6
+} SpvImageChannelOrder;
+
+typedef enum SpvImageChannelDataType_ {  // Image channel data-type tokens; mutually exclusive values, contiguous 0-16.
+    SpvImageChannelDataTypeSnormInt8 = 0,
+    SpvImageChannelDataTypeSnormInt16 = 1,
+    SpvImageChannelDataTypeUnormInt8 = 2,
+    SpvImageChannelDataTypeUnormInt16 = 3,
+    SpvImageChannelDataTypeUnormShort565 = 4,
+    SpvImageChannelDataTypeUnormShort555 = 5,
+    SpvImageChannelDataTypeUnormInt101010 = 6,
+    SpvImageChannelDataTypeSignedInt8 = 7,
+    SpvImageChannelDataTypeSignedInt16 = 8,
+    SpvImageChannelDataTypeSignedInt32 = 9,
+    SpvImageChannelDataTypeUnsignedInt8 = 10,
+    SpvImageChannelDataTypeUnsignedInt16 = 11,
+    SpvImageChannelDataTypeUnsignedInt32 = 12,
+    SpvImageChannelDataTypeHalfFloat = 13,
+    SpvImageChannelDataTypeFloat = 14,
+    SpvImageChannelDataTypeUnormInt24 = 15,
+    SpvImageChannelDataTypeUnormInt101010_2 = 16,
+} SpvImageChannelDataType;
+
+typedef enum SpvImageOperandsShift_ {  // Bit positions (0-7) of the flags in SpvImageOperandsMask: mask == 1 << shift.
+    SpvImageOperandsBiasShift = 0,
+    SpvImageOperandsLodShift = 1,
+    SpvImageOperandsGradShift = 2,
+    SpvImageOperandsConstOffsetShift = 3,
+    SpvImageOperandsOffsetShift = 4,
+    SpvImageOperandsConstOffsetsShift = 5,
+    SpvImageOperandsSampleShift = 6,
+    SpvImageOperandsMinLodShift = 7,
+} SpvImageOperandsShift;
+
+typedef enum SpvImageOperandsMask_ {  // OR-able image-operand flags; each equals 1 << the same-named SpvImageOperandsShift value.
+    SpvImageOperandsMaskNone = 0,  // empty mask (no bits set)
+    SpvImageOperandsBiasMask = 0x00000001,
+    SpvImageOperandsLodMask = 0x00000002,
+    SpvImageOperandsGradMask = 0x00000004,
+    SpvImageOperandsConstOffsetMask = 0x00000008,
+    SpvImageOperandsOffsetMask = 0x00000010,
+    SpvImageOperandsConstOffsetsMask = 0x00000020,
+    SpvImageOperandsSampleMask = 0x00000040,
+    SpvImageOperandsMinLodMask = 0x00000080,
+} SpvImageOperandsMask;
+
+typedef enum SpvFPFastMathModeShift_ {  // Bit positions (0-4) of the flags in SpvFPFastMathModeMask: mask == 1 << shift.
+    SpvFPFastMathModeNotNaNShift = 0,
+    SpvFPFastMathModeNotInfShift = 1,
+    SpvFPFastMathModeNSZShift = 2,
+    SpvFPFastMathModeAllowRecipShift = 3,
+    SpvFPFastMathModeFastShift = 4,
+} SpvFPFastMathModeShift;
+
+typedef enum SpvFPFastMathModeMask_ {  // OR-able fast-math flags; each equals 1 << the same-named SpvFPFastMathModeShift value.
+    SpvFPFastMathModeMaskNone = 0,  // empty mask (no bits set)
+    SpvFPFastMathModeNotNaNMask = 0x00000001,
+    SpvFPFastMathModeNotInfMask = 0x00000002,
+    SpvFPFastMathModeNSZMask = 0x00000004,
+    SpvFPFastMathModeAllowRecipMask = 0x00000008,
+    SpvFPFastMathModeFastMask = 0x00000010,
+} SpvFPFastMathModeMask;
+
+typedef enum SpvFPRoundingMode_ {  // Floating-point rounding-mode tokens; mutually exclusive values, contiguous 0-3.
+    SpvFPRoundingModeRTE = 0,  // presumably "round to nearest even" -- TODO confirm against the SPIR-V spec
+    SpvFPRoundingModeRTZ = 1,  // presumably "round toward zero" -- TODO confirm
+    SpvFPRoundingModeRTP = 2,  // presumably "round toward positive infinity" -- TODO confirm
+    SpvFPRoundingModeRTN = 3,  // presumably "round toward negative infinity" -- TODO confirm
+} SpvFPRoundingMode;
+
+typedef enum SpvLinkageType_ {  // Linkage-type tokens; two mutually exclusive values.
+    SpvLinkageTypeExport = 0,
+    SpvLinkageTypeImport = 1,
+} SpvLinkageType;
+
+typedef enum SpvAccessQualifier_ {  // Access-qualifier tokens; sequential, mutually exclusive values (ReadWrite is 2, not ReadOnly | WriteOnly).
+    SpvAccessQualifierReadOnly = 0,
+    SpvAccessQualifierWriteOnly = 1,
+    SpvAccessQualifierReadWrite = 2,
+} SpvAccessQualifier;
+
+typedef enum SpvFunctionParameterAttribute_ {  // Function-parameter attribute tokens; mutually exclusive values, contiguous 0-7.
+    SpvFunctionParameterAttributeZext = 0,
+    SpvFunctionParameterAttributeSext = 1,
+    SpvFunctionParameterAttributeByVal = 2,
+    SpvFunctionParameterAttributeSret = 3,
+    SpvFunctionParameterAttributeNoAlias = 4,
+    SpvFunctionParameterAttributeNoCapture = 5,
+    SpvFunctionParameterAttributeNoWrite = 6,
+    SpvFunctionParameterAttributeNoReadWrite = 7,
+} SpvFunctionParameterAttribute;
+
+typedef enum SpvDecoration_ {  // Decoration tokens; mutually exclusive values in 0-44, not contiguous (see the notes at 13 and 28).
+    SpvDecorationRelaxedPrecision = 0,
+    SpvDecorationSpecId = 1,
+    SpvDecorationBlock = 2,
+    SpvDecorationBufferBlock = 3,
+    SpvDecorationRowMajor = 4,
+    SpvDecorationColMajor = 5,
+    SpvDecorationArrayStride = 6,
+    SpvDecorationMatrixStride = 7,
+    SpvDecorationGLSLShared = 8,
+    SpvDecorationGLSLPacked = 9,
+    SpvDecorationCPacked = 10,
+    SpvDecorationBuiltIn = 11,
+    SpvDecorationNoPerspective = 13,  // this header defines no enumerant for 12
+    SpvDecorationFlat = 14,
+    SpvDecorationPatch = 15,
+    SpvDecorationCentroid = 16,
+    SpvDecorationSample = 17,
+    SpvDecorationInvariant = 18,
+    SpvDecorationRestrict = 19,
+    SpvDecorationAliased = 20,
+    SpvDecorationVolatile = 21,
+    SpvDecorationConstant = 22,
+    SpvDecorationCoherent = 23,
+    SpvDecorationNonWritable = 24,
+    SpvDecorationNonReadable = 25,
+    SpvDecorationUniform = 26,
+    SpvDecorationSaturatedConversion = 28,  // this header defines no enumerant for 27
+    SpvDecorationStream = 29,
+    SpvDecorationLocation = 30,
+    SpvDecorationComponent = 31,
+    SpvDecorationIndex = 32,
+    SpvDecorationBinding = 33,
+    SpvDecorationDescriptorSet = 34,
+    SpvDecorationOffset = 35,
+    SpvDecorationXfbBuffer = 36,
+    SpvDecorationXfbStride = 37,
+    SpvDecorationFuncParamAttr = 38,
+    SpvDecorationFPRoundingMode = 39,
+    SpvDecorationFPFastMathMode = 40,
+    SpvDecorationLinkageAttributes = 41,
+    SpvDecorationNoContraction = 42,
+    SpvDecorationInputAttachmentIndex = 43,
+    SpvDecorationAlignment = 44,
+} SpvDecoration;
+
+typedef enum SpvBuiltIn_ {  // Built-in tokens; mutually exclusive values in 0-43, not contiguous (see the notes at 3, 22 and 36).
+    SpvBuiltInPosition = 0,
+    SpvBuiltInPointSize = 1,
+    SpvBuiltInClipDistance = 3,  // this header defines no enumerant for 2
+    SpvBuiltInCullDistance = 4,
+    SpvBuiltInVertexId = 5,  // separate token from SpvBuiltInVertexIndex (42)
+    SpvBuiltInInstanceId = 6,  // separate token from SpvBuiltInInstanceIndex (43)
+    SpvBuiltInPrimitiveId = 7,
+    SpvBuiltInInvocationId = 8,
+    SpvBuiltInLayer = 9,
+    SpvBuiltInViewportIndex = 10,
+    SpvBuiltInTessLevelOuter = 11,
+    SpvBuiltInTessLevelInner = 12,
+    SpvBuiltInTessCoord = 13,
+    SpvBuiltInPatchVertices = 14,
+    SpvBuiltInFragCoord = 15,
+    SpvBuiltInPointCoord = 16,
+    SpvBuiltInFrontFacing = 17,
+    SpvBuiltInSampleId = 18,
+    SpvBuiltInSamplePosition = 19,
+    SpvBuiltInSampleMask = 20,
+    SpvBuiltInFragDepth = 22,  // this header defines no enumerant for 21
+    SpvBuiltInHelperInvocation = 23,
+    SpvBuiltInNumWorkgroups = 24,
+    SpvBuiltInWorkgroupSize = 25,
+    SpvBuiltInWorkgroupId = 26,
+    SpvBuiltInLocalInvocationId = 27,
+    SpvBuiltInGlobalInvocationId = 28,
+    SpvBuiltInLocalInvocationIndex = 29,
+    SpvBuiltInWorkDim = 30,
+    SpvBuiltInGlobalSize = 31,
+    SpvBuiltInEnqueuedWorkgroupSize = 32,
+    SpvBuiltInGlobalOffset = 33,
+    SpvBuiltInGlobalLinearId = 34,
+    SpvBuiltInSubgroupSize = 36,  // this header defines no enumerant for 35
+    SpvBuiltInSubgroupMaxSize = 37,
+    SpvBuiltInNumSubgroups = 38,
+    SpvBuiltInNumEnqueuedSubgroups = 39,
+    SpvBuiltInSubgroupId = 40,
+    SpvBuiltInSubgroupLocalInvocationId = 41,
+    SpvBuiltInVertexIndex = 42,
+    SpvBuiltInInstanceIndex = 43,
+} SpvBuiltIn;
+
+typedef enum SpvSelectionControlShift_ {  // Bit positions of the flags in SpvSelectionControlMask: mask == 1 << shift.
+    SpvSelectionControlFlattenShift = 0,
+    SpvSelectionControlDontFlattenShift = 1,
+} SpvSelectionControlShift;
+
+typedef enum SpvSelectionControlMask_ {  // Selection-control flags; each equals 1 << the same-named SpvSelectionControlShift value.
+    SpvSelectionControlMaskNone = 0,  // empty mask (no bits set)
+    SpvSelectionControlFlattenMask = 0x00000001,
+    SpvSelectionControlDontFlattenMask = 0x00000002,
+} SpvSelectionControlMask;
+
+typedef enum SpvLoopControlShift_ {  // Bit positions of the flags in SpvLoopControlMask: mask == 1 << shift.
+    SpvLoopControlUnrollShift = 0,
+    SpvLoopControlDontUnrollShift = 1,
+} SpvLoopControlShift;
+
+typedef enum SpvLoopControlMask_ {  // Loop-control flags; each equals 1 << the same-named SpvLoopControlShift value.
+    SpvLoopControlMaskNone = 0,  // empty mask (no bits set)
+    SpvLoopControlUnrollMask = 0x00000001,
+    SpvLoopControlDontUnrollMask = 0x00000002,
+} SpvLoopControlMask;
+
+typedef enum SpvFunctionControlShift_ {  // Bit positions (0-3) of the flags in SpvFunctionControlMask: mask == 1 << shift.
+    SpvFunctionControlInlineShift = 0,
+    SpvFunctionControlDontInlineShift = 1,
+    SpvFunctionControlPureShift = 2,
+    SpvFunctionControlConstShift = 3,
+} SpvFunctionControlShift;
+
+typedef enum SpvFunctionControlMask_ {  // OR-able function-control flags; each equals 1 << the same-named SpvFunctionControlShift value.
+    SpvFunctionControlMaskNone = 0,  // empty mask (no bits set)
+    SpvFunctionControlInlineMask = 0x00000001,
+    SpvFunctionControlDontInlineMask = 0x00000002,
+    SpvFunctionControlPureMask = 0x00000004,
+    SpvFunctionControlConstMask = 0x00000008,
+} SpvFunctionControlMask;
+
+typedef enum SpvMemorySemanticsShift_ {  // Bit positions of the flags in SpvMemorySemanticsMask: mask == 1 << shift. Positions 0 and 5 are not defined here.
+    SpvMemorySemanticsAcquireShift = 1,  // numbering starts at 1, not 0
+    SpvMemorySemanticsReleaseShift = 2,
+    SpvMemorySemanticsAcquireReleaseShift = 3,
+    SpvMemorySemanticsSequentiallyConsistentShift = 4,
+    SpvMemorySemanticsUniformMemoryShift = 6,  // this header defines no enumerant for 5
+    SpvMemorySemanticsSubgroupMemoryShift = 7,
+    SpvMemorySemanticsWorkgroupMemoryShift = 8,
+    SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
+    SpvMemorySemanticsAtomicCounterMemoryShift = 10,
+    SpvMemorySemanticsImageMemoryShift = 11,
+} SpvMemorySemanticsShift;
+
+typedef enum SpvMemorySemanticsMask_ {  // OR-able memory-semantics flags; each equals 1 << the same-named SpvMemorySemanticsShift value.
+    SpvMemorySemanticsMaskNone = 0,  // empty mask (no bits set)
+    SpvMemorySemanticsAcquireMask = 0x00000002,  // lowest defined bit; 0x00000001 is not defined in this header
+    SpvMemorySemanticsReleaseMask = 0x00000004,
+    SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
+    SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
+    SpvMemorySemanticsUniformMemoryMask = 0x00000040,  // 0x00000020 is not defined in this header
+    SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
+    SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
+    SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
+    SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
+    SpvMemorySemanticsImageMemoryMask = 0x00000800,
+} SpvMemorySemanticsMask;
+
+typedef enum SpvMemoryAccessShift_ {  // Bit positions (0-2) of the flags in SpvMemoryAccessMask: mask == 1 << shift.
+    SpvMemoryAccessVolatileShift = 0,
+    SpvMemoryAccessAlignedShift = 1,
+    SpvMemoryAccessNontemporalShift = 2,
+} SpvMemoryAccessShift;
+
+typedef enum SpvMemoryAccessMask_ {  // OR-able memory-access flags; each equals 1 << the same-named SpvMemoryAccessShift value.
+    SpvMemoryAccessMaskNone = 0,  // empty mask (no bits set)
+    SpvMemoryAccessVolatileMask = 0x00000001,
+    SpvMemoryAccessAlignedMask = 0x00000002,
+    SpvMemoryAccessNontemporalMask = 0x00000004,
+} SpvMemoryAccessMask;
+
+typedef enum SpvScope_ {  // Scope tokens (CrossDevice through Invocation); mutually exclusive values, contiguous 0-4.
+    SpvScopeCrossDevice = 0,
+    SpvScopeDevice = 1,
+    SpvScopeWorkgroup = 2,
+    SpvScopeSubgroup = 3,
+    SpvScopeInvocation = 4,
+} SpvScope;
+
+typedef enum SpvGroupOperation_ {  // Group-operation tokens; mutually exclusive values, contiguous 0-2.
+    SpvGroupOperationReduce = 0,
+    SpvGroupOperationInclusiveScan = 1,
+    SpvGroupOperationExclusiveScan = 2,
+} SpvGroupOperation;
+
+typedef enum SpvKernelEnqueueFlags_ {  // Despite "Flags" in the name there is no "Mask" suffix and the values are sequential 0-2: mutually exclusive, not OR-able.
+    SpvKernelEnqueueFlagsNoWait = 0,
+    SpvKernelEnqueueFlagsWaitKernel = 1,
+    SpvKernelEnqueueFlagsWaitWorkGroup = 2,
+} SpvKernelEnqueueFlags;
+
+typedef enum SpvKernelProfilingInfoShift_ {  // Bit position of the single flag in SpvKernelProfilingInfoMask: mask == 1 << shift.
+    SpvKernelProfilingInfoCmdExecTimeShift = 0,
+} SpvKernelProfilingInfoShift;
+
+typedef enum SpvKernelProfilingInfoMask_ {  // Kernel-profiling flags; the one flag equals 1 << SpvKernelProfilingInfoCmdExecTimeShift.
+    SpvKernelProfilingInfoMaskNone = 0,  // empty mask (no bits set)
+    SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
+} SpvKernelProfilingInfoMask;
+
+typedef enum SpvCapability_ {  // Capability tokens; mutually exclusive values in 0-57, not contiguous (see the notes at 17 and 27).
+    SpvCapabilityMatrix = 0,
+    SpvCapabilityShader = 1,
+    SpvCapabilityGeometry = 2,
+    SpvCapabilityTessellation = 3,
+    SpvCapabilityAddresses = 4,
+    SpvCapabilityLinkage = 5,
+    SpvCapabilityKernel = 6,
+    SpvCapabilityVector16 = 7,
+    SpvCapabilityFloat16Buffer = 8,
+    SpvCapabilityFloat16 = 9,
+    SpvCapabilityFloat64 = 10,
+    SpvCapabilityInt64 = 11,
+    SpvCapabilityInt64Atomics = 12,
+    SpvCapabilityImageBasic = 13,
+    SpvCapabilityImageReadWrite = 14,
+    SpvCapabilityImageMipmap = 15,
+    SpvCapabilityPipes = 17,  // this header defines no enumerant for 16
+    SpvCapabilityGroups = 18,
+    SpvCapabilityDeviceEnqueue = 19,
+    SpvCapabilityLiteralSampler = 20,
+    SpvCapabilityAtomicStorage = 21,
+    SpvCapabilityInt16 = 22,
+    SpvCapabilityTessellationPointSize = 23,
+    SpvCapabilityGeometryPointSize = 24,
+    SpvCapabilityImageGatherExtended = 25,
+    SpvCapabilityStorageImageMultisample = 27,  // this header defines no enumerant for 26
+    SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
+    SpvCapabilitySampledImageArrayDynamicIndexing = 29,
+    SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
+    SpvCapabilityStorageImageArrayDynamicIndexing = 31,
+    SpvCapabilityClipDistance = 32,
+    SpvCapabilityCullDistance = 33,
+    SpvCapabilityImageCubeArray = 34,
+    SpvCapabilitySampleRateShading = 35,
+    SpvCapabilityImageRect = 36,
+    SpvCapabilitySampledRect = 37,
+    SpvCapabilityGenericPointer = 38,
+    SpvCapabilityInt8 = 39,
+    SpvCapabilityInputAttachment = 40,
+    SpvCapabilitySparseResidency = 41,
+    SpvCapabilityMinLod = 42,
+    SpvCapabilitySampled1D = 43,
+    SpvCapabilityImage1D = 44,
+    SpvCapabilitySampledCubeArray = 45,
+    SpvCapabilitySampledBuffer = 46,
+    SpvCapabilityImageBuffer = 47,
+    SpvCapabilityImageMSArray = 48,
+    SpvCapabilityStorageImageExtendedFormats = 49,
+    SpvCapabilityImageQuery = 50,
+    SpvCapabilityDerivativeControl = 51,
+    SpvCapabilityInterpolationFunction = 52,
+    SpvCapabilityTransformFeedback = 53,
+    SpvCapabilityGeometryStreams = 54,
+    SpvCapabilityStorageImageReadWithoutFormat = 55,
+    SpvCapabilityStorageImageWriteWithoutFormat = 56,
+    SpvCapabilityMultiViewport = 57,
+} SpvCapability;
+
+typedef enum SpvOp_ {  // Opcode tokens, 0-320 (all fit in SpvOpCodeMask, 0xffff). Numbering has gaps, so do not use it as a dense array index.
+    SpvOpNop = 0,
+    SpvOpUndef = 1,
+    SpvOpSourceContinued = 2,
+    SpvOpSource = 3,
+    SpvOpSourceExtension = 4,
+    SpvOpName = 5,
+    SpvOpMemberName = 6,
+    SpvOpString = 7,
+    SpvOpLine = 8,
+    SpvOpExtension = 10,  // no 9 in this header
+    SpvOpExtInstImport = 11,
+    SpvOpExtInst = 12,
+    SpvOpMemoryModel = 14,  // no 13 in this header
+    SpvOpEntryPoint = 15,
+    SpvOpExecutionMode = 16,
+    SpvOpCapability = 17,
+    SpvOpTypeVoid = 19,  // no 18 in this header
+    SpvOpTypeBool = 20,
+    SpvOpTypeInt = 21,
+    SpvOpTypeFloat = 22,
+    SpvOpTypeVector = 23,
+    SpvOpTypeMatrix = 24,
+    SpvOpTypeImage = 25,
+    SpvOpTypeSampler = 26,
+    SpvOpTypeSampledImage = 27,
+    SpvOpTypeArray = 28,
+    SpvOpTypeRuntimeArray = 29,
+    SpvOpTypeStruct = 30,
+    SpvOpTypeOpaque = 31,
+    SpvOpTypePointer = 32,
+    SpvOpTypeFunction = 33,
+    SpvOpTypeEvent = 34,
+    SpvOpTypeDeviceEvent = 35,
+    SpvOpTypeReserveId = 36,
+    SpvOpTypeQueue = 37,
+    SpvOpTypePipe = 38,
+    SpvOpTypeForwardPointer = 39,
+    SpvOpConstantTrue = 41,  // no 40 in this header
+    SpvOpConstantFalse = 42,
+    SpvOpConstant = 43,
+    SpvOpConstantComposite = 44,
+    SpvOpConstantSampler = 45,
+    SpvOpConstantNull = 46,
+    SpvOpSpecConstantTrue = 48,  // no 47 in this header
+    SpvOpSpecConstantFalse = 49,
+    SpvOpSpecConstant = 50,
+    SpvOpSpecConstantComposite = 51,
+    SpvOpSpecConstantOp = 52,
+    SpvOpFunction = 54,  // no 53 in this header
+    SpvOpFunctionParameter = 55,
+    SpvOpFunctionEnd = 56,
+    SpvOpFunctionCall = 57,
+    SpvOpVariable = 59,  // no 58 in this header
+    SpvOpImageTexelPointer = 60,
+    SpvOpLoad = 61,
+    SpvOpStore = 62,
+    SpvOpCopyMemory = 63,
+    SpvOpCopyMemorySized = 64,
+    SpvOpAccessChain = 65,
+    SpvOpInBoundsAccessChain = 66,
+    SpvOpPtrAccessChain = 67,
+    SpvOpArrayLength = 68,
+    SpvOpGenericPtrMemSemantics = 69,
+    SpvOpInBoundsPtrAccessChain = 70,
+    SpvOpDecorate = 71,
+    SpvOpMemberDecorate = 72,
+    SpvOpDecorationGroup = 73,
+    SpvOpGroupDecorate = 74,
+    SpvOpGroupMemberDecorate = 75,
+    SpvOpVectorExtractDynamic = 77,  // no 76 in this header
+    SpvOpVectorInsertDynamic = 78,
+    SpvOpVectorShuffle = 79,
+    SpvOpCompositeConstruct = 80,
+    SpvOpCompositeExtract = 81,
+    SpvOpCompositeInsert = 82,
+    SpvOpCopyObject = 83,
+    SpvOpTranspose = 84,
+    SpvOpSampledImage = 86,  // no 85 in this header
+    SpvOpImageSampleImplicitLod = 87,
+    SpvOpImageSampleExplicitLod = 88,
+    SpvOpImageSampleDrefImplicitLod = 89,
+    SpvOpImageSampleDrefExplicitLod = 90,
+    SpvOpImageSampleProjImplicitLod = 91,
+    SpvOpImageSampleProjExplicitLod = 92,
+    SpvOpImageSampleProjDrefImplicitLod = 93,
+    SpvOpImageSampleProjDrefExplicitLod = 94,
+    SpvOpImageFetch = 95,
+    SpvOpImageGather = 96,
+    SpvOpImageDrefGather = 97,
+    SpvOpImageRead = 98,
+    SpvOpImageWrite = 99,
+    SpvOpImage = 100,
+    SpvOpImageQueryFormat = 101,
+    SpvOpImageQueryOrder = 102,
+    SpvOpImageQuerySizeLod = 103,
+    SpvOpImageQuerySize = 104,
+    SpvOpImageQueryLod = 105,
+    SpvOpImageQueryLevels = 106,
+    SpvOpImageQuerySamples = 107,
+    SpvOpConvertFToU = 109,  // no 108 in this header
+    SpvOpConvertFToS = 110,
+    SpvOpConvertSToF = 111,
+    SpvOpConvertUToF = 112,
+    SpvOpUConvert = 113,
+    SpvOpSConvert = 114,
+    SpvOpFConvert = 115,
+    SpvOpQuantizeToF16 = 116,
+    SpvOpConvertPtrToU = 117,
+    SpvOpSatConvertSToU = 118,
+    SpvOpSatConvertUToS = 119,
+    SpvOpConvertUToPtr = 120,
+    SpvOpPtrCastToGeneric = 121,
+    SpvOpGenericCastToPtr = 122,
+    SpvOpGenericCastToPtrExplicit = 123,
+    SpvOpBitcast = 124,
+    SpvOpSNegate = 126,  // no 125 in this header
+    SpvOpFNegate = 127,
+    SpvOpIAdd = 128,
+    SpvOpFAdd = 129,
+    SpvOpISub = 130,
+    SpvOpFSub = 131,
+    SpvOpIMul = 132,
+    SpvOpFMul = 133,
+    SpvOpUDiv = 134,
+    SpvOpSDiv = 135,
+    SpvOpFDiv = 136,
+    SpvOpUMod = 137,
+    SpvOpSRem = 138,
+    SpvOpSMod = 139,
+    SpvOpFRem = 140,
+    SpvOpFMod = 141,
+    SpvOpVectorTimesScalar = 142,
+    SpvOpMatrixTimesScalar = 143,
+    SpvOpVectorTimesMatrix = 144,
+    SpvOpMatrixTimesVector = 145,
+    SpvOpMatrixTimesMatrix = 146,
+    SpvOpOuterProduct = 147,
+    SpvOpDot = 148,
+    SpvOpIAddCarry = 149,
+    SpvOpISubBorrow = 150,
+    SpvOpUMulExtended = 151,
+    SpvOpSMulExtended = 152,
+    SpvOpAny = 154,  // no 153 in this header
+    SpvOpAll = 155,
+    SpvOpIsNan = 156,
+    SpvOpIsInf = 157,
+    SpvOpIsFinite = 158,
+    SpvOpIsNormal = 159,
+    SpvOpSignBitSet = 160,
+    SpvOpLessOrGreater = 161,
+    SpvOpOrdered = 162,
+    SpvOpUnordered = 163,
+    SpvOpLogicalEqual = 164,
+    SpvOpLogicalNotEqual = 165,
+    SpvOpLogicalOr = 166,
+    SpvOpLogicalAnd = 167,
+    SpvOpLogicalNot = 168,
+    SpvOpSelect = 169,
+    SpvOpIEqual = 170,
+    SpvOpINotEqual = 171,
+    SpvOpUGreaterThan = 172,
+    SpvOpSGreaterThan = 173,
+    SpvOpUGreaterThanEqual = 174,
+    SpvOpSGreaterThanEqual = 175,
+    SpvOpULessThan = 176,
+    SpvOpSLessThan = 177,
+    SpvOpULessThanEqual = 178,
+    SpvOpSLessThanEqual = 179,
+    SpvOpFOrdEqual = 180,
+    SpvOpFUnordEqual = 181,
+    SpvOpFOrdNotEqual = 182,
+    SpvOpFUnordNotEqual = 183,
+    SpvOpFOrdLessThan = 184,
+    SpvOpFUnordLessThan = 185,
+    SpvOpFOrdGreaterThan = 186,
+    SpvOpFUnordGreaterThan = 187,
+    SpvOpFOrdLessThanEqual = 188,
+    SpvOpFUnordLessThanEqual = 189,
+    SpvOpFOrdGreaterThanEqual = 190,
+    SpvOpFUnordGreaterThanEqual = 191,
+    SpvOpShiftRightLogical = 194,  // no 192 or 193 in this header
+    SpvOpShiftRightArithmetic = 195,
+    SpvOpShiftLeftLogical = 196,
+    SpvOpBitwiseOr = 197,
+    SpvOpBitwiseXor = 198,
+    SpvOpBitwiseAnd = 199,
+    SpvOpNot = 200,
+    SpvOpBitFieldInsert = 201,
+    SpvOpBitFieldSExtract = 202,
+    SpvOpBitFieldUExtract = 203,
+    SpvOpBitReverse = 204,
+    SpvOpBitCount = 205,
+    SpvOpDPdx = 207,  // no 206 in this header
+    SpvOpDPdy = 208,
+    SpvOpFwidth = 209,
+    SpvOpDPdxFine = 210,
+    SpvOpDPdyFine = 211,
+    SpvOpFwidthFine = 212,
+    SpvOpDPdxCoarse = 213,
+    SpvOpDPdyCoarse = 214,
+    SpvOpFwidthCoarse = 215,
+    SpvOpEmitVertex = 218,  // no 216 or 217 in this header
+    SpvOpEndPrimitive = 219,
+    SpvOpEmitStreamVertex = 220,
+    SpvOpEndStreamPrimitive = 221,
+    SpvOpControlBarrier = 224,  // no 222 or 223 in this header
+    SpvOpMemoryBarrier = 225,
+    SpvOpAtomicLoad = 227,  // no 226 in this header
+    SpvOpAtomicStore = 228,
+    SpvOpAtomicExchange = 229,
+    SpvOpAtomicCompareExchange = 230,
+    SpvOpAtomicCompareExchangeWeak = 231,
+    SpvOpAtomicIIncrement = 232,
+    SpvOpAtomicIDecrement = 233,
+    SpvOpAtomicIAdd = 234,
+    SpvOpAtomicISub = 235,
+    SpvOpAtomicSMin = 236,
+    SpvOpAtomicUMin = 237,
+    SpvOpAtomicSMax = 238,
+    SpvOpAtomicUMax = 239,
+    SpvOpAtomicAnd = 240,
+    SpvOpAtomicOr = 241,
+    SpvOpAtomicXor = 242,
+    SpvOpPhi = 245,  // no 243 or 244 in this header
+    SpvOpLoopMerge = 246,
+    SpvOpSelectionMerge = 247,
+    SpvOpLabel = 248,
+    SpvOpBranch = 249,
+    SpvOpBranchConditional = 250,
+    SpvOpSwitch = 251,
+    SpvOpKill = 252,
+    SpvOpReturn = 253,
+    SpvOpReturnValue = 254,
+    SpvOpUnreachable = 255,
+    SpvOpLifetimeStart = 256,
+    SpvOpLifetimeStop = 257,
+    SpvOpGroupAsyncCopy = 259,  // no 258 in this header
+    SpvOpGroupWaitEvents = 260,
+    SpvOpGroupAll = 261,
+    SpvOpGroupAny = 262,
+    SpvOpGroupBroadcast = 263,
+    SpvOpGroupIAdd = 264,
+    SpvOpGroupFAdd = 265,
+    SpvOpGroupFMin = 266,
+    SpvOpGroupUMin = 267,
+    SpvOpGroupSMin = 268,
+    SpvOpGroupFMax = 269,
+    SpvOpGroupUMax = 270,
+    SpvOpGroupSMax = 271,
+    SpvOpReadPipe = 274,  // no 272 or 273 in this header
+    SpvOpWritePipe = 275,
+    SpvOpReservedReadPipe = 276,
+    SpvOpReservedWritePipe = 277,
+    SpvOpReserveReadPipePackets = 278,
+    SpvOpReserveWritePipePackets = 279,
+    SpvOpCommitReadPipe = 280,
+    SpvOpCommitWritePipe = 281,
+    SpvOpIsValidReserveId = 282,
+    SpvOpGetNumPipePackets = 283,
+    SpvOpGetMaxPipePackets = 284,
+    SpvOpGroupReserveReadPipePackets = 285,
+    SpvOpGroupReserveWritePipePackets = 286,
+    SpvOpGroupCommitReadPipe = 287,
+    SpvOpGroupCommitWritePipe = 288,
+    SpvOpEnqueueMarker = 291,  // no 289 or 290 in this header
+    SpvOpEnqueueKernel = 292,
+    SpvOpGetKernelNDrangeSubGroupCount = 293,
+    SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
+    SpvOpGetKernelWorkGroupSize = 295,
+    SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
+    SpvOpRetainEvent = 297,
+    SpvOpReleaseEvent = 298,
+    SpvOpCreateUserEvent = 299,
+    SpvOpIsValidEvent = 300,
+    SpvOpSetUserEventStatus = 301,
+    SpvOpCaptureEventProfilingInfo = 302,
+    SpvOpGetDefaultQueue = 303,
+    SpvOpBuildNDRange = 304,
+    SpvOpImageSparseSampleImplicitLod = 305,
+    SpvOpImageSparseSampleExplicitLod = 306,
+    SpvOpImageSparseSampleDrefImplicitLod = 307,
+    SpvOpImageSparseSampleDrefExplicitLod = 308,
+    SpvOpImageSparseSampleProjImplicitLod = 309,
+    SpvOpImageSparseSampleProjExplicitLod = 310,
+    SpvOpImageSparseSampleProjDrefImplicitLod = 311,
+    SpvOpImageSparseSampleProjDrefExplicitLod = 312,
+    SpvOpImageSparseFetch = 313,
+    SpvOpImageSparseGather = 314,
+    SpvOpImageSparseDrefGather = 315,
+    SpvOpImageSparseTexelsResident = 316,
+    SpvOpNoLine = 317,
+    SpvOpAtomicFlagTestAndSet = 318,
+    SpvOpAtomicFlagClear = 319,
+    SpvOpImageSparseRead = 320,  // highest opcode defined in this header
+} SpvOp;
+
+#endif  // #ifndef spirv_H
+
diff --git a/utils/testparse.c b/utils/testparse.c
index 2de9880e..bf7ce96e 100644
--- a/utils/testparse.c
+++ b/utils/testparse.c
@@ -9,7 +9,14 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "mojoshader.h"
+#include <string.h>
+#include <assert.h>
+#include "../mojoshader.h"
+#define __MOJOSHADER_INTERNAL__ 1
+#include "../mojoshader_internal.h"
+#ifdef MOJOSHADER_HAS_SPIRV_TOOLS
+#include "spirv-tools/libspirv.h"
+#endif
 
 #ifdef _MSC_VER
 #define snprintf _snprintf
@@ -257,6 +264,7 @@ static void print_attrs(const char *category, const int count,
         for (i = 0; i < count; i++)
         {
             static const char *usagenames[] = {
+                "<unknown>",
                 "position", "blendweight", "blendindices", "normal",
                 "psize", "texcoord", "tangent", "binormal", "tessfactor",
                 "positiont", "color", "fog", "depth", "sample"
@@ -266,7 +274,7 @@ static void print_attrs(const char *category, const int count,
             if (a->index != 0)
                 snprintf(numstr, sizeof (numstr), "%d", a->index);
             INDENT();
-            printf("    * %s%s", usagenames[(int) a->usage], numstr);
+            printf("    * %s%s", usagenames[1 + (int) a->usage], numstr);
             if (a->name != NULL)
                 printf(" (\"%s\")", a->name);
             printf("\n");
@@ -287,8 +295,8 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd,
             const MOJOSHADER_error *err = &pd->errors[i];
             INDENT();
             printf("%s:%d: ERROR: %s\n",
-                    err->filename ? err->filename : fname,
-                    err->error_position, err->error);
+                   err->filename ? err->filename : fname,
+                   err->error_position, err->error);
         } // for
     } // if
     else
@@ -311,7 +319,7 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd,
             {
                 static const char *typenames[] = { "float", "int", "bool" };
                 const MOJOSHADER_constant *c = &pd->constants[i];
-                INDENT(); 
+                INDENT();
                 printf("    * %d: %s (", c->index, typenames[(int) c->type]);
                 if (c->type == MOJOSHADER_UNIFORM_FLOAT)
                 {
@@ -358,7 +366,7 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd,
 
                 INDENT();
                 printf("    * %d: %s%s%s%s", u->index, constant, arrayof,
-                        arrayrange, typenames[(int) u->type]);
+                       arrayrange, typenames[(int) u->type]);
                 if (u->name != NULL)
                     printf(" (\"%s\")", u->name);
                 printf("\n");
@@ -393,15 +401,64 @@ static void print_shader(const char *fname, const MOJOSHADER_parseData *pd,
 
         if (pd->output != NULL)
         {
+            // Print pd->output as-is unless a SPIR-V disassembly replaces it.
+            const char *output = pd->output;
+            int output_len = pd->output_len;
             int i;
+#if SUPPORT_PROFILE_SPIRV && defined(MOJOSHADER_HAS_SPIRV_TOOLS)
+            if (strcmp(pd->profile, "spirv") == 0)
+            {
+                // Only the words before the trailing SpirvPatchTable are SPIR-V.
+                const int binary_len = pd->output_len - (int) sizeof (SpirvPatchTable);
+                const uint32_t *words = (const uint32_t *) pd->output;
+                const size_t word_count = (size_t) binary_len / 4;
+                const int options = /*SPV_BINARY_TO_TEXT_OPTION_COLOR |*/ SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES;
+                spv_text text = NULL;
+                spv_diagnostic diagnostic = NULL;
+                spv_context ctx = spvContextCreate(SPV_ENV_UNIVERSAL_1_0);
+                spv_result_t disResult, validateResult;
+
+                disResult = spvBinaryToText(ctx, words, word_count, options, &text, &diagnostic);
+                if (disResult == SPV_SUCCESS)
+                {
+                    output = text->str;
+                    output_len = (int) text->length;
+                } // if
+                else
+                {
+                    fprintf(stderr, "\nERROR DIAGNOSTIC: %s\n\n",
+                            diagnostic ? diagnostic->error : "(none)");
+                } // else
+
+                // Release the disassembler's diagnostic before validation reuses the slot.
+                spvDiagnosticDestroy(diagnostic);
+                diagnostic = NULL;
+                validateResult = spvValidateBinary(ctx, words, word_count, &diagnostic);
+                if (validateResult != SPV_SUCCESS)
+                {
+                    fprintf(stderr, "\nVALIDATION FAILURE: %s\n\n",
+                            diagnostic ? diagnostic->error : "(none)");
+                } // if
+                spvDiagnosticDestroy(diagnostic);
+                spvContextDestroy(ctx);
+
+                if (disResult != SPV_SUCCESS || validateResult != SPV_SUCCESS)
+                {
+                    exit(EXIT_FAILURE);
+                } // if
+
+                // FIXME: we're currently just leaking this disassembly...
+            } // if
+#endif
+
             INDENT();
             printf("OUTPUT:\n");
             indent++;
             INDENT();
-            for (i = 0; i < pd->output_len; i++)
+            for (i = 0; i < output_len; i++)
             {
-                putchar((int) pd->output[i]);
-                if (pd->output[i] == '\n')
+                putchar((int) output[i]);
+                if (output[i] == '\n')
                     INDENT();
             } // for
             printf("\n");
@@ -674,6 +731,7 @@ static void print_effect(const char *fname, const MOJOSHADER_effect *effect,
 static int do_parse(const char *fname, const unsigned char *buf,
                     const int len, const char *prof)
 {
+    int i;
     int retval = 0;
 
     // magic for an effects file (!!! FIXME: I _think_).
@@ -686,7 +744,26 @@ static int do_parse(const char *fname, const unsigned char *buf,
         const MOJOSHADER_effect *effect;
         effect = MOJOSHADER_parseEffect(prof, buf, len, NULL, 0,
                                         NULL, 0, Malloc, Free, 0);
-        retval = (effect->error_count == 0);
+        int error_count = effect->error_count;  // per-shader errors are added below.
+        for (i = 0; i < effect->object_count; i++)
+        {
+            MOJOSHADER_effectObject *object = &effect->objects[i];
+            switch (object->type)
+            {
+                case MOJOSHADER_SYMTYPE_VERTEXSHADER:
+                case MOJOSHADER_SYMTYPE_PIXELSHADER:
+                    if (!object->shader.is_preshader)  // only non-preshader objects are counted.
+                    {
+                        const MOJOSHADER_parseData *shader = object->shader.shader;
+                        if (shader)  // may be NULL; nothing to count then.
+                            error_count += shader->error_count;
+                    } // if
+                    break;
+                default:  // other object types contribute no errors here.
+                    break;
+            } // switch
+        } // for
+        retval = (error_count == 0);  // success only if effect and all counted shaders are clean.
         printf("EFFECT: %s\n", fname);
         print_effect(fname, effect, 1);
         MOJOSHADER_freeEffect(effect);