Metal uniform buffer overhaul.
author Caleb Cornett <caleb.cornett@outlook.com>
Tue, 07 Jul 2020 19:25:47 -0400
changeset 1279 b1f51cfa8933
parent 1278 be0f548f321f
child 1280 d2a0d76469f9
Metal uniform buffer overhaul.
1. Metal now uses a single, ~1MB uniform buffer per frame that is shared among all shaders. From my testing this should be more than enough room for the FNA catalog (aside from possibly Little Racers Street with its memory leak). This is a major boon for Metal memory management since it only requires one buffer instead of 3,000 (in LRS's case)! And it simplifies the internal workings of mojoshader_metal dramatically.
2. Metal now has a MOJOSHADER_mtlMakeContextCurrent API to match GL and Vulkan. Other parts of the context API have been rewritten as well to match the behavior you'd expect.
3. The MOJOSHADER_mtlGetUniformBuffers function has been replaced with MOJOSHADER_mtlGetUniformData. Now that we only use a single uniform buffer, there's no need to return one for each of the shaders.
4. The MOJOSHADER_mtlGetFunctionHandle function no longer allocates memory. Instead, all MTLFunction handles are cached in the MOJOSHADER_mtlShader structs at library creation time. This removes the burden of memory management from the user.
5. A variety of formatting and documentation updates.
mojoshader.h
mojoshader_metal.c
--- a/mojoshader.h	Tue Jul 07 17:44:32 2020 -0400
+++ b/mojoshader.h	Tue Jul 07 19:25:47 2020 -0400
@@ -3323,6 +3323,7 @@
 
 /* Metal interface... */
 
+typedef struct MOJOSHADER_mtlContext MOJOSHADER_mtlContext;
 typedef struct MOJOSHADER_mtlShader MOJOSHADER_mtlShader;
 
 /*
@@ -3337,10 +3338,6 @@
  *
  * (device) refers to the active MTLDevice, cast from id<MTLDevice> to void*.
  *
- * (framesInFlight) is the maximum number of frames that can be processed
- *  simultaneously. This determines how many uniform buffers will be
- *  allocated for each shader.
- *
  * As MojoShader requires some memory to be allocated, you may provide a
  *  custom allocator to this function, which will be used to allocate/free
  *  memory. They function just like malloc() and free(). We do not use
@@ -3349,32 +3346,23 @@
  *  (malloc_d) parameter. This pointer is passed as-is to your (m) and (f)
  *  functions.
  *
- * The context created by this function will automatically become the current
- *  context. No further action is needed by the caller.
- *
- * Returns 0 on success or -1 on failure.
+ * Returns a new context on success, NULL on error. If you get a new context,
+ *  you need to make it current before using it with
+ *  MOJOSHADER_mtlMakeContextCurrent().
  */
-DECLSPEC int MOJOSHADER_mtlCreateContext(void *mtlDevice, int framesInFlight,
-                                         MOJOSHADER_malloc m, MOJOSHADER_free f,
-                                         void *malloc_d);
+DECLSPEC MOJOSHADER_mtlContext *MOJOSHADER_mtlCreateContext(void *mtlDevice,
+                                        MOJOSHADER_malloc m, MOJOSHADER_free f,
+                                        void *malloc_d);
 
 /*
- * Get any error state we might have picked up, such as failed shader
- *  compilation.
- *
- * Returns a human-readable string. This string is for debugging purposes, and
- *  not guaranteed to be localized, coherent, or user-friendly in any way.
- *  It's for programmers!
- *
- * The latest error may remain between calls. New errors replace any existing
- *  error. Don't check this string for a sign that an error happened, check
- *  return codes instead and use this for explanation when debugging.
- *
- * Do not free the returned string: it's a pointer to a static internal
- *  buffer. Do not keep the pointer around, either, as it's likely to become
- *  invalid as soon as you call into MojoShader again.
+ * You must call this before using the context that you got from
+ *  MOJOSHADER_mtlCreateContext(), and must use it when you switch to a new
+ *  context.
+ *
+ * It is legal to call this with a NULL pointer to make no context current,
+ *  but you need a valid context to be current to use most of MojoShader.
  */
-DECLSPEC const char *MOJOSHADER_mtlGetError(void);
+DECLSPEC void MOJOSHADER_mtlMakeContextCurrent(MOJOSHADER_mtlContext *ctx);
 
 /*
  * Transform a buffer of Direct3D shader bytecode into a struct containing
@@ -3395,16 +3383,16 @@
  *
  * Returns NULL on error, or a shader handle on success.
  *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC MOJOSHADER_mtlShader *MOJOSHADER_mtlCompileShader(const char *mainfn,
-                                                           const unsigned char *tokenbuf,
-                                                           const unsigned int bufsize,
-                                                           const MOJOSHADER_swizzle *swiz,
-                                                           const unsigned int swizcount,
-                                                           const MOJOSHADER_samplerMap *smap,
-                                                           const unsigned int smapcount);
+                                                const unsigned char *tokenbuf,
+                                                const unsigned int bufsize,
+                                                const MOJOSHADER_swizzle *swiz,
+                                                const unsigned int swizcount,
+                                                const MOJOSHADER_samplerMap *smap,
+                                                const unsigned int smapcount);
 
 /*
  * Increments a shader's internal refcount. To decrement the refcount, call
@@ -3430,8 +3418,8 @@
  * This function is only for convenience, specifically for compatibility with
  *  the effects API.
  *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC void MOJOSHADER_mtlBindShaders(MOJOSHADER_mtlShader *vshader,
                                         MOJOSHADER_mtlShader *pshader);
@@ -3443,33 +3431,20 @@
  *  the effects API.
  *
  * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC void MOJOSHADER_mtlGetBoundShaders(MOJOSHADER_mtlShader **vshader,
                                             MOJOSHADER_mtlShader **pshader);
 
 /*
- * This queries for the uniform buffer and byte offset for each of the
- *  currently bound shaders.
- *
- * This function is only for convenience, specifically for compatibility with
- *  the effects API.
- *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
- */
-DECLSPEC void MOJOSHADER_mtlGetUniformBuffers(void **vbuf, int *voff,
-                                              void **pbuf, int *poff);
-
-/*
  * Fills register pointers with pointers that are directly used to push uniform
  *  data to the Metal shader context.
  *
  * This function is really just for the effects API, you should NOT be using
  *  this unless you know every single line of MojoShader from memory.
  *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC void MOJOSHADER_mtlMapUniformBufferMemory(float **vsf, int **vsi, unsigned char **vsb,
                                                    float **psf, int **psi, unsigned char **psb);
@@ -3478,12 +3453,36 @@
  * Tells the context that you are done with the memory mapped by
  *  MOJOSHADER_mtlMapUniformBufferMemory().
  *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC void MOJOSHADER_mtlUnmapUniformBufferMemory();
 
 /*
+ * This queries for the uniform buffer and byte offsets for each of the
+ *  currently bound shaders.
+ *
+ * This function is only for convenience, specifically for compatibility with
+ *  the effects API.
+ *
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
+ */
+DECLSPEC void MOJOSHADER_mtlGetUniformData(void **buf, int *voff, int *poff);
+
+/*
+ * Get the MTLFunction* from the given MOJOSHADER_mtlShader.
+ */
+DECLSPEC void *MOJOSHADER_mtlGetFunctionHandle(MOJOSHADER_mtlShader *shader);
+
+/*
+ * Resets buffer offsets to prepare for the next frame.
+ *
+ * Always call this after submitting the final command buffer for a frame!
+ */
+DECLSPEC void MOJOSHADER_mtlEndFrame(void);
+
+/*
  * Return the location of a vertex attribute for the given shader.
  *
  * (usage) and (index) map to Direct3D vertex declaration values: COLOR1 would
@@ -3496,41 +3495,66 @@
                                           MOJOSHADER_usage usage, int index);
 
 /*
+ * Get any error state we might have picked up, such as failed shader
+ *  compilation.
+ *
+ * Returns a human-readable string. This string is for debugging purposes, and
+ *  not guaranteed to be localized, coherent, or user-friendly in any way.
+ *  It's for programmers!
+ *
+ * The latest error may remain between calls. New errors replace any existing
+ *  error. Don't check this string for a sign that an error happened, check
+ *  return codes instead and use this for explanation when debugging.
+ *
+ * This call does NOT require a valid MOJOSHADER_mtlContext to have been made
+ *  current. The error buffer is shared between contexts, so you can get
+ *  error results from a failed MOJOSHADER_mtlCreateContext().
+ *
+ * Do not free the returned string: it's a pointer to a static internal
+ *  buffer. Do not keep the pointer around, either, as it's likely to become
+ *  invalid as soon as you call into MojoShader again.
+ */
+DECLSPEC const char *MOJOSHADER_mtlGetError(void);
+
+/*
+ * Release the given MTLLibrary and all MTLFunctions it contains.
+ *
+ * This does NOT free MOJOSHADER_mtlShaders! Call MOJOSHADER_mtlDeleteShader()
+ *  on all the shaders of the library before you call this.
+ *
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
+ */
+DECLSPEC void MOJOSHADER_mtlDeleteLibrary(void *library);
+
+/*
  * Free the resources of a compiled shader. This will delete the MojoShader
  *  shader struct and free memory.
  *
  * This does NOT release the actual shader! The shader data belongs to an
  *  MTLLibrary that must be deleted with MOJOSHADER_mtlDeleteLibrary().
  *
- * This call requires a valid MOJOSHADER_mtlContext to have been created,
- *  or it will crash your program. See MOJOSHADER_mtlCreateContext().
+ * This call requires a valid MOJOSHADER_mtlContext to have been made current,
+ *  or it will crash your program. See MOJOSHADER_mtlMakeContextCurrent().
  */
 DECLSPEC void MOJOSHADER_mtlDeleteShader(MOJOSHADER_mtlShader *shader);
 
 /*
- * Get the MTLFunction* from the given MOJOSHADER_mtlShader.
- *
- * This function calls [retain] on the MTLFunction* before returning!
- *  Please call [release] on the result when you no longer need it.
- */
-DECLSPEC void *MOJOSHADER_mtlGetFunctionHandle(MOJOSHADER_mtlShader *shader);
-
-/*
- * Swaps uniform buffers and resets offsets to prepare for the next frame.
- *
- * Always call this after submitting the final command buffer for a frame!
+ * Deinitialize MojoShader's Metal shader management.
+ *
+ * This should be the last MOJOSHADER_mtl* function you call until you've
+ *  prepared a context again.
+ *
+ * This will clean up resources previously allocated, and may call into Metal.
+ *
+ * This will not clean up shaders and libraries you created! Please call
+ *  MOJOSHADER_mtlDeleteShader() and MOJOSHADER_mtlDeleteLibrary() to clean
+ *  those up before calling this function!
+ *
+ * This function destroys the MOJOSHADER_mtlContext you pass it. If it's the
+ *  current context, then no context will be current upon return.
  */
-DECLSPEC void MOJOSHADER_mtlEndFrame(void);
-
-/*
- * Deinitialize MojoShader's Metal shader management.
- *
- * This will clean up resources previously allocated for the active context.
- *
- * This will NOT clean up shaders you created! Please destroy all shaders
- *  before calling this function.
- */
-DECLSPEC void MOJOSHADER_mtlDestroyContext(void);
+DECLSPEC void MOJOSHADER_mtlDestroyContext(MOJOSHADER_mtlContext *_ctx);
 
 /* Vulkan interface */
 
--- a/mojoshader_metal.c	Tue Jul 07 17:44:32 2020 -0400
+++ b/mojoshader_metal.c	Tue Jul 07 19:25:47 2020 -0400
@@ -7,28 +7,32 @@
  *  This file written by Ryan C. Gordon.
  */
 
+#define __MOJOSHADER_INTERNAL__ 1
+#include "mojoshader_internal.h"
+
 #if (defined(__APPLE__) && defined(__MACH__))
 #define PLATFORM_APPLE 1
+#endif /* (defined(__APPLE__) && defined(__MACH__)) */
+
+typedef struct MOJOSHADER_mtlShader
+{
+    const MOJOSHADER_parseData *parseData;
+    uint32 refcount;
+    void *handle; // MTLFunction*
+} MOJOSHADER_mtlShader;
+
+// profile-specific implementations...
+
+#if SUPPORT_PROFILE_METAL && PLATFORM_APPLE
+#ifdef MOJOSHADER_EFFECT_SUPPORT
+
 #include "TargetConditionals.h"
 #include <objc/message.h>
 #define msg     ((void* (*)(void*, void*))objc_msgSend)
 #define msg_s   ((void* (*)(void*, void*, const char*))objc_msgSend)
 #define msg_p   ((void* (*)(void*, void*, void*))objc_msgSend)
-#define msg_ip  ((void* (*)(void*, void*, int, void*))objc_msgSend)
+#define msg_uu  ((void* (*)(void*, void*, uint64, uint64))objc_msgSend)
 #define msg_ppp ((void* (*)(void*, void*, void*, void*, void*))objc_msgSend)
-#endif /* (defined(__APPLE__) && defined(__MACH__)) */
-
-#define __MOJOSHADER_INTERNAL__ 1
-#include "mojoshader_internal.h"
-
-typedef struct MOJOSHADER_mtlUniformBuffer MOJOSHADER_mtlUniformBuffer;
-typedef struct MOJOSHADER_mtlShader
-{
-    const MOJOSHADER_parseData *parseData;
-    MOJOSHADER_mtlUniformBuffer *ubo;
-    uint32 refcount;
-    void *library; // MTLLibrary*
-} MOJOSHADER_mtlShader;
 
 // Error state...
 static char error_buffer[1024] = { '\0' };
@@ -43,23 +47,6 @@
     set_error("out of memory");
 } // out_of_memory
 
-// profile-specific implementations...
-
-#if SUPPORT_PROFILE_METAL && PLATFORM_APPLE
-#ifdef MOJOSHADER_EFFECT_SUPPORT
-
-/* Structs */
-
-typedef struct MOJOSHADER_mtlUniformBuffer
-{
-    int bufferSize;
-    void **internalBuffers; // MTLBuffer*
-    int internalBufferSize;
-    int internalOffset;
-    int currentFrame;
-    int inUse;
-} MOJOSHADER_mtlUniformBuffer;
-
 // Max entries for each register file type...
 #define MAX_REG_FILE_F 8192
 #define MAX_REG_FILE_I 2047
@@ -85,17 +72,13 @@
     // Pointer to the active MTLDevice.
     void* device;
 
-    // The maximum number of frames in flight.
-    int framesInFlight;
-
-    // Array of UBOs that are being used in the current frame.
-    MOJOSHADER_mtlUniformBuffer **buffersInUse;
+    // The uniform MTLBuffer shared between all shaders in the context.
+    void *ubo;
 
-    // The current capacity of the uniform buffer array.
-    int bufferArrayCapacity;
-
-    // The actual number of UBOs used in the current frame.
-    int numBuffersInUse;
+    // The current offsets into the UBO, per shader.
+    int vertexUniformOffset;
+    int pixelUniformOffset;
+    int totalUniformOffset;
 
     // The currently bound shaders.
     MOJOSHADER_mtlShader *vertexShader;
@@ -104,16 +87,15 @@
     // Objective-C Selectors
     void* classNSString;
     void* selAlloc;
+    void* selContents;
     void* selInitWithUTF8String;
-    void* selUTF8String;
     void* selLength;
-    void* selContents;
+    void* selLocalizedDescription;
     void* selNewBufferWithLength;
-    void* selRelease;
+    void* selNewFunctionWithName;
     void* selNewLibraryWithSource;
-    void* selLocalizedDescription;
-    void* selNewFunctionWithName;
-    void* selRetain;
+    void* selRelease;
+    void* selUTF8String;
 } MOJOSHADER_mtlContext;
 
 static MOJOSHADER_mtlContext *ctx = NULL;
@@ -125,206 +107,61 @@
     #if TARGET_OS_IOS || TARGET_OS_TV || TARGET_OS_SIMULATOR
     int align = 16;
     #else
+    // !!! FIXME: Will Apple Silicon Macs have a different minimum alignment?
     int align = 256;
     #endif
 
     return align * ((n + align - 1) / align);
 } // next_highest_alignment
 
-static void* create_ubo_backing_buffer(MOJOSHADER_mtlUniformBuffer *ubo,
-                                                               int frame)
-{
-    void *oldBuffer = ubo->internalBuffers[frame];
-    void *newBuffer = msg_ip(
-        ctx->device,
-        ctx->selNewBufferWithLength,
-        ubo->internalBufferSize,
-        NULL
-    );
-    if (oldBuffer != NULL)
-    {
-        // Copy over data from old buffer
-        memcpy(
-            msg(newBuffer, ctx->selContents),
-            msg(oldBuffer, ctx->selContents),
-            (int) msg(oldBuffer, ctx->selLength)
-        );
-
-        // Free the old buffer
-        msg(oldBuffer, ctx->selRelease);
-    } //if
-
-    return newBuffer;
-} // create_ubo_backing_buffer
-
-static void predraw_ubo(MOJOSHADER_mtlUniformBuffer *ubo)
-{
-    if (!ubo->inUse)
-    {
-        ubo->inUse = 1;
-        ctx->buffersInUse[ctx->numBuffersInUse++] = ubo;
-
-        // Double the array size if we run out of room
-        if (ctx->numBuffersInUse >= ctx->bufferArrayCapacity)
-        {
-            int oldlen = ctx->bufferArrayCapacity;
-            ctx->bufferArrayCapacity *= 2;
-            MOJOSHADER_mtlUniformBuffer **tmp;
-            tmp = (MOJOSHADER_mtlUniformBuffer**) ctx->malloc_fn(
-                ctx->bufferArrayCapacity * sizeof(MOJOSHADER_mtlUniformBuffer *),
-                ctx->malloc_data
-            );
-            memcpy(tmp, ctx->buffersInUse, oldlen * sizeof(MOJOSHADER_mtlUniformBuffer *));
-            ctx->free_fn(ctx->buffersInUse, ctx->malloc_data);
-            ctx->buffersInUse = tmp;
-        }
-        return;
-    } // if
-
-    ubo->internalOffset += ubo->bufferSize;
-
-    int buflen = (int) msg(
-        ubo->internalBuffers[ubo->currentFrame],
-        ctx->selLength
-    );
-    if (ubo->internalOffset >= buflen)
-    {
-        // Double capacity when we're out of room
-        if (ubo->internalOffset >= ubo->internalBufferSize)
-            ubo->internalBufferSize *= 2;
-
-        ubo->internalBuffers[ubo->currentFrame] =
-            create_ubo_backing_buffer(ubo, ubo->currentFrame);
-    } //if
-} // predraw_ubo
-
-static MOJOSHADER_mtlUniformBuffer* create_ubo(MOJOSHADER_mtlShader *shader,
-                                               MOJOSHADER_malloc m, void* d)
-{
-    int uniformCount = shader->parseData->uniform_count;
-    if (uniformCount == 0)
-        return NULL;
-
-    // Calculate how big we need to make the buffer
-    int buflen = 0;
-    for (int i = 0; i < uniformCount; i += 1)
-    {
-        int arrayCount = shader->parseData->uniforms[i].array_count;
-        int uniformSize = 16;
-        if (shader->parseData->uniforms[i].type == MOJOSHADER_UNIFORM_BOOL)
-            uniformSize = 1;
-        buflen += (arrayCount ? arrayCount : 1) * uniformSize;
-    } // for
-
-    // Allocate the UBO
-    MOJOSHADER_mtlUniformBuffer *retval;
-    retval = (MOJOSHADER_mtlUniformBuffer *) m(sizeof(MOJOSHADER_mtlUniformBuffer), d);
-    retval->bufferSize = next_highest_alignment(buflen);
-    retval->internalBufferSize = retval->bufferSize * 16; // pre-allocate some extra room!
-    retval->internalBuffers = m(ctx->framesInFlight * sizeof(void*), d);
-    retval->internalOffset = 0;
-    retval->inUse = 0;
-    retval->currentFrame = 0;
-
-    // Create the backing buffers
-    for (int i = 0; i < ctx->framesInFlight; i++)
-    {
-        retval->internalBuffers[i] = NULL; // basically a memset('\0')
-        retval->internalBuffers[i] = create_ubo_backing_buffer(retval, i);
-    } // for
-
-    return retval;
-} // create_ubo
-
-static void dealloc_ubo(MOJOSHADER_mtlShader *shader,
-                        MOJOSHADER_free f,
-                        void* d)
-{
-    if (shader->ubo == NULL)
-        return;
-
-    for (int i = 0; i < ctx->framesInFlight; i++)
-    {
-        msg(shader->ubo->internalBuffers[i], ctx->selRelease);
-        shader->ubo->internalBuffers[i] = NULL;
-    } // for
-
-    f(shader->ubo->internalBuffers, d);
-    f(shader->ubo, d);
-} // dealloc_ubo
-
-static void *get_uniform_buffer(MOJOSHADER_mtlShader *shader)
-{
-    if (shader == NULL || shader->ubo == NULL)
-        return NULL;
-
-    return shader->ubo->internalBuffers[shader->ubo->currentFrame];
-} // get_uniform_buffer
-
-static int get_uniform_offset(MOJOSHADER_mtlShader *shader)
-{
-    if (shader == NULL || shader->ubo == NULL)
-        return 0;
-
-    return shader->ubo->internalOffset;
-} // get_uniform_offset
-
 static void update_uniform_buffer(MOJOSHADER_mtlShader *shader)
 {
-    if (shader == NULL || shader->ubo == NULL)
+    if (shader == NULL || shader->parseData->uniform_count == 0)
         return;
 
     float *regF; int *regI; uint8 *regB;
     if (shader->parseData->shader_type == MOJOSHADER_TYPE_VERTEX)
     {
+        ctx->vertexUniformOffset = ctx->totalUniformOffset;
         regF = ctx->vs_reg_file_f;
         regI = ctx->vs_reg_file_i;
         regB = ctx->vs_reg_file_b;
     } // if
     else
     {
+        ctx->pixelUniformOffset = ctx->totalUniformOffset;
         regF = ctx->ps_reg_file_f;
         regI = ctx->ps_reg_file_i;
         regB = ctx->ps_reg_file_b;
     } // else
 
-    predraw_ubo(shader->ubo);
-    void *buf = shader->ubo->internalBuffers[shader->ubo->currentFrame];
-    void *contents = msg(buf, ctx->selContents) + shader->ubo->internalOffset;
-
+    void *contents = msg(ctx->ubo, ctx->selContents) + ctx->totalUniformOffset;
     int offset = 0;
     for (int i = 0; i < shader->parseData->uniform_count; i++)
     {
+        if (shader->parseData->uniforms[i].constant)
+            continue;
+
         int idx = shader->parseData->uniforms[i].index;
         int arrayCount = shader->parseData->uniforms[i].array_count;
+
+        void *src = NULL;
         int size = arrayCount ? arrayCount : 1;
 
         switch (shader->parseData->uniforms[i].type)
         {
             case MOJOSHADER_UNIFORM_FLOAT:
-                memcpy(
-                    contents + (offset * 16),
-                    &regF[4 * idx],
-                    size * 16
-                );
+                src = &regF[4 * idx];
+                size *= 16;
                 break;
 
             case MOJOSHADER_UNIFORM_INT:
-                // !!! FIXME: Need a test case
-                memcpy(
-                    contents + (offset * 16),
-                    &regI[4 * idx],
-                    size * 16
-                );
+                src = &regI[4 * idx];
+                size *= 16;
                 break;
 
             case MOJOSHADER_UNIFORM_BOOL:
-                // !!! FIXME: Need a test case
-                memcpy(
-                    contents + offset,
-                    &regB[idx],
-                    size
-                );
+                src = &regB[idx];
                 break;
 
             default:
@@ -332,22 +169,34 @@
                 break;
         } // switch
 
+        memcpy(contents + offset, src, size);
         offset += size;
     } // for
+
+    ctx->totalUniformOffset = next_highest_alignment(ctx->totalUniformOffset + offset);
+    if (ctx->totalUniformOffset >= (int) msg(ctx->ubo, ctx->selLength))
+    {
+        // !!! FIXME: Is there a better way to handle this?
+        assert(0 && "Uniform data exceeded the size of the buffer!");
+    } // if
 } // update_uniform_buffer
 
 /* Public API */
 
-int MOJOSHADER_mtlCreateContext(void* mtlDevice, int framesInFlight,
-                                MOJOSHADER_malloc m, MOJOSHADER_free f,
-                                void *malloc_d)
+MOJOSHADER_mtlContext *MOJOSHADER_mtlCreateContext(void* mtlDevice,
+                                    MOJOSHADER_malloc m, MOJOSHADER_free f,
+                                    void *malloc_d)
 {
-    assert(ctx == NULL);
+    MOJOSHADER_mtlContext *retval = NULL;
+    MOJOSHADER_mtlContext *current_ctx = ctx;
+    int i;
+
+    ctx = NULL;
 
     if (m == NULL) m = MOJOSHADER_internal_malloc;
     if (f == NULL) f = MOJOSHADER_internal_free;
 
-    ctx = (MOJOSHADER_mtlContext *) m(sizeof(MOJOSHADER_mtlContext), malloc_d);
+    ctx = (MOJOSHADER_mtlContext *) m(sizeof (MOJOSHADER_mtlContext), malloc_d);
     if (ctx == NULL)
     {
         out_of_memory();
@@ -361,43 +210,41 @@
 
     // Initialize the Metal state
     ctx->device = mtlDevice;
-    ctx->framesInFlight = framesInFlight;
-
-    // Allocate the uniform buffer object array
-    ctx->bufferArrayCapacity = 32; // arbitrary!
-    ctx->buffersInUse = ctx->malloc_fn(
-        ctx->bufferArrayCapacity * sizeof(MOJOSHADER_mtlUniformBuffer *),
-        ctx->malloc_data
-    );
 
     // Grab references to Objective-C selectors
     ctx->classNSString = objc_getClass("NSString");
     ctx->selAlloc = sel_registerName("alloc");
+    ctx->selContents = sel_registerName("contents");
     ctx->selInitWithUTF8String = sel_registerName("initWithUTF8String:");
-    ctx->selUTF8String = sel_registerName("UTF8String");
     ctx->selLength = sel_registerName("length");
-    ctx->selContents = sel_registerName("contents");
+    ctx->selLocalizedDescription = sel_registerName("localizedDescription");
     ctx->selNewBufferWithLength = sel_registerName("newBufferWithLength:options:");
+    ctx->selNewFunctionWithName = sel_registerName("newFunctionWithName:");
+    ctx->selNewLibraryWithSource = sel_registerName("newLibraryWithSource:options:error:");
     ctx->selRelease = sel_registerName("release");
-    ctx->selNewLibraryWithSource = sel_registerName("newLibraryWithSource:options:error:");
-    ctx->selLocalizedDescription = sel_registerName("localizedDescription");
-    ctx->selNewFunctionWithName = sel_registerName("newFunctionWithName:");
-    ctx->selRetain = sel_registerName("retain");
+    ctx->selUTF8String = sel_registerName("UTF8String");
 
-    return 0;
+    // Create the uniform buffer
+    ctx->ubo = msg_uu(mtlDevice, ctx->selNewBufferWithLength,
+                      next_highest_alignment(1000000), 0);
+
+    retval = ctx;
+    ctx = current_ctx;
+    return retval;
 
 init_fail:
     if (ctx != NULL)
         f(ctx, malloc_d);
-    return -1;
+    ctx = current_ctx;
+    return NULL;
 } // MOJOSHADER_mtlCreateContext
 
-void MOJOSHADER_mtlDestroyContext(void)
+
+void MOJOSHADER_mtlMakeContextCurrent(MOJOSHADER_mtlContext *_ctx)
 {
-    ctx->free_fn(ctx->buffersInUse, ctx->malloc_data);
-    ctx->free_fn(ctx, ctx->malloc_data);
-    ctx = NULL;
-} // MOJOSHADER_mtlDestroyContext
+    ctx = _ctx;
+} // MOJOSHADER_mtlMakeContextCurrent
+
 
 void *MOJOSHADER_mtlCompileLibrary(MOJOSHADER_effect *effect)
 {
@@ -410,7 +257,7 @@
     const char *repl;
     MOJOSHADER_effectObject *object;
     MOJOSHADER_mtlShader *shader;
-    void *retval, *compileError, *shader_source_ns;
+    void *retval, *compileError, *shader_source_ns, *fnname;
 
     // Count the number of shaders before allocating
     src_len = 0;
@@ -488,7 +335,7 @@
         return NULL;
     } // if
 
-    // Run through the shaders again, setting the library reference
+    // Run through the shaders again, getting the function handles
     for (i = 0; i < effect->object_count; i++)
     {
         object = &effect->objects[i];
@@ -498,25 +345,32 @@
             if (object->shader.is_preshader)
                 continue;
 
-            ((MOJOSHADER_mtlShader*) object->shader.shader)->library = retval;
+            shader = (MOJOSHADER_mtlShader*) object->shader.shader;
+            fnname = msg_s(
+                msg(ctx->classNSString, ctx->selAlloc),
+                ctx->selInitWithUTF8String,
+                shader->parseData->mainfn
+            );
+            shader->handle = msg_p(
+                retval,
+                ctx->selNewFunctionWithName,
+                fnname
+            );
+            msg(fnname, ctx->selRelease);
         } // if
     } // for
 
     return retval;
 } // MOJOSHADER_mtlCompileLibrary
 
-void MOJOSHADER_mtlDeleteLibrary(void *library)
-{
-    msg(library, ctx->selRelease);
-} // MOJOSHADER_mtlDeleteLibrary
 
 MOJOSHADER_mtlShader *MOJOSHADER_mtlCompileShader(const char *mainfn,
-                                                  const unsigned char *tokenbuf,
-                                                  const unsigned int bufsize,
-                                                  const MOJOSHADER_swizzle *swiz,
-                                                  const unsigned int swizcount,
-                                                  const MOJOSHADER_samplerMap *smap,
-                                                  const unsigned int smapcount)
+                                            const unsigned char *tokenbuf,
+                                            const unsigned int bufsize,
+                                            const MOJOSHADER_swizzle *swiz,
+                                            const unsigned int swizcount,
+                                            const MOJOSHADER_samplerMap *smap,
+                                            const unsigned int smapcount)
 {
     MOJOSHADER_malloc m = ctx->malloc_fn;
     MOJOSHADER_free f = ctx->free_fn;
@@ -539,8 +393,7 @@
 
     retval->parseData = pd;
     retval->refcount = 1;
-    retval->ubo = create_ubo(retval, m, d);
-    retval->library = NULL; // populated by MOJOSHADER_mtlCompileLibrary
+    retval->handle = NULL; // populated by MOJOSHADER_mtlCompileLibrary
 
     return retval;
 
@@ -550,26 +403,13 @@
     return NULL;
 } // MOJOSHADER_mtlCompileShader
 
+
 void MOJOSHADER_mtlShaderAddRef(MOJOSHADER_mtlShader *shader)
 {
     if (shader != NULL)
         shader->refcount++;
 } // MOJOSHADER_mtlShaderAddRef
 
-void MOJOSHADER_mtlDeleteShader(MOJOSHADER_mtlShader *shader)
-{
-    if (shader != NULL)
-    {
-        if (shader->refcount > 1)
-            shader->refcount--;
-        else
-        {
-            dealloc_ubo(shader, ctx->free_fn, ctx->malloc_data);
-            MOJOSHADER_freeParseData(shader->parseData);
-            ctx->free_fn(shader, ctx->malloc_data);
-        } // else
-    } // if
-} // MOJOSHADER_mtlDeleteShader
 
 const MOJOSHADER_parseData *MOJOSHADER_mtlGetShaderParseData(
                                                 MOJOSHADER_mtlShader *shader)
@@ -577,6 +417,7 @@
     return (shader != NULL) ? shader->parseData : NULL;
 } // MOJOSHADER_mtlGetParseData
 
+
 void MOJOSHADER_mtlBindShaders(MOJOSHADER_mtlShader *vshader,
                                MOJOSHADER_mtlShader *pshader)
 {
@@ -588,6 +429,7 @@
         ctx->pixelShader = pshader;
 } // MOJOSHADER_mtlBindShaders
 
+
 void MOJOSHADER_mtlGetBoundShaders(MOJOSHADER_mtlShader **vshader,
                                    MOJOSHADER_mtlShader **pshader)
 {
@@ -595,6 +437,7 @@
     *pshader = ctx->pixelShader;
 } // MOJOSHADER_mtlGetBoundShaders
 
+
 void MOJOSHADER_mtlMapUniformBufferMemory(float **vsf, int **vsi, unsigned char **vsb,
                                           float **psf, int **psi, unsigned char **psb)
 {
@@ -606,6 +449,7 @@
     *psb = ctx->ps_reg_file_b;
 } // MOJOSHADER_mtlMapUniformBufferMemory
 
+
 void MOJOSHADER_mtlUnmapUniformBufferMemory()
 {
     /* This has nothing to do with unmapping memory
@@ -616,48 +460,32 @@
     update_uniform_buffer(ctx->pixelShader);
 } // MOJOSHADER_mtlUnmapUniformBufferMemory
 
-void MOJOSHADER_mtlGetUniformBuffers(void **vbuf, int *voff,
-                                     void **pbuf, int *poff)
+
+void MOJOSHADER_mtlGetUniformData(void **buf, int *voff, int *poff)
 {
-    *vbuf = get_uniform_buffer(ctx->vertexShader);
-    *voff = get_uniform_offset(ctx->vertexShader);
-    *pbuf = get_uniform_buffer(ctx->pixelShader);
-    *poff = get_uniform_offset(ctx->pixelShader);
+    *buf = ctx->ubo;
+    *voff = ctx->vertexUniformOffset;
+    *poff = ctx->pixelUniformOffset;
 } // MOJOSHADER_mtlGetUniformBuffers
 
+
 void *MOJOSHADER_mtlGetFunctionHandle(MOJOSHADER_mtlShader *shader)
 {
     if (shader == NULL)
         return NULL;
 
-    void *fnname = msg_s(
-        msg(ctx->classNSString, ctx->selAlloc),
-        ctx->selInitWithUTF8String,
-        shader->parseData->mainfn
-    );
-    void *ret = msg_p(
-        shader->library,
-        ctx->selNewFunctionWithName,
-        fnname
-    );
-    msg(fnname, ctx->selRelease);
-    msg(ret, ctx->selRetain);
+    return shader->handle;
+} // MOJOSHADER_mtlGetFunctionHandle
 
-    return ret;
-} // MOJOSHADER_mtlGetFunctionHandle
 
 void MOJOSHADER_mtlEndFrame()
 {
-    for (int i = 0; i < ctx->numBuffersInUse; i += 1)
-    {
-        MOJOSHADER_mtlUniformBuffer *buf = ctx->buffersInUse[i];
-        buf->internalOffset = 0;
-        buf->currentFrame = (buf->currentFrame + 1) % ctx->framesInFlight;
-        buf->inUse = 0;
-    } // for
-    ctx->numBuffersInUse = 0;
+    ctx->totalUniformOffset = 0;
+    ctx->vertexUniformOffset = 0;
+    ctx->pixelUniformOffset = 0;
 } // MOJOSHADER_mtlEndFrame
 
+
 int MOJOSHADER_mtlGetVertexAttribLocation(MOJOSHADER_mtlShader *vert,
                                           MOJOSHADER_usage usage, int index)
 {
@@ -677,11 +505,48 @@
     return -1;
 } // MOJOSHADER_mtlGetVertexAttribLocation
 
+
 const char *MOJOSHADER_mtlGetError(void)
 {
     return error_buffer;
 } // MOJOSHADER_mtlGetError
 
+
+void MOJOSHADER_mtlDeleteLibrary(void *library)
+{
+    msg(library, ctx->selRelease);
+} // MOJOSHADER_mtlDeleteLibrary
+
+
+void MOJOSHADER_mtlDeleteShader(MOJOSHADER_mtlShader *shader)
+{
+    if (shader != NULL)
+    {
+        if (shader->refcount > 1)
+            shader->refcount--;
+        else
+        {
+            msg(shader->handle, ctx->selRelease);
+            MOJOSHADER_freeParseData(shader->parseData);
+            ctx->free_fn(shader, ctx->malloc_data);
+        } // else
+    } // if
+} // MOJOSHADER_mtlDeleteShader
+
+
+void MOJOSHADER_mtlDestroyContext(MOJOSHADER_mtlContext *_ctx)
+{
+    MOJOSHADER_mtlContext *current_ctx = ctx;
+    ctx = _ctx;
+
+    if (ctx->ubo != NULL)
+        msg(ctx->ubo, ctx->selRelease);
+
+    if (ctx != NULL)
+        ctx->free_fn(ctx, ctx->malloc_data);
+    ctx = ((current_ctx == _ctx) ? NULL : current_ctx);
+} // MOJOSHADER_mtlDestroyContext
+
 #endif /* MOJOSHADER_EFFECT_SUPPORT */
 #endif /* SUPPORT_PROFILE_METAL && PLATFORM_APPLE */