Updated blend semantics so blending uses the following formula:
author Sam Lantinga <slouken@libsdl.org>
Tue, 23 Jul 2013 08:06:49 -0700
changeset 7502 6ff02ff3cf06
parent 7501 b27825bb5879
child 7503 701f4a25df89
Updated blend semantics so blending uses the following formula:
    dstRGB = (srcRGB * srcA) + (dstRGB * (1-srcA))
    dstA = srcA + (dstA * (1-srcA))
This allows proper compositing semantics without requiring premultiplied alpha.
Needs full unit test coverage and bug fixes!
include/SDL_blendmode.h
include/SDL_surface.h
src/render/direct3d/SDL_render_d3d.c
src/render/opengl/SDL_glfuncs.h
src/render/opengl/SDL_render_gl.c
src/render/opengles/SDL_glesfuncs.h
src/render/opengles/SDL_render_gles.c
src/render/opengles2/SDL_gles2funcs.h
src/render/opengles2/SDL_render_gles2.c
src/video/SDL_blit.h
src/video/SDL_blit_0.c
src/video/SDL_blit_1.c
src/video/SDL_blit_A.c
src/video/SDL_blit_auto.c
src/video/sdlgenblit.pl
test/testrendertarget.c
test/testsprite2.c
--- a/include/SDL_blendmode.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/include/SDL_blendmode.h	Tue Jul 23 08:06:49 2013 -0700
@@ -39,10 +39,17 @@
  */
 typedef enum
 {
-    SDL_BLENDMODE_NONE = 0x00000000,     /**< No blending */
-    SDL_BLENDMODE_BLEND = 0x00000001,    /**< dst = (src * A) + (dst * (1-A)) */
-    SDL_BLENDMODE_ADD = 0x00000002,      /**< dst = (src * A) + dst */
-    SDL_BLENDMODE_MOD = 0x00000004       /**< dst = src * dst */
+    SDL_BLENDMODE_NONE = 0x00000000,     /**< no blending
+                                              dstRGBA = srcRGBA */
+    SDL_BLENDMODE_BLEND = 0x00000001,    /**< alpha blending
+                                              dstRGB = (srcRGB * srcA) + (dstRGB * (1-srcA))
+                                              dstA = srcA + (dstA * (1-srcA)) */
+    SDL_BLENDMODE_ADD = 0x00000002,      /**< additive blending
+                                              dstRGB = (srcRGB * srcA) + dstRGB
+                                              dstA = dstA */
+    SDL_BLENDMODE_MOD = 0x00000004       /**< color modulate
+                                              dstRGB = srcRGB * dstRGB
+                                              dstA = dstA */
 } SDL_BlendMode;
 
 /* Ends C function definitions when using C++ */
--- a/include/SDL_surface.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/include/SDL_surface.h	Tue Jul 23 08:06:49 2013 -0700
@@ -399,44 +399,42 @@
  *
  *  The blit function should not be called on a locked surface.
  *
- *  The blit semantics for surfaces with and without alpha and colorkey
+ *  The blit semantics for surfaces with and without blending and colorkey
  *  are defined as follows:
  *  \verbatim
     RGBA->RGB:
-      SDL_SRCALPHA set:
-        alpha-blend (using alpha-channel).
+      Source surface blend mode set to SDL_BLENDMODE_BLEND:
+        alpha-blend (using the source alpha-channel and per-surface alpha)
         SDL_SRCCOLORKEY ignored.
-      SDL_SRCALPHA not set:
+      Source surface blend mode set to SDL_BLENDMODE_NONE:
         copy RGB.
         if SDL_SRCCOLORKEY set, only copy the pixels matching the
         RGB values of the source color key, ignoring alpha in the
         comparison.
 
     RGB->RGBA:
-      SDL_SRCALPHA set:
-        alpha-blend (using the source per-surface alpha value);
-        set destination alpha to opaque.
-      SDL_SRCALPHA not set:
+      Source surface blend mode set to SDL_BLENDMODE_BLEND:
+        alpha-blend (using the source per-surface alpha)
+      Source surface blend mode set to SDL_BLENDMODE_NONE:
         copy RGB, set destination alpha to source per-surface alpha value.
       both:
         if SDL_SRCCOLORKEY set, only copy the pixels matching the
         source color key.
 
     RGBA->RGBA:
-      SDL_SRCALPHA set:
-        alpha-blend (using the source alpha channel) the RGB values;
-        leave destination alpha untouched. [Note: is this correct?]
+      Source surface blend mode set to SDL_BLENDMODE_BLEND:
+        alpha-blend (using the source alpha-channel and per-surface alpha)
         SDL_SRCCOLORKEY ignored.
-      SDL_SRCALPHA not set:
+      Source surface blend mode set to SDL_BLENDMODE_NONE:
         copy all of RGBA to the destination.
         if SDL_SRCCOLORKEY set, only copy the pixels matching the
         RGB values of the source color key, ignoring alpha in the
-       comparison.
+        comparison.
 
     RGB->RGB:
-      SDL_SRCALPHA set:
-        alpha-blend (using the source per-surface alpha value).
-      SDL_SRCALPHA not set:
+      Source surface blend mode set to SDL_BLENDMODE_BLEND:
+        alpha-blend (using the source per-surface alpha)
+      Source surface blend mode set to SDL_BLENDMODE_NONE:
         copy RGB.
       both:
         if SDL_SRCCOLORKEY set, only copy the pixels matching the
--- a/src/render/direct3d/SDL_render_d3d.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/direct3d/SDL_render_d3d.c	Tue Jul 23 08:06:49 2013 -0700
@@ -227,6 +227,7 @@
     D3DPRESENT_PARAMETERS pparams;
     SDL_bool updateSize;
     SDL_bool beginScene;
+    SDL_bool enableSeparateAlphaBlend;
     D3DTEXTUREFILTERTYPE scaleMode;
     IDirect3DSurface9 *defaultRenderTarget;
     IDirect3DSurface9 *currentRenderTarget;
@@ -615,6 +616,10 @@
         renderer->info.flags |= SDL_RENDERER_TARGETTEXTURE;
     }
 
+    if (caps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND) {
+        data->enableSeparateAlphaBlend = SDL_TRUE;
+    }
+
     /* Set up parameters for rendering */
     IDirect3DDevice9_SetVertexShader(data->device, NULL);
     IDirect3DDevice9_SetFVF(data->device,
@@ -637,6 +642,10 @@
                                           D3DTA_TEXTURE);
     IDirect3DDevice9_SetTextureStageState(data->device, 0, D3DTSS_ALPHAARG2,
                                           D3DTA_DIFFUSE);
+    /* Enable separate alpha blend function, if possible */
+    if (data->enableSeparateAlphaBlend) {
+        IDirect3DDevice9_SetRenderState(data->device, D3DRS_SEPARATEALPHABLENDENABLE, TRUE);
+    }
     /* Disable second texture stage, since we're done */
     IDirect3DDevice9_SetTextureStageState(data->device, 1, D3DTSS_COLOROP,
                                           D3DTOP_DISABLE);
@@ -979,6 +988,12 @@
                                         D3DBLEND_SRCALPHA);
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLEND,
                                         D3DBLEND_INVSRCALPHA);
+        if (data->enableSeparateAlphaBlend) {
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_SRCBLENDALPHA,
+                                            D3DBLEND_ONE);
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLENDALPHA,
+                                            D3DBLEND_INVSRCALPHA);
+        }
         break;
     case SDL_BLENDMODE_ADD:
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_ALPHABLENDENABLE,
@@ -987,6 +1002,12 @@
                                         D3DBLEND_SRCALPHA);
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLEND,
                                         D3DBLEND_ONE);
+        if (data->enableSeparateAlphaBlend) {
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_SRCBLENDALPHA,
+                                            D3DBLEND_ZERO);
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLENDALPHA,
+                                            D3DBLEND_ONE);
+        }
         break;
     case SDL_BLENDMODE_MOD:
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_ALPHABLENDENABLE,
@@ -995,6 +1016,12 @@
                                         D3DBLEND_ZERO);
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLEND,
                                         D3DBLEND_SRCCOLOR);
+        if (data->enableSeparateAlphaBlend) {
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_SRCBLENDALPHA,
+                                            D3DBLEND_ZERO);
+            IDirect3DDevice9_SetRenderState(data->device, D3DRS_DESTBLENDALPHA,
+                                            D3DBLEND_ONE);
+        }
         break;
     }
 }
--- a/src/render/opengl/SDL_glfuncs.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengl/SDL_glfuncs.h	Tue Jul 23 08:06:49 2013 -0700
@@ -15,6 +15,7 @@
                 (GLsizei, GLsizei, GLfloat, GLfloat, GLfloat, GLfloat,
                  const GLubyte *))
 SDL_PROC(void, glBlendFunc, (GLenum, GLenum))
+SDL_PROC(void, glBlendFuncSeparate, (GLenum, GLenum, GLenum, GLenum))
 SDL_PROC_UNUSED(void, glCallList, (GLuint))
 SDL_PROC_UNUSED(void, glCallLists, (GLsizei, GLenum, const GLvoid *))
 SDL_PROC(void, glClear, (GLbitfield))
--- a/src/render/opengl/SDL_render_gl.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengl/SDL_render_gl.c	Tue Jul 23 08:06:49 2013 -0700
@@ -924,17 +924,17 @@
         case SDL_BLENDMODE_BLEND:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+            data->glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
             break;
         case SDL_BLENDMODE_ADD:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_SRC_ALPHA, GL_ONE);
+            data->glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE);
             break;
         case SDL_BLENDMODE_MOD:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_ZERO, GL_SRC_COLOR);
+            data->glBlendFuncSeparate(GL_ZERO, GL_SRC_COLOR, GL_ZERO, GL_ONE);
             break;
         }
         data->current.blendMode = blendMode;
--- a/src/render/opengles/SDL_glesfuncs.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengles/SDL_glesfuncs.h	Tue Jul 23 08:06:49 2013 -0700
@@ -1,5 +1,6 @@
 SDL_PROC(void, glBindTexture, (GLenum, GLuint))
 SDL_PROC(void, glBlendFunc, (GLenum, GLenum))
+SDL_PROC(void, glBlendFuncSeparateOES, (GLenum, GLenum, GLenum, GLenum))
 SDL_PROC(void, glClear, (GLbitfield))
 SDL_PROC(void, glClearColor, (GLclampf, GLclampf, GLclampf, GLclampf))
 SDL_PROC(void, glColor4f, (GLfloat, GLfloat, GLfloat, GLfloat))
--- a/src/render/opengles/SDL_render_gles.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengles/SDL_render_gles.c	Tue Jul 23 08:06:49 2013 -0700
@@ -121,6 +121,7 @@
 
     SDL_bool useDrawTexture;
     SDL_bool GL_OES_draw_texture_supported;
+    SDL_bool GL_OES_blend_func_separate_supported;
 } GLES_RenderData;
 
 typedef struct
@@ -376,6 +377,10 @@
     }
     data->framebuffers = NULL;
 
+    if (SDL_GL_ExtensionSupported("GL_OES_blend_func_separate")) {
+        data->GL_OES_blend_func_separate_supported = SDL_TRUE;
+    }
+
     /* Set up parameters for rendering */
     GLES_ResetState(renderer);
 
@@ -680,17 +685,29 @@
         case SDL_BLENDMODE_BLEND:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+            if (data->GL_OES_blend_func_separate_supported) {
+                data->glBlendFuncSeparateOES(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+            } else {
+                data->glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+            }
             break;
         case SDL_BLENDMODE_ADD:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_SRC_ALPHA, GL_ONE);
+            if (data->GL_OES_blend_func_separate_supported) {
+                data->glBlendFuncSeparateOES(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE);
+            } else {
+                data->glBlendFunc(GL_SRC_ALPHA, GL_ONE);
+            }
             break;
         case SDL_BLENDMODE_MOD:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
             data->glEnable(GL_BLEND);
-            data->glBlendFunc(GL_ZERO, GL_SRC_COLOR);
+            if (data->GL_OES_blend_func_separate_supported) {
+                data->glBlendFuncSeparateOES(GL_ZERO, GL_SRC_COLOR, GL_ZERO, GL_ONE);
+            } else {
+                data->glBlendFunc(GL_ZERO, GL_SRC_COLOR);
+            }
             break;
         }
         data->current.blendMode = blendMode;
--- a/src/render/opengles2/SDL_gles2funcs.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengles2/SDL_gles2funcs.h	Tue Jul 23 08:06:49 2013 -0700
@@ -2,7 +2,7 @@
 SDL_PROC(void, glAttachShader, (GLuint, GLuint))
 SDL_PROC(void, glBindAttribLocation, (GLuint, GLuint, const char *))
 SDL_PROC(void, glBindTexture, (GLenum, GLuint))
-SDL_PROC(void, glBlendFunc, (GLenum, GLenum))
+SDL_PROC(void, glBlendFuncSeparate, (GLenum, GLenum, GLenum, GLenum))
 SDL_PROC(void, glClear, (GLbitfield))
 SDL_PROC(void, glClearColor, (GLclampf, GLclampf, GLclampf, GLclampf))
 SDL_PROC(void, glCompileShader, (GLuint))
--- a/src/render/opengles2/SDL_render_gles2.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/render/opengles2/SDL_render_gles2.c	Tue Jul 23 08:06:49 2013 -0700
@@ -994,15 +994,15 @@
             break;
         case SDL_BLENDMODE_BLEND:
             rdata->glEnable(GL_BLEND);
-            rdata->glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+            rdata->glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
             break;
         case SDL_BLENDMODE_ADD:
             rdata->glEnable(GL_BLEND);
-            rdata->glBlendFunc(GL_SRC_ALPHA, GL_ONE);
+            rdata->glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE);
             break;
         case SDL_BLENDMODE_MOD:
             rdata->glEnable(GL_BLEND);
-            rdata->glBlendFunc(GL_ZERO, GL_SRC_COLOR);
+            rdata->glBlendFuncSeparate(GL_ZERO, GL_SRC_COLOR, GL_ZERO, GL_ONE);
             break;
         }
         rdata->current.blendMode = blendMode;
--- a/src/video/SDL_blit.h	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/SDL_blit.h	Tue Jul 23 08:06:49 2013 -0700
@@ -440,12 +440,22 @@
     }                                                                   \
 }
 
-/* Blend the RGB values of two Pixels based on a source alpha value */
-#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)                          \
+/* Blend the RGB values of two pixels with an alpha value */
+#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)                      \
 do {                                                                    \
-    dR = ((((int)(sR-dR)*(int)A)/255)+dR);                              \
-    dG = ((((int)(sG-dG)*(int)A)/255)+dG);                              \
-    dB = ((((int)(sB-dB)*(int)A)/255)+dB);                              \
+    dR = ((((unsigned)(sR-dR)*(unsigned)A)/255)+dR);                    \
+    dG = ((((unsigned)(sG-dG)*(unsigned)A)/255)+dG);                    \
+    dB = ((((unsigned)(sB-dB)*(unsigned)A)/255)+dB);                    \
+} while(0)
+
+
+/* Blend the RGBA values of two pixels */
+#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)                \
+do {                                                                    \
+    dR = ((((unsigned)(sR-dR)*(unsigned)sA)/255)+dR);                   \
+    dG = ((((unsigned)(sG-dG)*(unsigned)sA)/255)+dG);                   \
+    dB = ((((unsigned)(sB-dB)*(unsigned)sA)/255)+dB);                   \
+    dA = ((unsigned)sA+(unsigned)dA-((unsigned)sA*dA)/255);             \
 } while(0)
 
 
--- a/src/video/SDL_blit_0.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/SDL_blit_0.c	Tue Jul 23 08:06:49 2013 -0700
@@ -363,7 +363,10 @@
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     int dstbpp;
     int c;
-    const int A = info->a;
+    Uint32 pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned A = info->a;
 
     /* Set up some basic variables */
     dstbpp = dstfmt->BytesPerPixel;
@@ -377,15 +380,12 @@
             }
             bit = (byte & 0x80) >> 7;
             if (1) {
-                Uint32 pixel;
-                unsigned sR, sG, sB;
-                unsigned dR, dG, dB;
                 sR = srcpal[bit].r;
                 sG = srcpal[bit].g;
                 sB = srcpal[bit].b;
-                DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB);
-                ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
-                ASSEMBLE_RGB(dst, dstbpp, dstfmt, dR, dG, dB);
+                DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA);
+                ALPHA_BLEND_RGBA(sR, sG, sB, A, dR, dG, dB, dA);
+                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
             }
             byte <<= 1;
             dst += dstbpp;
@@ -409,7 +409,10 @@
     const SDL_Color *srcpal = srcfmt->palette->colors;
     int dstbpp;
     int c;
-    const int A = info->a;
+    Uint32 pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned A = info->a;
     Uint32 ckey = info->colorkey;
 
     /* Set up some basic variables */
@@ -424,15 +427,12 @@
             }
             bit = (byte & 0x80) >> 7;
             if (bit != ckey) {
-                int sR, sG, sB;
-                int dR, dG, dB;
-                Uint32 pixel;
                 sR = srcpal[bit].r;
                 sG = srcpal[bit].g;
                 sB = srcpal[bit].b;
-                DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB);
-                ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
-                ASSEMBLE_RGB(dst, dstbpp, dstfmt, dR, dG, dB);
+                DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA);
+                ALPHA_BLEND_RGBA(sR, sG, sB, A, dR, dG, dB, dA);
+                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
             }
             byte <<= 1;
             dst += dstbpp;
--- a/src/video/SDL_blit_1.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/SDL_blit_1.c	Tue Jul 23 08:06:49 2013 -0700
@@ -437,30 +437,29 @@
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     const SDL_Color *srcpal = info->src_fmt->palette->colors;
     int dstbpp;
-    const int A = info->a;
+    Uint32 pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned A = info->a;
 
     /* Set up some basic variables */
     dstbpp = dstfmt->BytesPerPixel;
 
     while (height--) {
-        int sR, sG, sB;
-        int dR, dG, dB;
-	    	/* *INDENT-OFF* */
-	    	DUFFS_LOOP4(
-			{
-			        Uint32 pixel;
-				sR = srcpal[*src].r;
-				sG = srcpal[*src].g;
-				sB = srcpal[*src].b;
-				DISEMBLE_RGB(dst, dstbpp, dstfmt,
-					     pixel, dR, dG, dB);
-				ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
-			  	ASSEMBLE_RGB(dst, dstbpp, dstfmt, dR, dG, dB);
-				src++;
-				dst += dstbpp;
-			},
-			width);
-	    	/* *INDENT-ON* */
+        /* *INDENT-OFF* */
+        DUFFS_LOOP4(
+        {
+            sR = srcpal[*src].r;
+            sG = srcpal[*src].g;
+            sB = srcpal[*src].b;
+            DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA);
+            ALPHA_BLEND_RGBA(sR, sG, sB, A, dR, dG, dB, dA);
+            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+            src++;
+            dst += dstbpp;
+        },
+        width);
+        /* *INDENT-ON* */
         src += srcskip;
         dst += dstskip;
     }
@@ -479,26 +478,25 @@
     const SDL_Color *srcpal = info->src_fmt->palette->colors;
     Uint32 ckey = info->colorkey;
     int dstbpp;
-    const int A = info->a;
+    Uint32 pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned A = info->a;
 
     /* Set up some basic variables */
     dstbpp = dstfmt->BytesPerPixel;
 
     while (height--) {
-        int sR, sG, sB;
-        int dR, dG, dB;
 		/* *INDENT-OFF* */
 		DUFFS_LOOP(
 		{
 			if ( *src != ckey ) {
-			        Uint32 pixel;
 				sR = srcpal[*src].r;
 				sG = srcpal[*src].g;
 				sB = srcpal[*src].b;
-				DISEMBLE_RGB(dst, dstbpp, dstfmt,
-							pixel, dR, dG, dB);
-				ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
-			  	ASSEMBLE_RGB(dst, dstbpp, dstfmt, dR, dG, dB);
+				DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA);
+				ALPHA_BLEND_RGBA(sR, sG, sB, A, dR, dG, dB, dA);
+			  	ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
 			}
 			src++;
 			dst += dstbpp;
--- a/src/video/SDL_blit_A.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/SDL_blit_A.c	Tue Jul 23 08:06:49 2013 -0700
@@ -39,37 +39,28 @@
     SDL_PixelFormat *srcfmt = info->src_fmt;
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     int srcbpp = srcfmt->BytesPerPixel;
-
+    Uint32 Pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB;
     const unsigned A = info->a;
 
     while (height--) {
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP4(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
 		DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
 		dR = dstfmt->palette->colors[*dst].r;
 		dG = dstfmt->palette->colors[*dst].g;
 		dB = dstfmt->palette->colors[*dst].b;
-		ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
+		ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
 		dR &= 0xff;
 		dG &= 0xff;
 		dB &= 0xff;
 		/* Pack RGB into 8bit pixel */
 		if ( palmap == NULL ) {
-		    *dst =((dR>>5)<<(3+2))|
-			  ((dG>>5)<<(2))|
-			  ((dB>>6)<<(0));
+		    *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
 		} else {
-		    *dst = palmap[((dR>>5)<<(3+2))|
-				  ((dG>>5)<<(2))  |
-				  ((dB>>6)<<(0))];
+		    *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
 		}
 		dst++;
 		src += srcbpp;
@@ -95,36 +86,27 @@
     SDL_PixelFormat *srcfmt = info->src_fmt;
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     int srcbpp = srcfmt->BytesPerPixel;
+    Uint32 Pixel;
+    unsigned sR, sG, sB, sA;
+    unsigned dR, dG, dB;
 
     while (height--) {
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP4(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned sA;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
 		DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
 		dR = dstfmt->palette->colors[*dst].r;
 		dG = dstfmt->palette->colors[*dst].g;
 		dB = dstfmt->palette->colors[*dst].b;
-		ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
+		ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB);
 		dR &= 0xff;
 		dG &= 0xff;
 		dB &= 0xff;
 		/* Pack RGB into 8bit pixel */
 		if ( palmap == NULL ) {
-		    *dst =((dR>>5)<<(3+2))|
-			  ((dG>>5)<<(2))|
-			  ((dB>>6)<<(0));
+		    *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
 		} else {
-		    *dst = palmap[((dR>>5)<<(3+2))|
-				  ((dG>>5)<<(2))  |
-				  ((dB>>6)<<(0))  ];
+		    *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
 		}
 		dst++;
 		src += srcbpp;
@@ -151,38 +133,29 @@
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     int srcbpp = srcfmt->BytesPerPixel;
     Uint32 ckey = info->colorkey;
-
-    const int A = info->a;
+    Uint32 Pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB;
+    const unsigned A = info->a;
 
     while (height--) {
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
 		DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
 		if ( Pixel != ckey ) {
 		    dR = dstfmt->palette->colors[*dst].r;
 		    dG = dstfmt->palette->colors[*dst].g;
 		    dB = dstfmt->palette->colors[*dst].b;
-		    ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
+		    ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
 		    dR &= 0xff;
 		    dG &= 0xff;
 		    dB &= 0xff;
 		    /* Pack RGB into 8bit pixel */
 		    if ( palmap == NULL ) {
-			*dst =((dR>>5)<<(3+2))|
-			      ((dG>>5)<<(2)) |
-			      ((dB>>6)<<(0));
+                *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
 		    } else {
-			*dst = palmap[((dR>>5)<<(3+2))|
-				      ((dG>>5)<<(2))  |
-				      ((dB>>6)<<(0))  ];
+                *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
 		    }
 		}
 		dst++;
@@ -417,807 +390,6 @@
 
 #endif /* __MMX__ */
 
-#if SDL_ALTIVEC_BLITTERS
-#if __MWERKS__
-#pragma altivec_model on
-#endif
-#if HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-#if (defined(__MACOSX__) && (__GNUC__ < 4))
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) ( a,b,c,d,e,f,g,h )
-#else
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) { a,b,c,d,e,f,g,h }
-#endif
-
-#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
-#define VECPRINT(msg, v) do { \
-    vector unsigned int tmpvec = (vector unsigned int)(v); \
-    unsigned int *vp = (unsigned int *)&tmpvec; \
-    printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
-} while (0)
-
-/* the permuation vector that takes the high bytes out of all the appropriate shorts 
-    (vector unsigned char)(
-        0x00, 0x10, 0x02, 0x12,
-        0x04, 0x14, 0x06, 0x16,
-        0x08, 0x18, 0x0A, 0x1A,
-        0x0C, 0x1C, 0x0E, 0x1E );
-*/
-#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
-#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
-#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
-#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
-    ? vec_lvsl(0, src) \
-    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
-
-
-#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
-    /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
-    vector unsigned short vtemp1 = vec_mule(vs, valpha); \
-    /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
-    vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
-    /* valpha2 is 255-alpha */ \
-    vector unsigned char valpha2 = vec_nor(valpha, valpha); \
-    /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
-    vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
-    /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
-    vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
-    /* add source and dest */ \
-    vtemp1 = vec_add(vtemp1, vtemp3); \
-    vtemp2 = vec_add(vtemp2, vtemp4); \
-    /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
-    vtemp1 = vec_add(vtemp1, v1_16); \
-    vtemp3 = vec_sr(vtemp1, v8_16); \
-    vtemp1 = vec_add(vtemp1, vtemp3); \
-    /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
-    vtemp2 = vec_add(vtemp2, v1_16); \
-    vtemp4 = vec_sr(vtemp2, v8_16); \
-    vtemp2 = vec_add(vtemp2, vtemp4); \
-    /* (>>8) and get ARGBARGBARGBARGB */ \
-    vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
-} while (0)
-
-/* Calculate the permute vector used for 32->32 swizzling */
-static vector unsigned char
-calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
-{
-    /*
-     * We have to assume that the bits that aren't used by other
-     *  colors is alpha, and it's one complete byte, since some formats
-     *  leave alpha with a zero mask, but we should still swizzle the bits.
-     */
-    /* ARGB */
-    const static struct SDL_PixelFormat default_pixel_format = {
-        0, NULL, 0, 0,
-        {0, 0},
-        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
-        0, 0, 0, 0,
-        16, 8, 0, 24,
-        0, NULL
-    };
-    if (!srcfmt) {
-        srcfmt = &default_pixel_format;
-    }
-    if (!dstfmt) {
-        dstfmt = &default_pixel_format;
-    }
-    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
-                                                       0x04, 0x04, 0x04, 0x04,
-                                                       0x08, 0x08, 0x08, 0x08,
-                                                       0x0C, 0x0C, 0x0C,
-                                                       0x0C);
-    vector unsigned char vswiz;
-    vector unsigned int srcvec;
-#define RESHIFT(X) (3 - ((X) >> 3))
-    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
-    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
-    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
-    Uint32 amask;
-    /* Use zero for alpha if either surface doesn't have alpha */
-    if (dstfmt->Amask) {
-        amask =
-            ((srcfmt->Amask) ? RESHIFT(srcfmt->
-                                       Ashift) : 0x10) << (dstfmt->Ashift);
-    } else {
-        amask =
-            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
-                          0xFFFFFFFF);
-    }
-#undef RESHIFT
-    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
-    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
-    return (vswiz);
-}
-
-static void
-Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint8 *src = (Uint8 *) info->src;
-    int srcskip = info->src_skip;
-    Uint8 *dst = (Uint8 *) info->dst;
-    int dstskip = info->dst_skip;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-
-    vector unsigned char v0 = vec_splat_u8(0);
-    vector unsigned short v8_16 = vec_splat_u16(8);
-    vector unsigned short v1_16 = vec_splat_u16(1);
-    vector unsigned short v2_16 = vec_splat_u16(2);
-    vector unsigned short v3_16 = vec_splat_u16(3);
-    vector unsigned int v8_32 = vec_splat_u32(8);
-    vector unsigned int v16_32 = vec_add(v8_32, v8_32);
-    vector unsigned short v3f =
-        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
-                          0x003f, 0x003f, 0x003f, 0x003f);
-    vector unsigned short vfc =
-        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
-                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
-
-    /* 
-       0x10 - 0x1f is the alpha
-       0x00 - 0x0e evens are the red
-       0x01 - 0x0f odds are zero
-     */
-    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
-                                                       0x10, 0x02, 0x01, 0x01,
-                                                       0x10, 0x04, 0x01, 0x01,
-                                                       0x10, 0x06, 0x01,
-                                                       0x01);
-    vector unsigned char vredalpha2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
-                                        vec_sl(v8_32, v16_32))
-        );
-    /*
-       0x00 - 0x0f is ARxx ARxx ARxx ARxx
-       0x11 - 0x0f odds are blue
-     */
-    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
-                                                   0x04, 0x05, 0x06, 0x13,
-                                                   0x08, 0x09, 0x0a, 0x15,
-                                                   0x0c, 0x0d, 0x0e, 0x17);
-    vector unsigned char vblue2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
-        );
-    /*
-       0x00 - 0x0f is ARxB ARxB ARxB ARxB
-       0x10 - 0x0e evens are green
-     */
-    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
-                                                    0x04, 0x05, 0x12, 0x07,
-                                                    0x08, 0x09, 0x14, 0x0b,
-                                                    0x0c, 0x0d, 0x16, 0x0f);
-    vector unsigned char vgreen2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
-        );
-    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
-                                                    0x00, 0x0a, 0x00, 0x0e,
-                                                    0x00, 0x12, 0x00, 0x16,
-                                                    0x00, 0x1a, 0x00, 0x1e);
-    vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
-    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
-    vector unsigned char valphaPermute =
-        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-
-    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
-    vf800 = vec_sl(vf800, vec_splat_u16(8));
-
-    while (height--) {
-        int extrawidth;
-        vector unsigned char valigner;
-        vector unsigned char vsrc;
-        vector unsigned char voverflow;
-        int width = info->dst_w;
-
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB, sA; \
-            DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
-            if(sA) { \
-                unsigned short dstpixel = *((unsigned short *)dst); \
-                dR = (dstpixel >> 8) & 0xf8; \
-                dG = (dstpixel >> 3) & 0xfc; \
-                dB = (dstpixel << 3) & 0xf8; \
-                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-                *((unsigned short *)dst) = ( \
-                    ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
-                ); \
-            } \
-            src += 4; \
-            dst += 2; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
-        extrawidth = (width % 8);
-        valigner = VEC_ALIGNER(src);
-        vsrc = (vector unsigned char) vec_ld(0, src);
-        width -= extrawidth;
-        while (width) {
-            vector unsigned char valpha;
-            vector unsigned char vsrc1, vsrc2;
-            vector unsigned char vdst1, vdst2;
-            vector unsigned short vR, vG, vB;
-            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
-
-            /* Load 8 pixels from src as ARGB */
-            voverflow = (vector unsigned char) vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-            vsrc1 = vec_perm(vsrc, vsrc, vpermute);
-            src += 16;
-            vsrc = (vector unsigned char) vec_ld(15, src);
-            voverflow = vec_perm(voverflow, vsrc, valigner);
-            vsrc2 = vec_perm(voverflow, voverflow, vpermute);
-            src += 16;
-
-            /* Load 8 pixels from dst as XRGB */
-            voverflow = vec_ld(0, dst);
-            vR = vec_and((vector unsigned short) voverflow, vf800);
-            vB = vec_sl((vector unsigned short) voverflow, v3_16);
-            vG = vec_sl(vB, v2_16);
-            vdst1 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                (vector unsigned char) vR,
-                                                vredalpha1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
-            vdst2 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                (vector unsigned char) vR,
-                                                vredalpha2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
-
-            /* Alpha blend 8 pixels as ARGB */
-            valpha = vec_perm(vsrc1, v0, valphaPermute);
-            VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
-                               v8_16);
-            valpha = vec_perm(vsrc2, v0, valphaPermute);
-            VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
-                               v8_16);
-
-            /* Convert 8 pixels to 565 */
-            vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
-                                                        vdst1,
-                                                        (vector unsigned int)
-                                                        vdst2);
-            vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
-            vgpixel = vec_and(vgpixel, vfc);
-            vgpixel = vec_sl(vgpixel, v3_16);
-            vrpixel = vec_sl(vpixel, v1_16);
-            vrpixel = vec_and(vrpixel, vf800);
-            vbpixel = vec_and(vpixel, v3f);
-            vdst1 =
-                vec_or((vector unsigned char) vrpixel,
-                       (vector unsigned char) vgpixel);
-            vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
-
-            /* Store 8 pixels */
-            vec_st(vdst1, 0, dst);
-
-            width -= 8;
-            dst += 16;
-        }
-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-        src += srcskip;
-        dst += dstskip;
-    }
-}
-
-static void
-Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
-    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
-    Uint32 ckey = info->colorkey;
-    vector unsigned char mergePermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned char vbits;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-    vector unsigned int vckey;
-    vector unsigned int vrgbmask;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = sA;
-    valpha = vec_splat(valpha, 0);
-    vbits = (vector unsigned char) vec_splat_s8(-1);
-
-    ckey &= rgbmask;
-    ((unsigned int *) (char *) &vckey)[0] = ckey;
-    vckey = vec_splat(vckey, 0);
-    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
-    vrgbmask = vec_splat(vrgbmask, 0);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB; \
-            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
-            if(sA && Pixel != ckey) { \
-                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
-                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
-                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
-            } \
-            dstp++; \
-            srcp++; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char vsel;
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char vd_orig;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                /* vsel is set for items that match the key */
-                vsel =
-                    (vector unsigned char) vec_and((vector unsigned int) vs,
-                                                   vrgbmask);
-                vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
-                                                        vsel, vckey);
-
-                /* permute to source format */
-                vs = vec_perm(vs, valpha, vsrcPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-
-                /* mask out color key */
-                vd = vec_sel(vd, vd_orig, vsel);
-
-                /* permute to dest format */
-                vd = vec_perm(vd, vbits, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-}
-
-
-static void
-Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int width = info->dst_w;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    vector unsigned char mergePermute;
-    vector unsigned char valphaPermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valphamask;
-    vector unsigned char vpixelmask;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-    mergePermute = VEC_MERGE_PERMUTE();
-    valphamask = VEC_ALPHA_MASK();
-    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-    vpixelmask = vec_nor(valphamask, v0);
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    while (height--) {
-        width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
-            DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
-            if(sA) { \
-              DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
-              ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-              ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
-            } \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            /* vsrcPermute */
-            /* vdstPermute */
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char valpha;
-                vector unsigned char vdstalpha;
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-                vs = vec_perm(vs, v0, vsrcPermute);
-
-                valpha = vec_perm(vs, v0, valphaPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd = vec_perm(vd, v0, vsdstPermute);
-                vdstalpha = vec_and(vd, valphamask);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha to the dest alpha */
-                vd = vec_and(vd, vpixelmask);
-                vd = vec_or(vd, vdstalpha);
-                vd = vec_perm(vd, v0, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-        srcp += srcskip;
-        dstp += dstskip;
-#undef ONE_PIXEL_BLEND
-    }
-}
-
-/* fast ARGB888->(A)RGB888 blending with pixel alpha */
-static void
-BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int width = info->dst_w;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    vector unsigned char mergePermute;
-    vector unsigned char valphaPermute;
-    vector unsigned char valphamask;
-    vector unsigned char vpixelmask;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-    mergePermute = VEC_MERGE_PERMUTE();
-    valphamask = VEC_ALPHA_MASK();
-    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-
-
-    vpixelmask = vec_nor(valphamask, v0);
-    while (height--) {
-        width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while ((condition)) { \
-            Uint32 dalpha; \
-            Uint32 d; \
-            Uint32 s1; \
-            Uint32 d1; \
-            Uint32 s = *srcp; \
-            Uint32 alpha = s >> 24; \
-            if(alpha) { \
-              if(alpha == SDL_ALPHA_OPAQUE) { \
-                *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
-              } else { \
-                d = *dstp; \
-                dalpha = d & 0xff000000; \
-                s1 = s & 0xff00ff; \
-                d1 = d & 0xff00ff; \
-                d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
-                s &= 0xff00; \
-                d &= 0xff00; \
-                d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
-                *dstp = d1 | d | dalpha; \
-              } \
-            } \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-	    }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char valpha;
-                vector unsigned char vdstalpha;
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                valpha = vec_perm(vs, v0, valphaPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vdstalpha = vec_and(vd, valphamask);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha to the dest alpha */
-                vd = vec_and(vd, vpixelmask);
-                vd = vec_or(vd, vdstalpha);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-#undef ONE_PIXEL_BLEND
-}
-
-static void
-Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
-{
-    /* XXX : 6 */
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
-    vector unsigned char mergePermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned char vbits;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = sA;
-    valpha = vec_splat(valpha, 0);
-    vbits = (vector unsigned char) vec_splat_s8(-1);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB; \
-            DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
-            DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
-            ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-            ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-                vs = vec_perm(vs, valpha, vsrcPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd = vec_perm(vd, vd, vsdstPermute);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-                vd = vec_perm(vd, vbits, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-
-}
-
-
-/* fast RGB888->(A)RGB888 blending */
-static void
-BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
-{
-    unsigned alpha = info->a;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    vector unsigned char mergePermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = alpha;
-    valpha = vec_splat(valpha, 0);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 s = *srcp; \
-            Uint32 d = *dstp; \
-            Uint32 s1 = s & 0xff00ff; \
-            Uint32 d1 = d & 0xff00ff; \
-            d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
-                 & 0xff00ff; \
-            s &= 0xff00; \
-            d &= 0xff00; \
-            d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
-            *dstp = d1 | d | 0xff000000; \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-}
-
-#if __MWERKS__
-#pragma altivec_model off
-#endif
-#endif /* SDL_ALTIVEC_BLITTERS */
-
 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
 static void
 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
@@ -1984,24 +1156,19 @@
     SDL_PixelFormat *dstfmt = info->dst_fmt;
     int srcbpp = srcfmt->BytesPerPixel;
     int dstbpp = dstfmt->BytesPerPixel;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
+    Uint32 Pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned sA = info->a;
 
     if (sA) {
         while (height--) {
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP4(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
 		DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
-		DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB);
-		ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
+		DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+		ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
 		ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
 		src += srcbpp;
 		dst += dstbpp;
@@ -2029,25 +1196,20 @@
     Uint32 ckey = info->colorkey;
     int srcbpp = srcfmt->BytesPerPixel;
     int dstbpp = dstfmt->BytesPerPixel;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
+    Uint32 Pixel;
+    unsigned sR, sG, sB;
+    unsigned dR, dG, dB, dA;
+    const unsigned sA = info->a;
 
     while (height--) {
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP4(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
 		RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
 		if(sA && Pixel != ckey) {
 		    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
-		    DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB);
-		    ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
+		    DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+		    ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
 		    ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
 		}
 		src += srcbpp;
@@ -2072,9 +1234,11 @@
     int dstskip = info->dst_skip;
     SDL_PixelFormat *srcfmt = info->src_fmt;
     SDL_PixelFormat *dstfmt = info->dst_fmt;
-
     int srcbpp;
     int dstbpp;
+    Uint32 Pixel;
+    unsigned sR, sG, sB, sA;
+    unsigned dR, dG, dB, dA;
 
     /* Set up some basic variables */
     srcbpp = srcfmt->BytesPerPixel;
@@ -2084,20 +1248,11 @@
 	    /* *INDENT-OFF* */
 	    DUFFS_LOOP4(
 	    {
-		Uint32 Pixel;
-		unsigned sR;
-		unsigned sG;
-		unsigned sB;
-		unsigned dR;
-		unsigned dG;
-		unsigned dB;
-		unsigned sA;
-		unsigned dA;
 		DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
 		if(sA) {
-		  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
-		  ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
-		  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+		    DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+		    ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
+		    ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
 		}
 		src += srcbpp;
 		dst += dstbpp;
@@ -2124,13 +1279,6 @@
             return BlitNto1PixelAlpha;
 
         case 2:
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->BytesPerPixel == 4
-                && df->Gmask == 0x7e0 && df->Bmask == 0x1f
-                && SDL_HasAltiVec())
-                return Blit32to565PixelAlphaAltivec;
-            else
-#endif
                 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
                     && sf->Gmask == 0xff00
                     && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
@@ -2162,19 +1310,10 @@
                 }
 #endif /* __MMX__ || __3dNOW__ */
                 if (sf->Amask == 0xff000000) {
-#if SDL_ALTIVEC_BLITTERS
-                    if (SDL_HasAltiVec())
-                        return BlitRGBtoRGBPixelAlphaAltivec;
-#endif
                     return BlitRGBtoRGBPixelAlpha;
                 }
             }
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
-                return Blit32to32PixelAlphaAltivec;
-            else
-#endif
-                return BlitNtoNPixelAlpha;
+            return BlitNtoNPixelAlpha;
 
         case 3:
         default:
@@ -2220,19 +1359,10 @@
                         return BlitRGBtoRGBSurfaceAlphaMMX;
 #endif
                     if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
-#if SDL_ALTIVEC_BLITTERS
-                        if (SDL_HasAltiVec())
-                            return BlitRGBtoRGBSurfaceAlphaAltivec;
-#endif
                         return BlitRGBtoRGBSurfaceAlpha;
                     }
                 }
-#if SDL_ALTIVEC_BLITTERS
-                if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
-                    return Blit32to32SurfaceAlphaAltivec;
-                else
-#endif
-                    return BlitNtoNSurfaceAlpha;
+                return BlitNtoNSurfaceAlpha;
 
             case 3:
             default:
@@ -2243,16 +1373,11 @@
 
     case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND:
         if (sf->Amask == 0) {
-            if (df->BytesPerPixel == 1)
+            if (df->BytesPerPixel == 1) {
                 return BlitNto1SurfaceAlphaKey;
-            else
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
-                    SDL_HasAltiVec())
-                return Blit32to32SurfaceAlphaKeyAltivec;
-            else
-#endif
+            } else {
                 return BlitNtoNSurfaceAlphaKey;
+            }
         }
         break;
     }
--- a/src/video/SDL_blit_auto.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/SDL_blit_auto.c	Tue Jul 23 08:06:49 2013 -0700
@@ -95,6 +95,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -168,6 +169,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -323,6 +325,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -408,6 +411,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -503,6 +507,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -576,6 +581,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -731,6 +737,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -816,6 +823,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -911,6 +919,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -984,6 +993,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1139,6 +1149,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1224,6 +1235,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1319,6 +1331,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1392,6 +1405,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1547,6 +1561,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1632,6 +1647,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1722,6 +1738,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1795,6 +1812,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -1950,6 +1968,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2035,6 +2054,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2130,6 +2150,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2203,6 +2224,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2358,6 +2380,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2443,6 +2466,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2538,6 +2562,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2611,6 +2636,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2766,6 +2792,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2851,6 +2878,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -2946,6 +2974,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3019,6 +3048,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3174,6 +3204,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3259,6 +3290,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3349,6 +3381,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3422,6 +3455,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3577,6 +3611,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3662,6 +3697,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3757,6 +3793,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3830,6 +3867,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -3985,6 +4023,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4070,6 +4109,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4165,6 +4205,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4238,6 +4279,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4393,6 +4435,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4478,6 +4521,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4573,6 +4617,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4646,6 +4691,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4801,6 +4847,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4886,6 +4933,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -4981,6 +5029,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5054,6 +5103,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5209,6 +5259,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5294,6 +5345,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5389,6 +5441,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5462,6 +5515,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5617,6 +5671,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5702,6 +5757,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5797,6 +5853,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -5870,6 +5927,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6025,6 +6083,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6110,6 +6169,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6205,6 +6265,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6278,6 +6339,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6433,6 +6495,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6518,6 +6581,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6613,6 +6677,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6686,6 +6751,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6841,6 +6907,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -6926,6 +6993,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -7021,6 +7089,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -7094,6 +7163,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -7249,6 +7319,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
@@ -7334,6 +7405,7 @@
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
                 dstB = srcB + ((255 - srcA) * dstB) / 255;
+                dstA = srcA + ((255 - srcA) * dstA) / 255;
                 break;
             case SDL_COPY_ADD:
                 dstR = srcR + dstR; if (dstR > 255) dstR = 255;
--- a/src/video/sdlgenblit.pl	Mon Jul 22 02:51:45 2013 -0700
+++ b/src/video/sdlgenblit.pl	Tue Jul 23 08:06:49 2013 -0700
@@ -82,7 +82,7 @@
 /* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
 /*
   Simple DirectMedia Layer
-  Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
+  Copyright (C) 1997-2013 Sam Lantinga <slouken\@libsdl.org>
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -241,6 +241,7 @@
                 ${d}R = ${s}R + ((255 - ${s}A) * ${d}R) / 255;
                 ${d}G = ${s}G + ((255 - ${s}A) * ${d}G) / 255;
                 ${d}B = ${s}B + ((255 - ${s}A) * ${d}B) / 255;
+                ${d}A = ${s}A + ((255 - ${s}A) * ${d}A) / 255;
                 break;
             case SDL_COPY_ADD:
                 ${d}R = ${s}R + ${d}R; if (${d}R > 255) ${d}R = 255;
--- a/test/testrendertarget.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/test/testrendertarget.c	Tue Jul 23 08:06:49 2013 -0700
@@ -88,6 +88,88 @@
 }
 
 void
+DrawComposite(DrawState *s)
+{
+    SDL_Rect viewport, R;
+    SDL_Texture *target;
+    /* Composite test: draw the sprite into an offscreen target, then blend that target over the default render target. */
+    static SDL_bool blend_tested = SDL_FALSE;
+    if (!blend_tested) {
+        SDL_Texture *A, *B;
+        Uint32 P = 0; /* stays 0 if the read-back below fails, so printf never sees an indeterminate value */
+        /* First call only: blend one 1x1 target texture onto another and print the resulting pixel. */
+        A = SDL_CreateTexture(s->renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_TARGET, 1, 1);
+        SDL_SetTextureBlendMode(A, SDL_BLENDMODE_BLEND);
+
+        B = SDL_CreateTexture(s->renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_TARGET, 1, 1);
+        SDL_SetTextureBlendMode(B, SDL_BLENDMODE_BLEND);
+        /* A: black with alpha 0x80 */
+        SDL_SetRenderTarget(s->renderer, A);
+        SDL_SetRenderDrawColor(s->renderer, 0x00, 0x00, 0x00, 0x80);
+        SDL_RenderFillRect(s->renderer, NULL);
+        /* B: black with alpha 0x00, then A copied over it; the pixel is read back while B is still the target */
+        SDL_SetRenderTarget(s->renderer, B);
+        SDL_SetRenderDrawColor(s->renderer, 0x00, 0x00, 0x00, 0x00);
+        SDL_RenderFillRect(s->renderer, NULL);
+        SDL_RenderCopy(s->renderer, A, NULL, NULL);
+        SDL_RenderReadPixels(s->renderer, NULL, SDL_PIXELFORMAT_ARGB8888, &P, sizeof(P));
+        /* NOTE(review): with dstA = srcA + dstA * (1-srcA) the alpha byte should come back as 0x80 - confirm per backend */
+        printf("Blended pixel: 0x%8.8X\n", P);
+
+        SDL_DestroyTexture(A);
+        SDL_DestroyTexture(B);
+        blend_tested = SDL_TRUE;
+    }
+
+    SDL_RenderGetViewport(s->renderer, &viewport);
+    /* Offscreen target sized to the viewport; created on every call and destroyed before presenting. */
+    target = SDL_CreateTexture(s->renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_TARGET, viewport.w, viewport.h);
+    SDL_SetTextureBlendMode(target, SDL_BLENDMODE_BLEND);
+    SDL_SetRenderTarget(s->renderer, target);
+
+    /* Clear the target.
+       The fill is black with alpha 0x00, i.e. fully transparent rather than solid; presumably so the sprite's per-pixel alpha carries through to the target when it is copied in.
+     */
+    SDL_SetRenderDrawColor(s->renderer, 0x00, 0x00, 0x00, 0x00);
+    SDL_RenderFillRect(s->renderer, NULL);
+
+    /* Scale and draw the sprite */
+    s->sprite_rect.w += s->scale_direction;
+    s->sprite_rect.h += s->scale_direction;
+    if (s->scale_direction > 0) {
+        if (s->sprite_rect.w >= viewport.w || s->sprite_rect.h >= viewport.h) {
+            s->scale_direction = -1;
+        }
+    } else {
+        if (s->sprite_rect.w <= 1 || s->sprite_rect.h <= 1) {
+            s->scale_direction = 1;
+        }
+    }
+    s->sprite_rect.x = (viewport.w - s->sprite_rect.w) / 2;
+    s->sprite_rect.y = (viewport.h - s->sprite_rect.h) / 2;
+
+    SDL_RenderCopy(s->renderer, s->sprite, NULL, &s->sprite_rect);
+    /* Switch back to the default render target and lay down the background first. */
+    SDL_SetRenderTarget(s->renderer, NULL);
+    SDL_RenderCopy(s->renderer, s->background, NULL, NULL);
+    /* Blend a half-transparent red 100x100 square over the top-left corner, then restore the draw blend mode. */
+    SDL_SetRenderDrawBlendMode(s->renderer, SDL_BLENDMODE_BLEND);
+    SDL_SetRenderDrawColor(s->renderer, 0xff, 0x00, 0x00, 0x80);
+    R.x = 0;
+    R.y = 0;
+    R.w = 100;
+    R.h = 100;
+    SDL_RenderFillRect(s->renderer, &R);
+    SDL_SetRenderDrawBlendMode(s->renderer, SDL_BLENDMODE_NONE);
+    /* Copy the offscreen target (texture blend mode BLEND) over everything drawn so far. */
+    SDL_RenderCopy(s->renderer, target, NULL, NULL);
+    SDL_DestroyTexture(target);
+
+    /* Update the screen! */
+    SDL_RenderPresent(s->renderer);
+}
+
+void
 Draw(DrawState *s)
 {
     SDL_Rect viewport;
@@ -134,6 +216,7 @@
     SDL_Event event;
     int frames;
     Uint32 then, now;
+    SDL_bool test_composite = SDL_FALSE;
 
     /* Initialize test framework */
     state = SDLTest_CommonCreateState(argv, SDL_INIT_VIDEO);
@@ -145,8 +228,17 @@
 
         consumed = SDLTest_CommonArg(state, i);
         if (consumed == 0) {
-            fprintf(stderr, "Usage: %s %s\n", argv[0], SDLTest_CommonUsage(state));
-            return 1;
+            consumed = -1;
+            if (SDL_strcasecmp(argv[i], "--composite") == 0) {
+                test_composite = SDL_TRUE;
+                consumed = 1;
+            }
+        }
+        if (consumed < 0) {
+            fprintf(stderr,
+                    "Usage: %s %s [--composite]\n",
+                    argv[0], SDLTest_CommonUsage(state));
+            quit(1);
         }
         i += consumed;
     }
@@ -160,7 +252,11 @@
 
         drawstate->window = state->windows[i];
         drawstate->renderer = state->renderers[i];
-        drawstate->sprite = LoadTexture(drawstate->renderer, "icon.bmp", SDL_TRUE);
+        if (test_composite) {
+            drawstate->sprite = LoadTexture(drawstate->renderer, "icon-alpha.bmp", SDL_TRUE);
+        } else {
+            drawstate->sprite = LoadTexture(drawstate->renderer, "icon.bmp", SDL_TRUE);
+        }
         drawstate->background = LoadTexture(drawstate->renderer, "sample.bmp", SDL_FALSE);
         if (!drawstate->sprite || !drawstate->background) {
             quit(2);
@@ -181,7 +277,11 @@
             SDLTest_CommonEvent(state, &event, &done);
         }
         for (i = 0; i < state->num_windows; ++i) {
-            Draw(&drawstates[i]);
+            if (test_composite) {
+                DrawComposite(&drawstates[i]);
+            } else {
+                Draw(&drawstates[i]);
+            }
         }
     }
 
--- a/test/testsprite2.c	Mon Jul 22 02:51:45 2013 -0700
+++ b/test/testsprite2.c	Tue Jul 23 08:06:49 2013 -0700
@@ -56,7 +56,7 @@
 }
 
 int
-LoadSprite(char *file)
+LoadSprite(const char *file)
 {
     int i;
     SDL_Surface *temp;
@@ -243,6 +243,7 @@
     SDL_Event event;
     Uint32 then, now, frames;
 	Uint64 seed;
+    const char *icon = "icon.bmp";
 
     /* Initialize parameters */
     num_sprites = NUM_SPRITES;
@@ -292,11 +293,14 @@
             } else if (SDL_isdigit(*argv[i])) {
                 num_sprites = SDL_atoi(argv[i]);
                 consumed = 1;
+            } else if (argv[i][0] != '-') {
+                icon = argv[i];
+                consumed = 1;
             }
         }
         if (consumed < 0) {
             fprintf(stderr,
-                    "Usage: %s %s [--blend none|blend|add|mod] [--cyclecolor] [--cyclealpha] [--iterations N]\n",
+                    "Usage: %s %s [--blend none|blend|add|mod] [--cyclecolor] [--cyclealpha] [--iterations N] [num_sprites] [icon.bmp]\n",
                     argv[0], SDLTest_CommonUsage(state));
             quit(1);
         }
@@ -318,7 +322,7 @@
         SDL_SetRenderDrawColor(renderer, 0xA0, 0xA0, 0xA0, 0xFF);
         SDL_RenderClear(renderer);
     }
-    if (LoadSprite("icon.bmp") < 0) {
+    if (LoadSprite(icon) < 0) {
         quit(2);
     }