Added source color and alpha modulation support.
author Sam Lantinga <slouken@libsdl.org>
Mon, 28 Aug 2006 03:17:39 +0000
changeset 1985 8055185ae4ed
parent 1984 b910bcabec26
child 1986 f4c65e3bfaed
Added source color and alpha modulation support. Added perl script to generate optimized render copy functions.
include/SDL_audio.h
include/SDL_endian.h
include/SDL_video.h
src/SDL_compat.c
src/audio/SDL_audiocvt.c
src/audio/SDL_audiotypecvt.c
src/audio/SDL_mixer.c
src/audio/sdlgenaudiocvt.pl
src/hermes/HeadX86.h
src/video/SDL_blit_N.c
src/video/SDL_glfuncs.h
src/video/SDL_rendercopy.c
src/video/SDL_rendercopy.h
src/video/SDL_renderer_gl.c
src/video/SDL_renderer_sw.c
src/video/SDL_stretch.c
src/video/SDL_sysvideo.h
src/video/SDL_video.c
src/video/dummy/SDL_nullrender.c
src/video/sdlgenblit.pl
src/video/win32/SDL_d3drender.c
src/video/win32/SDL_gdirender.c
test/testgl2.c
test/testsprite2.c
test/testwm2.c
--- a/include/SDL_audio.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/include/SDL_audio.h	Mon Aug 28 03:17:39 2006 +0000
@@ -132,22 +132,22 @@
 
 /* A structure to hold a set of audio conversion filters and buffers */
 struct SDL_AudioCVT;
-typedef void (SDLCALL * SDL_AudioFilter)(struct SDL_AudioCVT *cvt,
-                                         SDL_AudioFormat format);
+typedef void (SDLCALL * SDL_AudioFilter) (struct SDL_AudioCVT * cvt,
+                                          SDL_AudioFormat format);
 
 typedef struct SDL_AudioCVT
 {
-    int needed;                  /* Set to 1 if conversion possible */
-    SDL_AudioFormat src_format;  /* Source audio format */
-    SDL_AudioFormat dst_format;  /* Target audio format */
-    double rate_incr;            /* Rate conversion increment */
-    Uint8 *buf;                  /* Buffer to hold entire audio data */
-    int len;                     /* Length of original audio buffer */
-    int len_cvt;                 /* Length of converted audio buffer */
-    int len_mult;                /* buffer must be len*len_mult big */
-    double len_ratio;            /* Given len, final size is len*len_ratio */
-    SDL_AudioFilter filters[10]; /* Filter list */
-    int filter_index;            /* Current audio conversion function */
+    int needed;                 /* Set to 1 if conversion possible */
+    SDL_AudioFormat src_format; /* Source audio format */
+    SDL_AudioFormat dst_format; /* Target audio format */
+    double rate_incr;           /* Rate conversion increment */
+    Uint8 *buf;                 /* Buffer to hold entire audio data */
+    int len;                    /* Length of original audio buffer */
+    int len_cvt;                /* Length of converted audio buffer */
+    int len_mult;               /* buffer must be len*len_mult big */
+    double len_ratio;           /* Given len, final size is len*len_ratio */
+    SDL_AudioFilter filters[10];        /* Filter list */
+    int filter_index;           /* Current audio conversion function */
 } SDL_AudioCVT;
 
 
--- a/include/SDL_endian.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/include/SDL_endian.h	Mon Aug 28 03:17:39 2006 +0000
@@ -195,7 +195,11 @@
 static __inline__ float
 SDL_SwapFloat(float x)
 {
-    union { float f; Uint32 ui32; } swapper;
+    union
+    {
+        float f;
+        Uint32 ui32;
+    } swapper;
     swapper.f = x;
     swapper.ui32 = SDL_Swap32(swapper.ui32);
     return swapper.f;
--- a/include/SDL_video.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/include/SDL_video.h	Mon Aug 28 03:17:39 2006 +0000
@@ -176,7 +176,7 @@
     SDL_RENDERER_PRESENTFLIP3 = 0x00000008,     /**< Present uses a flip, rotating between two back buffers and a front buffer */
     SDL_RENDERER_PRESENTDISCARD = 0x00000010,   /**< Present leaves the contents of the backbuffer undefined */
     SDL_RENDERER_PRESENTVSYNC = 0x00000020,     /**< Present is synchronized with the refresh rate */
-    SDL_RENDERER_ACCELERATED = 0x00000040,      /**< The renderer uses hardware acceleration */
+    SDL_RENDERER_ACCELERATED = 0x0000040,       /**< The renderer uses hardware acceleration */
 } SDL_RendererFlags;
 
 /**
@@ -188,6 +188,7 @@
 {
     const char *name;           /**< The name of the renderer */
     Uint32 flags;               /**< Supported SDL_RendererFlags */
+    Uint32 mod_modes;           /**< A mask of supported channel modulation */
     Uint32 blend_modes;         /**< A mask of supported blend modes */
     Uint32 scale_modes;         /**< A mask of supported scale modes */
     Uint32 num_texture_formats; /**< The number of available texture formats */
@@ -208,9 +209,21 @@
 } SDL_TextureAccess;
 
 /**
+ * \enum SDL_TextureModulate
+ *
+ * \brief The texture channel modulation used in SDL_RenderCopy()
+ */
+typedef enum
+{
+    SDL_TEXTUREMODULATE_NONE = 0x00000000,     /**< No modulation */
+    SDL_TEXTUREMODULATE_COLOR = 0x00000001,    /**< srcC = srcC * color */
+    SDL_TEXTUREMODULATE_ALPHA = 0x00000002,    /**< srcA = srcA * alpha */
+} SDL_TextureModulate;
+
+/**
  * \enum SDL_TextureBlendMode
  *
- * \brief The blend mode used in SDL_RenderCopy()
+ * \brief The texture blend mode used in SDL_RenderCopy()
  */
 typedef enum
 {
@@ -224,7 +237,7 @@
 /**
  * \enum SDL_TextureScaleMode
  *
- * \brief The scale mode used in SDL_RenderCopy()
+ * \brief The texture scale mode used in SDL_RenderCopy()
  */
 typedef enum
 {
@@ -272,12 +285,15 @@
     int pitch;                  /* Read-only */
     void *pixels;               /* Read-write */
 
+    /* Application data associated with the surface */
+    void *userdata;             /* Read-write */
+
     /* texture associated with the surface, if any */
-    SDL_TextureID textureID;
+    SDL_TextureID textureID;    /* Read-only */
 
     /* information needed for surfaces requiring locks */
-    int locked;
-    void *lock_data;
+    int locked;                 /* Read-only */
+    void *lock_data;            /* Read-only */
 
     /* clipping information */
     SDL_Rect clip_rect;         /* Read-only */
@@ -998,6 +1014,136 @@
                                                   int ncolors);
 
 /**
+ * \fn int SDL_SetTextureColorMod(SDL_TextureID textureID, Uint8 r, Uint8 g, Uint8 b)
+ *
+ * \brief Set an additional color value used in render copy operations
+ *
+ * \param textureID The texture to update
+ * \param r The red source color value multiplied into copy operations
+ * \param g The green source color value multiplied into copy operations
+ * \param b The blue source color value multiplied into copy operations
+ *
+ * \return 0 on success, or -1 if the texture is not valid or color modulation is not supported
+ *
+ * \sa SDL_GetTextureColorMod()
+ */
+extern DECLSPEC int SDLCALL SDL_SetTextureColorMod(SDL_TextureID textureID,
+                                                   Uint8 r, Uint8 g, Uint8 b);
+
+
+/**
+ * \fn int SDL_GetTextureColorMod(SDL_TextureID textureID, Uint8 *r, Uint8 *g, Uint8 *b)
+ *
+ * \brief Get the additional color value used in render copy operations
+ *
+ * \param textureID The texture to query
+ * \param r A pointer filled in with the source red color value
+ * \param g A pointer filled in with the source green color value
+ * \param b A pointer filled in with the source blue color value
+ *
+ * \return 0 on success, or -1 if the texture is not valid
+ *
+ * \sa SDL_SetTextureColorMod()
+ */
+extern DECLSPEC int SDLCALL SDL_GetTextureColorMod(SDL_TextureID textureID,
+                                                   Uint8 * r, Uint8 * g,
+                                                   Uint8 * b);
+
+/**
+ * \fn int SDL_SetTextureAlphaMod(SDL_TextureID textureID, Uint8 alpha)
+ *
+ * \brief Set an additional alpha value used in render copy operations
+ *
+ * \param textureID The texture to update
+ * \param alpha The source alpha value multiplied into copy operations.
+ *
+ * \return 0 on success, or -1 if the texture is not valid or alpha modulation is not supported
+ *
+ * \sa SDL_GetTextureAlphaMod()
+ */
+extern DECLSPEC int SDLCALL SDL_SetTextureAlphaMod(SDL_TextureID textureID,
+                                                   Uint8 alpha);
+
+/**
+ * \fn int SDL_GetTextureAlphaMod(SDL_TextureID textureID, Uint8 *alpha)
+ *
+ * \brief Get the additional alpha value used in render copy operations
+ *
+ * \param textureID The texture to query
+ * \param alpha A pointer filled in with the source alpha value
+ *
+ * \return 0 on success, or -1 if the texture is not valid
+ *
+ * \sa SDL_SetTextureAlphaMod()
+ */
+extern DECLSPEC int SDLCALL SDL_GetTextureAlphaMod(SDL_TextureID textureID,
+                                                   Uint8 * alpha);
+
+/**
+ * \fn int SDL_SetTextureBlendMode(SDL_TextureID textureID, int blendMode)
+ *
+ * \brief Set the blend mode used for texture copy operations
+ *
+ * \param textureID The texture to update
+ * \param blendMode SDL_TextureBlendMode to use for texture blending
+ *
+ * \return 0 on success, or -1 if the texture is not valid or the blend mode is not supported
+ *
+ * \note If the blend mode is not supported, the closest supported mode is chosen.
+ *
+ * \sa SDL_GetTextureBlendMode()
+ */
+extern DECLSPEC int SDLCALL SDL_SetTextureBlendMode(SDL_TextureID textureID,
+                                                    int blendMode);
+
+/**
+ * \fn int SDL_GetTextureBlendMode(SDL_TextureID textureID, int *blendMode)
+ *
+ * \brief Get the blend mode used for texture copy operations
+ *
+ * \param textureID The texture to query
+ * \param blendMode A pointer filled in with the current blend mode
+ *
+ * \return 0 on success, or -1 if the texture is not valid
+ *
+ * \sa SDL_SetTextureBlendMode()
+ */
+extern DECLSPEC int SDLCALL SDL_GetTextureBlendMode(SDL_TextureID textureID,
+                                                    int *blendMode);
+
+/**
+ * \fn int SDL_SetTextureScaleMode(SDL_TextureID textureID, int scaleMode)
+ *
+ * \brief Set the scale mode used for texture copy operations
+ *
+ * \param textureID The texture to update
+ * \param scaleMode SDL_TextureScaleMode to use for texture scaling
+ *
+ * \return 0 on success, or -1 if the texture is not valid or the scale mode is not supported
+ *
+ * \note If the scale mode is not supported, the closest supported mode is chosen.
+ *
+ * \sa SDL_GetTextureScaleMode()
+ */
+extern DECLSPEC int SDLCALL SDL_SetTextureScaleMode(SDL_TextureID textureID,
+                                                    int scaleMode);
+
+/**
+ * \fn int SDL_GetTextureScaleMode(SDL_TextureID textureID, int *scaleMode)
+ *
+ * \brief Get the scale mode used for texture copy operations
+ *
+ * \param textureID The texture to query
+ * \param scaleMode A pointer filled in with the current scale mode
+ *
+ * \return 0 on success, or -1 if the texture is not valid
+ *
+ * \sa SDL_SetTextureScaleMode()
+ */
+extern DECLSPEC int SDLCALL SDL_GetTextureScaleMode(SDL_TextureID textureID,
+                                                    int *scaleMode);
+
+/**
  * \fn int SDL_UpdateTexture(SDL_TextureID textureID, const SDL_Rect *rect, const void *pixels, int pitch)
  *
  * \brief Update the given texture rectangle with new pixel data.
@@ -1061,37 +1207,35 @@
                                               const SDL_Rect * rects);
 
 /**
- * \fn void SDL_RenderFill(const SDL_Rect *rect, Uint32 color)
+ * \fn int SDL_RenderFill(Uint8 r, Uint8 g, Uint8 b, Uint8 a, const SDL_Rect *rect)
  *
  * \brief Fill the current rendering target with the specified color.
  *
+ * \param r The red value used to fill the rendering target
+ * \param g The green value used to fill the rendering target
+ * \param b The blue value used to fill the rendering target
+ * \param a The alpha value used to fill the rendering target, usually SDL_ALPHA_OPAQUE (255)
  * \param rect A pointer to the destination rectangle, or NULL for the entire rendering target.
- * \param color An ARGB color value.
  *
  * \return 0 on success, or -1 if there is no rendering context current
  */
-extern DECLSPEC int SDLCALL SDL_RenderFill(const SDL_Rect * rect,
-                                           Uint32 color);
+extern DECLSPEC int SDLCALL SDL_RenderFill(Uint8 r, Uint8 g, Uint8 b, Uint8 a,
+                                           const SDL_Rect * rect);
 
 /**
- * \fn int SDL_RenderCopy(SDL_TextureID textureID, const SDL_Rect *srcrect, const SDL_Rect *dstrect, Uint32 blendMode, Uint32 scaleMode)
+ * \fn int SDL_RenderCopy(SDL_TextureID textureID, const SDL_Rect *srcrect, const SDL_Rect *dstrect)
  *
  * \brief Copy a portion of the texture to the current rendering target.
  *
  * \param texture The source texture.
  * \param srcrect A pointer to the source rectangle, or NULL for the entire texture.
  * \param dstrect A pointer to the destination rectangle, or NULL for the entire rendering target.
- * \param blendMode SDL_TextureBlendMode to be used if the source texture has an alpha channel.
- * \param scaleMode SDL_TextureScaleMode to be used if the source and destination rectangles don't have the same width and height.
  *
  * \return 0 on success, or -1 if there is no rendering context current, or the driver doesn't support the requested operation.
- *
- * \note You can check the video driver info to see what operations are supported.
  */
 extern DECLSPEC int SDLCALL SDL_RenderCopy(SDL_TextureID textureID,
                                            const SDL_Rect * srcrect,
-                                           const SDL_Rect * dstrect,
-                                           int blendMode, int scaleMode);
+                                           const SDL_Rect * dstrect);
 
 /**
  * \fn int SDL_RenderReadPixels(const SDL_Rect *rect, void *pixels, int pitch)
--- a/src/SDL_compat.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/SDL_compat.c	Mon Aug 28 03:17:39 2006 +0000
@@ -673,9 +673,7 @@
     if (screen == SDL_VideoSurface) {
         if (SDL_VideoRendererInfo.flags & SDL_RENDERER_PRESENTCOPY) {
             for (i = 0; i < numrects; ++i) {
-                SDL_RenderCopy(SDL_VideoTexture, &rects[i], &rects[i],
-                               SDL_TEXTUREBLENDMODE_NONE,
-                               SDL_TEXTURESCALEMODE_NONE);
+                SDL_RenderCopy(SDL_VideoTexture, &rects[i], &rects[i]);
             }
         } else {
             SDL_Rect rect;
@@ -683,9 +681,7 @@
             rect.y = 0;
             rect.w = screen->w;
             rect.h = screen->h;
-            SDL_RenderCopy(SDL_VideoTexture, &rect, &rect,
-                           SDL_TEXTUREBLENDMODE_NONE,
-                           SDL_TEXTURESCALEMODE_NONE);
+            SDL_RenderCopy(SDL_VideoTexture, &rect, &rect);
         }
         SDL_RenderPresent();
     }
@@ -1421,9 +1417,7 @@
 int
 SDL_DisplayYUVOverlay(SDL_Overlay * overlay, SDL_Rect * dstrect)
 {
-    if (SDL_RenderCopy(overlay->hwdata->textureID, NULL, dstrect,
-                       SDL_TEXTUREBLENDMODE_NONE,
-                       SDL_TEXTURESCALEMODE_FAST) < 0) {
+    if (SDL_RenderCopy(overlay->hwdata->textureID, NULL, dstrect) < 0) {
         return -1;
     }
     SDL_RenderPresent();
--- a/src/audio/SDL_audiocvt.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/audio/SDL_audiocvt.c	Mon Aug 28 03:17:39 2006 +0000
@@ -36,7 +36,7 @@
 #ifdef DEBUG_CONVERT
     fprintf(stderr, "Converting to mono\n");
 #endif
-    switch (format & (SDL_AUDIO_MASK_SIGNED|SDL_AUDIO_MASK_BITSIZE)) {
+    switch (format & (SDL_AUDIO_MASK_SIGNED | SDL_AUDIO_MASK_BITSIZE)) {
     case AUDIO_U8:
         {
             Uint8 *src, *dst;
@@ -170,15 +170,15 @@
             if (SDL_AUDIO_ISBIGENDIAN(format)) {
                 for (i = cvt->len_cvt / 8; i; --i, src += 2) {
                     const Sint64 added =
-                                    (((Sint64) (Sint32) SDL_SwapBE32(src[0])) +
-                                     ((Sint64) (Sint32) SDL_SwapBE32(src[1])));
+                        (((Sint64) (Sint32) SDL_SwapBE32(src[0])) +
+                         ((Sint64) (Sint32) SDL_SwapBE32(src[1])));
                     *(dst++) = SDL_SwapBE32((Uint32) ((Sint32) (added >> 1)));
                 }
             } else {
                 for (i = cvt->len_cvt / 8; i; --i, src += 2) {
                     const Sint64 added =
-                                    (((Sint64) (Sint32) SDL_SwapLE32(src[0])) +
-                                     ((Sint64) (Sint32) SDL_SwapLE32(src[1])));
+                        (((Sint64) (Sint32) SDL_SwapLE32(src[0])) +
+                         ((Sint64) (Sint32) SDL_SwapLE32(src[1])));
                     *(dst++) = SDL_SwapLE32((Uint32) ((Sint32) (added >> 1)));
                 }
             }
@@ -188,7 +188,11 @@
     case AUDIO_F32:
         {
             /* !!! FIXME: this convert union is nasty. */
-            union { float f; Uint32 ui32; } f2i;
+            union
+            {
+                float f;
+                Uint32 ui32;
+            } f2i;
             const Uint32 *src = (const Uint32 *) cvt->buf;
             Uint32 *dst = (Uint32 *) cvt->buf;
             if (SDL_AUDIO_ISBIGENDIAN(format)) {
@@ -235,7 +239,7 @@
     fprintf(stderr, "Converting down from 6 channels to stereo\n");
 #endif
 
-    #define strip_chans_6_to_2(type) \
+#define strip_chans_6_to_2(type) \
     { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
@@ -249,18 +253,18 @@
 
     /* this function only cares about typesize, and data as a block of bits. */
     switch (SDL_AUDIO_BITSIZE(format)) {
-        case 8:
-            strip_chans_6_to_2(Uint8);
-            break;
-        case 16:
-            strip_chans_6_to_2(Uint16);
-            break;
-        case 32:
-            strip_chans_6_to_2(Uint32);
-            break;
+    case 8:
+        strip_chans_6_to_2(Uint8);
+        break;
+    case 16:
+        strip_chans_6_to_2(Uint16);
+        break;
+    case 32:
+        strip_chans_6_to_2(Uint32);
+        break;
     }
 
-    #undef strip_chans_6_to_2
+#undef strip_chans_6_to_2
 
     cvt->len_cvt /= 3;
     if (cvt->filters[++cvt->filter_index]) {
@@ -279,7 +283,7 @@
     fprintf(stderr, "Converting 6 down to quad\n");
 #endif
 
-    #define strip_chans_6_to_4(type) \
+#define strip_chans_6_to_4(type) \
     { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
@@ -295,18 +299,18 @@
 
     /* this function only cares about typesize, and data as a block of bits. */
     switch (SDL_AUDIO_BITSIZE(format)) {
-        case 8:
-            strip_chans_6_to_4(Uint8);
-            break;
-        case 16:
-            strip_chans_6_to_4(Uint16);
-            break;
-        case 32:
-            strip_chans_6_to_4(Uint32);
-            break;
+    case 8:
+        strip_chans_6_to_4(Uint8);
+        break;
+    case 16:
+        strip_chans_6_to_4(Uint16);
+        break;
+    case 32:
+        strip_chans_6_to_4(Uint32);
+        break;
     }
 
-    #undef strip_chans_6_to_4
+#undef strip_chans_6_to_4
 
     cvt->len_cvt /= 6;
     cvt->len_cvt *= 4;
@@ -325,7 +329,7 @@
     fprintf(stderr, "Converting to stereo\n");
 #endif
 
-    #define dup_chans_1_to_2(type) \
+#define dup_chans_1_to_2(type) \
     { \
         const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
         type *dst = (type *) (cvt->buf + cvt->len_cvt * 2); \
@@ -338,18 +342,18 @@
 
     /* this function only cares about typesize, and data as a block of bits. */
     switch (SDL_AUDIO_BITSIZE(format)) {
-        case 8:
-            dup_chans_1_to_2(Uint8);
-            break;
-        case 16:
-            dup_chans_1_to_2(Uint16);
-            break;
-        case 32:
-            dup_chans_1_to_2(Uint32);
-            break;
+    case 8:
+        dup_chans_1_to_2(Uint8);
+        break;
+    case 16:
+        dup_chans_1_to_2(Uint16);
+        break;
+    case 32:
+        dup_chans_1_to_2(Uint32);
+        break;
     }
 
-    #undef dup_chans_1_to_2
+#undef dup_chans_1_to_2
 
     cvt->len_cvt *= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -368,7 +372,7 @@
     fprintf(stderr, "Converting stereo to surround\n");
 #endif
 
-    switch (format & (SDL_AUDIO_MASK_SIGNED|SDL_AUDIO_MASK_BITSIZE)) {
+    switch (format & (SDL_AUDIO_MASK_SIGNED | SDL_AUDIO_MASK_BITSIZE)) {
     case AUDIO_U8:
         {
             Uint8 *src, *dst, lf, rf, ce;
@@ -573,7 +577,11 @@
 
     case AUDIO_F32:
         {
-            union { float f; Uint32 ui32; } f2i;  /* !!! FIXME: lame. */
+            union
+            {
+                float f;
+                Uint32 ui32;
+            } f2i;              /* !!! FIXME: lame. */
             float lf, rf, ce;
             const Uint32 *src = (const Uint32 *) cvt->buf + cvt->len_cvt;
             Uint32 *dst = (Uint32 *) cvt->buf + cvt->len_cvt * 3;
@@ -640,7 +648,7 @@
     fprintf(stderr, "Converting stereo to quad\n");
 #endif
 
-    switch (format & (SDL_AUDIO_MASK_SIGNED|SDL_AUDIO_MASK_BITSIZE)) {
+    switch (format & (SDL_AUDIO_MASK_SIGNED | SDL_AUDIO_MASK_BITSIZE)) {
     case AUDIO_U8:
         {
             Uint8 *src, *dst, lf, rf, ce;
@@ -831,7 +839,7 @@
     fprintf(stderr, "Converting audio rate * 2 (mono)\n");
 #endif
 
-    #define mul2_mono(type) { \
+#define mul2_mono(type) { \
         const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
         type *dst = (type *) (cvt->buf + (cvt->len_cvt * 2)); \
         for (i = cvt->len_cvt / sizeof (type); i; --i) { \
@@ -853,7 +861,7 @@
         break;
     }
 
-    #undef mul2_mono
+#undef mul2_mono
 
     cvt->len_cvt *= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -872,7 +880,7 @@
     fprintf(stderr, "Converting audio rate * 2 (stereo)\n");
 #endif
 
-    #define mul2_stereo(type) { \
+#define mul2_stereo(type) { \
         const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
         type *dst = (type *) (cvt->buf + (cvt->len_cvt * 2)); \
         for (i = cvt->len_cvt / (sizeof (type) * 2); i; --i) { \
@@ -899,7 +907,7 @@
         break;
     }
 
-    #undef mul2_stereo
+#undef mul2_stereo
 
     cvt->len_cvt *= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -917,7 +925,7 @@
     fprintf(stderr, "Converting audio rate * 2 (quad)\n");
 #endif
 
-    #define mul2_quad(type) { \
+#define mul2_quad(type) { \
         const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
         type *dst = (type *) (cvt->buf + (cvt->len_cvt * 2)); \
         for (i = cvt->len_cvt / (sizeof (type) * 4); i; --i) { \
@@ -950,7 +958,7 @@
         break;
     }
 
-    #undef mul2_quad
+#undef mul2_quad
 
     cvt->len_cvt *= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -969,7 +977,7 @@
     fprintf(stderr, "Converting audio rate * 2 (six channels)\n");
 #endif
 
-    #define mul2_chansix(type) { \
+#define mul2_chansix(type) { \
         const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
         type *dst = (type *) (cvt->buf + (cvt->len_cvt * 2)); \
         for (i = cvt->len_cvt / (sizeof (type) * 6); i; --i) { \
@@ -1008,7 +1016,7 @@
         break;
     }
 
-    #undef mul2_chansix
+#undef mul2_chansix
 
     cvt->len_cvt *= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -1026,7 +1034,7 @@
     fprintf(stderr, "Converting audio rate / 2 (mono)\n");
 #endif
 
-    #define div2_mono(type) { \
+#define div2_mono(type) { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
         for (i = cvt->len_cvt / (sizeof (type) * 2); i; --i) { \
@@ -1048,7 +1056,7 @@
         break;
     }
 
-    #undef div2_mono
+#undef div2_mono
 
     cvt->len_cvt /= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -1067,7 +1075,7 @@
     fprintf(stderr, "Converting audio rate / 2 (stereo)\n");
 #endif
 
-    #define div2_stereo(type) { \
+#define div2_stereo(type) { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
         for (i = cvt->len_cvt / (sizeof (type) * 4); i; --i) { \
@@ -1090,7 +1098,7 @@
         break;
     }
 
-    #undef div2_stereo
+#undef div2_stereo
 
     cvt->len_cvt /= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -1109,7 +1117,7 @@
     fprintf(stderr, "Converting audio rate / 2 (quad)\n");
 #endif
 
-    #define div2_quad(type) { \
+#define div2_quad(type) { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
         for (i = cvt->len_cvt / (sizeof (type) * 8); i; --i) { \
@@ -1134,7 +1142,7 @@
         break;
     }
 
-    #undef div2_quad
+#undef div2_quad
 
     cvt->len_cvt /= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -1152,7 +1160,7 @@
     fprintf(stderr, "Converting audio rate / 2 (six channels)\n");
 #endif
 
-    #define div2_chansix(type) { \
+#define div2_chansix(type) { \
         const type *src = (const type *) cvt->buf; \
         type *dst = (type *) cvt->buf; \
         for (i = cvt->len_cvt / (sizeof (type) * 12); i; --i) { \
@@ -1179,7 +1187,7 @@
         break;
     }
 
-    #undef div_chansix
+#undef div_chansix
 
     cvt->len_cvt /= 2;
     if (cvt->filters[++cvt->filter_index]) {
@@ -1309,7 +1317,7 @@
      *  processor, platform, compiler, or library here.
      */
 
-    return NULL;  /* no specialized converter code available. */
+    return NULL;                /* no specialized converter code available. */
 }
 
 
@@ -1340,7 +1348,7 @@
             }
 
             if (filter == NULL) {
-                return -1;  /* Still no matching converter?! */
+                return -1;      /* Still no matching converter?! */
             }
         }
 
@@ -1354,10 +1362,10 @@
             cvt->len_ratio /= (src_bitsize / dst_bitsize);
         }
 
-        return 1;  /* added a converter. */
+        return 1;               /* added a converter. */
     }
 
-    return 0;  /* no conversion necessary. */
+    return 0;                   /* no conversion necessary. */
 }
 
 
@@ -1379,11 +1387,10 @@
     if ((SDL_AUDIO_BITSIZE(dst_fmt) > 16) && (!SDL_AUDIO_ISSIGNED(dst_fmt))) {
         return -1;
     }
-
-    #ifdef DEBUG_CONVERT
+#ifdef DEBUG_CONVERT
     printf("Build format %04x->%04x, channels %u->%u, rate %d->%d\n",
-		    src_fmt, dst_fmt, src_channels, dst_channels, src_rate, dst_rate);
-    #endif
+           src_fmt, dst_fmt, src_channels, dst_channels, src_rate, dst_rate);
+#endif
 
     /* Start off with no conversion necessary */
 
@@ -1397,7 +1404,7 @@
 
     /* Convert data types, if necessary. Updates (cvt). */
     if (SDL_BuildAudioTypeCVT(cvt, src_fmt, dst_fmt) == -1)
-        return -1;  /* shouldn't happen, but just in case... */
+        return -1;              /* shouldn't happen, but just in case... */
 
     /* Channel conversion */
     if (src_channels != dst_channels) {
--- a/src/audio/SDL_audiotypecvt.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/audio/SDL_audiotypecvt.c	Mon Aug 28 03:17:39 2006 +0000
@@ -1,6 +1,4 @@
-/* DO NOT EDIT THIS FILE! It is generated code. */
-/* Please modify SDL/src/audio/sdlgenaudiocvt.pl instead. */
-
+/* DO NOT EDIT!  This file is generated by sdlgenaudiocvt.pl */
 /*
     SDL - Simple DirectMedia Layer
     Copyright (C) 1997-2006 Sam Lantinga
@@ -27,7 +25,7 @@
 #include "SDL_audio.h"
 #include "SDL_audio_c.h"
 
-/* Now the generated code... */
+/* *INDENT-OFF* */
 
 #define DIVBY127 0.0078740157480315f
 #define DIVBY255 0.00392156862745098f
@@ -2354,3 +2352,6 @@
 };
 
 
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/audio/SDL_mixer.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/audio/SDL_mixer.c	Mon Aug 28 03:17:39 2006 +0000
@@ -263,8 +263,8 @@
             Uint32 *dst32 = (Uint32 *) dst;
             Sint32 src1, src2;
             Sint64 dst_sample;
-            const Sint64 max_audioval = ((((Sint64)1) << (32 - 1)) - 1);
-            const Sint64 min_audioval = -(((Sint64)1) << (32 - 1));
+            const Sint64 max_audioval = ((((Sint64) 1) << (32 - 1)) - 1);
+            const Sint64 min_audioval = -(((Sint64) 1) << (32 - 1));
 
             len /= 4;
             while (len--) {
@@ -289,8 +289,8 @@
             Uint32 *dst32 = (Uint32 *) dst;
             Sint32 src1, src2;
             Sint64 dst_sample;
-            const Sint64 max_audioval = ((((Sint64)1) << (32 - 1)) - 1);
-            const Sint64 min_audioval = -(((Sint64)1) << (32 - 1));
+            const Sint64 max_audioval = ((((Sint64) 1) << (32 - 1)) - 1);
+            const Sint64 min_audioval = -(((Sint64) 1) << (32 - 1));
 
             len /= 4;
             while (len--) {
@@ -322,7 +322,11 @@
             const double min_audioval = -3.40282347e+38F;
 
             /* !!! FIXME: this is a little nasty. */
-            union { float f; Uint32 ui32; } cvt;
+            union
+            {
+                float f;
+                Uint32 ui32;
+            } cvt;
 
             len /= 4;
             while (len--) {
@@ -360,7 +364,11 @@
             const double min_audioval = -3.40282347e+38F;
 
             /* !!! FIXME: this is a little nasty. */
-            union { float f; Uint32 ui32; } cvt;
+            union
+            {
+                float f;
+                Uint32 ui32;
+            } cvt;
 
             len /= 4;
             while (len--) {
--- a/src/audio/sdlgenaudiocvt.pl	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/audio/sdlgenaudiocvt.pl	Mon Aug 28 03:17:39 2006 +0000
@@ -23,9 +23,7 @@
 
 sub outputHeader {
     print <<EOF;
-/* DO NOT EDIT THIS FILE! It is generated code. */
-/* Please modify SDL/src/audio/sdlgenaudiocvt.pl instead. */
-
+/* DO NOT EDIT!  This file is generated by sdlgenaudiocvt.pl */
 /*
     SDL - Simple DirectMedia Layer
     Copyright (C) 1997-2006 Sam Lantinga
@@ -52,7 +50,7 @@
 #include "SDL_audio.h"
 #include "SDL_audio_c.h"
 
-/* Now the generated code... */
+/* *INDENT-OFF* */
 
 EOF
 
@@ -66,6 +64,13 @@
     print("\n");
 }
 
+sub outputFooter {
+    print <<EOF;
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */
+EOF
+}
 
 sub splittype {
     my $t = shift;
@@ -307,7 +312,9 @@
 
 EOF
 
+outputFooter();
+
 exit 0;
 
-# end of sdlaudiocvt.pl ...
+# end of sdlgenaudiocvt.pl ...
 
--- a/src/hermes/HeadX86.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/hermes/HeadX86.h	Mon Aug 28 03:17:39 2006 +0000
@@ -186,4 +186,5 @@
 
 
 #endif
+
 /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_blit_N.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_blit_N.c	Mon Aug 28 03:17:39 2006 +0000
@@ -2108,7 +2108,7 @@
 		/* *INDENT-OFF* */
 		DUFFS_LOOP(
 		{
-		        Uint32 Pixel;
+            Uint32 Pixel;
 			unsigned sR;
 			unsigned sG;
 			unsigned sB;
@@ -2278,15 +2278,14 @@
 		/* *INDENT-OFF* */
 		DUFFS_LOOP(
 		{
-		        Uint32 Pixel;
+            Uint32 Pixel;
 			unsigned sR;
 			unsigned sG;
 			unsigned sB;
 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
 			if ( (Pixel & rgbmask) != ckey ) {
-			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
-				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
-					      sR, sG, sB, alpha);
+                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
+				ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
 			}
 			dst += dstbpp;
 			src += srcbpp;
@@ -2327,11 +2326,9 @@
 		/* *INDENT-OFF* */
 		DUFFS_LOOP(
 		{
-			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
-				      sR, sG, sB, sA);
+			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
 			if ( (Pixel & rgbmask) != ckey ) {
-				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
-						sR, sG, sB, sA);
+				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
 			}
 			dst += dstbpp;
 			src += srcbpp;
--- a/src/video/SDL_glfuncs.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_glfuncs.h	Mon Aug 28 03:17:39 2006 +0000
@@ -43,7 +43,7 @@
 SDL_PROC_UNUSED(void, glColor4bv, (const GLbyte *))
 SDL_PROC_UNUSED(void, glColor4d, (GLdouble, GLdouble, GLdouble, GLdouble))
 SDL_PROC_UNUSED(void, glColor4dv, (const GLdouble *))
-SDL_PROC_UNUSED(void, glColor4f, (GLfloat, GLfloat, GLfloat, GLfloat))
+SDL_PROC(void, glColor4f, (GLfloat, GLfloat, GLfloat, GLfloat))
 SDL_PROC_UNUSED(void, glColor4fv, (const GLfloat *))
 SDL_PROC_UNUSED(void, glColor4i, (GLint, GLint, GLint, GLint))
 SDL_PROC_UNUSED(void, glColor4iv, (const GLint *))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_rendercopy.c	Mon Aug 28 03:17:39 2006 +0000
@@ -0,0 +1,5215 @@
+/* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* *INDENT-OFF* */
+
+#include "SDL_video.h"
+#include "SDL_rendercopy.h"
+
+static struct { /* Lookup table scanned in order by SDL_GetRenderCopyFunc(); the first matching entry wins */
+    Uint32 src_format; /* source pixel format; compared for equality with the requested format */
+    Uint32 dst_format; /* destination pixel format; compared for equality with the requested format */
+    int modMode; /* mask of modulation bits this entry accepts; requested bits must all be in it */
+    int blendMode; /* mask of blend-mode bits this entry accepts; requested bits must all be in it */
+    int scaleMode; /* mask of scale-mode bits this entry accepts; requested bits must all be in it */
+    SDL_RenderCopyFunc func; /* routine handed back when the entry matches */
+} SDL_RenderCopyFuncTable[] = {
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_RGB888_BGR888 },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_BGR888_RGB888 },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_ARGB8888_RGB888 },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_ARGB8888_BGR888 },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_RGBA8888_RGB888 },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_RGBA8888_BGR888 },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_ABGR8888_RGB888 },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_ABGR8888_BGR888 },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_BGRA8888_RGB888 },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_BGRA8888_BGR888 },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale },
+};
+
+SDL_RenderCopyFunc SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode)
+{
+    /*
+     * Scan SDL_RenderCopyFuncTable front to back and return the first
+     * routine whose formats equal the requested ones and whose mode masks
+     * contain every requested bit; NULL when no entry qualifies.
+     */
+    int idx;
+
+    for (idx = 0; idx < SDL_arraysize(SDL_RenderCopyFuncTable); ++idx) {
+        /* Pixel formats have to match exactly. */
+        if (src_format != SDL_RenderCopyFuncTable[idx].src_format ||
+            dst_format != SDL_RenderCopyFuncTable[idx].dst_format) {
+            continue;
+        }
+        /* No requested mode bit may fall outside what the entry accepts. */
+        if ((modMode & ~SDL_RenderCopyFuncTable[idx].modMode) == 0 &&
+            (blendMode & ~SDL_RenderCopyFuncTable[idx].blendMode) == 0 &&
+            (scaleMode & ~SDL_RenderCopyFuncTable[idx].scaleMode) == 0) {
+            return SDL_RenderCopyFuncTable[idx].func;
+        }
+    }
+    return NULL;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    /* Scaled copy of 32-bit pixels: each output pixel takes one source pixel chosen by 16.16 fixed-point stepping.  Returns 0. */
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;   /* empty destination: nothing to draw, and the divisions below must not see a zero divisor */
+    }
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;    /* starts at 1.0 so the first pixel of the row always loads src */
+        while (posy >= 0x10000L) {    /* consume whole source rows accumulated so far */
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            *dst = *src;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    /*
+     * Unscaled RGB888 -> RGB888 copy: every source pixel is combined with
+     * the destination pixel according to the blend bits in data->flags.
+     * The source alpha is taken as 255.  Always returns 0.
+     */
+    const int flags = data->flags;
+    const int op = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 sa = 0xFF;
+    Uint32 sr, sg, sb;
+    Uint32 dr, dg, db;
+    int n;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        for (n = data->dst_w; n--; ++src, ++dst) {
+            const Uint32 s = *src;
+            const Uint32 d = *dst;
+            sr = (s >> 16) & 0xFF; sg = (s >> 8) & 0xFF; sb = s & 0xFF;
+            dr = (d >> 16) & 0xFF; dg = (d >> 8) & 0xFF; db = d & 0xFF;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Scale colour by alpha here; goes away if alpha is ever premultiplied */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (op == SDL_RENDERCOPY_BLEND) {
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+            } else if (op == SDL_RENDERCOPY_ADD) {
+                /* Add, clamping each channel at 255 */
+                dr += sr; if (dr > 255) dr = 255;
+                dg += sg; if (dg > 255) dg = 255;
+                db += sb; if (db > 255) db = 255;
+            } else if (op == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            /* For any other flag combination dr/dg/db are stored as they were read. */
+            *dst = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    /* Scaled RGB888 -> RGB888 blend; the source is stepped in 16.16 fixed point.  Returns 0. */
+    const int flags = data->flags;
+    Uint32 srcpixel, dstpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstR, dstG, dstB;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;   /* empty destination: nothing to draw, and the divisions below must not see a zero divisor */
+    }
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;    /* starts at 1.0 so the first pixel of the row always loads src */
+        while (posy >= 0x10000L) {    /* consume whole source rows accumulated so far */
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    /*
+     * Unscaled RGB888 -> RGB888 copy.  When SDL_RENDERCOPY_MODULATE_COLOR
+     * is set in data->flags each colour channel is multiplied by
+     * data->r / data->g / data->b and divided by 255.  Always returns 0.
+     */
+    const int flags = data->flags;
+    const Uint32 mr = data->r, mg = data->g, mb = data->b;
+    Uint32 r, g, b;
+    int n;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        for (n = data->dst_w; n--; ++src, ++dst) {
+            r = (*src >> 16) & 0xFF;
+            g = (*src >> 8) & 0xFF;
+            b = *src & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                r = (r * mr) / 255;
+                g = (g * mg) / 255;
+                b = (b * mb) / 255;
+            }
+            /* Repack the three channels; no alpha is stored in the destination word */
+            *dst = (r << 16) | (g << 8) | b;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    /* Scaled RGB888 -> RGB888 copy with optional colour modulation by data->r/g/b; 16.16 fixed-point stepping.  Returns 0. */
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;   /* empty destination: nothing to draw, and the divisions below must not see a zero divisor */
+    }
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;    /* starts at 1.0 so the first pixel of the row always loads src */
+        while (posy >= 0x10000L) {    /* consume whole source rows accumulated so far */
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    /*
+     * Unscaled RGB888 -> RGB888 copy.  The source colour and alpha are
+     * first modulated by data->r/g/b/a when the matching MODULATE bit of
+     * data->flags is set, then combined with the destination using the
+     * BLEND/ADD/MOD bits.  Source alpha starts at 255.  Always returns 0.
+     */
+    const int flags = data->flags;
+    const int op = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 mr = data->r;
+    const Uint32 mg = data->g;
+    const Uint32 mb = data->b;
+    const Uint32 ma = data->a;
+    Uint32 sr, sg, sb, sa;
+    Uint32 dr, dg, db;
+    int n;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        for (n = data->dst_w; n--; ++src, ++dst) {
+            const Uint32 s = *src;
+            const Uint32 d = *dst;
+            sr = (s >> 16) & 0xFF; sg = (s >> 8) & 0xFF; sb = s & 0xFF; sa = 0xFF;
+            dr = (d >> 16) & 0xFF; dg = (d >> 8) & 0xFF; db = d & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * mr) / 255;
+                sg = (sg * mg) / 255;
+                sb = (sb * mb) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * ma) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Scale colour by alpha here; goes away if alpha is ever premultiplied */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (op == SDL_RENDERCOPY_BLEND) {
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+            } else if (op == SDL_RENDERCOPY_ADD) {
+                /* Add, clamping each channel at 255 */
+                dr += sr; if (dr > 255) dr = 255;
+                dg += sg; if (dg > 255) dg = 255;
+                db += sb; if (db > 255) db = 255;
+            } else if (op == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            /* For any other flag combination dr/dg/db are stored as they were read. */
+            *dst = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from a 32-bit source (red in bits 16-23, blue in bits 0-7) to a
+ * target with the same channel order, with optional color/alpha modulation
+ * followed by blending.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point.  data->flags selects the
+ * per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR   source R/G/B *= data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA   source alpha (fixed at 0xFF) *= data->a / 255
+ *   SDL_RENDERCOPY_BLEND/_ADD/_MOD  combine the source with the destination
+ * When the blend bits match none of those three cases exactly, the
+ * destination color channels are written back unchanged.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 srcpixel, dstpixel;
+            Uint32 srcR, srcG, srcB, srcA;
+            Uint32 dstR, dstG, dstB;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            srcA = 0xFF;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy data->dst_h rows of data->dst_w 32-bit pixels from the source to the
+ * target, swapping the red and blue channels on the way.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7; the
+ * written pixels have blue in bits 16-23 and red in bits 0-7.  The top byte
+ * of the source is ignored and the top byte of the result is zero.  No flags
+ * are consulted.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 pixel = *src++;
+            const Uint32 R = (Uint8)(pixel >> 16);
+            const Uint32 G = (Uint8)(pixel >> 8);
+            const Uint32 B = (Uint8)pixel;
+
+            *dst++ = (B << 16) | (G << 8) | R;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy that swaps the red and blue channels: source pixels are read
+ * with red in bits 16-23 and blue in bits 0-7, and written with blue in bits
+ * 16-23 and red in bits 0-7.  No flags are consulted.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 pixel, R, G, B;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            B = (Uint8)pixel;
+            *dst++ = (B << 16) | (G << 8) | R;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend data->dst_h rows of data->dst_w source pixels onto the target.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7; target
+ * pixels are read and written with blue in bits 16-23 and red in bits 0-7.
+ * Source alpha is fixed at 0xFF.  The blend bits of data->flags
+ * (SDL_RENDERCOPY_BLEND, _ADD, _MOD) pick the operation; when they match
+ * none of those three cases exactly, the destination color channels are
+ * written back unchanged.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 srcpixel = *src++;
+            const Uint32 dstpixel = *dst;
+            const Uint32 srcA = 0xFF;
+            Uint32 srcR = (Uint8)(srcpixel >> 16);
+            Uint32 srcG = (Uint8)(srcpixel >> 8);
+            Uint32 srcB = (Uint8)srcpixel;
+            Uint32 dstB = (Uint8)(dstpixel >> 16);
+            Uint32 dstG = (Uint8)(dstpixel >> 8);
+            Uint32 dstR = (Uint8)dstpixel;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstB << 16) | (dstG << 8) | dstR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blend of source pixels onto the target.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7; target
+ * pixels are read and written with blue in bits 16-23 and red in bits 0-7.
+ * Source alpha is fixed at 0xFF.  The blend bits of data->flags
+ * (SDL_RENDERCOPY_BLEND, _ADD, _MOD) pick the operation; when they match
+ * none of those three cases exactly, the destination color channels are
+ * written back unchanged.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            const Uint32 srcA = 0xFF;
+            Uint32 srcpixel, dstpixel;
+            Uint32 srcR, srcG, srcB;
+            Uint32 dstR, dstG, dstB;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy data->dst_h rows of data->dst_w pixels with optional color
+ * modulation, swapping the red and blue channels.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7 and
+ * written with blue in bits 16-23 and red in bits 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Alpha
+ * modulation is not applied here: no alpha value is written to the target.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 pixel = *src++;
+            Uint32 R = (Uint8)(pixel >> 16);
+            Uint32 G = (Uint8)(pixel >> 8);
+            Uint32 B = (Uint8)pixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst++ = (B << 16) | (G << 8) | R;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy with optional color modulation, swapping the red and blue
+ * channels.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7 and
+ * written with blue in bits 16-23 and red in bits 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Alpha
+ * modulation is not applied here: no alpha value is written to the target.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 pixel, R, G, B;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst++ = (B << 16) | (G << 8) | R;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend data->dst_h rows of data->dst_w source pixels onto the target with
+ * optional color and alpha modulation.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7; target
+ * pixels are read and written with blue in bits 16-23 and red in bits 0-7.
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR   source R/G/B *= data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA   source alpha (fixed at 0xFF) *= data->a / 255
+ *   SDL_RENDERCOPY_BLEND/_ADD/_MOD  combine the source with the destination
+ * When the blend bits match none of those three cases exactly, the
+ * destination color channels are written back unchanged.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 srcpixel = *src++;
+            const Uint32 dstpixel = *dst;
+            Uint32 srcR = (Uint8)(srcpixel >> 16);
+            Uint32 srcG = (Uint8)(srcpixel >> 8);
+            Uint32 srcB = (Uint8)srcpixel;
+            Uint32 srcA = 0xFF;
+            Uint32 dstB = (Uint8)(dstpixel >> 16);
+            Uint32 dstG = (Uint8)(dstpixel >> 8);
+            Uint32 dstR = (Uint8)dstpixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstB << 16) | (dstG << 8) | dstR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blend of source pixels onto the target with optional color and
+ * alpha modulation.
+ *
+ * Source pixels are read with red in bits 16-23 and blue in bits 0-7; target
+ * pixels are read and written with blue in bits 16-23 and red in bits 0-7.
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR   source R/G/B *= data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA   source alpha (fixed at 0xFF) *= data->a / 255
+ *   SDL_RENDERCOPY_BLEND/_ADD/_MOD  combine the source with the destination
+ * When the blend bits match none of those three cases exactly, the
+ * destination color channels are written back unchanged.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 srcpixel, dstpixel;
+            Uint32 srcR, srcG, srcB, srcA;
+            Uint32 dstR, dstG, dstB;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy data->dst_h rows of data->dst_w 32-bit pixels from the source to the
+ * target, swapping the blue and red channels on the way.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7; the
+ * written pixels have red in bits 16-23 and blue in bits 0-7.  The top byte
+ * of the source is ignored and the top byte of the result is zero.  No flags
+ * are consulted.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 pixel = *src++;
+            const Uint32 B = (Uint8)(pixel >> 16);
+            const Uint32 G = (Uint8)(pixel >> 8);
+            const Uint32 R = (Uint8)pixel;
+
+            *dst++ = (R << 16) | (G << 8) | B;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy that swaps the blue and red channels: source pixels are read
+ * with blue in bits 16-23 and red in bits 0-7, and written with red in bits
+ * 16-23 and blue in bits 0-7.  No flags are consulted.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 pixel, R, G, B;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            R = (Uint8)pixel;
+            *dst++ = (R << 16) | (G << 8) | B;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend data->dst_h rows of data->dst_w source pixels onto the target.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7; target
+ * pixels are read and written with red in bits 16-23 and blue in bits 0-7.
+ * Source alpha is fixed at 0xFF.  The blend bits of data->flags
+ * (SDL_RENDERCOPY_BLEND, _ADD, _MOD) pick the operation; when they match
+ * none of those three cases exactly, the destination color channels are
+ * written back unchanged.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 srcpixel = *src++;
+            const Uint32 dstpixel = *dst;
+            const Uint32 srcA = 0xFF;
+            Uint32 srcB = (Uint8)(srcpixel >> 16);
+            Uint32 srcG = (Uint8)(srcpixel >> 8);
+            Uint32 srcR = (Uint8)srcpixel;
+            Uint32 dstR = (Uint8)(dstpixel >> 16);
+            Uint32 dstG = (Uint8)(dstpixel >> 8);
+            Uint32 dstB = (Uint8)dstpixel;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstR << 16) | (dstG << 8) | dstB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blend of source pixels onto the target.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7; target
+ * pixels are read and written with red in bits 16-23 and blue in bits 0-7.
+ * Source alpha is fixed at 0xFF.  The blend bits of data->flags
+ * (SDL_RENDERCOPY_BLEND, _ADD, _MOD) pick the operation; when they match
+ * none of those three cases exactly, the destination color channels are
+ * written back unchanged.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            const Uint32 srcA = 0xFF;
+            Uint32 srcpixel, dstpixel;
+            Uint32 srcR, srcG, srcB;
+            Uint32 dstR, dstG, dstB;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy data->dst_h rows of data->dst_w pixels with optional color
+ * modulation, swapping the blue and red channels.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7 and
+ * written with red in bits 16-23 and blue in bits 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Alpha
+ * modulation is not applied here: no alpha value is written to the target.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 pixel = *src++;
+            Uint32 B = (Uint8)(pixel >> 16);
+            Uint32 G = (Uint8)(pixel >> 8);
+            Uint32 R = (Uint8)pixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst++ = (R << 16) | (G << 8) | B;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy with optional color modulation, swapping the blue and red
+ * channels.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7 and
+ * written with red in bits 16-23 and blue in bits 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Alpha
+ * modulation is not applied here: no alpha value is written to the target.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 pixel, R, G, B;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst++ = (R << 16) | (G << 8) | B;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend data->dst_h rows of data->dst_w source pixels onto the target with
+ * optional color and alpha modulation.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7; target
+ * pixels are read and written with red in bits 16-23 and blue in bits 0-7.
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR   source R/G/B *= data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA   source alpha (fixed at 0xFF) *= data->a / 255
+ *   SDL_RENDERCOPY_BLEND/_ADD/_MOD  combine the source with the destination
+ * When the blend bits match none of those three cases exactly, the
+ * destination color channels are written back unchanged.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->src / data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        while (n--) {
+            const Uint32 srcpixel = *src++;
+            const Uint32 dstpixel = *dst;
+            Uint32 srcB = (Uint8)(srcpixel >> 16);
+            Uint32 srcG = (Uint8)(srcpixel >> 8);
+            Uint32 srcR = (Uint8)srcpixel;
+            Uint32 srcA = 0xFF;
+            Uint32 dstR = (Uint8)(dstpixel >> 16);
+            Uint32 dstG = (Uint8)(dstpixel >> 8);
+            Uint32 dstB = (Uint8)dstpixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstR << 16) | (dstG << 8) | dstB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blend of source pixels onto the target with optional color and
+ * alpha modulation.
+ *
+ * Source pixels are read with blue in bits 16-23 and red in bits 0-7; target
+ * pixels are read and written with red in bits 16-23 and blue in bits 0-7.
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR   source R/G/B *= data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA   source alpha (fixed at 0xFF) *= data->a / 255
+ *   SDL_RENDERCOPY_BLEND/_ADD/_MOD  combine the source with the destination
+ * When the blend bits match none of those three cases exactly, the
+ * destination color channels are written back unchanged.
+ *
+ * Each destination pixel reads exactly one source pixel (no filtering); the
+ * source position advances in 16.16 fixed point by src size / dst size.
+ *
+ * The descriptor is modified: data->dst_h is consumed as the row counter and
+ * data->dst is advanced by data->dst_pitch per row.  Always returns 0; a
+ * target whose width or height is not positive is left untouched.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* The step computation divides by the target size, so reject an empty
+       (or negative) target before getting there. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination row/column, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+    srcy = 0;
+    posy = 0;
+
+    while (data->dst_h--) {
+        /* src is always re-pointed before its first read because posx starts
+           at a full step; the initializer only avoids an uninitialized use
+           warning. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+
+        /* Move whole steps out of the vertical accumulator into srcy. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        srcx = -1;
+        posx = 0x10000L;
+        while (n--) {
+            Uint32 srcpixel, dstpixel;
+            Uint32 srcR, srcG, srcB, srcA;
+            Uint32 dstR, dstG, dstB;
+
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcR = (Uint8)srcpixel;
+            srcA = 0xFF;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add per channel. */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* No single blend mode selected: keep the destination. */
+                break;
+            }
+            *dst++ = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch a BGR888 source onto a BGR888 destination.
+ *
+ * Each sampled 32-bit source pixel is copied to the destination verbatim.
+ * The src_w x src_h source is point-sampled to dst_w x dst_h using 16.16
+ * fixed-point steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            *dst = *src;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a BGR888 source onto a BGR888 destination at 1:1 size.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  The source has no alpha channel, so it is treated as opaque (0xFF).
+ * data->flags selects one of the BLEND / ADD / MOD modes.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch and blend a BGR888 source onto a BGR888 destination.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  The source has no alpha channel, so it is treated as opaque (0xFF).
+ * data->flags selects one of the BLEND / ADD / MOD modes.  The src_w x
+ * src_h source is point-sampled to dst_w x dst_h using 16.16 fixed-point
+ * steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy a BGR888 source onto a BGR888 destination at 1:1 size with colour
+ * modulation.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each
+ * channel is scaled by data->r/g/b (255 leaves it unchanged).  data->a is
+ * not consulted; the top byte of every written pixel is zero.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch a BGR888 source onto a BGR888 destination with colour modulation.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each
+ * channel is scaled by data->r/g/b (255 leaves it unchanged).  data->a is
+ * not consulted; the top byte of every written pixel is zero.  The src_w x
+ * src_h source is point-sampled to dst_w x dst_h using 16.16 fixed-point
+ * steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a BGR888 source onto a BGR888 destination at 1:1 size with
+ * colour/alpha modulation.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  The source has no alpha channel, so its alpha starts at 0xFF before
+ * modulation.  data->flags selects modulation by data->r/g/b/a (255 leaves
+ * a channel unchanged) and one of the BLEND / ADD / MOD modes.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch and blend a BGR888 source onto a BGR888 destination with
+ * colour/alpha modulation.
+ *
+ * Both pixel layouts hold B in bits 16-23, G in bits 8-15 and R in bits
+ * 0-7.  The source has no alpha channel, so its alpha starts at 0xFF before
+ * modulation.  data->flags selects modulation by data->r/g/b/a (255 leaves
+ * a channel unchanged) and one of the BLEND / ADD / MOD modes.  The src_w x
+ * src_h source is point-sampled to dst_w x dst_h using 16.16 fixed-point
+ * steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ARGB8888 source onto an RGB888 destination at 1:1 size.
+ *
+ * R, G and B keep their positions (bits 16-23, 8-15 and 0-7); the source
+ * alpha byte (bits 24-31) is dropped, so the top byte of every written
+ * pixel is zero.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ARGB8888 source onto an RGB888 destination.
+ *
+ * R, G and B keep their positions (bits 16-23, 8-15 and 0-7); the source
+ * alpha byte (bits 24-31) is dropped, so the top byte of every written
+ * pixel is zero.  The src_w x src_h source is point-sampled to dst_w x
+ * dst_h using 16.16 fixed-point steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an ARGB8888 source onto an RGB888 destination at 1:1 size.
+ *
+ * Source pixels hold A in bits 24-31, R in bits 16-23, G in bits 8-15 and
+ * B in bits 0-7; the destination uses the same R/G/B positions and its top
+ * byte is written as zero.  data->flags selects one of the BLEND / ADD /
+ * MOD modes; the per-pixel source alpha drives BLEND and ADD.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch and blend an ARGB8888 source onto an RGB888 destination.
+ *
+ * Source pixels hold A in bits 24-31, R in bits 16-23, G in bits 8-15 and
+ * B in bits 0-7; the destination uses the same R/G/B positions and its top
+ * byte is written as zero.  data->flags selects one of the BLEND / ADD /
+ * MOD modes; the per-pixel source alpha drives BLEND and ADD.  The src_w x
+ * src_h source is point-sampled to dst_w x dst_h using 16.16 fixed-point
+ * steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ARGB8888 source onto an RGB888 destination at 1:1 size with
+ * colour modulation.
+ *
+ * R, G and B keep their positions (bits 16-23, 8-15 and 0-7).  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * scaled by data->r/g/b (255 leaves it unchanged).  The source alpha byte
+ * and data->a are not consulted; the top byte of every written pixel is
+ * zero.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ARGB8888 source onto an RGB888 destination with colour
+ * modulation.
+ *
+ * R, G and B keep their positions (bits 16-23, 8-15 and 0-7).  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * scaled by data->r/g/b (255 leaves it unchanged).  The source alpha byte
+ * and data->a are not consulted; the top byte of every written pixel is
+ * zero.  The src_w x src_h source is point-sampled to dst_w x dst_h using
+ * 16.16 fixed-point steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an ARGB8888 source onto an RGB888 destination at 1:1 size with
+ * colour/alpha modulation.
+ *
+ * Source pixels hold A in bits 24-31, R in bits 16-23, G in bits 8-15 and
+ * B in bits 0-7; the destination uses the same R/G/B positions and its top
+ * byte is written as zero.  data->flags selects modulation by
+ * data->r/g/b/a (255 leaves a channel unchanged) and one of the BLEND /
+ * ADD / MOD modes.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch and blend an ARGB8888 source onto an RGB888 destination with
+ * colour/alpha modulation.
+ *
+ * Source pixels hold A in bits 24-31, R in bits 16-23, G in bits 8-15 and
+ * B in bits 0-7; the destination uses the same R/G/B positions and its top
+ * byte is written as zero.  data->flags selects modulation by
+ * data->r/g/b/a (255 leaves a channel unchanged) and one of the BLEND /
+ * ADD / MOD modes.  The src_w x src_h source is point-sampled to dst_w x
+ * dst_h using 16.16 fixed-point steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend mode selected: keep the destination colour */
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ARGB8888 source onto a BGR888 destination at 1:1 size.
+ *
+ * Source R (bits 16-23) and B (bits 0-7) swap places in the destination; G
+ * stays in bits 8-15.  The source alpha byte (bits 24-31) is dropped, so
+ * the top byte of every written pixel is zero.
+ *
+ * Advances data->src and data->dst by one pitch per row and counts
+ * data->dst_h down.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ARGB8888 source onto a BGR888 destination.
+ *
+ * Source R (bits 16-23) and B (bits 0-7) swap places in the destination; G
+ * stays in bits 8-15.  The source alpha byte (bits 24-31) is dropped, so
+ * the top byte of every written pixel is zero.  The src_w x src_h source is
+ * point-sampled to dst_w x dst_h using 16.16 fixed-point steps.
+ *
+ * Advances data->dst by one pitch per row and counts data->dst_h down.
+ * Always returns 0; an empty destination is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Empty destination: nothing to draw, and the step computation below
+       would otherwise divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* source step per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L; /* start at 1.0 so the first pixel fetches src */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blended copy from A,R,G,B pixels (high to low byte) onto B,G,R
+ * pixels (low three bytes, top byte written as zero).
+ *
+ * The blend mode is taken from data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *   SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src*dst/255 (source alpha is not applied)
+ * Any other combination of those three bits leaves the colour channels as
+ * they were; the pixel is still rewritten.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blended copy from A,R,G,B pixels (high to low byte) onto B,G,R
+ * pixels (low three bytes, top byte written as zero).  One source pixel is
+ * picked per destination pixel by 16.16 fixed-point stepping; no filtering.
+ *
+ * The blend mode is taken from data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *   SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src*dst/255 (source alpha is not applied)
+ * Any other combination of those three bits leaves the colour channels as
+ * they were; the pixel is still rewritten.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from A,R,G,B pixels (high to low byte) to B,G,R pixels (low
+ * three bytes, top byte written as zero) with optional colour modulation.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each source
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * Source alpha and data->a are not used: nothing is blended and the written
+ * pixel carries no alpha.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            R = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (B << 16) | (G << 8) | R;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from A,R,G,B pixels (high to low byte) to B,G,R pixels (low
+ * three bytes, top byte written as zero) with optional colour modulation.
+ * One source pixel is picked per destination pixel by 16.16 fixed-point
+ * stepping; no filtering.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each source
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * Source alpha and data->a are not used.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (B << 16) | (G << 8) | R;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, modulated, blended copy from A,R,G,B pixels (high to low byte)
+ * onto B,G,R pixels (low three bytes, top byte written as zero).
+ *
+ * Per pixel, in order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a.
+ *   3. The blend bits of data->flags select the combine step:
+ *        SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *        SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *        SDL_RENDERCOPY_MOD    dst = src*dst/255 (alpha is not applied)
+ *      Any other combination leaves the colour channels as they were.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, modulated, blended copy from A,R,G,B pixels (high to low byte)
+ * onto B,G,R pixels (low three bytes, top byte written as zero).  One source
+ * pixel is picked per destination pixel by 16.16 fixed-point stepping; no
+ * filtering.
+ *
+ * Per pixel, in order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a.
+ *   3. The blend bits of data->flags select the combine step:
+ *        SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *        SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *        SDL_RENDERCOPY_MOD    dst = src*dst/255 (alpha is not applied)
+ *      Any other combination leaves the colour channels as they were.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcR = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcB = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from 32-bit pixels laid out R,G,B,A (high to low byte) to
+ * 32-bit pixels laid out R,G,B in the low three bytes.  The source alpha
+ * byte is discarded and the top byte of every written pixel is zero.
+ *
+ * Copies data->dst_w pixels on each of data->dst_h rows.  The call consumes
+ * the descriptor: data->dst_h is decremented per row and data->src and
+ * data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            const Uint32 pixel = *src;
+            const Uint32 R = (Uint8)(pixel >> 24);
+            const Uint32 G = (Uint8)(pixel >> 16);
+            const Uint32 B = (Uint8)(pixel >> 8);
+            *dst = (R << 16) | (G << 8) | B;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from R,G,B,A pixels (high to low byte) to R,G,B pixels (low
+ * three bytes, top byte written as zero).  Each destination pixel takes the
+ * value of exactly one source pixel, chosen by stepping through the source
+ * in 16.16 fixed point; there is no filtering.
+ *
+ * The src_w x src_h source area is mapped onto the dst_w x dst_h destination
+ * area.  data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 24);
+            G = (Uint8)(pixel >> 16);
+            B = (Uint8)(pixel >> 8);
+            *dst = (R << 16) | (G << 8) | B;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blended copy from R,G,B,A pixels (high to low byte) onto R,G,B
+ * pixels (low three bytes, top byte written as zero).
+ *
+ * The blend mode is taken from data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *   SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src*dst/255 (source alpha is not applied)
+ * Any other combination of those three bits leaves the colour channels as
+ * they were; the pixel is still rewritten.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blended copy from R,G,B,A pixels (high to low byte) onto R,G,B
+ * pixels (low three bytes, top byte written as zero).  One source pixel is
+ * picked per destination pixel by 16.16 fixed-point stepping; no filtering.
+ *
+ * The blend mode is taken from data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *   SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src*dst/255 (source alpha is not applied)
+ * Any other combination of those three bits leaves the colour channels as
+ * they were; the pixel is still rewritten.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from R,G,B,A pixels (high to low byte) to R,G,B pixels (low
+ * three bytes, top byte written as zero) with optional colour modulation.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each source
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * Source alpha and data->a are not used: nothing is blended and the written
+ * pixel carries no alpha.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            pixel = *src;
+            R = (Uint8)(pixel >> 24);
+            G = (Uint8)(pixel >> 16);
+            B = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (R << 16) | (G << 8) | B;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from R,G,B,A pixels (high to low byte) to R,G,B pixels (low
+ * three bytes, top byte written as zero) with optional colour modulation.
+ * One source pixel is picked per destination pixel by 16.16 fixed-point
+ * stepping; no filtering.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each source
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * Source alpha and data->a are not used.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 24);
+            G = (Uint8)(pixel >> 16);
+            B = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (R << 16) | (G << 8) | B;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, modulated, blended copy from R,G,B,A pixels (high to low byte)
+ * onto R,G,B pixels (low three bytes, top byte written as zero).
+ *
+ * Per pixel, in order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a.
+ *   3. The blend bits of data->flags select the combine step:
+ *        SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *        SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *        SDL_RENDERCOPY_MOD    dst = src*dst/255 (alpha is not applied)
+ *      Any other combination leaves the colour channels as they were.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, modulated, blended copy from R,G,B,A pixels (high to low byte)
+ * onto R,G,B pixels (low three bytes, top byte written as zero).  One source
+ * pixel is picked per destination pixel by 16.16 fixed-point stepping; no
+ * filtering.
+ *
+ * Per pixel, in order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a.
+ *   3. The blend bits of data->flags select the combine step:
+ *        SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *        SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *        SDL_RENDERCOPY_MOD    dst = src*dst/255 (alpha is not applied)
+ *      Any other combination leaves the colour channels as they were.
+ *
+ * data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from 32-bit pixels laid out R,G,B,A (high to low byte) to
+ * 32-bit pixels laid out B,G,R in the low three bytes.  The source alpha
+ * byte is discarded and the top byte of every written pixel is zero.
+ *
+ * Copies data->dst_w pixels on each of data->dst_h rows.  The call consumes
+ * the descriptor: data->dst_h is decremented per row and data->src and
+ * data->dst are advanced by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            const Uint32 pixel = *src;
+            const Uint32 R = (Uint8)(pixel >> 24);
+            const Uint32 G = (Uint8)(pixel >> 16);
+            const Uint32 B = (Uint8)(pixel >> 8);
+            *dst = (B << 16) | (G << 8) | R;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from R,G,B,A pixels (high to low byte) to B,G,R pixels (low
+ * three bytes, top byte written as zero).  Each destination pixel takes the
+ * value of exactly one source pixel, chosen by stepping through the source
+ * in 16.16 fixed point; there is no filtering.
+ *
+ * The src_w x src_h source area is mapped onto the dst_w x dst_h destination
+ * area.  data->dst_h is decremented per row and data->dst is advanced by
+ * data->dst_pitch; data->src is only read.  Returns 0; an empty destination
+ * (dst_w or dst_h <= 0) is a no-op.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw and would otherwise make the
+       step computation below divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* posx starts at 1.0 so the first pixel of the row selects source
+           column 0 and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 24);
+            G = (Uint8)(pixel >> 16);
+            B = (Uint8)(pixel >> 8);
+            *dst = (B << 16) | (G << 8) | R;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blended copy from R,G,B,A pixels (high to low byte) onto B,G,R
+ * pixels (low three bytes, top byte written as zero).
+ *
+ * The blend mode is taken from data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src*A/255 + dst*(255-A)/255
+ *   SDL_RENDERCOPY_ADD    dst = min(src*A/255 + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src*dst/255 (source alpha is not applied)
+ * Any other combination of those three bits leaves the colour channels as
+ * they were; the pixel is still rewritten.
+ *
+ * data->dst_h is decremented per row and data->src / data->dst are advanced
+ * by their pitches.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Stretch-blend an RGBA8888 source rectangle (src_w x src_h) onto a BGR888
+ * destination rectangle (dst_w x dst_h), without color/alpha modulation.
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as R:G:B:A from
+ * the most to the least significant byte; destination words are read as
+ * x:B:G:R and rewritten with a zero top byte.  data->flags selects the blend:
+ *   SDL_RENDERCOPY_BLEND  dst = src * srcA + dst * (1 - srcA)
+ *   SDL_RENDERCOPY_ADD    dst = min(src * srcA + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst
+ * Any other combination of those bits leaves the destination color as is.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an RGBA8888 source rectangle to a BGR888 destination of the same
+ * size, optionally tinting it (no scaling, no blending).
+ *
+ * Source words are read as R:G:B:A from the most to the least significant
+ * byte; the destination word is written as 0:B:G:R, so the source alpha is
+ * discarded.  When data->flags has SDL_RENDERCOPY_MODULATE_COLOR set, each
+ * color channel is scaled by data->r, data->g or data->b over 255.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            Uint32 red = (texel >> 24) & 0xFF;
+            Uint32 green = (texel >> 16) & 0xFF;
+            Uint32 blue = (texel >> 8) & 0xFF;
+
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (blue << 16) | (green << 8) | red;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch an RGBA8888 source rectangle (src_w x src_h) over a BGR888
+ * destination rectangle (dst_w x dst_h), optionally tinting it (no blending).
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as R:G:B:A from
+ * the most to the least significant byte; the destination word is written as
+ * 0:B:G:R, so the source alpha is discarded.  When data->flags has
+ * SDL_RENDERCOPY_MODULATE_COLOR set, each color channel is scaled by
+ * data->r, data->g or data->b over 255.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 24);
+            G = (Uint8)(pixel >> 16);
+            B = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an RGBA8888 source rectangle onto a BGR888 destination of the same
+ * size, with optional color and alpha modulation (no scaling).
+ *
+ * Source words are read as R:G:B:A from the most to the least significant
+ * byte; destination words are read as x:B:G:R and rewritten with a zero top
+ * byte.  Per pixel, in this order:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by data->r/g/b,
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a,
+ *   - for BLEND and ADD the source color is multiplied by the source alpha,
+ *   - the blend selected by data->flags is applied:
+ *       SDL_RENDERCOPY_BLEND  dst = src + dst * (1 - srcA)
+ *       SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *       SDL_RENDERCOPY_MOD    dst = src * dst
+ *     Any other combination of those bits leaves the destination color as is.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const int fade = (flags & SDL_RENDERCOPY_MODULATE_ALPHA) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 sr = (texel >> 24) & 0xFF;
+            Uint32 sg = (texel >> 16) & 0xFF;
+            Uint32 sb = (texel >> 8) & 0xFF;
+            Uint32 sa = texel & 0xFF;
+            Uint32 db = (under >> 16) & 0xFF;
+            Uint32 dg = (under >> 8) & 0xFF;
+            Uint32 dr = under & 0xFF;
+
+            if (tint) {
+                sr = (sr * scaleR) / 255;
+                sg = (sg * scaleG) / 255;
+                sb = (sb * scaleB) / 255;
+            }
+            if (fade) {
+                sa = (sa * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            switch (mode) {
+            case SDL_RENDERCOPY_BLEND:
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dr = (sr + dr > 255) ? 255 : (sr + dr);
+                dg = (sg + dg > 255) ? 255 : (sg + dg);
+                db = (sb + db > 255) ? 255 : (sb + db);
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+                break;
+            }
+            *out = (db << 16) | (dg << 8) | dr;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch-blend an RGBA8888 source rectangle (src_w x src_h) onto a BGR888
+ * destination rectangle (dst_w x dst_h), with optional color and alpha
+ * modulation.
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as R:G:B:A from
+ * the most to the least significant byte; destination words are read as
+ * x:B:G:R and rewritten with a zero top byte.  Per pixel, in this order:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by data->r/g/b,
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a,
+ *   - for BLEND and ADD the source color is multiplied by the source alpha,
+ *   - the blend selected by data->flags is applied:
+ *       SDL_RENDERCOPY_BLEND  dst = src + dst * (1 - srcA)
+ *       SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *       SDL_RENDERCOPY_MOD    dst = src * dst
+ *     Any other combination of those bits leaves the destination color as is.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24);
+            srcG = (Uint8)(srcpixel >> 16);
+            srcB = (Uint8)(srcpixel >> 8);
+            srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ABGR8888 source rectangle to an RGB888 destination of the same
+ * size, converting each pixel (no scaling, blending or modulation).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte; the destination word is written as 0:R:G:B, so red and blue trade
+ * places and the source alpha is discarded.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 blue = (texel >> 16) & 0xFF;
+            const Uint32 green = (texel >> 8) & 0xFF;
+            const Uint32 red = texel & 0xFF;
+
+            *out = (red << 16) | (green << 8) | blue;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ABGR8888 source rectangle (src_w x src_h) over an RGB888
+ * destination rectangle (dst_w x dst_h), converting each pixel (no blending
+ * or modulation).
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte; the destination word is written as
+ * 0:R:G:B, so the source alpha is discarded.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            R = (Uint8)pixel;
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source rectangle onto an RGB888 destination of the same
+ * size (no scaling, no color/alpha modulation).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte; destination words are read as x:R:G:B and rewritten with a zero top
+ * byte.  data->flags selects the blend:
+ *   SDL_RENDERCOPY_BLEND  dst = src * srcA + dst * (1 - srcA)
+ *   SDL_RENDERCOPY_ADD    dst = min(src * srcA + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst
+ * Any other combination of those bits leaves the destination color as is.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            const Uint32 sa = (texel >> 24) & 0xFF;
+            Uint32 sb = (texel >> 16) & 0xFF;
+            Uint32 sg = (texel >> 8) & 0xFF;
+            Uint32 sr = texel & 0xFF;
+            Uint32 dr = (under >> 16) & 0xFF;
+            Uint32 dg = (under >> 8) & 0xFF;
+            Uint32 db = under & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            switch (mode) {
+            case SDL_RENDERCOPY_BLEND:
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dr = (sr + dr > 255) ? 255 : (sr + dr);
+                dg = (sg + dg > 255) ? 255 : (sg + dg);
+                db = (sb + db > 255) ? 255 : (sb + db);
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+                break;
+            }
+            *out = (dr << 16) | (dg << 8) | db;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch-blend an ABGR8888 source rectangle (src_w x src_h) onto an RGB888
+ * destination rectangle (dst_w x dst_h), without color/alpha modulation.
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte; destination words are read as
+ * x:R:G:B and rewritten with a zero top byte.  data->flags selects the blend:
+ *   SDL_RENDERCOPY_BLEND  dst = src * srcA + dst * (1 - srcA)
+ *   SDL_RENDERCOPY_ADD    dst = min(src * srcA + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst
+ * Any other combination of those bits leaves the destination color as is.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcB = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ABGR8888 source rectangle to an RGB888 destination of the same
+ * size, optionally tinting it (no scaling, no blending).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte; the destination word is written as 0:R:G:B, so the source alpha is
+ * discarded.  When data->flags has SDL_RENDERCOPY_MODULATE_COLOR set, each
+ * color channel is scaled by data->r, data->g or data->b over 255.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            Uint32 blue = (texel >> 16) & 0xFF;
+            Uint32 green = (texel >> 8) & 0xFF;
+            Uint32 red = texel & 0xFF;
+
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (red << 16) | (green << 8) | blue;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ABGR8888 source rectangle (src_w x src_h) over an RGB888
+ * destination rectangle (dst_w x dst_h), optionally tinting it (no blending).
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte; the destination word is written as
+ * 0:R:G:B, so the source alpha is discarded.  When data->flags has
+ * SDL_RENDERCOPY_MODULATE_COLOR set, each color channel is scaled by
+ * data->r, data->g or data->b over 255.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source rectangle onto an RGB888 destination of the same
+ * size, with optional color and alpha modulation (no scaling).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte; destination words are read as x:R:G:B and rewritten with a zero top
+ * byte.  Per pixel, in this order:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by data->r/g/b,
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a,
+ *   - for BLEND and ADD the source color is multiplied by the source alpha,
+ *   - the blend selected by data->flags is applied:
+ *       SDL_RENDERCOPY_BLEND  dst = src + dst * (1 - srcA)
+ *       SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *       SDL_RENDERCOPY_MOD    dst = src * dst
+ *     Any other combination of those bits leaves the destination color as is.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const int fade = (flags & SDL_RENDERCOPY_MODULATE_ALPHA) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 sa = (texel >> 24) & 0xFF;
+            Uint32 sb = (texel >> 16) & 0xFF;
+            Uint32 sg = (texel >> 8) & 0xFF;
+            Uint32 sr = texel & 0xFF;
+            Uint32 dr = (under >> 16) & 0xFF;
+            Uint32 dg = (under >> 8) & 0xFF;
+            Uint32 db = under & 0xFF;
+
+            if (tint) {
+                sr = (sr * scaleR) / 255;
+                sg = (sg * scaleG) / 255;
+                sb = (sb * scaleB) / 255;
+            }
+            if (fade) {
+                sa = (sa * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            switch (mode) {
+            case SDL_RENDERCOPY_BLEND:
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dr = (sr + dr > 255) ? 255 : (sr + dr);
+                dg = (sg + dg > 255) ? 255 : (sg + dg);
+                db = (sb + db > 255) ? 255 : (sb + db);
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+                break;
+            }
+            *out = (dr << 16) | (dg << 8) | db;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch-blend an ABGR8888 source rectangle (src_w x src_h) onto an RGB888
+ * destination rectangle (dst_w x dst_h), with optional color and alpha
+ * modulation.
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte; destination words are read as
+ * x:R:G:B and rewritten with a zero top byte.  Per pixel, in this order:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by data->r/g/b,
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a,
+ *   - for BLEND and ADD the source color is multiplied by the source alpha,
+ *   - the blend selected by data->flags is applied:
+ *       SDL_RENDERCOPY_BLEND  dst = src + dst * (1 - srcA)
+ *       SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *       SDL_RENDERCOPY_MOD    dst = src * dst
+ *     Any other combination of those bits leaves the destination color as is.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcB = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy an ABGR8888 source rectangle to a BGR888 destination of the same
+ * size (no scaling, blending or modulation).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte and written back as 0:B:G:R: the color bytes keep their positions and
+ * only the alpha byte is cleared.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 blue = (texel >> 16) & 0xFF;
+            const Uint32 green = (texel >> 8) & 0xFF;
+            const Uint32 red = texel & 0xFF;
+
+            *out = (blue << 16) | (green << 8) | red;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch an ABGR8888 source rectangle (src_w x src_h) over a BGR888
+ * destination rectangle (dst_w x dst_h) (no blending or modulation).
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte and written as 0:B:G:R, so only the
+ * alpha byte is cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16);
+            G = (Uint8)(pixel >> 8);
+            R = (Uint8)pixel;
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source rectangle onto a BGR888 destination of the same
+ * size (no scaling, no color/alpha modulation).
+ *
+ * Source words are read as A:B:G:R from the most to the least significant
+ * byte; destination words are read as x:B:G:R and rewritten with a zero top
+ * byte.  data->flags selects the blend:
+ *   SDL_RENDERCOPY_BLEND  dst = src * srcA + dst * (1 - srcA)
+ *   SDL_RENDERCOPY_ADD    dst = min(src * srcA + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst
+ * Any other combination of those bits leaves the destination color as is.
+ *
+ * The copy state is consumed: data->dst_h is counted down, and data->src and
+ * data->dst are advanced by their pitches after every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            const Uint32 sa = (texel >> 24) & 0xFF;
+            Uint32 sb = (texel >> 16) & 0xFF;
+            Uint32 sg = (texel >> 8) & 0xFF;
+            Uint32 sr = texel & 0xFF;
+            Uint32 db = (under >> 16) & 0xFF;
+            Uint32 dg = (under >> 8) & 0xFF;
+            Uint32 dr = under & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            switch (mode) {
+            case SDL_RENDERCOPY_BLEND:
+                dr = sr + ((255 - sa) * dr) / 255;
+                dg = sg + ((255 - sa) * dg) / 255;
+                db = sb + ((255 - sa) * db) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dr = (sr + dr > 255) ? 255 : (sr + dr);
+                dg = (sg + dg > 255) ? 255 : (sg + dg);
+                db = (sb + db > 255) ? 255 : (sb + db);
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+                break;
+            }
+            *out = (db << 16) | (dg << 8) | dr;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Stretch-blend an ABGR8888 source rectangle (src_w x src_h) onto a BGR888
+ * destination rectangle (dst_w x dst_h), without color/alpha modulation.
+ *
+ * The source is walked in 16.16 fixed point and one source pixel is read per
+ * destination pixel (no filtering).  Source words are read as A:B:G:R from
+ * the most to the least significant byte; destination words are read as
+ * x:B:G:R and rewritten with a zero top byte.  data->flags selects the blend:
+ *   SDL_RENDERCOPY_BLEND  dst = src * srcA + dst * (1 - srcA)
+ *   SDL_RENDERCOPY_ADD    dst = min(src * srcA + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst
+ * Any other combination of those bits leaves the destination color as is.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is not modified.  An empty destination (dst_w or dst_h <= 0) is
+ * a no-op.  Always returns 0.
+ *
+ * NOTE(review): src_w << 16 and src_h << 16 are computed in int, so sources
+ * of 32768 pixels or more would overflow -- confirm callers never do that.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the step computation below from
+       dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination row / column, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Repointed on the first pixel of every row (posx starts at one
+           whole step); initialized so it is never indeterminate. */
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows out of the vertical accumulator */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source pixels out of the horizontal accumulator */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24);
+            srcB = (Uint8)(srcpixel >> 16);
+            srcG = (Uint8)(srcpixel >> 8);
+            srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16);
+            dstG = (Uint8)(dstpixel >> 8);
+            dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from an ABGR8888 source to a BGR888 destination with
+ * optional colour modulation.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * The source alpha byte and data->a do not affect the output; the top byte
+ * of every written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left) {
+            const Uint32 px = *in++;
+            Uint32 red = px & 0xFF;
+            Uint32 green = (px >> 8) & 0xFF;
+            Uint32 blue = (px >> 16) & 0xFF;
+
+            if (tint) {
+                red = (red * mulR) / 255;
+                green = (green * mulG) / 255;
+                blue = (blue * mulB) / 255;
+            }
+            *out++ = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from an ABGR8888 source to a BGR888 destination with optional
+ * colour modulation.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  The source
+ * alpha byte and data->a do not affect the output; the top byte of every
+ * written pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    Uint32 px;
+    Uint32 red, green, blue;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            px = *src;
+            red = px & 0xFF;
+            green = (px >> 8) & 0xFF;
+            blue = (px >> 16) & 0xFF;
+            if (tint) {
+                red = (red * mulR) / 255;
+                green = (green * mulG) / 255;
+                blue = (blue * mulB) / 255;
+            }
+            *dst++ = (blue << 16) | (green << 8) | red;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, modulated and blended copy from an ABGR8888 source to a BGR888
+ * destination.
+ *
+ * Per pixel, in order: optional colour modulation by data->r/g/b
+ * (SDL_RENDERCOPY_MODULATE_COLOR), optional alpha modulation by data->a
+ * (SDL_RENDERCOPY_MODULATE_ALPHA), then the BLEND / ADD / MOD operation
+ * selected in data->flags.  With none (or several) of the blend bits set the
+ * destination colour is written back unchanged.  The top byte of every
+ * written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            sp = *in;
+            sA = (sp >> 24) & 0xFF;
+            sB = (sp >> 16) & 0xFF;
+            sG = (sp >> 8) & 0xFF;
+            sR = sp & 0xFF;
+            dp = *out;
+            dB = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dR = dp & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sA = (sA * mulA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, modulated and blended copy from an ABGR8888 source to a BGR888
+ * destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Per pixel, in order:
+ * optional colour modulation by data->r/g/b (SDL_RENDERCOPY_MODULATE_COLOR),
+ * optional alpha modulation by data->a (SDL_RENDERCOPY_MODULATE_ALPHA), then
+ * the BLEND / ADD / MOD operation selected in data->flags.  With none (or
+ * several) of the blend bits set the destination colour is written back
+ * unchanged.  The top byte of every written pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            sp = *src;
+            sA = (sp >> 24) & 0xFF;
+            sB = (sp >> 16) & 0xFF;
+            sG = (sp >> 8) & 0xFF;
+            sR = sp & 0xFF;
+            dp = *dst;
+            dB = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dR = dp & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sA = (sA * mulA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *dst++ = (dB << 16) | (dG << 8) | dR;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from a BGRA8888 source to an RGB888 destination.
+ *
+ * Each source pixel's blue, green and red bytes (bits 24-31, 16-23 and 8-15)
+ * are repacked as red << 16 | green << 8 | blue.  The source alpha byte is
+ * discarded and the top byte of every written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left) {
+            const Uint32 px = *in++;
+            const Uint32 blue = (px >> 24) & 0xFF;
+            const Uint32 green = (px >> 16) & 0xFF;
+            const Uint32 red = (px >> 8) & 0xFF;
+
+            *out++ = (red << 16) | (green << 8) | blue;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from a BGRA8888 source to an RGB888 destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Each sampled pixel's
+ * blue, green and red bytes are repacked as red << 16 | green << 8 | blue;
+ * the source alpha byte is discarded and the top byte of every written
+ * pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 px;
+    Uint32 red, green, blue;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            px = *src;
+            blue = (px >> 24) & 0xFF;
+            green = (px >> 16) & 0xFF;
+            red = (px >> 8) & 0xFF;
+            *dst++ = (red << 16) | (green << 8) | blue;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, blended copy from a BGRA8888 source to an RGB888 destination.
+ *
+ * Each source pixel is combined with the destination pixel according to the
+ * BLEND / ADD / MOD bits in data->flags; with none (or several) of them set
+ * the destination colour is written back unchanged.  The top byte of every
+ * written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            sp = *in;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *out;
+            dR = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dB = dp & 0xFF;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, blended copy from a BGRA8888 source to an RGB888 destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Each sampled pixel is
+ * combined with the destination pixel according to the BLEND / ADD / MOD
+ * bits in data->flags; with none (or several) of them set the destination
+ * colour is written back unchanged.  The top byte of every written pixel is
+ * zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            sp = *src;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *dst;
+            dR = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dB = dp & 0xFF;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *dst++ = (dR << 16) | (dG << 8) | dB;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from a BGRA8888 source to an RGB888 destination with
+ * optional colour modulation.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * The source alpha byte and data->a do not affect the output; the top byte
+ * of every written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left) {
+            const Uint32 px = *in++;
+            Uint32 blue = (px >> 24) & 0xFF;
+            Uint32 green = (px >> 16) & 0xFF;
+            Uint32 red = (px >> 8) & 0xFF;
+
+            if (tint) {
+                red = (red * mulR) / 255;
+                green = (green * mulG) / 255;
+                blue = (blue * mulB) / 255;
+            }
+            *out++ = (red << 16) | (green << 8) | blue;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from a BGRA8888 source to an RGB888 destination with optional
+ * colour modulation.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  The source
+ * alpha byte and data->a do not affect the output; the top byte of every
+ * written pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    Uint32 px;
+    Uint32 red, green, blue;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            px = *src;
+            blue = (px >> 24) & 0xFF;
+            green = (px >> 16) & 0xFF;
+            red = (px >> 8) & 0xFF;
+            if (tint) {
+                red = (red * mulR) / 255;
+                green = (green * mulG) / 255;
+                blue = (blue * mulB) / 255;
+            }
+            *dst++ = (red << 16) | (green << 8) | blue;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, modulated and blended copy from a BGRA8888 source to an RGB888
+ * destination.
+ *
+ * Per pixel, in order: optional colour modulation by data->r/g/b
+ * (SDL_RENDERCOPY_MODULATE_COLOR), optional alpha modulation by data->a
+ * (SDL_RENDERCOPY_MODULATE_ALPHA), then the BLEND / ADD / MOD operation
+ * selected in data->flags.  With none (or several) of the blend bits set the
+ * destination colour is written back unchanged.  The top byte of every
+ * written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            sp = *in;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *out;
+            dR = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dB = dp & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sA = (sA * mulA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, modulated and blended copy from a BGRA8888 source to an RGB888
+ * destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Per pixel, in order:
+ * optional colour modulation by data->r/g/b (SDL_RENDERCOPY_MODULATE_COLOR),
+ * optional alpha modulation by data->a (SDL_RENDERCOPY_MODULATE_ALPHA), then
+ * the BLEND / ADD / MOD operation selected in data->flags.  With none (or
+ * several) of the blend bits set the destination colour is written back
+ * unchanged.  The top byte of every written pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            sp = *src;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *dst;
+            dR = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dB = dp & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sA = (sA * mulA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *dst++ = (dR << 16) | (dG << 8) | dB;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from a BGRA8888 source to a BGR888 destination.
+ *
+ * Each source pixel's blue, green and red bytes (bits 24-31, 16-23 and 8-15)
+ * are repacked as blue << 16 | green << 8 | red.  The source alpha byte is
+ * discarded and the top byte of every written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left) {
+            const Uint32 px = *in++;
+            const Uint32 blue = (px >> 24) & 0xFF;
+            const Uint32 green = (px >> 16) & 0xFF;
+            const Uint32 red = (px >> 8) & 0xFF;
+
+            *out++ = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from a BGRA8888 source to a BGR888 destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Each sampled pixel's
+ * blue, green and red bytes are repacked as blue << 16 | green << 8 | red;
+ * the source alpha byte is discarded and the top byte of every written
+ * pixel is zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 px;
+    Uint32 red, green, blue;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            px = *src;
+            blue = (px >> 24) & 0xFF;
+            green = (px >> 16) & 0xFF;
+            red = (px >> 8) & 0xFF;
+            *dst++ = (blue << 16) | (green << 8) | red;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, blended copy from a BGRA8888 source to a BGR888 destination.
+ *
+ * Each source pixel is combined with the destination pixel according to the
+ * BLEND / ADD / MOD bits in data->flags; with none (or several) of them set
+ * the destination colour is written back unchanged.  The top byte of every
+ * written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            sp = *in;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *out;
+            dB = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dR = dp & 0xFF;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled, blended copy from a BGRA8888 source to a BGR888 destination.
+ *
+ * The source is point-sampled with 16.16 fixed-point stepping so that
+ * src_w x src_h pixels cover dst_w x dst_h pixels.  Each sampled pixel is
+ * combined with the destination pixel according to the BLEND / ADD / MOD
+ * bits in data->flags; with none (or several) of them set the destination
+ * colour is written back unchanged.  The top byte of every written pixel is
+ * zero.
+ *
+ * data->dst and data->dst_h are consumed while iterating.  Always returns 0.
+ * A destination with non-positive width or height is a no-op; without that
+ * check the step computation would divide by zero.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 sp, dp;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+    int stepx, stepy;
+    int sy = 0;
+    int fy = 0;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+    stepy = (data->src_h << 16) / data->dst_h;
+    stepx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int sx = -1;
+        int fx = 0x10000L;      /* starts at 1.0, so the first pixel always loads src */
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        for (; fy >= 0x10000L; fy -= 0x10000L) {
+            ++sy;
+        }
+        for (n = data->dst_w; n > 0; --n) {
+            if (fx >= 0x10000L) {
+                do {
+                    ++sx;
+                    fx -= 0x10000L;
+                } while (fx >= 0x10000L);
+                src = (Uint32 *)(data->src + (sy * data->src_pitch) + (sx * 4));
+            }
+            sp = *src;
+            sB = (sp >> 24) & 0xFF;
+            sG = (sp >> 16) & 0xFF;
+            sR = (sp >> 8) & 0xFF;
+            sA = sp & 0xFF;
+            dp = *dst;
+            dB = (dp >> 16) & 0xFF;
+            dG = (dp >> 8) & 0xFF;
+            dR = dp & 0xFF;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Source is not premultiplied: scale colour by alpha first. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 inv = 255 - sA;
+                dR = sR + (inv * dR) / 255;
+                dG = sG + (inv * dG) / 255;
+                dB = sB + (inv * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR = (dR + sR > 255) ? 255 : dR + sR;
+                dG = (dG + sG > 255) ? 255 : dG + sG;
+                dB = (dB + sB > 255) ? 255 : dB + sB;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+            *dst++ = (dB << 16) | (dG << 8) | dR;
+            fx += stepx;
+        }
+        fy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy from a BGRA8888 source to a BGR888 destination with
+ * optional colour modulation.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour
+ * channel is multiplied by data->r / data->g / data->b and divided by 255.
+ * The source alpha byte and data->a do not affect the output; the top byte
+ * of every written pixel is zero.
+ *
+ * data->src, data->dst and data->dst_h are consumed while iterating.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left) {
+            const Uint32 px = *in++;
+            Uint32 blue = (px >> 24) & 0xFF;
+            Uint32 green = (px >> 16) & 0xFF;
+            Uint32 red = (px >> 8) & 0xFF;
+
+            if (tint) {
+                red = (red * mulR) / 255;
+                green = (green * mulG) / 255;
+                blue = (blue * mulB) / 255;
+            }
+            *out++ = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * BGRA8888 -> BGR888 copy with optional colour modulation and
+ * nearest-sample scaling from src_w x src_h to dst_w x dst_h.
+ *
+ * Source coordinates are tracked in 16.16 fixed point: incx/incy are the
+ * source step per destination pixel/row, and srcx/srcy advance by one
+ * whenever the accumulated position reaches 0x10000.  Because the
+ * dimensions are shifted left by 16 in an int, very large source sizes
+ * would overflow.
+ *
+ * The source alpha byte is discarded (BGR888 has no alpha channel).
+ * data->dst and data->dst_h are modified while the rows are walked;
+ * data->src is only read.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw, and would otherwise
+       divide by zero when the step sizes are computed. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one step "before" column 0 so the first pixel of the row
+           always takes the branch below and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                /* Byte arithmetic needs a byte pointer: data->src is void *. */
+                src = (Uint32 *)((Uint8 *)data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst = (Uint8 *)data->dst + data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * BGRA8888 -> BGR888 copy with optional colour/alpha modulation and
+ * blending against the existing destination pixel, no scaling.
+ *
+ * Per pixel:
+ *   1. unpack source (B 24-31, G 16-23, R 8-15, A 0-7) and destination
+ *      (B 16-23, G 8-15, R 0-7);
+ *   2. apply data->r/g/b and data->a when the matching
+ *      SDL_RENDERCOPY_MODULATE_* flag is set;
+ *   3. for BLEND and ADD, premultiply the source colour by its alpha;
+ *   4. combine with the destination according to the blend flag and store
+ *      the result as BGR888.
+ *
+ * data->src, data->dst and data->dst_h are modified while the rows are
+ * walked.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+
+    while (data->dst_h--) {
+        Uint32 *src = (Uint32 *)data->src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                /* source is already premultiplied: dst = src + (1 - a) * dst */
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend flag selected: destination colour is kept */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            ++src;
+            ++dst;
+        }
+        /* src/dst are void * in SDL_RenderCopyData; step them as byte
+           pointers, since arithmetic on void * is a compiler extension. */
+        data->src = (Uint8 *)data->src + data->src_pitch;
+        data->dst = (Uint8 *)data->dst + data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * BGRA8888 -> BGR888 copy with optional colour/alpha modulation, blending
+ * against the existing destination pixel, and nearest-sample scaling from
+ * src_w x src_h to dst_w x dst_h.
+ *
+ * Source coordinates are tracked in 16.16 fixed point: incx/incy are the
+ * source step per destination pixel/row, and srcx/srcy advance by one
+ * whenever the accumulated position reaches 0x10000.  Because the
+ * dimensions are shifted left by 16 in an int, very large source sizes
+ * would overflow.
+ *
+ * For BLEND and ADD the (modulated) source colour is premultiplied by the
+ * (modulated) source alpha before being combined with the destination.
+ *
+ * data->dst and data->dst_h are modified while the rows are walked;
+ * data->src is only read.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* An empty destination has nothing to draw, and would otherwise
+       divide by zero when the step sizes are computed. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one step "before" column 0 so the first pixel of the row
+           always takes the branch below and initialises src. */
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                /* Byte arithmetic needs a byte pointer: data->src is void *. */
+                src = (Uint32 *)((Uint8 *)data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                /* source is already premultiplied: dst = src + (1 - a) * dst */
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            default:
+                /* no single blend flag selected: destination colour is kept */
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst = (Uint8 *)data->dst + data->dst_pitch;
+    }
+    return 0;
+}
+
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_rendercopy.h	Mon Aug 28 03:17:39 2006 +0000
@@ -0,0 +1,147 @@
+/* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* *INDENT-OFF* */
+
+#define SDL_RENDERCOPY_MODULATE_COLOR   0x0001  /* multiply source R/G/B by data->r/g/b (over 255) */
+#define SDL_RENDERCOPY_MODULATE_ALPHA   0x0002  /* multiply source alpha by data->a (over 255) */
+#define SDL_RENDERCOPY_BLEND            0x0010  /* dst = src*srcA + dst*(255 - srcA)/255 */
+#define SDL_RENDERCOPY_ADD              0x0020  /* dst = src*srcA + dst, clamped to 255 */
+#define SDL_RENDERCOPY_MOD              0x0040  /* dst = src * dst / 255 */
+#define SDL_RENDERCOPY_NEAREST          0x0100  /* presumably nearest-sample scaling; not tested by the copy routines -- TODO confirm */
+
+typedef struct {            /* argument block handed to every SDL_RenderCopy_* routine */
+    void *src;              /* first source pixel; non-scaling routines advance it by src_pitch per row */
+    int src_w, src_h;       /* source size in pixels; read by the _Scale routines */
+    int src_pitch;          /* byte offset between consecutive source rows */
+    void *dst;              /* first destination pixel; advanced by dst_pitch per row */
+    int dst_w, dst_h;       /* destination size in pixels; dst_h is counted down by the copy loops */
+    int dst_pitch;          /* byte offset between consecutive destination rows */
+    void *aux_data;         /* NOTE(review): not referenced by the copy routines -- purpose to be confirmed */
+    int flags;              /* combination of SDL_RENDERCOPY_* bits */
+    Uint8 r, g, b, a;       /* colour/alpha modulation factors; 255 leaves a channel unchanged */
+} SDL_RenderCopyData;
+
+typedef int (*SDL_RenderCopyFunc)(SDL_RenderCopyData *data);  /* common signature of the SDL_RenderCopy_* routines */
+
+extern SDL_RenderCopyFunc SDLCALL SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode);
+
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Modulate(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data);
+extern int SDLCALL SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data);
+
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_renderer_gl.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_renderer_gl.c	Mon Aug 28 03:17:39 2006 +0000
@@ -32,6 +32,8 @@
 
 /* OpenGL renderer implementation */
 
+static const float inv255f = 1.0f / 255.0f;    /* scales an 8-bit channel value to the 0..1 float range passed to GL */
+
 static SDL_Renderer *GL_CreateRenderer(SDL_Window * window, Uint32 flags);
 static int GL_ActivateRenderer(SDL_Renderer * renderer);
 static int GL_DisplayModeChanged(SDL_Renderer * renderer);
@@ -43,20 +45,27 @@
 static int GL_GetTexturePalette(SDL_Renderer * renderer,
                                 SDL_Texture * texture, SDL_Color * colors,
                                 int firstcolor, int ncolors);
+static int GL_SetTextureColorMod(SDL_Renderer * renderer,
+                                 SDL_Texture * texture);
+static int GL_SetTextureAlphaMod(SDL_Renderer * renderer,
+                                 SDL_Texture * texture);
+static int GL_SetTextureBlendMode(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int GL_SetTextureScaleMode(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
 static int GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                             const SDL_Rect * rect, const void *pixels,
                             int pitch);
 static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * rect, int markDirty,
-                          void **pixels, int *pitch);
+                          const SDL_Rect * rect, int markDirty, void **pixels,
+                          int *pitch);
 static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                             int numrects, const SDL_Rect * rects);
-static int GL_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect,
-                         Uint32 color);
+static int GL_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                         Uint8 a, const SDL_Rect * rect);
 static int GL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                         const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-                         int blendMode, int scaleMode);
+                         const SDL_Rect * srcrect, const SDL_Rect * dstrect);
 static void GL_RenderPresent(SDL_Renderer * renderer);
 static void GL_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void GL_DestroyRenderer(SDL_Renderer * renderer);
@@ -68,6 +77,8 @@
      "opengl",
      (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTDISCARD |
       SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
+     (SDL_TEXTUREMODULATE_NONE | SDL_TEXTUREMODULATE_COLOR |
+      SDL_TEXTUREMODULATE_ALPHA),
      (SDL_TEXTUREBLENDMODE_NONE | SDL_TEXTUREBLENDMODE_MASK |
       SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD |
       SDL_TEXTUREBLENDMODE_MOD),
@@ -236,6 +247,10 @@
     renderer->CreateTexture = GL_CreateTexture;
     renderer->SetTexturePalette = GL_SetTexturePalette;
     renderer->GetTexturePalette = GL_GetTexturePalette;
+    renderer->SetTextureColorMod = GL_SetTextureColorMod;
+    renderer->SetTextureAlphaMod = GL_SetTextureAlphaMod;
+    renderer->SetTextureBlendMode = GL_SetTextureBlendMode;
+    renderer->SetTextureScaleMode = GL_SetTextureScaleMode;
     renderer->UpdateTexture = GL_UpdateTexture;
     renderer->LockTexture = GL_LockTexture;
     renderer->UnlockTexture = GL_UnlockTexture;
@@ -570,6 +585,54 @@
 }
 
 static int
+GL_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    /* Nothing to precompute: GL_RenderCopy reads texture->r/g/b at draw
+       time and feeds them to glColor4f.  Report success, because colour
+       modulation is advertised in this renderer's capabilities. */
+    return 0;
+}
+
+/*
+ * Accept a new alpha modulation value for a GL texture.
+ * Nothing has to be precomputed: GL_RenderCopy reads texture->a at draw
+ * time and feeds it to glColor4f.  Returns 0, because alpha modulation is
+ * advertised in this renderer's capabilities.
+ */
+static int
+GL_SetTextureAlphaMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    return 0;
+}
+
+/*
+ * Validate texture->blendMode for the OpenGL renderer.
+ * NONE, MASK, BLEND, ADD and MOD are accepted (returns 0).  Any other
+ * value is reported through SDL_Unsupported(), the texture falls back to
+ * SDL_TEXTUREBLENDMODE_NONE and -1 is returned.
+ */
+static int
+GL_SetTextureBlendMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    int status = 0;
+
+    switch (texture->blendMode) {
+    case SDL_TEXTUREBLENDMODE_NONE:
+    case SDL_TEXTUREBLENDMODE_MASK:
+    case SDL_TEXTUREBLENDMODE_BLEND:
+    case SDL_TEXTUREBLENDMODE_ADD:
+    case SDL_TEXTUREBLENDMODE_MOD:
+        /* supported as is */
+        break;
+    default:
+        SDL_Unsupported();
+        texture->blendMode = SDL_TEXTUREBLENDMODE_NONE;
+        status = -1;
+        break;
+    }
+    return status;
+}
+
+/*
+ * Validate texture->scaleMode for the OpenGL renderer.
+ * NONE, FAST and SLOW are accepted (returns 0).  BEST is downgraded to
+ * SLOW and any unknown value to NONE; both cases are reported through
+ * SDL_Unsupported() and return -1.
+ */
+static int
+GL_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    int status = 0;
+
+    switch (texture->scaleMode) {
+    case SDL_TEXTURESCALEMODE_NONE:
+    case SDL_TEXTURESCALEMODE_FAST:
+    case SDL_TEXTURESCALEMODE_SLOW:
+        /* supported as is */
+        break;
+    case SDL_TEXTURESCALEMODE_BEST:
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_SLOW;
+        status = -1;
+        break;
+    default:
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_NONE;
+        status = -1;
+        break;
+    }
+    return status;
+}
+
+static int
 GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, const void *pixels, int pitch)
 {
@@ -636,18 +699,14 @@
 }
 
 static int
-GL_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect, Uint32 color)
+GL_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b, Uint8 a,
+              const SDL_Rect * rect)
 {
     GL_RenderData *data = (GL_RenderData *) renderer->driverdata;
     SDL_Window *window = SDL_GetWindowFromID(renderer->window);
-    GLclampf r, g, b, a;
 
-    a = ((GLclampf) ((color >> 24) & 0xFF)) / 255.0f;
-    r = ((GLclampf) ((color >> 16) & 0xFF)) / 255.0f;
-    g = ((GLclampf) ((color >> 8) & 0xFF)) / 255.0f;
-    b = ((GLclampf) (color & 0xFF)) / 255.0f;
-
-    data->glClearColor(r, g, b, a);
+    data->glClearColor((GLclampf) r * inv255f, (GLclampf) g * inv255f,
+                       (GLclampf) b * inv255f, (GLclampf) a * inv255f);
     data->glViewport(rect->x, window->h - rect->y, rect->w, rect->h);
     data->glClear(GL_COLOR_BUFFER_BIT);
     data->glViewport(0, 0, window->w, window->h);
@@ -656,8 +715,7 @@
 
 static int
 GL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-              const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-              int blendMode, int scaleMode)
+              const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     GL_RenderData *data = (GL_RenderData *) renderer->driverdata;
     GL_TextureData *texturedata = (GL_TextureData *) texture->driverdata;
@@ -700,8 +758,17 @@
 
     data->glBindTexture(texturedata->type, texturedata->texture);
 
-    if (blendMode != data->blendMode) {
-        switch (blendMode) {
+    if (texture->modMode) {
+        data->glColor4f((GLfloat) texture->r * inv255f,
+                        (GLfloat) texture->g * inv255f,
+                        (GLfloat) texture->b * inv255f,
+                        (GLfloat) texture->a * inv255f);
+    } else {
+        data->glColor4f(1.0f, 1.0f, 1.0f, 1.0f);
+    }
+
+    if (texture->blendMode != data->blendMode) {
+        switch (texture->blendMode) {
         case SDL_TEXTUREBLENDMODE_NONE:
             data->glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL);
             data->glDisable(GL_BLEND);
@@ -723,11 +790,11 @@
             data->glBlendFunc(GL_ZERO, GL_SRC_COLOR);
             break;
         }
-        data->blendMode = blendMode;
+        data->blendMode = texture->blendMode;
     }
 
-    if (scaleMode != data->scaleMode) {
-        switch (scaleMode) {
+    if (texture->scaleMode != data->scaleMode) {
+        switch (texture->scaleMode) {
         case SDL_TEXTURESCALEMODE_NONE:
         case SDL_TEXTURESCALEMODE_FAST:
             data->glTexParameteri(texturedata->type, GL_TEXTURE_MIN_FILTER,
@@ -743,7 +810,7 @@
                                   GL_LINEAR);
             break;
         }
-        data->scaleMode = scaleMode;
+        data->scaleMode = texture->scaleMode;
     }
 
     data->glBegin(GL_TRIANGLE_STRIP);
--- a/src/video/SDL_renderer_sw.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_renderer_sw.c	Mon Aug 28 03:17:39 2006 +0000
@@ -26,6 +26,7 @@
 #include "SDL_pixels_c.h"
 #include "SDL_rect_c.h"
 #include "SDL_yuv_sw_c.h"
+#include "SDL_rendercopy.h"
 
 
 /* SDL surface based renderer implementation */
@@ -44,22 +45,28 @@
 static int SW_GetTexturePalette(SDL_Renderer * renderer,
                                 SDL_Texture * texture, SDL_Color * colors,
                                 int firstcolor, int ncolors);
-static int SW_UpdateTexture(SDL_Renderer * renderer,
-                            SDL_Texture * texture, const SDL_Rect * rect,
-                            const void *pixels, int pitch);
+static int SW_SetTextureColorMod(SDL_Renderer * renderer,
+                                 SDL_Texture * texture);
+static int SW_SetTextureAlphaMod(SDL_Renderer * renderer,
+                                 SDL_Texture * texture);
+static int SW_SetTextureBlendMode(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int SW_SetTextureScaleMode(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
+                            const SDL_Rect * rect, const void *pixels,
+                            int pitch);
 static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * rect, int markDirty,
-                          void **pixels, int *pitch);
+                          const SDL_Rect * rect, int markDirty, void **pixels,
+                          int *pitch);
 static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void SW_DirtyTexture(SDL_Renderer * renderer,
                             SDL_Texture * texture, int numrects,
                             const SDL_Rect * rects);
-static int SW_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect,
-                         Uint32 color);
+static int SW_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                         Uint8 a, const SDL_Rect * rect);
 static int SW_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                         const SDL_Rect * srcrect,
-                         const SDL_Rect * dstrect, int blendMode,
-                         int scaleMode);
+                         const SDL_Rect * srcrect, const SDL_Rect * dstrect);
 static void SW_RenderPresent(SDL_Renderer * renderer);
 static void SW_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void SW_DestroyRenderer(SDL_Renderer * renderer);
@@ -72,8 +79,11 @@
      (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTCOPY |
       SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTFLIP3 |
       SDL_RENDERER_PRESENTDISCARD | SDL_RENDERER_PRESENTVSYNC),
+     (SDL_TEXTUREMODULATE_NONE | SDL_TEXTUREMODULATE_COLOR |
+      SDL_TEXTUREMODULATE_ALPHA),
      (SDL_TEXTUREBLENDMODE_NONE | SDL_TEXTUREBLENDMODE_MASK |
-      SDL_TEXTUREBLENDMODE_BLEND),
+      SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD |
+      SDL_TEXTUREBLENDMODE_MOD),
      (SDL_TEXTURESCALEMODE_NONE | SDL_TEXTURESCALEMODE_FAST),
      11,
      {
@@ -188,6 +198,10 @@
     renderer->QueryTexturePixels = SW_QueryTexturePixels;
     renderer->SetTexturePalette = SW_SetTexturePalette;
     renderer->GetTexturePalette = SW_GetTexturePalette;
+    renderer->SetTextureColorMod = SW_SetTextureColorMod;
+    renderer->SetTextureAlphaMod = SW_SetTextureAlphaMod;
+    renderer->SetTextureBlendMode = SW_SetTextureBlendMode;
+    renderer->SetTextureScaleMode = SW_SetTextureScaleMode;
     renderer->UpdateTexture = SW_UpdateTexture;
     renderer->LockTexture = SW_LockTexture;
     renderer->UnlockTexture = SW_UnlockTexture;
@@ -333,6 +347,7 @@
     if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
         texture->driverdata = SDL_SW_CreateYUVTexture(texture);
     } else {
+        SDL_Surface *surface;
         int bpp;
         Uint32 Rmask, Gmask, Bmask, Amask;
 
@@ -400,6 +415,72 @@
     }
 }
 
+/*
+ * Look up the copy routine matching the texture's format and its current
+ * modMode/blendMode/scaleMode, and cache it in the backing surface's
+ * userdata slot.
+ *
+ * FOURCC textures are skipped: their driverdata comes from
+ * SDL_SW_CreateYUVTexture() rather than being an SDL_Surface, so there is
+ * no userdata slot to write to.
+ */
+static void
+SW_UpdateRenderCopyFunc(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
+    SDL_Surface *surface;
+
+    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
+        return;
+    }
+
+    surface = (SDL_Surface *) texture->driverdata;
+    surface->userdata =
+        SDL_GetRenderCopyFunc(texture->format, data->format, texture->modMode,
+                              texture->blendMode, texture->scaleMode);
+}
+
+static int
+SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    SW_UpdateRenderCopyFunc(renderer, texture);     /* refresh the cached copy routine for the new modulation state */
+    return 0;                                       /* always reports success */
+}
+
+static int
+SW_SetTextureAlphaMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    SW_UpdateRenderCopyFunc(renderer, texture);     /* refresh the cached copy routine for the new modulation state */
+    return 0;                                       /* always reports success */
+}
+
+/*
+ * Validate texture->blendMode for the software renderer and refresh the
+ * cached copy routine.
+ * NONE, MASK, BLEND, ADD and MOD are accepted (returns 0).  Any other
+ * value is reported through SDL_Unsupported(), the texture falls back to
+ * SDL_TEXTUREBLENDMODE_NONE and -1 is returned.
+ */
+static int
+SW_SetTextureBlendMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    int status = 0;
+
+    switch (texture->blendMode) {
+    case SDL_TEXTUREBLENDMODE_NONE:
+    case SDL_TEXTUREBLENDMODE_MASK:
+    case SDL_TEXTUREBLENDMODE_BLEND:
+    case SDL_TEXTUREBLENDMODE_ADD:
+    case SDL_TEXTUREBLENDMODE_MOD:
+        /* supported as is */
+        break;
+    default:
+        SDL_Unsupported();
+        texture->blendMode = SDL_TEXTUREBLENDMODE_NONE;
+        status = -1;
+        break;
+    }
+
+    /* Runs on every path, after any fallback has been applied. */
+    SW_UpdateRenderCopyFunc(renderer, texture);
+    return status;
+}
+
+/*
+ * Validate texture->scaleMode for the software renderer and refresh the
+ * cached copy routine.
+ * NONE and FAST are accepted (returns 0).  SLOW and BEST are downgraded
+ * to FAST and any unknown value to NONE; those cases are reported through
+ * SDL_Unsupported() and return -1.
+ */
+static int
+SW_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    int status = 0;
+
+    switch (texture->scaleMode) {
+    case SDL_TEXTURESCALEMODE_NONE:
+    case SDL_TEXTURESCALEMODE_FAST:
+        /* supported as is */
+        break;
+    case SDL_TEXTURESCALEMODE_SLOW:
+    case SDL_TEXTURESCALEMODE_BEST:
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_FAST;
+        status = -1;
+        break;
+    default:
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_NONE;
+        status = -1;
+        break;
+    }
+
+    /* Runs on every path, after any fallback has been applied. */
+    SW_UpdateRenderCopyFunc(renderer, texture);
+    return status;
+}
+
 static int
 SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, const void *pixels, int pitch)
@@ -462,10 +543,11 @@
 }
 
 static int
-SW_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect, Uint32 color)
+SW_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b, Uint8 a,
+              const SDL_Rect * rect)
 {
     SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
-    Uint8 r, g, b, a;
+    Uint32 color;
     SDL_Rect real_rect;
     int status;
 
@@ -473,10 +555,6 @@
         SDL_AddDirtyRect(&data->dirty, rect);
     }
 
-    a = (Uint8) ((color >> 24) & 0xFF);
-    r = (Uint8) ((color >> 16) & 0xFF);
-    g = (Uint8) ((color >> 8) & 0xFF);
-    b = (Uint8) (color & 0xFF);
     color = SDL_MapRGBA(data->surface.format, r, g, b, a);
 
     if (data->renderer->
@@ -500,8 +578,7 @@
 
 static int
 SW_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-              const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-              int blendMode, int scaleMode)
+              const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
     SDL_Window *window = SDL_GetWindowFromID(renderer->window);
@@ -525,27 +602,55 @@
                                 data->surface.pixels, data->surface.pitch);
     } else {
         SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
-        SDL_Rect real_srcrect = *srcrect;
-        SDL_Rect real_dstrect;
+        SDL_RenderCopyFunc copyfunc = (SDL_RenderCopyFunc) surface->userdata;
 
-        data->surface.w = dstrect->w;
-        data->surface.h = dstrect->h;
-        data->surface.clip_rect.w = dstrect->w;
-        data->surface.clip_rect.h = dstrect->h;
-        real_dstrect = data->surface.clip_rect;
+        if (copyfunc) {
+            SDL_RenderCopyData copydata;
 
-        if (blendMode &
-            (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND)) {
-            SDL_SetAlpha(surface, SDL_SRCALPHA, 0);
+            copydata.src =
+                (Uint8 *) surface->pixels + srcrect->y * surface->pitch +
+                srcrect->x * surface->format->BytesPerPixel;
+            copydata.src_w = srcrect->w;
+            copydata.src_h = srcrect->h;
+            copydata.src_pitch = surface->pitch;
+            copydata.dst = (Uint8 *) data->surface.pixels;
+            copydata.dst_w = dstrect->w;
+            copydata.dst_h = dstrect->h;
+            copydata.dst_pitch = data->surface.pitch;
+            copydata.flags = 0;
+            if (texture->modMode & SDL_TEXTUREMODULATE_COLOR) {
+                copydata.flags |= SDL_RENDERCOPY_MODULATE_COLOR;
+                copydata.r = texture->r;
+                copydata.g = texture->g;
+                copydata.b = texture->b;
+            }
+            if (texture->modMode & SDL_TEXTUREMODULATE_ALPHA) {
+                copydata.flags |= SDL_RENDERCOPY_MODULATE_ALPHA;
+                copydata.a = texture->a;
+            }
+            if (texture->
+                blendMode & (SDL_TEXTUREBLENDMODE_MASK |
+                             SDL_TEXTUREBLENDMODE_BLEND)) {
+                copydata.flags |= SDL_RENDERCOPY_BLEND;
+            } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_ADD) {
+                copydata.flags |= SDL_RENDERCOPY_ADD;
+            } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_MOD) {
+                copydata.flags |= SDL_RENDERCOPY_MOD;
+            }
+            if (texture->scaleMode) {
+                copydata.flags |= SDL_RENDERCOPY_NEAREST;
+            }
+            status = copyfunc(&copydata);
         } else {
-            SDL_SetAlpha(surface, 0, 0);
-        }
-        if (scaleMode != SDL_TEXTURESCALEMODE_NONE &&
-            (srcrect->w != dstrect->w || srcrect->h != dstrect->h)) {
-            status =
-                SDL_SoftStretch(surface, &real_srcrect, &data->surface,
-                                &real_dstrect);
-        } else {
+            SDL_Rect real_srcrect = *srcrect;
+            SDL_Rect real_dstrect;
+
+            data->surface.w = dstrect->w;
+            data->surface.h = dstrect->h;
+            data->surface.clip_rect.w = dstrect->w;
+            data->surface.clip_rect.h = dstrect->h;
+            real_dstrect = data->surface.clip_rect;
+
             status =
                 SDL_LowerBlit(surface, &real_srcrect, &data->surface,
                               &real_dstrect);
@@ -567,9 +672,7 @@
         SDL_DirtyRect *dirty;
         for (dirty = data->dirty.list; dirty; dirty = dirty->next) {
             data->renderer->RenderCopy(data->renderer, texture, &dirty->rect,
-                                       &dirty->rect,
-                                       SDL_TEXTUREBLENDMODE_NONE,
-                                       SDL_TEXTURESCALEMODE_NONE);
+                                       &dirty->rect);
         }
         SDL_ClearDirtyRects(&data->dirty);
     } else {
@@ -578,9 +681,7 @@
         rect.y = 0;
         rect.w = texture->w;
         rect.h = texture->h;
-        data->renderer->RenderCopy(data->renderer, texture, &rect, &rect,
-                                   SDL_TEXTUREBLENDMODE_NONE,
-                                   SDL_TEXTURESCALEMODE_NONE);
+        data->renderer->RenderCopy(data->renderer, texture, &rect, &rect);
     }
     data->renderer->RenderPresent(data->renderer);
 
--- a/src/video/SDL_stretch.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_stretch.c	Mon Aug 28 03:17:39 2006 +0000
@@ -121,30 +121,33 @@
 
 #else
 
-#define DEFINE_COPY_ROW(name, type)			\
-void name(type *src, int src_w, type *dst, int dst_w)	\
-{							\
-	int i;						\
-	int pos, inc;					\
-	type pixel = 0;					\
-							\
-	pos = 0x10000;					\
-	inc = (src_w << 16) / dst_w;			\
-	for ( i=dst_w; i>0; --i ) {			\
-		while ( pos >= 0x10000L ) {		\
-			pixel = *src++;			\
-			pos -= 0x10000L;		\
-		}					\
-		*dst++ = pixel;				\
-		pos += inc;				\
-	}						\
+#define DEFINE_COPY_ROW(name, type)                     \
+void name(type *src, int src_w, type *dst, int dst_w)   \
+{                                                       \
+    int i;                                              \
+    int pos, inc;                                       \
+    type pixel = 0;                                     \
+                                                        \
+    pos = 0x10000;                                      \
+    inc = (src_w << 16) / dst_w;                        \
+    for ( i=dst_w; i>0; --i ) {                         \
+        while ( pos >= 0x10000L ) {                     \
+            pixel = *src++;                             \
+            pos -= 0x10000L;                            \
+        }                                               \
+        *dst++ = pixel;                                 \
+        pos += inc;                                     \
+    }                                                   \
 }
+/* *INDENT-OFF* */
 DEFINE_COPY_ROW(copy_row1, Uint8)
-    DEFINE_COPY_ROW(copy_row2, Uint16) DEFINE_COPY_ROW(copy_row4, Uint32)
+DEFINE_COPY_ROW(copy_row2, Uint16)
+DEFINE_COPY_ROW(copy_row4, Uint32)
+/* *INDENT-ON* */
 #endif /* USE_ASM_STRETCH */
 /* The ASM code doesn't handle 24-bpp stretch blits */
-     void
-     copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
+void
+copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
 {
     int i;
     int pos, inc;
@@ -278,14 +281,20 @@
 #ifdef __GNUC__
           __asm__ __volatile__("call *%4": "=&D"(u1), "=&S"(u2): "0"(dstp), "1"(srcp), "r"(copy_row):"memory");
 #elif defined(_MSC_VER) || defined(__WATCOMC__)
+            /* *INDENT-OFF* */
             {
                 void *code = copy_row;
                 __asm {
-                push edi
-                        push esi
-                        mov edi, dstp
-                        mov esi, srcp call dword ptr code pop esi pop edi}
+                    push edi
+                    push esi
+                    mov edi, dstp
+                    mov esi, srcp
+                    call dword ptr code
+                    pop esi
+                    pop edi
+                }
             }
+            /* *INDENT-ON* */
 #else
 #error Need inline assembly for this compiler
 #endif
--- a/src/video/SDL_sysvideo.h	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_sysvideo.h	Mon Aug 28 03:17:39 2006 +0000
@@ -44,6 +44,10 @@
     int access;         /**< SDL_TextureAccess */
     int w;              /**< The width of the texture */
     int h;              /**< The height of the texture */
+    int modMode;        /**< The texture modulation mode */
+    int blendMode;      /**< The texture blend mode */
+    int scaleMode;      /**< The texture scale mode */
+    Uint8 r, g, b, a;   /**< Texture modulation values */
 
     SDL_Renderer *renderer;
 
@@ -66,6 +70,14 @@
     int (*GetTexturePalette) (SDL_Renderer * renderer, SDL_Texture * texture,
                               SDL_Color * colors, int firstcolor,
                               int ncolors);
+    int (*SetTextureColorMod) (SDL_Renderer * renderer,
+                               SDL_Texture * texture);
+    int (*SetTextureAlphaMod) (SDL_Renderer * renderer,
+                               SDL_Texture * texture);
+    int (*SetTextureBlendMode) (SDL_Renderer * renderer,
+                                SDL_Texture * texture);
+    int (*SetTextureScaleMode) (SDL_Renderer * renderer,
+                                SDL_Texture * texture);
     int (*UpdateTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
                           const SDL_Rect * rect, const void *pixels,
                           int pitch);
@@ -75,11 +87,10 @@
     void (*UnlockTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
     void (*DirtyTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
                           int numrects, const SDL_Rect * rects);
-    int (*RenderFill) (SDL_Renderer * renderer, const SDL_Rect * rect,
-                       Uint32 color);
+    int (*RenderFill) (SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                       Uint8 a, const SDL_Rect * rect);
     int (*RenderCopy) (SDL_Renderer * renderer, SDL_Texture * texture,
-                       const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-                       int blendMode, int scaleMode);
+                       const SDL_Rect * srcrect, const SDL_Rect * dstrect);
     void (*RenderPresent) (SDL_Renderer * renderer);
     void (*DestroyTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
 
--- a/src/video/SDL_video.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/SDL_video.c	Mon Aug 28 03:17:39 2006 +0000
@@ -1510,6 +1510,10 @@
     texture->access = access;
     texture->w = w;
     texture->h = h;
+    texture->r = 255;
+    texture->g = 255;
+    texture->b = 255;
+    texture->a = 255;
     texture->renderer = renderer;
 
     if (renderer->CreateTexture(renderer, texture) < 0) {
@@ -1772,6 +1776,162 @@
 }
 
 int
+SDL_SetTextureColorMod(SDL_TextureID textureID, Uint8 r, Uint8 g, Uint8 b)
+{                               /* Store a texture's color modulation and pass it to the renderer hook. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+    SDL_Renderer *renderer;
+
+    if (!texture) {
+        return -1;              /* no texture for this ID */
+    }
+
+    renderer = texture->renderer;
+    if (!renderer->SetTextureColorMod) {
+        return -1;              /* renderer provides no color modulation hook */
+    }
+    if (r < 255 || g < 255 || b < 255) {        /* 255,255,255 leaves colors unchanged */
+        texture->modMode |= SDL_TEXTUREMODULATE_COLOR;
+    } else {
+        texture->modMode &= ~SDL_TEXTUREMODULATE_COLOR;
+    }
+    texture->r = r;
+    texture->g = g;
+    texture->b = b;
+    return renderer->SetTextureColorMod(renderer, texture);     /* the hook's result is returned as-is */
+}
+
+int
+SDL_GetTextureColorMod(SDL_TextureID textureID, Uint8 * r, Uint8 * g,
+                       Uint8 * b)
+{
+    /* Report the stored color modulation; returns 0, or -1 if the ID has no texture. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+
+    if (!texture) {
+        return -1;
+    }
+
+    /* Every output pointer is optional; a NULL pointer skips that channel. */
+    if (r) {
+        *r = texture->r;
+    }
+    if (g) {
+        *g = texture->g;
+    }
+    if (b) {
+        *b = texture->b;
+    }
+    return 0;
+}
+
+int
+SDL_SetTextureAlphaMod(SDL_TextureID textureID, Uint8 alpha)
+{                               /* Store a texture's alpha modulation and pass it to the renderer hook. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+    SDL_Renderer *renderer;
+
+    if (!texture) {
+        return -1;              /* no texture for this ID */
+    }
+
+    renderer = texture->renderer;
+    if (!renderer->SetTextureAlphaMod) {
+        return -1;              /* renderer provides no alpha modulation hook */
+    }
+    if (alpha < 255) {          /* only values below 255 enable the modulation flag */
+        texture->modMode |= SDL_TEXTUREMODULATE_ALPHA;
+    } else {
+        texture->modMode &= ~SDL_TEXTUREMODULATE_ALPHA;
+    }
+    texture->a = alpha;
+    return renderer->SetTextureAlphaMod(renderer, texture);     /* the hook's result is returned as-is */
+}
+
+int
+SDL_GetTextureAlphaMod(SDL_TextureID textureID, Uint8 * alpha)
+{
+    /* Report the stored alpha modulation; returns 0, or -1 if the ID has no texture. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+
+    if (!texture) {
+        return -1;
+    }
+
+    if (alpha) {                /* a NULL output pointer is allowed and simply skipped */
+        *alpha = texture->a;
+    }
+    return 0;
+}
+
+int
+SDL_SetTextureBlendMode(SDL_TextureID textureID, int blendMode)
+{                               /* Store a texture's blend mode and pass it to the renderer hook. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+    SDL_Renderer *renderer;
+
+    if (!texture) {
+        return -1;              /* no texture for this ID */
+    }
+
+    renderer = texture->renderer;
+    if (!renderer->SetTextureBlendMode) {
+        return -1;              /* renderer provides no blend mode hook */
+    }
+    texture->blendMode = blendMode;     /* stored first, so the hook sees (and may replace) the request */
+    return renderer->SetTextureBlendMode(renderer, texture);    /* the hook's result is returned as-is */
+}
+
+int
+SDL_GetTextureBlendMode(SDL_TextureID textureID, int *blendMode)
+{
+    /* Report the stored blend mode; returns 0, or -1 if the ID has no texture. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+
+    if (!texture) {
+        return -1;
+    }
+
+    if (blendMode) {            /* a NULL output pointer is allowed and simply skipped */
+        *blendMode = texture->blendMode;
+    }
+    return 0;
+}
+
+int
+SDL_SetTextureScaleMode(SDL_TextureID textureID, int scaleMode)
+{                               /* Store a texture's scale mode and pass it to the renderer hook. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+    SDL_Renderer *renderer;
+
+    if (!texture) {
+        return -1;              /* no texture for this ID */
+    }
+
+    renderer = texture->renderer;
+    if (!renderer->SetTextureScaleMode) {
+        return -1;              /* renderer provides no scale mode hook */
+    }
+    texture->scaleMode = scaleMode;     /* stored first, so the hook sees (and may replace) the request */
+    return renderer->SetTextureScaleMode(renderer, texture);    /* the hook's result is returned as-is */
+}
+
+int
+SDL_GetTextureScaleMode(SDL_TextureID textureID, int *scaleMode)
+{
+    /* Report the stored scale mode; returns 0, or -1 if the ID has no texture. */
+    SDL_Texture *texture = SDL_GetTextureFromID(textureID);
+
+    if (!texture) {
+        return -1;
+    }
+
+    if (scaleMode) {            /* a NULL output pointer is allowed and simply skipped */
+        *scaleMode = texture->scaleMode;
+    }
+    return 0;
+}
+
+int
 SDL_UpdateTexture(SDL_TextureID textureID, const SDL_Rect * rect,
                   const void *pixels, int pitch)
 {
@@ -1864,7 +2024,7 @@
 }
 
 int
-SDL_RenderFill(const SDL_Rect * rect, Uint32 color)
+SDL_RenderFill(Uint8 r, Uint8 g, Uint8 b, Uint8 a, const SDL_Rect * rect)
 {
     SDL_Renderer *renderer;
     SDL_Window *window;
@@ -1891,12 +2051,12 @@
         }
     }
 
-    return renderer->RenderFill(renderer, &real_rect, color);
+    return renderer->RenderFill(renderer, r, g, b, a, &real_rect);
 }
 
 int
 SDL_RenderCopy(SDL_TextureID textureID, const SDL_Rect * srcrect,
-               const SDL_Rect * dstrect, int blendMode, int scaleMode)
+               const SDL_Rect * dstrect)
 {
     SDL_Texture *texture = SDL_GetTextureFromID(textureID);
     SDL_Renderer *renderer;
@@ -1932,7 +2092,7 @@
     }
 
     return renderer->RenderCopy(renderer, texture, &real_srcrect,
-                                &real_dstrect, blendMode, scaleMode);
+                                &real_dstrect);
 }
 
 void
--- a/src/video/dummy/SDL_nullrender.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/dummy/SDL_nullrender.c	Mon Aug 28 03:17:39 2006 +0000
@@ -24,6 +24,7 @@
 #include "SDL_video.h"
 #include "../SDL_sysvideo.h"
 #include "../SDL_yuv_sw_c.h"
+#include "../SDL_rendercopy.h"
 
 
 /* SDL surface based renderer implementation */
@@ -43,6 +44,14 @@
                                        SDL_Texture * texture,
                                        SDL_Color * colors, int firstcolor,
                                        int ncolors);
+static int SDL_DUMMY_SetTextureColorMod(SDL_Renderer * renderer,
+                                        SDL_Texture * texture);
+static int SDL_DUMMY_SetTextureAlphaMod(SDL_Renderer * renderer,
+                                        SDL_Texture * texture);
+static int SDL_DUMMY_SetTextureBlendMode(SDL_Renderer * renderer,
+                                         SDL_Texture * texture);
+static int SDL_DUMMY_SetTextureScaleMode(SDL_Renderer * renderer,
+                                         SDL_Texture * texture);
 static int SDL_DUMMY_UpdateTexture(SDL_Renderer * renderer,
                                    SDL_Texture * texture,
                                    const SDL_Rect * rect, const void *pixels,
@@ -57,13 +66,12 @@
                                    const SDL_Rect * rects);
 static void SDL_DUMMY_SelectRenderTexture(SDL_Renderer * renderer,
                                           SDL_Texture * texture);
-static int SDL_DUMMY_RenderFill(SDL_Renderer * renderer,
-                                const SDL_Rect * rect, Uint32 color);
+static int SDL_DUMMY_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g,
+                                Uint8 b, Uint8 a, const SDL_Rect * rect);
 static int SDL_DUMMY_RenderCopy(SDL_Renderer * renderer,
                                 SDL_Texture * texture,
                                 const SDL_Rect * srcrect,
-                                const SDL_Rect * dstrect, int blendMode,
-                                int scaleMode);
+                                const SDL_Rect * dstrect);
 static int SDL_DUMMY_RenderReadPixels(SDL_Renderer * renderer,
                                       const SDL_Rect * rect, void *pixels,
                                       int pitch);
@@ -83,8 +91,11 @@
      (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTCOPY |
       SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTFLIP3 |
       SDL_RENDERER_PRESENTDISCARD),
+     (SDL_TEXTUREMODULATE_NONE | SDL_TEXTUREMODULATE_COLOR |
+      SDL_TEXTUREMODULATE_ALPHA),
      (SDL_TEXTUREBLENDMODE_NONE | SDL_TEXTUREBLENDMODE_MASK |
-      SDL_TEXTUREBLENDMODE_BLEND),
+      SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD |
+      SDL_TEXTUREBLENDMODE_MOD),
      (SDL_TEXTURESCALEMODE_NONE | SDL_TEXTURESCALEMODE_FAST),
      11,
      {
@@ -144,6 +155,10 @@
     renderer->QueryTexturePixels = SDL_DUMMY_QueryTexturePixels;
     renderer->SetTexturePalette = SDL_DUMMY_SetTexturePalette;
     renderer->GetTexturePalette = SDL_DUMMY_GetTexturePalette;
+    renderer->SetTextureColorMod = SDL_DUMMY_SetTextureColorMod;
+    renderer->SetTextureAlphaMod = SDL_DUMMY_SetTextureAlphaMod;
+    renderer->SetTextureBlendMode = SDL_DUMMY_SetTextureBlendMode;
+    renderer->SetTextureScaleMode = SDL_DUMMY_SetTextureScaleMode;
     renderer->UpdateTexture = SDL_DUMMY_UpdateTexture;
     renderer->LockTexture = SDL_DUMMY_LockTexture;
     renderer->UnlockTexture = SDL_DUMMY_UnlockTexture;
@@ -258,6 +273,74 @@
     }
 }
 
+static void
+SDL_DUMMY_UpdateRenderCopyFunc(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
+    SDL_VideoDisplay *display = SDL_GetDisplayFromWindow(window);
+    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+    /* Cache the copy routine in surface->userdata; SDL_DUMMY_RenderCopy reads it back and uses SDL_LowerBlit if it is NULL. */
+    surface->userdata =
+        SDL_GetRenderCopyFunc(texture->format, display->current_mode.format,
+                              texture->modMode, texture->blendMode,
+                              texture->scaleMode);
+}
+
+static int
+SDL_DUMMY_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* Refresh the cached copy routine after a color modulation change. */
+    SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);  /* re-selects using texture->modMode */
+    return 0;                   /* always reports success */
+}
+
+static int
+SDL_DUMMY_SetTextureAlphaMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* Refresh the cached copy routine after an alpha modulation change. */
+    SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);  /* re-selects using texture->modMode */
+    return 0;                   /* always reports success */
+}
+
+static int
+SDL_DUMMY_SetTextureBlendMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* Validate texture->blendMode; returns 0 if accepted, -1 if rejected. */
+    switch (texture->blendMode) {
+    case SDL_TEXTUREBLENDMODE_NONE:
+    case SDL_TEXTUREBLENDMODE_MASK:
+    case SDL_TEXTUREBLENDMODE_BLEND:
+    case SDL_TEXTUREBLENDMODE_ADD:
+    case SDL_TEXTUREBLENDMODE_MOD:
+        SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);      /* accepted: re-select the copy routine */
+        return 0;
+    default:                    /* any value that is not exactly one of the modes above */
+        SDL_Unsupported();      /* NOTE(review): presumably records an "unsupported" error - confirm */
+        texture->blendMode = SDL_TEXTUREBLENDMODE_NONE; /* fall back to no blending */
+        SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);      /* keep the cached routine in step with the fallback */
+        return -1;
+    }
+}
+
+static int
+SDL_DUMMY_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* Validate texture->scaleMode; returns 0 if accepted, -1 if rejected. */
+    switch (texture->scaleMode) {
+    case SDL_TEXTURESCALEMODE_NONE:
+    case SDL_TEXTURESCALEMODE_FAST:
+        SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);      /* accepted: re-select the copy routine */
+        return 0;
+    case SDL_TEXTURESCALEMODE_SLOW:
+    case SDL_TEXTURESCALEMODE_BEST:
+        SDL_Unsupported();      /* NOTE(review): presumably records an "unsupported" error - confirm */
+        texture->scaleMode = SDL_TEXTURESCALEMODE_FAST; /* downgrade to FAST, but still report failure */
+        SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);
+        return -1;
+    default:                    /* any other value */
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_NONE; /* fall back to no scaling */
+        SDL_DUMMY_UpdateRenderCopyFunc(renderer, texture);
+        return -1;
+    }
+}
+
 static int
 SDL_DUMMY_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                         const SDL_Rect * rect, const void *pixels, int pitch)
@@ -320,19 +403,15 @@
 }
 
 static int
-SDL_DUMMY_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect,
-                     Uint32 color)
+SDL_DUMMY_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                     Uint8 a, const SDL_Rect * rect)
 {
     SDL_DUMMY_RenderData *data =
         (SDL_DUMMY_RenderData *) renderer->driverdata;
     SDL_Surface *target = data->screens[data->current_screen];
+    Uint32 color;
     SDL_Rect real_rect = *rect;
-    Uint8 r, g, b, a;
 
-    a = (Uint8) ((color >> 24) & 0xFF);
-    r = (Uint8) ((color >> 16) & 0xFF);
-    g = (Uint8) ((color >> 8) & 0xFF);
-    b = (Uint8) (color & 0xFF);
     color = SDL_MapRGBA(target->format, r, g, b, a);
 
     return SDL_FillRect(target, &real_rect, color);
@@ -340,8 +419,7 @@
 
 static int
 SDL_DUMMY_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                     const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-                     int blendMode, int scaleMode)
+                     const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     SDL_DUMMY_RenderData *data =
         (SDL_DUMMY_RenderData *) renderer->driverdata;
@@ -360,20 +438,51 @@
     } else {
         SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
         SDL_Surface *target = data->screens[data->current_screen];
-        SDL_Rect real_srcrect = *srcrect;
-        SDL_Rect real_dstrect = *dstrect;
+        SDL_RenderCopyFunc copyfunc = (SDL_RenderCopyFunc) surface->userdata;
+
+        if (copyfunc) {
+            SDL_RenderCopyData copydata;
 
-        if (blendMode &
-            (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND)) {
-            SDL_SetAlpha(surface, SDL_SRCALPHA, 0);
+            copydata.src =
+                (Uint8 *) surface->pixels + srcrect->y * surface->pitch +
+                srcrect->x * surface->format->BytesPerPixel;
+            copydata.src_w = srcrect->w;
+            copydata.src_h = srcrect->h;
+            copydata.src_pitch = surface->pitch;
+            copydata.dst =
+                (Uint8 *) target->pixels + dstrect->y * target->pitch +
+                dstrect->x * target->format->BytesPerPixel;
+            copydata.dst_w = dstrect->w;
+            copydata.dst_h = dstrect->h;
+            copydata.dst_pitch = target->pitch;
+            copydata.flags = 0;
+            if (texture->modMode & SDL_TEXTUREMODULATE_COLOR) {
+                copydata.flags |= SDL_RENDERCOPY_MODULATE_COLOR;
+                copydata.r = texture->r;
+                copydata.g = texture->g;
+                copydata.b = texture->b;
+            }
+            if (texture->modMode & SDL_TEXTUREMODULATE_ALPHA) {
+                copydata.flags |= SDL_RENDERCOPY_MODULATE_ALPHA;
+                copydata.a = texture->a;
+            }
+            if (texture->
+                blendMode & (SDL_TEXTUREBLENDMODE_MASK |
+                             SDL_TEXTUREBLENDMODE_BLEND)) {
+                copydata.flags |= SDL_RENDERCOPY_BLEND;
+            } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_ADD) {
+                copydata.flags |= SDL_RENDERCOPY_ADD;
+            } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_MOD) {
+                copydata.flags |= SDL_RENDERCOPY_MOD;
+            }
+            if (texture->scaleMode) {
+                copydata.flags |= SDL_RENDERCOPY_NEAREST;
+            }
+            return copyfunc(&copydata);
         } else {
-            SDL_SetAlpha(surface, 0, 0);
-        }
-        if (scaleMode != SDL_TEXTURESCALEMODE_NONE &&
-            (srcrect->w != dstrect->w || srcrect->h != dstrect->h)) {
-            return SDL_SoftStretch(surface, &real_srcrect, target,
-                                   &real_dstrect);
-        } else {
+            SDL_Rect real_srcrect = *srcrect;
+            SDL_Rect real_dstrect = *dstrect;
+
             return SDL_LowerBlit(surface, &real_srcrect, target,
                                  &real_dstrect);
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/sdlgenblit.pl	Mon Aug 28 03:17:39 2006 +0000
@@ -0,0 +1,521 @@
+#!/usr/bin/perl -w
+#
+# A script to generate optimized C blitters for Simple DirectMedia Layer
+# http://www.libsdl.org/
+
+use warnings;
+use strict;
+
+my %file;
+
+# The formats potentially supported by this script:
+# SDL_PIXELFORMAT_INDEX8
+# SDL_PIXELFORMAT_RGB332
+# SDL_PIXELFORMAT_RGB444
+# SDL_PIXELFORMAT_RGB555
+# SDL_PIXELFORMAT_ARGB4444
+# SDL_PIXELFORMAT_ARGB1555
+# SDL_PIXELFORMAT_RGB565
+# SDL_PIXELFORMAT_RGB24
+# SDL_PIXELFORMAT_BGR24
+# SDL_PIXELFORMAT_RGB888
+# SDL_PIXELFORMAT_BGR888
+# SDL_PIXELFORMAT_ARGB8888
+# SDL_PIXELFORMAT_RGBA8888
+# SDL_PIXELFORMAT_ABGR8888
+# SDL_PIXELFORMAT_BGRA8888
+# SDL_PIXELFORMAT_ARGB2101010
+
+# The formats we're actually creating blitters for:
+my @src_formats = (    # source pixel formats; every entry is also a key of %get_rgba_string
+    "RGB888",
+    "BGR888",
+    "ARGB8888",
+    "RGBA8888",
+    "ABGR8888",
+    "BGRA8888",
+);
+my @dst_formats = (    # destination pixel formats; these are the only keys of %set_rgba_string
+    "RGB888",
+    "BGR888",
+);
+
+my %format_size = (    # NOTE(review): presumably bytes per pixel for each format - confirm against its users
+    "RGB888" => 4,
+    "BGR888" => 4,
+    "ARGB8888" => 4,
+    "RGBA8888" => 4,
+    "ABGR8888" => 4,
+    "BGRA8888" => 4,
+);
+
+my %format_type = (    # NOTE(review): presumably the C type holding one pixel of each format - confirm
+    "RGB888" => "Uint32",
+    "BGR888" => "Uint32",
+    "ARGB8888" => "Uint32",
+    "RGBA8888" => "Uint32",
+    "ABGR8888" => "Uint32",
+    "BGRA8888" => "Uint32",
+);
+
+my %get_rgba_string = (    # C templates unpacking _pixel into _R/_G/_B/_A; get_rgba() replaces each "_" with a name prefix
+    "RGB888" => "_R = (Uint8)(_pixel >> 16); _G = (Uint8)(_pixel >> 8); _B = (Uint8)_pixel; _A = 0xFF;",
+    "BGR888" => "_B = (Uint8)(_pixel >> 16); _G = (Uint8)(_pixel >> 8); _R = (Uint8)_pixel; _A = 0xFF;",
+    "ARGB8888" => "_A = (Uint8)(_pixel >> 24); _R = (Uint8)(_pixel >> 16); _G = (Uint8)(_pixel >> 8); _B = (Uint8)_pixel;",
+    "RGBA8888" => "_R = (Uint8)(_pixel >> 24); _G = (Uint8)(_pixel >> 16); _B = (Uint8)(_pixel >> 8); _A = (Uint8)_pixel;",
+    "ABGR8888" => "_A = (Uint8)(_pixel >> 24); _B = (Uint8)(_pixel >> 16); _G = (Uint8)(_pixel >> 8); _R = (Uint8)_pixel;",
+    "BGRA8888" => "_B = (Uint8)(_pixel >> 24); _G = (Uint8)(_pixel >> 16); _R = (Uint8)(_pixel >> 8); _A = (Uint8)_pixel;",
+);
+
+my %set_rgba_string = (    # C templates packing _R/_G/_B into _pixel; set_rgba() replaces each "_" with a name prefix
+    "RGB888" => "_pixel = ((Uint32)_R << 16) | ((Uint32)_G << 8) | _B;",
+    "BGR888" => "_pixel = ((Uint32)_B << 16) | ((Uint32)_G << 8) | _R;",
+);
+
+sub open_file {    # Open "$name.new" on the global FILE handle and write the generated-file banner.
+    my $name = shift;    # target path; output goes to "$name.new" until close_file() settles it
+    open(FILE, ">$name.new") || die "Can't open $name.new: $!";
+    print FILE <<__EOF__;
+/* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken\@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* *INDENT-OFF* */
+
+__EOF__
+}
+
+sub close_file {    # Write the file trailer, close FILE, then keep or discard "$name.new".
+    my $name = shift;    # same target path that was given to open_file()
+    print FILE <<__EOF__;
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */
+__EOF__
+    close FILE;
+    if ( ! -f $name || system("cmp -s $name $name.new") != 0 ) {    # target missing, or cmp reports a difference
+        rename("$name.new", "$name");    # install the new output (result is not checked)
+    } else {
+        unlink("$name.new");    # identical output: drop the temporary and leave $name untouched
+    }
+}
+
+sub output_copydefs    # Print the SDL_RENDERCOPY_* flags, SDL_RenderCopyData, and the copy-function declarations to FILE.
+{
+    print FILE <<__EOF__;
+#define SDL_RENDERCOPY_MODULATE_COLOR   0x0001
+#define SDL_RENDERCOPY_MODULATE_ALPHA   0x0002
+#define SDL_RENDERCOPY_BLEND            0x0010
+#define SDL_RENDERCOPY_ADD              0x0020
+#define SDL_RENDERCOPY_MOD              0x0040
+#define SDL_RENDERCOPY_NEAREST          0x0100
+
+typedef struct {
+    void *src;
+    int src_w, src_h;
+    int src_pitch;
+    void *dst;
+    int dst_w, dst_h;
+    int dst_pitch;
+    void *aux_data;
+    int flags;
+    Uint8 r, g, b, a;
+} SDL_RenderCopyData;
+
+typedef int (*SDL_RenderCopyFunc)(SDL_RenderCopyData *data);
+
+extern SDL_RenderCopyFunc SDLCALL SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode);
+
+__EOF__
+}
+
+sub output_copyfuncname    # Print "$prefix SDL_RenderCopy_<src>_<dst>[_Modulate][_Blend][_Scale]" plus optional args and $suffix.
+{
+    my $prefix = shift;      # text printed before the name (output_copyfunc passes "int")
+    my $src = shift;         # source format name
+    my $dst = shift;         # destination format name
+    my $modulate = shift;    # true: append "_Modulate"
+    my $blend = shift;       # true: append "_Blend"
+    my $scale = shift;       # true: append "_Scale"
+    my $args = shift;        # true: append the "(SDL_RenderCopyData *data)" parameter list
+    my $suffix = shift;      # text printed after the name (output_copyfunc passes "\n")
+
+    print FILE "$prefix SDL_RenderCopy_${src}_${dst}";
+    if ( $modulate ) {
+        print FILE "_Modulate";
+    }
+    if ( $blend ) {
+        print FILE "_Blend";
+    }
+    if ( $scale ) {
+        print FILE "_Scale";
+    }
+    if ( $args ) {
+        print FILE "(SDL_RenderCopyData *data)";
+    }
+    print FILE "$suffix";
+}
+
+sub get_rgba    # Print C that loads one pixel and unpacks it into <prefix>R/G/B/A for the given format.
+{
+    my $prefix = shift;    # variable-name prefix substituted for "_" in the template; may be ""
+    my $format = shift;    # key into %get_rgba_string
+    my $string = $get_rgba_string{$format};
+    $string =~ s/_/$prefix/g;    # every underscore in the template becomes the prefix
+    if ( $prefix ne "" ) {    # prefixed form reads through the pointer named exactly like the prefix
+        print FILE <<__EOF__;
+            ${prefix}pixel = *$prefix;
+__EOF__
+    } else {    # unprefixed form always reads through "src"
+        print FILE <<__EOF__;
+            pixel = *src;
+__EOF__
+    }
+    print FILE <<__EOF__;
+            $string
+__EOF__
+}
+
+sub set_rgba    # Print C that packs <prefix>R/G/B into <prefix>pixel and stores it through "dst".
+{
+    my $prefix = shift;    # variable-name prefix substituted for "_" in the template
+    my $format = shift;    # key into %set_rgba_string
+    my $string = $set_rgba_string{$format};
+    $string =~ s/_/$prefix/g;    # every underscore in the template becomes the prefix
+    print FILE <<__EOF__;
+            $string
+            *dst = ${prefix}pixel;
+__EOF__
+}
+
+sub output_copycore    # Print the per-pixel C for optional color/alpha modulation and blending.
+{
+    my $src = shift;         # name prefix of the generated source-channel variables (e.g. ${src}R)
+    my $dst = shift;         # name prefix of the generated destination-channel variables
+    my $modulate = shift;    # true: emit color modulation (and alpha modulation when blending too)
+    my $blend = shift;       # true: emit the BLEND/ADD/MOD combination step
+    if ( $modulate ) {    # color modulation, guarded at run time by SDL_RENDERCOPY_MODULATE_COLOR
+        print FILE <<__EOF__;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                ${src}R = (${src}R * modulateR) / 255;
+                ${src}G = (${src}G * modulateG) / 255;
+                ${src}B = (${src}B * modulateB) / 255;
+            }
+__EOF__
+    }
+    if ( $modulate && $blend ) {    # alpha modulation is only generated for blending variants
+        print FILE <<__EOF__;
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                ${src}A = (${src}A * modulateA) / 255;
+            }
+__EOF__
+    }
+    if ( $blend ) {    # scale source color by source alpha for BLEND/ADD, then combine with the destination
+        print FILE <<__EOF__;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                ${src}R = (${src}R * ${src}A) / 255;
+                ${src}G = (${src}G * ${src}A) / 255;
+                ${src}B = (${src}B * ${src}A) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                ${dst}R = ${src}R + ((255 - ${src}A) * ${dst}R) / 255;
+                ${dst}G = ${src}G + ((255 - ${src}A) * ${dst}G) / 255;
+                ${dst}B = ${src}B + ((255 - ${src}A) * ${dst}B) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                ${dst}R = ${src}R + ${dst}R; if (${dst}R > 255) ${dst}R = 255;
+                ${dst}G = ${src}G + ${dst}G; if (${dst}G > 255) ${dst}G = 255;
+                ${dst}B = ${src}B + ${dst}B; if (${dst}B > 255) ${dst}B = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                ${dst}R = (${src}R * ${dst}R) / 255;
+                ${dst}G = (${src}G * ${dst}G) / 255;
+                ${dst}B = (${src}B * ${dst}B) / 255;
+                break;
+            }
+__EOF__
+    }
+}
+
+sub output_copyfunc             # emit one complete C copy function for a format pair / feature set
+{
+    my $src = shift;            # source pixel format name (key into %format_type and %format_size)
+    my $dst = shift;            # destination pixel format name (key into %format_type)
+    my $modulate = shift;       # true: function reads data->r/g/b/a as modulation factors
+    my $blend = shift;          # true: function unpacks both source and destination pixels
+    my $scale = shift;          # true: function steps through the source in 16.16 fixed point
+
+    output_copyfuncname("int", $src, $dst, $modulate, $blend, $scale, 1, "\n");   # signature line, including the (SDL_RenderCopyData *data) argument list
+    print FILE <<__EOF__;
+{
+    const int flags = data->flags;
+__EOF__
+    if ( $modulate ) {          # modulation factors referenced by the output_copycore() text
+        print FILE <<__EOF__;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+__EOF__
+    }
+    if ( $blend ) {             # blending keeps separate src*/dst* pixel and channel variables
+        print FILE <<__EOF__;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB, dstA;
+__EOF__
+    } elsif ( $modulate || $src ne $dst ) {     # otherwise one shared set, needed only when pixels get unpacked
+        print FILE <<__EOF__;
+    Uint32 pixel;
+    Uint32 R, G, B, A;
+__EOF__
+    }
+    if ( $scale ) {             # scaled copy: the source pixel is chosen per destination pixel
+        print FILE <<__EOF__;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        $format_type{$src} *src;
+        $format_type{$dst} *dst = ($format_type{$dst} *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = ($format_type{$src} *)(data->src + (srcy * data->src_pitch) + (srcx * $format_size{$src}));
+__EOF__
+        print FILE <<__EOF__;   # closes the "if (posx >= 0x10000L)" block opened above
+            }
+__EOF__
+        if ( $blend ) {         # unpack both pixels, combine them, repack into dst
+            get_rgba("src", $src);
+            get_rgba("dst", $dst);
+            output_copycore("src", "dst", $modulate, $blend);
+            set_rgba("dst", $dst);
+        } elsif ( $modulate || $src ne $dst ) {   # unpack src, modulate if requested, repack in dst format
+            get_rgba("", $src);
+            output_copycore("", "", $modulate, $blend);
+            set_rgba("", $dst);
+        } else {                # same format, nothing to change: raw pixel copy
+            print FILE <<__EOF__;
+            *dst = *src;
+__EOF__
+        }
+        print FILE <<__EOF__;   # advance the fixed-point source position and the dst pointer
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+__EOF__
+    } else {                    # unscaled copy: src and dst pointers advance together
+        print FILE <<__EOF__;
+
+    while (data->dst_h--) {
+        $format_type{$src} *src = ($format_type{$src} *)data->src;
+        $format_type{$dst} *dst = ($format_type{$dst} *)data->dst;
+        int n = data->dst_w;
+        while (n--) {
+__EOF__
+        if ( $blend ) {         # same three per-pixel variants as in the scaled branch
+            get_rgba("src", $src);
+            get_rgba("dst", $dst);
+            output_copycore("src", "dst", $modulate, $blend);
+            set_rgba("dst", $dst);
+        } elsif ( $modulate || $src ne $dst ) {
+            get_rgba("", $src);
+            output_copycore("", "", $modulate, $blend);
+            set_rgba("", $dst);
+        } else {
+            print FILE <<__EOF__;
+            *dst = *src;
+__EOF__
+        }
+        print FILE <<__EOF__;   # next pixel, then next row of both buffers
+            ++src;
+            ++dst;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+__EOF__
+    }
+    print FILE <<__EOF__;       # common epilogue: the generated function always returns 0
+    return 0;
+}
+
+__EOF__
+}
+
+# Emit the extern prototypes of every copy function for one format pair.
+sub output_copyfunc_h
+{
+    my ($src, $dst) = @_;
+    foreach my $modulate (0 .. 1) {
+        foreach my $blend (0 .. 1) {
+            foreach my $scale (0 .. 1) {
+                # The all-off, same-format combination has no function.
+                next unless ( $modulate || $blend || $scale || $src ne $dst );
+                output_copyfuncname("extern int SDLCALL", $src, $dst, $modulate, $blend, $scale, 1, ";\n");
+            }
+        }
+    }
+}
+
+# Emit the #include preamble of the generated C file; the text is fixed,
+# so it is written as one literal string.
+sub output_copyinc
+{
+    print FILE "#include \"SDL_video.h\"\n"
+             . "#include \"SDL_rendercopy.h\"\n"
+             . "\n";
+}
+
+# Emit the lookup table of every generated copy function, followed by
+# SDL_GetRenderCopyFunc(), which scans that table for the first entry whose
+# formats match and whose mode masks cover all requested mode bits. Entries
+# are written in nested-loop order (source format, destination format,
+# modulate, blend, scale), so that is also the lookup priority.
+sub output_copyfunctable
+{
+    print FILE <<__EOF__;
+static struct {
+    Uint32 src_format;
+    Uint32 dst_format;
+    int modMode;
+    int blendMode;
+    int scaleMode;
+    SDL_RenderCopyFunc func;
+} SDL_RenderCopyFuncTable[] = {
+__EOF__
+    foreach my $src (@src_formats) {
+        foreach my $dst (@dst_formats) {
+            foreach my $modulate (0 .. 1) {
+                foreach my $blend (0 .. 1) {
+                    foreach my $scale (0 .. 1) {
+                        # A plain same-format copy has no generated function.
+                        next unless ( $modulate || $blend || $scale || $src ne $dst );
+
+                        # Each mode column is the mask of requests the
+                        # function can serve, or 0 when it serves none.
+                        my $modMode = $modulate
+                            ? "(SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA)"
+                            : "0";
+                        my $blendMode = $blend
+                            ? "(SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD)"
+                            : "0";
+                        my $scaleMode = $scale ? "SDL_TEXTURESCALEMODE_FAST" : "0";
+
+                        print FILE "    { SDL_PIXELFORMAT_$src, SDL_PIXELFORMAT_$dst, ";
+                        print FILE "$modMode, $blendMode, $scaleMode, ";
+                        output_copyfuncname("", $src, $dst, $modulate, $blend, $scale, 0, " },\n");
+                    }
+                }
+            }
+        }
+    }
+    print FILE <<__EOF__;
+};
+
+SDL_RenderCopyFunc SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode)
+{
+    int i;
+
+    for (i = 0; i < SDL_arraysize(SDL_RenderCopyFuncTable); ++i) {
+        if (src_format != SDL_RenderCopyFuncTable[i].src_format) {
+            continue;
+        }
+        if (dst_format != SDL_RenderCopyFuncTable[i].dst_format) {
+            continue;
+        }
+        if ((modMode & SDL_RenderCopyFuncTable[i].modMode) != modMode) {
+            continue;
+        }
+        if ((blendMode & SDL_RenderCopyFuncTable[i].blendMode) != blendMode) {
+            continue;
+        }
+        if ((scaleMode & SDL_RenderCopyFuncTable[i].scaleMode) != scaleMode) {
+            continue;
+        }
+        return SDL_RenderCopyFuncTable[i].func;
+    }
+    return NULL;
+}
+
+__EOF__
+}
+
+# Emit the bodies of every copy function for one format pair.
+sub output_copyfunc_c
+{
+    my ($src, $dst) = @_;
+
+    foreach my $modulate (0 .. 1) {
+        foreach my $blend (0 .. 1) {
+            foreach my $scale (0 .. 1) {
+                # Skip the combination that would be a plain same-format copy.
+                next unless ( $modulate || $blend || $scale || $src ne $dst );
+                output_copyfunc($src, $dst, $modulate, $blend, $scale);
+            }
+        }
+    }
+}
+
+open_file("SDL_rendercopy.h");          # pass 1: the header with one prototype per function
+output_copydefs();
+foreach my $src (@src_formats) {
+    foreach my $dst (@dst_formats) {
+        output_copyfunc_h($src, $dst);
+    }
+}
+print FILE "\n";
+close_file("SDL_rendercopy.h");
+
+open_file("SDL_rendercopy.c");          # pass 2: includes, lookup table, then the bodies
+output_copyinc();
+output_copyfunctable();
+foreach my $src (@src_formats) {
+    foreach my $dst (@dst_formats) {
+        output_copyfunc_c($src, $dst);
+    }
+}
+close_file("SDL_rendercopy.c");
--- a/src/video/win32/SDL_d3drender.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/win32/SDL_d3drender.c	Mon Aug 28 03:17:39 2006 +0000
@@ -37,6 +37,14 @@
 static int D3D_GetTexturePalette(SDL_Renderer * renderer,
                                  SDL_Texture * texture, SDL_Color * colors,
                                  int firstcolor, int ncolors);
+static int D3D_SetTextureColorMod(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int D3D_SetTextureAlphaMod(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int D3D_SetTextureBlendMode(SDL_Renderer * renderer,
+                                   SDL_Texture * texture);
+static int D3D_SetTextureScaleMode(SDL_Renderer * renderer,
+                                   SDL_Texture * texture);
 static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                              const SDL_Rect * rect, const void *pixels,
                              int pitch);
@@ -46,11 +54,10 @@
 static void D3D_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                              int numrects, const SDL_Rect * rects);
-static int D3D_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect,
-                          Uint32 color);
+static int D3D_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                          Uint8 a, const SDL_Rect * rect);
 static int D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-                          int blendMode, int scaleMode);
+                          const SDL_Rect * srcrect, const SDL_Rect * dstrect);
 static void D3D_RenderPresent(SDL_Renderer * renderer);
 static void D3D_DestroyTexture(SDL_Renderer * renderer,
                                SDL_Texture * texture);
@@ -65,6 +72,8 @@
       SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTFLIP3 |
       SDL_RENDERER_PRESENTDISCARD | SDL_RENDERER_PRESENTVSYNC |
       SDL_RENDERER_ACCELERATED),
+     (SDL_TEXTUREMODULATE_NONE | SDL_TEXTUREMODULATE_COLOR |
+      SDL_TEXTUREMODULATE_ALPHA),
      (SDL_TEXTUREBLENDMODE_NONE | SDL_TEXTUREBLENDMODE_MASK |
       SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD |
       SDL_TEXTUREBLENDMODE_MOD),
@@ -259,6 +268,10 @@
     renderer->CreateTexture = D3D_CreateTexture;
     renderer->SetTexturePalette = D3D_SetTexturePalette;
     renderer->GetTexturePalette = D3D_GetTexturePalette;
+    renderer->SetTextureColorMod = D3D_SetTextureColorMod;
+    renderer->SetTextureAlphaMod = D3D_SetTextureAlphaMod;
+    renderer->SetTextureBlendMode = D3D_SetTextureBlendMode;
+    renderer->SetTextureScaleMode = D3D_SetTextureScaleMode;
     renderer->UpdateTexture = D3D_UpdateTexture;
     renderer->LockTexture = D3D_LockTexture;
     renderer->UnlockTexture = D3D_UnlockTexture;
@@ -478,6 +491,54 @@
 }
 
 static int
+D3D_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* color modulation is not implemented for D3D yet */
+    SDL_Unsupported();          /* FIXME: implement vertex coloring */
+    return -1;                  /* fail with the SDL error set, like the blend/scale setters */
+}
+
+static int
+D3D_SetTextureAlphaMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* alpha modulation is not implemented for D3D yet */
+    SDL_Unsupported();          /* FIXME: implement vertex coloring */
+    return -1;                  /* fail with the SDL error set, like the blend/scale setters */
+}
+
+static int
+D3D_SetTextureBlendMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    /* Validate the blend mode already stored on the texture. */
+    const int mode = texture->blendMode;
+
+    if (mode == SDL_TEXTUREBLENDMODE_NONE || mode == SDL_TEXTUREBLENDMODE_MASK
+        || mode == SDL_TEXTUREBLENDMODE_BLEND || mode == SDL_TEXTUREBLENDMODE_ADD
+        || mode == SDL_TEXTUREBLENDMODE_MOD) {
+        return 0;
+    }
+    /* Anything else: set the SDL error and reset the texture to NONE. */
+    SDL_Unsupported();
+    texture->blendMode = SDL_TEXTUREBLENDMODE_NONE;
+    return -1;
+}
+
+static int
+D3D_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    /* Validate the scale mode already stored on the texture. */
+    const int mode = texture->scaleMode;
+
+    if (mode != SDL_TEXTURESCALEMODE_NONE && mode != SDL_TEXTURESCALEMODE_FAST
+        && mode != SDL_TEXTURESCALEMODE_SLOW
+        && mode != SDL_TEXTURESCALEMODE_BEST) {
+        /* Unknown value: set the SDL error and reset the texture to NONE. */
+        SDL_Unsupported();
+        texture->scaleMode = SDL_TEXTURESCALEMODE_NONE;
+        return -1;
+    }
+    return 0;
+}
+
+static int
 D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                   const SDL_Rect * rect, const void *pixels, int pitch)
 {
@@ -597,7 +658,8 @@
 }
 
 static int
-D3D_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect, Uint32 color)
+D3D_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b, Uint8 a,
+               const SDL_Rect * rect)
 {
     D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
     D3DRECT d3drect;
@@ -615,7 +677,7 @@
 
     result =
         IDirect3DDevice9_Clear(data->device, 1, &d3drect, D3DCLEAR_TARGET,
-                               (D3DCOLOR) color, 1.0f, 0);
+                               D3DCOLOR_ARGB(a, r, g, b), 1.0f, 0);
     if (FAILED(result)) {
         D3D_SetError("Clear()", result);
         return -1;
@@ -625,8 +687,7 @@
 
 static int
 D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-               const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-               int blendMode, int scaleMode)
+               const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
     D3D_TextureData *texturedata = (D3D_TextureData *) texture->driverdata;
@@ -678,7 +739,7 @@
     vertices[3].u = minu;
     vertices[3].v = maxv;
 
-    switch (blendMode) {
+    switch (texture->blendMode) {
     case SDL_TEXTUREBLENDMODE_NONE:
         IDirect3DDevice9_SetRenderState(data->device, D3DRS_ALPHABLENDENABLE,
                                         FALSE);
@@ -710,7 +771,7 @@
         break;
     }
 
-    switch (scaleMode) {
+    switch (texture->scaleMode) {
     case SDL_TEXTURESCALEMODE_NONE:
     case SDL_TEXTURESCALEMODE_FAST:
         IDirect3DDevice9_SetSamplerState(data->device, 0, D3DSAMP_MINFILTER,
--- a/src/video/win32/SDL_gdirender.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/src/video/win32/SDL_gdirender.c	Mon Aug 28 03:17:39 2006 +0000
@@ -42,6 +42,12 @@
 static int GDI_GetTexturePalette(SDL_Renderer * renderer,
                                  SDL_Texture * texture, SDL_Color * colors,
                                  int firstcolor, int ncolors);
+static int GDI_SetTextureAlphaMod(SDL_Renderer * renderer,
+                                  SDL_Texture * texture);
+static int GDI_SetTextureBlendMode(SDL_Renderer * renderer,
+                                   SDL_Texture * texture);
+static int GDI_SetTextureScaleMode(SDL_Renderer * renderer,
+                                   SDL_Texture * texture);
 static int GDI_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                              const SDL_Rect * rect, const void *pixels,
                              int pitch);
@@ -51,11 +57,10 @@
 static void GDI_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static void GDI_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                              int numrects, const SDL_Rect * rects);
-static int GDI_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect,
-                          Uint32 color);
+static int GDI_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b,
+                          Uint8 a, const SDL_Rect * rect);
 static int GDI_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-                          int blendMode, int scaleMode);
+                          const SDL_Rect * srcrect, const SDL_Rect * dstrect);
 static void GDI_RenderPresent(SDL_Renderer * renderer);
 static void GDI_DestroyTexture(SDL_Renderer * renderer,
                                SDL_Texture * texture);
@@ -69,6 +74,7 @@
      (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTCOPY |
       SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTFLIP3 |
       SDL_RENDERER_PRESENTDISCARD | SDL_RENDERER_ACCELERATED),
+     (SDL_TEXTUREMODULATE_NONE | SDL_TEXTUREMODULATE_ALPHA),
      (SDL_TEXTUREBLENDMODE_NONE | SDL_TEXTUREBLENDMODE_MASK |
       SDL_TEXTUREBLENDMODE_BLEND),
      (SDL_TEXTURESCALEMODE_NONE | SDL_TEXTURESCALEMODE_FAST),
@@ -161,6 +167,9 @@
     renderer->QueryTexturePixels = GDI_QueryTexturePixels;
     renderer->SetTexturePalette = GDI_SetTexturePalette;
     renderer->GetTexturePalette = GDI_GetTexturePalette;
+    renderer->SetTextureAlphaMod = GDI_SetTextureAlphaMod;
+    renderer->SetTextureBlendMode = GDI_SetTextureBlendMode;
+    renderer->SetTextureScaleMode = GDI_SetTextureScaleMode;
     renderer->UpdateTexture = GDI_UpdateTexture;
     renderer->LockTexture = GDI_LockTexture;
     renderer->UnlockTexture = GDI_UnlockTexture;
@@ -438,6 +447,47 @@
 }
 
 static int
+GDI_SetTextureAlphaMod(SDL_Renderer * renderer, SDL_Texture * texture)
+{                               /* accepts any alpha modulation value without further work */
+    return 0;                   /* nothing to prepare: GDI_RenderCopy reads texture->a when it draws */
+}
+
+static int
+GDI_SetTextureBlendMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    const int mode = texture->blendMode;
+
+    if (mode == SDL_TEXTUREBLENDMODE_NONE || mode == SDL_TEXTUREBLENDMODE_MASK
+        || mode == SDL_TEXTUREBLENDMODE_BLEND) {
+        return 0;
+    }
+    /* Any other mode: set the SDL error and reset the texture to NONE. */
+    SDL_Unsupported();
+    texture->blendMode = SDL_TEXTUREBLENDMODE_NONE;
+    return -1;
+}
+
+static int
+GDI_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture)
+{
+    int fallback;               /* mode stored on the texture when the request is refused */
+    switch (texture->scaleMode) {
+    case SDL_TEXTURESCALEMODE_NONE:
+    case SDL_TEXTURESCALEMODE_FAST:
+        return 0;
+    case SDL_TEXTURESCALEMODE_SLOW:
+    case SDL_TEXTURESCALEMODE_BEST:
+        fallback = SDL_TEXTURESCALEMODE_FAST;
+        break;
+    default:
+        fallback = SDL_TEXTURESCALEMODE_NONE;
+    }
+    SDL_Unsupported();
+    texture->scaleMode = fallback;
+    return -1;
+}
+
+static int
 GDI_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                   const SDL_Rect * rect, const void *pixels, int pitch)
 {
@@ -524,10 +574,10 @@
 }
 
 static int
-GDI_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect, Uint32 color)
+GDI_RenderFill(SDL_Renderer * renderer, Uint8 r, Uint8 g, Uint8 b, Uint8 a,
+               const SDL_Rect * rect)
 {
     GDI_RenderData *data = (GDI_RenderData *) renderer->driverdata;
-    Uint8 r, g, b;
     RECT rc;
     HBRUSH brush;
     int status;
@@ -536,10 +586,6 @@
         SDL_AddDirtyRect(&data->dirty, rect);
     }
 
-    r = (Uint8) ((color >> 16) & 0xFF);
-    g = (Uint8) ((color >> 8) & 0xFF);
-    b = (Uint8) (color & 0xFF);
-
     rc.left = rect->x;
     rc.top = rect->y;
     rc.right = rect->x + rect->w + 1;
@@ -560,8 +606,7 @@
 
 static int
 GDI_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-               const SDL_Rect * srcrect, const SDL_Rect * dstrect,
-               int blendMode, int scaleMode)
+               const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     GDI_RenderData *data = (GDI_RenderData *) renderer->driverdata;
     GDI_TextureData *texturedata = (GDI_TextureData *) texture->driverdata;
@@ -575,11 +620,13 @@
         SelectPalette(data->memory_hdc, texturedata->hpal, TRUE);
         RealizePalette(data->memory_hdc);
     }
-    if (blendMode & (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND)) {
-        static BLENDFUNCTION blendFunc = {
+    if (texture->
+        blendMode & (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND))
+    {
+        BLENDFUNCTION blendFunc = {
             AC_SRC_OVER,
             0,
-            255,
+            texture->a,
             AC_SRC_ALPHA
         };
         /* FIXME: GDI uses premultiplied alpha! */
--- a/test/testgl2.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/test/testgl2.c	Mon Aug 28 03:17:39 2006 +0000
@@ -194,7 +194,7 @@
             }
         }
         if (consumed < 0) {
-            fprintf(stderr, "Usage: %s %s [--fsaa] [--accel]", argv[0],
+            fprintf(stderr, "Usage: %s %s [--fsaa] [--accel]\n", argv[0],
                     CommonUsage(state));
             quit(1);
         }
--- a/test/testsprite2.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/test/testsprite2.c	Mon Aug 28 03:17:39 2006 +0000
@@ -8,11 +8,15 @@
 
 #define NUM_SPRITES	100
 #define MAX_SPEED 	1
-#define BACKGROUND  0x00A0A0A0
 
 static CommonState *state;
 static int num_sprites;
 static SDL_TextureID *sprites;
+static SDL_bool cycle_color;
+static SDL_bool cycle_alpha;
+static int cycle_direction = 1;
+static int current_alpha = 0;
+static int current_color = 0;
 static SDL_Rect *positions;
 static SDL_Rect *velocities;
 static int sprite_w, sprite_h;
@@ -66,6 +70,8 @@
             SDL_FreeSurface(temp);
             return (-1);
         }
+        SDL_SetTextureBlendMode(sprites[i], blendMode);
+        SDL_SetTextureScaleMode(sprites[i], scaleMode);
     }
     SDL_FreeSurface(temp);
 
@@ -85,9 +91,36 @@
     /* Query the sizes */
     SDL_GetWindowSize(window, &window_w, &window_h);
 
+    /* Cycle the color and alpha, if desired */
+    if (cycle_color) {
+        current_color += cycle_direction;
+        if (current_color < 0) {
+            current_color = 0;
+            cycle_direction = -cycle_direction;
+        }
+        if (current_color > 255) {
+            current_color = 255;
+            cycle_direction = -cycle_direction;
+        }
+        SDL_SetTextureColorMod(sprite, 255, (Uint8) current_color,
+                               (Uint8) current_color);
+    }
+    if (cycle_alpha) {
+        current_alpha += cycle_direction;
+        if (current_alpha < 0) {
+            current_alpha = 0;
+            cycle_direction = -cycle_direction;
+        }
+        if (current_alpha > 255) {
+            current_alpha = 255;
+            cycle_direction = -cycle_direction;
+        }
+        SDL_SetTextureAlphaMod(sprite, (Uint8) current_alpha);
+    }
+
     /* Move the sprite, bounce at the wall, and draw */
     n = 0;
-    SDL_RenderFill(NULL, BACKGROUND);
+    SDL_RenderFill(0xA0, 0xA0, 0xA0, 0xFF, NULL);
     for (i = 0; i < num_sprites; ++i) {
         position = &positions[i];
         velocity = &velocities[i];
@@ -103,7 +136,7 @@
         }
 
         /* Blit the sprite onto the screen */
-        SDL_RenderCopy(sprite, NULL, position, blendMode, scaleMode);
+        SDL_RenderCopy(sprite, NULL, position);
     }
 
     /* Update the screen! */
@@ -166,6 +199,12 @@
                         consumed = 2;
                     }
                 }
+            } else if (SDL_strcasecmp(argv[i], "--cyclecolor") == 0) {
+                cycle_color = SDL_TRUE;
+                consumed = 1;
+            } else if (SDL_strcasecmp(argv[i], "--cyclealpha") == 0) {
+                cycle_alpha = SDL_TRUE;
+                consumed = 1;
             } else if (SDL_isdigit(*argv[i])) {
                 num_sprites = SDL_atoi(argv[i]);
                 consumed = 1;
@@ -173,7 +212,7 @@
         }
         if (consumed < 0) {
             fprintf(stderr,
-                    "Usage: %s %s [--blend none|mask|blend|add|mod] [--scale none|fast|slow|best]",
+                    "Usage: %s %s [--blend none|mask|blend|add|mod] [--scale none|fast|slow|best] [--cyclecolor] [--cyclealpha]\n",
                     argv[0], CommonUsage(state));
             quit(1);
         }
@@ -192,7 +231,7 @@
     }
     for (i = 0; i < state->num_windows; ++i) {
         SDL_SelectRenderer(state->windows[i]);
-        SDL_RenderFill(NULL, BACKGROUND);
+        SDL_RenderFill(0xA0, 0xA0, 0xA0, 0xFF, NULL);
     }
     if (LoadSprite("icon.bmp") < 0) {
         quit(2);
@@ -237,7 +276,7 @@
                 switch (event.window.event) {
                 case SDL_WINDOWEVENT_EXPOSED:
                     SDL_SelectRenderer(event.window.windowID);
-                    SDL_RenderFill(NULL, BACKGROUND);
+                    SDL_RenderFill(0xA0, 0xA0, 0xA0, 0xFF, NULL);
                     break;
                 }
                 break;
--- a/test/testwm2.c	Thu Aug 24 12:49:59 2006 +0000
+++ b/test/testwm2.c	Mon Aug 28 03:17:39 2006 +0000
@@ -34,7 +34,7 @@
             consumed = -1;
         }
         if (consumed < 0) {
-            fprintf(stderr, "Usage: %s %s", argv[0], CommonUsage(state));
+            fprintf(stderr, "Usage: %s %s\n", argv[0], CommonUsage(state));
             quit(1);
         }
         i += consumed;