Added notes on the next steps for SDL 1.3
authorSam Lantinga <slouken@libsdl.org>
Thu, 16 Aug 2007 21:43:19 +0000
changeset 2253 6d99edd791bf
parent 2252 b80e3d57941f
child 2254 79e00f5561f4
Added notes on the next steps for SDL 1.3 Moved fill and copy routines to their own files.
NOTES
src/video/SDL_blit.c
src/video/SDL_blit_copy.c
src/video/SDL_blit_copy.h
src/video/SDL_copy.c
src/video/SDL_copy.h
src/video/SDL_fill.c
src/video/SDL_surface.c
--- a/NOTES	Thu Aug 16 06:40:34 2007 +0000
+++ b/NOTES	Thu Aug 16 21:43:19 2007 +0000
@@ -157,6 +157,16 @@
 streaming textures are lockable and may have system memory pixels available.
 SDL_compat will use a streaming video texture, and will never be HWSURFACE,
 but may be PREALLOC, if system memory pixels are available.
+*** DONE Thu Aug 16 14:18:42 PDT 2007
 
 The software renderer will be abstracted so the surface management can be
 used by any renderer that provides functions to copy surfaces to the window.
+
+Blitters...
+----
+Copy blit and fill rect are optimized with MMX and SSE now.
+
+Here are the pieces we still need:
+- Merging SDL texture capabilities into the SDL surface system
+- Generic fallback blitter architecture
+- Custom fast path blitters
--- a/src/video/SDL_blit.c	Thu Aug 16 06:40:34 2007 +0000
+++ b/src/video/SDL_blit.c	Thu Aug 16 21:43:19 2007 +0000
@@ -24,7 +24,7 @@
 #include "SDL_video.h"
 #include "SDL_sysvideo.h"
 #include "SDL_blit.h"
-#include "SDL_blit_copy.h"
+#include "SDL_copy.h"
 #include "SDL_RLEaccel_c.h"
 #include "SDL_pixels_c.h"
 
--- a/src/video/SDL_blit_copy.c	Thu Aug 16 06:40:34 2007 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2006 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "SDL_blit.h"
-#include "SDL_blit_copy.h"
-
-
-#ifdef __MMX__
-static __inline__ void
-SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
-{
-    int i;
-
-    __m64 values[8];
-    for (i = len / 64; i--;) {
-        _mm_prefetch(src, _MM_HINT_NTA);
-        values[0] = *(__m64 *) (src + 0);
-        values[1] = *(__m64 *) (src + 8);
-        values[2] = *(__m64 *) (src + 16);
-        values[3] = *(__m64 *) (src + 24);
-        values[4] = *(__m64 *) (src + 32);
-        values[5] = *(__m64 *) (src + 40);
-        values[6] = *(__m64 *) (src + 48);
-        values[7] = *(__m64 *) (src + 56);
-        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
-        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
-        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
-        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
-        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
-        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
-        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
-        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
-        src += 64;
-        dst += 64;
-    }
-
-    if (len & 63)
-        SDL_memcpy(dst, src, len & 63);
-}
-#endif /* __MMX__ */
-
-#ifdef __SSE__
-static __inline__ void
-SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
-{
-    int i;
-
-    __m128 values[4];
-    for (i = len / 64; i--;) {
-        _mm_prefetch(src, _MM_HINT_NTA);
-        values[0] = *(__m128 *) (src + 0);
-        values[1] = *(__m128 *) (src + 16);
-        values[2] = *(__m128 *) (src + 32);
-        values[3] = *(__m128 *) (src + 48);
-        _mm_stream_ps((float *) (dst + 0), values[0]);
-        _mm_stream_ps((float *) (dst + 16), values[1]);
-        _mm_stream_ps((float *) (dst + 32), values[2]);
-        _mm_stream_ps((float *) (dst + 48), values[3]);
-        src += 64;
-        dst += 64;
-    }
-
-    if (len & 63)
-        SDL_memcpy(dst, src, len & 63);
-}
-#endif /* __SSE__ */
-
-void
-SDL_BlitCopy(SDL_BlitInfo * info)
-{
-    Uint8 *src, *dst;
-    int w, h;
-    int srcskip, dstskip;
-
-    w = info->d_width * info->dst->BytesPerPixel;
-    h = info->d_height;
-    src = info->s_pixels;
-    dst = info->d_pixels;
-    srcskip = w + info->s_skip;
-    dstskip = w + info->d_skip;
-
-#ifdef __SSE__
-    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
-        while (h--) {
-            SDL_memcpySSE(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-        return;
-    }
-#endif
-
-#ifdef __MMX__
-    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
-        while (h--) {
-            SDL_memcpyMMX(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-        _mm_empty();
-        return;
-    }
-#endif
-
-    while (h--) {
-        SDL_memcpy(dst, src, w);
-        src += srcskip;
-        dst += dstskip;
-    }
-}
-
-void
-SDL_BlitCopyOverlap(SDL_BlitInfo * info)
-{
-    Uint8 *src, *dst;
-    int w, h;
-    int skip;
-
-    w = info->d_width * info->dst->BytesPerPixel;
-    h = info->d_height;
-    src = info->s_pixels;
-    dst = info->d_pixels;
-    skip = w + info->s_skip;
-    if ((dst < src) || (dst >= (src + h * skip))) {
-        SDL_BlitCopy(info);
-    } else {
-        src += ((h - 1) * skip);
-        dst += ((h - 1) * skip);
-        while (h--) {
-            SDL_revcpy(dst, src, w);
-            src -= skip;
-            dst -= skip;
-        }
-    }
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_blit_copy.h	Thu Aug 16 06:40:34 2007 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2006 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-
-void SDL_BlitCopy(SDL_BlitInfo * info);
-void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
-
-/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_copy.c	Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,160 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+#include "SDL_copy.h"
+
+
+#ifdef __SSE__
+/* This assumes 16-byte aligned src and dst */
+static __inline__ void
+SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
+{
+    int i;
+
+    __m128 values[4];
+    for (i = len / 64; i--;) {
+        _mm_prefetch(src, _MM_HINT_NTA);
+        values[0] = *(__m128 *) (src + 0);
+        values[1] = *(__m128 *) (src + 16);
+        values[2] = *(__m128 *) (src + 32);
+        values[3] = *(__m128 *) (src + 48);
+        _mm_stream_ps((float *) (dst + 0), values[0]);
+        _mm_stream_ps((float *) (dst + 16), values[1]);
+        _mm_stream_ps((float *) (dst + 32), values[2]);
+        _mm_stream_ps((float *) (dst + 48), values[3]);
+        src += 64;
+        dst += 64;
+    }
+
+    if (len & 63)
+        SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* This assumes 8-byte aligned src and dst */
+static __inline__ void
+SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
+{
+    int i;
+
+    __m64 values[8];
+    for (i = len / 64; i--;) {
+        _mm_prefetch(src, _MM_HINT_NTA);
+        values[0] = *(__m64 *) (src + 0);
+        values[1] = *(__m64 *) (src + 8);
+        values[2] = *(__m64 *) (src + 16);
+        values[3] = *(__m64 *) (src + 24);
+        values[4] = *(__m64 *) (src + 32);
+        values[5] = *(__m64 *) (src + 40);
+        values[6] = *(__m64 *) (src + 48);
+        values[7] = *(__m64 *) (src + 56);
+        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
+        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
+        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
+        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
+        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
+        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
+        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
+        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
+        src += 64;
+        dst += 64;
+    }
+
+    if (len & 63)
+        SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __MMX__ */
+
+void
+SDL_BlitCopy(SDL_BlitInfo * info)
+{
+    Uint8 *src, *dst;
+    int w, h;
+    int srcskip, dstskip;
+
+    w = info->d_width * info->dst->BytesPerPixel;
+    h = info->d_height;
+    src = info->s_pixels;
+    dst = info->d_pixels;
+    srcskip = w + info->s_skip;
+    dstskip = w + info->d_skip;
+
+#ifdef __SSE__
+    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
+        while (h--) {
+            SDL_memcpySSE(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        return;
+    }
+#endif
+
+#ifdef __MMX__
+    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
+        while (h--) {
+            SDL_memcpyMMX(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        _mm_empty();
+        return;
+    }
+#endif
+
+    while (h--) {
+        SDL_memcpy(dst, src, w);
+        src += srcskip;
+        dst += dstskip;
+    }
+}
+
+void
+SDL_BlitCopyOverlap(SDL_BlitInfo * info)
+{
+    Uint8 *src, *dst;
+    int w, h;
+    int skip;
+
+    w = info->d_width * info->dst->BytesPerPixel;
+    h = info->d_height;
+    src = info->s_pixels;
+    dst = info->d_pixels;
+    skip = w + info->s_skip;
+    if ((dst < src) || (dst >= (src + h * skip))) {
+        SDL_BlitCopy(info);
+    } else {
+        src += ((h - 1) * skip);
+        dst += ((h - 1) * skip);
+        while (h--) {
+            SDL_revcpy(dst, src, w);
+            src -= skip;
+            dst -= skip;
+        }
+    }
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_copy.h	Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,26 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+
+void SDL_BlitCopy(SDL_BlitInfo * info);
+void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_fill.c	Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,365 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+
+
+#ifdef __SSE__
+/* *INDENT-OFF* */
+
+#ifdef _MSC_VER
+#define SSE_BEGIN \
+    __m128 c128; \
+	c128.m128_u32[0] = color; \
+	c128.m128_u32[1] = color; \
+	c128.m128_u32[2] = color; \
+	c128.m128_u32[3] = color;
+#else
+#define SSE_BEGIN \
+    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
+    cccc[0] = color; \
+    cccc[1] = color; \
+    cccc[2] = color; \
+    cccc[3] = color; \
+    __m128 c128 = *(__m128 *)cccc;
+#endif
+
+#define SSE_WORK \
+    for (i = n / 64; i--;) { \
+        _mm_stream_ps((float *)(p+0), c128); \
+        _mm_stream_ps((float *)(p+16), c128); \
+        _mm_stream_ps((float *)(p+32), c128); \
+        _mm_stream_ps((float *)(p+48), c128); \
+        p += 64; \
+    }
+
+#define SSE_END
+
+#define DEFINE_SSE_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    SSE_BEGIN; \
+ \
+    while (h--) { \
+        int i, n = w * bpp; \
+        Uint8 *p = pixels; \
+ \
+        if (n > 15) { \
+            int adjust = 16 - ((uintptr_t)p & 15); \
+            if (adjust < 16) { \
+                n -= adjust; \
+                adjust /= bpp; \
+                while(adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            SSE_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); \
+            remainder /= bpp; \
+            while(remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; \
+    } \
+ \
+    SSE_END; \
+}
+
+DEFINE_SSE_FILLRECT(1, Uint8)
+DEFINE_SSE_FILLRECT(2, Uint16)
+DEFINE_SSE_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* *INDENT-OFF* */
+
+#define MMX_BEGIN \
+    __m64 c64 = _mm_set_pi32(color, color)
+
+#define MMX_WORK \
+    for (i = n / 64; i--;) { \
+        _mm_stream_pi((__m64 *)(p+0), c64); \
+        _mm_stream_pi((__m64 *)(p+8), c64); \
+        _mm_stream_pi((__m64 *)(p+16), c64); \
+        _mm_stream_pi((__m64 *)(p+24), c64); \
+        _mm_stream_pi((__m64 *)(p+32), c64); \
+        _mm_stream_pi((__m64 *)(p+40), c64); \
+        _mm_stream_pi((__m64 *)(p+48), c64); \
+        _mm_stream_pi((__m64 *)(p+56), c64); \
+        p += 64; \
+    }
+
+#define MMX_END \
+    _mm_empty()
+
+#define DEFINE_MMX_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    MMX_BEGIN; \
+ \
+    while (h--) { \
+        int i, n = w * bpp; \
+        Uint8 *p = pixels; \
+ \
+        if (n > 7) { \
+            int adjust = 8 - ((uintptr_t)p & 7); \
+            if (adjust < 8) { \
+                n -= adjust; \
+                adjust /= bpp; \
+                while(adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            MMX_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); \
+            remainder /= bpp; \
+            while(remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; \
+    } \
+ \
+    MMX_END; \
+}
+
+DEFINE_MMX_FILLRECT(1, Uint8)
+DEFINE_MMX_FILLRECT(2, Uint16)
+DEFINE_MMX_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __MMX__ */
+
+static void
+SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        int n = w;
+        Uint8 *p = pixels;
+
+        if (n > 3) {
+            switch ((uintptr_t) p & 3) {
+            case 1:
+                *p++ = (Uint8) color;
+                --n;
+            case 2:
+                *p++ = (Uint8) color;
+                --n;
+            case 3:
+                *p++ = (Uint8) color;
+                --n;
+            }
+            SDL_memset4(p, color, (n >> 2));
+        }
+        if (n & 3) {
+            p += (n & ~3);
+            switch (n & 3) {
+            case 3:
+                *p++ = (Uint8) color;
+            case 2:
+                *p++ = (Uint8) color;
+            case 1:
+                *p++ = (Uint8) color;
+            }
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        int n = w;
+        Uint16 *p = (Uint16 *) pixels;
+
+        if (n > 1) {
+            if ((uintptr_t) p & 2) {
+                *p++ = (Uint16) color;
+                --n;
+            }
+            SDL_memset4(p, color, (n >> 1));
+        }
+        if (n & 1) {
+            p[n - 1] = (Uint16) color;
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    Uint8 r = (Uint8) ((color >> 16) & 0xFF);
+    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
+    Uint8 b = (Uint8) (color & 0xFF);
+
+    while (h--) {
+        int n = w;
+        Uint8 *p = pixels;
+
+        while (n--) {
+            *p++ = r;
+            *p++ = g;
+            *p++ = b;
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        SDL_memset4(pixels, color, w);
+        pixels += pitch;
+    }
+}
+
+/* 
+ * This function performs a fast fill of the given rectangle with 'color'
+ */
+int
+SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
+{
+    Uint8 *pixels;
+
+    /* This function doesn't work on surfaces < 8 bpp */
+    if (dst->format->BitsPerPixel < 8) {
+        SDL_SetError("SDL_FillRect(): Unsupported surface format");
+        return (-1);
+    }
+
+    /* If 'dstrect' == NULL, then fill the whole surface */
+    if (dstrect) {
+        /* Perform clipping */
+        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
+            return (0);
+        }
+    } else {
+        dstrect = &dst->clip_rect;
+    }
+
+    /* Perform software fill */
+    if (!dst->pixels) {
+        SDL_SetError("SDL_FillRect(): You must lock the surface");
+        return (-1);
+    }
+
+    pixels =
+        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
+        dstrect->x * dst->format->BytesPerPixel;
+
+    switch (dst->format->BytesPerPixel) {
+    case 1:
+        {
+            color |= (color << 8);
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 2:
+        {
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 3:
+        /* 24-bit RGB is a slow path, at least for now. */
+        {
+            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 4:
+        {
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+    }
+
+    SDL_UnlockSurface(dst);
+
+    /* We're done! */
+    return (0);
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_surface.c	Thu Aug 16 06:40:34 2007 +0000
+++ b/src/video/SDL_surface.c	Thu Aug 16 21:43:19 2007 +0000
@@ -509,342 +509,6 @@
     return 0;
 }
 
-#ifdef __SSE__
-/* *INDENT-OFF* */
-
-#ifdef _MSC_VER
-#define SSE_BEGIN \
-    __m128 c128; \
-	c128.m128_u32[0] = color; \
-	c128.m128_u32[1] = color; \
-	c128.m128_u32[2] = color; \
-	c128.m128_u32[3] = color;
-#else
-#define SSE_BEGIN \
-    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
-    cccc[0] = color; \
-    cccc[1] = color; \
-    cccc[2] = color; \
-    cccc[3] = color; \
-    __m128 c128 = *(__m128 *)cccc;
-#endif
-
-#define SSE_WORK \
-    for (i = n / 64; i--;) { \
-        _mm_stream_ps((float *)(p+0), c128); \
-        _mm_stream_ps((float *)(p+16), c128); \
-        _mm_stream_ps((float *)(p+32), c128); \
-        _mm_stream_ps((float *)(p+48), c128); \
-        p += 64; \
-    }
-
-#define SSE_END
-
-#define DEFINE_SSE_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
-    SSE_BEGIN; \
- \
-    while (h--) { \
-        int i, n = w * bpp; \
-        Uint8 *p = pixels; \
- \
-        if (n > 15) { \
-            int adjust = 16 - ((uintptr_t)p & 15); \
-            if (adjust < 16) { \
-                n -= adjust; \
-                adjust /= bpp; \
-                while(adjust--) { \
-                    *((type *)p) = (type)color; \
-                    p += bpp; \
-                } \
-            } \
-            SSE_WORK; \
-        } \
-        if (n & 63) { \
-            int remainder = (n & 63); \
-            remainder /= bpp; \
-            while(remainder--) { \
-                *((type *)p) = (type)color; \
-                p += bpp; \
-            } \
-        } \
-        pixels += pitch; \
-    } \
- \
-    SSE_END; \
-}
-
-DEFINE_SSE_FILLRECT(1, Uint8)
-DEFINE_SSE_FILLRECT(2, Uint16)
-DEFINE_SSE_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __SSE__ */
-
-#ifdef __MMX__
-/* *INDENT-OFF* */
-
-#define MMX_BEGIN \
-    __m64 c64 = _mm_set_pi32(color, color)
-
-#define MMX_WORK \
-    for (i = n / 64; i--;) { \
-        _mm_stream_pi((__m64 *)(p+0), c64); \
-        _mm_stream_pi((__m64 *)(p+8), c64); \
-        _mm_stream_pi((__m64 *)(p+16), c64); \
-        _mm_stream_pi((__m64 *)(p+24), c64); \
-        _mm_stream_pi((__m64 *)(p+32), c64); \
-        _mm_stream_pi((__m64 *)(p+40), c64); \
-        _mm_stream_pi((__m64 *)(p+48), c64); \
-        _mm_stream_pi((__m64 *)(p+56), c64); \
-        p += 64; \
-    }
-
-#define MMX_END \
-    _mm_empty()
-
-#define DEFINE_MMX_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
-    MMX_BEGIN; \
- \
-    while (h--) { \
-        int i, n = w * bpp; \
-        Uint8 *p = pixels; \
- \
-        if (n > 7) { \
-            int adjust = 8 - ((uintptr_t)p & 7); \
-            if (adjust < 8) { \
-                n -= adjust; \
-                adjust /= bpp; \
-                while(adjust--) { \
-                    *((type *)p) = (type)color; \
-                    p += bpp; \
-                } \
-            } \
-            MMX_WORK; \
-        } \
-        if (n & 63) { \
-            int remainder = (n & 63); \
-            remainder /= bpp; \
-            while(remainder--) { \
-                *((type *)p) = (type)color; \
-                p += bpp; \
-            } \
-        } \
-        pixels += pitch; \
-    } \
- \
-    MMX_END; \
-}
-
-DEFINE_MMX_FILLRECT(1, Uint8)
-DEFINE_MMX_FILLRECT(2, Uint16)
-DEFINE_MMX_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __MMX__ */
-
-static void
-SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        int n = w;
-        Uint8 *p = pixels;
-
-        if (n > 3) {
-            switch ((uintptr_t) p & 3) {
-            case 1:
-                *p++ = (Uint8) color;
-                --n;
-            case 2:
-                *p++ = (Uint8) color;
-                --n;
-            case 3:
-                *p++ = (Uint8) color;
-                --n;
-            }
-            SDL_memset4(p, color, (n >> 2));
-        }
-        if (n & 3) {
-            p += (n & ~3);
-            switch (n & 3) {
-            case 3:
-                *p++ = (Uint8) color;
-            case 2:
-                *p++ = (Uint8) color;
-            case 1:
-                *p++ = (Uint8) color;
-            }
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        int n = w;
-        Uint16 *p = (Uint16 *) pixels;
-
-        if (n > 1) {
-            if ((uintptr_t) p & 2) {
-                *p++ = (Uint16) color;
-                --n;
-            }
-            SDL_memset4(p, color, (n >> 1));
-        }
-        if (n & 1) {
-            p[n - 1] = (Uint16) color;
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    Uint8 r = (Uint8) (color & 0xFF);
-    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
-    Uint8 b = (Uint8) ((color >> 16) & 0xFF);
-
-    while (h--) {
-        int n = w;
-        Uint8 *p = pixels;
-
-        while (n--) {
-            *p++ = r;
-            *p++ = g;
-            *p++ = b;
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        SDL_memset4(pixels, color, w);
-        pixels += pitch;
-    }
-}
-
-/* 
- * This function performs a fast fill of the given rectangle with 'color'
- */
-int
-SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
-{
-    Uint8 *pixels;
-
-    /* This function doesn't work on surfaces < 8 bpp */
-    if (dst->format->BitsPerPixel < 8) {
-        SDL_SetError("Fill rect on unsupported surface format");
-        return (-1);
-    }
-
-    /* If 'dstrect' == NULL, then fill the whole surface */
-    if (dstrect) {
-        /* Perform clipping */
-        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
-            return (0);
-        }
-    } else {
-        dstrect = &dst->clip_rect;
-    }
-
-    /* Perform software fill */
-    if (SDL_LockSurface(dst) != 0) {
-        return (-1);
-    }
-
-    pixels =
-        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
-        dstrect->x * dst->format->BytesPerPixel;
-
-    switch (dst->format->BytesPerPixel) {
-    case 1:
-        {
-            color |= (color << 8);
-            color |= (color << 16);
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 2:
-        {
-            color |= (color << 16);
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 3:
-        /* 24-bit RGB is a slow path, at least for now. */
-        {
-            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 4:
-        {
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-    }
-
-    SDL_UnlockSurface(dst);
-
-    /* We're done! */
-    return (0);
-}
-
 /*
  * Lock a surface to directly access the pixels
  */