Added notes on the next steps for SDL 1.3
Moved fill and copy routines to their own files.
--- a/NOTES Thu Aug 16 06:40:34 2007 +0000
+++ b/NOTES Thu Aug 16 21:43:19 2007 +0000
@@ -157,6 +157,16 @@
streaming textures are lockable and may have system memory pixels available.
SDL_compat will use a streaming video texture, and will never be HWSURFACE,
but may be PREALLOC, if system memory pixels are available.
+*** DONE Thu Aug 16 14:18:42 PDT 2007
The software renderer will be abstracted so the surface management can be
used by any renderer that provides functions to copy surfaces to the window.
+
+Blitters...
+----
+Copy blit and fill rect are optimized with MMX and SSE now.
+
+Here are the pieces we still need:
+- Merging SDL texture capabilities into the SDL surface system
+- Generic fallback blitter architecture
+- Custom fast path blitters
--- a/src/video/SDL_blit.c Thu Aug 16 06:40:34 2007 +0000
+++ b/src/video/SDL_blit.c Thu Aug 16 21:43:19 2007 +0000
@@ -24,7 +24,7 @@
#include "SDL_video.h"
#include "SDL_sysvideo.h"
#include "SDL_blit.h"
-#include "SDL_blit_copy.h"
+#include "SDL_copy.h"
#include "SDL_RLEaccel_c.h"
#include "SDL_pixels_c.h"
--- a/src/video/SDL_blit_copy.c Thu Aug 16 06:40:34 2007 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-/*
- SDL - Simple DirectMedia Layer
- Copyright (C) 1997-2006 Sam Lantinga
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
- Sam Lantinga
- slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "SDL_blit.h"
-#include "SDL_blit_copy.h"
-
-
-#ifdef __MMX__
-static __inline__ void
-SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
-{
- int i;
-
- __m64 values[8];
- for (i = len / 64; i--;) {
- _mm_prefetch(src, _MM_HINT_NTA);
- values[0] = *(__m64 *) (src + 0);
- values[1] = *(__m64 *) (src + 8);
- values[2] = *(__m64 *) (src + 16);
- values[3] = *(__m64 *) (src + 24);
- values[4] = *(__m64 *) (src + 32);
- values[5] = *(__m64 *) (src + 40);
- values[6] = *(__m64 *) (src + 48);
- values[7] = *(__m64 *) (src + 56);
- _mm_stream_pi((__m64 *) (dst + 0), values[0]);
- _mm_stream_pi((__m64 *) (dst + 8), values[1]);
- _mm_stream_pi((__m64 *) (dst + 16), values[2]);
- _mm_stream_pi((__m64 *) (dst + 24), values[3]);
- _mm_stream_pi((__m64 *) (dst + 32), values[4]);
- _mm_stream_pi((__m64 *) (dst + 40), values[5]);
- _mm_stream_pi((__m64 *) (dst + 48), values[6]);
- _mm_stream_pi((__m64 *) (dst + 56), values[7]);
- src += 64;
- dst += 64;
- }
-
- if (len & 63)
- SDL_memcpy(dst, src, len & 63);
-}
-#endif /* __MMX__ */
-
-#ifdef __SSE__
-static __inline__ void
-SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
-{
- int i;
-
- __m128 values[4];
- for (i = len / 64; i--;) {
- _mm_prefetch(src, _MM_HINT_NTA);
- values[0] = *(__m128 *) (src + 0);
- values[1] = *(__m128 *) (src + 16);
- values[2] = *(__m128 *) (src + 32);
- values[3] = *(__m128 *) (src + 48);
- _mm_stream_ps((float *) (dst + 0), values[0]);
- _mm_stream_ps((float *) (dst + 16), values[1]);
- _mm_stream_ps((float *) (dst + 32), values[2]);
- _mm_stream_ps((float *) (dst + 48), values[3]);
- src += 64;
- dst += 64;
- }
-
- if (len & 63)
- SDL_memcpy(dst, src, len & 63);
-}
-#endif /* __SSE__ */
-
-void
-SDL_BlitCopy(SDL_BlitInfo * info)
-{
- Uint8 *src, *dst;
- int w, h;
- int srcskip, dstskip;
-
- w = info->d_width * info->dst->BytesPerPixel;
- h = info->d_height;
- src = info->s_pixels;
- dst = info->d_pixels;
- srcskip = w + info->s_skip;
- dstskip = w + info->d_skip;
-
-#ifdef __SSE__
- if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
- while (h--) {
- SDL_memcpySSE(dst, src, w);
- src += srcskip;
- dst += dstskip;
- }
- return;
- }
-#endif
-
-#ifdef __MMX__
- if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
- while (h--) {
- SDL_memcpyMMX(dst, src, w);
- src += srcskip;
- dst += dstskip;
- }
- _mm_empty();
- return;
- }
-#endif
-
- while (h--) {
- SDL_memcpy(dst, src, w);
- src += srcskip;
- dst += dstskip;
- }
-}
-
-void
-SDL_BlitCopyOverlap(SDL_BlitInfo * info)
-{
- Uint8 *src, *dst;
- int w, h;
- int skip;
-
- w = info->d_width * info->dst->BytesPerPixel;
- h = info->d_height;
- src = info->s_pixels;
- dst = info->d_pixels;
- skip = w + info->s_skip;
- if ((dst < src) || (dst >= (src + h * skip))) {
- SDL_BlitCopy(info);
- } else {
- src += ((h - 1) * skip);
- dst += ((h - 1) * skip);
- while (h--) {
- SDL_revcpy(dst, src, w);
- src -= skip;
- dst -= skip;
- }
- }
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_blit_copy.h Thu Aug 16 06:40:34 2007 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-/*
- SDL - Simple DirectMedia Layer
- Copyright (C) 1997-2006 Sam Lantinga
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
- Sam Lantinga
- slouken@libsdl.org
-*/
-
-void SDL_BlitCopy(SDL_BlitInfo * info);
-void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
-
-/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_copy.c Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,160 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2006 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+#include "SDL_copy.h"
+
+
+#ifdef __SSE__
+/* Copy len bytes from src to dst, 64 bytes per loop pass. This assumes 16-byte aligned src and dst */
+static __inline__ void
+SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
+{
+ int i;
+ /* Staging area for one 64-byte chunk: all four 16-byte loads happen before the four stores. */
+ __m128 values[4];
+ for (i = len / 64; i--;) {
+ _mm_prefetch(src, _MM_HINT_NTA);
+ values[0] = *(__m128 *) (src + 0);
+ values[1] = *(__m128 *) (src + 16);
+ values[2] = *(__m128 *) (src + 32);
+ values[3] = *(__m128 *) (src + 48);
+ _mm_stream_ps((float *) (dst + 0), values[0]);
+ _mm_stream_ps((float *) (dst + 16), values[1]);
+ _mm_stream_ps((float *) (dst + 32), values[2]);
+ _mm_stream_ps((float *) (dst + 48), values[3]);
+ src += 64;
+ dst += 64;
+ }
+ /* Whatever did not fill a whole chunk (len & 63 bytes) is handed to SDL_memcpy. */
+ if (len & 63)
+ SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* Copy len bytes from src to dst, 64 bytes per loop pass. This assumes 8-byte aligned src and dst */
+static __inline__ void
+SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
+{
+ int i;
+ /* NOTE(review): _mm_prefetch and _mm_stream_pi look like SSE-generation intrinsics, yet this function is gated on __MMX__ / SDL_HasMMX() only - confirm on MMX-only CPUs. */
+ __m64 values[8];
+ for (i = len / 64; i--;) {
+ _mm_prefetch(src, _MM_HINT_NTA);
+ values[0] = *(__m64 *) (src + 0);
+ values[1] = *(__m64 *) (src + 8);
+ values[2] = *(__m64 *) (src + 16);
+ values[3] = *(__m64 *) (src + 24);
+ values[4] = *(__m64 *) (src + 32);
+ values[5] = *(__m64 *) (src + 40);
+ values[6] = *(__m64 *) (src + 48);
+ values[7] = *(__m64 *) (src + 56);
+ _mm_stream_pi((__m64 *) (dst + 0), values[0]);
+ _mm_stream_pi((__m64 *) (dst + 8), values[1]);
+ _mm_stream_pi((__m64 *) (dst + 16), values[2]);
+ _mm_stream_pi((__m64 *) (dst + 24), values[3]);
+ _mm_stream_pi((__m64 *) (dst + 32), values[4]);
+ _mm_stream_pi((__m64 *) (dst + 40), values[5]);
+ _mm_stream_pi((__m64 *) (dst + 48), values[6]);
+ _mm_stream_pi((__m64 *) (dst + 56), values[7]);
+ src += 64;
+ dst += 64;
+ }
+ /* Whatever did not fill a whole chunk (len & 63 bytes) is handed to SDL_memcpy; _mm_empty() is left to the caller. */
+ if (len & 63)
+ SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __MMX__ */
+
+/* Copy d_height rows of d_width pixels from info->s_pixels to info->d_pixels,
+ * via the SSE or MMX row copy when CPU and alignment allow, else SDL_memcpy. */
+void
+SDL_BlitCopy(SDL_BlitInfo * info)
+{
+    int w = info->d_width * info->dst->BytesPerPixel;   /* bytes copied per row */
+    int h = info->d_height;
+    Uint8 *src = info->s_pixels;
+    Uint8 *dst = info->d_pixels;
+    int srcskip = w + info->s_skip;     /* byte distance between source rows */
+    int dstskip = w + info->d_skip;     /* byte distance between destination rows */
+
+#ifdef __SSE__
+    /* Both pitches must be multiples of 16 too, or every row after the first is misaligned. */
+    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)
+        && !(srcskip & 15) && !(dstskip & 15)) {
+        while (h--) {
+            SDL_memcpySSE(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        return;
+    }
+#endif
+
+#ifdef __MMX__
+    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
+        while (h--) {
+            SDL_memcpyMMX(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        _mm_empty();
+        return;
+    }
+#endif
+
+    while (h--) {
+        SDL_memcpy(dst, src, w);
+        src += srcskip;
+        dst += dstskip;
+    }
+}
+
+void
+SDL_BlitCopyOverlap(SDL_BlitInfo * info)
+{
+ Uint8 *src, *dst;
+ int w, h;
+ int skip;
+ /* Row copy for source and destination that may overlap: forward via SDL_BlitCopy when that is safe, otherwise last row first. */
+ w = info->d_width * info->dst->BytesPerPixel;
+ h = info->d_height;
+ src = info->s_pixels;
+ dst = info->d_pixels;
+ skip = w + info->s_skip; /* one pitch for both buffers: only s_skip is read, d_skip is not used here */
+ if ((dst < src) || (dst >= (src + h * skip))) { /* dst starts before src, or at/after the end of the h source rows */
+ SDL_BlitCopy(info);
+ } else {
+ src += ((h - 1) * skip); /* start at the last row and walk upwards */
+ dst += ((h - 1) * skip);
+ while (h--) {
+ SDL_revcpy(dst, src, w); /* presumably a back-to-front byte copy - confirm against its definition */
+ src -= skip;
+ dst -= skip;
+ }
+ }
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_copy.h Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,26 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2006 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+
+void SDL_BlitCopy(SDL_BlitInfo * info);
+void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_fill.c Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,365 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2006 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+
+
+#ifdef __SSE__
+/* *INDENT-OFF* */
+
+#ifdef _MSC_VER /* SSE_BEGIN (either variant) declares c128 and sets its four 32-bit lanes to 'color' */
+#define SSE_BEGIN \
+ __m128 c128; \
+ c128.m128_u32[0] = color; \
+ c128.m128_u32[1] = color; \
+ c128.m128_u32[2] = color; \
+ c128.m128_u32[3] = color;
+#else
+#define SSE_BEGIN \
+ DECLARE_ALIGNED(Uint32, cccc[4], 16); \
+ cccc[0] = color; \
+ cccc[1] = color; \
+ cccc[2] = color; \
+ cccc[3] = color; \
+ __m128 c128 = *(__m128 *)cccc;
+#endif
+/* SSE_WORK: store c128 over n / 64 chunks of 64 bytes starting at p and advance p; n itself is not modified. */
+#define SSE_WORK \
+ for (i = n / 64; i--;) { \
+ _mm_stream_ps((float *)(p+0), c128); \
+ _mm_stream_ps((float *)(p+16), c128); \
+ _mm_stream_ps((float *)(p+32), c128); \
+ _mm_stream_ps((float *)(p+48), c128); \
+ p += 64; \
+ }
+/* SSE_END expands to nothing. */
+#define SSE_END
+/* DEFINE_SSE_FILLRECT(bpp, type): defines SDL_FillRect<bpp>SSE(), which fills h rows of w pixels of 'type', rows 'pitch' bytes apart. */
+#define DEFINE_SSE_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+ SSE_BEGIN; \
+ /* per row: scalar stores up to a 16-byte boundary, SSE_WORK, then a scalar tail */ \
+ while (h--) { \
+ int i, n = w * bpp; /* n: bytes still to fill in this row */ \
+ Uint8 *p = pixels; \
+ /* NOTE(review): assumes p is a multiple of bpp; otherwise adjust / bpp truncates and p stays unaligned - confirm */ \
+ if (n > 15) { \
+ int adjust = 16 - ((uintptr_t)p & 15); /* bytes to the next 16-byte boundary; 16 means already aligned */ \
+ if (adjust < 16) { \
+ n -= adjust; \
+ adjust /= bpp; /* bytes -> pixels */ \
+ while(adjust--) { \
+ *((type *)p) = (type)color; \
+ p += bpp; \
+ } \
+ } \
+ SSE_WORK; \
+ } \
+ if (n & 63) { /* bytes SSE_WORK did not cover */ \
+ int remainder = (n & 63); \
+ remainder /= bpp; /* bytes -> pixels */ \
+ while(remainder--) { \
+ *((type *)p) = (type)color; \
+ p += bpp; \
+ } \
+ } \
+ pixels += pitch; \
+ } \
+ \
+ SSE_END; \
+}
+/* Instantiates SDL_FillRect1SSE, SDL_FillRect2SSE and SDL_FillRect4SSE. */
+DEFINE_SSE_FILLRECT(1, Uint8)
+DEFINE_SSE_FILLRECT(2, Uint16)
+DEFINE_SSE_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* *INDENT-OFF* */
+
+#define MMX_BEGIN \
+ __m64 c64 = _mm_set_pi32(color, color)
+/* MMX_WORK: store c64 over n / 64 chunks of 64 bytes starting at p and advance p. NOTE(review): _mm_stream_pi under an MMX-only gate - confirm it is available there. */
+#define MMX_WORK \
+ for (i = n / 64; i--;) { \
+ _mm_stream_pi((__m64 *)(p+0), c64); \
+ _mm_stream_pi((__m64 *)(p+8), c64); \
+ _mm_stream_pi((__m64 *)(p+16), c64); \
+ _mm_stream_pi((__m64 *)(p+24), c64); \
+ _mm_stream_pi((__m64 *)(p+32), c64); \
+ _mm_stream_pi((__m64 *)(p+40), c64); \
+ _mm_stream_pi((__m64 *)(p+48), c64); \
+ _mm_stream_pi((__m64 *)(p+56), c64); \
+ p += 64; \
+ }
+/* MMX_END: calls _mm_empty() once all rows are written. */
+#define MMX_END \
+ _mm_empty()
+/* DEFINE_MMX_FILLRECT(bpp, type): defines SDL_FillRect<bpp>MMX(), which fills h rows of w pixels of 'type', rows 'pitch' bytes apart. */
+#define DEFINE_MMX_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+ MMX_BEGIN; \
+ /* per row: scalar stores up to an 8-byte boundary, MMX_WORK, then a scalar tail */ \
+ while (h--) { \
+ int i, n = w * bpp; /* n: bytes still to fill in this row */ \
+ Uint8 *p = pixels; \
+ /* NOTE(review): assumes p is a multiple of bpp; otherwise adjust / bpp truncates - confirm */ \
+ if (n > 7) { \
+ int adjust = 8 - ((uintptr_t)p & 7); /* bytes to the next 8-byte boundary; 8 means already aligned */ \
+ if (adjust < 8) { \
+ n -= adjust; \
+ adjust /= bpp; /* bytes -> pixels */ \
+ while(adjust--) { \
+ *((type *)p) = (type)color; \
+ p += bpp; \
+ } \
+ } \
+ MMX_WORK; \
+ } \
+ if (n & 63) { /* bytes MMX_WORK did not cover */ \
+ int remainder = (n & 63); \
+ remainder /= bpp; /* bytes -> pixels */ \
+ while(remainder--) { \
+ *((type *)p) = (type)color; \
+ p += bpp; \
+ } \
+ } \
+ pixels += pitch; \
+ } \
+ \
+ MMX_END; \
+}
+/* Instantiates SDL_FillRect1MMX, SDL_FillRect2MMX and SDL_FillRect4MMX. */
+DEFINE_MMX_FILLRECT(1, Uint8)
+DEFINE_MMX_FILLRECT(2, Uint16)
+DEFINE_MMX_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __MMX__ */
+
+static void
+SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+ while (h--) {
+ int n = w;
+ Uint8 *p = pixels;
+ /* Fill one row of w bytes: single bytes up to a 4-byte boundary, SDL_memset4 for n >> 2 words, then up to 3 trailing bytes. */
+ if (n > 3) {
+ switch ((uintptr_t) p & 3) {
+ case 1: /* the cases fall through on purpose: 3, 2 or 1 byte stores reach the boundary */
+ *p++ = (Uint8) color;
+ --n;
+ case 2:
+ *p++ = (Uint8) color;
+ --n;
+ case 3:
+ *p++ = (Uint8) color;
+ --n;
+ }
+ SDL_memset4(p, color, (n >> 2)); /* SDL_FillRect passes 'color' with the byte repeated in all four positions */
+ }
+ if (n & 3) {
+ p += (n & ~3); /* step over the word-filled part; assumes SDL_memset4 leaves p itself unchanged - TODO confirm */
+ switch (n & 3) {
+ case 3: /* falls through on purpose: writes the remaining 3, 2 or 1 bytes */
+ *p++ = (Uint8) color;
+ case 2:
+ *p++ = (Uint8) color;
+ case 1:
+ *p++ = (Uint8) color;
+ }
+ }
+ pixels += pitch;
+ }
+}
+
+static void
+SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+ while (h--) {
+ int n = w;
+ Uint16 *p = (Uint16 *) pixels;
+ /* Fill one row of w 16-bit pixels: one pixel to reach a 4-byte boundary if needed, SDL_memset4 for pixel pairs, then a possible odd last pixel. */
+ if (n > 1) {
+ if ((uintptr_t) p & 2) {
+ *p++ = (Uint16) color;
+ --n;
+ }
+ SDL_memset4(p, color, (n >> 1)); /* SDL_FillRect passes 'color' with the 16-bit value in both halves */
+ }
+ if (n & 1) {
+ p[n - 1] = (Uint16) color; /* odd count: the last pixel is not covered by the pair fill */
+ }
+ pixels += pitch;
+ }
+}
+
+/* Fill h rows of w 3-byte pixels, rows 'pitch' bytes apart, with 'color'. */
+static void
+SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    Uint8 b0 = (Uint8) (color & 0xFF);          /* lowest address gets the low byte */
+    Uint8 b1 = (Uint8) ((color >> 8) & 0xFF);
+    Uint8 b2 = (Uint8) ((color >> 16) & 0xFF);  /* NOTE(review): little-endian layout; big-endian needs b2 first - confirm */
+
+    while (h--) {
+        int n = w;
+        Uint8 *p = pixels;
+        while (n--) {
+            *p++ = b0;
+            *p++ = b1;
+            *p++ = b2;
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+ while (h--) { /* one SDL_memset4 call per row, h rows */
+ SDL_memset4(pixels, color, w); /* presumably stores w 32-bit copies of 'color' - confirm against SDL_memset4 */
+ pixels += pitch; /* advance to the next row */
+ }
+}
+
+/* Fill 'dstrect' of 'dst' with 'color'; a NULL 'dstrect' means the whole clip
+ * rectangle. 'dstrect' is also the output argument of the clipping call. The
+ * surface must already be locked. Returns 0, or -1 after SDL_SetError(). */
+int
+SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
+{
+    Uint8 *pixels;
+
+    /* This function doesn't work on surfaces < 8 bpp */
+    if (dst->format->BitsPerPixel < 8) {
+        SDL_SetError("SDL_FillRect(): Unsupported surface format");
+        return (-1);
+    }
+
+    /* If 'dstrect' == NULL, then fill the whole surface */
+    if (dstrect) {
+        /* Perform clipping */
+        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
+            return (0);
+        }
+    } else {
+        dstrect = &dst->clip_rect;
+    }
+
+    /* Software fill: the caller must have made the pixels available */
+    if (!dst->pixels) {
+        SDL_SetError("SDL_FillRect(): You must lock the surface");
+        return (-1);
+    }
+
+    pixels =
+        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
+        dstrect->x * dst->format->BytesPerPixel;
+
+    switch (dst->format->BytesPerPixel) {
+    case 1:
+        {
+            color |= (color << 8);
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 2:
+        {
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 3:
+        /* 24-bit RGB is a slow path, at least for now. */
+        {
+            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 4:
+        {
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+    }
+
+    /* No SDL_UnlockSurface() here: this function took no lock of its own. */
+
+    /* We're done! */
+    return (0);
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_surface.c Thu Aug 16 06:40:34 2007 +0000
+++ b/src/video/SDL_surface.c Thu Aug 16 21:43:19 2007 +0000
@@ -509,342 +509,6 @@
return 0;
}
-#ifdef __SSE__
-/* *INDENT-OFF* */
-
-#ifdef _MSC_VER
-#define SSE_BEGIN \
- __m128 c128; \
- c128.m128_u32[0] = color; \
- c128.m128_u32[1] = color; \
- c128.m128_u32[2] = color; \
- c128.m128_u32[3] = color;
-#else
-#define SSE_BEGIN \
- DECLARE_ALIGNED(Uint32, cccc[4], 16); \
- cccc[0] = color; \
- cccc[1] = color; \
- cccc[2] = color; \
- cccc[3] = color; \
- __m128 c128 = *(__m128 *)cccc;
-#endif
-
-#define SSE_WORK \
- for (i = n / 64; i--;) { \
- _mm_stream_ps((float *)(p+0), c128); \
- _mm_stream_ps((float *)(p+16), c128); \
- _mm_stream_ps((float *)(p+32), c128); \
- _mm_stream_ps((float *)(p+48), c128); \
- p += 64; \
- }
-
-#define SSE_END
-
-#define DEFINE_SSE_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
- SSE_BEGIN; \
- \
- while (h--) { \
- int i, n = w * bpp; \
- Uint8 *p = pixels; \
- \
- if (n > 15) { \
- int adjust = 16 - ((uintptr_t)p & 15); \
- if (adjust < 16) { \
- n -= adjust; \
- adjust /= bpp; \
- while(adjust--) { \
- *((type *)p) = (type)color; \
- p += bpp; \
- } \
- } \
- SSE_WORK; \
- } \
- if (n & 63) { \
- int remainder = (n & 63); \
- remainder /= bpp; \
- while(remainder--) { \
- *((type *)p) = (type)color; \
- p += bpp; \
- } \
- } \
- pixels += pitch; \
- } \
- \
- SSE_END; \
-}
-
-DEFINE_SSE_FILLRECT(1, Uint8)
-DEFINE_SSE_FILLRECT(2, Uint16)
-DEFINE_SSE_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __SSE__ */
-
-#ifdef __MMX__
-/* *INDENT-OFF* */
-
-#define MMX_BEGIN \
- __m64 c64 = _mm_set_pi32(color, color)
-
-#define MMX_WORK \
- for (i = n / 64; i--;) { \
- _mm_stream_pi((__m64 *)(p+0), c64); \
- _mm_stream_pi((__m64 *)(p+8), c64); \
- _mm_stream_pi((__m64 *)(p+16), c64); \
- _mm_stream_pi((__m64 *)(p+24), c64); \
- _mm_stream_pi((__m64 *)(p+32), c64); \
- _mm_stream_pi((__m64 *)(p+40), c64); \
- _mm_stream_pi((__m64 *)(p+48), c64); \
- _mm_stream_pi((__m64 *)(p+56), c64); \
- p += 64; \
- }
-
-#define MMX_END \
- _mm_empty()
-
-#define DEFINE_MMX_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
- MMX_BEGIN; \
- \
- while (h--) { \
- int i, n = w * bpp; \
- Uint8 *p = pixels; \
- \
- if (n > 7) { \
- int adjust = 8 - ((uintptr_t)p & 7); \
- if (adjust < 8) { \
- n -= adjust; \
- adjust /= bpp; \
- while(adjust--) { \
- *((type *)p) = (type)color; \
- p += bpp; \
- } \
- } \
- MMX_WORK; \
- } \
- if (n & 63) { \
- int remainder = (n & 63); \
- remainder /= bpp; \
- while(remainder--) { \
- *((type *)p) = (type)color; \
- p += bpp; \
- } \
- } \
- pixels += pitch; \
- } \
- \
- MMX_END; \
-}
-
-DEFINE_MMX_FILLRECT(1, Uint8)
-DEFINE_MMX_FILLRECT(2, Uint16)
-DEFINE_MMX_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __MMX__ */
-
-static void
-SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
- while (h--) {
- int n = w;
- Uint8 *p = pixels;
-
- if (n > 3) {
- switch ((uintptr_t) p & 3) {
- case 1:
- *p++ = (Uint8) color;
- --n;
- case 2:
- *p++ = (Uint8) color;
- --n;
- case 3:
- *p++ = (Uint8) color;
- --n;
- }
- SDL_memset4(p, color, (n >> 2));
- }
- if (n & 3) {
- p += (n & ~3);
- switch (n & 3) {
- case 3:
- *p++ = (Uint8) color;
- case 2:
- *p++ = (Uint8) color;
- case 1:
- *p++ = (Uint8) color;
- }
- }
- pixels += pitch;
- }
-}
-
-static void
-SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
- while (h--) {
- int n = w;
- Uint16 *p = (Uint16 *) pixels;
-
- if (n > 1) {
- if ((uintptr_t) p & 2) {
- *p++ = (Uint16) color;
- --n;
- }
- SDL_memset4(p, color, (n >> 1));
- }
- if (n & 1) {
- p[n - 1] = (Uint16) color;
- }
- pixels += pitch;
- }
-}
-
-static void
-SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
- Uint8 r = (Uint8) (color & 0xFF);
- Uint8 g = (Uint8) ((color >> 8) & 0xFF);
- Uint8 b = (Uint8) ((color >> 16) & 0xFF);
-
- while (h--) {
- int n = w;
- Uint8 *p = pixels;
-
- while (n--) {
- *p++ = r;
- *p++ = g;
- *p++ = b;
- }
- pixels += pitch;
- }
-}
-
-static void
-SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
- while (h--) {
- SDL_memset4(pixels, color, w);
- pixels += pitch;
- }
-}
-
-/*
- * This function performs a fast fill of the given rectangle with 'color'
- */
-int
-SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
-{
- Uint8 *pixels;
-
- /* This function doesn't work on surfaces < 8 bpp */
- if (dst->format->BitsPerPixel < 8) {
- SDL_SetError("Fill rect on unsupported surface format");
- return (-1);
- }
-
- /* If 'dstrect' == NULL, then fill the whole surface */
- if (dstrect) {
- /* Perform clipping */
- if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
- return (0);
- }
- } else {
- dstrect = &dst->clip_rect;
- }
-
- /* Perform software fill */
- if (SDL_LockSurface(dst) != 0) {
- return (-1);
- }
-
- pixels =
- (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
- dstrect->x * dst->format->BytesPerPixel;
-
- switch (dst->format->BytesPerPixel) {
- case 1:
- {
- color |= (color << 8);
- color |= (color << 16);
-#ifdef __SSE__
- if (SDL_HasSSE()) {
- SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
-#ifdef __MMX__
- if (SDL_HasMMX()) {
- SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
- SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
- break;
- }
-
- case 2:
- {
- color |= (color << 16);
-#ifdef __SSE__
- if (SDL_HasSSE()) {
- SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
-#ifdef __MMX__
- if (SDL_HasMMX()) {
- SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
- SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
- break;
- }
-
- case 3:
- /* 24-bit RGB is a slow path, at least for now. */
- {
- SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
- break;
- }
-
- case 4:
- {
-#ifdef __SSE__
- if (SDL_HasSSE()) {
- SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
-#ifdef __MMX__
- if (SDL_HasMMX()) {
- SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
- dstrect->h);
- break;
- }
-#endif
- SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
- break;
- }
- }
-
- SDL_UnlockSurface(dst);
-
- /* We're done! */
- return (0);
-}
-
/*
* Lock a surface to directly access the pixels
*/