Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order)
authorSam Lantinga <slouken@libsdl.org>
Wed, 16 Feb 2011 15:25:10 -0800
changeset 5325 b9c224e16859
parent 5324 cc606b34bd97
child 5326 4a4095fe12e3
Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order) Even if we're blitting between two different surfaces their pixels might still overlap, because of SDL_CreateRGBSurfaceFrom(), so always use SDL_BlitCopy() and check for overlap in that function. When handling overlapping surfaces, don't assume that memcpy() iterates forward, instead use memmove() correctly, and provide a fallback implementation of SDL_memmove() that handles the different cases. Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4. Optimized SDL_memcpy() a bit using the same technique as SDL_memset().
include/SDL_stdinc.h
src/stdlib/SDL_string.c
src/video/SDL_blit.c
src/video/SDL_blit_copy.c
src/video/SDL_blit_copy.h
--- a/include/SDL_stdinc.h	Wed Feb 16 14:38:49 2011 -0800
+++ b/include/SDL_stdinc.h	Wed Feb 16 15:25:10 2011 -0800
@@ -352,8 +352,8 @@
 #endif
 
 /* We can count on memcpy existing on Mac OS X and being well-tuned. */
-#if defined(__MACH__) && defined(__APPLE__)
-#define SDL_memcpy(dst, src, len) memcpy(dst, src, len)
+#if defined(__MACOSX__)
+#define SDL_memcpy      memcpy
 #elif defined(__GNUC__) && defined(i386)
 #define SDL_memcpy(dst, src, len)					  \
 do {									  \
@@ -385,8 +385,8 @@
 #endif
 
 /* We can count on memcpy existing on Mac OS X and being well-tuned. */
-#if defined(__MACH__) && defined(__APPLE__)
-#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4)
+#if defined(__MACOSX__)
+#define SDL_memcpy4(dst, src, len)	SDL_memcpy((dst), (src), (len) << 2)
 #elif defined(__GNUC__) && defined(i386)
 #define SDL_memcpy4(dst, src, len)				\
 do {								\
@@ -400,54 +400,14 @@
 } while(0)
 #endif
 #ifndef SDL_memcpy4
-#define SDL_memcpy4(dst, src, len)	SDL_memcpy(dst, src, (len) << 2)
-#endif
-
-#if defined(__GNUC__) && defined(i386)
-#define SDL_revcpy(dst, src, len)			\
-do {							\
-	int u0, u1, u2;					\
-	char *dstp = SDL_static_cast(char *, dst);			\
-	char *srcp = SDL_static_cast(char *, src);			\
-	int n = (len);					\
-	if ( n >= 4 ) {					\
-	__asm__ __volatile__ (				\
-		"std\n\t"				\
-		"rep ; movsl\n\t"			\
-		"cld\n\t"				\
-		: "=&c" (u0), "=&D" (u1), "=&S" (u2)	\
-		: "0" (n >> 2),				\
-		  "1" (dstp+(n-4)), "2" (srcp+(n-4))	\
-		: "memory" );				\
-	}						\
-	switch (n & 3) {				\
-		case 3: dstp[2] = srcp[2];		\
-		case 2: dstp[1] = srcp[1];		\
-		case 1: dstp[0] = srcp[0];		\
-			break;				\
-		default:				\
-			break;				\
-	}						\
-} while(0)
-#endif
-#ifndef SDL_revcpy
-extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src,
-                                         size_t len);
+#define SDL_memcpy4(dst, src, len)	SDL_memcpy((dst), (src), (len) << 2)
 #endif
 
 #ifdef HAVE_MEMMOVE
 #define SDL_memmove     memmove
-#elif defined(HAVE_BCOPY)
-#define SDL_memmove(d, s, n)	bcopy((s), (d), (n))
 #else
-#define SDL_memmove(dst, src, len)			\
-do {							\
-	if ( dst < src ) {				\
-		SDL_memcpy(dst, src, len);		\
-	} else {					\
-		SDL_revcpy(dst, src, len);		\
-	}						\
-} while(0)
+extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src,
+                                          size_t len);
 #endif
 
 #ifdef HAVE_MEMCMP
--- a/src/stdlib/SDL_string.c	Wed Feb 16 14:38:49 2011 -0800
+++ b/src/stdlib/SDL_string.c	Wed Feb 16 15:25:10 2011 -0800
@@ -265,31 +265,27 @@
 SDL_memset(void *dst, int c, size_t len)
 {
     size_t left = (len % 4);
-    if (len >= 4) {
-        Uint32 value = 0;
-        Uint32 *dstp = (Uint32 *) dst;
-        int i;
-        for (i = 0; i < 4; ++i) {
-            value <<= 8;
-            value |= c;
-        }
-        len /= 4;
-        while (len--) {
-            *dstp++ = value;
-        }
+    Uint32 *dstp4;
+    Uint8 *dstp1;
+    Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24));
+    Uint8 value1 = (Uint8) c;
+
+    dstp4 = (Uint32 *) dst;
+    len /= 4;
+    while (len--) {
+        *dstp4++ = value4;
     }
-    if (left > 0) {
-        Uint8 value = (Uint8) c;
-        Uint8 *dstp = (Uint8 *) dst;
-        switch (left) {
-        case 3:
-            *dstp++ = value;
-        case 2:
-            *dstp++ = value;
-        case 1:
-            *dstp++ = value;
-        }
+
+    dstp1 = (Uint8 *) dstp4;
+    switch (left) {
+    case 3:
+        *dstp1++ = value1;
+    case 2:
+        *dstp1++ = value1;
+    case 1:
+        *dstp1++ = value1;
     }
+
     return dst;
 }
 #endif
@@ -298,25 +294,49 @@
 void *
 SDL_memcpy(void *dst, const void *src, size_t len)
 {
-    char *srcp = (char *) src;
-    char *dstp = (char *) dst;
+    size_t left = (len % 4);
+    Uint32 *srcp4, *dstp4;
+    Uint8 *srcp1, *dstp1;
+
+    srcp4 = (Uint32 *) src;
+    dstp4 = (Uint32 *) dst;
+    len /= 4;
     while (len--) {
-        *dstp++ = *srcp++;
+        *dstp4++ = *srcp4++;
     }
+
+    srcp1 = (Uint8 *) srcp4;
+    dstp1 = (Uint8 *) dstp4;
+    switch (left) {
+    case 3:
+        *dstp1++ = *srcp1++;
+    case 2:
+        *dstp1++ = *srcp1++;
+    case 1:
+        *dstp1++ = *srcp1++;
+    }
+
     return dst;
 }
 #endif
 
-#ifndef SDL_revcpy
+#ifndef SDL_memmove
 void *
-SDL_revcpy(void *dst, const void *src, size_t len)
+SDL_memmove(void *dst, const void *src, size_t len)
 {
     char *srcp = (char *) src;
     char *dstp = (char *) dst;
-    srcp += len - 1;
-    dstp += len - 1;
-    while (len--) {
-        *dstp-- = *srcp--;
+
+    if (src < dst) {
+        srcp += len - 1;
+        dstp += len - 1;
+        while (len--) {
+            *dstp-- = *srcp--;
+        }
+    } else {
+        while (len--) {
+            *dstp++ = *srcp++;
+        }
     }
     return dst;
 }
--- a/src/video/SDL_blit.c	Wed Feb 16 14:38:49 2011 -0800
+++ b/src/video/SDL_blit.c	Wed Feb 16 15:25:10 2011 -0800
@@ -205,12 +205,7 @@
 
     /* Choose a standard blit function */
     if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
-        /* Handle overlapping blits on the same surface */
-        if (surface == dst) {
-            blit = SDL_BlitCopyOverlap;
-        } else {
-            blit = SDL_BlitCopy;
-        }
+        blit = SDL_BlitCopy;
     } else if (surface->format->BitsPerPixel < 8) {
         blit = SDL_CalculateBlit0(surface);
     } else if (surface->format->BytesPerPixel == 1) {
--- a/src/video/SDL_blit_copy.c	Wed Feb 16 14:38:49 2011 -0800
+++ b/src/video/SDL_blit_copy.c	Wed Feb 16 15:25:10 2011 -0800
@@ -96,6 +96,7 @@
 void
 SDL_BlitCopy(SDL_BlitInfo * info)
 {
+    SDL_bool overlap;
     Uint8 *src, *dst;
     int w, h;
     int srcskip, dstskip;
@@ -107,6 +108,21 @@
     srcskip = info->src_pitch;
     dstskip = info->dst_pitch;
 
+    /* Properly handle overlapping blits */
+    if (src < dst) {
+        overlap = (dst < (src + h*srcskip));
+    } else {
+        overlap = (src < (dst + h*dstskip));
+    }
+    if (overlap) {
+        while (h--) {
+            SDL_memmove(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        return;
+    }
+
 #ifdef __SSE__
     if (SDL_HasSSE() &&
         !((uintptr_t) src & 15) && !(srcskip & 15) &&
@@ -141,29 +157,4 @@
     }
 }
 
-void
-SDL_BlitCopyOverlap(SDL_BlitInfo * info)
-{
-    Uint8 *src, *dst;
-    int w, h;
-    int skip;
-
-    w = info->dst_w * info->dst_fmt->BytesPerPixel;
-    h = info->dst_h;
-    src = info->src;
-    dst = info->dst;
-    skip = info->src_pitch;
-    if ((dst < src) || (dst >= (src + h * skip))) {
-        SDL_BlitCopy(info);
-    } else {
-        src += ((h - 1) * skip);
-        dst += ((h - 1) * skip);
-        while (h--) {
-            SDL_revcpy(dst, src, w);
-            src -= skip;
-            dst -= skip;
-        }
-    }
-}
-
 /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_blit_copy.h	Wed Feb 16 14:38:49 2011 -0800
+++ b/src/video/SDL_blit_copy.h	Wed Feb 16 15:25:10 2011 -0800
@@ -21,6 +21,5 @@
 */
 
 void SDL_BlitCopy(SDL_BlitInfo * info);
-void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
 
 /* vi: set ts=4 sw=4 expandtab: */