Replaced SDL_memset4() implementation with a call to SDL_memset().
author: Ryan C. Gordon <icculus@icculus.org>
Mon, 08 Jul 2013 23:22:36 -0400
changeset 7382 898992405fa7
parent 7381 aa0d6c66e3fa
child 7383 97d080ba383f
Replaced SDL_memset4() implementation with a call to SDL_memset(). The previous implementation was slower than the C runtime on Mac OS X, Linux, and Windows, and quite a bit slower still when using the C fallback instead of the inline asm. Fixes Bugzilla #1755.
include/SDL_stdinc.h
--- a/include/SDL_stdinc.h	Mon Jul 08 17:51:17 2013 -0400
+++ b/include/SDL_stdinc.h	Mon Jul 08 23:22:36 2013 -0400
@@ -257,33 +257,9 @@
 /* Note that the semantics are different from memset() in that this is a 32-bit assignment */
 SDL_FORCE_INLINE void SDL_memset4(void *dst, int val, size_t dwords)
 {
-#if defined(__GNUC__) && defined(i386)
-    int u0, u1, u2;
-    __asm__ __volatile__ (
-        "cld \n\t"
-        "rep ; stosl \n\t"
-        : "=&D" (u0), "=&a" (u1), "=&c" (u2)
-        : "0" (dst), "1" (val), "2" (SDL_static_cast(Uint32, dwords))
-        : "memory"
-    );
-#else
-    size_t _n = (dwords + 3) / 4;
-    Uint32 *_p = SDL_static_cast(Uint32 *, dst);
-    Uint32 _val = (val);
-    if (dwords == 0)
-        return;
-    switch (dwords % 4)
-    {
-        case 0: do {    *_p++ = _val;
-        case 3:         *_p++ = _val;
-        case 2:         *_p++ = _val;
-        case 1:         *_p++ = _val;
-        } while ( --_n );
-    }
-#endif
+    SDL_memset(dst, val, dwords * 4);
 }
 
-
 extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src, size_t len);
 
 SDL_FORCE_INLINE void *SDL_memcpy4(void *dst, const void *src, size_t dwords)