--- a/src/video/SDL_RLEaccel.c Tue Jan 13 03:53:22 2009 +0000
+++ b/src/video/SDL_RLEaccel.c Tue Jan 13 07:20:55 2009 +0000
@@ -91,15 +91,6 @@
#include "SDL_blit.h"
#include "SDL_RLEaccel_c.h"
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
-#define MMX_ASMBLIT
-#endif
-
-#ifdef MMX_ASMBLIT
-#include "mmx.h"
-#include "SDL_cpuinfo.h"
-#endif
-
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
@@ -123,262 +114,6 @@
#define OPAQUE_BLIT(to, from, length, bpp, alpha) \
PIXEL_COPY(to, from, length, bpp)
-#ifdef MMX_ASMBLIT
-
-#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \
- do { \
- Uint32 *srcp = (Uint32 *)(from); \
- Uint32 *dstp = (Uint32 *)(to); \
- int i = 0x00FF00FF; \
- movd_m2r(*(&i), mm3); \
- punpckldq_r2r(mm3, mm3); \
- i = 0xFF000000; \
- movd_m2r(*(&i), mm7); \
- punpckldq_r2r(mm7, mm7); \
- i = alpha | alpha << 16; \
- movd_m2r(*(&i), mm4); \
- punpckldq_r2r(mm4, mm4); \
- pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \
- pxor_r2r(mm7, mm5); /* make clear alpha mask */ \
- i = length; \
- if(i & 1) { \
- movd_m2r((*srcp), mm1); /* src -> mm1 */ \
- punpcklbw_r2r(mm1, mm1); \
- pand_r2r(mm3, mm1); \
- movd_m2r((*dstp), mm2); /* dst -> mm2 */ \
- punpcklbw_r2r(mm2, mm2); \
- pand_r2r(mm3, mm2); \
- psubw_r2r(mm2, mm1); \
- pmullw_r2r(mm4, mm1); \
- psrlw_i2r(8, mm1); \
- paddw_r2r(mm1, mm2); \
- pand_r2r(mm3, mm2); \
- packuswb_r2r(mm2, mm2); \
- pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
- movd_r2m(mm2, *dstp); \
- ++srcp; \
- ++dstp; \
- i--; \
- } \
- for(; i > 0; --i) { \
- movq_m2r((*srcp), mm0); \
- movq_r2r(mm0, mm1); \
- punpcklbw_r2r(mm0, mm0); \
- movq_m2r((*dstp), mm2); \
- punpckhbw_r2r(mm1, mm1); \
- movq_r2r(mm2, mm6); \
- pand_r2r(mm3, mm0); \
- punpcklbw_r2r(mm2, mm2); \
- pand_r2r(mm3, mm1); \
- punpckhbw_r2r(mm6, mm6); \
- pand_r2r(mm3, mm2); \
- psubw_r2r(mm2, mm0); \
- pmullw_r2r(mm4, mm0); \
- pand_r2r(mm3, mm6); \
- psubw_r2r(mm6, mm1); \
- pmullw_r2r(mm4, mm1); \
- psrlw_i2r(8, mm0); \
- paddw_r2r(mm0, mm2); \
- psrlw_i2r(8, mm1); \
- paddw_r2r(mm1, mm6); \
- pand_r2r(mm3, mm2); \
- pand_r2r(mm3, mm6); \
- packuswb_r2r(mm2, mm2); \
- packuswb_r2r(mm6, mm6); \
- psrlq_i2r(32, mm2); \
- psllq_i2r(32, mm6); \
- por_r2r(mm6, mm2); \
- pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
- movq_r2m(mm2, *dstp); \
- srcp += 2; \
- dstp += 2; \
- i--; \
- } \
- emms(); \
- } while(0)
-
-#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \
- do { \
- int i, n = 0; \
- Uint16 *srcp = (Uint16 *)(from); \
- Uint16 *dstp = (Uint16 *)(to); \
- Uint32 ALPHA = 0xF800; \
- movd_m2r(*(&ALPHA), mm1); \
- punpcklwd_r2r(mm1, mm1); \
- punpcklwd_r2r(mm1, mm1); \
- ALPHA = 0x07E0; \
- movd_m2r(*(&ALPHA), mm4); \
- punpcklwd_r2r(mm4, mm4); \
- punpcklwd_r2r(mm4, mm4); \
- ALPHA = 0x001F; \
- movd_m2r(*(&ALPHA), mm7); \
- punpcklwd_r2r(mm7, mm7); \
- punpcklwd_r2r(mm7, mm7); \
- alpha &= ~(1+2+4); \
- i = (Uint32)alpha | (Uint32)alpha << 16; \
- movd_m2r(*(&i), mm0); \
- punpckldq_r2r(mm0, mm0); \
- ALPHA = alpha >> 3; \
- i = ((int)(length) & 3); \
- for(; i > 0; --i) { \
- Uint32 s = *srcp++; \
- Uint32 d = *dstp; \
- s = (s | s << 16) & 0x07e0f81f; \
- d = (d | d << 16) & 0x07e0f81f; \
- d += (s - d) * ALPHA >> 5; \
- d &= 0x07e0f81f; \
- *dstp++ = d | d >> 16; \
- n++; \
- } \
- i = (int)(length) - n; \
- for(; i > 0; --i) { \
- movq_m2r((*dstp), mm3); \
- movq_m2r((*srcp), mm2); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm1 , mm5); \
- psrlq_i2r(11, mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm1 , mm6); \
- psrlq_i2r(11, mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- psllq_i2r(11, mm6); \
- pand_r2r(mm1, mm6); \
- movq_r2r(mm4, mm5); \
- por_r2r(mm7, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm4 , mm5); \
- psrlq_i2r(5, mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm4 , mm6); \
- psrlq_i2r(5, mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- psllq_i2r(5, mm6); \
- pand_r2r(mm4, mm6); \
- movq_r2r(mm1, mm5); \
- por_r2r(mm7, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm7 , mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm7 , mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- pand_r2r(mm7, mm6); \
- movq_r2r(mm1, mm5); \
- por_r2r(mm4, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2m(mm3, *dstp); \
- srcp += 4; \
- dstp += 4; \
- i -= 3; \
- } \
- emms(); \
- } while(0)
-
-#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \
- do { \
- int i, n = 0; \
- Uint16 *srcp = (Uint16 *)(from); \
- Uint16 *dstp = (Uint16 *)(to); \
- Uint32 ALPHA = 0x7C00; \
- movd_m2r(*(&ALPHA), mm1); \
- punpcklwd_r2r(mm1, mm1); \
- punpcklwd_r2r(mm1, mm1); \
- ALPHA = 0x03E0; \
- movd_m2r(*(&ALPHA), mm4); \
- punpcklwd_r2r(mm4, mm4); \
- punpcklwd_r2r(mm4, mm4); \
- ALPHA = 0x001F; \
- movd_m2r(*(&ALPHA), mm7); \
- punpcklwd_r2r(mm7, mm7); \
- punpcklwd_r2r(mm7, mm7); \
- alpha &= ~(1+2+4); \
- i = (Uint32)alpha | (Uint32)alpha << 16; \
- movd_m2r(*(&i), mm0); \
- punpckldq_r2r(mm0, mm0); \
- i = ((int)(length) & 3); \
- ALPHA = alpha >> 3; \
- for(; i > 0; --i) { \
- Uint32 s = *srcp++; \
- Uint32 d = *dstp; \
- s = (s | s << 16) & 0x03e07c1f; \
- d = (d | d << 16) & 0x03e07c1f; \
- d += (s - d) * ALPHA >> 5; \
- d &= 0x03e07c1f; \
- *dstp++ = d | d >> 16; \
- n++; \
- } \
- i = (int)(length) - n; \
- for(; i > 0; --i) { \
- movq_m2r((*dstp), mm3); \
- movq_m2r((*srcp), mm2); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm1 , mm5); \
- psrlq_i2r(10, mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm1 , mm6); \
- psrlq_i2r(10, mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- psllq_i2r(10, mm6); \
- pand_r2r(mm1, mm6); \
- movq_r2r(mm4, mm5); \
- por_r2r(mm7, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm4 , mm5); \
- psrlq_i2r(5, mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm4 , mm6); \
- psrlq_i2r(5, mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- psllq_i2r(5, mm6); \
- pand_r2r(mm4, mm6); \
- movq_r2r(mm1, mm5); \
- por_r2r(mm7, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2r(mm2, mm5); \
- pand_r2r(mm7 , mm5); \
- movq_r2r(mm3, mm6); \
- pand_r2r(mm7 , mm6); \
- psubw_r2r(mm6, mm5); \
- pmullw_r2r(mm0, mm5); \
- psrlw_i2r(8, mm5); \
- paddw_r2r(mm5, mm6); \
- pand_r2r(mm7, mm6); \
- movq_r2r(mm1, mm5); \
- por_r2r(mm4, mm5); \
- pand_r2r(mm5, mm3); \
- por_r2r(mm6, mm3); \
- movq_r2m(mm3, *dstp); \
- srcp += 4; \
- dstp += 4; \
- i -= 3; \
- } \
- emms(); \
- } while(0)
-
-#endif
-
/*
* For 32bpp pixels on the form 0x00rrggbb:
* If we treat the middle component separately, we can process the two
@@ -504,48 +239,6 @@
} \
} while(0)
-#ifdef MMX_ASMBLIT
-
-#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \
- do { \
- Uint32 *srcp = (Uint32 *)(from); \
- Uint32 *dstp = (Uint32 *)(to); \
- int i = 0x00fefefe; \
- movd_m2r(*(&i), mm4); \
- punpckldq_r2r(mm4, mm4); \
- i = 0x00010101; \
- movd_m2r(*(&i), mm3); \
- punpckldq_r2r(mm3, mm3); \
- i = (int)(length); \
- if( i & 1 ) { \
- Uint32 s = *srcp++; \
- Uint32 d = *dstp; \
- *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
- + (s & d & 0x00010101); \
- i--; \
- } \
- for(; i > 0; --i) { \
- movq_m2r((*dstp), mm2); /* dst -> mm2 */ \
- movq_r2r(mm2, mm6); /* dst -> mm6 */ \
- movq_m2r((*srcp), mm1); /* src -> mm1 */ \
- movq_r2r(mm1, mm5); /* src -> mm5 */ \
- pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \
- pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \
- paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \
- psrld_i2r(1, mm5); \
- pand_r2r(mm1, mm2); /* s & d -> mm2 */ \
- pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \
- paddd_r2r(mm5, mm2); \
- movq_r2m(mm2, (*dstp)); \
- dstp += 2; \
- srcp += 2; \
- i--; \
- } \
- emms(); \
- } while(0)
-
-#endif
-
/*
* Special case: 50% alpha (alpha=128)
* This is treated specially because it can be optimized very well, and
@@ -617,94 +310,6 @@
#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \
ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
-#ifdef MMX_ASMBLIT
-
-#define CHOOSE_BLIT(blitter, alpha, fmt) \
- do { \
- if(alpha == 255) { \
- switch(fmt->BytesPerPixel) { \
- case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \
- case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \
- case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \
- case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \
- } \
- } else { \
- switch(fmt->BytesPerPixel) { \
- case 1: \
- /* No 8bpp alpha blitting */ \
- break; \
- \
- case 2: \
- switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
- case 0xffff: \
- if(fmt->Gmask == 0x07e0 \
- || fmt->Rmask == 0x07e0 \
- || fmt->Bmask == 0x07e0) { \
- if(alpha == 128) \
- blitter(2, Uint8, ALPHA_BLIT16_565_50); \
- else { \
- if(SDL_HasMMX()) \
- blitter(2, Uint8, ALPHA_BLIT16_565MMX); \
- else \
- blitter(2, Uint8, ALPHA_BLIT16_565); \
- } \
- } else \
- goto general16; \
- break; \
- \
- case 0x7fff: \
- if(fmt->Gmask == 0x03e0 \
- || fmt->Rmask == 0x03e0 \
- || fmt->Bmask == 0x03e0) { \
- if(alpha == 128) \
- blitter(2, Uint8, ALPHA_BLIT16_555_50); \
- else { \
- if(SDL_HasMMX()) \
- blitter(2, Uint8, ALPHA_BLIT16_555MMX); \
- else \
- blitter(2, Uint8, ALPHA_BLIT16_555); \
- } \
- break; \
- } \
- /* fallthrough */ \
- \
- default: \
- general16: \
- blitter(2, Uint8, ALPHA_BLIT_ANY); \
- } \
- break; \
- \
- case 3: \
- blitter(3, Uint8, ALPHA_BLIT_ANY); \
- break; \
- \
- case 4: \
- if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
- && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
- || fmt->Bmask == 0xff00)) { \
- if(alpha == 128) \
- { \
- if(SDL_HasMMX()) \
- blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\
- else \
- blitter(4, Uint16, ALPHA_BLIT32_888_50);\
- } \
- else \
- { \
- if(SDL_HasMMX()) \
- blitter(4, Uint16, ALPHA_BLIT32_888MMX);\
- else \
- blitter(4, Uint16, ALPHA_BLIT32_888); \
- } \
- } else \
- blitter(4, Uint16, ALPHA_BLIT_ANY); \
- break; \
- } \
- } \
- } while(0)
-
-#else
-
#define CHOOSE_BLIT(blitter, alpha, fmt) \
do { \
if(alpha == 255) { \
@@ -773,8 +378,6 @@
} \
} while(0)
-#endif
-
/*
* This takes care of the case when the surface is clipped on the left and/or
* right. Top clipping has already been taken care of.