src/video/SDL_yuv_mmx.c
changeset 1038 29d7db09776e
parent 949 e0d96eb0af19
child 1148 63fb2da89a4b
--- a/src/video/SDL_yuv_mmx.c	Sat Feb 12 19:39:08 2005 +0000
+++ b/src/video/SDL_yuv_mmx.c	Sun Feb 13 07:10:02 2005 +0000
@@ -30,29 +30,42 @@
 
 #include "SDL_types.h"
 
-static unsigned int  MMX_0080w[]    = {0x00800080, 0x00800080};
-static unsigned int  MMX_00FFw[]    = {0x00ff00ff, 0x00ff00ff}; 
-static unsigned int  MMX_FF00w[]    = {0xff00ff00, 0xff00ff00}; 
+#if __GNUC__ > 2  /* GCC 3 or newer: the asm blocks below use named operands (%[name]) */
+#    undef GCC2_HACK
+#else  /* otherwise: the asm strings name the constant tables directly by symbol (e.g. "_MMX_0080w") */
+#    define GCC2_HACK
+#endif
 
-static unsigned short MMX_Ycoeff[]  = {0x004a, 0x004a, 0x004a, 0x004a}; 
-
-static unsigned short MMX_UbluRGB[] = {0x0072, 0x0072, 0x0072, 0x0072};    
-static unsigned short MMX_VredRGB[] = {0x0059, 0x0059, 0x0059, 0x0059};  
-static unsigned short MMX_UgrnRGB[] = {0xffea, 0xffea, 0xffea, 0xffea}; 
-static unsigned short MMX_VgrnRGB[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2};  
+ 
+#if defined(GCC2_HACK) && defined (__ELF__)  /* asm strings spell the tables "_MMX_xxx"; presumably ELF adds no leading underscore to C symbols -- confirm */
+#define ASM_VAR(X) _##X  /* declare the C object itself as _X so the literal "_X" in the asm text resolves */
+#else
+#define ASM_VAR(X) X  /* identifier is used unchanged */
+#endif
+ 
+static volatile unsigned int  ASM_VAR(MMX_0080w)[]    = {0x00800080, 0x00800080};  /* 128 in each of the four 16-bit words; psubw'ed from the unpacked chroma words. NOTE(review): volatile presumably keeps tables that are only named inside asm strings from being discarded -- confirm */
+static volatile unsigned int  ASM_VAR(MMX_00FFw)[]    = {0x00ff00ff, 0x00ff00ff};  /* pand mask: keeps the low byte of every 16-bit word */
+static volatile unsigned int  ASM_VAR(MMX_FF00w)[]    = {0xff00ff00, 0xff00ff00};  /* pand mask: keeps the high byte of every 16-bit word */
 
-static unsigned short MMX_Ublu5x5[] = {0x0081, 0x0081, 0x0081, 0x0081};
-static unsigned short MMX_Vred5x5[] = {0x0066, 0x0066, 0x0066, 0x0066};
-static unsigned short MMX_Ugrn555[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7};
-static unsigned short MMX_Vgrn555[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc};
-static unsigned short MMX_Ugrn565[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
-static unsigned short MMX_Vgrn565[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
+static volatile unsigned short ASM_VAR(MMX_Ycoeff)[]  = {0x004a, 0x004a, 0x004a, 0x004a};  /* 74: pmullw factor applied to the luminance words (lum1..lum4) */
+
+static volatile unsigned short ASM_VAR(MMX_UbluRGB)[] = {0x0072, 0x0072, 0x0072, 0x0072};  /* 114; the product is shifted right by 6 (psraw $6), i.e. a factor scaled by 64 */
+static volatile unsigned short ASM_VAR(MMX_VredRGB)[] = {0x0059, 0x0059, 0x0059, 0x0059};  /* 89, scaled by 64 */
+static volatile unsigned short ASM_VAR(MMX_UgrnRGB)[] = {0xffea, 0xffea, 0xffea, 0xffea};  /* -22 as a signed 16-bit word, scaled by 64. NOTE(review): the comment at the pmullw use site says -109 -- confirm which is intended */
+static volatile unsigned short ASM_VAR(MMX_VgrnRGB)[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2};  /* -46 as a signed 16-bit word, scaled by 64 */
 
-static unsigned short MMX_red555[]  = {0x7c00, 0x7c00, 0x7c00, 0x7c00};
-static unsigned short MMX_red565[]  = {0xf800, 0xf800, 0xf800, 0xf800};
-static unsigned short MMX_grn555[]  = {0x03e0, 0x03e0, 0x03e0, 0x03e0};
-static unsigned short MMX_grn565[]  = {0x07e0, 0x07e0, 0x07e0, 0x07e0};
-static unsigned short MMX_blu5x5[]  = {0x001f, 0x001f, 0x001f, 0x001f};
+static volatile unsigned short ASM_VAR(MMX_Ublu5x5)[] = {0x0081, 0x0081, 0x0081, 0x0081};  /* 129: Cb-to-blue factor in the 565 routine ("Cb2blue") */
+static volatile unsigned short ASM_VAR(MMX_Vred5x5)[] = {0x0066, 0x0066, 0x0066, 0x0066};  /* 102: Cr-to-red factor in the 565 routine ("Cr2red") */
+static volatile unsigned short ASM_VAR(MMX_Ugrn555)[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7};  /* -25 as a signed 16-bit word; presumably the 555 counterpart of MMX_Ugrn565 -- no use visible in this diff */
+static volatile unsigned short ASM_VAR(MMX_Vgrn555)[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc};  /* -52 as a signed 16-bit word; presumably the 555 counterpart of MMX_Vgrn565 -- no use visible in this diff */
+static volatile unsigned short ASM_VAR(MMX_Ugrn565)[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};  /* -24 as a signed 16-bit word: Cb-to-green factor in the 565 routine */
+static volatile unsigned short ASM_VAR(MMX_Vgrn565)[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};  /* -51 as a signed 16-bit word: Cr-to-green factor in the 565 routine */
+
+static volatile unsigned short ASM_VAR(MMX_red555)[]  = {0x7c00, 0x7c00, 0x7c00, 0x7c00};  /* mask for bits 10-14 of each word */
+static volatile unsigned short ASM_VAR(MMX_red565)[]  = {0xf800, 0xf800, 0xf800, 0xf800};  /* mask for bits 11-15 (top five bits) of each word */
+static volatile unsigned short ASM_VAR(MMX_grn555)[]  = {0x03e0, 0x03e0, 0x03e0, 0x03e0};  /* mask for bits 5-9 of each word */
+static volatile unsigned short ASM_VAR(MMX_grn565)[]  = {0x07e0, 0x07e0, 0x07e0, 0x07e0};  /* mask for bits 5-10 of each word */
+static volatile unsigned short ASM_VAR(MMX_blu5x5)[]  = {0x001f, 0x001f, 0x001f, 0x001f};  /* mask for bits 0-4 of each word */
 
 /**
    This MMX assembler is my first assembler/MMX program ever.
@@ -114,12 +127,21 @@
 		 "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
 		 "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
 		 "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
+#ifdef GCC2_HACK
+		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
+#else
 		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
+#endif
 
 		 // create Cr_g (result in mm0)
 		 "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
+#ifdef GCC2_HACK
+		 "pmullw _MMX_VgrnRGB,%%mm0\n"// red*-46dec=0.7136*64
+		 "pmullw _MMX_VredRGB,%%mm1\n"// red*89dec=1.4013*64
+#else
 		 "pmullw %[_MMX_VgrnRGB],%%mm0\n"// red*-46dec=0.7136*64
 		 "pmullw %[_MMX_VredRGB],%%mm1\n"// red*89dec=1.4013*64
+#endif
 		 "psraw  $6, %%mm0\n"           // red=red/64
 		 "psraw  $6, %%mm1\n"           // red=red/64
 		 
@@ -128,8 +150,13 @@
 		 "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
 		 "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
 		 "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
+#ifdef GCC2_HACK
+		 "pand _MMX_FF00w,%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
+		 "pand _MMX_00FFw,%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
+#else
 		 "pand %[_MMX_FF00w],%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
 		 "pand %[_MMX_00FFw],%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
+#endif
 		 "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
 
 		 // create R (result in mm6)
@@ -146,11 +173,20 @@
 		 "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
 		 "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
 		 "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
+#ifdef GCC2_HACK
+		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
+#else
 		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
+#endif
 		 // create Cb_g (result in mm5)
 		 "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
+#ifdef GCC2_HACK
+		 "pmullw _MMX_UgrnRGB,%%mm5\n"    // blue*-109dec=1.7129*64
+		 "pmullw _MMX_UbluRGB,%%mm1\n"    // blue*114dec=1.78125*64
+#else
 		 "pmullw %[_MMX_UgrnRGB],%%mm5\n"    // blue*-109dec=1.7129*64
 		 "pmullw %[_MMX_UbluRGB],%%mm1\n"    // blue*114dec=1.78125*64
+#endif
 		 "psraw  $6, %%mm5\n"            // blue=red/64
 		 "psraw  $6, %%mm1\n"            // blue=blue/64
 
@@ -232,15 +268,17 @@
 		 "popl %%ebx\n"
 		 :
 		 : "m" (cr), "r"(cb),"r"(lum),
-		   "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
-                   [_MMX_0080w] "m" (*MMX_0080w),
-                   [_MMX_00FFw] "m" (*MMX_00FFw),
-                   [_MMX_FF00w] "m" (*MMX_FF00w),
-                   [_MMX_VgrnRGB] "m" (*MMX_VgrnRGB),
-                   [_MMX_VredRGB] "m" (*MMX_VredRGB),
-                   [_MMX_UgrnRGB] "m" (*MMX_UgrnRGB),
-                   [_MMX_UbluRGB] "m" (*MMX_UbluRGB)
-		 );
+		 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
+#ifndef GCC2_HACK
+		 ,[_MMX_0080w] "m" (*MMX_0080w),
+		 [_MMX_00FFw] "m" (*MMX_00FFw),
+		 [_MMX_FF00w] "m" (*MMX_FF00w),
+		 [_MMX_VgrnRGB] "m" (*MMX_VgrnRGB),
+		 [_MMX_VredRGB] "m" (*MMX_VredRGB),
+		 [_MMX_UgrnRGB] "m" (*MMX_UgrnRGB),
+		 [_MMX_UbluRGB] "m" (*MMX_UbluRGB)
+#endif
+			 );
 }
 
 void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
@@ -269,21 +307,48 @@
          "movd           (%%ebx),                %%mm1\n" // 4 Cr                0  0  0  0 v3 v2 v1 v0
          "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
          "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
+#ifdef GCC2_HACK
+         "psubw          _MMX_0080w,             %%mm0\n"
+         "psubw          _MMX_0080w,             %%mm1\n"
+#else
          "psubw          %[_MMX_0080w],             %%mm0\n"
          "psubw          %[_MMX_0080w],             %%mm1\n"
+#endif
          "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
          "movq           %%mm1,                  %%mm3\n" // Cr
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ugrn565,           %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
+#else
          "pmullw         %[_MMX_Ugrn565],           %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
+#endif
          "movq           (%2),                   %%mm6\n" // L1      l7 L6 L5 L4 L3 L2 L1 L0
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ublu5x5,           %%mm0\n" // Cb2blue
+         "pand           _MMX_00FFw,             %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
+         "pmullw         _MMX_Vgrn565,           %%mm3\n" // Cr2green
+#else
          "pmullw         %[_MMX_Ublu5x5],           %%mm0\n" // Cb2blue
          "pand           %[_MMX_00FFw],             %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
          "pmullw         %[_MMX_Vgrn565],           %%mm3\n" // Cr2green
+#endif
          "movq           (%2),                   %%mm7\n" // L2
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Vred5x5,           %%mm1\n" // Cr2red
+#else
          "pmullw         %[_MMX_Vred5x5],           %%mm1\n" // Cr2red
+#endif
          "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ycoeff,            %%mm6\n" // lum1
+#else
          "pmullw         %[_MMX_Ycoeff],            %%mm6\n" // lum1
+#endif
          "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ycoeff,            %%mm7\n" // lum2
+#else
          "pmullw         %[_MMX_Ycoeff],            %%mm7\n" // lum2
+#endif
 
          "movq           %%mm6,                  %%mm4\n" // lum1
          "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
@@ -301,11 +366,20 @@
          "punpcklbw      %%mm4,                  %%mm4\n"
          "punpcklbw      %%mm5,                  %%mm5\n"
 
+#ifdef GCC2_HACK
+         "pand           _MMX_red565,            %%mm4\n"
+#else
          "pand           %[_MMX_red565],            %%mm4\n"
+#endif
          "psllw          $3,                     %%mm5\n" // GREEN       1
          "punpcklbw      %%mm6,                  %%mm6\n"
+#ifdef GCC2_HACK
+         "pand           _MMX_grn565,            %%mm5\n"
+         "pand           _MMX_red565,            %%mm6\n"
+#else
          "pand           %[_MMX_grn565],            %%mm5\n"
          "pand           %[_MMX_red565],            %%mm6\n"
+#endif
          "por            %%mm5,                  %%mm4\n" //
          "psrlw          $11,                    %%mm6\n" // BLUE        1
          "movq           %%mm3,                  %%mm5\n" // lum2
@@ -319,23 +393,44 @@
          "packuswb       %%mm3,                  %%mm3\n"
          "packuswb       %%mm5,                  %%mm5\n"
          "packuswb       %%mm7,                  %%mm7\n"
+#ifdef GCC2_HACK
+         "pand           _MMX_00FFw,             %%mm6\n" // L3
+#else
          "pand           %[_MMX_00FFw],             %%mm6\n" // L3
+#endif
          "punpcklbw      %%mm3,                  %%mm3\n"
          "punpcklbw      %%mm5,                  %%mm5\n"
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ycoeff,            %%mm6\n" // lum3
+#else
          "pmullw         %[_MMX_Ycoeff],            %%mm6\n" // lum3
+#endif
          "punpcklbw      %%mm7,                  %%mm7\n"
          "psllw          $3,                     %%mm5\n" // GREEN 2
+#ifdef GCC2_HACK
+         "pand           _MMX_red565,            %%mm7\n"
+         "pand           _MMX_red565,            %%mm3\n"
+#else
          "pand           %[_MMX_red565],            %%mm7\n"
          "pand           %[_MMX_red565],            %%mm3\n"
+#endif
          "psrlw          $11,                    %%mm7\n" // BLUE  2
+#ifdef GCC2_HACK
+         "pand           _MMX_grn565,            %%mm5\n"
+#else
          "pand           %[_MMX_grn565],            %%mm5\n"
+#endif
          "por            %%mm7,                  %%mm3\n"
          "movq           (%2,%4),                %%mm7\n" // L4 load lum2
          "por            %%mm5,                  %%mm3\n" //
          "psrlw          $8,                     %%mm7\n" // L4
          "movq           %%mm4,                  %%mm5\n"
          "punpcklwd      %%mm3,                  %%mm4\n"
+#ifdef GCC2_HACK
+         "pmullw         _MMX_Ycoeff,            %%mm7\n" // lum4
+#else
          "pmullw         %[_MMX_Ycoeff],            %%mm7\n" // lum4
+#endif
          "punpckhwd      %%mm3,                  %%mm5\n"
 
          "movq           %%mm4,                  (%3)\n"  // write row1
@@ -362,11 +457,20 @@
          "punpcklbw      %%mm5,                  %%mm5\n"
          "punpcklbw      %%mm6,                  %%mm6\n"
          "psllw          $3,                     %%mm5\n" // GREEN 3
+#ifdef GCC2_HACK
+         "pand           _MMX_red565,            %%mm4\n"
+#else
          "pand           %[_MMX_red565],            %%mm4\n"
+#endif
          "psraw          $6,                     %%mm3\n" // psr 6
          "psraw          $6,                     %%mm0\n"
+#ifdef GCC2_HACK
+         "pand           _MMX_red565,            %%mm6\n" // BLUE
+         "pand           _MMX_grn565,            %%mm5\n"
+#else
          "pand           %[_MMX_red565],            %%mm6\n" // BLUE
          "pand           %[_MMX_grn565],            %%mm5\n"
+#endif
          "psrlw          $11,                    %%mm6\n" // BLUE  3
          "por            %%mm5,                  %%mm4\n"
          "psraw          $6,                     %%mm7\n"
@@ -377,11 +481,20 @@
          "punpcklbw      %%mm3,                  %%mm3\n"
          "punpcklbw      %%mm0,                  %%mm0\n"
          "punpcklbw      %%mm7,                  %%mm7\n"
+#ifdef GCC2_HACK
+         "pand           _MMX_red565,            %%mm3\n"
+         "pand           _MMX_red565,            %%mm7\n" // BLUE
+#else
          "pand           %[_MMX_red565],            %%mm3\n"
          "pand           %[_MMX_red565],            %%mm7\n" // BLUE
+#endif
          "psllw          $3,                     %%mm0\n" // GREEN 4
          "psrlw          $11,                    %%mm7\n"
+#ifdef GCC2_HACK
+         "pand           _MMX_grn565,            %%mm0\n"
+#else
          "pand           %[_MMX_grn565],            %%mm0\n"
+#endif
          "por            %%mm7,                  %%mm3\n"
          "por            %%mm0,                  %%mm3\n"
 
@@ -411,10 +524,11 @@
 	 "jl             1b\n"
          "emms\n"
 	 "popl %%ebx\n"
-	:
-	:"m" (cr), "r"(cb),"r"(lum),
-	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
-	 [_MMX_0080w] "m" (*MMX_0080w),
+         :
+         :"m" (cr), "r"(cb),"r"(lum),
+	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
+#ifndef GCC2_HACK
+	 ,[_MMX_0080w] "m" (*MMX_0080w),
 	 [_MMX_Ugrn565] "m" (*MMX_Ugrn565),
 	 [_MMX_Ublu5x5] "m" (*MMX_Ublu5x5),
 	 [_MMX_00FFw] "m" (*MMX_00FFw),
@@ -423,7 +537,10 @@
 	 [_MMX_Ycoeff] "m" (*MMX_Ycoeff),
 	 [_MMX_red565] "m" (*MMX_red565),
 	 [_MMX_grn565] "m" (*MMX_grn565)
-         );
+#endif
+		 );
 }
 
+#undef GCC2_HACK
+
 #endif /* GCC i386 inline assembly */