author  Sam Lantinga <slouken@libsdl.org> 
Sun, 13 Feb 2005 07:10:02 +0000  
changeset 1038  29d7db09776e 
parent 949  e0d96eb0af19 
child 1148  63fb2da89a4b 
permissions  rwrr 
0  1 
/* 
2 
SDL  Simple DirectMedia Layer 

769
b8d311d90021
Updated copyright information for 2004 (Happy New Year!)
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset

3 
Copyright (C) 1997-2004 Sam Lantinga
0  4 

5 
This library is free software; you can redistribute it and/or 

6 
modify it under the terms of the GNU Library General Public 

7 
License as published by the Free Software Foundation; either 

8 
version 2 of the License, or (at your option) any later version. 

9 

10 
This library is distributed in the hope that it will be useful, 

11 
but WITHOUT ANY WARRANTY; without even the implied warranty of 

12 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 

13 
Library General Public License for more details. 

14 

15 
You should have received a copy of the GNU Library General Public 

16 
License along with this library; if not, write to the Free 

17 
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

18 

19 
Sam Lantinga 

252
e8157fcb3114
Updated the source with the correct email address
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset

20 
slouken@libsdl.org 
0  21 
*/ 
22 

23 
#ifdef SAVE_RCSID
/* RCS identification string, compiled in only when SAVE_RCSID is defined.
   Declared as a char array: the earlier `static char rcsid = "..."` tried to
   initialise a single char from a string literal. */
static const char rcsid[] =
 "@(#) $Id$";
#endif

27 

28 

29 
#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) 

30 

31 
#include "SDL_types.h" 

32 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

33 
#if __GNUC__ > 2 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

34 
# undef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

35 
#else 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

36 
# define GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

37 
#endif 
0  38 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

39 

29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

40 
#if defined(GCC2_HACK) && defined (__ELF__) 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

41 
/* GCC2_HACK on ELF: give the C variable a leading underscore so that it
   matches the literal `_MMX_...` symbol names written in the asm strings
   of the GCC2_HACK code paths. */
#define ASM_VAR(X) _##X 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

42 
#else 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

43 
#define ASM_VAR(X) X 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

44 
#endif 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

45 

29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

46 
/* 64-bit constant holding 0x0080 in each 16-bit word; the asm routines
   subtract it (psubw) from the zero-extended Cr/Cb samples. */
static volatile unsigned int ASM_VAR(MMX_0080w)[] = {0x00800080, 0x00800080}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

47 
/* 64-bit mask keeping the low byte of each 16-bit word (pand), used to
   pick out the even-indexed luma bytes. */
static volatile unsigned int ASM_VAR(MMX_00FFw)[] = {0x00ff00ff, 0x00ff00ff}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

48 
/* 64-bit mask keeping the high byte of each 16-bit word (pand), used to
   pick out the odd-indexed luma bytes before they are shifted down. */
static volatile unsigned int ASM_VAR(MMX_FF00w)[] = {0xff00ff00, 0xff00ff00}; 
0  49 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

50 
/* Luma multiplier, 0x004a (74) in each of the four 16-bit words; the 565
   routine multiplies luma by it (pmullw) before its later shift right by 6. */
static volatile unsigned short ASM_VAR(MMX_Ycoeff)[] = {0x004a, 0x004a, 0x004a, 0x004a}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

51 

29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

52 
/* Cb -> blue multiplier for the 32-bit RGB routine: 0x0072 (114) per
   16-bit word; the product is shifted right by 6 afterwards. */
static volatile unsigned short ASM_VAR(MMX_UbluRGB)[] = {0x0072, 0x0072, 0x0072, 0x0072}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

53 
/* Cr -> red multiplier for the 32-bit RGB routine: 0x0059 (89) per
   16-bit word; the product is shifted right by 6 afterwards. */
static volatile unsigned short ASM_VAR(MMX_VredRGB)[] = {0x0059, 0x0059, 0x0059, 0x0059}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

54 
/* Cb -> green multiplier for the 32-bit RGB routine: 0xffea per 16-bit
   word, i.e. -22 when read as a signed 16-bit value. */
static volatile unsigned short ASM_VAR(MMX_UgrnRGB)[] = {0xffea, 0xffea, 0xffea, 0xffea}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

55 
/* Cr -> green multiplier for the 32-bit RGB routine: 0xffd2 per 16-bit
   word, i.e. -46 when read as a signed 16-bit value. */
static volatile unsigned short ASM_VAR(MMX_VgrnRGB)[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2}; 
0  56 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

57 
static volatile unsigned short ASM_VAR(MMX_Ublu5x5)[] = {0x0081, 0x0081, 0x0081, 0x0081}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

58 
static volatile unsigned short ASM_VAR(MMX_Vred5x5)[] = {0x0066, 0x0066, 0x0066, 0x0066}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

59 
static volatile unsigned short ASM_VAR(MMX_Ugrn555)[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

60 
static volatile unsigned short ASM_VAR(MMX_Vgrn555)[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

61 
static volatile unsigned short ASM_VAR(MMX_Ugrn565)[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

62 
static volatile unsigned short ASM_VAR(MMX_Vgrn565)[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

63 

29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

64 
static volatile unsigned short ASM_VAR(MMX_red555)[] = {0x7c00, 0x7c00, 0x7c00, 0x7c00}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

65 
static volatile unsigned short ASM_VAR(MMX_red565)[] = {0xf800, 0xf800, 0xf800, 0xf800}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

66 
static volatile unsigned short ASM_VAR(MMX_grn555)[] = {0x03e0, 0x03e0, 0x03e0, 0x03e0}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

67 
static volatile unsigned short ASM_VAR(MMX_grn565)[] = {0x07e0, 0x07e0, 0x07e0, 0x07e0}; 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

68 
static volatile unsigned short ASM_VAR(MMX_blu5x5)[] = {0x001f, 0x001f, 0x001f, 0x001f}; 
0  69 

70 
/** 

71 
This MMX assembler is my first assembler/MMX program ever. 

72 
Thus it may be buggy.

73 
Send patches to: 

74 
mvogt@rhrk.uni-kl.de

75 

76 
After it worked fine I have "obfuscated" the code a bit to have 

77 
more parallelism in the MMX units. This means I moved

78 
initialisation around and delayed other instructions.

79 
Performance measurement did not show that this brought any advantage 

80 
but in theory it _should_ be faster this way. 

81 

82 
The overall performance gain over the C based dither was 30%-40%.

83 
The MMX routine calculates 256bit=8RGB values in each cycle 

84 
(4 for row1 & 4 for row2) 

85 

86 
The red/green/blue.. coefficients are taken from the mpeg_play

87 
player. They look nice, but I don't know if you can have

88 
better values, to avoid integer rounding errors. 

89 

90 

91 
IMPORTANT: 

92 
========== 

93 

94 
It is a requirement that the cr/cb/lum are 8 byte aligned and 

95 
the out are 16byte aligned or you will/may get segfaults 

96 

97 
*/ 

98 

99 
/**
 * Convert planar luma/chroma data (lum, cr, cb) to 32-bit pixels with
 * i386 MMX inline assembly, two output rows at a time.
 *
 * Each pass of the inner asm loop reads 4 luma bytes at lum and 4 at
 * lum+cols, uses 2 Cr and 2 Cb samples, and stores 16 bytes to row1 and
 * 16 bytes to row2. Per the register-layout comments each 32-bit pixel is
 * laid out as 0,R,G,B from the high byte down. The inner loop runs while
 * x < cols; the outer loop then skips one luma row, adds the byte
 * increment in mod to both row pointers and repeats while lum < y.
 *
 * colortab, rgb_2_pix  not referenced by this function.
 * lum                  luma plane.
 * cr, cb               chroma planes, advanced 2 bytes per 4 luma bytes.
 * out                  destination, written as Uint32 pixels.
 * rows, cols           y = lum + cols*rows is the end-of-luma pointer;
 *                      cols is also the luma row stride and the x bound.
 * mod                  row2 starts at out + cols + mod pixels; mod is then
 *                      recomputed as the byte step (cols + 2*mod)*4.
 *
 * NOTE(review): the asm statement declares no outputs and no clobbers,
 * yet it advances the registers holding cb, lum, row1 and row2, writes x,
 * stores through row1/row2 and uses the MMX registers. It also pushes
 * %ebx while "m" operands are still in use, which would be wrong if any
 * of them were addressed relative to %esp -- confirm against the build
 * flags before changing anything here.
 */
void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, 

100 
unsigned char *lum, unsigned char *cr, 

101 
unsigned char *cb, unsigned char *out, 

102 
int rows, int cols, int mod ) 

103 
{ 

104 
Uint32 *row1; 

105 
Uint32 *row2; 

106 

107 
unsigned char* y = lum +cols*rows; // Pointer to the end 

108 
int x=0; 

109 
row1 = (Uint32 *)out; // 32 bit target 

110 
row2 = (Uint32 *)out+cols+mod; // start of second row 

111 
mod = (mod+cols+mod)*4; // increment for row1 in byte 

112 

113 
__asm__ __volatile__ ( 

114 
/* We don't really care about PIC - the code should be rewritten to use 

115 
relative addressing for the static tables, so right now we take the 

116 
COW hit on the pages this code resides. Big deal. 

117 
This spill is just to reduce register pressure in the PIC case. */ 

118 
"pushl %%ebx\n" 

119 
"movl %0, %%ebx\n" 

120 

121 
".align 8\n" 

122 
"1:\n" 

123 

124 
// create Cr (result in mm1) 

125 
"movd (%%ebx), %%mm1\n" // 0 0 0 0 v3 v2 v1 v0 

126 
"pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 

127 
"movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0 

128 
"punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0 

129 
"punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

130 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

131 
"psubw _MMX_0080w,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

132 
#else 
887
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

133 
"psubw %[_MMX_0080w],%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

134 
#endif 
0  135 

136 
// create Cr_g (result in mm0) 

137 
"movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

138 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

139 
"pmullw _MMX_VgrnRGB,%%mm0\n"// red*-46dec=-0.7136*64 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

140 
"pmullw _MMX_VredRGB,%%mm1\n"// red*89dec=1.4013*64 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

141 
#else 
887
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

142 
"pmullw %[_MMX_VgrnRGB],%%mm0\n"// red*-46dec=-0.7136*64 
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

143 
"pmullw %[_MMX_VredRGB],%%mm1\n"// red*89dec=1.4013*64 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

144 
#endif 
0  145 
"psraw $6, %%mm0\n" // red=red/64 
146 
"psraw $6, %%mm1\n" // red=red/64 

147 

148 
// create L1 L2 (result in mm2,mm4) 

149 
// L2=lum+cols 

150 
"movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0 

151 
"punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0 

152 
"movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

153 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

154 
"pand _MMX_FF00w,%%mm2\n" // L3 0 L1 0 l3 0 l1 0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

155 
"pand _MMX_00FFw,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

156 
#else 
887
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

157 
"pand %[_MMX_FF00w],%%mm2\n" // L3 0 L1 0 l3 0 l1 0 
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

158 
"pand %[_MMX_00FFw],%%mm4\n" // 0 L2 0 L0 0 l2 0 l0 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

159 
#endif 
0  160 
"psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1 
161 

162 
// create R (result in mm6) 

163 
"movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1 

164 
"movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0 

165 
"paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1 

166 
"paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0 

167 
"packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1 

168 
"packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0 

169 
"pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 

170 
"punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0 

171 

172 
// create Cb (result in mm1) 

173 
"movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0 

174 
"punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0 

175 
"punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

176 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

177 
"psubw _MMX_0080w,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

178 
#else 
887
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

179 
"psubw %[_MMX_0080w],%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

180 
#endif 
0  181 
// create Cb_g (result in mm5) 
182 
"movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

183 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

184 
"pmullw _MMX_UgrnRGB,%%mm5\n" // Cb*-22dec=-0.34375*64 (MMX_UgrnRGB is 0xffea) 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

185 
"pmullw _MMX_UbluRGB,%%mm1\n" // blue*114dec=1.78125*64 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

186 
#else 
887
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

187 
"pmullw %[_MMX_UgrnRGB],%%mm5\n" // Cb*-22dec=-0.34375*64 (MMX_UgrnRGB is 0xffea) 
b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 0400
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset

188 
"pmullw %[_MMX_UbluRGB],%%mm1\n" // blue*114dec=1.78125*64 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

189 
#endif 
0  190 
"psraw $6, %%mm5\n" // Cb_g=Cb_g/64 
191 
"psraw $6, %%mm1\n" // blue=blue/64 

192 

193 
// create G (result in mm7) 

194 
"movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 

195 
"movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l0 

196 
"paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t 

197 
"paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t 

198 
"paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1 

199 
"paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0 

200 
"packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1 

201 
"packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0 

202 
"punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0 

203 

204 
// create B (result in mm5) 

205 
"movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 

206 
"movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l0 

207 
"paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1 

208 
"paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0 

209 
"packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1 

210 
"packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0 

211 
"punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0 

212 

213 
// fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) 

214 

215 
"pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 

216 
"pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0 

217 
"movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0 

218 
"movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0 

219 
// process lower lum 

220 
"punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0 

221 
"punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0 

222 
"movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0 

223 
"movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0 

224 
"punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0 

225 
"punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2 

226 

227 
"pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 

228 
"movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0 

229 
"punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0 

230 
"punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0 

231 
"por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0 

232 
"movq %%mm2,(%3)\n" // wrote out ! row1 

233 

234 
"pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 

235 
"punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0 

236 
"punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0 

237 
"por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2 

238 
"movq %%mm4,8(%3)\n" // wrote out ! row1 

239 

240 
// fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) 

241 
// this can be done "destructive" 

242 
"pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 

243 
"punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0 

244 
"punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0 

245 
"movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0 

246 
"punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0 

247 
"movq %%mm1,(%5)\n" // wrote out ! row2 

248 
"punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2 

249 
"movq %%mm5,8(%5)\n" // wrote out ! row2 

250 

251 
"addl $4,%2\n" // lum+4 

252 
"leal 16(%3),%3\n" // row1+16 

253 
"leal 16(%5),%5\n" // row2+16 

254 
"addl $2, %%ebx\n" // cr+2 

255 
"addl $2, %1\n" // cb+2 

256 

257 
"addl $4,%6\n" // x+4 

258 
"cmpl %4,%6\n" 

259 

260 
"jl 1b\n" 

261 
"addl %4, %2\n" // lum += cols 

262 
"addl %8, %3\n" // row1+= mod 

263 
"addl %8, %5\n" // row2+= mod 

264 
"movl $0, %6\n" // x=0 

265 
"cmpl %7, %2\n" 

266 
"jl 1b\n" 

267 
"emms\n" 

268 
"popl %%ebx\n" 

269 
: 

270 
: "m" (cr), "r"(cb),"r"(lum), 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

271 
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod) 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

272 
#ifndef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

273 
,[_MMX_0080w] "m" (*MMX_0080w), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

274 
[_MMX_00FFw] "m" (*MMX_00FFw), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

275 
[_MMX_FF00w] "m" (*MMX_FF00w), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

276 
[_MMX_VgrnRGB] "m" (*MMX_VgrnRGB), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

277 
[_MMX_VredRGB] "m" (*MMX_VredRGB), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

278 
[_MMX_UgrnRGB] "m" (*MMX_UgrnRGB), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

279 
[_MMX_UbluRGB] "m" (*MMX_UbluRGB) 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

280 
#endif 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

281 
); 
0  282 
} 
283 

284 
void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, 

285 
unsigned char *lum, unsigned char *cr, 

286 
unsigned char *cb, unsigned char *out, 

287 
int rows, int cols, int mod ) 

288 
{ 

289 
Uint16 *row1; 

290 
Uint16 *row2; 

291 

292 
unsigned char* y = lum +cols*rows; /* Pointer to the end */ 

293 
int x=0; 

294 
row1 = (Uint16 *)out; /* 16 bit target */ 

295 
row2 = (Uint16 *)out+cols+mod; /* start of second row */ 

296 
mod = (mod+cols+mod)*2; /* increment for row1 in byte */ 

297 

298 

299 
__asm__ __volatile__( 

300 
"pushl %%ebx\n" 

301 
"movl %0, %%ebx\n" 

302 

303 
".align 8\n" 

304 
"1:\n" 

305 
"movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0 

306 
"pxor %%mm7, %%mm7\n" 

307 
"movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0 

308 
"punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0 

309 
"punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

310 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

311 
"psubw _MMX_0080w, %%mm0\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

312 
"psubw _MMX_0080w, %%mm1\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

313 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

314 
"psubw %[_MMX_0080w], %%mm0\n" 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

315 
"psubw %[_MMX_0080w], %%mm1\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

316 
#endif 
0  317 
"movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0 
318 
"movq %%mm1, %%mm3\n" // Cr 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

319 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

320 
"pmullw _MMX_Ugrn565, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

321 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

322 
"pmullw %[_MMX_Ugrn565], %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

323 
#endif 
0  324 
"movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

325 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

326 
"pmullw _MMX_Ublu5x5, %%mm0\n" // Cb2blue 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

327 
"pand _MMX_00FFw, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

328 
"pmullw _MMX_Vgrn565, %%mm3\n" // Cr2green 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

329 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

330 
"pmullw %[_MMX_Ublu5x5], %%mm0\n" // Cb2blue 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

331 
"pand %[_MMX_00FFw], %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

332 
"pmullw %[_MMX_Vgrn565], %%mm3\n" // Cr2green 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

333 
#endif 
0  334 
"movq (%2), %%mm7\n" // L2 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

335 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

336 
"pmullw _MMX_Vred5x5, %%mm1\n" // Cr2red 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

337 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

338 
"pmullw %[_MMX_Vred5x5], %%mm1\n" // Cr2red 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

339 
#endif 
0  340 
"psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

341 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

342 
"pmullw _MMX_Ycoeff, %%mm6\n" // lum1 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

343 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

344 
"pmullw %[_MMX_Ycoeff], %%mm6\n" // lum1 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

345 
#endif 
0  346 
"paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

347 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

348 
"pmullw _MMX_Ycoeff, %%mm7\n" // lum2 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

349 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

350 
"pmullw %[_MMX_Ycoeff], %%mm7\n" // lum2 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

351 
#endif 
0  352 

353 
"movq %%mm6, %%mm4\n" // lum1 

354 
"paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0 

355 
"movq %%mm4, %%mm5\n" // lum1 

356 
"paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0 

357 
"paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0 

358 
"psraw $6, %%mm4\n" // R1 0 .. 64 

359 
"movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1 

360 
"psraw $6, %%mm5\n" // G1  .. + 

361 
"paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1 

362 
"psraw $6, %%mm6\n" // B1 0 .. 64 

363 
"packuswb %%mm4, %%mm4\n" // R1 R1 

364 
"packuswb %%mm5, %%mm5\n" // G1 G1 

365 
"packuswb %%mm6, %%mm6\n" // B1 B1 

366 
"punpcklbw %%mm4, %%mm4\n" 

367 
"punpcklbw %%mm5, %%mm5\n" 

368 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

369 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

370 
"pand _MMX_red565, %%mm4\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

371 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

372 
"pand %[_MMX_red565], %%mm4\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

373 
#endif 
0  374 
"psllw $3, %%mm5\n" // GREEN 1 
375 
"punpcklbw %%mm6, %%mm6\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

376 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

377 
"pand _MMX_grn565, %%mm5\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

378 
"pand _MMX_red565, %%mm6\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

379 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

380 
"pand %[_MMX_grn565], %%mm5\n" 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

381 
"pand %[_MMX_red565], %%mm6\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

382 
#endif 
0  383 
"por %%mm5, %%mm4\n" // 
384 
"psrlw $11, %%mm6\n" // BLUE 1 

385 
"movq %%mm3, %%mm5\n" // lum2 

386 
"paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1 

387 
"paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1 

388 
"psraw $6, %%mm3\n" // R2 

389 
"por %%mm6, %%mm4\n" // MM4 

390 
"psraw $6, %%mm5\n" // G2 

391 
"movq (%2, %4), %%mm6\n" // L3 load lum2 

392 
"psraw $6, %%mm7\n" 

393 
"packuswb %%mm3, %%mm3\n" 

394 
"packuswb %%mm5, %%mm5\n" 

395 
"packuswb %%mm7, %%mm7\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

396 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

397 
"pand _MMX_00FFw, %%mm6\n" // L3 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

398 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

399 
"pand %[_MMX_00FFw], %%mm6\n" // L3 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

400 
#endif 
0  401 
"punpcklbw %%mm3, %%mm3\n" 
402 
"punpcklbw %%mm5, %%mm5\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

403 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

404 
"pmullw _MMX_Ycoeff, %%mm6\n" // lum3 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

405 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

406 
"pmullw %[_MMX_Ycoeff], %%mm6\n" // lum3 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

407 
#endif 
0  408 
"punpcklbw %%mm7, %%mm7\n" 
409 
"psllw $3, %%mm5\n" // GREEN 2 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

410 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

411 
"pand _MMX_red565, %%mm7\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

412 
"pand _MMX_red565, %%mm3\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

413 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

414 
"pand %[_MMX_red565], %%mm7\n" 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

415 
"pand %[_MMX_red565], %%mm3\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

416 
#endif 
0  417 
"psrlw $11, %%mm7\n" // BLUE 2 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

418 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

419 
"pand _MMX_grn565, %%mm5\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

420 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

421 
"pand %[_MMX_grn565], %%mm5\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

422 
#endif 
0  423 
"por %%mm7, %%mm3\n" 
424 
"movq (%2,%4), %%mm7\n" // L4 load lum2 

425 
"por %%mm5, %%mm3\n" // 

426 
"psrlw $8, %%mm7\n" // L4 

427 
"movq %%mm4, %%mm5\n" 

428 
"punpcklwd %%mm3, %%mm4\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

429 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

430 
"pmullw _MMX_Ycoeff, %%mm7\n" // lum4 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

431 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

432 
"pmullw %[_MMX_Ycoeff], %%mm7\n" // lum4 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

433 
#endif 
0  434 
"punpckhwd %%mm3, %%mm5\n" 
435 

436 
"movq %%mm4, (%3)\n" // write row1 

437 
"movq %%mm5, 8(%3)\n" // write row1 

438 

439 
"movq %%mm6, %%mm4\n" // Lum3 

440 
"paddw %%mm0, %%mm6\n" // Lum3 +blue 

441 

442 
"movq %%mm4, %%mm5\n" // Lum3 

443 
"paddw %%mm1, %%mm4\n" // Lum3 +red 

444 
"paddw %%mm2, %%mm5\n" // Lum3 +green 

445 
"psraw $6, %%mm4\n" 

446 
"movq %%mm7, %%mm3\n" // Lum4 

447 
"psraw $6, %%mm5\n" 

448 
"paddw %%mm0, %%mm7\n" // Lum4 +blue 

449 
"psraw $6, %%mm6\n" // Lum3 +blue 

450 
"movq %%mm3, %%mm0\n" // Lum4 

451 
"packuswb %%mm4, %%mm4\n" 

452 
"paddw %%mm1, %%mm3\n" // Lum4 +red 

453 
"packuswb %%mm5, %%mm5\n" 

454 
"paddw %%mm2, %%mm0\n" // Lum4 +green 

455 
"packuswb %%mm6, %%mm6\n" 

456 
"punpcklbw %%mm4, %%mm4\n" 

457 
"punpcklbw %%mm5, %%mm5\n" 

458 
"punpcklbw %%mm6, %%mm6\n" 

459 
"psllw $3, %%mm5\n" // GREEN 3 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

460 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

461 
"pand _MMX_red565, %%mm4\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

462 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

463 
"pand %[_MMX_red565], %%mm4\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

464 
#endif 
0  465 
"psraw $6, %%mm3\n" // psr 6 
466 
"psraw $6, %%mm0\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

467 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

468 
"pand _MMX_red565, %%mm6\n" // BLUE 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

469 
"pand _MMX_grn565, %%mm5\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

470 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

471 
"pand %[_MMX_red565], %%mm6\n" // BLUE 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

472 
"pand %[_MMX_grn565], %%mm5\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

473 
#endif 
0  474 
"psrlw $11, %%mm6\n" // BLUE 3 
475 
"por %%mm5, %%mm4\n" 

476 
"psraw $6, %%mm7\n" 

477 
"por %%mm6, %%mm4\n" 

478 
"packuswb %%mm3, %%mm3\n" 

479 
"packuswb %%mm0, %%mm0\n" 

480 
"packuswb %%mm7, %%mm7\n" 

481 
"punpcklbw %%mm3, %%mm3\n" 

482 
"punpcklbw %%mm0, %%mm0\n" 

483 
"punpcklbw %%mm7, %%mm7\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

484 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

485 
"pand _MMX_red565, %%mm3\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

486 
"pand _MMX_red565, %%mm7\n" // BLUE 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

487 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

488 
"pand %[_MMX_red565], %%mm3\n" 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

489 
"pand %[_MMX_red565], %%mm7\n" // BLUE 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

490 
#endif 
0  491 
"psllw $3, %%mm0\n" // GREEN 4 
492 
"psrlw $11, %%mm7\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

493 
#ifdef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

494 
"pand _MMX_grn565, %%mm0\n" 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

495 
#else 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

496 
"pand %[_MMX_grn565], %%mm0\n" 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

497 
#endif 
0  498 
"por %%mm7, %%mm3\n" 
499 
"por %%mm0, %%mm3\n" 

500 

501 
"movq %%mm4, %%mm5\n" 

502 

503 
"punpcklwd %%mm3, %%mm4\n" 

504 
"punpckhwd %%mm3, %%mm5\n" 

505 

506 
"movq %%mm4, (%5)\n" 

507 
"movq %%mm5, 8(%5)\n" 

508 

509 
"addl $8, %6\n" 

510 
"addl $8, %2\n" 

511 
"addl $4, %%ebx\n" 

512 
"addl $4, %1\n" 

513 
"cmpl %4, %6\n" 

514 
"leal 16(%3), %3\n" 

515 
"leal 16(%5),%5\n" // row2+16 

516 

517 

518 
"jl 1b\n" 

519 
"addl %4, %2\n" // lum += cols 

520 
"addl %8, %3\n" // row1+= mod 

521 
"addl %8, %5\n" // row2+= mod 

522 
"movl $0, %6\n" // x=0 

523 
"cmpl %7, %2\n" 

524 
"jl 1b\n" 

525 
"emms\n" 

526 
"popl %%ebx\n" 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

527 
: 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

528 
:"m" (cr), "r"(cb),"r"(lum), 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

529 
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod) 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

530 
#ifndef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

531 
,[_MMX_0080w] "m" (*MMX_0080w), 
949
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

532 
[_MMX_Ugrn565] "m" (*MMX_Ugrn565), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

533 
[_MMX_Ublu5x5] "m" (*MMX_Ublu5x5), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

534 
[_MMX_00FFw] "m" (*MMX_00FFw), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

535 
[_MMX_Vgrn565] "m" (*MMX_Vgrn565), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

536 
[_MMX_Vred5x5] "m" (*MMX_Vred5x5), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

537 
[_MMX_Ycoeff] "m" (*MMX_Ycoeff), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

538 
[_MMX_red565] "m" (*MMX_red565), 
e0d96eb0af19
I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents:
946
diff
changeset

539 
[_MMX_grn565] "m" (*MMX_grn565) 
1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

540 
#endif 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

541 
); 
0  542 
} 
543 

1038
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

544 
/* GCC2_HACK only selects how the inline asm above spells its MMX constant
   operands (direct symbol names vs. %[name] operands); drop it here so it
   does not stay defined past this point. */
#undef GCC2_HACK 
29d7db09776e
Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents:
949
diff
changeset

545 

0  546 
#endif /* GCC i386 inline assembly */ 