author | Ryan C. Gordon <icculus@icculus.org> |
Thu, 21 Jun 2007 18:20:59 +0000 | |
branch | SDL-1.2 |
changeset 3973 | 5fbd763b1c88 |
parent 3931 | d65b4a73c991 |
child 4159 | a1b03ba2fcd0 |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
SDL - Simple DirectMedia Layer |
|
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
3 |
Copyright (C) 1997-2006 Sam Lantinga |
0 | 4 |
|
5 |
This library is free software; you can redistribute it and/or |
|
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
6 |
modify it under the terms of the GNU Lesser General Public |
0 | 7 |
License as published by the Free Software Foundation; either |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
8 |
version 2.1 of the License, or (at your option) any later version. |
0 | 9 |
|
10 |
This library is distributed in the hope that it will be useful, |
|
11 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
13 |
Lesser General Public License for more details. |
0 | 14 |
|
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
15 |
You should have received a copy of the GNU Lesser General Public |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
16 |
License along with this library; if not, write to the Free Software |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
17 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 18 |
|
19 |
Sam Lantinga |
|
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
1
diff
changeset
|
20 |
slouken@libsdl.org |
0 | 21 |
*/ |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
22 |
#include "SDL_config.h" |
0 | 23 |
|
24 |
#include "SDL_video.h" |
|
25 |
#include "SDL_blit.h" |
|
26 |
||
3973
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
27 |
/* |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
28 |
In Visual C, VC6 has mmintrin.h in the "Processor Pack" add-on. |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
29 |
Checking if _mm_free is #defined in malloc.h is the only way to |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
30 |
determine if the Processor Pack is installed, as far as I can tell. |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
31 |
*/ |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
32 |
|
1542 | 33 |
#if SDL_ASSEMBLY_ROUTINES |
3973
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
34 |
# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
35 |
# define MMX_ASMBLIT 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
36 |
# define GCC_ASMBLIT 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
37 |
# elif defined(_MSC_VER) && defined(_M_IX86) |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
38 |
# if (_MSC_VER <= 1200) |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
39 |
# include <malloc.h> |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
40 |
# if defined(_mm_free) |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
41 |
# define HAVE_MMINTRIN_H 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
42 |
# endif |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
43 |
# else /* Visual Studio > VC6 always has mmintrin.h */ |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
44 |
# define HAVE_MMINTRIN_H 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
45 |
# endif |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
46 |
# if HAVE_MMINTRIN_H |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
47 |
# define MMX_ASMBLIT 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
48 |
# define MSVC_ASMBLIT 1 |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
49 |
# endif |
5fbd763b1c88
Fixes for Visual C++ 6.0 with and without the Processor Pack.
Ryan C. Gordon <icculus@icculus.org>
parents:
3931
diff
changeset
|
50 |
# endif |
1542 | 51 |
#endif /* SDL_ASSEMBLY_ROUTINES */ |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
52 |
|
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
53 |
/* Function to check the CPU flags */ |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
54 |
#include "SDL_cpuinfo.h" |
1542 | 55 |
#if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
56 |
#include "mmx.h" |
1542 | 57 |
#elif MSVC_ASMBLIT |
58 |
#include <mmintrin.h> |
|
59 |
#include <mm3dnow.h> |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
60 |
#endif |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
61 |
|
0 | 62 |
/* Functions to perform alpha blended blitting */ |
63 |
||
64 |
/* N->1 blending with per-surface alpha */ |
|
65 |
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) |
|
66 |
{ |
|
67 |
int width = info->d_width; |
|
68 |
int height = info->d_height; |
|
69 |
Uint8 *src = info->s_pixels; |
|
70 |
int srcskip = info->s_skip; |
|
71 |
Uint8 *dst = info->d_pixels; |
|
72 |
int dstskip = info->d_skip; |
|
73 |
Uint8 *palmap = info->table; |
|
74 |
SDL_PixelFormat *srcfmt = info->src; |
|
75 |
SDL_PixelFormat *dstfmt = info->dst; |
|
76 |
int srcbpp = srcfmt->BytesPerPixel; |
|
77 |
||
78 |
const unsigned A = srcfmt->alpha; |
|
79 |
||
80 |
while ( height-- ) { |
|
81 |
DUFFS_LOOP4( |
|
82 |
{ |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
83 |
Uint32 Pixel; |
0 | 84 |
unsigned sR; |
85 |
unsigned sG; |
|
86 |
unsigned sB; |
|
87 |
unsigned dR; |
|
88 |
unsigned dG; |
|
89 |
unsigned dB; |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
90 |
DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
0 | 91 |
dR = dstfmt->palette->colors[*dst].r; |
92 |
dG = dstfmt->palette->colors[*dst].g; |
|
93 |
dB = dstfmt->palette->colors[*dst].b; |
|
94 |
ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); |
|
95 |
dR &= 0xff; |
|
96 |
dG &= 0xff; |
|
97 |
dB &= 0xff; |
|
98 |
/* Pack RGB into 8bit pixel */ |
|
99 |
if ( palmap == NULL ) { |
|
100 |
*dst =((dR>>5)<<(3+2))| |
|
101 |
((dG>>5)<<(2))| |
|
102 |
((dB>>6)<<(0)); |
|
103 |
} else { |
|
104 |
*dst = palmap[((dR>>5)<<(3+2))| |
|
105 |
((dG>>5)<<(2)) | |
|
106 |
((dB>>6)<<(0))]; |
|
107 |
} |
|
108 |
dst++; |
|
109 |
src += srcbpp; |
|
110 |
}, |
|
111 |
width); |
|
112 |
src += srcskip; |
|
113 |
dst += dstskip; |
|
114 |
} |
|
115 |
} |
|
116 |
||
117 |
/* N->1 blending with pixel alpha */ |
|
118 |
static void BlitNto1PixelAlpha(SDL_BlitInfo *info) |
|
119 |
{ |
|
120 |
int width = info->d_width; |
|
121 |
int height = info->d_height; |
|
122 |
Uint8 *src = info->s_pixels; |
|
123 |
int srcskip = info->s_skip; |
|
124 |
Uint8 *dst = info->d_pixels; |
|
125 |
int dstskip = info->d_skip; |
|
126 |
Uint8 *palmap = info->table; |
|
127 |
SDL_PixelFormat *srcfmt = info->src; |
|
128 |
SDL_PixelFormat *dstfmt = info->dst; |
|
129 |
int srcbpp = srcfmt->BytesPerPixel; |
|
130 |
||
131 |
/* FIXME: fix alpha bit field expansion here too? */ |
|
132 |
while ( height-- ) { |
|
133 |
DUFFS_LOOP4( |
|
134 |
{ |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
135 |
Uint32 Pixel; |
0 | 136 |
unsigned sR; |
137 |
unsigned sG; |
|
138 |
unsigned sB; |
|
139 |
unsigned sA; |
|
140 |
unsigned dR; |
|
141 |
unsigned dG; |
|
142 |
unsigned dB; |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
143 |
DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); |
0 | 144 |
dR = dstfmt->palette->colors[*dst].r; |
145 |
dG = dstfmt->palette->colors[*dst].g; |
|
146 |
dB = dstfmt->palette->colors[*dst].b; |
|
147 |
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
|
148 |
dR &= 0xff; |
|
149 |
dG &= 0xff; |
|
150 |
dB &= 0xff; |
|
151 |
/* Pack RGB into 8bit pixel */ |
|
152 |
if ( palmap == NULL ) { |
|
153 |
*dst =((dR>>5)<<(3+2))| |
|
154 |
((dG>>5)<<(2))| |
|
155 |
((dB>>6)<<(0)); |
|
156 |
} else { |
|
157 |
*dst = palmap[((dR>>5)<<(3+2))| |
|
158 |
((dG>>5)<<(2)) | |
|
159 |
((dB>>6)<<(0)) ]; |
|
160 |
} |
|
161 |
dst++; |
|
162 |
src += srcbpp; |
|
163 |
}, |
|
164 |
width); |
|
165 |
src += srcskip; |
|
166 |
dst += dstskip; |
|
167 |
} |
|
168 |
} |
|
169 |
||
170 |
/* colorkeyed N->1 blending with per-surface alpha */ |
|
171 |
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) |
|
172 |
{ |
|
173 |
int width = info->d_width; |
|
174 |
int height = info->d_height; |
|
175 |
Uint8 *src = info->s_pixels; |
|
176 |
int srcskip = info->s_skip; |
|
177 |
Uint8 *dst = info->d_pixels; |
|
178 |
int dstskip = info->d_skip; |
|
179 |
Uint8 *palmap = info->table; |
|
180 |
SDL_PixelFormat *srcfmt = info->src; |
|
181 |
SDL_PixelFormat *dstfmt = info->dst; |
|
182 |
int srcbpp = srcfmt->BytesPerPixel; |
|
183 |
Uint32 ckey = srcfmt->colorkey; |
|
184 |
||
185 |
const int A = srcfmt->alpha; |
|
186 |
||
187 |
while ( height-- ) { |
|
188 |
DUFFS_LOOP( |
|
189 |
{ |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
190 |
Uint32 Pixel; |
0 | 191 |
unsigned sR; |
192 |
unsigned sG; |
|
193 |
unsigned sB; |
|
194 |
unsigned dR; |
|
195 |
unsigned dG; |
|
196 |
unsigned dB; |
|
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
197 |
DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
198 |
if ( Pixel != ckey ) { |
0 | 199 |
dR = dstfmt->palette->colors[*dst].r; |
200 |
dG = dstfmt->palette->colors[*dst].g; |
|
201 |
dB = dstfmt->palette->colors[*dst].b; |
|
202 |
ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); |
|
203 |
dR &= 0xff; |
|
204 |
dG &= 0xff; |
|
205 |
dB &= 0xff; |
|
206 |
/* Pack RGB into 8bit pixel */ |
|
207 |
if ( palmap == NULL ) { |
|
208 |
*dst =((dR>>5)<<(3+2))| |
|
209 |
((dG>>5)<<(2)) | |
|
210 |
((dB>>6)<<(0)); |
|
211 |
} else { |
|
212 |
*dst = palmap[((dR>>5)<<(3+2))| |
|
213 |
((dG>>5)<<(2)) | |
|
214 |
((dB>>6)<<(0)) ]; |
|
215 |
} |
|
216 |
} |
|
217 |
dst++; |
|
218 |
src += srcbpp; |
|
219 |
}, |
|
220 |
width); |
|
221 |
src += srcskip; |
|
222 |
dst += dstskip; |
|
223 |
} |
|
224 |
} |
|
225 |
||
1542 | 226 |
#if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
227 |
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
228 |
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
229 |
{ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
230 |
int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
231 |
int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
232 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
233 |
int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
234 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
235 |
int dstskip = info->d_skip >> 2; |
1542 | 236 |
Uint32 dalpha = info->dst->Amask; |
237 |
Uint8 load[8]; |
|
238 |
||
239 |
*(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ |
|
240 |
movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ |
|
241 |
*(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ |
|
242 |
movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ |
|
243 |
movd_m2r(dalpha, mm7); /* dst alpha mask */ |
|
244 |
punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
245 |
while(height--) { |
1542 | 246 |
DUFFS_LOOP_DOUBLE2( |
247 |
{ |
|
248 |
Uint32 s = *srcp++; |
|
249 |
Uint32 d = *dstp; |
|
250 |
*dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
|
251 |
+ (s & d & 0x00010101)) | dalpha; |
|
252 |
},{ |
|
253 |
movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ |
|
254 |
movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ |
|
255 |
||
256 |
movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ |
|
257 |
movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ |
|
258 |
||
259 |
pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ |
|
260 |
pand_r2r(mm4, mm5); /* src & mask -> mm5 */ |
|
261 |
paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ |
|
262 |
pand_r2r(mm1, mm2); /* src & dst -> mm2 */ |
|
263 |
psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ |
|
264 |
pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ |
|
265 |
paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ |
|
266 |
||
267 |
por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ |
|
268 |
movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ |
|
269 |
dstp += 2; |
|
270 |
srcp += 2; |
|
271 |
}, width); |
|
272 |
srcp += srcskip; |
|
273 |
dstp += dstskip; |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
274 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
275 |
emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
276 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
277 |
|
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
278 |
/* fast RGB888->(A)RGB888 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
279 |
static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
280 |
{ |
1542 | 281 |
SDL_PixelFormat* df = info->dst; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
282 |
unsigned alpha = info->src->alpha; |
1542 | 283 |
|
284 |
if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
|
285 |
/* only call a128 version when R,G,B occupy lower bits */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
286 |
BlitRGBtoRGBSurfaceAlpha128MMX(info); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
287 |
} else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
288 |
int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
289 |
int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
290 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
291 |
int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
292 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
293 |
int dstskip = info->d_skip >> 2; |
1542 | 294 |
|
295 |
pxor_r2r(mm5, mm5); /* 0 -> mm5 */ |
|
296 |
/* form the alpha mult */ |
|
297 |
movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ |
|
298 |
punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ |
|
299 |
punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ |
|
300 |
alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); |
|
301 |
movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ |
|
302 |
punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ |
|
303 |
pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ |
|
304 |
/* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ |
|
305 |
movd_m2r(df->Amask, mm7); /* dst alpha mask */ |
|
306 |
punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
307 |
|
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
308 |
while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
309 |
DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
310 |
/* One Pixel Blend */ |
1542 | 311 |
movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
312 |
movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ |
|
313 |
punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ |
|
314 |
punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ |
|
315 |
||
316 |
psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ |
|
317 |
pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ |
|
318 |
psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ |
|
319 |
paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ |
|
320 |
||
321 |
packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ |
|
322 |
por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ |
|
323 |
movd_r2m(mm2, *dstp);/* mm2 -> pixel */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
324 |
++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
325 |
++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
326 |
},{ |
1542 | 327 |
/* Two Pixels Blend */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
328 |
movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ |
1542 | 329 |
movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ |
330 |
movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ |
|
331 |
movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ |
|
332 |
||
333 |
punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ |
|
334 |
punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ |
|
335 |
punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ |
|
336 |
punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ |
|
337 |
||
338 |
psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ |
|
339 |
pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ |
|
340 |
psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm1 */ |
|
341 |
paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ |
|
342 |
||
343 |
psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ |
|
344 |
pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ |
|
345 |
psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ |
|
346 |
paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ |
|
347 |
||
348 |
packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ |
|
349 |
por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ |
|
350 |
||
351 |
movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ |
|
352 |
||
353 |
srcp += 2; |
|
354 |
dstp += 2; |
|
355 |
}, width); |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
356 |
srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
357 |
dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
358 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
359 |
emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
360 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
361 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
362 |
|
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
363 |
/* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
364 |
static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
365 |
{ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
366 |
int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
367 |
int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
368 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
369 |
int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
370 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
371 |
int dstskip = info->d_skip >> 2; |
1542 | 372 |
SDL_PixelFormat* sf = info->src; |
373 |
Uint32 amask = sf->Amask; |
|
374 |
||
375 |
pxor_r2r(mm6, mm6); /* 0 -> mm6 */ |
|
376 |
/* form multiplication mask */ |
|
377 |
movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ |
|
378 |
punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ |
|
379 |
pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ |
|
380 |
movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ |
|
381 |
pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ |
|
382 |
/* form channel masks */ |
|
383 |
movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ |
|
384 |
packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ |
|
385 |
packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ |
|
386 |
pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ |
|
387 |
/* get alpha channel shift */ |
|
3931
d65b4a73c991
BlitRGBtoRGBPixelAlphaMMX() is putting the wrong value into a register.
Ryan C. Gordon <icculus@icculus.org>
parents:
3910
diff
changeset
|
388 |
__asm__ __volatile__ ( |
d65b4a73c991
BlitRGBtoRGBPixelAlphaMMX() is putting the wrong value into a register.
Ryan C. Gordon <icculus@icculus.org>
parents:
3910
diff
changeset
|
389 |
"movd %0, %%mm5" |
d65b4a73c991
BlitRGBtoRGBPixelAlphaMMX() is putting the wrong value into a register.
Ryan C. Gordon <icculus@icculus.org>
parents:
3910
diff
changeset
|
390 |
: : "rm" ((Uint32) sf->Ashift) ); /* Ashift -> mm5 */ |
1542 | 391 |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
392 |
while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
393 |
DUFFS_LOOP4({ |
1542 | 394 |
Uint32 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
395 |
/* FIXME: Here we special-case opaque alpha since the |
1542 | 396 |
compositioning used (>>8 instead of /255) doesn't handle |
397 |
it correctly. Also special-case alpha=0 for speed? |
|
398 |
Benchmark this! */ |
|
399 |
if(alpha == 0) { |
|
400 |
/* do nothing */ |
|
401 |
} else if(alpha == amask) { |
|
402 |
/* opaque alpha -- copy RGB, keep dst alpha */ |
|
403 |
/* using MMX here to free up regular registers for other things */ |
|
404 |
movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
|
405 |
movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ |
|
406 |
pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ |
|
407 |
pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ |
|
408 |
por_r2r(mm1, mm2); /* src | dst -> mm2 */ |
|
409 |
movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ |
|
410 |
} else { |
|
411 |
movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
|
412 |
punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ |
|
413 |
||
414 |
movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ |
|
415 |
punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ |
|
416 |
||
417 |
__asm__ __volatile__ ( |
|
418 |
"movd %0, %%mm4" |
|
419 |
: : "r" (alpha) ); /* 0000A000 -> mm4 */ |
|
420 |
psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ |
|
421 |
punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ |
|
422 |
punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ |
|
423 |
pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ |
|
424 |
||
425 |
/* blend */ |
|
426 |
psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ |
|
427 |
pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ |
|
428 |
psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ |
|
429 |
paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ |
|
430 |
||
431 |
packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ |
|
432 |
movd_r2m(mm2, *dstp);/* mm2 -> dst */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
433 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
434 |
++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
435 |
++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
436 |
}, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
437 |
srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
438 |
dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
439 |
} |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
440 |
emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
441 |
} |
1542 | 442 |
/* End GCC_ASMBLIT */ |
443 |
||
444 |
#elif MSVC_ASMBLIT |
|
445 |
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
|
446 |
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) |
|
447 |
{ |
|
448 |
int width = info->d_width; |
|
449 |
int height = info->d_height; |
|
450 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
|
451 |
int srcskip = info->s_skip >> 2; |
|
452 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
|
453 |
int dstskip = info->d_skip >> 2; |
|
454 |
Uint32 dalpha = info->dst->Amask; |
|
455 |
||
456 |
__m64 src1, src2, dst1, dst2, lmask, hmask, dsta; |
|
457 |
||
458 |
hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ |
|
459 |
lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ |
|
460 |
dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
|
461 |
||
462 |
while (height--) { |
|
463 |
int n = width; |
|
464 |
if ( n & 1 ) { |
|
465 |
Uint32 s = *srcp++; |
|
466 |
Uint32 d = *dstp; |
|
467 |
*dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
|
468 |
+ (s & d & 0x00010101)) | dalpha; |
|
469 |
n--; |
|
470 |
} |
|
471 |
||
472 |
for (n >>= 1; n > 0; --n) { |
|
473 |
dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ |
|
474 |
dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
|
475 |
||
476 |
src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ |
|
477 |
src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
|
478 |
||
479 |
dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ |
|
480 |
src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ |
|
481 |
src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ |
|
482 |
src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ |
|
483 |
||
484 |
dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ |
|
485 |
dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ |
|
486 |
dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ |
|
487 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ |
|
488 |
||
489 |
*(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ |
|
490 |
dstp += 2; |
|
491 |
srcp += 2; |
|
492 |
} |
|
493 |
||
494 |
srcp += srcskip; |
|
495 |
dstp += dstskip; |
|
496 |
} |
|
497 |
_mm_empty(); |
|
498 |
} |
|
499 |
||
500 |
/* fast RGB888->(A)RGB888 blending with surface alpha */ |
|
501 |
static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) |
|
502 |
{ |
|
503 |
SDL_PixelFormat* df = info->dst; |
|
504 |
Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; |
|
505 |
unsigned alpha = info->src->alpha; |
|
506 |
||
507 |
if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
|
508 |
/* only call a128 version when R,G,B occupy lower bits */ |
|
509 |
BlitRGBtoRGBSurfaceAlpha128MMX(info); |
|
510 |
} else { |
|
511 |
int width = info->d_width; |
|
512 |
int height = info->d_height; |
|
513 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
|
514 |
int srcskip = info->s_skip >> 2; |
|
515 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
|
516 |
int dstskip = info->d_skip >> 2; |
|
517 |
Uint32 dalpha = df->Amask; |
|
518 |
Uint32 amult; |
|
519 |
||
520 |
__m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; |
|
521 |
||
522 |
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
|
523 |
/* form the alpha mult */ |
|
524 |
amult = alpha | (alpha << 8); |
|
525 |
amult = amult | (amult << 16); |
|
526 |
chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); |
|
527 |
mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ |
|
528 |
mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ |
|
529 |
/* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ |
|
530 |
dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
|
531 |
||
532 |
while (height--) { |
|
533 |
int n = width; |
|
534 |
if (n & 1) { |
|
535 |
/* One Pixel Blend */ |
|
536 |
src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ |
|
537 |
src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ |
|
538 |
||
539 |
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ |
|
540 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ |
|
541 |
||
542 |
src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ |
|
543 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
|
544 |
src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
|
545 |
dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ |
|
546 |
||
547 |
dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ |
|
548 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
|
549 |
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
|
550 |
||
551 |
++srcp; |
|
552 |
++dstp; |
|
553 |
||
554 |
n--; |
|
555 |
} |
|
556 |
||
557 |
for (n >>= 1; n > 0; --n) { |
|
558 |
/* Two Pixels Blend */ |
|
559 |
src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ |
|
560 |
src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
|
561 |
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ |
|
562 |
src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ |
|
563 |
||
564 |
dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ |
|
565 |
dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
|
566 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ |
|
567 |
dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ |
|
568 |
||
569 |
src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ |
|
570 |
src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ |
|
571 |
src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ |
|
572 |
dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ |
|
573 |
||
574 |
src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ |
|
575 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
|
576 |
src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
|
577 |
dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ |
|
578 |
||
579 |
dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ |
|
580 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
|
581 |
||
582 |
*(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ |
|
583 |
||
584 |
srcp += 2; |
|
585 |
dstp += 2; |
|
586 |
} |
|
587 |
srcp += srcskip; |
|
588 |
dstp += dstskip; |
|
589 |
} |
|
590 |
_mm_empty(); |
|
591 |
} |
|
592 |
} |
|
593 |
||
594 |
/* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
|
595 |
static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) |
|
596 |
{ |
|
597 |
int width = info->d_width; |
|
598 |
int height = info->d_height; |
|
599 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
|
600 |
int srcskip = info->s_skip >> 2; |
|
601 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
|
602 |
int dstskip = info->d_skip >> 2; |
|
603 |
SDL_PixelFormat* sf = info->src; |
|
604 |
Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; |
|
605 |
Uint32 amask = sf->Amask; |
|
606 |
Uint32 ashift = sf->Ashift; |
|
607 |
Uint64 multmask; |
|
608 |
||
609 |
__m64 src1, dst1, mm_alpha, mm_zero, dmask; |
|
610 |
||
611 |
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
|
612 |
multmask = ~(0xFFFFi64 << (ashift * 2)); |
|
613 |
dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ |
|
614 |
||
615 |
while(height--) { |
|
616 |
DUFFS_LOOP4({ |
|
617 |
Uint32 alpha = *srcp & amask; |
|
618 |
if (alpha == 0) { |
|
619 |
/* do nothing */ |
|
620 |
} else if (alpha == amask) { |
|
621 |
/* opaque alpha -- copy RGB, keep dst alpha */ |
|
622 |
*dstp = (*srcp & chanmask) | (*dstp & ~chanmask); |
|
623 |
} else { |
|
624 |
src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ |
|
625 |
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ |
|
626 |
||
627 |
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ |
|
628 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ |
|
629 |
||
630 |
mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ |
|
631 |
mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ |
|
632 |
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ |
|
633 |
mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ |
|
634 |
mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ |
|
635 |
||
636 |
/* blend */ |
|
637 |
src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ |
|
638 |
src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src1 - dst1) * alpha -> src1 */ |
|
639 |
src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ |
|
640 |
dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1 -> dst1(0A0R0G0B) */ |
|
641 |
dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ |
|
642 |
||
643 |
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
|
644 |
} |
|
645 |
++srcp; |
|
646 |
++dstp; |
|
647 |
}, width); |
|
648 |
srcp += srcskip; |
|
649 |
dstp += dstskip; |
|
650 |
} |
|
651 |
_mm_empty(); |
|
652 |
} |
|
653 |
/* End MSVC_ASMBLIT */ |
|
654 |
||
655 |
#endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
|
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
656 |
|
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
657 |
#if SDL_ALTIVEC_BLITTERS |
1795 | 658 |
#if __MWERKS__ |
659 |
#pragma altivec_model on |
|
660 |
#endif |
|
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
661 |
#if HAVE_ALTIVEC_H |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
662 |
#include <altivec.h> |
1175
867f521591e5
Fixed Altivec support on Mac OS X.
Ryan C. Gordon <icculus@icculus.org>
parents:
1162
diff
changeset
|
663 |
#endif |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
664 |
#include <assert.h> |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
665 |
|
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
666 |
/*
 * Vector literal helpers.  Mac OS X builds with GCC older than 4 get the
 * parenthesised literal form; every other configuration gets the
 * brace-initialiser form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
677 |
|
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
678 |
/*
 * Low four bits of an address: non-zero when the pointer is not 16-byte
 * aligned.  The argument is parenthesised so that an expression such as
 * `p + 1` is evaluated as pointer arithmetic before the cast to size_t.
 */
#define UNALIGNED_PTR(x) (((size_t)(x)) & 0x0000000F)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
679 |
/*
 * Debug helper: prints `msg`, then the vector `v` reinterpreted as four
 * unsigned 32-bit words, each as eight hex digits.
 */
#define VECPRINT(msg, v)                                                    \
    do {                                                                    \
        vector unsigned int tmpvec = (vector unsigned int)(v);              \
        unsigned int *vp = (unsigned int *)&tmpvec;                         \
        printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
    } while (0)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
684 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
685 |
/*
 * The permutation vector that takes the high bytes out of all the
 * appropriate shorts.  It is equivalent to the literal
 *
 *   (vector unsigned char)(
 *       0x00, 0x10, 0x02, 0x12,
 *       0x04, 0x14, 0x06, 0x16,
 *       0x08, 0x18, 0x0A, 0x1A,
 *       0x0C, 0x1C, 0x0E, 0x1E );
 *
 * but is computed as vec_lvsl(0, NULL) plus the byte view of a splatted
 * 16-bit 0x0F instead of being spelled out as a constant.
 */
#define VEC_MERGE_PERMUTE() \
    (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
693 |
/* 24 in every 32-bit lane, formed as 12 + 12 from two splats */
#define VEC_U32_24() \
    (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
694 |
/* all-ones bytes shifted left by 24 within each 32-bit lane, viewed as bytes */
#define VEC_ALPHA_MASK() \
    ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
695 |
/*
 * Permute vector for loading from `src`.  A pointer that is not 16-byte
 * aligned uses vec_lvsl(0, src); an aligned pointer uses
 * vec_lvsl(8, src) with 8 added to every byte.
 * NOTE(review): the aligned case presumably makes a following vec_perm
 * select only from its second operand -- confirm against the callers.
 */
#define VEC_ALIGNER(src)                                \
    ((UNALIGNED_PTR(src))                               \
        ? vec_lvsl(0, src)                              \
        : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
698 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
699 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
700 |
/*
 * Blend the byte vector `vs` over `vd` with per-byte weights `valpha`,
 * leaving the result in `vd`.  Each byte is computed as
 *     t = s * alpha + d * (255 - alpha) + 1;   result = (t + (t >> 8)) >> 8
 * Even and odd bytes are handled separately as 16-bit products and are
 * interleaved again through `mergePermute`.  `v1_16` and `v8_16` must be
 * 16-bit splats of 1 and 8.
 */
#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
    /* valpha2 is 255-alpha */ \
    vector unsigned char valpha2 = vec_nor(valpha, valpha); \
    /* even bytes (AAGGAAGGAAGGAAGG): source*alpha + dest*(255-alpha) */ \
    vector unsigned short vtemp1 = \
        vec_add(vec_mule(vs, valpha), vec_mule(vd, valpha2)); \
    /* odd bytes (RRBBRRBBRRBBRRBB): source*alpha + dest*(255-alpha) */ \
    vector unsigned short vtemp2 = \
        vec_add(vec_mulo(vs, valpha), vec_mulo(vd, valpha2)); \
    /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
    vtemp1 = vec_add(vtemp1, v1_16); \
    vtemp1 = vec_add(vtemp1, vec_sr(vtemp1, v8_16)); \
    /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
    vtemp2 = vec_add(vtemp2, v1_16); \
    vtemp2 = vec_add(vtemp2, vec_sr(vtemp2, v8_16)); \
    /* (>>8) and get ARGBARGBARGBARGB */ \
    vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
} while (0)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
725 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
726 |
/* Calculate the permute vector used for 32->32 swizzling */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
727 |
static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
728 |
const SDL_PixelFormat *dstfmt) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
729 |
{ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
730 |
/* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
731 |
* We have to assume that the bits that aren't used by other |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
732 |
* colors is alpha, and it's one complete byte, since some formats |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
733 |
* leave alpha with a zero mask, but we should still swizzle the bits. |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
734 |
*/ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
735 |
/* ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
736 |
const static struct SDL_PixelFormat default_pixel_format = { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
737 |
NULL, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
738 |
0, 0, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
739 |
16, 8, 0, 24, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
740 |
0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
741 |
0, 0}; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
742 |
if (!srcfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
743 |
srcfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
744 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
745 |
if (!dstfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
746 |
dstfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
747 |
} |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
748 |
const vector unsigned char plus = VECUINT8_LITERAL |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
749 |
( 0x00, 0x00, 0x00, 0x00, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
750 |
0x04, 0x04, 0x04, 0x04, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
751 |
0x08, 0x08, 0x08, 0x08, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
752 |
0x0C, 0x0C, 0x0C, 0x0C ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
753 |
vector unsigned char vswiz; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
754 |
vector unsigned int srcvec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
755 |
#define RESHIFT(X) (3 - ((X) >> 3)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
756 |
Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
757 |
Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
758 |
Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
759 |
Uint32 amask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
760 |
/* Use zero for alpha if either surface doesn't have alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
761 |
if (dstfmt->Amask) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
762 |
amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
763 |
} else { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
764 |
amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
765 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
766 |
#undef RESHIFT |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
767 |
((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
768 |
vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
769 |
return(vswiz); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
770 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
771 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
772 |
/*
 * Alpha-blend a 32-bit source with per-pixel alpha onto a 16-bit RGB565
 * destination, using Altivec for the bulk of every row.
 *
 * info supplies the source/destination pixel pointers (s_pixels, d_pixels),
 * the per-row skips in bytes (s_skip, d_skip), the blit size in pixels
 * (d_width, d_height) and the source pixel format (src).
 *
 * Each row is processed in three phases:
 *   1. scalar blending, one pixel at a time, while UNALIGNED_PTR(dst)
 *      reports the destination as unaligned;
 *   2. vector blending, eight pixels per iteration;
 *   3. scalar blending of the (width % 8) pixels that are left over.
 */
static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info)
{
	int height = info->d_height;
	Uint8 *src = (Uint8 *)info->s_pixels;
	int srcskip = info->s_skip;
	Uint8 *dst = (Uint8 *)info->d_pixels;
	int dstskip = info->d_skip;
	SDL_PixelFormat *srcfmt = info->src;

	/* Small splatted constants used as zero fill and shift counts. */
	vector unsigned char v0 = vec_splat_u8(0);
	vector unsigned short v8_16 = vec_splat_u16(8);
	vector unsigned short v1_16 = vec_splat_u16(1);
	vector unsigned short v2_16 = vec_splat_u16(2);
	vector unsigned short v3_16 = vec_splat_u16(3);
	vector unsigned int v8_32 = vec_splat_u32(8);
	vector unsigned int v16_32 = vec_add(v8_32, v8_32);

	/*
	 * Blue mask for the vec_packpx() result.  Blue occupies only the low
	 * five bits of the packed 1-5-5-5 pixel; bit 5 is the least significant
	 * bit of the packed green and must not leak into the 565 green field,
	 * so the mask is 0x001f.
	 */
	vector unsigned short v1f = VECUINT16_LITERAL(
		0x001f, 0x001f, 0x001f, 0x001f,
		0x001f, 0x001f, 0x001f, 0x001f);
	/* Keeps the top six bits of an 8-bit green value. */
	vector unsigned short vfc = VECUINT16_LITERAL(
		0x00fc, 0x00fc, 0x00fc, 0x00fc,
		0x00fc, 0x00fc, 0x00fc, 0x00fc);

	/*
	 * Permute table applied with vR as both inputs:
	 *   0x10 - 0x1f is the alpha
	 *   0x00 - 0x0e evens are the red
	 *   0x01 - 0x0f odds are zero
	 * vredalpha1 expands destination pixels 0-3; vredalpha2 (red index
	 * advanced by 8) expands destination pixels 4-7.
	 */
	vector unsigned char vredalpha1 = VECUINT8_LITERAL(
		0x10, 0x00, 0x01, 0x01,
		0x10, 0x02, 0x01, 0x01,
		0x10, 0x04, 0x01, 0x01,
		0x10, 0x06, 0x01, 0x01
	);
	vector unsigned char vredalpha2 = (vector unsigned char)(
		vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32))
	);

	/*
	 * Permute table merging blue into the partially built pixels:
	 *   0x00 - 0x0f is ARxx ARxx ARxx ARxx
	 *   0x11 - 0x1f odds are blue
	 */
	vector unsigned char vblue1 = VECUINT8_LITERAL(
		0x00, 0x01, 0x02, 0x11,
		0x04, 0x05, 0x06, 0x13,
		0x08, 0x09, 0x0a, 0x15,
		0x0c, 0x0d, 0x0e, 0x17
	);
	vector unsigned char vblue2 = (vector unsigned char)(
		vec_add((vector unsigned int)vblue1, v8_32)
	);

	/*
	 * Permute table merging green into the partially built pixels:
	 *   0x00 - 0x0f is ARxB ARxB ARxB ARxB
	 *   0x10 - 0x1e evens are green
	 */
	vector unsigned char vgreen1 = VECUINT8_LITERAL(
		0x00, 0x01, 0x10, 0x03,
		0x04, 0x05, 0x12, 0x07,
		0x08, 0x09, 0x14, 0x0b,
		0x0c, 0x0d, 0x16, 0x0f
	);
	vector unsigned char vgreen2 = (vector unsigned char)(
		vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32))
	);

	/*
	 * Gathers the green byte of each of the eight blended pixels into the
	 * low byte of a 16-bit lane.  The high byte receives byte 0 of the
	 * first input and is discarded by the vfc mask.
	 */
	vector unsigned char vgmerge = VECUINT8_LITERAL(
		0x00, 0x02, 0x00, 0x06,
		0x00, 0x0a, 0x00, 0x0e,
		0x00, 0x12, 0x00, 0x16,
		0x00, 0x1a, 0x00, 0x1e);
	vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
	/* Presumably reorders source bytes into ARGB; see calc_swizzle32(). */
	vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
	/* {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}: replicates byte 0 of each pixel. */
	vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC));

	/*
	 * 0xF800 in every 16-bit lane: the red field of an RGB565 pixel.
	 * Splatting -8 yields 0xF8 bytes; -7 would yield 0xF9 and let bit 8
	 * (a green bit) through the mask.
	 */
	vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8);
	vf800 = vec_sl(vf800, vec_splat_u16(8));

	while(height--) {
		int extrawidth;
		vector unsigned char valigner;
		vector unsigned char vsrc;
		vector unsigned char voverflow;
		int width = info->d_width;

/* Scalar blend of single pixels for as long as `condition` holds. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
		while (condition) { \
			Uint32 Pixel; \
			unsigned sR, sG, sB, dR, dG, dB, sA; \
			DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
			if(sA) { \
				unsigned short dstpixel = *((unsigned short *)dst); \
				dR = (dstpixel >> 8) & 0xf8; \
				dG = (dstpixel >> 3) & 0xfc; \
				dB = (dstpixel << 3) & 0xf8; \
				ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
				*((unsigned short *)dst) = ( \
					((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
				); \
			} \
			src += 4; \
			dst += 2; \
			widthvar--; \
		}

		/* Phase 1: advance until dst is aligned for vec_ld/vec_st. */
		ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
		extrawidth = (width % 8);
		valigner = VEC_ALIGNER(src);
		/* Prime the unaligned-load pipeline with the first source block. */
		vsrc = (vector unsigned char)vec_ld(0, src);
		width -= extrawidth;

		/* Phase 2: eight pixels per iteration. */
		while (width) {
			vector unsigned char valpha;
			vector unsigned char vsrc1, vsrc2;
			vector unsigned char vdst1, vdst2;
			vector unsigned short vR, vG, vB;
			vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;

			/* Load 8 pixels from src as ARGB */
			voverflow = (vector unsigned char)vec_ld(15, src);
			vsrc = vec_perm(vsrc, voverflow, valigner);
			vsrc1 = vec_perm(vsrc, vsrc, vpermute);
			src += 16;
			/* vsrc now holds the look-ahead block for the next iteration. */
			vsrc = (vector unsigned char)vec_ld(15, src);
			voverflow = vec_perm(voverflow, vsrc, valigner);
			vsrc2 = vec_perm(voverflow, voverflow, vpermute);
			src += 16;

			/* Load 8 pixels from dst as XRGB */
			voverflow = vec_ld(0, dst);
			vR = vec_and((vector unsigned short)voverflow, vf800);
			vB = vec_sl((vector unsigned short)voverflow, v3_16);
			/*
			 * NOTE(review): vG is not masked, so the low two bits of each
			 * green byte are destination blue bits; the scalar path masks
			 * with 0xfc instead.
			 */
			vG = vec_sl(vB, v2_16);
			vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1);
			vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
			vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
			vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2);
			vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
			vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);

			/* Alpha blend 8 pixels as ARGB */
			valpha = vec_perm(vsrc1, v0, valphaPermute);
			VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16);
			valpha = vec_perm(vsrc2, v0, valphaPermute);
			VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16);

			/* Convert 8 pixels to 565 */
			/* vec_packpx gives 1-5-5-5; green is rebuilt separately to keep 6 bits. */
			vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2);
			vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge);
			vgpixel = vec_and(vgpixel, vfc);
			vgpixel = vec_sl(vgpixel, v3_16);
			vrpixel = vec_sl(vpixel, v1_16);
			vrpixel = vec_and(vrpixel, vf800);
			vbpixel = vec_and(vpixel, v1f);
			vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
			vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel);

			/* Store 8 pixels */
			vec_st(vdst1, 0, dst);

			width -= 8;
			dst += 16;
		}

		/* Phase 3: leftover pixels that did not fill a whole vector. */
		ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
		src += srcskip;
		dst += dstskip;
	}
}
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
936 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
937 |
static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
938 |
{ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
939 |
unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
940 |
int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
941 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
942 |
int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
943 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
944 |
int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
945 |
SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
946 |
SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
947 |
unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
948 |
unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
949 |
Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
950 |
Uint32 ckey = info->src->colorkey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
951 |
vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
952 |
vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
953 |
vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
954 |
vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
955 |
vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
956 |
vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
957 |
vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
958 |
vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
959 |
vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
960 |
vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
961 |
vector unsigned int vckey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
962 |
vector unsigned int vrgbmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
963 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
964 |
mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
965 |
v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
966 |
v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
967 |
v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
968 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
969 |
/* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
970 |
valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
971 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
972 |
vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
973 |
vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
974 |
vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
975 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
976 |
/* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
977 |
((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
978 |
valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
979 |
vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
980 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
981 |
ckey &= rgbmask; |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
982 |
((unsigned int *)(char*)&vckey)[0] = ckey; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
983 |
vckey = vec_splat(vckey, 0); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
984 |
((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
985 |
vrgbmask = vec_splat(vrgbmask, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
986 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
987 |
while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
988 |
int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
989 |
#define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
990 |
while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
991 |
Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
992 |
unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
993 |
RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
994 |
if(sA && Pixel != ckey) { \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
995 |
RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
996 |
DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
997 |
ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
998 |
ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
999 |
} \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1000 |
dstp++; \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1001 |
srcp++; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1002 |
widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1003 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1004 |
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1005 |
if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1006 |
int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1007 |
vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1008 |
vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1009 |
width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1010 |
while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1011 |
vector unsigned char vsel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1012 |
vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1013 |
vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1014 |
vector unsigned char vd_orig; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1015 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1016 |
/* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1017 |
voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1018 |
vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1019 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1020 |
/* vsel is set for items that match the key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1021 |
vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1022 |
vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1023 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1024 |
/* permute to source format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1025 |
vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1026 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1027 |
/* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1028 |
vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1029 |
vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1030 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1031 |
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1032 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1033 |
/* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1034 |
vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1035 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1036 |
/* mask out color key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1037 |
vd = vec_sel(vd, vd_orig, vsel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1038 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1039 |
/* permute to dest format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1040 |
vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1041 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1042 |
/* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1043 |
vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1044 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1045 |
srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1046 |
dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1047 |
width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1048 |
vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1049 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1050 |
ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1051 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1052 |
#undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1053 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1054 |
srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1055 |
dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1056 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1057 |
} |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1058 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1059 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1060 |
static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1061 |
{ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1062 |
int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1063 |
int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1064 |
Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1065 |
int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1066 |
Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1067 |
int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1068 |
SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1069 |
SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1070 |
vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1071 |
vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1072 |
vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1073 |
vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1074 |
vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1075 |
vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1076 |
vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1077 |
vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1078 |
vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1079 |
vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1080 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1081 |
v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1082 |
v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1083 |
v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1084 |
mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1085 |
valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1086 |
valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1087 |
vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1088 |
vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1089 |
vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1090 |
vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1091 |
|
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1092 |
while ( height-- ) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1093 |
width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1094 |
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1095 |
Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1096 |
unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1097 |
DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1098 |
if(sA) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1099 |
DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1100 |
ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1101 |
ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1102 |
} \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1103 |
++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1104 |
++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1105 |
widthvar--; \ |