--- a/src/hermes/mmxp2_32.asm Thu Jan 05 08:17:35 2006 +0000
+++ b/src/hermes/mmxp2_32.asm Thu Jan 05 15:25:19 2006 +0000
@@ -29,31 +29,43 @@
EXTERN _mmxreturn
-SECTION .data
-
-ALIGN 8
+;; Macros for conversion routines
-;; Constants for conversion routines
+%macro _push_immq_mask 1 ; helper: push the 32-bit immediate %1 twice, forming an 8-byte value at [esp]
+ push dword %1 ; first copy (ends up at [esp+4] after the second push)
+ push dword %1 ; second copy at [esp]; esp is now 8 lower and is NOT restored here
+%endmacro
-mmx32_rgb888_mask dd 00ffffffh,00ffffffh
+%macro load_immq 2 ; %1 = destination of movq, %2 = 32-bit immediate; %1 receives %2 in both dwords
+ _push_immq_mask %2 ; build the qword on the stack (two pushes, esp -= 8)
+ movq %1, [esp] ; load it; the 8 bytes stay pushed until CLEANUP_IMMQ_LOADS releases them
+%endmacro
-mmx32_rgb565_b dd 000000f8h, 000000f8h
-mmx32_rgb565_g dd 0000fc00h, 0000fc00h
-mmx32_rgb565_r dd 00f80000h, 00f80000h
+%macro pand_immq 2 ; %1 = destination of pand, %2 = 32-bit immediate; ANDs %1 with %2 replicated in both dwords
+ _push_immq_mask %2 ; build the qword mask on the stack (two pushes, esp -= 8)
+ pand %1, [esp] ; apply it; the 8 bytes stay pushed until CLEANUP_IMMQ_LOADS releases them
+%endmacro
+;; Release the 8 bytes each of `num` load_immq/pand_immq uses left pushed; (num) is parenthesized so expression arguments scale as a whole.
+%define CLEANUP_IMMQ_LOADS(num) \
+ add esp, byte 8 * (num)
-mmx32_rgb555_rb dd 00f800f8h,00f800f8h
-mmx32_rgb555_g dd 0000f800h,0000f800h
-mmx32_rgb555_mul dd 20000008h,20000008h
-mmx32_bgr555_mul dd 00082000h,00082000h
+%define mmx32_rgb888_mask 00ffffffh ; each value here is ONE dword; load_immq/pand_immq push it twice to rebuild the old `dd x,x` qword
+%define mmx32_rgb565_b 000000f8h ; bits 3-7 of each dword
+%define mmx32_rgb565_g 0000fc00h ; bits 10-15 of each dword
+%define mmx32_rgb565_r 00f80000h ; bits 19-23 of each dword
+%define mmx32_rgb555_rb 00f800f8h ; bits 3-7 and 19-23 of each dword
+%define mmx32_rgb555_g 0000f800h ; bits 11-15 of each dword
+%define mmx32_rgb555_mul 20000008h ; words 2000h (high) / 0008h (low); loaded into mm7 as the pmaddwd multiplier
+%define mmx32_bgr555_mul 00082000h ; same two words as mmx32_rgb555_mul, swapped
-
SECTION .text
_ConvertMMXpII32_24RGB888:
; set up mm6 as the mask, mm7 as zero
- movq mm6, qword [mmx32_rgb888_mask]
+ load_immq mm6, mmx32_rgb888_mask ; mask is built on the stack instead of being read from a data symbol
+ CLEANUP_IMMQ_LOADS(1) ; drop the 8 bytes that the single load_immq left pushed
pxor mm7, mm7
mov edx, ecx ; save ecx
@@ -115,9 +127,10 @@
_ConvertMMXpII32_16RGB565:
; set up masks
- movq mm5, [mmx32_rgb565_b]
- movq mm6, [mmx32_rgb565_g]
- movq mm7, [mmx32_rgb565_r]
+ load_immq mm5, mmx32_rgb565_b ; each load_immq leaves 8 bytes pushed
+ load_immq mm6, mmx32_rgb565_g
+ load_immq mm7, mmx32_rgb565_r
+ CLEANUP_IMMQ_LOADS(3) ; 3 loads above -> release 24 bytes before esp is relied on again
mov edx, ecx
shr ecx, 2
@@ -181,9 +194,10 @@
_ConvertMMXpII32_16BGR565:
- movq mm5, [mmx32_rgb565_r]
- movq mm6, [mmx32_rgb565_g]
- movq mm7, [mmx32_rgb565_b]
+ load_immq mm5, mmx32_rgb565_r ; same three masks as the RGB565 routine, with mm5/mm7 swapped
+ load_immq mm6, mmx32_rgb565_g
+ load_immq mm7, mmx32_rgb565_b
+ CLEANUP_IMMQ_LOADS(3) ; 3 loads above -> release 24 bytes
mov edx, ecx
shr ecx, 2
@@ -253,7 +267,7 @@
; except it uses a different multiplier for the pmaddwd
; instruction. cool huh.
- movq mm7, qword [mmx32_bgr555_mul]
+ load_immq mm7, mmx32_bgr555_mul ; leaves 8 bytes pushed across the jmp; released by CLEANUP_IMMQ_LOADS(2) after _convert_bgr555_cheat
jmp _convert_bgr555_cheat
; This is the same as the Intel version.. they obviously went to
@@ -263,9 +277,10 @@
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
- movq mm7,qword [mmx32_rgb555_mul]
+ load_immq mm7, mmx32_rgb555_mul ; 8 bytes pushed; falls through into the shared path below
_convert_bgr555_cheat:
- movq mm6,qword [mmx32_rgb555_g]
+ load_immq mm6, mmx32_rgb555_g ; every entry to this label arrives with one mm7 load still pushed
+ CLEANUP_IMMQ_LOADS(2) ; 2 = the mm7 load (either entry point) + the mm6 load just above
mov edx,ecx ; Save ecx
@@ -280,12 +295,14 @@
movq mm0,[esi]
movq mm3,mm2
- pand mm3,qword [mmx32_rgb555_rb]
+ pand_immq mm3, mmx32_rgb555_rb ; mask built on the stack; leaves 8 bytes pushed
movq mm1,mm0
- pand mm1,qword [mmx32_rgb555_rb]
+ pand_immq mm1, mmx32_rgb555_rb ; second push pair, 16 bytes outstanding
pmaddwd mm3,mm7
+ CLEANUP_IMMQ_LOADS(2) ; release both pand_immq masks from this hunk (modifies EFLAGS via add esp)
+
pmaddwd mm1,mm7
pand mm2,mm6
@@ -302,13 +319,13 @@
movq mm0,mm4
psrld mm1,6
- pand mm0,qword [mmx32_rgb555_rb]
+ pand_immq mm0, mmx32_rgb555_rb ; leaves 8 bytes pushed; not released in this hunk
packssdw mm1,mm3
movq mm3,mm5
pmaddwd mm0,mm7
- pand mm3,qword [mmx32_rgb555_rb]
+ pand_immq mm3, mmx32_rgb555_rb ; 16 bytes now outstanding; presumably covered by the later CLEANUP_IMMQ_LOADS(4) - confirm in full file
pand mm4,mm6
movq [edi],mm1
@@ -329,12 +346,14 @@
movq mm3,mm2
movq mm1,mm0
- pand mm3,qword [mmx32_rgb555_rb]
+ pand_immq mm3, mmx32_rgb555_rb ; leaves 8 bytes pushed
packssdw mm5,mm4
- pand mm1,qword [mmx32_rgb555_rb]
+ pand_immq mm1, mmx32_rgb555_rb ; leaves 8 more bytes pushed
pand mm2,mm6
+ CLEANUP_IMMQ_LOADS(4) ; releases 32 bytes: the 2 masks above plus, presumably, the 2 pand_immq pushes in the preceding hunk - TODO confirm no branch target lies between them
+
movq [edi+8],mm5
pmaddwd mm3,mm7