src/hermes/mmxp2_32.asm
changeset 1230 88c2d6aed428
parent 1199 2d6dc7de1145
child 1697 393092a3ebf6
--- a/src/hermes/mmxp2_32.asm	Thu Jan 05 08:17:35 2006 +0000
+++ b/src/hermes/mmxp2_32.asm	Thu Jan 05 15:25:19 2006 +0000
@@ -29,31 +29,43 @@
 
 EXTERN _mmxreturn
  
-SECTION .data
-	
-ALIGN 8
+;; Macros for conversion routines
 
-;; Constants for conversion routines
+%macro _push_immq_mask 1	; arg 1 = 32-bit immediate; builds an 8-byte value at [esp] with both dwords equal to it
+	push dword %1		; first push ends up as the upper dword, at [esp+4] once both pushes are done
+	push dword %1		; second push is the lower dword, at [esp]; esp is now 8 lower than on entry
+%endmacro
 
-mmx32_rgb888_mask dd 00ffffffh,00ffffffh
+%macro load_immq 2		; arg 1 = destination of the movq (MMX registers at the call sites), arg 2 = 32-bit immediate
+	_push_immq_mask %2	; materialise the immediate, duplicated into both dwords, on the stack (esp drops by 8)
+	movq %1, [esp]		; load the qword; the 8 bytes are NOT popped here - balance with CLEANUP_IMMQ_LOADS
+%endmacro
 
-mmx32_rgb565_b dd 000000f8h, 000000f8h
-mmx32_rgb565_g dd 0000fc00h, 0000fc00h
-mmx32_rgb565_r dd 00f80000h, 00f80000h
+%macro pand_immq 2		; arg 1 = register to be masked in place, arg 2 = 32-bit immediate mask
+	_push_immq_mask %2	; materialise the mask, duplicated into both dwords, on the stack (esp drops by 8)
+	pand %1, [esp]		; AND with the qword; the 8 bytes are NOT popped here - balance with CLEANUP_IMMQ_LOADS
+%endmacro
+
+%define CLEANUP_IMMQ_LOADS(num) \
+	add esp, byte 8 * (num)	; release the 8-byte stack temporaries left by 'num' load_immq/pand_immq uses; argument parenthesised so an expression scales as a whole; note that add rewrites EFLAGS
 
-mmx32_rgb555_rb dd 00f800f8h,00f800f8h
-mmx32_rgb555_g dd 0000f800h,0000f800h
-mmx32_rgb555_mul dd 20000008h,20000008h
-mmx32_bgr555_mul dd 00082000h,00082000h
+%define mmx32_rgb888_mask 00ffffffh
+%define mmx32_rgb565_b 000000f8h
+%define mmx32_rgb565_g 0000fc00h
+%define mmx32_rgb565_r 00f80000h
 
+%define mmx32_rgb555_rb 00f800f8h
+%define mmx32_rgb555_g 0000f800h
+%define mmx32_rgb555_mul 20000008h
+%define mmx32_bgr555_mul 00082000h
 
-			
 SECTION .text
 
 _ConvertMMXpII32_24RGB888:
 
         ; set up mm6 as the mask, mm7 as zero
-        movq mm6, qword [mmx32_rgb888_mask]
+        load_immq mm6, mmx32_rgb888_mask
+        CLEANUP_IMMQ_LOADS(1)
         pxor mm7, mm7
 
         mov edx, ecx                    ; save ecx
@@ -115,9 +127,10 @@
 _ConvertMMXpII32_16RGB565:
 
         ; set up masks
-        movq mm5, [mmx32_rgb565_b]
-        movq mm6, [mmx32_rgb565_g]
-        movq mm7, [mmx32_rgb565_r]
+        load_immq mm5, mmx32_rgb565_b
+        load_immq mm6, mmx32_rgb565_g
+        load_immq mm7, mmx32_rgb565_r
+        CLEANUP_IMMQ_LOADS(3)
 
         mov edx, ecx
         shr ecx, 2
@@ -181,9 +194,10 @@
 	
 _ConvertMMXpII32_16BGR565:
 
-        movq mm5, [mmx32_rgb565_r]
-        movq mm6, [mmx32_rgb565_g]
-        movq mm7, [mmx32_rgb565_b]
+        load_immq mm5, mmx32_rgb565_r
+        load_immq mm6, mmx32_rgb565_g
+        load_immq mm7, mmx32_rgb565_b
+        CLEANUP_IMMQ_LOADS(3)
 
         mov edx, ecx
         shr ecx, 2
@@ -253,7 +267,7 @@
         ; except it uses a different multiplier for the pmaddwd
         ; instruction.  cool huh.
 
-        movq mm7, qword [mmx32_bgr555_mul]
+        load_immq mm7, mmx32_bgr555_mul
         jmp _convert_bgr555_cheat
 
 ; This is the same as the Intel version.. they obviously went to
@@ -263,9 +277,10 @@
 ; (I think) a more accurate name..
 _ConvertMMXpII32_16RGB555:
 
-        movq mm7,qword [mmx32_rgb555_mul]
+	load_immq mm7, mmx32_rgb555_mul
 _convert_bgr555_cheat:
-        movq mm6,qword [mmx32_rgb555_g]
+	load_immq mm6, mmx32_rgb555_g
+	CLEANUP_IMMQ_LOADS(2)
         
 	mov edx,ecx		           ; Save ecx 
 
@@ -280,12 +295,14 @@
 	movq mm0,[esi]
 	movq mm3,mm2
 
-	pand mm3,qword [mmx32_rgb555_rb]
+	pand_immq mm3, mmx32_rgb555_rb
 	movq mm1,mm0
 
-	pand mm1,qword [mmx32_rgb555_rb]
+	pand_immq mm1, mmx32_rgb555_rb
 	pmaddwd mm3,mm7
 
+	CLEANUP_IMMQ_LOADS(2)
+
 	pmaddwd mm1,mm7
 	pand mm2,mm6
 
@@ -302,13 +319,13 @@
 	movq mm0,mm4
 	psrld mm1,6
 
-	pand mm0,qword [mmx32_rgb555_rb]
+	pand_immq mm0, mmx32_rgb555_rb
 	packssdw mm1,mm3
 
 	movq mm3,mm5
 	pmaddwd mm0,mm7
 
-	pand mm3,qword [mmx32_rgb555_rb]
+	pand_immq mm3, mmx32_rgb555_rb
 	pand mm4,mm6
 
 	movq [edi],mm1			
@@ -329,12 +346,14 @@
 	movq mm3,mm2
 	movq mm1,mm0
 
-	pand mm3,qword [mmx32_rgb555_rb]
+	pand_immq mm3, mmx32_rgb555_rb
 	packssdw mm5,mm4
 
-	pand mm1,qword [mmx32_rgb555_rb]
+	pand_immq mm1, mmx32_rgb555_rb
 	pand mm2,mm6
 
+	CLEANUP_IMMQ_LOADS(4)
+
 	movq [edi+8],mm5
 	pmaddwd mm3,mm7