src/audio/SDL_mixer_MMX.c
branchSDL-1.3
changeset 1662 782fd950bd46
parent 1612 97d0966f4bf7
child 1668 4da1ee79c9af
equal deleted inserted replaced
1661:281d3f4870e5 1662:782fd950bd46
    34 /***********************************************
    34 /***********************************************
    35 *   Mixing for 16 bit signed buffers
    35 *   Mixing for 16 bit signed buffers
    36 ***********************************************/
    36 ***********************************************/
    37 
    37 
    38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
    38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
    /**
     * Mix signed 16-bit samples from src into dst using MMX.
     *
     * For every sample:  dst = saturate16(dst + saturate16((src * volume) >> 7)).
     * The shift by 7 divides by 128 (SDL_MIX_MAXVOLUME), so volume == 128
     * adds the source unchanged.
     *
     * Samples are read and written as 16-bit words in host memory order.
     *
     * \param dst     destination buffer; updated in place.
     * \param src     source buffer; only read.
     * \param size    length of both buffers in bytes.  Only whole groups of
     *                16 bytes (8 samples) are processed; the trailing
     *                size % 16 bytes of dst are left untouched.
     * \param volume  scale factor.  It is broadcast into 16-bit lanes and
     *                multiplied as a signed word, so it must fit in 16 bits.
     *
     * When size < 16 the function returns without touching any MMX state.
     */
    void
    SDL_MixAudio_MMX_S16 (char *dst, char *src, unsigned int size, int volume)
    {
        /* One loop iteration consumes 16 bytes = 8 samples. */
        unsigned int blocks = size >> 4;

        if (blocks == 0) {
            return;
        }

        /*
         * dst, src and blocks are modified by the assembly, so they are
         * declared as read-write ("+r") operands.  A numeric local label is
         * used so the code can be emitted more than once (e.g. when inlined)
         * without producing duplicate symbols.
         */
        __asm__ __volatile__ (
            "\tmovd %3,%%mm0\n"             /* mm0 = 0|0|volume (zero-extended) */
            "\tmovq %%mm0,%%mm1\n"
            "\tpsllq $16,%%mm0\n"
            "\tpor %%mm1,%%mm0\n"
            "\tpsllq $16,%%mm0\n"
            "\tpor %%mm1,%%mm0\n"
            "\tpsllq $16,%%mm0\n"
            "\tpor %%mm1,%%mm0\n"           /* mm0 = vol|vol|vol|vol */

            "\t.p2align 3\n"
            "1:\n"
            "\tmovq (%1),%%mm1\n"           /* mm1 = first four source samples */
            "\tmovq %%mm1,%%mm2\n"          /* mm2 = copy of the first four */
            "\tmovq 8(%1),%%mm4\n"          /* mm4 = next four source samples */

            /* preload the first half of the destination into mm7 */
            "\tmovq (%0),%%mm7\n"           /* mm7 = dst[0..3] */

            /* multiply by the volume: low and high halves of each product */
            "\tpmullw %%mm0,%%mm1\n"        /* mm1 = low 16 bits of sample*vol */
            "\tpmulhw %%mm0,%%mm2\n"        /* mm2 = high 16 bits of sample*vol */
            "\tmovq %%mm4,%%mm5\n"          /* mm5 = copy of the next four */
            "\tpmullw %%mm0,%%mm4\n"        /* mm4 = low 16 bits of sample*vol */
            "\tpmulhw %%mm0,%%mm5\n"        /* mm5 = high 16 bits of sample*vol */

            /* interleave low/high halves into full 32-bit products */
            "\tmovq %%mm1,%%mm3\n"          /* mm3 = low halves, first four */
            "\tpunpckhwd %%mm2,%%mm1\n"     /* mm1 = 32-bit products, upper two of first four */
            "\tmovq %%mm4,%%mm6\n"          /* mm6 = low halves, next four */
            "\tpunpcklwd %%mm2,%%mm3\n"     /* mm3 = 32-bit products, lower two of first four */
            "\tpunpckhwd %%mm5,%%mm4\n"     /* mm4 = 32-bit products, upper two of next four */
            "\tpunpcklwd %%mm5,%%mm6\n"     /* mm6 = 32-bit products, lower two of next four */

            /* preload the second half of the destination into mm5 */
            "\tmovq 8(%0),%%mm5\n"          /* mm5 = dst[4..7] */

            /* divide by 128 (SDL_MIX_MAXVOLUME) with an arithmetic shift */
            "\tpsrad $7,%%mm1\n"
            "\tadd $16,%1\n"                /* advance src */
            "\tpsrad $7,%%mm3\n"
            "\tpsrad $7,%%mm4\n"

            /* mm3 = first four volume-adjusted samples, saturated to 16 bits */
            "\tpackssdw %%mm1,%%mm3\n"
            "\tpsrad $7,%%mm6\n"
            "\tpaddsw %%mm7,%%mm3\n"        /* mm3 = adjusted src + dst, saturated */

            /* mm6 = next four volume-adjusted samples, saturated to 16 bits */
            "\tpackssdw %%mm4,%%mm6\n"
            "\tmovq %%mm3,(%0)\n"           /* store dst[0..3] */
            "\tpaddsw %%mm5,%%mm6\n"        /* mm6 = adjusted src + dst, saturated */
            "\tmovq %%mm6,8(%0)\n"          /* store dst[4..7] */

            "\tadd $16,%0\n"                /* advance dst */
            "\tdec %2\n"
            "\tjnz 1b\n"

            "\temms\n"                      /* leave MMX state */
            : "+r" (dst), "+r" (src), "+r" (blocks)
            : "rm" (volume)
            : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
              "memory", "cc");
    }
   134 
    84 
   135 
    85 
   136 
    86 
   137 /*////////////////////////////////////////////// */
    87 /*////////////////////////////////////////////// */
   138 /* Mixing for 8 bit signed buffers */
    88 /* Mixing for 8 bit signed buffers */
   139 /*////////////////////////////////////////////// */
    89 /*////////////////////////////////////////////// */
   140 
    90 
   141 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
    91 void
       
    92 SDL_MixAudio_MMX_S8 (char *dst, char *src, unsigned int size, int volume)
   142 {
    93 {
   143     __asm__ __volatile__ (
    94     __asm__ __volatile__ ("	movl %3,%%eax\n"        /* eax = volume */
   144 
    95                           "	movd %%eax,%%mm0\n" "	movq %%mm0,%%mm1\n" "	psllq $16,%%mm0\n" "	por %%mm1,%%mm0\n" "	psllq $16,%%mm0\n" "	por %%mm1,%%mm0\n" "	psllq $16,%%mm0\n" "	por %%mm1,%%mm0\n" "	movl %2,%%edx\n"        /* edx = size */
   145 "	movl %3,%%eax\n"	/* eax = volume */
    96                           "	shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */
   146 
    97                           "	cmp $0,%%edx\n" "	je .endS8\n" ".align 8\n" "	.mixloopS8:\n" "	pxor %%mm2,%%mm2\n"     /* mm2 = 0 */
   147 "	movd %%eax,%%mm0\n"
    98                           "	movq (%1),%%mm1\n"      /* mm1 = a|b|c|d|e|f|g|h */
   148 "	movq %%mm0,%%mm1\n"
    99                           "	movq %%mm1,%%mm3\n"     /* mm3 = a|b|c|d|e|f|g|h */
   149 "	psllq $16,%%mm0\n"
   100                           /* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */
   150 "	por %%mm1,%%mm0\n"
   101                           "	pcmpgtb %%mm1,%%mm2\n"  /* mm2 = 11111111|00000000|00000000.... */
   151 "	psllq $16,%%mm0\n"
   102                           "	punpckhbw %%mm2,%%mm1\n"        /* mm1 = 0|a|0|b|0|c|0|d */
   152 "	por %%mm1,%%mm0\n"
   103                           "	punpcklbw %%mm2,%%mm3\n"        /* mm3 = 0|e|0|f|0|g|0|h */
   153 "	psllq $16,%%mm0\n"
   104                           "	movq (%0),%%mm2\n"      /* mm2 = destination */
   154 "	por %%mm1,%%mm0\n"
   105                           "	pmullw %%mm0,%%mm1\n"   /* mm1 = v*a|v*b|v*c|v*d */
   155 
   106                           "	add $8,%1\n" "	pmullw %%mm0,%%mm3\n"   /* mm3 = v*e|v*f|v*g|v*h */
   156 "	movl %2,%%edx\n"	/* edx = size */
   107                           "	psraw $7,%%mm1\n"       /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128  */
   157 "	shr $3,%%edx\n"	/* process 8 bytes per iteration = 8 samples */
   108                           "	psraw $7,%%mm3\n"       /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */
   158 
   109                           "	packsswb %%mm1,%%mm3\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */
   159 "	cmp $0,%%edx\n"
   110                           "	paddsb %%mm2,%%mm3\n"   /* add to destination buffer */
   160 "	je .endS8\n"
   111                           "	movq %%mm3,(%0)\n"      /* store back to ram */
   161 
   112                           "	add $8,%0\n"
   162 ".align 8\n"
   113                           "	dec %%edx\n"
   163 "	.mixloopS8:\n"
   114                           "	jnz .mixloopS8\n"
   164 
   115                           ".endS8:\n"
   165 "	pxor %%mm2,%%mm2\n"		/* mm2 = 0 */
   116                           "	emms\n"::"r" (dst), "r" (src), "m" (size),
   166 "	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d|e|f|g|h */
   117                           "m" (volume):"eax", "edx", "memory");
   167 
       
   168 "	movq %%mm1,%%mm3\n" 	/* mm3 = a|b|c|d|e|f|g|h */
       
   169 
       
   170 	/* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */
       
   171 "	pcmpgtb %%mm1,%%mm2\n"	/* mm2 = 11111111|00000000|00000000.... */
       
   172 
       
   173 "	punpckhbw %%mm2,%%mm1\n"	/* mm1 = 0|a|0|b|0|c|0|d */
       
   174 
       
   175 "	punpcklbw %%mm2,%%mm3\n"	/* mm3 = 0|e|0|f|0|g|0|h */
       
   176 "	movq (%0),%%mm2\n"	/* mm2 = destination */
       
   177 
       
   178 "	pmullw %%mm0,%%mm1\n"	/* mm1 = v*a|v*b|v*c|v*d */
       
   179 "	add $8,%1\n"
       
   180 
       
   181 "	pmullw %%mm0,%%mm3\n"	/* mm3 = v*e|v*f|v*g|v*h */
       
   182 "	psraw $7,%%mm1\n"		/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128  */
       
   183 
       
   184 "	psraw $7,%%mm3\n"		/* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */
       
   185 
       
   186 "	packsswb %%mm1,%%mm3\n"	/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */
       
   187 
       
   188 "	paddsb %%mm2,%%mm3\n"	/* add to destination buffer */
       
   189 
       
   190 "	movq %%mm3,(%0)\n"	/* store back to ram */
       
   191 "	add $8,%0\n"
       
   192 
       
   193 "	dec %%edx\n"
       
   194 
       
   195 "	jnz .mixloopS8\n"
       
   196 
       
   197 ".endS8:\n"
       
   198 "	emms\n"
       
   199 	 :
       
   200 	 : "r" (dst), "r"(src),"m"(size),
       
   201 	 "m"(volume)
       
   202 	 : "eax","edx","memory"
       
   203 	 );
       
   204 }
   118 }
   205 #endif
   119 #endif
   206 
   120 /* vi: set ts=4 sw=4 expandtab: */