symbian-qemu-0.9.1-12/libsdl-trunk/src/audio/SDL_mixer_MMX.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /*
       
     2     SDL - Simple DirectMedia Layer
       
     3     Copyright (C) 1997-2006 Sam Lantinga
       
     4 
       
     5     This library is free software; you can redistribute it and/or
       
     6     modify it under the terms of the GNU Lesser General Public
       
     7     License as published by the Free Software Foundation; either
       
     8     version 2.1 of the License, or (at your option) any later version.
       
     9 
       
    10     This library is distributed in the hope that it will be useful,
       
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    13     Lesser General Public License for more details.
       
    14 
       
    15     You should have received a copy of the GNU Lesser General Public
       
    16     License along with this library; if not, write to the Free Software
       
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
       
    18 
       
    19     Sam Lantinga
       
    20     slouken@libsdl.org
       
    21 */
       
    22 #include "SDL_config.h"
       
    23 
       
    24 /*
       
    25     MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
       
    26     Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
       
    27     This code is licensed under the LGPL (see COPYING for details)
       
    28  
       
     29     Assumes buffer size in bytes is a multiple of 16 (16 bit mixer) or 8 (8 bit mixer); any remaining bytes are left unmixed
       
    30     Assumes SDL_MIX_MAXVOLUME = 128
       
    31 */
       
    32 
       
    33 
       
    34 /***********************************************
       
    35 *   Mixing for 16 bit signed buffers
       
    36 ***********************************************/
       
    37 
       
    38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
       
    39 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
       
    40 {
       
    41     __asm__ __volatile__ (
       
    42 
       
    43 "	movl %3,%%eax\n"	/* eax = volume */
       
    44 
       
    45 "	movl %2,%%edx\n"	/* edx = size */
       
    46 
       
    47 "	shrl $4,%%edx\n"	/* process 16 bytes per iteration = 8 samples */
       
    48 
       
    49 "	jz .endS16\n"
       
    50 
       
    51 "	pxor %%mm0,%%mm0\n"
       
    52 
       
    53 "	movd %%eax,%%mm0\n"
       
    54 "	movq %%mm0,%%mm1\n"
       
    55 "	psllq $16,%%mm0\n"
       
    56 "	por %%mm1,%%mm0\n"
       
    57 "	psllq $16,%%mm0\n"
       
    58 "	por %%mm1,%%mm0\n"
       
    59 "	psllq $16,%%mm0\n"
       
    60 "	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */
       
    61 
       
    62 ".align 8\n"
       
    63 "	.mixloopS16:\n"
       
    64 
       
    65 "	movq (%1),%%mm1\n" /* mm1 = a|b|c|d */
       
    66 
       
    67 "	movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */
       
    68 
       
    69 "	movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */
       
    70 
       
    71 	/* pré charger le buffer dst dans mm7 */
       
    72 "	movq (%0),%%mm7\n" /* mm7 = dst[0] */
       
    73 
       
    74 	/* multiplier par le volume */
       
    75 "	pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */
       
    76 
       
    77 "	pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */
       
    78 "	movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */
       
    79 
       
    80 "	pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
       
    81 
       
    82 "	pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */
       
    83 "	movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */
       
    84 
       
    85 "	punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */
       
    86 
       
    87 "	movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
       
    88 "	punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */
       
    89 
       
    90 "	punpckhwd %%mm5,%%mm4\n" /* mm4 = e*f|f*v */
       
    91 
       
    92 "	punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */
       
    93 
       
    94 	/* pré charger le buffer dst dans mm5 */
       
    95 "	movq 8(%0),%%mm5\n" /* mm5 = dst[1] */
       
    96 
       
    97 	/* diviser par 128 */
       
    98 "	psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */
       
    99 "	add $16,%1\n"
       
   100 
       
   101 "	psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */
       
   102 
       
   103 "	psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */
       
   104 
       
   105 	/* mm1 = le sample avec le volume modifié */
       
   106 "	packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */
       
   107 
       
   108 "	psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */
       
   109 "	paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */
       
   110 
       
   111 	/* mm4 = le sample avec le volume modifié */
       
   112 "	packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */
       
   113 "	movq %%mm3,(%0)\n"
       
   114 
       
   115 "	paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */
       
   116 
       
   117 "	movq %%mm6,8(%0)\n"
       
   118 
       
   119 "	add $16,%0\n"
       
   120 
       
   121 "	dec %%edx\n"
       
   122 
       
   123 "	jnz .mixloopS16\n"
       
   124 
       
   125 "	emms\n"
       
   126 
       
   127 ".endS16:\n"
       
   128 	 :
       
   129 	 : "r" (dst), "r"(src),"m"(size),
       
   130 	 "m"(volume)
       
   131 	 : "eax","edx","memory"
       
   132 	 );
       
   133 }
       
   134 
       
   135 
       
   136 
       
   137 /*////////////////////////////////////////////// */
       
   138 /* Mixing for 8 bit signed buffers */
       
   139 /*////////////////////////////////////////////// */
       
   140 
       
   141 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
       
   142 {
       
   143     __asm__ __volatile__ (
       
   144 
       
   145 "	movl %3,%%eax\n"	/* eax = volume */
       
   146 
       
   147 "	movd %%eax,%%mm0\n"
       
   148 "	movq %%mm0,%%mm1\n"
       
   149 "	psllq $16,%%mm0\n"
       
   150 "	por %%mm1,%%mm0\n"
       
   151 "	psllq $16,%%mm0\n"
       
   152 "	por %%mm1,%%mm0\n"
       
   153 "	psllq $16,%%mm0\n"
       
   154 "	por %%mm1,%%mm0\n"
       
   155 
       
   156 "	movl %2,%%edx\n"	/* edx = size */
       
   157 "	shr $3,%%edx\n"	/* process 8 bytes per iteration = 8 samples */
       
   158 
       
   159 "	cmp $0,%%edx\n"
       
   160 "	je .endS8\n"
       
   161 
       
   162 ".align 8\n"
       
   163 "	.mixloopS8:\n"
       
   164 
       
   165 "	pxor %%mm2,%%mm2\n"		/* mm2 = 0 */
       
   166 "	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d|e|f|g|h */
       
   167 
       
   168 "	movq %%mm1,%%mm3\n" 	/* mm3 = a|b|c|d|e|f|g|h */
       
   169 
       
   170 	/* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */
       
   171 "	pcmpgtb %%mm1,%%mm2\n"	/* mm2 = 11111111|00000000|00000000.... */
       
   172 
       
   173 "	punpckhbw %%mm2,%%mm1\n"	/* mm1 = 0|a|0|b|0|c|0|d */
       
   174 
       
   175 "	punpcklbw %%mm2,%%mm3\n"	/* mm3 = 0|e|0|f|0|g|0|h */
       
   176 "	movq (%0),%%mm2\n"	/* mm2 = destination */
       
   177 
       
   178 "	pmullw %%mm0,%%mm1\n"	/* mm1 = v*a|v*b|v*c|v*d */
       
   179 "	add $8,%1\n"
       
   180 
       
   181 "	pmullw %%mm0,%%mm3\n"	/* mm3 = v*e|v*f|v*g|v*h */
       
   182 "	psraw $7,%%mm1\n"		/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128  */
       
   183 
       
   184 "	psraw $7,%%mm3\n"		/* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */
       
   185 
       
   186 "	packsswb %%mm1,%%mm3\n"	/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */
       
   187 
       
   188 "	paddsb %%mm2,%%mm3\n"	/* add to destination buffer */
       
   189 
       
   190 "	movq %%mm3,(%0)\n"	/* store back to ram */
       
   191 "	add $8,%0\n"
       
   192 
       
   193 "	dec %%edx\n"
       
   194 
       
   195 "	jnz .mixloopS8\n"
       
   196 
       
   197 ".endS8:\n"
       
   198 "	emms\n"
       
   199 	 :
       
   200 	 : "r" (dst), "r"(src),"m"(size),
       
   201 	 "m"(volume)
       
   202 	 : "eax","edx","memory"
       
   203 	 );
       
   204 }
       
   205 #endif
       
   206