|
1 /* |
|
2 SDL - Simple DirectMedia Layer |
|
3 Copyright (C) 1997-2006 Sam Lantinga |
|
4 |
|
5 This library is free software; you can redistribute it and/or |
|
6 modify it under the terms of the GNU Lesser General Public |
|
7 License as published by the Free Software Foundation; either |
|
8 version 2.1 of the License, or (at your option) any later version. |
|
9 |
|
10 This library is distributed in the hope that it will be useful, |
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 Lesser General Public License for more details. |
|
14 |
|
15 You should have received a copy of the GNU Lesser General Public |
|
16 License along with this library; if not, write to the Free Software |
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
|
19 Sam Lantinga |
|
20 slouken@libsdl.org |
|
21 */ |
|
22 #include "SDL_config.h" |
|
23 |
|
24 /* |
|
25 MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples |
|
26 Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr) |
|
27 This code is licensed under the LGPL (see COPYING for details) |
|
28 |
|
29 Assumes buffer size in bytes is a multiple of 16 |
|
30 Assumes SDL_MIX_MAXVOLUME = 128 |
|
31 */ |
|
32 |
|
33 |
|
34 /*********************************************** |
|
35 * Mixing for 16 bit signed buffers |
|
36 ***********************************************/ |
|
37 |
|
38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) |
|
/*
 * Mix signed 16-bit samples from src into dst using MMX.
 *
 * Every source sample is multiplied by volume, arithmetically shifted right
 * by 7 (a division by 128 that rounds toward negative infinity) and then
 * added to the matching destination sample with signed saturation.
 *
 * dst    - destination buffer, updated in place
 * src    - source buffer, only read
 * size   - length of the buffers in bytes; 16 bytes (8 samples) are handled
 *          per iteration and a trailing remainder (size % 16) is not mixed
 * volume - scale factor where 128 means unity gain; it is replicated into
 *          four 16-bit lanes, so bits above the low 16 would bleed into the
 *          neighbouring lanes (callers are expected to pass 0..128)
 *
 * dst and src are declared as read-write ("+r") operands because the loop
 * advances both pointers; declaring them input-only while modifying them is
 * not permitted by GCC's extended asm rules. Numeric local labels are used
 * so the template can be emitted more than once in a translation unit.
 *
 * In the lane comments below, lanes are written most-significant first.
 */
void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"	movl %3,%%eax\n"	/* eax = volume */
"	movl %2,%%edx\n"	/* edx = size */
"	shrl $4,%%edx\n"	/* edx = iteration count, 16 bytes = 8 samples each */
"	jz 2f\n"		/* nothing to mix; no MMX register touched yet, so emms is skipped */

"	pxor %%mm0,%%mm0\n"	/* mm0 = 0 */
"	movd %%eax,%%mm0\n"
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"	/* mm0 = vol|vol|vol|vol */

"	.align 8\n"
"1:\n"
"	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d */
"	movq %%mm1,%%mm2\n"	/* mm2 = a|b|c|d */
"	movq 8(%1),%%mm4\n"	/* mm4 = e|f|g|h */

	/* preload the first half of the destination into mm7 */
"	movq (%0),%%mm7\n"	/* mm7 = dst[0] */

	/* multiply by the volume; low and high halves of each 32-bit product */
"	pmullw %%mm0,%%mm1\n"	/* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */
"	pmulhw %%mm0,%%mm2\n"	/* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */
"	movq %%mm4,%%mm5\n"	/* mm5 = e|f|g|h */
"	pmullw %%mm0,%%mm4\n"	/* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
"	pmulhw %%mm0,%%mm5\n"	/* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */
"	movq %%mm1,%%mm3\n"	/* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */

	/* interleave low/high halves to rebuild the full 32-bit products */
"	punpckhwd %%mm2,%%mm1\n"	/* mm1 = a*v|b*v */
"	movq %%mm4,%%mm6\n"	/* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
"	punpcklwd %%mm2,%%mm3\n"	/* mm3 = c*v|d*v */
"	punpckhwd %%mm5,%%mm4\n"	/* mm4 = e*v|f*v */
"	punpcklwd %%mm5,%%mm6\n"	/* mm6 = g*v|h*v */

	/* preload the second half of the destination into mm5 */
"	movq 8(%0),%%mm5\n"	/* mm5 = dst[1] */

	/* divide by 128 (SDL_MIX_MAXVOLUME) */
"	psrad $7,%%mm1\n"	/* mm1 = a*v/128|b*v/128 */
"	add $16,%1\n"		/* advance src; both source quadwords are already loaded */
"	psrad $7,%%mm3\n"	/* mm3 = c*v/128|d*v/128 */
"	psrad $7,%%mm4\n"	/* mm4 = e*v/128|f*v/128 */

	/* mm3 = first four volume-adjusted samples, saturated to 16 bits */
"	packssdw %%mm1,%%mm3\n"	/* mm3 = s(a*v|b*v|c*v|d*v) */
"	psrad $7,%%mm6\n"	/* mm6 = g*v/128|h*v/128 */
"	paddsw %%mm7,%%mm3\n"	/* mm3 = adjust_volume(src)+dst, saturated */

	/* mm6 = last four volume-adjusted samples, saturated to 16 bits */
"	packssdw %%mm4,%%mm6\n"	/* mm6 = s(e*v|f*v|g*v|h*v) */
"	movq %%mm3,(%0)\n"
"	paddsw %%mm5,%%mm6\n"	/* mm6 = adjust_volume(src)+dst, saturated */
"	movq %%mm6,8(%0)\n"

"	add $16,%0\n"		/* advance dst */
"	dec %%edx\n"
"	jnz 1b\n"

"	emms\n"			/* leave MMX state before returning to C code */
"2:\n"
	 : "+r" (dst), "+r" (src)
	 : "m" (size), "m" (volume)
	 : "eax", "edx", "memory", "cc"
	 );
}
|
134 |
|
135 |
|
136 |
|
137 /*////////////////////////////////////////////// */ |
|
138 /* Mixing for 8 bit signed buffers */ |
|
139 /*////////////////////////////////////////////// */ |
|
140 |
|
/*
 * Mix signed 8-bit samples from src into dst using MMX.
 *
 * Every source sample is sign-extended to 16 bits, multiplied by volume,
 * arithmetically shifted right by 7 (a division by 128 that rounds toward
 * negative infinity), saturated back to 8 bits and then added to the
 * matching destination sample with signed saturation.
 *
 * dst    - destination buffer, updated in place
 * src    - source buffer, only read
 * size   - length of the buffers in bytes; 8 bytes (8 samples) are handled
 *          per iteration and a trailing remainder (size % 8) is not mixed
 * volume - scale factor where 128 means unity gain; it is replicated into
 *          four 16-bit lanes and only the low 16 bits of each product are
 *          kept (callers are expected to pass 0..128)
 *
 * dst and src are declared as read-write ("+r") operands because the loop
 * advances both pointers; declaring them input-only while modifying them is
 * not permitted by GCC's extended asm rules. Numeric local labels are used
 * so the template can be emitted more than once in a translation unit.
 *
 * In the lane comments below, lanes are written most-significant first.
 */
void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"	movl %3,%%eax\n"	/* eax = volume */

"	movd %%eax,%%mm0\n"
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"	/* mm0 = vol|vol|vol|vol */

"	movl %2,%%edx\n"	/* edx = size */
"	shrl $3,%%edx\n"	/* edx = iteration count, 8 bytes = 8 samples each */
"	jz 2f\n"		/* shr already set ZF when the count is zero */

"	.align 8\n"
"1:\n"
"	pxor %%mm2,%%mm2\n"	/* mm2 = 0 */
"	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d|e|f|g|h */
"	movq %%mm1,%%mm3\n"	/* mm3 = a|b|c|d|e|f|g|h */

	/* build the sign-extension mask: compare 0 > sample, giving 0xFF for
	   negative bytes and 0x00 otherwise */
"	pcmpgtb %%mm1,%%mm2\n"	/* mm2 = per-byte sign mask of the source */

"	punpckhbw %%mm2,%%mm1\n"	/* mm1 = a|b|c|d sign-extended to 16 bits */
"	punpcklbw %%mm2,%%mm3\n"	/* mm3 = e|f|g|h sign-extended to 16 bits */
"	movq (%0),%%mm2\n"	/* mm2 = destination */

"	pmullw %%mm0,%%mm1\n"	/* mm1 = v*a|v*b|v*c|v*d */
"	add $8,%1\n"		/* advance src; the source quadword is already loaded */
"	pmullw %%mm0,%%mm3\n"	/* mm3 = v*e|v*f|v*g|v*h */

"	psraw $7,%%mm1\n"	/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */
"	psraw $7,%%mm3\n"	/* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */

"	packsswb %%mm1,%%mm3\n"	/* mm3 = all eight scaled samples, saturated to 8 bits */
"	paddsb %%mm2,%%mm3\n"	/* add to the destination, saturated */

"	movq %%mm3,(%0)\n"	/* store back to memory */
"	add $8,%0\n"		/* advance dst */

"	dec %%edx\n"
"	jnz 1b\n"

"2:\n"
"	emms\n"			/* mm0/mm1 were written above, so always leave MMX state */
	 : "+r" (dst), "+r" (src)
	 : "m" (size), "m" (volume)
	 : "eax", "edx", "memory", "cc"
	 );
}
|
205 #endif |
|
206 |