more ices
[ICEs.git] / 287391 / svolume_mmx.i.2
/*
 * Fixed-width integer aliases declared directly in this translation unit.
 * int64_t is an alias for long, so it is only 64 bits wide on targets
 * where long is 64 bits.
 */
typedef short int16_t;
typedef int   int32_t;
typedef long  int64_t;
/*
 * Sample format identifiers.  Values count up from 0 in declaration order;
 * PA_SAMPLE_MAX is therefore one past the last format, and
 * PA_SAMPLE_INVALID is the only negative value.
 *
 * NOTE(review): the typedef name (pa_log_target_t) does not match the enum
 * tag (pa_sample_format).  It is kept unchanged here because other code may
 * refer to it -- confirm whether this is intentional.
 */
typedef enum pa_sample_format {
    PA_SAMPLE_U8,
    PA_SAMPLE_ALAW,
    PA_SAMPLE_ULAW,
    PA_SAMPLE_S16LE,
    PA_SAMPLE_S16BE,
    PA_SAMPLE_FLOAT32LE,
    PA_SAMPLE_FLOAT32BE,
    PA_SAMPLE_S32LE,
    PA_SAMPLE_S32BE,
    PA_SAMPLE_S24LE,
    PA_SAMPLE_S24BE,
    PA_SAMPLE_S24_32LE,
    PA_SAMPLE_S24_32BE,
    PA_SAMPLE_MAX,
    PA_SAMPLE_INVALID = -1
} pa_log_target_t;
/*
 * x86 CPU feature flags.  Every enumerator occupies its own bit, so a set
 * of features is the bitwise OR of the individual flags and a single
 * feature is tested with `flags & PA_CPU_X86_...`.
 */
typedef enum pa_cpu_x86_flag {
    PA_CPU_X86_MMX      = 0x001,
    PA_CPU_X86_MMXEXT   = 0x002,
    PA_CPU_X86_SSE      = 0x004,
    PA_CPU_X86_SSE2     = 0x008,
    PA_CPU_X86_SSE3     = 0x010,
    PA_CPU_X86_SSSE3    = 0x020,
    PA_CPU_X86_SSE4_1   = 0x040,
    PA_CPU_X86_SSE4_2   = 0x080,
    PA_CPU_X86_3DNOW    = 0x100,
    PA_CPU_X86_3DNOWEXT = 0x200
} pa_cpu_x86_flag_t;
/*
 * Integer type for values handed to x86 inline assembly.
 * NOTE(review): defined as int64_t here, which presumably targets x86-64
 * general-purpose registers -- confirm before building for 32-bit x86.
 */
typedef int64_t pa_reg_x86;

/*
 * Pointer to a volume-scaling routine.  The buffers are passed untyped;
 * the concrete sample and volume types are up to the routine installed.
 */
typedef void (*pa_do_volume_func_t)(void *samples,
                                    void *volumes,
                                    unsigned int channels,
                                    unsigned int length);
22
/*
 * Scale the packed signed 16-bit samples in `s` by the 32-bit volume
 * factors in `v` (both are MMX register names given as string literals).
 * Each volume is split into a low half vl (treated as unsigned) and a high
 * half vh; the result per sample is
 *
 *     saturate_s16(sample * vh + ((sample * vl) >> 16))
 *
 * i.e. (sample * volume) >> 16.  On exit the low 32 bits of `v` hold the
 * two scaled samples; `s`, mm4 and mm5 are scratch.
 *
 * The pslld/psrad pair sign-extends the 16-bit low-half product before the
 * 32-bit add.  Without it a negative product is added as a large positive
 * number and the final pack saturates to +32767.
 */
#define VOLUME_32x16(s, v)                                                         \
    " pxor %%mm4, %%mm4          \n\t" /* mm4 = 0                               */ \
    " punpcklwd %%mm4, " s "     \n\t" /* widen: sample in low word of a dword  */ \
    " pcmpgtw " v ", %%mm4       \n\t" /* all-ones where a word of v is < 0     */ \
    " pand " s ", %%mm4          \n\t" /* sample, where vl has its top bit set  */ \
    " movq " s ", %%mm5          \n\t" /* keep widened samples for the vh part  */ \
    " pmulhw " v ", " s "        \n\t" /* high 16 bits of sample * (signed) vl  */ \
    " paddw %%mm4, " s "         \n\t" /* correct for vl really being unsigned  */ \
    " pslld $16, " s "           \n\t" /* move the 16-bit result to the top ... */ \
    " psrld $16, " v "           \n\t" /* v = vh in the low word of each dword  */ \
    " psrad $16, " s "           \n\t" /* ... and shift back: sign-extended     */ \
    " pmaddwd %%mm5, " v "       \n\t" /* sample * vh as a 32-bit product       */ \
    " paddd " s ", " v "         \n\t" /* add the low-half contribution         */ \
    " packssdw " v ", " v "      \n\t" /* saturate the dwords back to s16       */

/*
 * Advance the volume index (%3) by `a` and wrap it once:
 * temp = index - b; if the subtraction did not borrow, index = temp.
 */
#define MOD_ADD(a, b)                                                              \
    " add " a ", %3              \n\t"                                             \
    " mov %3, %4                 \n\t"                                             \
    " sub " b ", %4              \n\t"                                             \
    " cmovae %4, %3              \n\t"

/*
 * Apply per-channel volume to a buffer of signed 16-bit samples, in place,
 * using MMX.
 *
 *   samples   buffer of 16-bit samples; overwritten with the scaled values
 *   volumes   32-bit volume factors, one per channel; each sample becomes
 *             saturate_s16((sample * volume) >> 16), so 0x10000 leaves a
 *             sample unchanged
 *   channels  number of entries after which the volume index wraps
 *   length    size of `samples` in bytes (halved to get the sample count)
 *
 * The sample count is consumed as: one sample if bit 0 is set, two samples
 * if bit 1 is set, then the remainder four samples per iteration.
 *
 * NOTE(review): the four-sample loop loads volumes[index .. index + 3] and
 * the index is wrapped with a single conditional subtraction of `channels`.
 * `volumes` must therefore be readable (and hold repeated per-channel
 * values) past `channels` entries, and for channels < 4 the index is never
 * brought back into range, so the reads move further out on every
 * iteration.  Confirm that callers pad `volumes` and pass channels >= 4.
 */
void pa_volume_s16ne_mmx(short * samples, int * volumes,
                         unsigned channels, unsigned length)
{
    long int channel;               /* running index into volumes (%3)     */
    long int temp;                  /* scratch register (%4)               */
    long int wrap_at = channels;    /* register-width copy for `sub` (%5)  */

    __asm__ __volatile__(
        " xor %3, %3                 \n\t" /* channel = 0 */
        " sar $1, %2                 \n\t" /* bytes -> 16-bit samples */

        /* one leftover sample */
        " test $1, %2                \n\t"
        " je 2f                      \n\t"
        " movd (%1, %3, 4), %%mm0    \n\t" /* one volume */
        " movw (%0), %w4             \n\t" /* one sample, through temp */
        " movd %4, %%mm1             \n\t"
        VOLUME_32x16("%%mm1", "%%mm0")
        " movd %%mm0, %4             \n\t"
        " movw %w4, (%0)             \n\t"
        " add $2, %0                 \n\t"
        MOD_ADD("$1", "%5")

        /* two leftover samples */
        "2:                          \n\t"
        " sar $1, %2                 \n\t"
        " test $1, %2                \n\t"
        " je 4f                      \n\t"
        "3:                          \n\t"
        " movq (%1, %3, 4), %%mm0    \n\t" /* two volumes */
        " movd (%0), %%mm1           \n\t" /* two samples */
        VOLUME_32x16("%%mm1", "%%mm0")
        " movd %%mm0, (%0)           \n\t"
        " add $4, %0                 \n\t"
        MOD_ADD("$2", "%5")

        /* main loop: four samples per iteration */
        "4:                          \n\t"
        " sar $1, %2                 \n\t"
        " cmp $0, %2                 \n\t"
        " je 6f                      \n\t"
        "5:                          \n\t"
        " movq (%1, %3, 4), %%mm0    \n\t" /* volumes 0-1 */
        " movq 8(%1, %3, 4), %%mm2   \n\t" /* volumes 2-3 */
        " movd (%0), %%mm1           \n\t" /* samples 0-1 */
        " movd 4(%0), %%mm3          \n\t" /* samples 2-3 */
        VOLUME_32x16("%%mm1", "%%mm0")
        VOLUME_32x16("%%mm3", "%%mm2")
        " movd %%mm0, (%0)           \n\t"
        " movd %%mm2, 4(%0)          \n\t"
        " add $8, %0                 \n\t"
        MOD_ADD("$4", "%5")
        " dec %2                     \n\t"
        " jne 5b                     \n\t"

        "6:                          \n\t"
        " emms                       \n\t" /* leave MMX state */

        /* channel is written by the very first instruction, before the
         * wrap_at input is read, so it must be early-clobber. */
        : "+r"(samples), "+r"(volumes), "+r"(length),
          "=&D"(channel), "=&r"(temp)
        /* `sub %5, %4` needs a register or memory operand of register
         * width; "rm" lets the compiler spill it under register pressure. */
        : "rm"(wrap_at)
        /* The asm stores through `samples`, so memory must be clobbered. */
        : "cc", "memory");
}

#undef MOD_ADD
#undef VOLUME_32x16
123
124 void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags)
125 {
126 if (flags & PA_CPU_X86_MMX) {
127 pa_set_volume_func(PA_SAMPLE_S16LE,
128 (pa_do_volume_func_t) pa_volume_s16ne_mmx);
129 }
130 }