]>
Commit | Line | Data |
---|---|---|
81f80d8c MF |
/*
 * Hand-rolled fixed-width integer aliases.
 * NOTE(review): the widths only match the names where short/int/long are
 * 16/32/64 bits wide (e.g. LP64) -- confirm for the intended target.
 */
typedef short int16_t;
typedef int int32_t;
typedef long int64_t;
/*
 * Sample encodings, numbered consecutively from zero.
 * PA_SAMPLE_MAX is one past the last real encoding; PA_SAMPLE_INVALID is -1.
 *
 * NOTE(review): the typedef name is pa_log_target_t although the tag is
 * pa_sample_format; kept as-is because it is part of the visible interface.
 */
typedef enum pa_sample_format {
    PA_SAMPLE_U8 = 0,
    PA_SAMPLE_ALAW = 1,
    PA_SAMPLE_ULAW = 2,
    PA_SAMPLE_S16LE = 3,
    PA_SAMPLE_S16BE = 4,
    PA_SAMPLE_FLOAT32LE = 5,
    PA_SAMPLE_FLOAT32BE = 6,
    PA_SAMPLE_S32LE = 7,
    PA_SAMPLE_S32BE = 8,
    PA_SAMPLE_S24LE = 9,
    PA_SAMPLE_S24BE = 10,
    PA_SAMPLE_S24_32LE = 11,
    PA_SAMPLE_S24_32BE = 12,
    PA_SAMPLE_MAX = 13,
    PA_SAMPLE_INVALID = -1
} pa_log_target_t;
/*
 * x86 CPU feature bits. Each flag occupies its own bit so values can be
 * OR-ed together and tested with a bitwise AND.
 */
typedef enum pa_cpu_x86_flag {
    PA_CPU_X86_MMX      = 0x001,
    PA_CPU_X86_MMXEXT   = 0x002,
    PA_CPU_X86_SSE      = 0x004,
    PA_CPU_X86_SSE2     = 0x008,
    PA_CPU_X86_SSE3     = 0x010,
    PA_CPU_X86_SSSE3    = 0x020,
    PA_CPU_X86_SSE4_1   = 0x040,
    PA_CPU_X86_SSE4_2   = 0x080,
    PA_CPU_X86_3DNOW    = 0x100,
    PA_CPU_X86_3DNOWEXT = 0x200
} pa_cpu_x86_flag_t;
19 | typedef int64_t pa_reg_x86; | |
20 | typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, | |
21 | unsigned channels, unsigned length); | |
22 | ||
/*
 * Scale the 16-bit samples held in the low words of MMX register `s` by the
 * 32-bit volumes held in MMX register `v` (one volume dword per sample),
 * leaving the saturated 16-bit results packed in `v`.
 *
 * Per lane, with vl = low word of the volume and vh = high word:
 *     result = saturate16(((p * (unsigned) vl) >> 16) + p * vh)
 * i.e. the volume is treated as 16.16 fixed point (0x10000 leaves the sample
 * unchanged).
 *
 * Clobbers %mm4 and %mm5. `s` and `v` are string literals naming registers.
 */
#define PA_MMX_VOLUME_32X16(s, v)                                              \
    " pxor %%mm4, %%mm4 \n\t"      /* mm4 = 0                               */ \
    " punpcklwd %%mm4, " s " \n\t" /* widen: each sample gets a zero word   */ \
    " pcmpgtw " v ", %%mm4 \n\t"   /* word mask where volume word < 0       */ \
    " pand " s ", %%mm4 \n\t"      /* p where vl has its top bit set        */ \
    " movq " s ", %%mm5 \n\t"      /* keep widened samples for p * vh       */ \
    " pmulhw " v ", " s " \n\t"    /* high16(p * (signed) vl)               */ \
    " paddw %%mm4, " s " \n\t"     /* correct to (p * (unsigned) vl) >> 16  */ \
    " pslld $16, " s " \n\t"       /* sign-extend the 16-bit partial        */ \
    " psrad $16, " s " \n\t"       /*   product to a full dword             */ \
    " psrld $16, " v " \n\t"       /* v = vh in the low word of each dword  */ \
    " pmaddwd %%mm5, " v " \n\t"   /* v = p * vh                            */ \
    " paddd " s ", " v " \n\t"     /* add the low-half contribution         */ \
    " packssdw " v ", " v " \n\t"  /* saturate back to 16 bits              */

/*
 * Advance the volume index (%3) by `inc` and subtract the wrap period (%5)
 * once if the index reached or passed it. %4 is scratch.
 */
#define PA_MMX_MOD_ADD(inc)                                                    \
    " add " inc ", %3 \n\t"                                                    \
    " mov %3, %4 \n\t"                                                         \
    " sub %5, %4 \n\t"                                                         \
    " cmovae %4, %3 \n\t"          /* no borrow => index >= period => wrap  */

/*
 * Apply per-channel 16.16 fixed-point volumes to interleaved signed 16-bit
 * samples, in place, using MMX (x86-64, GCC-style inline assembly).
 *
 * samples   interleaved 16-bit samples; overwritten with the scaled values.
 * volumes   one 32-bit volume per channel. The code loads up to four
 *           consecutive volumes starting at the current channel index, so
 *           the array must stay readable (and repeat with period `channels`)
 *           for at least 3 entries past the wrap period used below.
 * channels  number of interleaved channels.
 * length    size of the sample buffer in bytes; it is halved first to get
 *           the sample count, so a trailing odd byte is ignored.
 *
 * Samples are consumed as: one sample if the count is odd, then one pair if
 * the remaining pair count is odd, then groups of four.
 */
void pa_volume_s16ne_mmx(short *samples, int *volumes,
                         unsigned channels, unsigned length)
{
    long int channel;
    long int temp;
    /*
     * The index advances by up to 4 per step but is wrapped by a single
     * subtraction, so the wrap period must be >= 4 to keep the index bounded.
     * It must also be a multiple of `channels` so wrapped indices still map
     * to the right channel (hence 3 -> 6 rather than 3 -> 4).
     */
    long int wrap = (channels == 3) ? 6
                  : (channels < 4 ? 4 : (long int) channels);

    __asm__ __volatile__(
        " xor %3, %3 \n\t"                  /* channel index = 0            */
        " sar $1, %2 \n\t"                  /* bytes -> samples             */

        /* odd sample count: handle one sample */
        " test $1, %2 \n\t"
        " je 2f \n\t"
        " movd (%1, %3, 4), %%mm0 \n\t"
        " movw (%0), %w4 \n\t"
        " movd %4, %%mm1 \n\t"
        PA_MMX_VOLUME_32X16("%%mm1", "%%mm0")
        " movd %%mm0, %4 \n\t"
        " movw %w4, (%0) \n\t"
        " add $2, %0 \n\t"
        PA_MMX_MOD_ADD("$1")

        /* odd pair count: handle two samples */
        "2: \n\t"
        " sar $1, %2 \n\t"                  /* samples -> pairs             */
        " test $1, %2 \n\t"
        " je 4f \n\t"
        "3: \n\t"
        " movq (%1, %3, 4), %%mm0 \n\t"
        " movd (%0), %%mm1 \n\t"
        PA_MMX_VOLUME_32X16("%%mm1", "%%mm0")
        " movd %%mm0, (%0) \n\t"
        " add $4, %0 \n\t"
        PA_MMX_MOD_ADD("$2")

        /* main loop: four samples per iteration */
        "4: \n\t"
        " sar $1, %2 \n\t"                  /* pairs -> groups of four      */
        " cmp $0, %2 \n\t"
        " je 6f \n\t"
        "5: \n\t"
        " movq (%1, %3, 4), %%mm0 \n\t"
        " movq 8(%1, %3, 4), %%mm2 \n\t"
        " movd (%0), %%mm1 \n\t"
        " movd 4(%0), %%mm3 \n\t"
        PA_MMX_VOLUME_32X16("%%mm1", "%%mm0")
        PA_MMX_VOLUME_32X16("%%mm3", "%%mm2")
        " movd %%mm0, (%0) \n\t"
        " movd %%mm2, 4(%0) \n\t"
        " add $8, %0 \n\t"
        PA_MMX_MOD_ADD("$4")
        " dec %2 \n\t"
        " jne 5b \n\t"

        "6: \n\t"
        " emms \n\t"                        /* leave MMX state              */
        /*
         * `channel` and `temp` are written before the last input is read, so
         * both must be early-clobber to stop them sharing a register with
         * `wrap`.
         */
        : "+r"(samples), "+r"(volumes), "+r"(length),
          "=&D"(channel), "=&r"(temp)
        : "r"(wrap)
        /* the asm reads and writes the sample/volume buffers */
        : "cc", "memory");
}

#undef PA_MMX_MOD_ADD
#undef PA_MMX_VOLUME_32X16
123 | ||
124 | void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags) | |
125 | { | |
126 | if (flags & PA_CPU_X86_MMX) { | |
127 | pa_set_volume_func(PA_SAMPLE_S16LE, | |
128 | (pa_do_volume_func_t) pa_volume_s16ne_mmx); | |
129 | } | |
130 | } |