--- /dev/null
+typedef short int int16_t; /* hand-written fixed-width names; no #include appears in this file */
+typedef int int32_t;
+typedef long int int64_t; /* NOTE(review): 64 bits only where long is 64 bits wide -- confirm target */
+typedef enum pa_sample_format { /*
+ * Sample format identifiers, numbered consecutively from 0 (PA_SAMPLE_U8).
+ * PA_SAMPLE_MAX follows the last format and therefore equals 13;
+ * PA_SAMPLE_INVALID is the explicit sentinel -1.
+ * NOTE(review): the enum tag is pa_sample_format but the typedef name at the
+ * closing brace is pa_log_target_t -- looks mismatched, confirm it is
+ * intentional.
+ */
+ PA_SAMPLE_U8, PA_SAMPLE_ALAW, PA_SAMPLE_ULAW, PA_SAMPLE_S16LE,
+ PA_SAMPLE_S16BE, PA_SAMPLE_FLOAT32LE, PA_SAMPLE_FLOAT32BE,
+ PA_SAMPLE_S32LE, PA_SAMPLE_S32BE, PA_SAMPLE_S24LE, PA_SAMPLE_S24BE,
+ PA_SAMPLE_S24_32LE, PA_SAMPLE_S24_32BE, PA_SAMPLE_MAX,
+ PA_SAMPLE_INVALID = -1
+} pa_log_target_t;
+typedef enum pa_cpu_x86_flag { /*
+ * CPU feature flags. Each enumerator is a distinct single bit (bits 0
+ * through 9), so values can be OR-ed together and tested with a bitwise
+ * AND, as pa_volume_func_init_mmx does for PA_CPU_X86_MMX.
+ */
+ PA_CPU_X86_MMX = (1 << 0), PA_CPU_X86_MMXEXT =
+ (1 << 1), PA_CPU_X86_SSE = (1 << 2), PA_CPU_X86_SSE2 =
+ (1 << 3), PA_CPU_X86_SSE3 = (1 << 4), PA_CPU_X86_SSSE3 =
+ (1 << 5), PA_CPU_X86_SSE4_1 = (1 << 6), PA_CPU_X86_SSE4_2 =
+ (1 << 7), PA_CPU_X86_3DNOW = (1 << 8), PA_CPU_X86_3DNOWEXT =
+ (1 << 9)
+} pa_cpu_x86_flag_t;
+typedef int64_t pa_reg_x86; /* integer type that the channel and channels asm operands of pa_volume_s16ne_mmx are cast to */
+typedef void (*pa_do_volume_func_t) (void *samples, void *volumes,
+ unsigned channels, unsigned length); /* pointer to a volume routine taking untyped sample and volume buffers plus channels and length */
+/*
+ * pa_volume_s16ne_mmx: scale 16-bit samples in place by 32-bit volume
+ * factors, written entirely as one MMX inline-asm statement.
+ *
+ * samples:  16-bit samples; each one is loaded, scaled and stored back.
+ * volumes:  32-bit factors, indexed by a running channel counter. The two-
+ *           and four-sample paths load 8 and 16 bytes starting at
+ *           volumes[channel], so entries beyond index channels - 1 can be
+ *           read. NOTE(review): presumably callers pad or repeat the
+ *           table -- confirm.
+ * channels: modulus for the channel counter; after every step the counter
+ *           is reduced by one conditional subtraction of channels.
+ * length:   shifted right by one before it is used as a count.
+ *           NOTE(review): presumably a byte count converted to a number
+ *           of 16-bit samples -- confirm against callers.
+ *
+ * Asm operand map: %0 samples, %1 volumes, %2 length, %3 channel,
+ * %4 temp, %5 channels.
+ *
+ * The count is consumed bit by bit: bit 0 selects a single one-sample step,
+ * bit 1 a single two-sample step, and what remains is the iteration count
+ * of the four-sample loop at label 5.
+ *
+ * Per sample s and volume v (halves vh:vl) the multiply sequence forms
+ * s * vh + ((s * vl) >> 16), with vl corrected to behave as unsigned, and
+ * packssdw saturates the sum to 16 bits. NOTE(review): this reads as
+ * (s * v) >> 16, i.e. v used as a 16.16 fixed-point factor -- confirm.
+ *
+ * NOTE(review): the clobber list names only cc although the asm stores
+ * through samples and overwrites mm0 through mm5 -- confirm that omitting a
+ * memory clobber and MMX register clobbers is intended.
+ * NOTE(review): cmovae is not an MMX instruction, while the only feature
+ * test visible in this file (pa_volume_func_init_mmx) checks
+ * PA_CPU_X86_MMX -- confirm CMOV availability is guaranteed elsewhere.
+ */
+void pa_volume_s16ne_mmx(short * samples, int * volumes,
+ unsigned channels, unsigned length)
+{
+ long int channel; /* running index into volumes; zeroed by the first asm instruction */
+ long int temp; /* scratch register for the asm; never read from C */
+ __asm__ __volatile__(" xor %3, %3 \n\t" /* channel = 0 */
+ " sar $1, %2 \n\t" /* length >>= 1 */
+ " test $1, %2 \n\t" /* is bit 0 of the count set? */
+ " je 2f \n\t" /* no: skip the one-sample step */
+ " movd (%1, %3, 4), %%mm0 \n\t" /* mm0 = volumes[channel] (one 32-bit factor) */
+ " movw (%0), %w4 \n\t" /* low 16 bits of temp = *samples */
+ " movd %4, %%mm1 \n\t" /* mm1 = temp; only the low word is meaningful */
+ " pxor %%mm4, %%mm4 \n\t" /* mm4 = 0 */
+ " punpcklwd %%mm4, " "%%mm1" " \n\t" /* interleave with zero: sample in the low word of each dword */
+ " pcmpgtw " "%%mm0" ", %%mm4 \n\t" /* mm4 = all-ones in each word where the volume word is negative */
+ " pand " "%%mm1" ", %%mm4 \n\t" /* mm4 = sample where vl has its top bit set, else 0 */
+ " movq " "%%mm1" ", %%mm5 \n\t" /* keep a copy of the widened samples */
+ " pmulhw " "%%mm0" ", " "%%mm1" " \n\t" /* mm1 = high 16 bits of the signed product sample * vl */
+ " paddw %%mm4, " "%%mm1" " \n\t" /* add the correction so vl acts as unsigned */
+ " psrld $16, " "%%mm0" " \n\t" /* mm0 = vh in the low word of each dword */
+ " pmaddwd %%mm5, " "%%mm0" " \n\t" /* mm0 = sample * vh as a 32-bit product */
+ " paddd " "%%mm1" ", " "%%mm0" " \n\t" /* add the vl contribution */
+ " packssdw " "%%mm0" ", " "%%mm0" " \n\t" /* saturate the dwords to signed 16-bit words */
+ " movd %%mm0, %4 \n\t"
+ " movw %w4, (%0) \n\t" /* store the single scaled sample */
+ " add $2, %0 \n\t" " add " "$1"
+ ", %3 \n\t" /* samples += 2 bytes, channel += 1 */
+ " mov %3, %4 \n\t" " sub " "%5"
+ ", %4 \n\t" /* temp = channel - channels */
+ " cmovae %4, %3 \n\t" /* if channel >= channels (no borrow), wrap: channel = temp */
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* move on to the next bit of the count */
+ " test $1, %2 \n\t"
+ " je 4f \n\t" /* bit clear: skip the two-sample step */
+ "3: \n\t" /* label 3 is not the target of any jump in this asm */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* mm0 = two factors starting at volumes[channel] */
+ " movd (%0), %%mm1 \n\t" /* mm1 = two samples */
+ " pxor %%mm4, %%mm4 \n\t" /* same multiply sequence as in the one-sample step */
+ " punpcklwd %%mm4, " "%%mm1" " \n\t"
+ " pcmpgtw " "%%mm0" ", %%mm4 \n\t"
+ " pand " "%%mm1" ", %%mm4 \n\t"
+ " movq " "%%mm1" ", %%mm5 \n\t"
+ " pmulhw " "%%mm0" ", " "%%mm1" " \n\t"
+ " paddw %%mm4, " "%%mm1" " \n\t"
+ " psrld $16, " "%%mm0" " \n\t"
+ " pmaddwd %%mm5, " "%%mm0" " \n\t"
+ " paddd " "%%mm1" ", " "%%mm0" " \n\t"
+ " packssdw " "%%mm0" ", " "%%mm0" " \n\t"
+ " movd %%mm0, (%0) \n\t" /* store two scaled samples */
+ " add $4, %0 \n\t" " add " "$2"
+ ", %3 \n\t" /* samples += 4 bytes, channel += 2 */
+ " mov %3, %4 \n\t" " sub " "%5"
+ ", %4 \n\t"
+ " cmovae %4, %3 \n\t" /* wrap channel as above */
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* what remains is the four-sample iteration count */
+ " cmp $0, %2 \n\t"
+ " je 6f \n\t" /* nothing left: go to the exit */
+ "5: \n\t" /* four-sample loop */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* factors for samples 0 and 1 */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* factors for samples 2 and 3 */
+ " movd (%0), %%mm1 \n\t" /* samples 0 and 1 */
+ " movd 4(%0), %%mm3 \n\t" /* samples 2 and 3 */
+ " pxor %%mm4, %%mm4 \n\t" /* multiply sequence on mm1 (samples) and mm0 (factors) */
+ " punpcklwd %%mm4, " "%%mm1" " \n\t"
+ " pcmpgtw " "%%mm0" ", %%mm4 \n\t"
+ " pand " "%%mm1" ", %%mm4 \n\t"
+ " movq " "%%mm1" ", %%mm5 \n\t"
+ " pmulhw " "%%mm0" ", " "%%mm1" " \n\t"
+ " paddw %%mm4, " "%%mm1" " \n\t"
+ " psrld $16, " "%%mm0" " \n\t"
+ " pmaddwd %%mm5, " "%%mm0" " \n\t"
+ " paddd " "%%mm1" ", " "%%mm0" " \n\t"
+ " packssdw " "%%mm0" ", " "%%mm0" " \n\t"
+ " pxor %%mm4, %%mm4 \n\t" /* same sequence on mm3 (samples) and mm2 (factors) */
+ " punpcklwd %%mm4, " "%%mm3" " \n\t"
+ " pcmpgtw " "%%mm2" ", %%mm4 \n\t"
+ " pand " "%%mm3" ", %%mm4 \n\t"
+ " movq " "%%mm3" ", %%mm5 \n\t"
+ " pmulhw " "%%mm2" ", " "%%mm3" " \n\t"
+ " paddw %%mm4, " "%%mm3" " \n\t"
+ " psrld $16, " "%%mm2" " \n\t"
+ " pmaddwd %%mm5, " "%%mm2" " \n\t"
+ " paddd " "%%mm3" ", " "%%mm2" " \n\t"
+ " packssdw " "%%mm2" ", " "%%mm2" " \n\t"
+ " movd %%mm0, (%0) \n\t" /* store samples 0 and 1 */
+ " movd %%mm2, 4(%0) \n\t" /* store samples 2 and 3 */
+ " add $8, %0 \n\t" " add " "$4"
+ ", %3 \n\t" /* samples += 8 bytes, channel += 4 */
+ " mov %3, %4 \n\t" " sub " "%5"
+ ", %4 \n\t"
+ " cmovae %4, %3 \n\t" /* wrap channel as above */
+ " dec %2 \n\t"
+ " jne 5b \n\t" /* repeat until the iteration count reaches zero */
+ "6: \n\t"
+ " emms \n\t":"+r" /* emms: clear MMX state before returning to C code */
+ (samples), "+r"(volumes), "+r"(length), /* read-write register operands %0, %1, %2 */
+ "=D"((pa_reg_x86) channel), /* %3: output pinned to the DI register. NOTE(review): written by the first instruction but not marked earlyclobber -- confirm it cannot share a register with an input */
+ "=&r"(temp):"X"((pa_reg_x86) channels):"cc"); /* %4: earlyclobber scratch; %5: input with the unconstrained X constraint. NOTE(review): confirm every operand form X allows is valid as the source of the sub instructions */
+}
+/*
+ * pa_volume_func_init_mmx: when flags has the PA_CPU_X86_MMX bit set, pass
+ * PA_SAMPLE_S16LE and pa_volume_s16ne_mmx (cast to pa_do_volume_func_t) to
+ * pa_set_volume_func; otherwise do nothing.
+ *
+ * NOTE(review): presumably this installs the MMX routine as the handler for
+ * the S16LE format -- confirm against pa_set_volume_func.
+ * NOTE(review): pa_set_volume_func has no declaration in the visible file,
+ * so the call relies on an implicit declaration -- confirm one is supplied
+ * elsewhere.
+ * NOTE(review): the cast changes the first two parameter types from
+ * short and int pointers to void pointers; confirm callers invoke the
+ * routine in a way compatible with its real signature.
+ */
+void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags)
+{
+ if (flags & PA_CPU_X86_MMX) {
+ pa_set_volume_func(PA_SAMPLE_S16LE,
+ (pa_do_volume_func_t) pa_volume_s16ne_mmx);
+ }
+}