--- /dev/null
+f(short *samples, int *volumes, unsigned channels,
+ unsigned length)
+{ /* Scales the 16-bit values at `samples` in place by the 32-bit values
+   * in `volumes`, using x86 MMX instructions in one inline-asm statement.
+   *
+   * samples  - read and written through operand %0; the asm advances it.
+   * volumes  - read through operand %1, indexed by a running channel
+   *            index (%3) scaled by 4 bytes.
+   * channels - operand %5; once the channel index reaches this value it
+   *            is reduced by it (a single subtraction per step).
+   * length   - operand %2; shifted right by one first (bit 0 is dropped),
+   *            presumably turning a byte count into a count of 16-bit
+   *            samples - TODO confirm against the caller.  Bit 0 of the
+   *            shifted value selects one 2-sample step, and a second
+   *            shift yields the number of 4-sample loop iterations.
+   *
+   * Per sample the arithmetic looks like a 16.16 fixed-point multiply,
+   * sample * (volume >> 16) plus the high 16 bits of
+   * sample * (volume & 0xffff), saturated to 16 bits by packssdw.
+   * NOTE(review): the low partial product is added as an unsigned 16-bit
+   * lane (no sign extension is visible), so confirm this matches the
+   * intended result for negative samples.
+   * NOTE(review): each step loads 2 (or 4) consecutive volume entries
+   * starting at the current channel index; confirm the array passed by
+   * the caller is large enough for those reads.
+   */
+ long channel, temp;
+ asm volatile(" xor %3, %3\n" /* channel index (%3) = 0 */
+ "2:\n" /* label 2 is not the target of any jump in this asm */
+ " sar $1, %2\n" /* length >>= 1 */
+ " test $1, %2\n" /* is bit 0 of the shifted length set? */
+ " je 4f\n" /* no: skip the single 2-sample step */
+ "3:\n" /* label 3 is not the target of any jump in this asm */
+ " movq (%1, %3, 4), %%mm0\n" /* mm0 = two 32-bit volumes at volumes[channel] */
+ " movd (%0), %%mm1\n" /* mm1 = two 16-bit samples (4 bytes) */
+ " pxor %%mm4, %%mm4\n" /* mm4 = 0 */
+ " punpcklwd %%mm4, %%mm1\n" /* each sample goes in the low word of a dword, high word 0 */
+ " pcmpgtw %%mm0, %%mm4\n" /* mm4 words = 0xffff where the volume word is negative */
+ " pand %%mm1, %%mm4\n" /* mm4 = sample where the volume's low word has its top bit set */
+ " movq %%mm1, %%mm5\n" /* keep an unmultiplied copy of the samples */
+ " pmulhw %%mm0, %%mm1\n" /* high 16 bits of signed sample * volume word */
+ " paddw %%mm4, %%mm1\n" /* add the sample back; appears to make the low volume half act as unsigned */
+ " psrld $16, %%mm0\n" /* mm0 = volume >> 16 in each dword */
+ " pmaddwd %%mm5, %%mm0\n" /* sample * high volume half (the other word pair is 0 * 0) */
+ " paddd %%mm1, %%mm0\n" /* add the two partial products */
+ " packssdw %%mm0, %%mm0\n" /* saturate the dwords to signed 16-bit words */
+ " movd %%mm0, (%0)\n" /* store the two results over the input samples */
+ " add $4, %0\n" /* samples pointer += 4 bytes */
+ " add $2, %3\n" /* channel index += 2 */
+ " mov %3, %4\n" /* temp = channel index */
+ " sub %5, %4\n" /* temp -= channels */
+ " cmovae %4, %3\n" /* no borrow (index >= channels): index = temp */
+ "4:\n"
+ " sar $1, %2\n" /* length >>= 1 again: number of 4-sample iterations */
+ " cmp $0, %2\n"
+ " je 6f\n" /* nothing left: go straight to emms */
+ "5:\n" /* loop body: same arithmetic as above on two register sets */
+ " movq (%1, %3, 4), %%mm0\n" /* volumes[channel], volumes[channel + 1] */
+ " movq 8(%1, %3, 4), %%mm2\n" /* volumes[channel + 2], volumes[channel + 3] */
+ " movd (%0), %%mm1\n" /* samples 0 and 1 */
+ " movd 4(%0), %%mm3\n" /* samples 2 and 3 */
+ " pxor %%mm4, %%mm4\n" /* first pair: mm1 (samples) with mm0 (volumes) */
+ " punpcklwd %%mm4, %%mm1\n"
+ " pcmpgtw %%mm0, %%mm4\n"
+ " pand %%mm1, %%mm4\n"
+ " movq %%mm1, %%mm5\n"
+ " pmulhw %%mm0, %%mm1\n"
+ " paddw %%mm4, %%mm1\n"
+ " psrld $16, %%mm0\n"
+ " pmaddwd %%mm5, %%mm0\n"
+ " paddd %%mm1, %%mm0\n"
+ " packssdw %%mm0, %%mm0\n" /* mm0 = results for samples 0 and 1 */
+ " pxor %%mm4, %%mm4\n" /* second pair: mm3 (samples) with mm2 (volumes) */
+ " punpcklwd %%mm4, %%mm3\n"
+ " pcmpgtw %%mm2, %%mm4\n"
+ " pand %%mm3, %%mm4\n"
+ " movq %%mm3, %%mm5\n"
+ " pmulhw %%mm2, %%mm3\n"
+ " paddw %%mm4, %%mm3\n"
+ " psrld $16, %%mm2\n"
+ " pmaddwd %%mm5, %%mm2\n"
+ " paddd %%mm3, %%mm2\n"
+ " packssdw %%mm2, %%mm2\n" /* mm2 = results for samples 2 and 3 */
+ " movd %%mm0, (%0)\n" /* store results 0 and 1 */
+ " movd %%mm2, 4(%0)\n" /* store results 2 and 3 */
+ " add $8, %0\n" /* samples pointer += 8 bytes */
+ " add $4, %3\n" /* channel index += 4 */
+ " mov %3, %4\n" /* same single-subtraction wrap as in the 2-sample step */
+ " sub %5, %4\n"
+ " cmovae %4, %3\n"
+ " dec %2\n" /* one iteration done */
+ " jne 5b\n" /* repeat until the counter reaches zero */
+ "6:\n"
+ " emms\n":"+r" /* emms empties the MMX state; the output operand list starts here */
+ (samples), "+r"(volumes), "+r"(length), /* %0, %1, %2: read-write registers */
+ "=D"((long) channel), /* %3: output tied to the DI register; NOTE(review): no early-clobber although it is written first */
+ "=&r"(temp):"X"((long) channels):"cc"); /* %4 early-clobber scratch; %5 input with the "X" constraint; NOTE(review): only "cc" is clobbered although the asm stores through %0 and uses mm0-mm5 */
+}