git.wh0rd.org - ICEs.git/blobdiff - 287391/svolume_mmx.i.4
more ices
[ICEs.git] / 287391 / svolume_mmx.i.4
diff --git a/287391/svolume_mmx.i.4 b/287391/svolume_mmx.i.4
new file mode 100644 (file)
index 0000000..08c8465
--- /dev/null
@@ -0,0 +1,75 @@
+f(short *samples, int *volumes, unsigned channels, /* NOTE(review): no return type is written and no value is returned. */
+                        unsigned length) /* NOTE(review): the path ICEs.git/287391 suggests a reduced compiler-crash reproducer -- confirm before changing any token. */
+{ /* Rewrites the 16-bit values at samples in place, combining each with a 32-bit entry of volumes using MMX; presumably a (sample * volume) >> 16 scaling with signed saturation -- TODO confirm. */
+       long channel, temp; /* channel: index into volumes (asm operand %3); temp: scratch register (asm operand %4). */
+       asm volatile(" xor %3, %3\n" /* %3 (channel) = 0 */
+                            "2:\n" /* local label; no jump in this asm targets it */
+                            " sar $1, %2\n" /* %2 (length) >>= 1 */
+                            " test $1, %2\n" /* examine bit 0 of the shifted length */
+                            " je 4f\n" /* bit clear: skip the two-sample step */
+                            "3:\n" /* local label; no jump in this asm targets it */
+                            " movq (%1, %3, 4), %%mm0\n" /* mm0 = 8 bytes (two ints) at volumes + 4*channel */
+                            " movd (%0), %%mm1\n" /* mm1 = 4 bytes (two shorts) at samples */
+                            " pxor  %%mm4, %%mm4\n" /* mm4 = 0 */
+                            " punpcklwd %%mm4,  %%mm1\n" /* interleave mm1's low words with zeros: one sample per 32-bit lane */
+                            " pcmpgtw  %%mm0, %%mm4\n" /* each mm4 word = all ones where the mm0 word is negative (0 > word, signed) */
+                            " pand  %%mm1, %%mm4\n" /* mm4 &= mm1: keeps the sample only where the low volume word is negative */
+                            " movq  %%mm1, %%mm5\n" /* mm5 = copy of the unpacked samples */
+                            " pmulhw  %%mm0,  %%mm1\n" /* mm1 = high 16 bits of each signed word product mm1 * mm0 */
+                            " paddw %%mm4,  %%mm1\n" /* mm1 += mm4 wordwise; presumably compensates for the low volume word being multiplied as signed -- confirm */
+                            " psrld $16,  %%mm0\n" /* shift each volume dword right by 16: its high half lands in the low word */
+                            " pmaddwd %%mm5,  %%mm0\n" /* mm0 = per 32-bit lane, sum of the signed word products of mm5 and mm0 */
+                            " paddd  %%mm1,  %%mm0\n" /* mm0 += mm1 dwordwise */
+                            " packssdw  %%mm0,  %%mm0\n" /* narrow dwords to words with signed saturation */
+                            " movd %%mm0, (%0)\n" /* store the low 4 bytes (two shorts) back at samples */
+                            " add $4, %0\n" /* samples pointer += 4 bytes */
+                            " add  $2, %3\n" /* channel += 2 */
+                            " mov %3, %4\n" /* temp = channel */
+                            " sub  %5, %4\n" /* temp -= %5 (channels) */
+                            " cmovae %4, %3\n" /* no borrow (channel >= channels, unsigned): channel = temp, i.e. wrap the index */
+                            "4:\n" /* target of the "je 4f" above */
+                            " sar $1, %2\n" /* %2 (length) >>= 1 again */
+                            " cmp $0, %2\n" /* is anything left? */
+                            " je 6f\n" /* zero: skip the loop entirely */
+                            "5:\n" /* loop head: each pass handles four samples */
+                            " movq (%1, %3, 4), %%mm0\n" /* mm0 = two ints at volumes + 4*channel */
+                            " movq 8(%1, %3, 4), %%mm2\n" /* mm2 = the following two ints (8 bytes further) */
+                            " movd (%0), %%mm1\n" /* mm1 = two shorts at samples */
+                            " movd 4(%0), %%mm3\n" /* mm3 = the following two shorts */
+                            " pxor  %%mm4, %%mm4\n" /* mm4 = 0; the next lines repeat the sequence above on mm1/mm0 */
+                            " punpcklwd %%mm4,  %%mm1\n" /* one sample per 32-bit lane */
+                            " pcmpgtw  %%mm0, %%mm4\n" /* mask of negative words in mm0 */
+                            " pand  %%mm1, %%mm4\n" /* masked samples */
+                            " movq  %%mm1, %%mm5\n" /* mm5 = copy of the unpacked samples */
+                            " pmulhw  %%mm0,  %%mm1\n" /* high halves of the signed word products */
+                            " paddw %%mm4,  %%mm1\n" /* add the masked samples */
+                            " psrld $16,  %%mm0\n" /* high half of each volume into the low word */
+                            " pmaddwd %%mm5,  %%mm0\n" /* multiply words and add adjacent pairs */
+                            " paddd  %%mm1,  %%mm0\n" /* combine the two partial results */
+                            " packssdw  %%mm0,  %%mm0\n" /* saturate to 16-bit words */
+                            " pxor  %%mm4, %%mm4\n" /* mm4 = 0; same sequence again, now on mm3/mm2 */
+                            " punpcklwd %%mm4,  %%mm3\n" /* one sample per 32-bit lane */
+                            " pcmpgtw  %%mm2, %%mm4\n" /* mask of negative words in mm2 */
+                            " pand  %%mm3, %%mm4\n" /* masked samples */
+                            " movq  %%mm3, %%mm5\n" /* mm5 = copy of the unpacked samples */
+                            " pmulhw  %%mm2,  %%mm3\n" /* high halves of the signed word products */
+                            " paddw %%mm4,  %%mm3\n" /* add the masked samples */
+                            " psrld $16,  %%mm2\n" /* high half of each volume into the low word */
+                            " pmaddwd %%mm5,  %%mm2\n" /* multiply words and add adjacent pairs */
+                            " paddd  %%mm3,  %%mm2\n" /* combine the two partial results */
+                            " packssdw  %%mm2,  %%mm2\n" /* saturate to 16-bit words */
+                            " movd %%mm0, (%0)\n" /* store the first two results at samples */
+                            " movd %%mm2, 4(%0)\n" /* store the next two results 4 bytes further */
+                            " add $8, %0\n" /* samples pointer += 8 bytes */
+                            " add  $4, %3\n" /* channel += 4 */
+                            " mov %3, %4\n" /* temp = channel */
+                            " sub  %5, %4\n" /* temp -= %5 (channels) */
+                            " cmovae %4, %3\n" /* no borrow: channel = temp, i.e. wrap the index */
+                            " dec %2\n" /* one fewer pass remaining */
+                            " jne 5b\n" /* repeat until %2 reaches zero */
+                            "6:\n" /* exit point for the "je 6f" above */
+                            " emms\n":"+r" /* emms empties the MMX state; the output operand list starts here with %0 */
+                            (samples), "+r"(volumes), "+r"(length), /* %0 = samples, %1 = volumes, %2 = length: read-write register operands */
+                            "=D"((long) channel), /* %3 = channel, output placed in the DI register */
+                            "=&r"(temp):"X"((long) channels):"cc"); /* %4 = temp (early-clobber output); %5 = channels under "X" (any operand kind allowed); NOTE(review): only "cc" is clobbered -- the stores through %0 and the writes to mm0-mm5 are not declared */
+}