scummvm random work
[patches.git] / libdv-cvs-pic-fix.patch
1 --- libdv/dct_block_mmx.S 31 Jan 2005 13:27:54 -0000 1.4
2 +++ libdv/dct_block_mmx.S 15 Dec 2005 06:45:38 -0000
3 @@ -55,17 +55,22 @@ scratch4: .quad 0
4
5 .section .note.GNU-stack, "", @progbits
6
7 +#include "asm_common.S"
8 +
9 .text
10
11 .align 8
12 .global _dv_dct_88_block_mmx
13 +.hidden _dv_dct_88_block_mmx
14 +.type _dv_dct_88_block_mmx,@function
15 _dv_dct_88_block_mmx:
16
17 pushl %ebp
18 - movl %esp, %ebp
19 pushl %esi
20
21 - movl 8(%ebp), %esi # source
22 + LOAD_PIC_REG_BP()
23 +
24 + movl 12(%esp), %esi # source; no frame pointer any more, so the argument is addressed via %esp (2 pushes + return address)
25
26 # column 0
27 movq 16*0(%esi), %mm0 # v0
28 @@ -88,22 +93,22 @@ _dv_dct_88_block_mmx:
29
30 movq 16*3(%esi), %mm5 # v3
31 movq 16*4(%esi), %mm7 # v4
32 - movq %mm7, scratch1 # scratch1: v4 ;
33 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
34 movq %mm5, %mm7 # duplicate v3
35 - paddw scratch1, %mm5 # v03: v3+v4
36 - psubw scratch1, %mm7 # v04: v3-v4
37 - movq %mm5, scratch2 # scratch2: v03
38 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
39 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
40 + movq %mm5, MUNG(scratch2) # scratch2: v03
41 movq %mm0, %mm5 # mm5: v00
42
43 - paddw scratch2, %mm0 # v10: v00+v03
44 - psubw scratch2, %mm5 # v13: v00-v03
45 - movq %mm3, scratch3 # scratch3: v02
46 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
47 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
48 + movq %mm3, MUNG(scratch3) # scratch3: v02
49 movq %mm1, %mm3 # duplicate v01
50
51 - paddw scratch3, %mm1 # v11: v01+v02
52 - psubw scratch3, %mm3 # v12: v01-v02
53 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
54 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
55
56 - movq %mm6, scratch4 # scratch4: v05
57 + movq %mm6, MUNG(scratch4) # scratch4: v05
58 movq %mm0, %mm6 # duplicate v10
59
60 paddw %mm1, %mm0 # v10+v11
61 @@ -113,10 +118,10 @@ _dv_dct_88_block_mmx:
62 movq %mm6, 16*4(%esi) # out4: v10-v11
63
64 movq %mm4, %mm0 # mm0: v06
65 - paddw scratch4, %mm4 # v15: v05+v06
66 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
67 paddw %mm2, %mm0 # v16: v07+v06
68
69 - pmulhw WA3, %mm4 # v35~: WA3*v15
70 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
71 psllw $1, %mm4 # v35: compensate the coeefient scale
72
73 movq %mm4, %mm6 # duplicate v35
74 @@ -125,7 +130,7 @@ _dv_dct_88_block_mmx:
75
76 paddw %mm5, %mm3 # v22: v12+v13
77
78 - pmulhw WA1, %mm3 # v32~: WA1*v22
79 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
80 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
81 movq %mm5, %mm6 # duplicate v13
82
83 @@ -136,13 +141,13 @@ _dv_dct_88_block_mmx:
84 movq %mm6, 16*6(%esi) # out6: v13-v32
85
86
87 - paddw scratch4, %mm7 # v14n: v04+v05
88 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
89 movq %mm0, %mm5 # duplicate v16
90
91 psubw %mm7, %mm0 # va1: v16-v14n
92 - pmulhw WA5, %mm0 # va0~: va1*WA5
93 - pmulhw WA4, %mm5 # v36~~: v16*WA4
94 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
95 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
96 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
97 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
98 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
99 psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
100
101 @@ -190,22 +195,22 @@ _dv_dct_88_block_mmx:
102
103 movq 16*3(%esi), %mm5 # v3
104 movq 16*4(%esi), %mm7 # v4
105 - movq %mm7, scratch1 # scratch1: v4 ;
106 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
107 movq %mm5, %mm7 # duplicate v3
108 - paddw scratch1, %mm5 # v03: v3+v4
109 - psubw scratch1, %mm7 # v04: v3-v4
110 - movq %mm5, scratch2 # scratch2: v03
111 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
112 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
113 + movq %mm5, MUNG(scratch2) # scratch2: v03
114 movq %mm0, %mm5 # mm5: v00
115
116 - paddw scratch2, %mm0 # v10: v00+v03
117 - psubw scratch2, %mm5 # v13: v00-v03
118 - movq %mm3, scratch3 # scratc3: v02
119 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
120 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
121 + movq %mm3, MUNG(scratch3) # scratch3: v02
122 movq %mm1, %mm3 # duplicate v01
123
124 - paddw scratch3, %mm1 # v11: v01+v02
125 - psubw scratch3, %mm3 # v12: v01-v02
126 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
127 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
128
129 - movq %mm6, scratch4 # scratc4: v05
130 + movq %mm6, MUNG(scratch4) # scratch4: v05
131 movq %mm0, %mm6 # duplicate v10
132
133 paddw %mm1, %mm0 # v10+v11
134 @@ -215,10 +220,10 @@ _dv_dct_88_block_mmx:
135 movq %mm6, 16*4(%esi) # out4: v10-v11
136
137 movq %mm4, %mm0 # mm0: v06
138 - paddw scratch4, %mm4 # v15: v05+v06
139 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
140 paddw %mm2, %mm0 # v16: v07+v06
141
142 - pmulhw WA3, %mm4 # v35~: WA3*v15
143 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
144 psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
145
146 movq %mm4, %mm6 # duplicate v35
147 @@ -227,7 +232,7 @@ _dv_dct_88_block_mmx:
148
149 paddw %mm5, %mm3 # v22: v12+v13
150
151 - pmulhw WA1, %mm3 # v32~: WA3*v15
152 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
153 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
154 movq %mm5, %mm6 # duplicate v13
155
156 @@ -237,13 +242,13 @@ _dv_dct_88_block_mmx:
157 movq %mm5, 16*2(%esi) # out2: v13+v32
158 movq %mm6, 16*6(%esi) # out6: v13-v32
159
160 - paddw scratch4, %mm7 # v14n: v04+v05
161 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
162 movq %mm0, %mm5 # duplicate v16
163
164 psubw %mm7, %mm0 # va1: v16-v14n
165 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
166 - pmulhw WA5, %mm0 # va0~: va1*WA5
167 - pmulhw WA4, %mm5 # v36~~: v16*WA4
168 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
169 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
170 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
171 psllw $16-NSHIFT, %mm7
172 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
173 # scale note that WA4 is shifted 1 bit less than the others
174 @@ -274,6 +279,8 @@ _dv_dct_88_block_mmx:
175
176 .align 8
177 .global _dv_dct_block_mmx_postscale_88
178 +.hidden _dv_dct_block_mmx_postscale_88
179 +.type _dv_dct_block_mmx_postscale_88,@function
180 _dv_dct_block_mmx_postscale_88:
181
182 pushl %ebp
183 @@ -750,14 +757,17 @@ _dv_dct_block_mmx_postscale_88:
184
185 .align 8
186 .global _dv_dct_248_block_mmx
187 +.hidden _dv_dct_248_block_mmx
188 +.type _dv_dct_248_block_mmx,@function
189 _dv_dct_248_block_mmx:
190
191 pushl %ebp
192 - movl %esp, %ebp
193 pushl %esi
194 pushl %edi
195
196 - movl 8(%ebp), %esi # source
197 + LOAD_PIC_REG_BP()
198 +
199 + movl 16(%esp), %esi # source; no frame pointer any more, so the argument is addressed via %esp (3 pushes + return address)
200
201 # column 0
202
203 @@ -781,7 +791,7 @@ _dv_dct_248_block_mmx:
204 paddw %mm1, %mm0 # v20: v10+v11
205 psubw %mm1, %mm3 # v21: v10-v11
206
207 - pmulhw WA1, %mm5 # v32~: WA1*v22
208 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
209 movq %mm4, %mm2
210 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
211
212 @@ -820,7 +830,7 @@ _dv_dct_248_block_mmx:
213 paddw %mm1, %mm0 # v20: v10+v11
214 psubw %mm1, %mm3 # v21: v10-v11
215
216 - pmulhw WA1, %mm5 # v32~: WA1*v22
217 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
218 movq %mm4, %mm2
219 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
220
221 @@ -857,7 +867,7 @@ _dv_dct_248_block_mmx:
222 paddw %mm1, %mm0 # v20: v10+v11
223 psubw %mm1, %mm3 # v21: v10-v11
224
225 - pmulhw WA1, %mm5 # v32~: WA1*v22
226 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
227 movq %mm4, %mm2
228 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
229
230 @@ -894,7 +904,7 @@ _dv_dct_248_block_mmx:
231 paddw %mm1, %mm0 # v20: v10+v11
232 psubw %mm1, %mm3 # v21: v10-v11
233
234 - pmulhw WA1, %mm5 # v32~: WA1*v22
235 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
236 movq %mm4, %mm2
237 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
238
239 @@ -914,6 +924,8 @@ _dv_dct_248_block_mmx:
240
241 .align 8
242 .global _dv_dct_248_block_mmx_post_sum
243 +.hidden _dv_dct_248_block_mmx_post_sum
244 +.type _dv_dct_248_block_mmx_post_sum,@function
245 _dv_dct_248_block_mmx_post_sum:
246
247 pushl %ebp
248 @@ -994,6 +1006,8 @@ _dv_dct_248_block_mmx_post_sum:
249
250 .align 8
251 .global _dv_dct_block_mmx_postscale_248
252 +.hidden _dv_dct_block_mmx_postscale_248
253 +.type _dv_dct_block_mmx_postscale_248,@function
254 _dv_dct_block_mmx_postscale_248:
255
256 pushl %ebp
257 --- libdv/dct_block_mmx_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2
258 +++ libdv/dct_block_mmx_x86_64.S 15 Dec 2005 06:45:38 -0000
259 @@ -59,6 +59,8 @@ scratch4: .quad 0
260
261 .align 8
262 .global _dv_dct_88_block_mmx_x86_64
263 +.hidden _dv_dct_88_block_mmx_x86_64
264 +.type _dv_dct_88_block_mmx_x86_64,@function
265 _dv_dct_88_block_mmx_x86_64:
266
267 /* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */
268 @@ -271,6 +273,8 @@ _dv_dct_88_block_mmx_x86_64:
269
270 .align 8
271 .global _dv_dct_block_mmx_x86_64_postscale_88
272 +.hidden _dv_dct_block_mmx_x86_64_postscale_88
273 +.type _dv_dct_block_mmx_x86_64_postscale_88,@function
274 _dv_dct_block_mmx_x86_64_postscale_88:
275
276 /* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */
277 --- libdv/dv.c 20 Oct 2004 03:49:24 -0000 1.31
278 +++ libdv/dv.c 15 Dec 2005 06:45:38 -0000
279 @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
280 } /* dv_reconfigure */
281
282
283 +extern uint8_t dv_quant_offset[4];
284 +extern uint8_t dv_quant_shifts[22][4];
285 +
286 static inline void
287 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
288 int i;
289 @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
290 dv_idct_248 (co248, mb->b[i].coeffs);
291 } else {
292 #if ARCH_X86
293 - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
294 + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
295 _dv_idct_88(mb->b[i].coeffs);
296 #elif ARCH_X86_64
297 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
298 @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
299 dv_idct_248 (co248, mb->b[b].coeffs);
300 } else {
301 #if ARCH_X86
302 - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
303 + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
304 _dv_weight_88_inverse(bl->coeffs);
305 _dv_idct_88(bl->coeffs);
306 #elif ARCH_X86_64
307 --- libdv/encode.c 17 Nov 2004 03:36:30 -0000 1.26
308 +++ libdv/encode.c 15 Dec 2005 06:45:38 -0000
309 @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
310 }
311
312 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
313 - dv_vlc_entry_t ** out);
314 + dv_vlc_entry_t ** out,
315 + dv_vlc_entry_t * lookup);
316
317 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
318 dv_vlc_entry_t ** out);
319 @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
320 #elif ARCH_X86
321 int num_bits;
322
323 - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
324 + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
325 emms();
326 #else
327 int num_bits;
328 @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
329 return num_bits;
330 }
331
332 -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
333 +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
334 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
335
336 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
337 @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
338 #elif ARCH_X86_64
339 return _dv_vlc_num_bits_block_x86_64(coeffs);
340 #else
341 - return _dv_vlc_num_bits_block_x86(coeffs);
342 + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
343 #endif
344 }
345
346 --- libdv/encode_x86.S 31 Jan 2005 13:27:54 -0000 1.4
347 +++ libdv/encode_x86.S 15 Dec 2005 06:45:38 -0000
348 @@ -23,16 +23,13 @@
349 * The libdv homepage is http://libdv.sourceforge.net/.
350 */
351
352 -.data
353 -ALLONE: .word 1,1,1,1
354 -VLCADDMASK: .byte 255,0,0,0,255,0,0,0
355 -
356 -
357 .section .note.GNU-stack, "", @progbits
358
359 .text
360
361 .global _dv_vlc_encode_block_mmx
362 +.hidden _dv_vlc_encode_block_mmx
363 +.type _dv_vlc_encode_block_mmx,@function
364 _dv_vlc_encode_block_mmx:
365 pushl %ebx
366 pushl %esi
367 @@ -48,11 +45,14 @@ _dv_vlc_encode_block_mmx:
368
369 movl $63, %ecx
370
371 - movl vlc_encode_lookup, %esi
372 + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
373
374 pxor %mm0, %mm0
375 pxor %mm2, %mm2
376 - movq VLCADDMASK, %mm1
377 + pushl $0x000000FF # these four lines
378 + pushl $0x000000FF # load VLCADDMASK
379 + movq (%esp), %mm1 # into %mm1 off the stack
380 + addl $8, %esp # --> no TEXTRELs
381 xorl %ebp, %ebp
382 subl $8, %edx
383 vlc_encode_block_mmx_loop:
384 @@ -109,6 +109,8 @@ vlc_encode_block_out:
385 ret
386
387 .global _dv_vlc_num_bits_block_x86
388 +.hidden _dv_vlc_num_bits_block_x86
389 +.type _dv_vlc_num_bits_block_x86,@function
390 _dv_vlc_num_bits_block_x86:
391 pushl %ebx
392 pushl %esi
393 @@ -124,7 +126,7 @@ _dv_vlc_num_bits_block_x86:
394 addl $2, %edi
395
396 movl $63, %ecx
397 - movl vlc_num_bits_lookup, %esi
398 + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
399
400 vlc_num_bits_block_x86_loop:
401 movw (%edi), %ax
402 @@ -167,6 +169,8 @@ vlc_num_bits_block_out:
403 ret
404
405 .global _dv_vlc_encode_block_pass_1_x86
406 +.hidden _dv_vlc_encode_block_pass_1_x86
407 +.type _dv_vlc_encode_block_pass_1_x86,@function
408 _dv_vlc_encode_block_pass_1_x86:
409 pushl %ebx
410 pushl %esi
411 @@ -243,6 +247,8 @@ vlc_encode_block_pass1_x86_out:
412 ret
413
414 .global _dv_classify_mmx
415 +.hidden _dv_classify_mmx
416 +.type _dv_classify_mmx,@function
417 _dv_classify_mmx:
418
419 pushl %ebp
420 @@ -348,6 +354,8 @@ _dv_classify_mmx:
421 don't know why... */
422
423 .global _dv_reorder_block_mmx
424 +.hidden _dv_reorder_block_mmx
425 +.type _dv_reorder_block_mmx,@function
426 _dv_reorder_block_mmx:
427
428 pushl %ebp
429 @@ -463,6 +471,8 @@ reorder_loop:
430 ret
431
432 .global _dv_need_dct_248_mmx_rows
433 +.hidden _dv_need_dct_248_mmx_rows
434 +.type _dv_need_dct_248_mmx_rows,@function
435 _dv_need_dct_248_mmx_rows:
436
437 pushl %ebp
438 @@ -582,8 +592,11 @@ _dv_need_dct_248_mmx_rows:
439 paddw %mm5, %mm1
440
441 paddw %mm1, %mm0
442 -
443 - pmaddwd ALLONE, %mm0
444 +
445 + pushl $0x00010001 # these four lines
446 + pushl $0x00010001 # load ALLONE
447 + pmaddwd (%esp), %mm0 # into %mm0 off the stack
448 + addl $8, %esp # --> no TEXTRELs
449 movq %mm0, %mm1
450 psrlq $32, %mm1
451 paddd %mm1, %mm0
452 --- libdv/encode_x86_64.S 31 Jan 2005 13:27:54 -0000 1.4
453 +++ libdv/encode_x86_64.S 15 Dec 2005 06:45:39 -0000
454 @@ -32,6 +32,8 @@ VLCADDMASK: .byte 255,0,0,0,255,0,0,0
455 .text
456
457 .global _dv_vlc_encode_block_mmx_x86_64
458 +.hidden _dv_vlc_encode_block_mmx_x86_64
459 +.type _dv_vlc_encode_block_mmx_x86_64,@function
460 _dv_vlc_encode_block_mmx_x86_64:
461
462 /* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
463 @@ -115,6 +117,8 @@ vlc_encode_block_out:
464 ret
465
466 .global _dv_vlc_num_bits_block_x86_64
467 +.hidden _dv_vlc_num_bits_block_x86_64
468 +.type _dv_vlc_num_bits_block_x86_64,@function
469 _dv_vlc_num_bits_block_x86_64:
470
471 /* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */
472 @@ -175,6 +179,8 @@ vlc_num_bits_block_out:
473 ret
474
475 .global _dv_vlc_encode_block_pass_1_x86_64
476 +.hidden _dv_vlc_encode_block_pass_1_x86_64
477 +.type _dv_vlc_encode_block_pass_1_x86_64,@function
478 _dv_vlc_encode_block_pass_1_x86_64:
479
480 /*
481 @@ -253,6 +259,8 @@ vlc_encode_block_pass1_x86_out:
482 ret
483
484 .global _dv_classify_mmx_x86_64
485 +.hidden _dv_classify_mmx_x86_64
486 +.type _dv_classify_mmx_x86_64,@function
487 _dv_classify_mmx_x86_64:
488
489 /* extern int _dv_classify_mmx_x86_64(dv_coeff_t * a, rdi
490 @@ -357,6 +365,8 @@ _dv_classify_mmx_x86_64:
491 don't know why... */
492
493 .global _dv_reorder_block_mmx_x86_64
494 +.hidden _dv_reorder_block_mmx_x86_64
495 +.type _dv_reorder_block_mmx_x86_64,@function
496 _dv_reorder_block_mmx_x86_64:
497
498 /*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t * a, rdi
499 @@ -471,6 +481,8 @@ reorder_loop:
500 ret
501
502 .global _dv_need_dct_248_mmx_x86_64_rows
503 +.hidden _dv_need_dct_248_mmx_x86_64_rows
504 +.type _dv_need_dct_248_mmx_x86_64_rows,@function
505 _dv_need_dct_248_mmx_x86_64_rows:
506
507 /* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl); rdi */
508 --- libdv/idct_block_mmx.S 31 Jan 2005 13:27:54 -0000 1.3
509 +++ libdv/idct_block_mmx.S 15 Dec 2005 06:45:39 -0000
510 @@ -8,16 +8,22 @@
511
512 .section .note.GNU-stack, "", @progbits
513
514 +#include "asm_common.S"
515 +
516 .text
517 +
518 .align 4
519 .globl _dv_idct_block_mmx
520 +.hidden _dv_idct_block_mmx
521 .type _dv_idct_block_mmx,@function
522 _dv_idct_block_mmx:
523 pushl %ebp
524 - movl %esp,%ebp
525 pushl %esi
526 - leal preSC, %ecx
527 - movl 8(%ebp),%esi /* source matrix */
528 +
529 + LOAD_PIC_REG_BP()
530 +
531 + leal MUNG(preSC), %ecx
532 + movl 12(%esp),%esi /* source matrix */
533
534 /*
535 * column 0: even part
536 @@ -35,7 +41,7 @@ _dv_idct_block_mmx:
537 movq %mm1, %mm2 /* added 11/1/96 */
538 pmulhw 8*8(%esi),%mm5 /* V8 */
539 psubsw %mm0, %mm1 /* V16 */
540 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
541 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
542 paddsw %mm0, %mm2 /* V17 */
543 movq %mm2, %mm0 /* duplicate V17 */
544 psraw $1, %mm2 /* t75=t82 */
545 @@ -76,7 +82,7 @@ _dv_idct_block_mmx:
546 paddsw %mm0, %mm3 /* V29 ; free mm0 */
547 movq %mm7, %mm1 /* duplicate V26 */
548 psraw $1, %mm3 /* t91=t94 */
549 - pmulhw x539f539f539f539f,%mm7 /* V33 */
550 + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
551 psraw $1, %mm1 /* t96 */
552 movq %mm5, %mm0 /* duplicate V2 */
553 psraw $2, %mm4 /* t85=t87 */
554 @@ -84,15 +90,15 @@ _dv_idct_block_mmx:
555 psubsw %mm4, %mm0 /* V28 ; free mm4 */
556 movq %mm0, %mm2 /* duplicate V28 */
557 psraw $1, %mm5 /* t90=t93 */
558 - pmulhw x4546454645464546,%mm0 /* V35 */
559 + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
560 psraw $1, %mm2 /* t97 */
561 movq %mm5, %mm4 /* duplicate t90=t93 */
562 psubsw %mm2, %mm1 /* V32 ; free mm2 */
563 - pmulhw x61f861f861f861f8,%mm1 /* V36 */
564 + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
565 psllw $1, %mm7 /* t107 */
566 paddsw %mm3, %mm5 /* V31 */
567 psubsw %mm3, %mm4 /* V30 ; free mm3 */
568 - pmulhw x5a825a825a825a82,%mm4 /* V34 */
569 + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
570 nop
571 psubsw %mm1, %mm0 /* V38 */
572 psubsw %mm7, %mm1 /* V37 ; free mm7 */
573 @@ -159,7 +165,7 @@ _dv_idct_block_mmx:
574 psubsw %mm7, %mm1 /* V50 */
575 pmulhw 8*9(%esi), %mm5 /* V9 */
576 paddsw %mm7, %mm2 /* V51 */
577 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
578 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
579 movq %mm2, %mm6 /* duplicate V51 */
580 psraw $1, %mm2 /* t138=t144 */
581 movq %mm3, %mm4 /* duplicate V1 */
582 @@ -200,11 +206,11 @@ _dv_idct_block_mmx:
583 * even more by doing the correction step in a later stage when the number
584 * is actually multiplied by 16
585 */
586 - paddw x0005000200010001, %mm4
587 + paddw MUNG(x0005000200010001), %mm4
588 psubsw %mm6, %mm3 /* V60 ; free mm6 */
589 psraw $1, %mm0 /* t154=t156 */
590 movq %mm3, %mm1 /* duplicate V60 */
591 - pmulhw x539f539f539f539f, %mm1 /* V67 */
592 + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
593 movq %mm5, %mm6 /* duplicate V3 */
594 psraw $2, %mm4 /* t148=t150 */
595 paddsw %mm4, %mm5 /* V61 */
596 @@ -213,13 +219,13 @@ _dv_idct_block_mmx:
597 psllw $1, %mm1 /* t169 */
598 paddsw %mm0, %mm5 /* V65 -> result */
599 psubsw %mm0, %mm4 /* V64 ; free mm0 */
600 - pmulhw x5a825a825a825a82, %mm4 /* V68 */
601 + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
602 psraw $1, %mm3 /* t158 */
603 psubsw %mm6, %mm3 /* V66 */
604 movq %mm5, %mm2 /* duplicate V65 */
605 - pmulhw x61f861f861f861f8, %mm3 /* V70 */
606 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
607 psllw $1, %mm6 /* t165 */
608 - pmulhw x4546454645464546, %mm6 /* V69 */
609 + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
610 psraw $1, %mm2 /* t172 */
611 /* moved from next block */
612 movq 8*5(%esi), %mm0 /* V56 */
613 @@ -344,7 +350,7 @@ _dv_idct_block_mmx:
614 * movq 8*13(%esi), %mm4 tmt13
615 */
616 psubsw %mm4, %mm3 /* V134 */
617 - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
618 + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
619 movq 8*9(%esi), %mm6 /* tmt9 */
620 paddsw %mm4, %mm5 /* V135 ; mm4 free */
621 movq %mm0, %mm4 /* duplicate tmt1 */
622 @@ -373,17 +379,17 @@ _dv_idct_block_mmx:
623 psubsw %mm7, %mm0 /* V144 */
624 movq %mm0, %mm3 /* duplicate V144 */
625 paddsw %mm7, %mm2 /* V147 ; free mm7 */
626 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
627 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
628 movq %mm1, %mm7 /* duplicate tmt3 */
629 paddsw %mm5, %mm7 /* V145 */
630 psubsw %mm5, %mm1 /* V146 ; free mm5 */
631 psubsw %mm1, %mm3 /* V150 */
632 movq %mm7, %mm5 /* duplicate V145 */
633 - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
634 + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
635 psubsw %mm2, %mm5 /* V148 */
636 - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
637 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
638 psllw $2, %mm0 /* t311 */
639 - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
640 + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
641 paddsw %mm2, %mm7 /* V149 ; free mm2 */
642 psllw $1, %mm1 /* t313 */
643 nop /* without the nop - freeze here for one clock */
644 @@ -409,7 +415,7 @@ _dv_idct_block_mmx:
645 paddsw %mm3, %mm6 /* V164 ; free mm3 */
646 movq %mm4, %mm3 /* duplicate V142 */
647 psubsw %mm5, %mm4 /* V165 ; free mm5 */
648 - movq %mm2, scratch7 /* out7 */
649 + movq %mm2, MUNG(scratch7) /* out7 */
650 psraw $4, %mm6
651 psraw $4, %mm4
652 paddsw %mm5, %mm3 /* V162 */
653 @@ -420,11 +426,11 @@ _dv_idct_block_mmx:
654 */
655 movq %mm6, 8*9(%esi) /* out9 */
656 paddsw %mm1, %mm0 /* V161 */
657 - movq %mm3, scratch5 /* out5 */
658 + movq %mm3, MUNG(scratch5) /* out5 */
659 psubsw %mm1, %mm5 /* V166 ; free mm1 */
660 movq %mm4, 8*11(%esi) /* out11 */
661 psraw $4, %mm5
662 - movq %mm0, scratch3 /* out3 */
663 + movq %mm0, MUNG(scratch3) /* out3 */
664 movq %mm2, %mm4 /* duplicate V140 */
665 movq %mm5, 8*13(%esi) /* out13 */
666 paddsw %mm7, %mm2 /* V160 */
667 @@ -434,7 +440,7 @@ _dv_idct_block_mmx:
668 /* moved from the next block */
669 movq 8*3(%esi), %mm7
670 psraw $4, %mm4
671 - movq %mm2, scratch1 /* out1 */
672 + movq %mm2, MUNG(scratch1) /* out1 */
673 /* moved from the next block */
674 movq %mm0, %mm1
675 movq %mm4, 8*15(%esi) /* out15 */
676 @@ -491,15 +497,15 @@ _dv_idct_block_mmx:
677 paddsw %mm4, %mm3 /* V113 ; free mm4 */
678 movq %mm0, %mm4 /* duplicate V110 */
679 paddsw %mm1, %mm2 /* V111 */
680 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
681 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
682 psubsw %mm1, %mm5 /* V112 ; free mm1 */
683 psubsw %mm5, %mm4 /* V116 */
684 movq %mm2, %mm1 /* duplicate V111 */
685 - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
686 + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
687 psubsw %mm3, %mm2 /* V114 */
688 - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
689 + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
690 paddsw %mm3, %mm1 /* V115 ; free mm3 */
691 - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
692 + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
693 psllw $2, %mm0 /* t266 */
694 movq %mm1, (%esi) /* save V115 */
695 psllw $1, %mm5 /* t268 */
696 @@ -517,7 +523,7 @@ _dv_idct_block_mmx:
697 movq %mm6, %mm3 /* duplicate tmt4 */
698 psubsw %mm0, %mm6 /* V100 */
699 paddsw %mm0, %mm3 /* V101 ; free mm0 */
700 - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
701 + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
702 movq %mm7, %mm5 /* duplicate tmt0 */
703 movq 8*8(%esi), %mm1 /* tmt8 */
704 paddsw %mm1, %mm7 /* V103 */
705 @@ -551,10 +557,10 @@ _dv_idct_block_mmx:
706 movq 8*2(%esi), %mm3 /* V123 */
707 paddsw %mm4, %mm7 /* out0 */
708 /* moved up from next block */
709 - movq scratch3, %mm0
710 + movq MUNG(scratch3), %mm0
711 psraw $4, %mm7
712 /* moved up from next block */
713 - movq scratch5, %mm6
714 + movq MUNG(scratch5), %mm6
715 psubsw %mm4, %mm1 /* out14 ; free mm4 */
716 paddsw %mm3, %mm5 /* out2 */
717 psraw $4, %mm1
718 @@ -565,7 +571,7 @@ _dv_idct_block_mmx:
719 movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
720 psraw $4, %mm2
721 /* moved up to the prev block */
722 - movq scratch7, %mm4
723 + movq MUNG(scratch7), %mm4
724 /* moved up to the prev block */
725 psraw $4, %mm0
726 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
727 @@ -579,7 +585,7 @@ _dv_idct_block_mmx:
728 * psraw $4, %mm0
729 * psraw $4, %mm6
730 */
731 - movq scratch1, %mm1
732 + movq MUNG(scratch1), %mm1
733 psraw $4, %mm4
734 movq %mm0, 8*3(%esi) /* out3 */
735 psraw $4, %mm1
736 --- libdv/idct_block_mmx_x86_64.S 31 Jan 2005 13:27:54 -0000 1.3
737 +++ libdv/idct_block_mmx_x86_64.S 15 Dec 2005 06:45:39 -0000
738 @@ -18,6 +18,7 @@
739 .text
740 .align 4
741 .globl _dv_idct_block_mmx_x86_64
742 +.hidden _dv_idct_block_mmx_x86_64
743 .type _dv_idct_block_mmx_x86_64,@function
744 _dv_idct_block_mmx_x86_64:
745 /* void _dv_idct_88(dv_coeff_t *block) */
746 --- libdv/parse.c 20 Oct 2004 03:49:24 -0000 1.13
747 +++ libdv/parse.c 15 Dec 2005 06:45:39 -0000
748 @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
749 exit(0);
750 #endif
751 } /* dv_parse_ac_coeffs */
752 +#if defined __GNUC__ && __ELF__
753 +# define dv_strong_hidden_alias(name, aliasname) \
754 + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
755 +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
756 +#else
757 +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
758 +#endif
759
760 /* ---------------------------------------------------------------------------
761 */
762 --- libdv/quant.c 20 Oct 2004 03:49:24 -0000 1.9
763 +++ libdv/quant.c 15 Dec 2005 06:45:39 -0000
764 @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
765 uint32_t dv_quant_248_mul_tab [2] [22] [64];
766 uint32_t dv_quant_88_mul_tab [2] [22] [64];
767
768 -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
769 +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][4]); /* inner dimension is required: an array of incomplete element type is not valid C */
770 extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
771 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
772 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
773 @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
774 _dv_quant_x86_64(block, qno, klass);
775 emms();
776 #else
777 - _dv_quant_x86(block, qno, klass);
778 + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
779 emms();
780 #endif
781 }
782 --- libdv/quant.h 20 Oct 2004 03:49:24 -0000 1.4
783 +++ libdv/quant.h 15 Dec 2005 06:45:39 -0000
784 @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
785 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
786 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
787 dv_248_coeff_t *co);
788 -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
789 +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][4]); /* inner dimension is required: an array of incomplete element type is not valid C */
790 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
791 extern void dv_quant_init (void);
792 #ifdef __cplusplus
793 --- libdv/quant_x86.S 31 Jan 2005 13:27:54 -0000 1.4
794 +++ libdv/quant_x86.S 15 Dec 2005 06:45:39 -0000
795 @@ -57,6 +57,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
796 .text
797 .align 4
798 .globl _dv_quant_88_inverse_x86
799 +.hidden _dv_quant_88_inverse_x86
800 +.type _dv_quant_88_inverse_x86,@function
801 _dv_quant_88_inverse_x86:
802 pushl %ebx
803 pushl %esi
804 @@ -73,10 +75,13 @@ _dv_quant_88_inverse_x86:
805
806 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
807 movl ARGn(1),%eax /* qno */
808 + movl ARGn(3),%ebx /* dv_quant_offset */
809 + addl ARGn(2),%ebx /* class */
810 + movzbl (%ebx),%ecx
811 movl ARGn(2),%ebx /* class */
812 - movzbl dv_quant_offset(%ebx),%ecx
813 addl %ecx,%eax
814 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
815 + movl ARGn(4),%edx /* dv_quant_shifts */
816 + leal (%edx,%eax,4),%edx /* edx is pq */
817
818 /* extra = (class == 3); */
819 /* 0 1 2 3 */
820 @@ -195,6 +200,8 @@ _dv_quant_88_inverse_x86:
821
822 .align 4
823 .globl _dv_quant_x86
824 +.hidden _dv_quant_x86
825 +.type _dv_quant_x86,@function
826 _dv_quant_x86:
827 pushl %ebx
828 pushl %ecx
829 @@ -214,11 +221,13 @@ _dv_quant_x86:
830
831 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
832 movl ARGn(1),%eax /* qno */
833 + movl ARGn(3),%ebx /* offset */
834 + addl ARGn(2),%ebx /* class */
835 + movzbl (%ebx),%ecx
836 movl ARGn(2),%ebx /* class */
837 -
838 - movzbl dv_quant_offset(%ebx),%ecx
839 + movl ARGn(4),%edx /* shifts */
840 addl %ecx,%eax
841 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
842 + leal (%edx,%eax,4),%edx /* edx is pq */
843
844 /* extra = (class == 3); */
845 /* 0 1 2 3 */
846 --- libdv/quant_x86_64.S 31 Jan 2005 13:27:54 -0000 1.4
847 +++ libdv/quant_x86_64.S 15 Dec 2005 06:45:39 -0000
848 @@ -57,6 +57,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
849 .text
850 .align 4
851 .globl _dv_quant_88_inverse_x86_64
852 +.hidden _dv_quant_88_inverse_x86_64
853 +.type _dv_quant_88_inverse_x86_64,@function
854 _dv_quant_88_inverse_x86_64:
855
856 /* Args are at block=rdi, qno=rsi, class=rdx */
857 @@ -197,6 +199,8 @@ _dv_quant_88_inverse_x86_64:
858
859 .align 4
860 .globl _dv_quant_x86_64
861 +.hidden _dv_quant_x86_64
862 +.type _dv_quant_x86_64,@function
863 _dv_quant_x86_64:
864
865 /* Args are at block=rdi, qno=rsi, class=rdx */
866 --- libdv/rgbtoyuv.S 31 Jan 2005 13:27:54 -0000 1.6
867 +++ libdv/rgbtoyuv.S 15 Dec 2005 06:45:39 -0000
868 @@ -41,9 +41,6 @@
869 #define DV_WIDTH_SHORT_HALF 720
870 #define DV_WIDTH_BYTE_HALF 360
871
872 -.global _dv_rgbtoycb_mmx
873 -# .global yuvtoycb_mmx
874 -
875 .data
876
877 .align 8
878 @@ -110,22 +107,26 @@ VR0GR: .long 0,0
879 VBG0B: .long 0,0
880
881 #endif
882 -
883 +
884 .section .note.GNU-stack, "", @progbits
885
886 +#include "asm_common.S"
887 +
888 .text
889
890 -#define _inPtr 8
891 -#define _rows 12
892 -#define _columns 16
893 -#define _outyPtr 20
894 -#define _outuPtr 24
895 -#define _outvPtr 28
896 +#define _inPtr 24+8
897 +#define _rows 24+12
898 +#define _columns 24+16
899 +#define _outyPtr 24+20
900 +#define _outuPtr 24+24
901 +#define _outvPtr 24+28
902
903 +.global _dv_rgbtoycb_mmx
904 +.hidden _dv_rgbtoycb_mmx
905 +.type _dv_rgbtoycb_mmx,@function
906 _dv_rgbtoycb_mmx:
907
908 pushl %ebp
909 - movl %esp, %ebp
910 pushl %eax
911 pushl %ebx
912 pushl %ecx
913 @@ -133,46 +134,47 @@ _dv_rgbtoycb_mmx:
914 pushl %esi
915 pushl %edi
916
917 - leal ZEROSX, %eax #This section gets around a bug
918 + LOAD_PIC_REG_BP()
919 +
920 + leal MUNG(ZEROSX), %eax #This section gets around a bug
921 movq (%eax), %mm0 #unlikely to persist
922 - movq %mm0, ZEROS
923 - leal OFFSETDX, %eax
924 + movq %mm0, MUNG(ZEROS)
925 + leal MUNG(OFFSETDX), %eax
926 movq (%eax), %mm0
927 - movq %mm0, OFFSETD
928 - leal OFFSETWX, %eax
929 + movq %mm0, MUNG(OFFSETD)
930 + leal MUNG(OFFSETWX), %eax
931 movq (%eax), %mm0
932 - movq %mm0, OFFSETW
933 - leal OFFSETBX, %eax
934 + movq %mm0, MUNG(OFFSETW)
935 + leal MUNG(OFFSETBX), %eax
936 movq (%eax), %mm0
937 - movq %mm0, OFFSETB
938 - leal YR0GRX, %eax
939 + movq %mm0, MUNG(OFFSETB)
940 + leal MUNG(YR0GRX), %eax
941 movq (%eax), %mm0
942 - movq %mm0, YR0GR
943 - leal YBG0BX, %eax
944 + movq %mm0, MUNG(YR0GR)
945 + leal MUNG(YBG0BX), %eax
946 movq (%eax), %mm0
947 - movq %mm0, YBG0B
948 - leal UR0GRX, %eax
949 + movq %mm0, MUNG(YBG0B)
950 + leal MUNG(UR0GRX), %eax
951 movq (%eax), %mm0
952 - movq %mm0, UR0GR
953 - leal UBG0BX, %eax
954 + movq %mm0, MUNG(UR0GR)
955 + leal MUNG(UBG0BX), %eax
956 movq (%eax), %mm0
957 - movq %mm0, UBG0B
958 - leal VR0GRX, %eax
959 + movq %mm0, MUNG(UBG0B)
960 + leal MUNG(VR0GRX), %eax
961 movq (%eax), %mm0
962 - movq %mm0, VR0GR
963 - leal VBG0BX, %eax
964 + movq %mm0, MUNG(VR0GR)
965 + leal MUNG(VBG0BX), %eax
966 movq (%eax), %mm0
967 - movq %mm0, VBG0B
968 -
969 - movl _rows(%ebp), %eax
970 - movl _columns(%ebp), %ebx
971 + movq %mm0, MUNG(VBG0B)
972 + movl _rows(%esp), %eax
973 + movl _columns(%esp), %ebx
974 mull %ebx #number pixels
975 shrl $3, %eax #number of loops
976 movl %eax, %edi #loop counter in edi
977 - movl _inPtr(%ebp), %eax
978 - movl _outyPtr(%ebp), %ebx
979 - movl _outuPtr(%ebp), %ecx
980 - movl _outvPtr(%ebp), %edx
981 + movl _inPtr(%esp), %eax
982 + movl _outyPtr(%esp), %ebx
983 + movl _outuPtr(%esp), %ecx
984 + movl _outvPtr(%esp), %edx
985 rgbtoycb_mmx_loop:
986 movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
987 pxor %mm6, %mm6 #0 -> mm6
988 @@ -186,29 +188,29 @@ rgbtoycb_mmx_loop:
989 punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
990 movq %mm0, %mm2 #R1B0G0R0 -> mm2
991
992 - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
993 + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
994 movq %mm1, %mm3 #B1G1R1B0 -> mm3
995
996 - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
997 + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
998 movq %mm2, %mm4 #R1B0G0R0 -> mm4
999
1000 - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
1001 + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
1002 movq %mm3, %mm5 #B1G1R1B0 -> mm5
1003
1004 - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
1005 + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
1006 punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
1007
1008 - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
1009 + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
1010 paddd %mm1, %mm0 #Y1Y0 -> mm0
1011
1012 - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
1013 + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
1014
1015 movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
1016 paddd %mm3, %mm2 #U1U0 -> mm2
1017
1018 movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
1019
1020 - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
1021 + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
1022 paddd %mm5, %mm4 #V1V0 -> mm4
1023
1024 movq %mm1, %mm5 #B3G3R3B2 -> mm5
1025 @@ -216,29 +218,29 @@ rgbtoycb_mmx_loop:
1026
1027 paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
1028
1029 - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
1030 + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
1031 movq %mm1, %mm3 #R3B2G2R2 -> mm3
1032
1033 - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
1034 + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
1035 movq %mm5, %mm7 #B3G3R3B2 -> mm7
1036
1037 - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
1038 + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
1039 psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
1040
1041 - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
1042 + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
1043 movq %mm3, %mm6 #R3B2G2R2 -> mm6
1044 - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
1045 + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
1046 psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
1047
1048 paddd %mm5, %mm1 #Y3Y2 -> mm1
1049 movq %mm7, %mm5 #B3G3R3B2 -> mm5
1050 - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
1051 + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
1052 psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
1053
1054 - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
1055 + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
1056 packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
1057
1058 - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
1059 + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
1060 psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
1061
1062 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
1063 @@ -253,58 +255,58 @@ rgbtoycb_mmx_loop:
1064 movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
1065 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
1066
1067 - paddw OFFSETY, %mm0
1068 + paddw MUNG(OFFSETY), %mm0
1069 movq %mm0, (%ebx) #store Y3Y2Y1Y0
1070 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
1071
1072 - movq TEMP0, %mm0 #R5B4G4R4 -> mm0
1073 + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
1074 addl $8, %ebx
1075 -
1076 - punpcklbw ZEROS, %mm7 #B5G500 -> mm7
1077 +
1078 + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
1079 movq %mm0, %mm6 #R5B4G4R4 -> mm6
1080
1081 - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
1082 + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
1083 psrlq $32, %mm0 #00R5B4 -> mm0
1084
1085 paddw %mm0, %mm7 #B5G5R5B4 -> mm7
1086 movq %mm6, %mm2 #B5B4G4R4 -> mm2
1087
1088 - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
1089 + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
1090 movq %mm7, %mm0 #B5G5R5B4 -> mm0
1091
1092 - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
1093 + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
1094 packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
1095
1096 addl $24, %eax #increment RGB count
1097
1098 - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
1099 + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
1100 movq %mm6, %mm4 #B5B4G4R4 -> mm4
1101
1102 - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
1103 + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
1104 movq %mm0, %mm3 #B5G5R5B4 -> mm0
1105
1106 - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
1107 + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
1108 paddd %mm7, %mm2 #Y5Y4 -> mm2
1109
1110 - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
1111 + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
1112 pxor %mm7, %mm7 #0 -> mm7
1113
1114 - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
1115 + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
1116 punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
1117
1118 paddd %mm6, %mm0 #U5U4 -> mm0
1119 movq %mm1, %mm6 #B7G7R7B6 -> mm6
1120
1121 - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
1122 + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
1123 punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
1124
1125 movq %mm5, %mm7 #R7B6G6R6 -> mm7
1126 paddd %mm4, %mm3 #V5V4 -> mm3
1127
1128 - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
1129 + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
1130 movq %mm1, %mm4 #B7G7R7B6 -> mm4
1131
1132 - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
1133 + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
1134 psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
1135
1136 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
1137 @@ -312,25 +314,25 @@ rgbtoycb_mmx_loop:
1138 paddd %mm5, %mm6 #Y7Y6 -> mm6
1139 movq %mm7, %mm5 #R7B6G6R6 -> mm5
1140
1141 - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
1142 + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
1143 psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
1144
1145 - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
1146 + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
1147 psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
1148
1149 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
1150
1151 - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
1152 + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
1153 paddd %mm4, %mm7 #U7U6 -> mm7
1154
1155 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
1156 - paddw OFFSETY, %mm2
1157 + paddw MUNG(OFFSETY), %mm2
1158 movq %mm2, (%ebx) #store Y7Y6Y5Y4
1159
1160 - movq ALLONE, %mm6
1161 + movq MUNG(ALLONE), %mm6
1162 packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
1163
1164 - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
1165 + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
1166 pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
1167
1168 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
1169 @@ -340,8 +342,8 @@ rgbtoycb_mmx_loop:
1170
1171 psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
1172 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
1173 -
1174 - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
1175 +
1176 + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
1177
1178 movq %mm4, (%ecx) # store U
1179
1180 @@ -374,6 +376,8 @@ rgbtoycb_mmx_loop:
1181 ret
1182
1183 .global _dv_ppm_copy_y_block_mmx
1184 +.hidden _dv_ppm_copy_y_block_mmx
1185 +.type _dv_ppm_copy_y_block_mmx,@function
1186 _dv_ppm_copy_y_block_mmx:
1187
1188 pushl %ebp
1189 @@ -424,17 +428,20 @@ _dv_ppm_copy_y_block_mmx:
1190 ret
1191
1192 .global _dv_pgm_copy_y_block_mmx
1193 +.hidden _dv_pgm_copy_y_block_mmx
1194 +.type _dv_pgm_copy_y_block_mmx,@function /* type the pgm entry point itself, not its ppm sibling */
1195 _dv_pgm_copy_y_block_mmx:
1196
1197 pushl %ebp
1198 - movl %esp, %ebp
1199 pushl %esi
1200 pushl %edi
1201 -
1202 - movl 8(%ebp), %edi # dest
1203 - movl 12(%ebp), %esi # src
1204
1205 - movq OFFSETY, %mm7
1206 + LOAD_PIC_REG_BP()
1207 +
1208 + movl 16(%esp), %edi # dest
1209 + movl 20(%esp), %esi # src
1210 +
1211 + movq MUNG(OFFSETY), %mm7
1212 pxor %mm6, %mm6
1213
1214 movq (%esi), %mm0
1215 @@ -566,17 +573,20 @@ _dv_pgm_copy_y_block_mmx:
1216 ret
1217
1218 .global _dv_video_copy_y_block_mmx
1219 +.hidden _dv_video_copy_y_block_mmx
1220 +.type _dv_video_copy_y_block_mmx,@function
1221 _dv_video_copy_y_block_mmx:
1222
1223 pushl %ebp
1224 - movl %esp, %ebp
1225 pushl %esi
1226 pushl %edi
1227 -
1228 - movl 8(%ebp), %edi # dest
1229 - movl 12(%ebp), %esi # src
1230
1231 - movq OFFSETBX, %mm7
1232 + LOAD_PIC_REG_BP()
1233 +
1234 + movl 16(%esp), %edi # dest
1235 + movl 20(%esp), %esi # src
1236 +
1237 + movq MUNG(OFFSETBX), %mm7
1238 pxor %mm6, %mm6
1239
1240 movq (%esi), %mm0
1241 @@ -711,6 +721,8 @@ _dv_video_copy_y_block_mmx:
1242
1243
1244 .global _dv_ppm_copy_pal_c_block_mmx
1245 +.hidden _dv_ppm_copy_pal_c_block_mmx
1246 +.type _dv_ppm_copy_pal_c_block_mmx,@function
1247 _dv_ppm_copy_pal_c_block_mmx:
1248
1249 pushl %ebp
1250 @@ -854,19 +866,21 @@ _dv_ppm_copy_pal_c_block_mmx:
1251 ret
1252
1253 .global _dv_pgm_copy_pal_c_block_mmx
1254 +.hidden _dv_pgm_copy_pal_c_block_mmx /* hide the pgm entry point itself, not its ppm sibling */
1255 +.type _dv_pgm_copy_pal_c_block_mmx,@function
1256 _dv_pgm_copy_pal_c_block_mmx:
1257
1258 pushl %ebp
1259 - movl %esp, %ebp
1260 pushl %esi
1261 pushl %edi
1262 pushl %ebx
1263 -
1264 - movl 8(%ebp), %edi # dest
1265 - movl 12(%ebp), %esi # src
1266
1267 + LOAD_PIC_REG_BP()
1268 +
1269 + movl 20(%esp), %edi # dest
1270 + movl 24(%esp), %esi # src
1271
1272 - movq OFFSETBX, %mm7
1273 + movq MUNG(OFFSETBX), %mm7
1274 pxor %mm6, %mm6
1275
1276
1277 @@ -1002,18 +1016,21 @@ _dv_pgm_copy_pal_c_block_mmx:
1278 ret
1279
1280 .global _dv_video_copy_pal_c_block_mmx
1281 +.hidden _dv_video_copy_pal_c_block_mmx
1282 +.type _dv_video_copy_pal_c_block_mmx,@function
1283 _dv_video_copy_pal_c_block_mmx:
1284
1285 pushl %ebp
1286 - movl %esp, %ebp
1287 pushl %esi
1288 pushl %edi
1289 pushl %ebx
1290 -
1291 - movl 8(%ebp), %edi # dest
1292 - movl 12(%ebp), %esi # src
1293
1294 - movq OFFSETBX, %mm7
1295 + LOAD_PIC_REG_BP()
1296 +
1297 + movl 20(%esp), %edi # dest
1298 + movl 24(%esp), %esi # src
1299 +
1300 + movq MUNG(OFFSETBX), %mm7
1301 paddw %mm7, %mm7
1302 pxor %mm6, %mm6
1303
1304 @@ -1097,21 +1114,23 @@ video_copy_pal_c_block_mmx_loop:
1305 ret
1306
1307 .global _dv_ppm_copy_ntsc_c_block_mmx
1308 +.hidden _dv_ppm_copy_ntsc_c_block_mmx
1309 +.type _dv_ppm_copy_ntsc_c_block_mmx,@function
1310 _dv_ppm_copy_ntsc_c_block_mmx:
1311
1312 pushl %ebp
1313 - movl %esp, %ebp
1314 pushl %esi
1315 pushl %edi
1316 pushl %ebx
1317 -
1318 - movl 8(%ebp), %edi # dest
1319 - movl 12(%ebp), %esi # src
1320 +
1321 + LOAD_PIC_REG_BP()
1322 +
1323 + movl 20(%esp), %edi # dest
1324 + movl 24(%esp), %esi # src
1325
1326 movl $4, %ebx
1327
1328 - movq ALLONE, %mm6
1329 -
1330 + movq MUNG(ALLONE), %mm6
1331 ppm_copy_ntsc_c_block_mmx_loop:
1332
1333 movq (%esi), %mm0
1334 @@ -1170,17 +1189,20 @@ ppm_copy_ntsc_c_block_mmx_loop:
1335 ret
1336
1337 .global _dv_pgm_copy_ntsc_c_block_mmx
1338 +.hidden _dv_pgm_copy_ntsc_c_block_mmx
1339 +.type _dv_pgm_copy_ntsc_c_block_mmx,@function
1340 _dv_pgm_copy_ntsc_c_block_mmx:
1341
1342 pushl %ebp
1343 - movl %esp, %ebp
1344 pushl %esi
1345 pushl %edi
1346 -
1347 - movl 8(%ebp), %edi # dest
1348 - movl 12(%ebp), %esi # src
1349
1350 - movq OFFSETBX, %mm7
1351 + LOAD_PIC_REG_BP()
1352 +
1353 + movl 16(%esp), %edi # dest
1354 + movl 20(%esp), %esi # src
1355 +
1356 + movq MUNG(OFFSETBX), %mm7
1357 paddw %mm7, %mm7
1358 pxor %mm6, %mm6
1359
1360 @@ -1327,18 +1349,21 @@ _dv_pgm_copy_ntsc_c_block_mmx:
1361 ret
1362
1363 .global _dv_video_copy_ntsc_c_block_mmx
1364 +.hidden _dv_video_copy_ntsc_c_block_mmx
1365 +.type _dv_video_copy_ntsc_c_block_mmx,@function
1366 _dv_video_copy_ntsc_c_block_mmx:
1367
1368 pushl %ebp
1369 - movl %esp, %ebp
1370 pushl %esi
1371 pushl %edi
1372 pushl %ebx
1373 -
1374 - movl 8(%ebp), %edi # dest
1375 - movl 12(%ebp), %esi # src
1376
1377 - movq OFFSETBX, %mm7
1378 + LOAD_PIC_REG_BP()
1379 +
1380 + movl 20(%esp), %edi # dest
1381 + movl 24(%esp), %esi # src
1382 +
1383 + movq MUNG(OFFSETBX), %mm7
1384 paddw %mm7, %mm7
1385 pxor %mm6, %mm6
1386
1387 --- libdv/rgbtoyuv_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2
1388 +++ libdv/rgbtoyuv_x86_64.S 15 Dec 2005 06:45:39 -0000
1389 @@ -41,9 +41,6 @@
1390 #define DV_WIDTH_SHORT_HALF 720
1391 #define DV_WIDTH_BYTE_HALF 360
1392
1393 -.global _dv_rgbtoycb_mmx_x86_64
1394 -# .global yuvtoycb_mmx_x86_64
1395 -
1396 .data
1397
1398 .align 8
1399 --- libdv/transpose_x86.S 31 Jan 2005 13:27:54 -0000 1.3
1400 +++ libdv/transpose_x86.S 15 Dec 2005 06:45:39 -0000
1401 @@ -2,6 +2,8 @@
1402
1403 .text
1404 .global _dv_transpose_mmx
1405 +.hidden _dv_transpose_mmx
1406 +.type _dv_transpose_mmx,@function
1407
1408 _dv_transpose_mmx:
1409 pushl %ebp
1410 --- libdv/transpose_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2
1411 +++ libdv/transpose_x86_64.S 15 Dec 2005 06:45:39 -0000
1412 @@ -2,6 +2,8 @@
1413
1414 .text
1415 .global _dv_transpose_mmx_x86_64
1416 +.hidden _dv_transpose_mmx_x86_64
1417 +.type _dv_transpose_mmx_x86_64,@function
1418
1419 _dv_transpose_mmx_x86_64:
1420
1421 --- libdv/vlc_x86.S 31 Jan 2005 13:27:54 -0000 1.3
1422 +++ libdv/vlc_x86.S 15 Dec 2005 06:45:40 -0000
1423 @@ -1,31 +1,42 @@
1424 #include "asmoff.h"
1425 .section .note.GNU-stack, "", @progbits
1426
1427 +#include "asm_common.S"
1428 +
1429 .text
1430 +
1431 .align 4
1432 +
1433 +.hidden asm_dv_decode_vlc
1434 +.globl asm_dv_decode_vlc
1435 + asm_dv_decode_vlc = dv_decode_vlc
1436 +
1437 .globl dv_decode_vlc
1438 .type dv_decode_vlc,@function
1439 dv_decode_vlc:
1440 pushl %ebx
1441 + pushl %ebp
1442
1443 - /* Args are at 8(%esp). */
1444 - movl 8(%esp),%eax /* %eax is bits */
1445 - movl 12(%esp),%ebx /* %ebx is maxbits */
1446 + LOAD_PIC_REG_BP()
1447 +
1448 + /* Args are at 12(%esp). */
1449 + movl 12(%esp),%eax /* %eax is bits */
1450 + movl 16(%esp),%ebx /* %ebx is maxbits */
1451 andl $0x3f,%ebx /* limit index range STL*/
1452
1453 - movl dv_vlc_class_index_mask(,%ebx,4),%edx
1454 + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
1455 andl %eax,%edx
1456 - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
1457 + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
1458 sarl %cl,%edx
1459 - movl dv_vlc_classes(,%ebx,4),%ecx
1460 + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
1461 movsbl (%ecx,%edx,1),%edx /* %edx is class */
1462 -
1463 - movl dv_vlc_index_mask(,%edx,4),%ebx
1464 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1465 +
1466 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1467 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1468 andl %eax,%ebx
1469 sarl %cl,%ebx
1470
1471 - movl dv_vlc_lookups(,%edx,4),%edx
1472 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1473 movl (%edx,%ebx,4),%edx
1474
1475 /* Now %edx holds result, like this:
1476 @@ -44,7 +55,7 @@ dv_decode_vlc:
1477 movl %edx,%ecx
1478 sarl $8,%ecx
1479 andl $0xff,%ecx
1480 - movl sign_mask(,%ecx,4),%ebx
1481 + movl MUNG_ARR(sign_mask,%ecx,4),%ebx
1482 andl %ebx,%eax
1483 negl %eax
1484 sarl $31,%eax
1485 @@ -65,14 +76,14 @@ dv_decode_vlc:
1486 *result = broken;
1487 Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
1488 */
1489 - movl 12(%esp),%ebx /* %ebx is maxbits */
1490 + movl 16(%esp),%ebx /* %ebx is maxbits */
1491 subl %ecx,%ebx
1492 sbbl %ebx,%ebx
1493 orl %ebx,%edx
1494
1495 - movl 16(%esp),%eax
1496 + movl 20(%esp),%eax
1497 movl %edx,(%eax)
1498 -
1499 + popl %ebp
1500 popl %ebx
1501 ret
1502
1503 @@ -82,21 +93,28 @@ dv_decode_vlc:
1504 .type __dv_decode_vlc,@function
1505 __dv_decode_vlc:
1506 pushl %ebx
1507 + pushl %ebp
1508
1509 - /* Args are at 8(%esp). */
1510 - movl 8(%esp),%eax /* %eax is bits */
1511 + LOAD_PIC_REG_BP()
1512 +
1513 + /* Args are at 12(%esp). */
1514 + movl 12(%esp),%eax /* %eax is bits */
1515
1516 movl %eax,%edx /* %edx is class */
1517 andl $0xfe00,%edx
1518 sarl $9,%edx
1519 +#ifdef __PIC__
1520 + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
1521 +#else
1522 movsbl dv_vlc_class_lookup5(%edx),%edx
1523 -
1524 - movl dv_vlc_index_mask(,%edx,4),%ebx
1525 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1526 +#endif
1527 +
1528 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1529 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1530 andl %eax,%ebx
1531 sarl %cl,%ebx
1532
1533 - movl dv_vlc_lookups(,%edx,4),%edx
1534 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1535 movl (%edx,%ebx,4),%edx
1536
1537 /* Now %edx holds result, like this:
1538 @@ -114,7 +132,7 @@ __dv_decode_vlc:
1539 movl %edx,%ecx
1540 sarl $8,%ecx
1541 andl $0xff,%ecx
1542 - movl sign_mask(,%ecx,4),%ecx
1543 + movl MUNG_ARR(sign_mask,%ecx,4),%ecx
1544 andl %ecx,%eax
1545 negl %eax
1546 sarl $31,%eax
1547 @@ -129,9 +147,9 @@ __dv_decode_vlc:
1548 xorl %eax,%edx
1549 subl %eax,%edx
1550
1551 - movl 12(%esp),%eax
1552 + movl 16(%esp),%eax
1553 movl %edx,(%eax)
1554 -
1555 + popl %ebp
1556 popl %ebx
1557 ret
1558
1559 @@ -142,13 +160,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1560 */
1561 .text
1562 .align 4
1563 +.hidden asm_dv_parse_ac_coeffs_pass0
1564 +.globl asm_dv_parse_ac_coeffs_pass0
1565 + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
1566 +
1567 .globl dv_parse_ac_coeffs_pass0
1568 +.type dv_parse_ac_coeffs_pass0,@function
1569 dv_parse_ac_coeffs_pass0:
1570 pushl %ebx
1571 pushl %edi
1572 pushl %esi
1573 pushl %ebp
1574
1575 + LOAD_PIC_REG_SI()
1576 +
1577 #define ARGn(N) (20+(4*(N)))(%esp)
1578
1579 /*
1580 @@ -161,8 +186,10 @@ dv_parse_ac_coeffs_pass0:
1581 ebp bl
1582 */
1583 movl ARGn(2),%ebp
1584 +#ifndef __PIC__
1585 movl ARGn(0),%esi
1586 movl bitstream_t_buf(%esi),%esi
1587 +#endif
1588 movl dv_block_t_offset(%ebp),%edi
1589 movl dv_block_t_reorder(%ebp),%ebx
1590
1591 @@ -172,7 +199,11 @@ dv_parse_ac_coeffs_pass0:
1592
1593 movq dv_block_t_coeffs(%ebp),%mm1
1594 pxor %mm0,%mm0
1595 +#ifdef __PIC__
1596 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1597 +#else
1598 pand const_f_0_0_0,%mm1
1599 +#endif
1600 movq %mm1,dv_block_t_coeffs(%ebp)
1601 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1602 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1603 @@ -193,9 +224,17 @@ dv_parse_ac_coeffs_pass0:
1604 readloop:
1605 movl %edi,%ecx
1606 shrl $3,%ecx
1607 +#ifdef __PIC__
1608 + pushl %esi /* %esi is the PIC base here; save it while it is borrowed as the buffer pointer */
1609 + movl ARGn(1),%esi /* ARGn() is %esp-relative: after the push, ARGn(1) is the slot ARGn(0) named before */
1610 + movl bitstream_t_buf(%esi),%esi /* same buffer load the non-PIC build does once before the loop */
1611 +#endif
1612 movzbl (%esi,%ecx,1),%eax
1613 movzbl 1(%esi,%ecx,1),%edx
1614 movzbl 2(%esi,%ecx,1),%ecx
1615 +#ifdef __PIC__
1616 + popl %esi /* restore the PIC base */
1617 +#endif
1618 shll $16,%eax
1619 shll $8,%edx
1620 orl %ecx,%eax
1621 @@ -219,7 +258,11 @@ readloop:
1622
1623 /* Attempt to use the shortcut first. If it hits, then
1624 this vlc term has been decoded. */
1625 +#ifdef __PIC__
1626 + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
1627 +#else
1628 movl dv_vlc_class1_shortcut(,%ecx,4),%edx
1629 +#endif
1630 test $0x80,%edx
1631 je done_decode
1632
1633 @@ -230,12 +273,19 @@ readloop:
1634 movl %ebx,dv_block_t_reorder(%ebp)
1635
1636 /* %eax is bits */
1637 -
1638 +#ifdef __PIC__
1639 + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
1640 +
1641 + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
1642 + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
1643 + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
1644 +#else
1645 movsbl dv_vlc_class_lookup5(%ecx),%ecx
1646
1647 movl dv_vlc_index_mask(,%ecx,4),%ebx
1648 movl dv_vlc_lookups(,%ecx,4),%edx
1649 movl dv_vlc_index_rshift(,%ecx,4),%ecx
1650 +#endif
1651 andl %eax,%ebx
1652 sarl %cl,%ebx
1653
1654 @@ -258,7 +308,11 @@ readloop:
1655 movl %edx,%ecx
1656 sarl $8,%ecx
1657 andl $0xff,%ecx
1658 +#ifdef __PIC__
1659 + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
1660 +#else
1661 movl sign_mask(,%ecx,4),%ecx
1662 +#endif
1663 andl %ecx,%eax
1664 negl %eax
1665 sarl $31,%eax
1666 @@ -328,10 +382,16 @@ alldone:
1667
1668 slowpath:
1669 /* slow path: use dv_decode_vlc */;
1670 +#ifdef __PIC__
1671 + pushl %esi /* reserve the argument slot; it temporarily holds the PIC base */
1672 + leal vlc@GOTOFF(%esi),%esi /* %esi = &vlc, computed relative to the PIC base */
1673 + xchgl %esi,(%esp) /* last parameter is &vlc; %esi gets the PIC base back */
1674 +#else
1675 pushl $vlc /* last parameter is &vlc */
1676 +#endif
1677 pushl %edx /* bits_left */
1678 pushl %eax /* bits */
1679 - call dv_decode_vlc
1680 + call asm_dv_decode_vlc
1681 addl $12,%esp
1682 test $0x80,%edx /* If (vlc.run < 0) break */
1683 jne escape
1684 @@ -361,12 +421,15 @@ show16:
1685 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1686 */
1687 .globl dv_parse_video_segment
1688 + .type dv_parse_video_segment,@function
1689 dv_parse_video_segment:
1690 pushl %ebx
1691 pushl %edi
1692 pushl %esi
1693 pushl %ebp
1694
1695 + LOAD_PIC_REG_SI()
1696 +
1697 #define ARGn(N) (20+(4*(N)))(%esp)
1698
1699 movl ARGn(1),%eax /* quality */
1700 @@ -375,7 +438,11 @@ dv_parse_video_segment:
1701 jz its_mono
1702 movl $6,%ebx
1703 its_mono:
1704 +#ifdef __PIC__
1705 + movl %ebx,n_blocks@GOTOFF(%esi)
1706 +#else
1707 movl %ebx,n_blocks
1708 +#endif
1709
1710 /*
1711 * ebx seg/b
1712 @@ -386,15 +453,22 @@ its_mono:
1713 * ebp bl
1714 */
1715 movl ARGn(0),%ebx
1716 +#ifndef __PIC__
1717 movl dv_videosegment_t_bs(%ebx),%esi
1718 movl bitstream_t_buf(%esi),%esi
1719 +#endif
1720 leal dv_videosegment_t_mb(%ebx),%edi
1721
1722 movl $0,%eax
1723 movl $0,%ecx
1724 macloop:
1725 +#ifdef __PIC__
1726 + movl %eax,m@GOTOFF(%esi)
1727 + movl %ecx,mb_start@GOTOFF(%esi)
1728 +#else
1729 movl %eax,m
1730 movl %ecx,mb_start
1731 +#endif
1732
1733 movl ARGn(0),%ebx
1734
1735 @@ -402,7 +476,15 @@ macloop:
1736 /* mb->qno = bitstream_get(bs,4); */
1737 movl %ecx,%edx
1738 shr $3,%edx
1739 +#ifdef __PIC__
1740 + pushl %esi
1741 + movl dv_videosegment_t_bs(%ebx),%esi
1742 + movl bitstream_t_buf(%esi),%esi
1743 +#endif
1744 movzbl 3(%esi,%edx,1),%edx
1745 +#ifdef __PIC__
1746 + popl %esi
1747 +#endif
1748 andl $0xf,%edx
1749 movl %edx,dv_macroblock_t_qno(%edi)
1750
1751 @@ -413,7 +495,11 @@ macloop:
1752 movl %edx,dv_macroblock_t_eob_count(%edi)
1753
1754 /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
1755 +#ifdef __PIC__
1756 + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
1757 +#else
1758 movl dv_super_map_vertical(,%eax,4),%edx
1759 +#endif
1760 movl dv_videosegment_t_i(%ebx),%ecx
1761 addl %ecx,%edx
1762
1763 @@ -424,11 +510,20 @@ skarly:
1764 andl $1,%ecx
1765 shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
1766
1767 +#ifdef __PIC__
1768 + leal mod_10@GOTOFF(%esi,%edx),%edx
1769 + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1770 +#else
1771 movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1772 +#endif
1773 movl %edx,dv_macroblock_t_i(%edi)
1774
1775 /* mb->j = dv_super_map_horizontal[m]; */
1776 +#ifdef __PIC__
1777 + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
1778 +#else
1779 movl dv_super_map_horizontal(,%eax,4),%edx
1780 +#endif
1781 movl %edx,dv_macroblock_t_j(%edi)
1782
1783 /* mb->k = seg->k; */
1784 @@ -447,12 +542,29 @@ blkloop:
1785 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
1786 */
1787 /* dc = bitstream_get(bs,9); */
1788 +#ifdef __PIC__
1789 + movl mb_start@GOTOFF(%esi),%ecx
1790 +#else
1791 movl mb_start,%ecx
1792 +#endif
1793 shr $3,%ecx
1794 +#ifdef __PIC__
1795 + movzbl blk_start@GOTOFF(%esi,%ebx),%edx
1796 +#else
1797 movzbl blk_start(%ebx),%edx
1798 +#endif
1799 addl %ecx,%edx
1800 +#ifdef __PIC__
1801 + pushl %esi
1802 + movl ARGn(1),%esi
1803 + movl dv_videosegment_t_bs(%esi),%esi
1804 + movl bitstream_t_buf(%esi),%esi
1805 +#endif
1806 movzbl (%esi,%edx,1),%eax /* hi byte */
1807 movzbl 1(%esi,%edx,1),%ecx /* lo byte */
1808 +#ifdef __PIC__
1809 + popl %esi
1810 +#endif
1811 shll $8,%eax
1812 orl %ecx,%eax
1813
1814 @@ -479,7 +591,11 @@ blkloop:
1815
1816 /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
1817 shll $6,%eax
1818 +#ifdef __PIC__
1819 + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax /* form the address; addl with a memory operand would add the table contents */
1820 +#else
1821 addl $(dv_reorder+1),%eax
1822 +#endif
1823 movl %eax,dv_block_t_reorder(%ebp)
1824
1825 /* bl->reorder_sentinel = bl->reorder + 63; */
1826 @@ -487,13 +603,22 @@ blkloop:
1827 movl %eax,dv_block_t_reorder_sentinel(%ebp)
1828
1829 /* bl->offset= mb_start + dv_parse_bit_start[b]; */
1830 +#ifdef __PIC__
1831 + movl mb_start@GOTOFF(%esi),%ecx
1832 + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
1833 +#else
1834 movl mb_start,%ecx
1835 movl dv_parse_bit_start(,%ebx,4),%eax
1836 +#endif
1837 addl %ecx,%eax
1838 movl %eax,dv_block_t_offset(%ebp)
1839
1840 /* bl->end= mb_start + dv_parse_bit_end[b]; */
1841 +#ifdef __PIC__
1842 + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
1843 +#else
1844 movl dv_parse_bit_end(,%ebx,4),%eax
1845 +#endif
1846 addl %ecx,%eax
1847 movl %eax,dv_block_t_end(%ebp)
1848
1849 @@ -505,7 +630,11 @@ blkloop:
1850 /* no AC pass. Just zero out the remaining coeffs */
1851 movq dv_block_t_coeffs(%ebp),%mm1
1852 pxor %mm0,%mm0
1853 +#ifdef __PIC__
1854 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1855 +#else
1856 pand const_f_0_0_0,%mm1
1857 +#endif
1858 movq %mm1,dv_block_t_coeffs(%ebp)
1859 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1860 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1861 @@ -530,18 +659,27 @@ do_ac_pass:
1862 pushl %ebp
1863 pushl %edi
1864 pushl %eax
1865 - call dv_parse_ac_coeffs_pass0
1866 + call asm_dv_parse_ac_coeffs_pass0
1867 addl $12,%esp
1868 done_ac:
1869
1870 +#ifdef __PIC__
1871 + movl n_blocks@GOTOFF(%esi),%eax
1872 +#else
1873 movl n_blocks,%eax
1874 +#endif
1875 addl $dv_block_t_size,%ebp
1876 incl %ebx
1877 cmpl %eax,%ebx
1878 jnz blkloop
1879
1880 +#ifdef __PIC__
1881 + movl m@GOTOFF(%esi),%eax
1882 + movl mb_start@GOTOFF(%esi),%ecx
1883 +#else
1884 movl m,%eax
1885 movl mb_start,%ecx
1886 +#endif
1887 addl $(8 * 80),%ecx
1888 addl $dv_macroblock_t_size,%edi
1889 incl %eax
1890 @@ -559,7 +697,7 @@ done_ac:
1891
1892 andl $DV_QUALITY_AC_MASK,%eax
1893 cmpl $DV_QUALITY_AC_2,%eax
1894 - jz dv_parse_ac_coeffs
1895 + jz asm_dv_parse_ac_coeffs
1896 movl $0,%eax
1897 ret
1898
1899 --- libdv/vlc_x86_64.S 31 Jan 2005 13:27:54 -0000 1.3
1900 +++ libdv/vlc_x86_64.S 15 Dec 2005 06:45:40 -0000
1901 @@ -171,7 +171,8 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1902 .text
1903 .align 4
1904 .globl dv_parse_ac_coeffs_pass0
1905 -
1906 +.type dv_parse_ac_coeffs_pass0,@function
1907 +
1908 dv_parse_ac_coeffs_pass0:
1909
1910 /* Args are at rdi=bs, rsi=mb, rdx=bl */
1911 @@ -424,6 +425,7 @@ show16: /* not u
1912 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1913 */
1914 .globl dv_parse_video_segment
1915 + .type dv_parse_video_segment,@function
1916 dv_parse_video_segment:
1917
1918 /* Args are at rdi=seg, rsi=quality */
1919 --- /dev/null 2005-12-15 06:00:01.513317500 +0000
1920 +++ libdv/asm_common.S 2005-12-14 19:57:06.000000000 +0000
1921 @@ -0,0 +1,37 @@
1922 +/* public domain, do what you want */
1923 +
1924 +#ifdef __PIC__
1925 +# define MUNG(sym) sym##@GOTOFF(%ebp)
1926 +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
1927 +#else
1928 +# define MUNG(sym) sym
1929 +# define MUNG_ARR(sym, args...) sym(,##args)
1930 +#endif
1931 +
1932 +#ifdef __PIC__
1933 +# undef __i686 /* gcc define gets in our way */
1934 + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
1935 +.globl __i686.get_pc_thunk.bp
1936 + .hidden __i686.get_pc_thunk.bp
1937 + .type __i686.get_pc_thunk.bp,@function
1938 +__i686.get_pc_thunk.bp:
1939 + movl (%esp), %ebp /* %ebp = our return address, i.e. the address of the instruction after the call */
1940 + ret
1941 +# define LOAD_PIC_REG_BP() \
1942 + call __i686.get_pc_thunk.bp ; \
1943 + addl $_GLOBAL_OFFSET_TABLE_, %ebp
1944 +
1945 + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
1946 +.globl __i686.get_pc_thunk.si
1947 + .hidden __i686.get_pc_thunk.si
1948 + .type __i686.get_pc_thunk.si,@function
1949 +__i686.get_pc_thunk.si:
1950 + movl (%esp), %esi /* %esi = our return address, i.e. the address of the instruction after the call */
1951 + ret
1952 +# define LOAD_PIC_REG_SI() \
1953 + call __i686.get_pc_thunk.si ; \
1954 + addl $_GLOBAL_OFFSET_TABLE_, %esi
1955 +#else
1956 +# define LOAD_PIC_REG_BP()
1957 +# define LOAD_PIC_REG_SI()
1958 +#endif