]> git.wh0rd.org - patches.git/blob - libdv-1.0.0-pic.patch
sync vapier-m
[patches.git] / libdv-1.0.0-pic.patch
1 --- libdv-0.104-old/libdv/asm_common.S
2 +++ libdv-0.104/libdv/asm_common.S
3 @@ -0,0 +1,29 @@
4 +/* public domain, do what you want */
5 +
6 +#ifdef __PIC__
7 +# define MUNG(sym) sym##@GOTOFF(%ebp)
8 +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
9 +#else
10 +# define MUNG(sym) sym
11 +# define MUNG_ARR(sym, args...) sym(,##args)
12 +#endif
13 +
14 +#ifdef __PIC__
15 +# undef __i686 /* gcc define gets in our way */
16 +# define LOAD_PIC_REG(reg) \
17 + .ifndef __i686.get_pc_thunk.reg; \
18 + .section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
19 + .global __i686.get_pc_thunk.reg; \
20 + .hidden __i686.get_pc_thunk.reg; \
21 + .type __i686.get_pc_thunk.reg,@function; \
22 + __i686.get_pc_thunk.reg: \
23 + movl (%esp), %e##reg; \
24 + ret; \
25 + .size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
26 + .previous; \
27 + .endif; \
28 + call __i686.get_pc_thunk.reg; \
29 + addl $_GLOBAL_OFFSET_TABLE_, %e##reg
30 +#else
31 +# define LOAD_PIC_REG(reg)
32 +#endif
33 --- libdv-0.104-old/libdv/dct_block_mmx.S
34 +++ libdv-0.104/libdv/dct_block_mmx.S
35 @@ -53,19 +53,22 @@ scratch2: .quad 0
36
37 .section .note.GNU-stack, "", @progbits
38
39 +#include "asm_common.S"
40 +
41 .text
42
43 .align 8
44 .global _dv_dct_88_block_mmx
45 .hidden _dv_dct_88_block_mmx
46 .type _dv_dct_88_block_mmx,@function
47 _dv_dct_88_block_mmx:
48
49 pushl %ebp
50 - movl %esp, %ebp
51 pushl %esi
52
53 - movl 8(%ebp), %esi # source
54 + LOAD_PIC_REG(bp)
55 +
56 + movl 12(%esp), %esi # source
57
58 # column 0
59 movq 16*0(%esi), %mm0 # v0
60 @@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
61
62 movq 16*3(%esi), %mm5 # v3
63 movq 16*4(%esi), %mm7 # v4
64 - movq %mm7, scratch1 # scratch1: v4 ;
65 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
66 movq %mm5, %mm7 # duplicate v3
67 - paddw scratch1, %mm5 # v03: v3+v4
68 - psubw scratch1, %mm7 # v04: v3-v4
69 - movq %mm5, scratch2 # scratch2: v03
70 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
71 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
72 + movq %mm5, MUNG(scratch2) # scratch2: v03
73 movq %mm0, %mm5 # mm5: v00
74
75 - paddw scratch2, %mm0 # v10: v00+v03
76 - psubw scratch2, %mm5 # v13: v00-v03
77 - movq %mm3, scratch3 # scratch3: v02
78 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
79 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
80 + movq %mm3, MUNG(scratch3) # scratch3: v02
81 movq %mm1, %mm3 # duplicate v01
82
83 - paddw scratch3, %mm1 # v11: v01+v02
84 - psubw scratch3, %mm3 # v12: v01-v02
85 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
86 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
87
88 - movq %mm6, scratch4 # scratch4: v05
89 + movq %mm6, MUNG(scratch4) # scratch4: v05
90 movq %mm0, %mm6 # duplicate v10
91
92 paddw %mm1, %mm0 # v10+v11
93 @@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
94 movq %mm6, 16*4(%esi) # out4: v10-v11
95
96 movq %mm4, %mm0 # mm0: v06
97 - paddw scratch4, %mm4 # v15: v05+v06
98 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
99 paddw %mm2, %mm0 # v16: v07+v06
100
101 - pmulhw WA3, %mm4 # v35~: WA3*v15
102 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
103 psllw $1, %mm4 # v35: compensate the coeefient scale
104
105 movq %mm4, %mm6 # duplicate v35
106 @@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
107
108 paddw %mm5, %mm3 # v22: v12+v13
109
110 - pmulhw WA1, %mm3 # v32~: WA1*v22
111 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
112 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
113 movq %mm5, %mm6 # duplicate v13
114
115 @@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
116 movq %mm6, 16*6(%esi) # out6: v13-v32
117
118
119 - paddw scratch4, %mm7 # v14n: v04+v05
120 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
121 movq %mm0, %mm5 # duplicate v16
122
123 psubw %mm7, %mm0 # va1: v16-v14n
124 - pmulhw WA5, %mm0 # va0~: va1*WA5
125 - pmulhw WA4, %mm5 # v36~~: v16*WA4
126 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
127 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
128 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
129 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
130 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
131 psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
132
133 @@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
134
135 movq 16*3(%esi), %mm5 # v3
136 movq 16*4(%esi), %mm7 # v4
137 - movq %mm7, scratch1 # scratch1: v4 ;
138 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
139 movq %mm5, %mm7 # duplicate v3
140 - paddw scratch1, %mm5 # v03: v3+v4
141 - psubw scratch1, %mm7 # v04: v3-v4
142 - movq %mm5, scratch2 # scratch2: v03
143 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
144 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
145 + movq %mm5, MUNG(scratch2) # scratch2: v03
146 movq %mm0, %mm5 # mm5: v00
147
148 - paddw scratch2, %mm0 # v10: v00+v03
149 - psubw scratch2, %mm5 # v13: v00-v03
150 - movq %mm3, scratch3 # scratc3: v02
151 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
152 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
153 + movq %mm3, MUNG(scratch3) # scratch3: v02
154 movq %mm1, %mm3 # duplicate v01
155
156 - paddw scratch3, %mm1 # v11: v01+v02
157 - psubw scratch3, %mm3 # v12: v01-v02
158 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
159 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
160
161 - movq %mm6, scratch4 # scratc4: v05
162 + movq %mm6, MUNG(scratch4) # scratch4: v05
163 movq %mm0, %mm6 # duplicate v10
164
165 paddw %mm1, %mm0 # v10+v11
166 @@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
167 movq %mm6, 16*4(%esi) # out4: v10-v11
168
169 movq %mm4, %mm0 # mm0: v06
170 - paddw scratch4, %mm4 # v15: v05+v06
171 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
172 paddw %mm2, %mm0 # v16: v07+v06
173
174 - pmulhw WA3, %mm4 # v35~: WA3*v15
175 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
176 psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
177
178 movq %mm4, %mm6 # duplicate v35
179 @@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
180
181 paddw %mm5, %mm3 # v22: v12+v13
182
183 - pmulhw WA1, %mm3 # v32~: WA3*v15
184 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
185 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
186 movq %mm5, %mm6 # duplicate v13
187
188 @@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
189 movq %mm5, 16*2(%esi) # out2: v13+v32
190 movq %mm6, 16*6(%esi) # out6: v13-v32
191
192 - paddw scratch4, %mm7 # v14n: v04+v05
193 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
194 movq %mm0, %mm5 # duplicate v16
195
196 psubw %mm7, %mm0 # va1: v16-v14n
197 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
198 - pmulhw WA5, %mm0 # va0~: va1*WA5
199 - pmulhw WA4, %mm5 # v36~~: v16*WA4
200 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
201 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
202 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
203 psllw $16-NSHIFT, %mm7
204 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
205 # scale note that WA4 is shifted 1 bit less than the others
206 @@ -748,11 +755,12 @@ _dv_dct_block_mmx_postscale_88:
207 _dv_dct_248_block_mmx:
208
209 pushl %ebp
210 - movl %esp, %ebp
211 pushl %esi
212 pushl %edi
213
214 - movl 8(%ebp), %esi # source
215 + LOAD_PIC_REG(bp)
216 +
217 + movl 16(%esp), %esi # source
218
219 # column 0
220
221 @@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
222 paddw %mm1, %mm0 # v20: v10+v11
223 psubw %mm1, %mm3 # v21: v10-v11
224
225 - pmulhw WA1, %mm5 # v32~: WA1*v22
226 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
227 movq %mm4, %mm2
228 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
229
230 @@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
231 paddw %mm1, %mm0 # v20: v10+v11
232 psubw %mm1, %mm3 # v21: v10-v11
233
234 - pmulhw WA1, %mm5 # v32~: WA1*v22
235 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
236 movq %mm4, %mm2
237 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
238
239 @@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
240 paddw %mm1, %mm0 # v20: v10+v11
241 psubw %mm1, %mm3 # v21: v10-v11
242
243 - pmulhw WA1, %mm5 # v32~: WA1*v22
244 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
245 movq %mm4, %mm2
246 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
247
248 @@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
249 paddw %mm1, %mm0 # v20: v10+v11
250 psubw %mm1, %mm3 # v21: v10-v11
251
252 - pmulhw WA1, %mm5 # v32~: WA1*v22
253 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
254 movq %mm4, %mm2
255 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
256
257 --- libdv-0.104-old/libdv/dv.c
258 +++ libdv-0.104/libdv/dv.c
259 @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
260 } /* dv_reconfigure */
261
262
263 +extern uint8_t dv_quant_offset[4];
264 +extern uint8_t dv_quant_shifts[22][4];
265 +
266 static inline void
267 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
268 int i;
269 @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
270 dv_idct_248 (co248, mb->b[i].coeffs);
271 } else {
272 #if ARCH_X86
273 - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
274 + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
275 _dv_idct_88(mb->b[i].coeffs);
276 #elif ARCH_X86_64
277 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
278 @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
279 dv_idct_248 (co248, mb->b[b].coeffs);
280 } else {
281 #if ARCH_X86
282 - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
283 + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
284 _dv_weight_88_inverse(bl->coeffs);
285 _dv_idct_88(bl->coeffs);
286 #elif ARCH_X86_64
287 --- libdv-0.104-old/libdv/encode.c
288 +++ libdv-0.104/libdv/encode.c
289 @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
290 }
291
292 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
293 - dv_vlc_entry_t ** out);
294 + dv_vlc_entry_t ** out,
295 + dv_vlc_entry_t * lookup);
296
297 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
298 dv_vlc_entry_t ** out);
299 @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
300 #elif ARCH_X86
301 int num_bits;
302
303 - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
304 + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
305 emms();
306 #else
307 int num_bits;
308 @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
309 return num_bits;
310 }
311
312 -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
313 +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
314 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
315
316 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
317 @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
318 #elif ARCH_X86_64
319 return _dv_vlc_num_bits_block_x86_64(coeffs);
320 #else
321 - return _dv_vlc_num_bits_block_x86(coeffs);
322 + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
323 #endif
324 }
325
326 --- libdv-0.104-old/libdv/encode_x86.S
327 +++ libdv-0.104/libdv/encode_x86.S
328 @@ -23,9 +23,6 @@
329 * The libdv homepage is http://libdv.sourceforge.net/.
330 */
331
332 -.data
333 -ALLONE: .word 1,1,1,1
334 -VLCADDMASK: .byte 255,0,0,0,255,0,0,0
335
336
337 .section .note.GNU-stack, "", @progbits
338 @@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:
339
340 movl $63, %ecx
341
342 - movl vlc_encode_lookup, %esi
343 + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
344
345 pxor %mm0, %mm0
346 pxor %mm2, %mm2
347 - movq VLCADDMASK, %mm1
348 + pushl $0x000000FF # these four lines
349 + pushl $0x000000FF # load VLCADDMASK
350 + movq (%esp), %mm1 # into %mm1 off the stack
351 + addl $8, %esp # --> no TEXTRELs
352 xorl %ebp, %ebp
353 subl $8, %edx
354 vlc_encode_block_mmx_loop:
355 @@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:
356 addl $2, %edi
357
358 movl $63, %ecx
359 - movl vlc_num_bits_lookup, %esi
360 + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
361
362 vlc_num_bits_block_x86_loop:
363 movw (%edi), %ax
364 @@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
365 paddw %mm5, %mm1
366
367 paddw %mm1, %mm0
368 -
369 - pmaddwd ALLONE, %mm0
370 +
371 + pushl $0x00010001 # these four lines
372 + pushl $0x00010001 # load ALLONE
373 + pmaddwd (%esp), %mm0 # into %mm0 off the stack
374 + addl $8, %esp # --> no TEXTRELs
375 movq %mm0, %mm1
376 psrlq $32, %mm1
377 paddd %mm1, %mm0
378 --- libdv-0.104-old/libdv/idct_block_mmx.S
379 +++ libdv-0.104/libdv/idct_block_mmx.S
380 @@ -8,17 +8,21 @@
381
382 .section .note.GNU-stack, "", @progbits
383
384 +#include "asm_common.S"
385 +
386 .text
387 .align 4
388 .global _dv_idct_block_mmx
389 .hidden _dv_idct_block_mmx
390 .type _dv_idct_block_mmx,@function
391 _dv_idct_block_mmx:
392 pushl %ebp
393 - movl %esp,%ebp
394 pushl %esi
395 - leal preSC, %ecx
396 - movl 8(%ebp),%esi /* source matrix */
397 +
398 + LOAD_PIC_REG(bp)
399 +
400 + leal MUNG(preSC), %ecx
401 + movl 12(%esp),%esi /* source matrix */
402
403 /*
404 * column 0: even part
405 @@ -35,7 +41,7 @@ _dv_idct_block_mmx:
406 movq %mm1, %mm2 /* added 11/1/96 */
407 pmulhw 8*8(%esi),%mm5 /* V8 */
408 psubsw %mm0, %mm1 /* V16 */
409 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
410 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
411 paddsw %mm0, %mm2 /* V17 */
412 movq %mm2, %mm0 /* duplicate V17 */
413 psraw $1, %mm2 /* t75=t82 */
414 @@ -76,7 +82,7 @@ _dv_idct_block_mmx:
415 paddsw %mm0, %mm3 /* V29 ; free mm0 */
416 movq %mm7, %mm1 /* duplicate V26 */
417 psraw $1, %mm3 /* t91=t94 */
418 - pmulhw x539f539f539f539f,%mm7 /* V33 */
419 + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
420 psraw $1, %mm1 /* t96 */
421 movq %mm5, %mm0 /* duplicate V2 */
422 psraw $2, %mm4 /* t85=t87 */
423 @@ -84,15 +90,15 @@ _dv_idct_block_mmx:
424 psubsw %mm4, %mm0 /* V28 ; free mm4 */
425 movq %mm0, %mm2 /* duplicate V28 */
426 psraw $1, %mm5 /* t90=t93 */
427 - pmulhw x4546454645464546,%mm0 /* V35 */
428 + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
429 psraw $1, %mm2 /* t97 */
430 movq %mm5, %mm4 /* duplicate t90=t93 */
431 psubsw %mm2, %mm1 /* V32 ; free mm2 */
432 - pmulhw x61f861f861f861f8,%mm1 /* V36 */
433 + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
434 psllw $1, %mm7 /* t107 */
435 paddsw %mm3, %mm5 /* V31 */
436 psubsw %mm3, %mm4 /* V30 ; free mm3 */
437 - pmulhw x5a825a825a825a82,%mm4 /* V34 */
438 + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
439 nop
440 psubsw %mm1, %mm0 /* V38 */
441 psubsw %mm7, %mm1 /* V37 ; free mm7 */
442 @@ -159,7 +165,7 @@ _dv_idct_block_mmx:
443 psubsw %mm7, %mm1 /* V50 */
444 pmulhw 8*9(%esi), %mm5 /* V9 */
445 paddsw %mm7, %mm2 /* V51 */
446 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
447 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
448 movq %mm2, %mm6 /* duplicate V51 */
449 psraw $1, %mm2 /* t138=t144 */
450 movq %mm3, %mm4 /* duplicate V1 */
451 @@ -200,11 +206,11 @@ _dv_idct_block_mmx:
452 * even more by doing the correction step in a later stage when the number
453 * is actually multiplied by 16
454 */
455 - paddw x0005000200010001, %mm4
456 + paddw MUNG(x0005000200010001), %mm4
457 psubsw %mm6, %mm3 /* V60 ; free mm6 */
458 psraw $1, %mm0 /* t154=t156 */
459 movq %mm3, %mm1 /* duplicate V60 */
460 - pmulhw x539f539f539f539f, %mm1 /* V67 */
461 + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
462 movq %mm5, %mm6 /* duplicate V3 */
463 psraw $2, %mm4 /* t148=t150 */
464 paddsw %mm4, %mm5 /* V61 */
465 @@ -213,13 +219,13 @@ _dv_idct_block_mmx:
466 psllw $1, %mm1 /* t169 */
467 paddsw %mm0, %mm5 /* V65 -> result */
468 psubsw %mm0, %mm4 /* V64 ; free mm0 */
469 - pmulhw x5a825a825a825a82, %mm4 /* V68 */
470 + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
471 psraw $1, %mm3 /* t158 */
472 psubsw %mm6, %mm3 /* V66 */
473 movq %mm5, %mm2 /* duplicate V65 */
474 - pmulhw x61f861f861f861f8, %mm3 /* V70 */
475 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
476 psllw $1, %mm6 /* t165 */
477 - pmulhw x4546454645464546, %mm6 /* V69 */
478 + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
479 psraw $1, %mm2 /* t172 */
480 /* moved from next block */
481 movq 8*5(%esi), %mm0 /* V56 */
482 @@ -344,7 +350,7 @@ _dv_idct_block_mmx:
483 * movq 8*13(%esi), %mm4 tmt13
484 */
485 psubsw %mm4, %mm3 /* V134 */
486 - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
487 + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
488 movq 8*9(%esi), %mm6 /* tmt9 */
489 paddsw %mm4, %mm5 /* V135 ; mm4 free */
490 movq %mm0, %mm4 /* duplicate tmt1 */
491 @@ -373,17 +379,17 @@ _dv_idct_block_mmx:
492 psubsw %mm7, %mm0 /* V144 */
493 movq %mm0, %mm3 /* duplicate V144 */
494 paddsw %mm7, %mm2 /* V147 ; free mm7 */
495 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
496 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
497 movq %mm1, %mm7 /* duplicate tmt3 */
498 paddsw %mm5, %mm7 /* V145 */
499 psubsw %mm5, %mm1 /* V146 ; free mm5 */
500 psubsw %mm1, %mm3 /* V150 */
501 movq %mm7, %mm5 /* duplicate V145 */
502 - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
503 + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
504 psubsw %mm2, %mm5 /* V148 */
505 - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
506 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
507 psllw $2, %mm0 /* t311 */
508 - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
509 + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
510 paddsw %mm2, %mm7 /* V149 ; free mm2 */
511 psllw $1, %mm1 /* t313 */
512 nop /* without the nop - freeze here for one clock */
513 @@ -409,7 +415,7 @@ _dv_idct_block_mmx:
514 paddsw %mm3, %mm6 /* V164 ; free mm3 */
515 movq %mm4, %mm3 /* duplicate V142 */
516 psubsw %mm5, %mm4 /* V165 ; free mm5 */
517 - movq %mm2, scratch7 /* out7 */
518 + movq %mm2, MUNG(scratch7) /* out7 */
519 psraw $4, %mm6
520 psraw $4, %mm4
521 paddsw %mm5, %mm3 /* V162 */
522 @@ -420,11 +426,11 @@ _dv_idct_block_mmx:
523 */
524 movq %mm6, 8*9(%esi) /* out9 */
525 paddsw %mm1, %mm0 /* V161 */
526 - movq %mm3, scratch5 /* out5 */
527 + movq %mm3, MUNG(scratch5) /* out5 */
528 psubsw %mm1, %mm5 /* V166 ; free mm1 */
529 movq %mm4, 8*11(%esi) /* out11 */
530 psraw $4, %mm5
531 - movq %mm0, scratch3 /* out3 */
532 + movq %mm0, MUNG(scratch3) /* out3 */
533 movq %mm2, %mm4 /* duplicate V140 */
534 movq %mm5, 8*13(%esi) /* out13 */
535 paddsw %mm7, %mm2 /* V160 */
536 @@ -434,7 +440,7 @@ _dv_idct_block_mmx:
537 /* moved from the next block */
538 movq 8*3(%esi), %mm7
539 psraw $4, %mm4
540 - movq %mm2, scratch1 /* out1 */
541 + movq %mm2, MUNG(scratch1) /* out1 */
542 /* moved from the next block */
543 movq %mm0, %mm1
544 movq %mm4, 8*15(%esi) /* out15 */
545 @@ -491,15 +497,15 @@ _dv_idct_block_mmx:
546 paddsw %mm4, %mm3 /* V113 ; free mm4 */
547 movq %mm0, %mm4 /* duplicate V110 */
548 paddsw %mm1, %mm2 /* V111 */
549 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
550 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
551 psubsw %mm1, %mm5 /* V112 ; free mm1 */
552 psubsw %mm5, %mm4 /* V116 */
553 movq %mm2, %mm1 /* duplicate V111 */
554 - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
555 + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
556 psubsw %mm3, %mm2 /* V114 */
557 - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
558 + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
559 paddsw %mm3, %mm1 /* V115 ; free mm3 */
560 - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
561 + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
562 psllw $2, %mm0 /* t266 */
563 movq %mm1, (%esi) /* save V115 */
564 psllw $1, %mm5 /* t268 */
565 @@ -517,7 +523,7 @@ _dv_idct_block_mmx:
566 movq %mm6, %mm3 /* duplicate tmt4 */
567 psubsw %mm0, %mm6 /* V100 */
568 paddsw %mm0, %mm3 /* V101 ; free mm0 */
569 - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
570 + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
571 movq %mm7, %mm5 /* duplicate tmt0 */
572 movq 8*8(%esi), %mm1 /* tmt8 */
573 paddsw %mm1, %mm7 /* V103 */
574 @@ -551,10 +557,10 @@ _dv_idct_block_mmx:
575 movq 8*2(%esi), %mm3 /* V123 */
576 paddsw %mm4, %mm7 /* out0 */
577 /* moved up from next block */
578 - movq scratch3, %mm0
579 + movq MUNG(scratch3), %mm0
580 psraw $4, %mm7
581 /* moved up from next block */
582 - movq scratch5, %mm6
583 + movq MUNG(scratch5), %mm6
584 psubsw %mm4, %mm1 /* out14 ; free mm4 */
585 paddsw %mm3, %mm5 /* out2 */
586 psraw $4, %mm1
587 @@ -565,7 +571,7 @@ _dv_idct_block_mmx:
588 movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
589 psraw $4, %mm2
590 /* moved up to the prev block */
591 - movq scratch7, %mm4
592 + movq MUNG(scratch7), %mm4
593 /* moved up to the prev block */
594 psraw $4, %mm0
595 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
596 @@ -579,7 +585,7 @@ _dv_idct_block_mmx:
597 * psraw $4, %mm0
598 * psraw $4, %mm6
599 */
600 - movq scratch1, %mm1
601 + movq MUNG(scratch1), %mm1
602 psraw $4, %mm4
603 movq %mm0, 8*3(%esi) /* out3 */
604 psraw $4, %mm1
605 --- libdv-0.104-old/libdv/parse.c
606 +++ libdv-0.104/libdv/parse.c
607 @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
608 exit(0);
609 #endif
610 } /* dv_parse_ac_coeffs */
611 +#if defined __GNUC__ && __ELF__
612 +# define dv_strong_hidden_alias(name, aliasname) \
613 + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
614 +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
615 +#else
616 +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
617 +#endif
618
619 /* ---------------------------------------------------------------------------
620 */
621 --- libdv-0.104-old/libdv/quant.c
622 +++ libdv-0.104/libdv/quant.c
623 @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
624 uint32_t dv_quant_248_mul_tab [2] [22] [64];
625 uint32_t dv_quant_88_mul_tab [2] [22] [64];
626
627 -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
628 +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
629 extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
630 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
631 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
632 @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
633 _dv_quant_x86_64(block, qno, klass);
634 emms();
635 #else
636 - _dv_quant_x86(block, qno, klass);
637 + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
638 emms();
639 #endif
640 }
641 --- libdv-0.104-old/libdv/quant.h
642 +++ libdv-0.104/libdv/quant.h
643 @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
644 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
645 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
646 dv_248_coeff_t *co);
647 -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
648 +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
649 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
650 extern void dv_quant_init (void);
651 #ifdef __cplusplus
652 --- libdv-0.104-old/libdv/quant_x86.S
653 +++ libdv-0.104/libdv/quant_x86.S
654 @@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:
655
656 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
657 movl ARGn(1),%eax /* qno */
658 + movl ARGn(3),%ebx /* dv_quant_offset */
659 + addl ARGn(2),%ebx /* class */
660 + movzbl (%ebx),%ecx
661 movl ARGn(2),%ebx /* class */
662 - movzbl dv_quant_offset(%ebx),%ecx
663 addl %ecx,%eax
664 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
665 + movl ARGn(4),%edx /* dv_quant_shifts */
666 + leal (%edx,%eax,4),%edx /* edx is pq */
667
668 /* extra = (class == 3); */
669 /* 0 1 2 3 */
670 @@ -212,11 +219,13 @@ _dv_quant_x86:
671
672 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
673 movl ARGn(1),%eax /* qno */
674 + movl ARGn(3),%ebx /* offset */
675 + addl ARGn(2),%ebx /* class */
676 + movzbl (%ebx),%ecx
677 movl ARGn(2),%ebx /* class */
678 -
679 - movzbl dv_quant_offset(%ebx),%ecx
680 + movl ARGn(4),%edx /* shifts */
681 addl %ecx,%eax
682 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
683 + leal (%edx,%eax,4),%edx /* edx is pq */
684
685 /* extra = (class == 3); */
686 /* 0 1 2 3 */
687 --- libdv-0.104-old/libdv/rgbtoyuv.S
688 +++ libdv-0.104/libdv/rgbtoyuv.S
689 @@ -41,9 +41,6 @@
690 #define DV_WIDTH_SHORT_HALF 720
691 #define DV_WIDTH_BYTE_HALF 360
692
693 -.global _dv_rgbtoycb_mmx
694 -# .global yuvtoycb_mmx
695 -
696 .data
697
698 .align 8
699 @@ -110,25 +107,26 @@ VR0GR: .long 0,0
700 VBG0B: .long 0,0
701
702 #endif
703 -
704 +
705 +#include "asm_common.S"
706 +
707 .section .note.GNU-stack, "", @progbits
708
709 .text
710
711 -#define _inPtr 8
712 -#define _rows 12
713 -#define _columns 16
714 -#define _outyPtr 20
715 -#define _outuPtr 24
716 -#define _outvPtr 28
717 +#define _inPtr 24+8
718 +#define _rows 24+12
719 +#define _columns 24+16
720 +#define _outyPtr 24+20
721 +#define _outuPtr 24+24
722 +#define _outvPtr 24+28
723
724 .global _dv_rgbtoycb_mmx
725 .hidden _dv_rgbtoycb_mmx
726 .type _dv_rgbtoycb_mmx,@function
727 _dv_rgbtoycb_mmx:
728
729 pushl %ebp
730 - movl %esp, %ebp
731 pushl %eax
732 pushl %ebx
733 pushl %ecx
734 @@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
735 pushl %esi
736 pushl %edi
737
738 - leal ZEROSX, %eax #This section gets around a bug
739 + LOAD_PIC_REG(bp)
740 +
741 + leal MUNG(ZEROSX), %eax #This section gets around a bug
742 movq (%eax), %mm0 #unlikely to persist
743 - movq %mm0, ZEROS
744 - leal OFFSETDX, %eax
745 + movq %mm0, MUNG(ZEROS)
746 + leal MUNG(OFFSETDX), %eax
747 movq (%eax), %mm0
748 - movq %mm0, OFFSETD
749 - leal OFFSETWX, %eax
750 + movq %mm0, MUNG(OFFSETD)
751 + leal MUNG(OFFSETWX), %eax
752 movq (%eax), %mm0
753 - movq %mm0, OFFSETW
754 - leal OFFSETBX, %eax
755 + movq %mm0, MUNG(OFFSETW)
756 + leal MUNG(OFFSETBX), %eax
757 movq (%eax), %mm0
758 - movq %mm0, OFFSETB
759 - leal YR0GRX, %eax
760 + movq %mm0, MUNG(OFFSETB)
761 + leal MUNG(YR0GRX), %eax
762 movq (%eax), %mm0
763 - movq %mm0, YR0GR
764 - leal YBG0BX, %eax
765 + movq %mm0, MUNG(YR0GR)
766 + leal MUNG(YBG0BX), %eax
767 movq (%eax), %mm0
768 - movq %mm0, YBG0B
769 - leal UR0GRX, %eax
770 + movq %mm0, MUNG(YBG0B)
771 + leal MUNG(UR0GRX), %eax
772 movq (%eax), %mm0
773 - movq %mm0, UR0GR
774 - leal UBG0BX, %eax
775 + movq %mm0, MUNG(UR0GR)
776 + leal MUNG(UBG0BX), %eax
777 movq (%eax), %mm0
778 - movq %mm0, UBG0B
779 - leal VR0GRX, %eax
780 + movq %mm0, MUNG(UBG0B)
781 + leal MUNG(VR0GRX), %eax
782 movq (%eax), %mm0
783 - movq %mm0, VR0GR
784 - leal VBG0BX, %eax
785 + movq %mm0, MUNG(VR0GR)
786 + leal MUNG(VBG0BX), %eax
787 movq (%eax), %mm0
788 - movq %mm0, VBG0B
789 -
790 - movl _rows(%ebp), %eax
791 - movl _columns(%ebp), %ebx
792 + movq %mm0, MUNG(VBG0B)
793 + movl _rows(%esp), %eax
794 + movl _columns(%esp), %ebx
795 mull %ebx #number pixels
796 shrl $3, %eax #number of loops
797 movl %eax, %edi #loop counter in edi
798 - movl _inPtr(%ebp), %eax
799 - movl _outyPtr(%ebp), %ebx
800 - movl _outuPtr(%ebp), %ecx
801 - movl _outvPtr(%ebp), %edx
802 + movl _inPtr(%esp), %eax
803 + movl _outyPtr(%esp), %ebx
804 + movl _outuPtr(%esp), %ecx
805 + movl _outvPtr(%esp), %edx
806 rgbtoycb_mmx_loop:
807 movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
808 pxor %mm6, %mm6 #0 -> mm6
809 @@ -184,29 +186,29 @@ rgbtoycb_mmx_loop:
810 punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
811 movq %mm0, %mm2 #R1B0G0R0 -> mm2
812
813 - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
814 + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
815 movq %mm1, %mm3 #B1G1R1B0 -> mm3
816
817 - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
818 + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
819 movq %mm2, %mm4 #R1B0G0R0 -> mm4
820
821 - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
822 + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
823 movq %mm3, %mm5 #B1G1R1B0 -> mm5
824
825 - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
826 + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
827 punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
828
829 - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
830 + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
831 paddd %mm1, %mm0 #Y1Y0 -> mm0
832
833 - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
834 + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
835
836 movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
837 paddd %mm3, %mm2 #U1U0 -> mm2
838
839 movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
840
841 - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
842 + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
843 paddd %mm5, %mm4 #V1V0 -> mm4
844
845 movq %mm1, %mm5 #B3G3R3B2 -> mm5
846 @@ -214,29 +216,29 @@ rgbtoycb_mmx_loop:
847
848 paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
849
850 - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
851 + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
852 movq %mm1, %mm3 #R3B2G2R2 -> mm3
853
854 - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
855 + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
856 movq %mm5, %mm7 #B3G3R3B2 -> mm7
857
858 - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
859 + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
860 psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
861
862 - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
863 + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
864 movq %mm3, %mm6 #R3B2G2R2 -> mm6
865 - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
866 + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
867 psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
868
869 paddd %mm5, %mm1 #Y3Y2 -> mm1
870 movq %mm7, %mm5 #B3G3R3B2 -> mm5
871 - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
872 + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
873 psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
874
875 - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
876 + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
877 packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
878
879 - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
880 + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
881 psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
882
883 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
884 @@ -251,58 +253,58 @@ rgbtoycb_mmx_loop:
885 movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
886 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
887
888 - paddw OFFSETY, %mm0
889 + paddw MUNG(OFFSETY), %mm0
890 movq %mm0, (%ebx) #store Y3Y2Y1Y0
891 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
892
893 - movq TEMP0, %mm0 #R5B4G4R4 -> mm0
894 + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
895 addl $8, %ebx
896 -
897 - punpcklbw ZEROS, %mm7 #B5G500 -> mm7
898 +
899 + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
900 movq %mm0, %mm6 #R5B4G4R4 -> mm6
901
902 - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
903 + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
904 psrlq $32, %mm0 #00R5B4 -> mm0
905
906 paddw %mm0, %mm7 #B5G5R5B4 -> mm7
907 movq %mm6, %mm2 #B5B4G4R4 -> mm2
908
909 - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
910 + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
911 movq %mm7, %mm0 #B5G5R5B4 -> mm0
912
913 - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
914 + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
915 packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
916
917 addl $24, %eax #increment RGB count
918
919 - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
920 + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
921 movq %mm6, %mm4 #B5B4G4R4 -> mm4
922
923 - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
924 + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
925 movq %mm0, %mm3 #B5G5R5B4 -> mm0
926
927 - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
928 + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
929 paddd %mm7, %mm2 #Y5Y4 -> mm2
930
931 - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
932 + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
933 pxor %mm7, %mm7 #0 -> mm7
934
935 - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
936 + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
937 punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
938
939 paddd %mm6, %mm0 #U5U4 -> mm0
940 movq %mm1, %mm6 #B7G7R7B6 -> mm6
941
942 - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
943 + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
944 punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
945
946 movq %mm5, %mm7 #R7B6G6R6 -> mm7
947 paddd %mm4, %mm3 #V5V4 -> mm3
948
949 - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
950 + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
951 movq %mm1, %mm4 #B7G7R7B6 -> mm4
952
953 - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
954 + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
955 psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
956
957 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
958 @@ -310,25 +312,25 @@ rgbtoycb_mmx_loop:
959 paddd %mm5, %mm6 #Y7Y6 -> mm6
960 movq %mm7, %mm5 #R7B6G6R6 -> mm5
961
962 - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
963 + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
964 psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
965
966 - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
967 + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
968 psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
969
970 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
971
972 - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
973 + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
974 paddd %mm4, %mm7 #U7U6 -> mm7
975
976 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
977 - paddw OFFSETY, %mm2
978 + paddw MUNG(OFFSETY), %mm2
979 movq %mm2, (%ebx) #store Y7Y6Y5Y4
980
981 - movq ALLONE, %mm6
982 + movq MUNG(ALLONE), %mm6
983 packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
984
985 - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
986 + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
987 pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
988
989 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
990 @@ -338,8 +340,8 @@ rgbtoycb_mmx_loop:
991
992 psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
993 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
994 -
995 - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
996 +
997 + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
998
999 movq %mm4, (%ecx) # store U
1000
1001 @@ -422,14 +426,15 @@ _dv_ppm_copy_y_block_mmx:
1002 _dv_pgm_copy_y_block_mmx:
1003
1004 pushl %ebp
1005 - movl %esp, %ebp
1006 pushl %esi
1007 pushl %edi
1008 -
1009 - movl 8(%ebp), %edi # dest
1010 - movl 12(%ebp), %esi # src
1011
1012 - movq OFFSETY, %mm7
1013 + LOAD_PIC_REG(bp)
1014 +
1015 + movl 16(%esp), %edi # dest
1016 + movl 20(%esp), %esi # src
1017 +
1018 + movq MUNG(OFFSETY), %mm7
1019 pxor %mm6, %mm6
1020
1021 movq (%esi), %mm0
1022 @@ -564,14 +571,15 @@ _dv_pgm_copy_y_block_mmx:
1023 _dv_video_copy_y_block_mmx:
1024
1025 pushl %ebp
1026 - movl %esp, %ebp
1027 pushl %esi
1028 pushl %edi
1029 -
1030 - movl 8(%ebp), %edi # dest
1031 - movl 12(%ebp), %esi # src
1032
1033 - movq OFFSETBX, %mm7
1034 + LOAD_PIC_REG(bp)
1035 +
1036 + movl 16(%esp), %edi # dest
1037 + movl 20(%esp), %esi # src
1038 +
1039 + movq MUNG(OFFSETBX), %mm7
1040 pxor %mm6, %mm6
1041
1042 movq (%esi), %mm0
1043 @@ -852,16 +864,16 @@ _dv_ppm_copy_pal_c_block_mmx:
1044 _dv_pgm_copy_pal_c_block_mmx:
1045
1046 pushl %ebp
1047 - movl %esp, %ebp
1048 pushl %esi
1049 pushl %edi
1050 pushl %ebx
1051 -
1052 - movl 8(%ebp), %edi # dest
1053 - movl 12(%ebp), %esi # src
1054
1055 + LOAD_PIC_REG(bp)
1056 +
1057 + movl 20(%esp), %edi # dest
1058 + movl 24(%esp), %esi # src
1059
1060 - movq OFFSETBX, %mm7
1061 + movq MUNG(OFFSETBX), %mm7
1062 pxor %mm6, %mm6
1063
1064
1065 @@ -1000,15 +1014,16 @@ _dv_pgm_copy_pal_c_block_mmx:
1066 _dv_video_copy_pal_c_block_mmx:
1067
1068 pushl %ebp
1069 - movl %esp, %ebp
1070 pushl %esi
1071 pushl %edi
1072 pushl %ebx
1073 -
1074 - movl 8(%ebp), %edi # dest
1075 - movl 12(%ebp), %esi # src
1076
1077 - movq OFFSETBX, %mm7
1078 + LOAD_PIC_REG(bp)
1079 +
1080 + movl 20(%esp), %edi # dest
1081 + movl 24(%esp), %esi # src
1082 +
1083 + movq MUNG(OFFSETBX), %mm7
1084 paddw %mm7, %mm7
1085 pxor %mm6, %mm6
1086
1087 @@ -1095,18 +1112,18 @@ video_copy_pal_c_block_mmx_loop:
1088 _dv_ppm_copy_ntsc_c_block_mmx:
1089
1090 pushl %ebp
1091 - movl %esp, %ebp
1092 pushl %esi
1093 pushl %edi
1094 pushl %ebx
1095 -
1096 - movl 8(%ebp), %edi # dest
1097 - movl 12(%ebp), %esi # src
1098 +
1099 + LOAD_PIC_REG(bp)
1100 +
1101 + movl 20(%esp), %edi # dest
1102 + movl 24(%esp), %esi # src
1103
1104 movl $4, %ebx
1105
1106 - movq ALLONE, %mm6
1107 -
1108 + movq MUNG(ALLONE), %mm6
1109 ppm_copy_ntsc_c_block_mmx_loop:
1110
1111 movq (%esi), %mm0
1112 @@ -1168,14 +1187,15 @@ ppm_copy_ntsc_c_block_mmx_loop:
1113 _dv_pgm_copy_ntsc_c_block_mmx:
1114
1115 pushl %ebp
1116 - movl %esp, %ebp
1117 pushl %esi
1118 pushl %edi
1119 -
1120 - movl 8(%ebp), %edi # dest
1121 - movl 12(%ebp), %esi # src
1122
1123 - movq OFFSETBX, %mm7
1124 + LOAD_PIC_REG(bp)
1125 +
1126 + movl 16(%esp), %edi # dest
1127 + movl 20(%esp), %esi # src
1128 +
1129 + movq MUNG(OFFSETBX), %mm7
1130 paddw %mm7, %mm7
1131 pxor %mm6, %mm6
1132
1133 @@ -1325,15 +1347,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
1134 _dv_video_copy_ntsc_c_block_mmx:
1135
1136 pushl %ebp
1137 - movl %esp, %ebp
1138 pushl %esi
1139 pushl %edi
1140 pushl %ebx
1141 -
1142 - movl 8(%ebp), %edi # dest
1143 - movl 12(%ebp), %esi # src
1144
1145 - movq OFFSETBX, %mm7
1146 + LOAD_PIC_REG(bp)
1147 +
1148 + movl 20(%esp), %edi # dest
1149 + movl 24(%esp), %esi # src
1150 +
1151 + movq MUNG(OFFSETBX), %mm7
1152 paddw %mm7, %mm7
1153 pxor %mm6, %mm6
1154
1155 --- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
1156 +++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
1157 @@ -41,9 +41,6 @@
1158 #define DV_WIDTH_SHORT_HALF 720
1159 #define DV_WIDTH_BYTE_HALF 360
1160
1161 -.global _dv_rgbtoycb_mmx_x86_64
1162 -# .global yuvtoycb_mmx_x86_64
1163 -
1164 .data
1165
1166 .align 8
1167 --- libdv-0.104-old/libdv/vlc_x86.S
1168 +++ libdv-0.104/libdv/vlc_x86.S
1169 @@ -1,31 +1,39 @@
1170 #include "asmoff.h"
1171 .section .note.GNU-stack, "", @progbits
1172 + #include "asm_common.S"
1173
1174 .text
1175 .align 4
1176 .globl dv_decode_vlc
1177 +.globl asm_dv_decode_vlc
1178 +.hidden asm_dv_decode_vlc
1179 +asm_dv_decode_vlc = dv_decode_vlc
1180 +
1181 .type dv_decode_vlc,@function
1182 dv_decode_vlc:
1183 pushl %ebx
1184 + pushl %ebp
1185 +
1186 + LOAD_PIC_REG(bp)
1187
1188 - /* Args are at 8(%esp). */
1189 - movl 8(%esp),%eax /* %eax is bits */
1190 - movl 12(%esp),%ebx /* %ebx is maxbits */
1191 + /* Args are at 12(%esp). */
1192 + movl 12(%esp),%eax /* %eax is bits */
1193 + movl 16(%esp),%ebx /* %ebx is maxbits */
1194 andl $0x3f,%ebx /* limit index range STL*/
1195
1196 - movl dv_vlc_class_index_mask(,%ebx,4),%edx
1197 + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
1198 andl %eax,%edx
1199 - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
1200 + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
1201 sarl %cl,%edx
1202 - movl dv_vlc_classes(,%ebx,4),%ecx
1203 + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
1204 movsbl (%ecx,%edx,1),%edx /* %edx is class */
1205
1206 - movl dv_vlc_index_mask(,%edx,4),%ebx
1207 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1208 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1209 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1210 andl %eax,%ebx
1211 sarl %cl,%ebx
1212
1213 - movl dv_vlc_lookups(,%edx,4),%edx
1214 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1215 movl (%edx,%ebx,4),%edx
1216
1217 /* Now %edx holds result, like this:
1218 @@ -42,7 +51,7 @@ dv_decode_vlc:
1219 movl %edx,%ecx
1220 sarl $8,%ecx
1221 andl $0xff,%ecx
1222 - movl sign_mask(,%ecx,4),%ebx
1223 + movl MUNG_ARR(sign_mask,%ecx,4),%ebx
1224 andl %ebx,%eax
1225 negl %eax
1226 sarl $31,%eax
1227 @@ -63,14 +72,14 @@ dv_decode_vlc:
1228 *result = broken;
1229 Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
1230 */
1231 - movl 12(%esp),%ebx /* %ebx is maxbits */
1232 + movl 16(%esp),%ebx /* %ebx is maxbits */
1233 subl %ecx,%ebx
1234 sbbl %ebx,%ebx
1235 orl %ebx,%edx
1236
1237 - movl 16(%esp),%eax
1238 + movl 20(%esp),%eax
1239 movl %edx,(%eax)
1240 -
1241 + popl %ebp
1242 popl %ebx
1243 ret
1244
1245 @@ -80,21 +89,28 @@ dv_decode_vlc:
1246 .type __dv_decode_vlc,@function
1247 __dv_decode_vlc:
1248 pushl %ebx
1249 + pushl %ebp
1250 +
1251 + LOAD_PIC_REG(bp)
1252
1253 - /* Args are at 8(%esp). */
1254 - movl 8(%esp),%eax /* %eax is bits */
1255 + /* Args are at 12(%esp). */
1256 + movl 12(%esp),%eax /* %eax is bits */
1257
1258 movl %eax,%edx /* %edx is class */
1259 andl $0xfe00,%edx
1260 sarl $9,%edx
1261 +#ifdef __PIC__
1262 + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
1263 +#else
1264 movsbl dv_vlc_class_lookup5(%edx),%edx
1265 -
1266 - movl dv_vlc_index_mask(,%edx,4),%ebx
1267 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1268 +#endif
1269 +
1270 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1271 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1272 andl %eax,%ebx
1273 sarl %cl,%ebx
1274
1275 - movl dv_vlc_lookups(,%edx,4),%edx
1276 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1277 movl (%edx,%ebx,4),%edx
1278
1279 /* Now %edx holds result, like this:
1280 @@ -112,7 +128,7 @@ __dv_decode_vlc:
1281 movl %edx,%ecx
1282 sarl $8,%ecx
1283 andl $0xff,%ecx
1284 - movl sign_mask(,%ecx,4),%ecx
1285 + movl MUNG_ARR(sign_mask,%ecx,4),%ecx
1286 andl %ecx,%eax
1287 negl %eax
1288 sarl $31,%eax
1289 @@ -127,9 +143,9 @@ __dv_decode_vlc:
1290 xorl %eax,%edx
1291 subl %eax,%edx
1292
1293 - movl 12(%esp),%eax
1294 + movl 16(%esp),%eax
1295 movl %edx,(%eax)
1296 -
1297 + popl %ebp
1298 popl %ebx
1299 ret
1300
1301 @@ -140,14 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1302 */
1303 .text
1304 .align 4
1305 +.globl asm_dv_parse_ac_coeffs_pass0
1306 +.hidden asm_dv_parse_ac_coeffs_pass0
1307 + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
1308 +
1309 .globl dv_parse_ac_coeffs_pass0
1310 .type dv_parse_ac_coeffs_pass0,@function
1311 dv_parse_ac_coeffs_pass0:
1312 pushl %ebx
1313 pushl %edi
1314 pushl %esi
1315 pushl %ebp
1316
1317 + LOAD_PIC_REG(si)
1318 +
1319 #define ARGn(N) (20+(4*(N)))(%esp)
1320
1321 /*
1322 @@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
1323 ebp bl
1324 */
1325 movl ARGn(2),%ebp
1326 +#ifndef __PIC__
1327 movl ARGn(0),%esi
1328 movl bitstream_t_buf(%esi),%esi
1329 +#endif
1330 movl dv_block_t_offset(%ebp),%edi
1331 movl dv_block_t_reorder(%ebp),%ebx
1332
1333 @@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
1334
1335 movq dv_block_t_coeffs(%ebp),%mm1
1336 pxor %mm0,%mm0
1337 +#ifdef __PIC__
1338 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1339 +#else
1340 pand const_f_0_0_0,%mm1
1341 +#endif
1342 movq %mm1,dv_block_t_coeffs(%ebp)
1343 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1344 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1345 @@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
1346 readloop:
1347 movl %edi,%ecx
1348 shrl $3,%ecx
1349 +#ifdef __PIC__
1350 + movl ARGn(0),%eax
1351 + addl bitstream_t_buf(%eax),%ecx
1352 + movzbl (%ecx),%eax
1353 + movzbl 1(%ecx),%edx
1354 + movzbl 2(%ecx),%ecx
1355 +#else
1356 movzbl (%esi,%ecx,1),%eax
1357 movzbl 1(%esi,%ecx,1),%edx
1358 movzbl 2(%esi,%ecx,1),%ecx
1359 +#endif
1360 shll $16,%eax
1361 shll $8,%edx
1362 orl %ecx,%eax
1363 @@ -217,7 +254,11 @@ readloop:
1364
1365 /* Attempt to use the shortcut first. If it hits, then
1366 this vlc term has been decoded. */
1367 +#ifdef __PIC__
1368 + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
1369 +#else
1370 movl dv_vlc_class1_shortcut(,%ecx,4),%edx
1371 +#endif
1372 test $0x80,%edx
1373 je done_decode
1374
1375 @@ -228,12 +269,19 @@ readloop:
1376 movl %ebx,dv_block_t_reorder(%ebp)
1377
1378 /* %eax is bits */
1379 -
1380 +#ifdef __PIC__
1381 + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
1382 +
1383 + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
1384 + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
1385 + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
1386 +#else
1387 movsbl dv_vlc_class_lookup5(%ecx),%ecx
1388
1389 movl dv_vlc_index_mask(,%ecx,4),%ebx
1390 movl dv_vlc_lookups(,%ecx,4),%edx
1391 movl dv_vlc_index_rshift(,%ecx,4),%ecx
1392 +#endif
1393 andl %eax,%ebx
1394 sarl %cl,%ebx
1395
1396 @@ -256,7 +304,11 @@ readloop:
1397 movl %edx,%ecx
1398 sarl $8,%ecx
1399 andl $0xff,%ecx
1400 +#ifdef __PIC__
1401 + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
1402 +#else
1403 movl sign_mask(,%ecx,4),%ecx
1404 +#endif
1405 andl %ecx,%eax
1406 negl %eax
1407 sarl $31,%eax
1408 @@ -326,10 +378,16 @@ alldone:
1409
1410 slowpath:
1411 /* slow path: use dv_decode_vlc */;
1412 +#ifdef __PIC__
1413 + pushl %esi
1414 + leal vlc@GOTOFF(%esi),%esi
1415 + xchgl %esi,(%esp) /* last parameter is &vlc */
1416 +#else
1417 pushl $vlc /* last parameter is &vlc */
1418 +#endif
1419 pushl %edx /* bits_left */
1420 pushl %eax /* bits */
1421 - call dv_decode_vlc
1422 + call asm_dv_decode_vlc
1423 addl $12,%esp
1424 test $0x80,%edx /* If (vlc.run < 0) break */
1425 jne escape
1426 @@ -359,6 +417,8 @@ show16:
1427 pushl %esi
1428 pushl %ebp
1429
1430 + LOAD_PIC_REG(si)
1431 +
1432 #define ARGn(N) (20+(4*(N)))(%esp)
1433
1434 movl ARGn(1),%eax /* quality */
1435 @@ -373,7 +434,11 @@ dv_parse_video_segment:
1436 jz its_mono
1437 movl $6,%ebx
1438 its_mono:
1439 +#ifdef __PIC__
1440 + movl %ebx,n_blocks@GOTOFF(%esi)
1441 +#else
1442 movl %ebx,n_blocks
1443 +#endif
1444
1445 /*
1446 * ebx seg/b
1447 @@ -384,15 +449,22 @@ its_mono:
1448 * ebp bl
1449 */
1450 movl ARGn(0),%ebx
1451 +#ifndef __PIC__
1452 movl dv_videosegment_t_bs(%ebx),%esi
1453 movl bitstream_t_buf(%esi),%esi
1454 +#endif
1455 leal dv_videosegment_t_mb(%ebx),%edi
1456
1457 movl $0,%eax
1458 movl $0,%ecx
1459 macloop:
1460 +#ifdef __PIC__
1461 + movl %eax,m@GOTOFF(%esi)
1462 + movl %ecx,mb_start@GOTOFF(%esi)
1463 +#else
1464 movl %eax,m
1465 movl %ecx,mb_start
1466 +#endif
1467
1468 movl ARGn(0),%ebx
1469
1470 @@ -400,7 +472,13 @@ macloop:
1471 /* mb->qno = bitstream_get(bs,4); */
1472 movl %ecx,%edx
1473 shr $3,%edx
1474 +#ifdef __PIC__
1475 + movl dv_videosegment_t_bs(%ebx),%ecx
1476 + movl bitstream_t_buf(%ecx),%ecx
1477 + movzbl 3(%ecx,%edx,1),%edx
1478 +#else
1479 movzbl 3(%esi,%edx,1),%edx
1480 +#endif
1481 andl $0xf,%edx
1482 movl %edx,dv_macroblock_t_qno(%edi)
1483
1484 @@ -411,7 +489,11 @@ macloop:
1485 movl %edx,dv_macroblock_t_eob_count(%edi)
1486
1487 /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
1488 +#ifdef __PIC__
1489 + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
1490 +#else
1491 movl dv_super_map_vertical(,%eax,4),%edx
1492 +#endif
1493 movl dv_videosegment_t_i(%ebx),%ecx
1494 addl %ecx,%edx
1495
1496 @@ -422,11 +504,20 @@ skarly:
1497 andl $1,%ecx
1498 shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
1499
1500 +#ifdef __PIC__
1501 + leal mod_10@GOTOFF(%esi,%edx),%edx
1502 + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1503 +#else
1504 movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1505 +#endif
1506 movl %edx,dv_macroblock_t_i(%edi)
1507
1508 /* mb->j = dv_super_map_horizontal[m]; */
1509 +#ifdef __PIC__
1510 + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
1511 +#else
1512 movl dv_super_map_horizontal(,%eax,4),%edx
1513 +#endif
1514 movl %edx,dv_macroblock_t_j(%edi)
1515
1516 /* mb->k = seg->k; */
1517 @@ -445,12 +536,28 @@ blkloop:
1518 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
1519 */
1520 /* dc = bitstream_get(bs,9); */
1521 +#ifdef __PIC__
1522 + movl mb_start@GOTOFF(%esi),%ecx
1523 +#else
1524 movl mb_start,%ecx
1525 +#endif
1526 shr $3,%ecx
1527 +#ifdef __PIC__
1528 + movzbl blk_start@GOTOFF(%esi,%ebx),%edx
1529 +#else
1530 movzbl blk_start(%ebx),%edx
1531 +#endif
1532 addl %ecx,%edx
1533 +#ifdef __PIC__
1534 + movl ARGn(0),%ecx
1535 + movl dv_videosegment_t_bs(%ecx),%ecx
1536 + movl bitstream_t_buf(%ecx),%ecx
1537 + movzbl (%ecx,%edx,1),%eax /* hi byte */
1538 + movzbl 1(%ecx,%edx,1),%ecx /* lo byte */
1539 +#else
1540 movzbl (%esi,%edx,1),%eax /* hi byte */
1541 movzbl 1(%esi,%edx,1),%ecx /* lo byte */
1542 +#endif
1543 shll $8,%eax
1544 orl %ecx,%eax
1545
1546 @@ -477,7 +584,11 @@ blkloop:
1547
1548 /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
1549 shll $6,%eax
1550 +#ifdef __PIC__
1551 + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax
1552 +#else
1553 addl $(dv_reorder+1),%eax
1554 +#endif
1555 movl %eax,dv_block_t_reorder(%ebp)
1556
1557 /* bl->reorder_sentinel = bl->reorder + 63; */
1558 @@ -485,13 +596,22 @@ blkloop:
1559 movl %eax,dv_block_t_reorder_sentinel(%ebp)
1560
1561 /* bl->offset= mb_start + dv_parse_bit_start[b]; */
1562 +#ifdef __PIC__
1563 + movl mb_start@GOTOFF(%esi),%ecx
1564 + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
1565 +#else
1566 movl mb_start,%ecx
1567 movl dv_parse_bit_start(,%ebx,4),%eax
1568 +#endif
1569 addl %ecx,%eax
1570 movl %eax,dv_block_t_offset(%ebp)
1571
1572 /* bl->end= mb_start + dv_parse_bit_end[b]; */
1573 +#ifdef __PIC__
1574 + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
1575 +#else
1576 movl dv_parse_bit_end(,%ebx,4),%eax
1577 +#endif
1578 addl %ecx,%eax
1579 movl %eax,dv_block_t_end(%ebp)
1580
1581 @@ -503,7 +623,11 @@ blkloop:
1582 /* no AC pass. Just zero out the remaining coeffs */
1583 movq dv_block_t_coeffs(%ebp),%mm1
1584 pxor %mm0,%mm0
1585 +#ifdef __PIC__
1586 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1587 +#else
1588 pand const_f_0_0_0,%mm1
1589 +#endif
1590 movq %mm1,dv_block_t_coeffs(%ebp)
1591 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1592 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1593 @@ -528,18 +652,27 @@ do_ac_pass:
1594 pushl %ebp
1595 pushl %edi
1596 pushl %eax
1597 - call dv_parse_ac_coeffs_pass0
1598 + call asm_dv_parse_ac_coeffs_pass0
1599 addl $12,%esp
1600 done_ac:
1601
1602 +#ifdef __PIC__
1603 + movl n_blocks@GOTOFF(%esi),%eax
1604 +#else
1605 movl n_blocks,%eax
1606 +#endif
1607 addl $dv_block_t_size,%ebp
1608 incl %ebx
1609 cmpl %eax,%ebx
1610 jnz blkloop
1611
1612 +#ifdef __PIC__
1613 + movl m@GOTOFF(%esi),%eax
1614 + movl mb_start@GOTOFF(%esi),%ecx
1615 +#else
1616 movl m,%eax
1617 movl mb_start,%ecx
1618 +#endif
1619 addl $(8 * 80),%ecx
1620 addl $dv_macroblock_t_size,%edi
1621 incl %eax
1622 @@ -557,7 +690,7 @@ done_ac:
1623
1624 andl $DV_QUALITY_AC_MASK,%eax
1625 cmpl $DV_QUALITY_AC_2,%eax
1626 - jz dv_parse_ac_coeffs
1627 + jz asm_dv_parse_ac_coeffs
1628 movl $0,%eax
1629 ret
1630