]> git.wh0rd.org - patches.git/blame_incremental - libdv-0.104-pic-fix.patch
scummvm random work
[patches.git] / libdv-0.104-pic-fix.patch
... / ...
CommitLineData
1diff -urp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S
2--- libdv-0.104-old/libdv/dct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200
3+++ libdv-0.104/libdv/dct_block_mmx.S 2005-10-24 00:11:39.000000000 +0200
4@@ -53,6 +53,17 @@ scratch2: .quad 0
5 scratch3: .quad 0
6 scratch4: .quad 0
7
8+#ifdef __PIC__
9+# undef __i686 /* gcc define gets in our way */
10+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
11+.globl __i686.get_pc_thunk.bp
12+ .hidden __i686.get_pc_thunk.bp
13+ .type __i686.get_pc_thunk.bp,@function
14+__i686.get_pc_thunk.bp:
15+ movl (%esp), %ebp
16+ ret
17+#endif
18+
19 .text
20
21 .align 8
22@@ -60,10 +71,14 @@ scratch4: .quad 0
23 _dv_dct_88_block_mmx:
24
25 pushl %ebp
26- movl %esp, %ebp
27 pushl %esi
28
29- movl 8(%ebp), %esi # source
30+#ifdef __PIC__
31+ call __i686.get_pc_thunk.bp
32+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
33+#endif
34+
35+ movl 12(%esp), %esi # source: arg 1 via %esp (2 pushes + return address); %ebp is no longer a frame pointer
36
37 # column 0
38 movq 16*0(%esi), %mm0 # v0
39@@ -86,22 +101,45 @@ _dv_dct_88_block_mmx:
40
41 movq 16*3(%esi), %mm5 # v3
42 movq 16*4(%esi), %mm7 # v4
43+#ifdef __PIC__
44+ movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ;
45+#else
46 movq %mm7, scratch1 # scratch1: v4 ;
47+#endif
48 movq %mm5, %mm7 # duplicate v3
49+#ifdef __PIC__
50+ paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4
51+ psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4
52+ movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03
53+#else
54 paddw scratch1, %mm5 # v03: v3+v4
55 psubw scratch1, %mm7 # v04: v3-v4
56 movq %mm5, scratch2 # scratch2: v03
57+#endif
58 movq %mm0, %mm5 # mm5: v00
59
60+#ifdef __PIC__
61+ paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03
62+ psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03
63+ movq %mm3, scratch3@GOTOFF(%ebp) # scratch3: v02
64+#else
65 paddw scratch2, %mm0 # v10: v00+v03
66 psubw scratch2, %mm5 # v13: v00-v03
67 movq %mm3, scratch3 # scratch3: v02
68+#endif
69 movq %mm1, %mm3 # duplicate v01
70
71+#ifdef __PIC__
72+ paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02
73+ psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02
74+
75+ movq %mm6, scratch4@GOTOFF(%ebp) # scratch4: v05
76+#else
77 paddw scratch3, %mm1 # v11: v01+v02
78 psubw scratch3, %mm3 # v12: v01-v02
79
80 movq %mm6, scratch4 # scratch4: v05
81+#endif
82 movq %mm0, %mm6 # duplicate v10
83
84 paddw %mm1, %mm0 # v10+v11
85@@ -111,10 +149,18 @@ _dv_dct_88_block_mmx:
86 movq %mm6, 16*4(%esi) # out4: v10-v11
87
88 movq %mm4, %mm0 # mm0: v06
89+#ifdef __PIC__
90+ paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06
91+#else
92 paddw scratch4, %mm4 # v15: v05+v06
93+#endif
94 paddw %mm2, %mm0 # v16: v07+v06
95
96+#ifdef __PIC__
97+ pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15
98+#else
99 pmulhw WA3, %mm4 # v35~: WA3*v15
100+#endif
101 psllw $1, %mm4 # v35: compensate the coeefient scale
102
103 movq %mm4, %mm6 # duplicate v35
104@@ -123,7 +169,11 @@ _dv_dct_88_block_mmx:
105
106 paddw %mm5, %mm3 # v22: v12+v13
107
108+#ifdef __PIC__
109+ pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA1*v22
110+#else
111 pmulhw WA1, %mm3 # v32~: WA1*v22
112+#endif
113 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
114 movq %mm5, %mm6 # duplicate v13
115
116@@ -134,13 +184,23 @@ _dv_dct_88_block_mmx:
117 movq %mm6, 16*6(%esi) # out6: v13-v32
118
119
120+#ifdef __PIC__
121+ paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05
122+#else
123 paddw scratch4, %mm7 # v14n: v04+v05
124+#endif
125 movq %mm0, %mm5 # duplicate v16
126
127 psubw %mm7, %mm0 # va1: v16-v14n
128+#ifdef __PIC__
129+ pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5
130+ pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~: v16*WA4
131+ pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2
132+#else
133 pmulhw WA5, %mm0 # va0~: va1*WA5
134 pmulhw WA4, %mm5 # v36~~: v16*WA4
135 pmulhw WA2, %mm7 # v34~~: v14n*WA2
136+#endif
137 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
138 psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
139
140@@ -188,22 +248,45 @@ _dv_dct_88_block_mmx:
141
142 movq 16*3(%esi), %mm5 # v3
143 movq 16*4(%esi), %mm7 # v4
144+#ifdef __PIC__
145+ movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ;
146+#else
147 movq %mm7, scratch1 # scratch1: v4 ;
148+#endif
149 movq %mm5, %mm7 # duplicate v3
150+#ifdef __PIC__
151+ paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4
152+ psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4
153+ movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03
154+#else
155 paddw scratch1, %mm5 # v03: v3+v4
156 psubw scratch1, %mm7 # v04: v3-v4
157 movq %mm5, scratch2 # scratch2: v03
158+#endif
159 movq %mm0, %mm5 # mm5: v00
160
161+#ifdef __PIC__
162+ paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03
163+ psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03
164+ movq %mm3, scratch3@GOTOFF(%ebp) # scratc3: v02
165+#else
166 paddw scratch2, %mm0 # v10: v00+v03
167 psubw scratch2, %mm5 # v13: v00-v03
168 movq %mm3, scratch3 # scratc3: v02
169+#endif
170 movq %mm1, %mm3 # duplicate v01
171
172+#ifdef __PIC__
173+ paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02
174+ psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02
175+
176+ movq %mm6, scratch4@GOTOFF(%ebp) # scratc4: v05
177+#else
178 paddw scratch3, %mm1 # v11: v01+v02
179 psubw scratch3, %mm3 # v12: v01-v02
180
181 movq %mm6, scratch4 # scratc4: v05
182+#endif
183 movq %mm0, %mm6 # duplicate v10
184
185 paddw %mm1, %mm0 # v10+v11
186@@ -213,10 +296,18 @@ _dv_dct_88_block_mmx:
187 movq %mm6, 16*4(%esi) # out4: v10-v11
188
189 movq %mm4, %mm0 # mm0: v06
190+#ifdef __PIC__
191+ paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06
192+#else
193 paddw scratch4, %mm4 # v15: v05+v06
194+#endif
195 paddw %mm2, %mm0 # v16: v07+v06
196
197+#ifdef __PIC__
198+ pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15
199+#else
200 pmulhw WA3, %mm4 # v35~: WA3*v15
201+#endif
202 psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
203
204 movq %mm4, %mm6 # duplicate v35
205@@ -225,7 +316,11 @@ _dv_dct_88_block_mmx:
206
207 paddw %mm5, %mm3 # v22: v12+v13
208
209+#ifdef __PIC__
210+ pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA3*v15
211+#else
212 pmulhw WA1, %mm3 # v32~: WA3*v15
213+#endif
214 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
215 movq %mm5, %mm6 # duplicate v13
216
217@@ -235,13 +330,23 @@ _dv_dct_88_block_mmx:
218 movq %mm5, 16*2(%esi) # out2: v13+v32
219 movq %mm6, 16*6(%esi) # out6: v13-v32
220
221+#ifdef __PIC__
222+ paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05
223+#else
224 paddw scratch4, %mm7 # v14n: v04+v05
225+#endif
226 movq %mm0, %mm5 # duplicate v16
227
228 psubw %mm7, %mm0 # va1: v16-v14n
229+#ifdef __PIC__
230+ pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2
231+ pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5
232+ pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~: v16*WA4
233+#else
234 pmulhw WA2, %mm7 # v34~~: v14n*WA2
235 pmulhw WA5, %mm0 # va0~: va1*WA5
236 pmulhw WA4, %mm5 # v36~~: v16*WA4
237+#endif
238 psllw $16-NSHIFT, %mm7
239 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
240 # scale note that WA4 is shifted 1 bit less than the others
241@@ -751,11 +856,15 @@ _dv_dct_block_mmx_postscale_88:
242 _dv_dct_248_block_mmx:
243
244 pushl %ebp
245- movl %esp, %ebp
246 pushl %esi
247 pushl %edi
248
249- movl 8(%ebp), %esi # source
250+#ifdef __PIC__
251+ call __i686.get_pc_thunk.bp
252+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
253+#endif
254+
255+ movl 16(%esp), %esi # source: arg 1 via %esp (3 pushes + return address); %ebp is no longer a frame pointer
256
257 # column 0
258
259@@ -779,7 +888,11 @@ _dv_dct_248_block_mmx:
260 paddw %mm1, %mm0 # v20: v10+v11
261 psubw %mm1, %mm3 # v21: v10-v11
262
263+#ifdef __PIC__
264+ pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22
265+#else
266 pmulhw WA1, %mm5 # v32~: WA1*v22
267+#endif
268 movq %mm4, %mm2
269 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
270
271@@ -818,7 +931,11 @@ _dv_dct_248_block_mmx:
272 paddw %mm1, %mm0 # v20: v10+v11
273 psubw %mm1, %mm3 # v21: v10-v11
274
275+#ifdef __PIC__
276+ pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22
277+#else
278 pmulhw WA1, %mm5 # v32~: WA1*v22
279+#endif
280 movq %mm4, %mm2
281 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
282
283@@ -855,7 +972,11 @@ _dv_dct_248_block_mmx:
284 paddw %mm1, %mm0 # v20: v10+v11
285 psubw %mm1, %mm3 # v21: v10-v11
286
287+#ifdef __PIC__
288+ pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22
289+#else
290 pmulhw WA1, %mm5 # v32~: WA1*v22
291+#endif
292 movq %mm4, %mm2
293 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
294
295@@ -892,7 +1013,11 @@ _dv_dct_248_block_mmx:
296 paddw %mm1, %mm0 # v20: v10+v11
297 psubw %mm1, %mm3 # v21: v10-v11
298
299+#ifdef __PIC__
300+ pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22
301+#else
302 pmulhw WA1, %mm5 # v32~: WA1*v22
303+#endif
304 movq %mm4, %mm2
305 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
306
307diff -urp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c
308--- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200
309+++ libdv-0.104/libdv/dv.c 2005-10-24 00:59:57.000000000 +0200
310@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
311 } /* dv_reconfigure */
312
313
314+extern uint8_t dv_quant_offset[4];
315+extern uint8_t dv_quant_shifts[22][4];
316+
317 static inline void
318 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
319 int i;
320@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
321 dv_idct_248 (co248, mb->b[i].coeffs);
322 } else {
323 #if ARCH_X86
324- _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
325+ _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
326 _dv_idct_88(mb->b[i].coeffs);
327 #elif ARCH_X86_64
328 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
329@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
330 dv_idct_248 (co248, mb->b[b].coeffs);
331 } else {
332 #if ARCH_X86
333- _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
334+ _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
335 _dv_weight_88_inverse(bl->coeffs);
336 _dv_idct_88(bl->coeffs);
337 #elif ARCH_X86_64
338diff -urp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c
339--- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100
340+++ libdv-0.104/libdv/encode.c 2005-10-24 01:17:41.000000000 +0200
341@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
342 }
343
344 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
345- dv_vlc_entry_t ** out);
346+ dv_vlc_entry_t ** out,
347+ dv_vlc_entry_t * lookup);
348
349 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
350 dv_vlc_entry_t ** out);
351@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
352 #elif ARCH_X86
353 int num_bits;
354
355- num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
356+ num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
357 emms();
358 #else
359 int num_bits;
360@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
361 return num_bits;
362 }
363
364-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
365+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
366 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
367
368 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
369@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
370 #elif ARCH_X86_64
371 return _dv_vlc_num_bits_block_x86_64(coeffs);
372 #else
373- return _dv_vlc_num_bits_block_x86(coeffs);
374+ return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
375 #endif
376 }
377
378diff -urp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S
379--- libdv-0.104-old/libdv/encode_x86.S 2005-10-23 19:40:58.000000000 +0200
380+++ libdv-0.104/libdv/encode_x86.S 2005-10-24 01:18:32.000000000 +0200
381@@ -23,10 +23,6 @@
382 * The libdv homepage is http://libdv.sourceforge.net/.
383 */
384
385-.data
386-ALLONE: .word 1,1,1,1
387-VLCADDMASK: .byte 255,0,0,0,255,0,0,0
388-
389 .text
390
391 .global _dv_vlc_encode_block_mmx
392@@ -45,11 +41,14 @@ _dv_vlc_encode_block_mmx:
393
394 movl $63, %ecx
395
396- movl vlc_encode_lookup, %esi
397+ movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
398
399 pxor %mm0, %mm0
400 pxor %mm2, %mm2
401- movq VLCADDMASK, %mm1
402+ pushl $0x000000FF
403+ pushl $0x000000FF
404+ movq (%esp), %mm1
405+ addl $8, %esp
406 xorl %ebp, %ebp
407 subl $8, %edx
408 vlc_encode_block_mmx_loop:
409@@ -121,7 +120,7 @@ _dv_vlc_num_bits_block_x86:
410 addl $2, %edi
411
412 movl $63, %ecx
413- movl vlc_num_bits_lookup, %esi
414+ movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
415
416 vlc_num_bits_block_x86_loop:
417 movw (%edi), %ax
418@@ -579,8 +578,11 @@ _dv_need_dct_248_mmx_rows:
419 paddw %mm5, %mm1
420
421 paddw %mm1, %mm0
422-
423- pmaddwd ALLONE, %mm0
424+
425+ pushl $0x00010001
426+ pushl $0x00010001
427+ pmaddwd (%esp), %mm0
428+ addl $8, %esp
429 movq %mm0, %mm1
430 psrlq $32, %mm1
431 paddd %mm1, %mm0
432diff -urp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S
433--- libdv-0.104-old/libdv/idct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200
434+++ libdv-0.104/libdv/idct_block_mmx.S 2005-10-24 01:12:12.000000000 +0200
435@@ -8,16 +8,37 @@
436
437
438
439+#ifdef __PIC__
440+# undef __i686 /* gcc define gets in our way */
441+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
442+.globl __i686.get_pc_thunk.bp
443+ .hidden __i686.get_pc_thunk.bp
444+ .type __i686.get_pc_thunk.bp,@function
445+__i686.get_pc_thunk.bp:
446+ movl (%esp), %ebp
447+ ret
448+#endif
449+
450 .text
451+
452 .align 4
453 .globl _dv_idct_block_mmx
454 .type _dv_idct_block_mmx,@function
455 _dv_idct_block_mmx:
456 pushl %ebp
457- movl %esp,%ebp
458 pushl %esi
459+
460+#ifdef __PIC__
461+ call __i686.get_pc_thunk.bp
462+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
463+#endif
464+
465+#ifdef __PIC__
466+ leal preSC@GOTOFF(%ebp), %ecx
467+#else
468 leal preSC, %ecx
469- movl 8(%ebp),%esi /* source matrix */
470+#endif
471+ movl 12(%esp),%esi /* source matrix */
472
473 /*
474 * column 0: even part
475@@ -35,7 +56,11 @@ _dv_idct_block_mmx:
476 movq %mm1, %mm2 /* added 11/1/96 */
477 pmulhw 8*8(%esi),%mm5 /* V8 */
478 psubsw %mm0, %mm1 /* V16 */
479+#ifdef __PIC__
480+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */
481+#else
482 pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
483+#endif
484 paddsw %mm0, %mm2 /* V17 */
485 movq %mm2, %mm0 /* duplicate V17 */
486 psraw $1, %mm2 /* t75=t82 */
487@@ -76,7 +101,11 @@ _dv_idct_block_mmx:
488 paddsw %mm0, %mm3 /* V29 ; free mm0 */
489 movq %mm7, %mm1 /* duplicate V26 */
490 psraw $1, %mm3 /* t91=t94 */
491+#ifdef __PIC__
492+ pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */
493+#else
494 pmulhw x539f539f539f539f,%mm7 /* V33 */
495+#endif
496 psraw $1, %mm1 /* t96 */
497 movq %mm5, %mm0 /* duplicate V2 */
498 psraw $2, %mm4 /* t85=t87 */
499@@ -84,15 +113,27 @@ _dv_idct_block_mmx:
500 psubsw %mm4, %mm0 /* V28 ; free mm4 */
501 movq %mm0, %mm2 /* duplicate V28 */
502 psraw $1, %mm5 /* t90=t93 */
503+#ifdef __PIC__
504+ pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */
505+#else
506 pmulhw x4546454645464546,%mm0 /* V35 */
507+#endif
508 psraw $1, %mm2 /* t97 */
509 movq %mm5, %mm4 /* duplicate t90=t93 */
510 psubsw %mm2, %mm1 /* V32 ; free mm2 */
511+#ifdef __PIC__
512+ pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */
513+#else
514 pmulhw x61f861f861f861f8,%mm1 /* V36 */
515+#endif
516 psllw $1, %mm7 /* t107 */
517 paddsw %mm3, %mm5 /* V31 */
518 psubsw %mm3, %mm4 /* V30 ; free mm3 */
519+#ifdef __PIC__
520+ pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */
521+#else
522 pmulhw x5a825a825a825a82,%mm4 /* V34 */
523+#endif
524 nop
525 psubsw %mm1, %mm0 /* V38 */
526 psubsw %mm7, %mm1 /* V37 ; free mm7 */
527@@ -159,7 +200,11 @@ _dv_idct_block_mmx:
528 psubsw %mm7, %mm1 /* V50 */
529 pmulhw 8*9(%esi), %mm5 /* V9 */
530 paddsw %mm7, %mm2 /* V51 */
531+#ifdef __PIC__
532+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */
533+#else
534 pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
535+#endif
536 movq %mm2, %mm6 /* duplicate V51 */
537 psraw $1, %mm2 /* t138=t144 */
538 movq %mm3, %mm4 /* duplicate V1 */
539@@ -200,11 +245,19 @@ _dv_idct_block_mmx:
540 * even more by doing the correction step in a later stage when the number
541 * is actually multiplied by 16
542 */
543+#ifdef __PIC__
544+ paddw x0005000200010001@GOTOFF(%ebp), %mm4
545+#else
546 paddw x0005000200010001, %mm4
547+#endif
548 psubsw %mm6, %mm3 /* V60 ; free mm6 */
549 psraw $1, %mm0 /* t154=t156 */
550 movq %mm3, %mm1 /* duplicate V60 */
551+#ifdef __PIC__
552+ pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */
553+#else
554 pmulhw x539f539f539f539f, %mm1 /* V67 */
555+#endif
556 movq %mm5, %mm6 /* duplicate V3 */
557 psraw $2, %mm4 /* t148=t150 */
558 paddsw %mm4, %mm5 /* V61 */
559@@ -213,13 +266,25 @@ _dv_idct_block_mmx:
560 psllw $1, %mm1 /* t169 */
561 paddsw %mm0, %mm5 /* V65 -> result */
562 psubsw %mm0, %mm4 /* V64 ; free mm0 */
563+#ifdef __PIC__
564+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */
565+#else
566 pmulhw x5a825a825a825a82, %mm4 /* V68 */
567+#endif
568 psraw $1, %mm3 /* t158 */
569 psubsw %mm6, %mm3 /* V66 */
570 movq %mm5, %mm2 /* duplicate V65 */
571+#ifdef __PIC__
572+ pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */
573+#else
574 pmulhw x61f861f861f861f8, %mm3 /* V70 */
575+#endif
576 psllw $1, %mm6 /* t165 */
577+#ifdef __PIC__
578+ pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */
579+#else
580 pmulhw x4546454645464546, %mm6 /* V69 */
581+#endif
582 psraw $1, %mm2 /* t172 */
583 /* moved from next block */
584 movq 8*5(%esi), %mm0 /* V56 */
585@@ -344,7 +409,11 @@ _dv_idct_block_mmx:
586 * movq 8*13(%esi), %mm4 tmt13
587 */
588 psubsw %mm4, %mm3 /* V134 */
589+#ifdef __PIC__
590+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */
591+#else
592 pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
593+#endif
594 movq 8*9(%esi), %mm6 /* tmt9 */
595 paddsw %mm4, %mm5 /* V135 ; mm4 free */
596 movq %mm0, %mm4 /* duplicate tmt1 */
597@@ -373,17 +442,33 @@ _dv_idct_block_mmx:
598 psubsw %mm7, %mm0 /* V144 */
599 movq %mm0, %mm3 /* duplicate V144 */
600 paddsw %mm7, %mm2 /* V147 ; free mm7 */
601+#ifdef __PIC__
602+ pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */
603+#else
604 pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
605+#endif
606 movq %mm1, %mm7 /* duplicate tmt3 */
607 paddsw %mm5, %mm7 /* V145 */
608 psubsw %mm5, %mm1 /* V146 ; free mm5 */
609 psubsw %mm1, %mm3 /* V150 */
610 movq %mm7, %mm5 /* duplicate V145 */
611+#ifdef __PIC__
612+ pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */
613+#else
614 pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
615+#endif
616 psubsw %mm2, %mm5 /* V148 */
617+#ifdef __PIC__
618+ pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */
619+#else
620 pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
621+#endif
622 psllw $2, %mm0 /* t311 */
623+#ifdef __PIC__
624+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */
625+#else
626 pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
627+#endif
628 paddsw %mm2, %mm7 /* V149 ; free mm2 */
629 psllw $1, %mm1 /* t313 */
630 nop /* without the nop - freeze here for one clock */
631@@ -409,7 +494,11 @@ _dv_idct_block_mmx:
632 paddsw %mm3, %mm6 /* V164 ; free mm3 */
633 movq %mm4, %mm3 /* duplicate V142 */
634 psubsw %mm5, %mm4 /* V165 ; free mm5 */
635+#ifdef __PIC__
636+ movq %mm2, scratch7@GOTOFF(%ebp) /* out7 */
637+#else
638 movq %mm2, scratch7 /* out7 */
639+#endif
640 psraw $4, %mm6
641 psraw $4, %mm4
642 paddsw %mm5, %mm3 /* V162 */
643@@ -420,11 +509,19 @@ _dv_idct_block_mmx:
644 */
645 movq %mm6, 8*9(%esi) /* out9 */
646 paddsw %mm1, %mm0 /* V161 */
647+#ifdef __PIC__
648+ movq %mm3, scratch5@GOTOFF(%ebp) /* out5 */
649+#else
650 movq %mm3, scratch5 /* out5 */
651+#endif
652 psubsw %mm1, %mm5 /* V166 ; free mm1 */
653 movq %mm4, 8*11(%esi) /* out11 */
654 psraw $4, %mm5
655+#ifdef __PIC__
656+ movq %mm0, scratch3@GOTOFF(%ebp) /* out3 */
657+#else
658 movq %mm0, scratch3 /* out3 */
659+#endif
660 movq %mm2, %mm4 /* duplicate V140 */
661 movq %mm5, 8*13(%esi) /* out13 */
662 paddsw %mm7, %mm2 /* V160 */
663@@ -434,7 +531,11 @@ _dv_idct_block_mmx:
664 /* moved from the next block */
665 movq 8*3(%esi), %mm7
666 psraw $4, %mm4
667+#ifdef __PIC__
668+ movq %mm2, scratch1@GOTOFF(%ebp) /* out1 */
669+#else
670 movq %mm2, scratch1 /* out1 */
671+#endif
672 /* moved from the next block */
673 movq %mm0, %mm1
674 movq %mm4, 8*15(%esi) /* out15 */
675@@ -491,15 +592,31 @@ _dv_idct_block_mmx:
676 paddsw %mm4, %mm3 /* V113 ; free mm4 */
677 movq %mm0, %mm4 /* duplicate V110 */
678 paddsw %mm1, %mm2 /* V111 */
679+#ifdef __PIC__
680+ pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */
681+#else
682 pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
683+#endif
684 psubsw %mm1, %mm5 /* V112 ; free mm1 */
685 psubsw %mm5, %mm4 /* V116 */
686 movq %mm2, %mm1 /* duplicate V111 */
687+#ifdef __PIC__
688+ pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */
689+#else
690 pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
691+#endif
692 psubsw %mm3, %mm2 /* V114 */
693+#ifdef __PIC__
694+ pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */
695+#else
696 pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
697+#endif
698 paddsw %mm3, %mm1 /* V115 ; free mm3 */
699+#ifdef __PIC__
700+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */
701+#else
702 pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
703+#endif
704 psllw $2, %mm0 /* t266 */
705 movq %mm1, (%esi) /* save V115 */
706 psllw $1, %mm5 /* t268 */
707@@ -517,7 +634,11 @@ _dv_idct_block_mmx:
708 movq %mm6, %mm3 /* duplicate tmt4 */
709 psubsw %mm0, %mm6 /* V100 */
710 paddsw %mm0, %mm3 /* V101 ; free mm0 */
711+#ifdef __PIC__
712+ pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */
713+#else
714 pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
715+#endif
716 movq %mm7, %mm5 /* duplicate tmt0 */
717 movq 8*8(%esi), %mm1 /* tmt8 */
718 paddsw %mm1, %mm7 /* V103 */
719@@ -551,10 +672,18 @@ _dv_idct_block_mmx:
720 movq 8*2(%esi), %mm3 /* V123 */
721 paddsw %mm4, %mm7 /* out0 */
722 /* moved up from next block */
723+#ifdef __PIC__
724+ movq scratch3@GOTOFF(%ebp), %mm0
725+#else
726 movq scratch3, %mm0
727+#endif
728 psraw $4, %mm7
729 /* moved up from next block */
730+#ifdef __PIC__
731+ movq scratch5@GOTOFF(%ebp), %mm6
732+#else
733 movq scratch5, %mm6
734+#endif
735 psubsw %mm4, %mm1 /* out14 ; free mm4 */
736 paddsw %mm3, %mm5 /* out2 */
737 psraw $4, %mm1
738@@ -565,7 +694,11 @@ _dv_idct_block_mmx:
739 movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
740 psraw $4, %mm2
741 /* moved up to the prev block */
742+#ifdef __PIC__
743+ movq scratch7@GOTOFF(%ebp), %mm4
744+#else
745 movq scratch7, %mm4
746+#endif
747 /* moved up to the prev block */
748 psraw $4, %mm0
749 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
750@@ -579,7 +712,11 @@ _dv_idct_block_mmx:
751 * psraw $4, %mm0
752 * psraw $4, %mm6
753 */
754+#ifdef __PIC__
755+ movq scratch1@GOTOFF(%ebp), %mm1
756+#else
757 movq scratch1, %mm1
758+#endif
759 psraw $4, %mm4
760 movq %mm0, 8*3(%esi) /* out3 */
761 psraw $4, %mm1
762diff -urp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c
763--- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200
764+++ libdv-0.104/libdv/quant.c 2005-10-24 01:06:24.000000000 +0200
765@@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
766 uint32_t dv_quant_248_mul_tab [2] [22] [64];
767 uint32_t dv_quant_88_mul_tab [2] [22] [64];
768
769-extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
770+extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][4]); /* inner bound is mandatory; rows are uint8_t[4] */
771 extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
772 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
773 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
774@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
775 _dv_quant_x86_64(block, qno, klass);
776 emms();
777 #else
778- _dv_quant_x86(block, qno, klass);
779+ _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
780 emms();
781 #endif
782 }
783diff -urp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h
784--- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200
785+++ libdv-0.104/libdv/quant.h 2005-10-24 00:57:43.000000000 +0200
786@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
787 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
788 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
789 dv_248_coeff_t *co);
790-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
791+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][4]); /* inner bound is mandatory; rows are uint8_t[4] */
792 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
793 extern void dv_quant_init (void);
794 #ifdef __cplusplus
795diff -urp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S
796--- libdv-0.104-old/libdv/quant_x86.S 2005-10-23 19:40:58.000000000 +0200
797+++ libdv-0.104/libdv/quant_x86.S 2005-10-24 01:10:21.000000000 +0200
798@@ -71,10 +71,13 @@ _dv_quant_88_inverse_x86:
799
800 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
801 movl ARGn(1),%eax /* qno */
802+ movl ARGn(3),%ebx /* dv_quant_offset */
803+ addl ARGn(2),%ebx /* class */
804+ movzbl (%ebx),%ecx
805 movl ARGn(2),%ebx /* class */
806- movzbl dv_quant_offset(%ebx),%ecx
807 addl %ecx,%eax
808- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
809+ movl ARGn(4),%edx /* dv_quant_shifts */
810+ leal (%edx,%eax,4),%edx /* edx is pq */
811
812 /* extra = (class == 3); */
813 /* 0 1 2 3 */
814@@ -212,11 +215,13 @@ _dv_quant_x86:
815
816 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
817 movl ARGn(1),%eax /* qno */
818+ movl ARGn(3),%ebx /* offset */
819+ addl ARGn(2),%ebx /* class */
820+ movzbl (%ebx),%ecx
821 movl ARGn(2),%ebx /* class */
822-
823- movzbl dv_quant_offset(%ebx),%ecx
824+ movl ARGn(4),%edx /* shifts */
825 addl %ecx,%eax
826- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
827+ leal (%edx,%eax,4),%edx /* edx is pq */
828
829 /* extra = (class == 3); */
830 /* 0 1 2 3 */
831diff -urp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S
832--- libdv-0.104-old/libdv/rgbtoyuv.S 2005-10-23 19:40:58.000000000 +0200
833+++ libdv-0.104/libdv/rgbtoyuv.S 2005-10-24 00:46:34.000000000 +0200
834@@ -110,20 +110,30 @@ VR0GR: .long 0,0
835 VBG0B: .long 0,0
836
837 #endif
838-
839+
840+#ifdef __PIC__
841+# undef __i686 /* gcc define gets in our way */
842+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
843+.globl __i686.get_pc_thunk.bp
844+ .hidden __i686.get_pc_thunk.bp
845+ .type __i686.get_pc_thunk.bp,@function
846+__i686.get_pc_thunk.bp:
847+ movl (%esp), %ebp
848+ ret
849+#endif
850+
851 .text
852
853-#define _inPtr 8
854-#define _rows 12
855-#define _columns 16
856-#define _outyPtr 20
857-#define _outuPtr 24
858-#define _outvPtr 28
859+#define _inPtr 24+8
860+#define _rows 24+12
861+#define _columns 24+16
862+#define _outyPtr 24+20
863+#define _outuPtr 24+24
864+#define _outvPtr 24+28
865
866 _dv_rgbtoycb_mmx:
867
868 pushl %ebp
869- movl %esp, %ebp
870 pushl %eax
871 pushl %ebx
872 pushl %ecx
873@@ -131,46 +141,103 @@ _dv_rgbtoycb_mmx:
874 pushl %esi
875 pushl %edi
876
877+#ifdef __PIC__
878+ call __i686.get_pc_thunk.bp
879+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
880+#endif
881+
882+#ifdef __PIC__
883+ leal ZEROSX@GOTOFF(%ebp), %eax #This section gets around a bug
884+#else
885 leal ZEROSX, %eax #This section gets around a bug
886+#endif
887 movq (%eax), %mm0 #unlikely to persist
888+#ifdef __PIC__
889+ movq %mm0, ZEROS@GOTOFF(%ebp)
890+ leal OFFSETDX@GOTOFF(%ebp), %eax
891+#else
892 movq %mm0, ZEROS
893 leal OFFSETDX, %eax
894+#endif
895 movq (%eax), %mm0
896+#ifdef __PIC__
897+ movq %mm0, OFFSETD@GOTOFF(%ebp)
898+ leal OFFSETWX@GOTOFF(%ebp), %eax
899+#else
900 movq %mm0, OFFSETD
901 leal OFFSETWX, %eax
902+#endif
903 movq (%eax), %mm0
904+#ifdef __PIC__
905+ movq %mm0, OFFSETW@GOTOFF(%ebp)
906+ leal OFFSETBX@GOTOFF(%ebp), %eax
907+#else
908 movq %mm0, OFFSETW
909 leal OFFSETBX, %eax
910+#endif
911 movq (%eax), %mm0
912+#ifdef __PIC__
913+ movq %mm0, OFFSETB@GOTOFF(%ebp)
914+ leal YR0GRX@GOTOFF(%ebp), %eax
915+#else
916 movq %mm0, OFFSETB
917 leal YR0GRX, %eax
918+#endif
919 movq (%eax), %mm0
920+#ifdef __PIC__
921+ movq %mm0, YR0GR@GOTOFF(%ebp)
922+ leal YBG0BX@GOTOFF(%ebp), %eax
923+#else
924 movq %mm0, YR0GR
925 leal YBG0BX, %eax
926+#endif
927 movq (%eax), %mm0
928+#ifdef __PIC__
929+ movq %mm0, YBG0B@GOTOFF(%ebp)
930+ leal UR0GRX@GOTOFF(%ebp), %eax
931+#else
932 movq %mm0, YBG0B
933 leal UR0GRX, %eax
934+#endif
935 movq (%eax), %mm0
936+#ifdef __PIC__
937+ movq %mm0, UR0GR@GOTOFF(%ebp)
938+ leal UBG0BX@GOTOFF(%ebp), %eax
939+#else
940 movq %mm0, UR0GR
941 leal UBG0BX, %eax
942+#endif
943 movq (%eax), %mm0
944+#ifdef __PIC__
945+ movq %mm0, UBG0B@GOTOFF(%ebp)
946+ leal VR0GRX@GOTOFF(%ebp), %eax
947+#else
948 movq %mm0, UBG0B
949 leal VR0GRX, %eax
950+#endif
951 movq (%eax), %mm0
952+#ifdef __PIC__
953+ movq %mm0, VR0GR@GOTOFF(%ebp)
954+ leal VBG0BX@GOTOFF(%ebp), %eax
955+#else
956 movq %mm0, VR0GR
957 leal VBG0BX, %eax
958+#endif
959 movq (%eax), %mm0
960+#ifdef __PIC__
961+ movq %mm0, VBG0B@GOTOFF(%ebp)
962+#else
963 movq %mm0, VBG0B
964-
965- movl _rows(%ebp), %eax
966- movl _columns(%ebp), %ebx
967+#endif
968+ movl _rows(%esp), %eax
969+ movl _columns(%esp), %ebx
970 mull %ebx #number pixels
971 shrl $3, %eax #number of loops
972 movl %eax, %edi #loop counter in edi
973- movl _inPtr(%ebp), %eax
974- movl _outyPtr(%ebp), %ebx
975- movl _outuPtr(%ebp), %ecx
976- movl _outvPtr(%ebp), %edx
977+ movl _inPtr(%esp), %eax
978+ movl _outyPtr(%esp), %ebx
979+ movl _outuPtr(%esp), %ecx
980+ movl _outvPtr(%esp), %edx
981 rgbtoycb_mmx_loop:
982 movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
983 pxor %mm6, %mm6 #0 -> mm6
984@@ -184,29 +251,57 @@ rgbtoycb_mmx_loop:
985 punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
986 movq %mm0, %mm2 #R1B0G0R0 -> mm2
987
988+#ifdef __PIC__
989+ pmaddwd YR0GR@GOTOFF(%ebp), %mm0 #yrR1,ygG0+yrR0 -> mm0
990+#else
991 pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
992+#endif
993 movq %mm1, %mm3 #B1G1R1B0 -> mm3
994
995+#ifdef __PIC__
996+ pmaddwd YBG0B@GOTOFF(%ebp), %mm1 #ybB1+ygG1,ybB0 -> mm1
997+#else
998 pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
999+#endif
1000 movq %mm2, %mm4 #R1B0G0R0 -> mm4
1001
1002+#ifdef __PIC__
1003+ pmaddwd UR0GR@GOTOFF(%ebp), %mm2 #urR1,ugG0+urR0 -> mm2
1004+#else
1005 pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
1006+#endif
1007 movq %mm3, %mm5 #B1G1R1B0 -> mm5
1008
1009+#ifdef __PIC__
1010+ pmaddwd UBG0B@GOTOFF(%ebp), %mm3 #ubB1+ugG1,ubB0 -> mm3
1011+#else
1012 pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
1013+#endif
1014 punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
1015
1016+#ifdef __PIC__
1017+ pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR1,vgG0+vrR0 -> mm4
1018+#else
1019 pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
1020+#endif
1021 paddd %mm1, %mm0 #Y1Y0 -> mm0
1022
1023+#ifdef __PIC__
1024+ pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB1+vgG1,vbB0 -> mm5
1025+#else
1026 pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
1027+#endif
1028
1029 movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
1030 paddd %mm3, %mm2 #U1U0 -> mm2
1031
1032 movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
1033
1034+#ifdef __PIC__
1035+ punpcklbw ZEROS@GOTOFF(%ebp), %mm1 #B3G3R3B2 -> mm1
1036+#else
1037 punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
1038+#endif
1039 paddd %mm5, %mm4 #V1V0 -> mm4
1040
1041 movq %mm1, %mm5 #B3G3R3B2 -> mm5
1042@@ -214,29 +309,61 @@ rgbtoycb_mmx_loop:
1043
1044 paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
1045
1046+#ifdef __PIC__
1047+ punpckhbw ZEROS@GOTOFF(%ebp), %mm6 #R5B4G4R3 -> mm6
1048+#else
1049 punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
1050+#endif
1051 movq %mm1, %mm3 #R3B2G2R2 -> mm3
1052
1053+#ifdef __PIC__
1054+ pmaddwd YR0GR@GOTOFF(%ebp), %mm1 #yrR3,ygG2+yrR2 -> mm1
1055+#else
1056 pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
1057+#endif
1058 movq %mm5, %mm7 #B3G3R3B2 -> mm7
1059
1060+#ifdef __PIC__
1061+ pmaddwd YBG0B@GOTOFF(%ebp), %mm5 #ybB3+ygG3,ybB2 -> mm5
1062+#else
1063 pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
1064+#endif
1065 psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
1066
1067+#ifdef __PIC__
1068+ movq %mm6, TEMP0@GOTOFF(%ebp) #R5B4G4R4 -> TEMP0
1069+#else
1070 movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
1071+#endif
1072 movq %mm3, %mm6 #R3B2G2R2 -> mm6
1073+#ifdef __PIC__
1074+ pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR3,ugG2+urR2 -> mm6
1075+#else
1076 pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
1077+#endif
1078 psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
1079
1080 paddd %mm5, %mm1 #Y3Y2 -> mm1
1081 movq %mm7, %mm5 #B3G3R3B2 -> mm5
1082+#ifdef __PIC__
1083+ pmaddwd UBG0B@GOTOFF(%ebp), %mm7 #ubB3+ugG3,ubB2
1084+#else
1085 pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
1086+#endif
1087 psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
1088
1089+#ifdef __PIC__
1090+ pmaddwd VR0GR@GOTOFF(%ebp), %mm3 #vrR3,vgG2+vgR2
1091+#else
1092 pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
1093+#endif
1094 packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
1095
1096+#ifdef __PIC__
1097+ pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB3+vgG3,vbB2 -> mm5
1098+#else
1099 pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
1100+#endif
1101 psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
1102
1103 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
1104@@ -251,58 +378,114 @@ rgbtoycb_mmx_loop:
1105 movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
1106 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
1107
1108+#ifdef __PIC__
1109+ paddw OFFSETY@GOTOFF(%ebp), %mm0
1110+#else
1111 paddw OFFSETY, %mm0
1112+#endif
1113 movq %mm0, (%ebx) #store Y3Y2Y1Y0
1114 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
1115
1116+#ifdef __PIC__
1117+ movq TEMP0@GOTOFF(%ebp), %mm0 #R5B4G4R4 -> mm0
1118+#else
1119 movq TEMP0, %mm0 #R5B4G4R4 -> mm0
1120+#endif
1121 addl $8, %ebx
1122-
1123+
1124+#ifdef __PIC__
1125+ punpcklbw ZEROS@GOTOFF(%ebp), %mm7 #B5G500 -> mm7
1126+#else
1127 punpcklbw ZEROS, %mm7 #B5G500 -> mm7
1128+#endif
1129 movq %mm0, %mm6 #R5B4G4R4 -> mm6
1130
1131+#ifdef __PIC__
1132+ movq %mm2, TEMPU@GOTOFF(%ebp) #32-bit scaled U3U2U1U0 -> TEMPU
1133+#else
1134 movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
1135+#endif
1136 psrlq $32, %mm0 #00R5B4 -> mm0
1137
1138 paddw %mm0, %mm7 #B5G5R5B4 -> mm7
1139 movq %mm6, %mm2 #B5B4G4R4 -> mm2
1140
1141+#ifdef __PIC__
1142+ pmaddwd YR0GR@GOTOFF(%ebp), %mm2 #yrR5,ygG4+yrR4 -> mm2
1143+#else
1144 pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
1145+#endif
1146 movq %mm7, %mm0 #B5G5R5B4 -> mm0
1147
1148+#ifdef __PIC__
1149+ pmaddwd YBG0B@GOTOFF(%ebp), %mm7 #ybB5+ygG5,ybB4 -> mm7
1150+#else
1151 pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
1152+#endif
1153 packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
1154
1155 addl $24, %eax #increment RGB count
1156
1157+#ifdef __PIC__
1158+ movq %mm4, TEMPV@GOTOFF(%ebp) #(V3V2V1V0)/256 -> mm4
1159+#else
1160 movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
1161+#endif
1162 movq %mm6, %mm4 #B5B4G4R4 -> mm4
1163
1164+#ifdef __PIC__
1165+ pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR5,ugG4+urR4
1166+#else
1167 pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
1168+#endif
1169 movq %mm0, %mm3 #B5G5R5B4 -> mm0
1170
1171+#ifdef __PIC__
1172+ pmaddwd UBG0B@GOTOFF(%ebp), %mm0 #ubB5+ugG5,ubB4
1173+#else
1174 pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
1175+#endif
1176 paddd %mm7, %mm2 #Y5Y4 -> mm2
1177
1178+#ifdef __PIC__
1179+ pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR5,vgG4+vrR4 -> mm4
1180+#else
1181 pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
1182+#endif
1183 pxor %mm7, %mm7 #0 -> mm7
1184
1185+#ifdef __PIC__
1186+ pmaddwd VBG0B@GOTOFF(%ebp), %mm3 #vbB5+vgG5,vbB4 -> mm3
1187+#else
1188 pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
1189+#endif
1190 punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
1191
1192 paddd %mm6, %mm0 #U5U4 -> mm0
1193 movq %mm1, %mm6 #B7G7R7B6 -> mm6
1194
1195+#ifdef __PIC__
1196+ pmaddwd YBG0B@GOTOFF(%ebp), %mm6 #ybB7+ygG7,ybB6 -> mm6
1197+#else
1198 pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
1199+#endif
1200 punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
1201
1202 movq %mm5, %mm7 #R7B6G6R6 -> mm7
1203 paddd %mm4, %mm3 #V5V4 -> mm3
1204
1205+#ifdef __PIC__
1206+ pmaddwd YR0GR@GOTOFF(%ebp), %mm5 #yrR7,ygG6+yrR6 -> mm5
1207+#else
1208 pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
1209+#endif
1210 movq %mm1, %mm4 #B7G7R7B6 -> mm4
1211
1212+#ifdef __PIC__
1213+ pmaddwd UBG0B@GOTOFF(%ebp), %mm4 #ubB7+ugG7,ubB6 -> mm4
1214+#else
1215 pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
1216+#endif
1217 psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
1218
1219 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
1220@@ -310,25 +493,49 @@ rgbtoycb_mmx_loop:
1221 paddd %mm5, %mm6 #Y7Y6 -> mm6
1222 movq %mm7, %mm5 #R7B6G6R6 -> mm5
1223
1224+#ifdef __PIC__
1225+ pmaddwd UR0GR@GOTOFF(%ebp), %mm7 #urR7,ugG6+ugR6 -> mm7
1226+#else
1227 pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
1228+#endif
1229 psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
1230
1231+#ifdef __PIC__
1232+ pmaddwd VBG0B@GOTOFF(%ebp), %mm1 #vbB7+vgG7,vbB6 -> mm1
1233+#else
1234 pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
1235+#endif
1236 psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
1237
1238 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
1239
1240+#ifdef __PIC__
1241+ pmaddwd VR0GR@GOTOFF(%ebp), %mm5 #vrR7,vgG6+vrR6 -> mm5
1242+#else
1243 pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
1244+#endif
1245 paddd %mm4, %mm7 #U7U6 -> mm7
1246
1247 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
1248+#ifdef __PIC__
1249+ paddw OFFSETY@GOTOFF(%ebp), %mm2
1250+#else
1251 paddw OFFSETY, %mm2
1252+#endif
1253 movq %mm2, (%ebx) #store Y7Y6Y5Y4
1254
1255+#ifdef __PIC__
1256+ movq ALLONE@GOTOFF(%ebp), %mm6
1257+#else
1258 movq ALLONE, %mm6
1259+#endif
1260 packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
1261
1262+#ifdef __PIC__
1263+ movq TEMPU@GOTOFF(%ebp), %mm4 #32-bit scaled U3U2U1U0 -> mm4
1264+#else
1265 movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
1266+#endif
1267 pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
1268
1269 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
1270@@ -338,8 +545,12 @@ rgbtoycb_mmx_loop:
1271
1272 psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
1273 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
1274-
1275+
1276+#ifdef __PIC__
1277+ movq TEMPV@GOTOFF(%ebp), %mm5 #32-bit scaled V3V2V1V0 -> mm5
1278+#else
1279 movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
1280+#endif
1281
1282 movq %mm4, (%ecx) # store U
1283
1284@@ -425,14 +636,22 @@ _dv_ppm_copy_y_block_mmx:
1285 _dv_pgm_copy_y_block_mmx:
1286
1287 pushl %ebp
1288- movl %esp, %ebp
1289 pushl %esi
1290 pushl %edi
1291-
1292- movl 8(%ebp), %edi # dest
1293- movl 12(%ebp), %esi # src
1294
1295+#ifdef __PIC__
1296+ call __i686.get_pc_thunk.bp
1297+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1298+#endif
1299+
1300+ movl 16(%esp), %edi # dest
1301+ movl 20(%esp), %esi # src
1302+
1303+#ifdef __PIC__
1304+ movq OFFSETY@GOTOFF(%ebp), %mm7
1305+#else
1306 movq OFFSETY, %mm7
1307+#endif
1308 pxor %mm6, %mm6
1309
1310 movq (%esi), %mm0
1311@@ -567,14 +786,22 @@ _dv_pgm_copy_y_block_mmx:
1312 _dv_video_copy_y_block_mmx:
1313
1314 pushl %ebp
1315- movl %esp, %ebp
1316 pushl %esi
1317 pushl %edi
1318-
1319- movl 8(%ebp), %edi # dest
1320- movl 12(%ebp), %esi # src
1321
1322+#ifdef __PIC__
1323+ call __i686.get_pc_thunk.bp
1324+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1325+#endif
1326+
1327+ movl 16(%esp), %edi # dest
1328+ movl 20(%esp), %esi # src
1329+
1330+#ifdef __PIC__
1331+ movq OFFSETBX@GOTOFF(%ebp), %mm7
1332+#else
1333 movq OFFSETBX, %mm7
1334+#endif
1335 pxor %mm6, %mm6
1336
1337 movq (%esi), %mm0
1338@@ -855,16 +1082,23 @@ _dv_ppm_copy_pal_c_block_mmx:
1339 _dv_pgm_copy_pal_c_block_mmx:
1340
1341 pushl %ebp
1342- movl %esp, %ebp
1343 pushl %esi
1344 pushl %edi
1345 pushl %ebx
1346-
1347- movl 8(%ebp), %edi # dest
1348- movl 12(%ebp), %esi # src
1349
1350+#ifdef __PIC__
1351+ call __i686.get_pc_thunk.bp
1352+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1353+#endif
1354+
1355+ movl 20(%esp), %edi # dest
1356+ movl 24(%esp), %esi # src
1357
1358+#ifdef __PIC__
1359+ movq OFFSETBX@GOTOFF(%ebp), %mm7
1360+#else
1361 movq OFFSETBX, %mm7
1362+#endif
1363 pxor %mm6, %mm6
1364
1365
1366@@ -1003,15 +1237,23 @@ _dv_pgm_copy_pal_c_block_mmx:
1367 _dv_video_copy_pal_c_block_mmx:
1368
1369 pushl %ebp
1370- movl %esp, %ebp
1371 pushl %esi
1372 pushl %edi
1373 pushl %ebx
1374-
1375- movl 8(%ebp), %edi # dest
1376- movl 12(%ebp), %esi # src
1377
1378+#ifdef __PIC__
1379+ call __i686.get_pc_thunk.bp
1380+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1381+#endif
1382+
1383+ movl 20(%esp), %edi # dest
1384+ movl 24(%esp), %esi # src
1385+
1386+#ifdef __PIC__
1387+ movq OFFSETBX@GOTOFF(%ebp), %mm7
1388+#else
1389 movq OFFSETBX, %mm7
1390+#endif
1391 paddw %mm7, %mm7
1392 pxor %mm6, %mm6
1393
1394@@ -1098,18 +1340,25 @@ video_copy_pal_c_block_mmx_loop:
1395 _dv_ppm_copy_ntsc_c_block_mmx:
1396
1397 pushl %ebp
1398- movl %esp, %ebp
1399 pushl %esi
1400 pushl %edi
1401 pushl %ebx
1402-
1403- movl 8(%ebp), %edi # dest
1404- movl 12(%ebp), %esi # src
1405+
1406+#ifdef __PIC__
1407+ call __i686.get_pc_thunk.bp
1408+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1409+#endif
1410+
1411+ movl 20(%esp), %edi # dest
1412+ movl 24(%esp), %esi # src
1413
1414 movl $4, %ebx
1415
1416+#ifdef __PIC__
1417+ movq ALLONE@GOTOFF(%ebp), %mm6
1418+#else
1419 movq ALLONE, %mm6
1420-
1421+#endif
1422 ppm_copy_ntsc_c_block_mmx_loop:
1423
1424 movq (%esi), %mm0
1425@@ -1171,14 +1420,22 @@ ppm_copy_ntsc_c_block_mmx_loop:
1426 _dv_pgm_copy_ntsc_c_block_mmx:
1427
1428 pushl %ebp
1429- movl %esp, %ebp
1430 pushl %esi
1431 pushl %edi
1432-
1433- movl 8(%ebp), %edi # dest
1434- movl 12(%ebp), %esi # src
1435
1436+#ifdef __PIC__
1437+ call __i686.get_pc_thunk.bp
1438+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1439+#endif
1440+
1441+ movl 16(%esp), %edi # dest
1442+ movl 20(%esp), %esi # src
1443+
1444+#ifdef __PIC__
1445+ movq OFFSETBX@GOTOFF(%ebp), %mm7
1446+#else
1447 movq OFFSETBX, %mm7
1448+#endif
1449 paddw %mm7, %mm7
1450 pxor %mm6, %mm6
1451
1452@@ -1328,15 +1585,23 @@ _dv_pgm_copy_ntsc_c_block_mmx:
1453 _dv_video_copy_ntsc_c_block_mmx:
1454
1455 pushl %ebp
1456- movl %esp, %ebp
1457 pushl %esi
1458 pushl %edi
1459 pushl %ebx
1460-
1461- movl 8(%ebp), %edi # dest
1462- movl 12(%ebp), %esi # src
1463
1464+#ifdef __PIC__
1465+ call __i686.get_pc_thunk.bp
1466+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1467+#endif
1468+
1469+ movl 20(%esp), %edi # dest
1470+ movl 24(%esp), %esi # src
1471+
1472+#ifdef __PIC__
1473+ movq OFFSETBX@GOTOFF(%ebp), %mm7
1474+#else
1475 movq OFFSETBX, %mm7
1476+#endif
1477 paddw %mm7, %mm7
1478 pxor %mm6, %mm6
1479
1480diff -urp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S
1481--- libdv-0.104-old/libdv/vlc_x86.S 2005-10-23 19:40:58.000000000 +0200
1482+++ libdv-0.104/libdv/vlc_x86.S 2005-10-25 01:47:14.000000000 +0200
1483@@ -1,29 +1,76 @@
1484 #include "asmoff.h"
1485 .text
1486+
1487+#ifdef __PIC__
1488+# undef __i686 /* gcc define gets in our way */
1489+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
1490+.globl __i686.get_pc_thunk.bp
1491+ .hidden __i686.get_pc_thunk.bp
1492+ .type __i686.get_pc_thunk.bp,@function
1493+__i686.get_pc_thunk.bp:
1494+ movl (%esp), %ebp
1495+ ret
1496+
1497+ .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
1498+.globl __i686.get_pc_thunk.si
1499+ .hidden __i686.get_pc_thunk.si
1500+ .type __i686.get_pc_thunk.si,@function
1501+__i686.get_pc_thunk.si:
1502+ movl (%esp), %esi
1503+ ret
1504+#endif
1505+
1506 .align 4
1507 .globl dv_decode_vlc
1508 .type dv_decode_vlc,@function
1509 dv_decode_vlc:
1510 pushl %ebx
1511+ pushl %ebp
1512+
1513+#ifdef __PIC__
1514+ call __i686.get_pc_thunk.bp
1515+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1516+#endif
1517
1518- /* Args are at 8(%esp). */
1519- movl 8(%esp),%eax /* %eax is bits */
1520- movl 12(%esp),%ebx /* %ebx is maxbits */
1521+ /* Args are at 12(%esp). */
1522+ movl 12(%esp),%eax /* %eax is bits */
1523+ movl 16(%esp),%ebx /* %ebx is maxbits */
1524 andl $0x3f,%ebx /* limit index range STL*/
1525
1526+#ifdef __PIC__ /* %ebp holds the GOT base only when built PIC (set up above) */
1527+ movl dv_vlc_class_index_mask@GOTOFF(%ebp,%ebx,4),%edx
1528+#else
1529 movl dv_vlc_class_index_mask(,%ebx,4),%edx
1530+#endif
1531 andl %eax,%edx
1532+#ifdef __PIC__
1533+ movl dv_vlc_class_index_rshift@GOTOFF(%ebp,%ebx,4),%ecx
1534+#else
1535 movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
1536+#endif
1537 sarl %cl,%edx
1538+#ifdef __PIC__
1539+ movl dv_vlc_classes@GOTOFF(%ebp,%ebx,4),%ecx
1540+#else
1541 movl dv_vlc_classes(,%ebx,4),%ecx
1542+#endif
1543 movsbl (%ecx,%edx,1),%edx /* %edx is class */
1544-
1545+
1546+#ifdef __PIC__
1547+ movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx
1548+ movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx
1549+#else
1550 movl dv_vlc_index_mask(,%edx,4),%ebx
1551 movl dv_vlc_index_rshift(,%edx,4),%ecx
1552+#endif
1553 andl %eax,%ebx
1554 sarl %cl,%ebx
1555
1556+#ifdef __PIC__
1557+ movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx
1558+#else
1559 movl dv_vlc_lookups(,%edx,4),%edx
1560+#endif
1561 movl (%edx,%ebx,4),%edx
1562
1563 /* Now %edx holds result, like this:
1564@@ -42,7 +89,11 @@ dv_decode_vlc:
1565 movl %edx,%ecx
1566 sarl $8,%ecx
1567 andl $0xff,%ecx
1568+#ifdef __PIC__ /* same guard as the GOT setup: %ebp is not a GOT base otherwise */
1569+ movl sign_mask@GOTOFF(%ebp,%ecx,4),%ebx
1570+#else
1571 movl sign_mask(,%ecx,4),%ebx
1572+#endif
1573 andl %ebx,%eax
1574 negl %eax
1575 sarl $31,%eax
1576@@ -63,14 +114,14 @@ dv_decode_vlc:
1577 *result = broken;
1578 Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
1579 */
1580- movl 12(%esp),%ebx /* %ebx is maxbits */
1581+ movl 16(%esp),%ebx /* %ebx is maxbits */
1582 subl %ecx,%ebx
1583 sbbl %ebx,%ebx
1584 orl %ebx,%edx
1585
1586- movl 16(%esp),%eax
1587+ movl 20(%esp),%eax
1588 movl %edx,(%eax)
1589-
1590+ popl %ebp
1591 popl %ebx
1592 ret
1593
1594@@ -80,21 +131,38 @@ dv_decode_vlc:
1595 .type __dv_decode_vlc,@function
1596 __dv_decode_vlc:
1597 pushl %ebx
1598+ pushl %ebp
1599+
1600+#ifdef __PIC__
1601+ call __i686.get_pc_thunk.bp
1602+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
1603+#endif
1604
1605- /* Args are at 8(%esp). */
1606- movl 8(%esp),%eax /* %eax is bits */
1607+ /* Args are at 12(%esp). */
1608+ movl 12(%esp),%eax /* %eax is bits */
1609
1610 movl %eax,%edx /* %edx is class */
1611 andl $0xfe00,%edx
1612 sarl $9,%edx
1613+#ifdef __PIC__
1614+ movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
1615+
1616+ movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx
1617+ movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx
1618+#else
1619 movsbl dv_vlc_class_lookup5(%edx),%edx
1620-
1621+
1622 movl dv_vlc_index_mask(,%edx,4),%ebx
1623 movl dv_vlc_index_rshift(,%edx,4),%ecx
1624+#endif
1625 andl %eax,%ebx
1626 sarl %cl,%ebx
1627
1628+#ifdef __PIC__
1629+ movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx
1630+#else
1631 movl dv_vlc_lookups(,%edx,4),%edx
1632+#endif
1633 movl (%edx,%ebx,4),%edx
1634
1635 /* Now %edx holds result, like this:
1636@@ -112,7 +180,11 @@ __dv_decode_vlc:
1637 movl %edx,%ecx
1638 sarl $8,%ecx
1639 andl $0xff,%ecx
1640+#ifdef __PIC__
1641+ movl sign_mask@GOTOFF(%ebp,%ecx,4),%ecx
1642+#else
1643 movl sign_mask(,%ecx,4),%ecx
1644+#endif
1645 andl %ecx,%eax
1646 negl %eax
1647 sarl $31,%eax
1648@@ -127,9 +199,9 @@ __dv_decode_vlc:
1649 xorl %eax,%edx
1650 subl %eax,%edx
1651
1652- movl 12(%esp),%eax
1653+ movl 16(%esp),%eax
1654 movl %edx,(%eax)
1655-
1656+ popl %ebp
1657 popl %ebx
1658 ret
1659
1660@@ -147,6 +219,11 @@ dv_parse_ac_coeffs_pass0:
1661 pushl %esi
1662 pushl %ebp
1663
1664+#ifdef __PIC__
1665+ call __i686.get_pc_thunk.si
1666+ addl $_GLOBAL_OFFSET_TABLE_, %esi
1667+#endif
1668+
1669 #define ARGn(N) (20+(4*(N)))(%esp)
1670
1671 /*
1672@@ -159,8 +236,10 @@ dv_parse_ac_coeffs_pass0:
1673 ebp bl
1674 */
1675 movl ARGn(2),%ebp
1676+#ifndef __PIC__
1677 movl ARGn(0),%esi
1678 movl bitstream_t_buf(%esi),%esi
1679+#endif
1680 movl dv_block_t_offset(%ebp),%edi
1681 movl dv_block_t_reorder(%ebp),%ebx
1682
1683@@ -170,7 +249,11 @@ dv_parse_ac_coeffs_pass0:
1684
1685 movq dv_block_t_coeffs(%ebp),%mm1
1686 pxor %mm0,%mm0
1687+#ifdef __PIC__
1688+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
1689+#else
1690 pand const_f_0_0_0,%mm1
1691+#endif
1692 movq %mm1,dv_block_t_coeffs(%ebp)
1693 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1694 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1695@@ -191,9 +274,17 @@ dv_parse_ac_coeffs_pass0:
1696 readloop:
1697 movl %edi,%ecx
1698 shrl $3,%ecx
1699+#ifdef __PIC__
1700+ pushl %esi
1701+ movl ARGn(1),%esi
1702+ movl bitstream_t_buf(%esi),%esi
1703+#endif
1704 movzbl (%esi,%ecx,1),%eax
1705 movzbl 1(%esi,%ecx,1),%edx
1706 movzbl 2(%esi,%ecx,1),%ecx
1707+#ifdef __PIC__
1708+ popl %esi
1709+#endif
1710 shll $16,%eax
1711 shll $8,%edx
1712 orl %ecx,%eax
1713@@ -217,7 +308,11 @@ readloop:
1714
1715 /* Attempt to use the shortcut first. If it hits, then
1716 this vlc term has been decoded. */
1717+#ifdef __PIC__
1718+ movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
1719+#else
1720 movl dv_vlc_class1_shortcut(,%ecx,4),%edx
1721+#endif
1722 test $0x80,%edx
1723 je done_decode
1724
1725@@ -228,12 +323,19 @@ readloop:
1726 movl %ebx,dv_block_t_reorder(%ebp)
1727
1728 /* %eax is bits */
1729-
1730+#ifdef __PIC__
1731+ movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
1732+
1733+ movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
1734+ movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
1735+ movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
1736+#else
1737 movsbl dv_vlc_class_lookup5(%ecx),%ecx
1738
1739 movl dv_vlc_index_mask(,%ecx,4),%ebx
1740 movl dv_vlc_lookups(,%ecx,4),%edx
1741 movl dv_vlc_index_rshift(,%ecx,4),%ecx
1742+#endif
1743 andl %eax,%ebx
1744 sarl %cl,%ebx
1745
1746@@ -256,7 +358,11 @@ readloop:
1747 movl %edx,%ecx
1748 sarl $8,%ecx
1749 andl $0xff,%ecx
1750+#ifdef __PIC__
1751+ movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
1752+#else
1753 movl sign_mask(,%ecx,4),%ecx
1754+#endif
1755 andl %ecx,%eax
1756 negl %eax
1757 sarl $31,%eax
1758@@ -326,10 +432,20 @@ alldone:
1759
1760 slowpath:
1761 /* slow path: use dv_decode_vlc */;
1762+#ifdef __PIC__
1763+ pushl %esi
1764+ leal vlc@GOTOFF(%esi),%esi
1765+ xchgl %esi,(%esp) /* last parameter is &vlc */
1766+#else
1767 pushl $vlc /* last parameter is &vlc */
1768+#endif
1769 pushl %edx /* bits_left */
1770 pushl %eax /* bits */
1771+#ifdef __PIC__
1772+ xchgl %ebx,%esi; call dv_decode_vlc@PLT; xchgl %ebx,%esi /* i386 PLT stubs expect the GOT base in %ebx; it lives in %esi here, so swap around the call (both are callee-saved) */
1773+#else
1774 call dv_decode_vlc
1775+#endif
1776 addl $12,%esp
1777 test $0x80,%edx /* If (vlc.run < 0) break */
1778 jne escape
1779@@ -365,6 +481,11 @@ dv_parse_video_segment:
1780 pushl %esi
1781 pushl %ebp
1782
1783+#ifdef __PIC__
1784+ call __i686.get_pc_thunk.si
1785+ addl $_GLOBAL_OFFSET_TABLE_, %esi
1786+#endif
1787+
1788 #define ARGn(N) (20+(4*(N)))(%esp)
1789
1790 movl ARGn(1),%eax /* quality */
1791@@ -373,7 +494,11 @@ dv_parse_video_segment:
1792 jz its_mono
1793 movl $6,%ebx
1794 its_mono:
1795+#ifdef __PIC__
1796+ movl %ebx,n_blocks@GOTOFF(%esi)
1797+#else
1798 movl %ebx,n_blocks
1799+#endif
1800
1801 /*
1802 * ebx seg/b
1803@@ -384,15 +509,22 @@ its_mono:
1804 * ebp bl
1805 */
1806 movl ARGn(0),%ebx
1807+#ifndef __PIC__
1808 movl dv_videosegment_t_bs(%ebx),%esi
1809 movl bitstream_t_buf(%esi),%esi
1810+#endif
1811 leal dv_videosegment_t_mb(%ebx),%edi
1812
1813 movl $0,%eax
1814 movl $0,%ecx
1815 macloop:
1816+#ifdef __PIC__
1817+ movl %eax,m@GOTOFF(%esi)
1818+ movl %ecx,mb_start@GOTOFF(%esi)
1819+#else
1820 movl %eax,m
1821 movl %ecx,mb_start
1822+#endif
1823
1824 movl ARGn(0),%ebx
1825
1826@@ -400,7 +532,15 @@ macloop:
1827 /* mb->qno = bitstream_get(bs,4); */
1828 movl %ecx,%edx
1829 shr $3,%edx
1830+#ifdef __PIC__
1831+ pushl %esi
1832+ movl dv_videosegment_t_bs(%ebx),%esi
1833+ movl bitstream_t_buf(%esi),%esi
1834+#endif
1835 movzbl 3(%esi,%edx,1),%edx
1836+#ifdef __PIC__
1837+ popl %esi
1838+#endif
1839 andl $0xf,%edx
1840 movl %edx,dv_macroblock_t_qno(%edi)
1841
1842@@ -411,7 +551,11 @@ macloop:
1843 movl %edx,dv_macroblock_t_eob_count(%edi)
1844
1845 /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
1846+#ifdef __PIC__
1847+ movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
1848+#else
1849 movl dv_super_map_vertical(,%eax,4),%edx
1850+#endif
1851 movl dv_videosegment_t_i(%ebx),%ecx
1852 addl %ecx,%edx
1853
1854@@ -422,11 +566,20 @@ skarly:
1855 andl $1,%ecx
1856 shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
1857
1858+#ifdef __PIC__
1859+ leal mod_10@GOTOFF(%esi,%edx),%edx
1860+ movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1861+#else
1862 movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1863+#endif
1864 movl %edx,dv_macroblock_t_i(%edi)
1865
1866 /* mb->j = dv_super_map_horizontal[m]; */
1867+#ifdef __PIC__
1868+ movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
1869+#else
1870 movl dv_super_map_horizontal(,%eax,4),%edx
1871+#endif
1872 movl %edx,dv_macroblock_t_j(%edi)
1873
1874 /* mb->k = seg->k; */
1875@@ -445,12 +598,29 @@ blkloop:
1876 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
1877 */
1878 /* dc = bitstream_get(bs,9); */
1879+#ifdef __PIC__
1880+ movl mb_start@GOTOFF(%esi),%ecx
1881+#else
1882 movl mb_start,%ecx
1883+#endif
1884 shr $3,%ecx
1885+#ifdef __PIC__
1886+ movzbl blk_start@GOTOFF(%esi,%ebx),%edx
1887+#else
1888 movzbl blk_start(%ebx),%edx
1889+#endif
1890 addl %ecx,%edx
1891+#ifdef __PIC__
1892+ pushl %esi
1893+ movl ARGn(1),%esi
1894+ movl dv_videosegment_t_bs(%esi),%esi
1895+ movl bitstream_t_buf(%esi),%esi
1896+#endif
1897 movzbl (%esi,%edx,1),%eax /* hi byte */
1898 movzbl 1(%esi,%edx,1),%ecx /* lo byte */
1899+#ifdef __PIC__
1900+ popl %esi
1901+#endif
1902 shll $8,%eax
1903 orl %ecx,%eax
1904
1905@@ -477,7 +647,11 @@ blkloop:
1906
1907 /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
1908 shll $6,%eax
1909+#ifdef __PIC__
1910+ leal dv_reorder@GOTOFF+1(%esi,%eax),%eax /* add the ADDRESS &dv_reorder[0][1], like the non-PIC immediate; addl with a memory operand would add the bytes stored there */
1911+#else
1912 addl $(dv_reorder+1),%eax
1913+#endif
1914 movl %eax,dv_block_t_reorder(%ebp)
1915
1916 /* bl->reorder_sentinel = bl->reorder + 63; */
1917@@ -485,13 +659,22 @@ blkloop:
1918 movl %eax,dv_block_t_reorder_sentinel(%ebp)
1919
1920 /* bl->offset= mb_start + dv_parse_bit_start[b]; */
1921+#ifdef __PIC__
1922+ movl mb_start@GOTOFF(%esi),%ecx
1923+ movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
1924+#else
1925 movl mb_start,%ecx
1926 movl dv_parse_bit_start(,%ebx,4),%eax
1927+#endif
1928 addl %ecx,%eax
1929 movl %eax,dv_block_t_offset(%ebp)
1930
1931 /* bl->end= mb_start + dv_parse_bit_end[b]; */
1932+#ifdef __PIC__
1933+ movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
1934+#else
1935 movl dv_parse_bit_end(,%ebx,4),%eax
1936+#endif
1937 addl %ecx,%eax
1938 movl %eax,dv_block_t_end(%ebp)
1939
1940@@ -503,7 +686,11 @@ blkloop:
1941 /* no AC pass. Just zero out the remaining coeffs */
1942 movq dv_block_t_coeffs(%ebp),%mm1
1943 pxor %mm0,%mm0
1944+#ifdef __PIC__
1945+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
1946+#else
1947 pand const_f_0_0_0,%mm1
1948+#endif
1949 movq %mm1,dv_block_t_coeffs(%ebp)
1950 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1951 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1952@@ -528,18 +715,31 @@ do_ac_pass:
1953 pushl %ebp
1954 pushl %edi
1955 pushl %eax
1956+#ifdef __PIC__
1957+ xchgl %ebx,%esi; call dv_parse_ac_coeffs_pass0@PLT; xchgl %ebx,%esi /* i386 PLT stubs expect the GOT base in %ebx; swap it in from %esi for the call, then restore the block counter */
1958+#else
1959 call dv_parse_ac_coeffs_pass0
1960+#endif
1961 addl $12,%esp
1962 done_ac:
1963
1964+#ifdef __PIC__
1965+ movl n_blocks@GOTOFF(%esi),%eax
1966+#else
1967 movl n_blocks,%eax
1968+#endif
1969 addl $dv_block_t_size,%ebp
1970 incl %ebx
1971 cmpl %eax,%ebx
1972 jnz blkloop
1973
1974+#ifdef __PIC__
1975+ movl m@GOTOFF(%esi),%eax
1976+ movl mb_start@GOTOFF(%esi),%ecx
1977+#else
1978 movl m,%eax
1979 movl mb_start,%ecx
1980+#endif
1981 addl $(8 * 80),%ecx
1982 addl $dv_macroblock_t_size,%edi
1983 incl %eax
1984@@ -557,7 +757,11 @@ done_ac:
1985
1986 andl $DV_QUALITY_AC_MASK,%eax
1987 cmpl $DV_QUALITY_AC_2,%eax
1988+#ifdef __PIC__
1989+ jz dv_parse_ac_coeffs@PLT
1990+#else
1991 jz dv_parse_ac_coeffs
1992+#endif
1993 movl $0,%eax
1994 ret
1995