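Build libdv's 32-bit x86 assembly as position-independent code so the shared
library needs no text relocations (TEXTRELs), which PaX/hardened setups reject:
data is referenced through the GOT-relative MUNG()/MUNG_ARR() macros and the
__i686.get_pc_thunk helpers from the new asm_common.S, lookup tables
(dv_quant_offset/dv_quant_shifts, vlc_encode_lookup, vlc_num_bits_lookup) are
passed to the assembly as explicit arguments instead of being addressed
directly, and internal symbols are marked .hidden. The x86-64 files only gain
.hidden/.type annotations.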
1diff -Nurp libdv-0.104-old/libdv/asm_common.S libdv-0.104/libdv/asm_common.S
2--- libdv-0.104-old/libdv/asm_common.S 1970-01-01 01:00:00.000000000 +0100
3+++ libdv-0.104/libdv/asm_common.S 2006-01-01 22:44:43.000000000 +0100
4@@ -0,0 +1,37 @@
5+/* public domain, do what you want */
6+
7+#ifdef __PIC__
8+# define MUNG(sym) sym##@GOTOFF(%ebp)
9+# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
10+#else
11+# define MUNG(sym) sym
12+# define MUNG_ARR(sym, args...) sym(,##args)
13+#endif
14+
15+#ifdef __PIC__
16+# undef __i686 /* gcc define gets in our way */
17+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
18+.globl __i686.get_pc_thunk.bp
19+ .hidden __i686.get_pc_thunk.bp
20+ .type __i686.get_pc_thunk.bp,@function
21+__i686.get_pc_thunk.bp:
22+ movl (%esp), %ebp
23+ ret
24+# define LOAD_PIC_REG_BP() \
25+ call __i686.get_pc_thunk.bp ; \
26+ addl $_GLOBAL_OFFSET_TABLE_, %ebp
27+
28+ .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
29+.globl __i686.get_pc_thunk.si
30+ .hidden __i686.get_pc_thunk.si
31+ .type __i686.get_pc_thunk.si,@function
32+__i686.get_pc_thunk.si:
33+ movl (%esp), %esi
34+ ret
35+# define LOAD_PIC_REG_SI() \
36+ call __i686.get_pc_thunk.si ; \
37+ addl $_GLOBAL_OFFSET_TABLE_, %esi
38+#else
39+# define LOAD_PIC_REG_BP()
40+# define LOAD_PIC_REG_SI()
41+#endif
42diff -Nurp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S
43--- libdv-0.104-old/libdv/dct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100
44+++ libdv-0.104/libdv/dct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100
45@@ -53,17 +53,22 @@ scratch2: .quad 0
46 scratch3: .quad 0
47 scratch4: .quad 0
48
49+#include "asm_common.S"
50+
51 .text
52
53 .align 8
54 .global _dv_dct_88_block_mmx
55+.hidden _dv_dct_88_block_mmx
56+.type _dv_dct_88_block_mmx,@function
57 _dv_dct_88_block_mmx:
58
59 pushl %ebp
60- movl %esp, %ebp
61 pushl %esi
62
63- movl 8(%ebp), %esi # source
64+ LOAD_PIC_REG_BP()
65+
66+ movl 12(%esp), %esi # source
67
68 # column 0
69 movq 16*0(%esi), %mm0 # v0
70@@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
71
72 movq 16*3(%esi), %mm5 # v3
73 movq 16*4(%esi), %mm7 # v4
74- movq %mm7, scratch1 # scratch1: v4 ;
75+ movq %mm7, MUNG(scratch1) # scratch1: v4 ;
76 movq %mm5, %mm7 # duplicate v3
77- paddw scratch1, %mm5 # v03: v3+v4
78- psubw scratch1, %mm7 # v04: v3-v4
79- movq %mm5, scratch2 # scratch2: v03
80+ paddw MUNG(scratch1), %mm5 # v03: v3+v4
81+ psubw MUNG(scratch1), %mm7 # v04: v3-v4
82+ movq %mm5, MUNG(scratch2) # scratch2: v03
83 movq %mm0, %mm5 # mm5: v00
84
85- paddw scratch2, %mm0 # v10: v00+v03
86- psubw scratch2, %mm5 # v13: v00-v03
87- movq %mm3, scratch3 # scratch3: v02
88+ paddw MUNG(scratch2), %mm0 # v10: v00+v03
89+ psubw MUNG(scratch2), %mm5 # v13: v00-v03
90+ movq %mm3, MUNG(scratch3) # scratch3: v02
91 movq %mm1, %mm3 # duplicate v01
92
93- paddw scratch3, %mm1 # v11: v01+v02
94- psubw scratch3, %mm3 # v12: v01-v02
95+ paddw MUNG(scratch3), %mm1 # v11: v01+v02
96+ psubw MUNG(scratch3), %mm3 # v12: v01-v02
97
98- movq %mm6, scratch4 # scratch4: v05
99+ movq %mm6, MUNG(scratch4) # scratch4: v05
100 movq %mm0, %mm6 # duplicate v10
101
102 paddw %mm1, %mm0 # v10+v11
103@@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
104 movq %mm6, 16*4(%esi) # out4: v10-v11
105
106 movq %mm4, %mm0 # mm0: v06
107- paddw scratch4, %mm4 # v15: v05+v06
108+ paddw MUNG(scratch4), %mm4 # v15: v05+v06
109 paddw %mm2, %mm0 # v16: v07+v06
110
111- pmulhw WA3, %mm4 # v35~: WA3*v15
112+ pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
113 psllw $1, %mm4 # v35: compensate the coeefient scale
114
115 movq %mm4, %mm6 # duplicate v35
116@@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
117
118 paddw %mm5, %mm3 # v22: v12+v13
119
120- pmulhw WA1, %mm3 # v32~: WA1*v22
121+ pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
122 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
123 movq %mm5, %mm6 # duplicate v13
124
125@@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
126 movq %mm6, 16*6(%esi) # out6: v13-v32
127
128
129- paddw scratch4, %mm7 # v14n: v04+v05
130+ paddw MUNG(scratch4), %mm7 # v14n: v04+v05
131 movq %mm0, %mm5 # duplicate v16
132
133 psubw %mm7, %mm0 # va1: v16-v14n
134- pmulhw WA5, %mm0 # va0~: va1*WA5
135- pmulhw WA4, %mm5 # v36~~: v16*WA4
136- pmulhw WA2, %mm7 # v34~~: v14n*WA2
137+ pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
138+ pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
139+ pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
140 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
141 psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
142
143@@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
144
145 movq 16*3(%esi), %mm5 # v3
146 movq 16*4(%esi), %mm7 # v4
147- movq %mm7, scratch1 # scratch1: v4 ;
148+ movq %mm7, MUNG(scratch1) # scratch1: v4 ;
149 movq %mm5, %mm7 # duplicate v3
150- paddw scratch1, %mm5 # v03: v3+v4
151- psubw scratch1, %mm7 # v04: v3-v4
152- movq %mm5, scratch2 # scratch2: v03
153+ paddw MUNG(scratch1), %mm5 # v03: v3+v4
154+ psubw MUNG(scratch1), %mm7 # v04: v3-v4
155+ movq %mm5, MUNG(scratch2) # scratch2: v03
156 movq %mm0, %mm5 # mm5: v00
157
158- paddw scratch2, %mm0 # v10: v00+v03
159- psubw scratch2, %mm5 # v13: v00-v03
160- movq %mm3, scratch3 # scratc3: v02
161+ paddw MUNG(scratch2), %mm0 # v10: v00+v03
162+ psubw MUNG(scratch2), %mm5 # v13: v00-v03
163+ movq %mm3, MUNG(scratch3) # scratc3: v02
164 movq %mm1, %mm3 # duplicate v01
165
166- paddw scratch3, %mm1 # v11: v01+v02
167- psubw scratch3, %mm3 # v12: v01-v02
168+ paddw MUNG(scratch3), %mm1 # v11: v01+v02
169+ psubw MUNG(scratch3), %mm3 # v12: v01-v02
170
171- movq %mm6, scratch4 # scratc4: v05
172+ movq %mm6, MUNG(scratch4) # scratc4: v05
173 movq %mm0, %mm6 # duplicate v10
174
175 paddw %mm1, %mm0 # v10+v11
176@@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
177 movq %mm6, 16*4(%esi) # out4: v10-v11
178
179 movq %mm4, %mm0 # mm0: v06
180- paddw scratch4, %mm4 # v15: v05+v06
181+ paddw MUNG(scratch4), %mm4 # v15: v05+v06
182 paddw %mm2, %mm0 # v16: v07+v06
183
184- pmulhw WA3, %mm4 # v35~: WA3*v15
185+ pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
186 psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
187
188 movq %mm4, %mm6 # duplicate v35
189@@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
190
191 paddw %mm5, %mm3 # v22: v12+v13
192
193- pmulhw WA1, %mm3 # v32~: WA3*v15
194+ pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15
195 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
196 movq %mm5, %mm6 # duplicate v13
197
198@@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
199 movq %mm5, 16*2(%esi) # out2: v13+v32
200 movq %mm6, 16*6(%esi) # out6: v13-v32
201
202- paddw scratch4, %mm7 # v14n: v04+v05
203+ paddw MUNG(scratch4), %mm7 # v14n: v04+v05
204 movq %mm0, %mm5 # duplicate v16
205
206 psubw %mm7, %mm0 # va1: v16-v14n
207- pmulhw WA2, %mm7 # v34~~: v14n*WA2
208- pmulhw WA5, %mm0 # va0~: va1*WA5
209- pmulhw WA4, %mm5 # v36~~: v16*WA4
210+ pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
211+ pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
212+ pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
213 psllw $16-NSHIFT, %mm7
214 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
215 # scale note that WA4 is shifted 1 bit less than the others
216@@ -272,6 +277,8 @@ _dv_dct_88_block_mmx:
217
218 .align 8
219 .global _dv_dct_block_mmx_postscale_88
220+.hidden _dv_dct_block_mmx_postscale_88
221+.type _dv_dct_block_mmx_postscale_88,@function
222 _dv_dct_block_mmx_postscale_88:
223
224 pushl %ebp
225@@ -748,14 +755,17 @@ _dv_dct_block_mmx_postscale_88:
226
227 .align 8
228 .global _dv_dct_248_block_mmx
229+.hidden _dv_dct_248_block_mmx
230+.type _dv_dct_248_block_mmx,@function
231 _dv_dct_248_block_mmx:
232
233 pushl %ebp
234- movl %esp, %ebp
235 pushl %esi
236 pushl %edi
237
238- movl 8(%ebp), %esi # source
239+ LOAD_PIC_REG_BP()
240+
241+ movl 16(%esp), %esi # source
242
243 # column 0
244
245@@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
246 paddw %mm1, %mm0 # v20: v10+v11
247 psubw %mm1, %mm3 # v21: v10-v11
248
249- pmulhw WA1, %mm5 # v32~: WA1*v22
250+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
251 movq %mm4, %mm2
252 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
253
254@@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
255 paddw %mm1, %mm0 # v20: v10+v11
256 psubw %mm1, %mm3 # v21: v10-v11
257
258- pmulhw WA1, %mm5 # v32~: WA1*v22
259+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
260 movq %mm4, %mm2
261 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
262
263@@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
264 paddw %mm1, %mm0 # v20: v10+v11
265 psubw %mm1, %mm3 # v21: v10-v11
266
267- pmulhw WA1, %mm5 # v32~: WA1*v22
268+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
269 movq %mm4, %mm2
270 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
271
272@@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
273 paddw %mm1, %mm0 # v20: v10+v11
274 psubw %mm1, %mm3 # v21: v10-v11
275
276- pmulhw WA1, %mm5 # v32~: WA1*v22
277+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
278 movq %mm4, %mm2
279 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
280
281@@ -912,6 +922,8 @@ _dv_dct_248_block_mmx:
282
283 .align 8
284 .global _dv_dct_248_block_mmx_post_sum
285+.hidden _dv_dct_248_block_mmx_post_sum
286+.type _dv_dct_248_block_mmx_post_sum,@function
287 _dv_dct_248_block_mmx_post_sum:
288
289 pushl %ebp
290@@ -992,6 +1004,8 @@ _dv_dct_248_block_mmx_post_sum:
291
292 .align 8
293 .global _dv_dct_block_mmx_postscale_248
294+.hidden _dv_dct_block_mmx_postscale_248
295+.type _dv_dct_block_mmx_postscale_248,@function
296 _dv_dct_block_mmx_postscale_248:
297
298 pushl %ebp
299diff -Nurp libdv-0.104-old/libdv/dct_block_mmx_x86_64.S libdv-0.104/libdv/dct_block_mmx_x86_64.S
300--- libdv-0.104-old/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100
301+++ libdv-0.104/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100
302@@ -57,6 +57,8 @@ scratch4: .quad 0
303
304 .align 8
305 .global _dv_dct_88_block_mmx_x86_64
306+.hidden _dv_dct_88_block_mmx_x86_64
307+.type _dv_dct_88_block_mmx_x86_64,@function
308 _dv_dct_88_block_mmx_x86_64:
309
310 /* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */
311@@ -269,6 +271,8 @@ _dv_dct_88_block_mmx_x86_64:
312
313 .align 8
314 .global _dv_dct_block_mmx_x86_64_postscale_88
315+.hidden _dv_dct_block_mmx_x86_64_postscale_88
316+.type _dv_dct_block_mmx_x86_64_postscale_88,@function
317 _dv_dct_block_mmx_x86_64_postscale_88:
318
319 /* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */
320diff -Nurp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c
321--- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200
322+++ libdv-0.104/libdv/dv.c 2006-01-01 22:44:43.000000000 +0100
323@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
324 } /* dv_reconfigure */
325
326
327+extern uint8_t dv_quant_offset[4];
328+extern uint8_t dv_quant_shifts[22][4];
329+
330 static inline void
331 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
332 int i;
333@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
334 dv_idct_248 (co248, mb->b[i].coeffs);
335 } else {
336 #if ARCH_X86
337- _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
338+ _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
339 _dv_idct_88(mb->b[i].coeffs);
340 #elif ARCH_X86_64
341 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
342@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
343 dv_idct_248 (co248, mb->b[b].coeffs);
344 } else {
345 #if ARCH_X86
346- _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
347+ _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
348 _dv_weight_88_inverse(bl->coeffs);
349 _dv_idct_88(bl->coeffs);
350 #elif ARCH_X86_64
351diff -Nurp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c
352--- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100
353+++ libdv-0.104/libdv/encode.c 2006-01-01 22:44:43.000000000 +0100
354@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
355 }
356
357 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
358- dv_vlc_entry_t ** out);
359+ dv_vlc_entry_t ** out,
360+ dv_vlc_entry_t * lookup);
361
362 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
363 dv_vlc_entry_t ** out);
364@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
365 #elif ARCH_X86
366 int num_bits;
367
368- num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
369+ num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
370 emms();
371 #else
372 int num_bits;
373@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
374 return num_bits;
375 }
376
377-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
378+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
379 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
380
381 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
382@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
383 #elif ARCH_X86_64
384 return _dv_vlc_num_bits_block_x86_64(coeffs);
385 #else
386- return _dv_vlc_num_bits_block_x86(coeffs);
387+ return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
388 #endif
389 }
390
391diff -Nurp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S
392--- libdv-0.104-old/libdv/encode_x86.S 2006-01-01 22:44:22.000000000 +0100
393+++ libdv-0.104/libdv/encode_x86.S 2006-01-01 22:44:43.000000000 +0100
394@@ -23,13 +23,11 @@
395 * The libdv homepage is http://libdv.sourceforge.net/.
396 */
397
398-.data
399-ALLONE: .word 1,1,1,1
400-VLCADDMASK: .byte 255,0,0,0,255,0,0,0
401-
402 .text
403
404 .global _dv_vlc_encode_block_mmx
405+.hidden _dv_vlc_encode_block_mmx
406+.type _dv_vlc_encode_block_mmx,@function
407 _dv_vlc_encode_block_mmx:
408 pushl %ebx
409 pushl %esi
410@@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:
411
412 movl $63, %ecx
413
414- movl vlc_encode_lookup, %esi
415+ movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
416
417 pxor %mm0, %mm0
418 pxor %mm2, %mm2
419- movq VLCADDMASK, %mm1
420+ pushl $0x000000FF # these four lines
421+ pushl $0x000000FF # load VLCADDMASK
422+ movq (%esp), %mm1 # into %mm1 off the stack
423+ addl $8, %esp # --> no TEXTRELs
424 xorl %ebp, %ebp
425 subl $8, %edx
426 vlc_encode_block_mmx_loop:
427@@ -106,6 +107,8 @@ vlc_encode_block_out:
428 ret
429
430 .global _dv_vlc_num_bits_block_x86
431+.hidden _dv_vlc_num_bits_block_x86
432+.type _dv_vlc_num_bits_block_x86,@function
433 _dv_vlc_num_bits_block_x86:
434 pushl %ebx
435 pushl %esi
436@@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:
437 addl $2, %edi
438
439 movl $63, %ecx
440- movl vlc_num_bits_lookup, %esi
441+ movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
442
443 vlc_num_bits_block_x86_loop:
444 movw (%edi), %ax
445@@ -164,6 +167,8 @@ vlc_num_bits_block_out:
446 ret
447
448 .global _dv_vlc_encode_block_pass_1_x86
449+.hidden _dv_vlc_encode_block_pass_1_x86
450+.type _dv_vlc_encode_block_pass_1_x86,@function
451 _dv_vlc_encode_block_pass_1_x86:
452 pushl %ebx
453 pushl %esi
454@@ -240,6 +245,8 @@ vlc_encode_block_pass1_x86_out:
455 ret
456
457 .global _dv_classify_mmx
458+.hidden _dv_classify_mmx
459+.type _dv_classify_mmx,@function
460 _dv_classify_mmx:
461
462 pushl %ebp
463@@ -345,6 +352,8 @@ _dv_classify_mmx:
464 don't know why... */
465
466 .global _dv_reorder_block_mmx
467+.hidden _dv_reorder_block_mmx
468+.type _dv_reorder_block_mmx,@function
469 _dv_reorder_block_mmx:
470
471 pushl %ebp
472@@ -460,6 +469,8 @@ reorder_loop:
473 ret
474
475 .global _dv_need_dct_248_mmx_rows
476+.hidden _dv_need_dct_248_mmx_rows
477+.type _dv_need_dct_248_mmx_rows,@function
478 _dv_need_dct_248_mmx_rows:
479
480 pushl %ebp
481@@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
482 paddw %mm5, %mm1
483
484 paddw %mm1, %mm0
485-
486- pmaddwd ALLONE, %mm0
487+
488+ pushl $0x00010001 # these four lines
489+ pushl $0x00010001 # load ALLONE
490+ pmaddwd (%esp), %mm0 # into %mm0 off the stack
491+ addl $8, %esp # --> no TEXTRELs
492 movq %mm0, %mm1
493 psrlq $32, %mm1
494 paddd %mm1, %mm0
495diff -Nurp libdv-0.104-old/libdv/encode_x86_64.S libdv-0.104/libdv/encode_x86_64.S
496--- libdv-0.104-old/libdv/encode_x86_64.S 2006-01-01 22:44:22.000000000 +0100
497+++ libdv-0.104/libdv/encode_x86_64.S 2006-01-01 22:44:43.000000000 +0100
498@@ -30,6 +30,8 @@ VLCADDMASK: .byte 255,0,0,0,255,0,0,0
499 .text
500
501 .global _dv_vlc_encode_block_mmx_x86_64
502+.hidden _dv_vlc_encode_block_mmx_x86_64
503+.type _dv_vlc_encode_block_mmx_x86_64,@function
504 _dv_vlc_encode_block_mmx_x86_64:
505
506 /* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
507@@ -113,6 +115,8 @@ vlc_encode_block_out:
508 ret
509
510 .global _dv_vlc_num_bits_block_x86_64
511+.hidden _dv_vlc_num_bits_block_x86_64
512+.type _dv_vlc_num_bits_block_x86_64,@function
513 _dv_vlc_num_bits_block_x86_64:
514
515 /* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */
516@@ -173,6 +177,8 @@ vlc_num_bits_block_out:
517 ret
518
519 .global _dv_vlc_encode_block_pass_1_x86_64
520+.hidden _dv_vlc_encode_block_pass_1_x86_64
521+.type _dv_vlc_encode_block_pass_1_x86_64,@function
522 _dv_vlc_encode_block_pass_1_x86_64:
523
524 /*
525@@ -251,6 +257,8 @@ vlc_encode_block_pass1_x86_out:
526 ret
527
528 .global _dv_classify_mmx_x86_64
529+.hidden _dv_classify_mmx_x86_64
530+.type _dv_classify_mmx_x86_64,@function
531 _dv_classify_mmx_x86_64:
532
533 /* extern int _dv_classify_mmx_x86_64(dv_coeff_t * a, rdi
534@@ -355,6 +363,8 @@ _dv_classify_mmx_x86_64:
535 don't know why... */
536
537 .global _dv_reorder_block_mmx_x86_64
538+.hidden _dv_reorder_block_mmx_x86_64
539+.type _dv_reorder_block_mmx_x86_64,@function
540 _dv_reorder_block_mmx_x86_64:
541
542 /*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t * a, rdi
543@@ -469,6 +479,8 @@ reorder_loop:
544 ret
545
546 .global _dv_need_dct_248_mmx_x86_64_rows
547+.hidden _dv_need_dct_248_mmx_x86_64_rows
548+.type _dv_need_dct_248_mmx_x86_64_rows,@function
549 _dv_need_dct_248_mmx_x86_64_rows:
550
551 /* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl); rdi */
552diff -Nurp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S
553--- libdv-0.104-old/libdv/idct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100
554+++ libdv-0.104/libdv/idct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100
555@@ -8,16 +8,22 @@
556
557
558
559+#include "asm_common.S"
560+
561 .text
562+
563 .align 4
564 .globl _dv_idct_block_mmx
565+.hidden _dv_idct_block_mmx
566 .type _dv_idct_block_mmx,@function
567 _dv_idct_block_mmx:
568 pushl %ebp
569- movl %esp,%ebp
570 pushl %esi
571- leal preSC, %ecx
572- movl 8(%ebp),%esi /* source matrix */
573+
574+ LOAD_PIC_REG_BP()
575+
576+ leal MUNG(preSC), %ecx
577+ movl 12(%esp),%esi /* source matrix */
578
579 /*
580 * column 0: even part
581@@ -35,7 +41,7 @@ _dv_idct_block_mmx:
582 movq %mm1, %mm2 /* added 11/1/96 */
583 pmulhw 8*8(%esi),%mm5 /* V8 */
584 psubsw %mm0, %mm1 /* V16 */
585- pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
586+ pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
587 paddsw %mm0, %mm2 /* V17 */
588 movq %mm2, %mm0 /* duplicate V17 */
589 psraw $1, %mm2 /* t75=t82 */
590@@ -76,7 +82,7 @@ _dv_idct_block_mmx:
591 paddsw %mm0, %mm3 /* V29 ; free mm0 */
592 movq %mm7, %mm1 /* duplicate V26 */
593 psraw $1, %mm3 /* t91=t94 */
594- pmulhw x539f539f539f539f,%mm7 /* V33 */
595+ pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
596 psraw $1, %mm1 /* t96 */
597 movq %mm5, %mm0 /* duplicate V2 */
598 psraw $2, %mm4 /* t85=t87 */
599@@ -84,15 +90,15 @@ _dv_idct_block_mmx:
600 psubsw %mm4, %mm0 /* V28 ; free mm4 */
601 movq %mm0, %mm2 /* duplicate V28 */
602 psraw $1, %mm5 /* t90=t93 */
603- pmulhw x4546454645464546,%mm0 /* V35 */
604+ pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
605 psraw $1, %mm2 /* t97 */
606 movq %mm5, %mm4 /* duplicate t90=t93 */
607 psubsw %mm2, %mm1 /* V32 ; free mm2 */
608- pmulhw x61f861f861f861f8,%mm1 /* V36 */
609+ pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
610 psllw $1, %mm7 /* t107 */
611 paddsw %mm3, %mm5 /* V31 */
612 psubsw %mm3, %mm4 /* V30 ; free mm3 */
613- pmulhw x5a825a825a825a82,%mm4 /* V34 */
614+ pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
615 nop
616 psubsw %mm1, %mm0 /* V38 */
617 psubsw %mm7, %mm1 /* V37 ; free mm7 */
618@@ -159,7 +165,7 @@ _dv_idct_block_mmx:
619 psubsw %mm7, %mm1 /* V50 */
620 pmulhw 8*9(%esi), %mm5 /* V9 */
621 paddsw %mm7, %mm2 /* V51 */
622- pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
623+ pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
624 movq %mm2, %mm6 /* duplicate V51 */
625 psraw $1, %mm2 /* t138=t144 */
626 movq %mm3, %mm4 /* duplicate V1 */
627@@ -200,11 +206,11 @@ _dv_idct_block_mmx:
628 * even more by doing the correction step in a later stage when the number
629 * is actually multiplied by 16
630 */
631- paddw x0005000200010001, %mm4
632+ paddw MUNG(x0005000200010001), %mm4
633 psubsw %mm6, %mm3 /* V60 ; free mm6 */
634 psraw $1, %mm0 /* t154=t156 */
635 movq %mm3, %mm1 /* duplicate V60 */
636- pmulhw x539f539f539f539f, %mm1 /* V67 */
637+ pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
638 movq %mm5, %mm6 /* duplicate V3 */
639 psraw $2, %mm4 /* t148=t150 */
640 paddsw %mm4, %mm5 /* V61 */
641@@ -213,13 +219,13 @@ _dv_idct_block_mmx:
642 psllw $1, %mm1 /* t169 */
643 paddsw %mm0, %mm5 /* V65 -> result */
644 psubsw %mm0, %mm4 /* V64 ; free mm0 */
645- pmulhw x5a825a825a825a82, %mm4 /* V68 */
646+ pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
647 psraw $1, %mm3 /* t158 */
648 psubsw %mm6, %mm3 /* V66 */
649 movq %mm5, %mm2 /* duplicate V65 */
650- pmulhw x61f861f861f861f8, %mm3 /* V70 */
651+ pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
652 psllw $1, %mm6 /* t165 */
653- pmulhw x4546454645464546, %mm6 /* V69 */
654+ pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
655 psraw $1, %mm2 /* t172 */
656 /* moved from next block */
657 movq 8*5(%esi), %mm0 /* V56 */
658@@ -344,7 +350,7 @@ _dv_idct_block_mmx:
659 * movq 8*13(%esi), %mm4 tmt13
660 */
661 psubsw %mm4, %mm3 /* V134 */
662- pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
663+ pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
664 movq 8*9(%esi), %mm6 /* tmt9 */
665 paddsw %mm4, %mm5 /* V135 ; mm4 free */
666 movq %mm0, %mm4 /* duplicate tmt1 */
667@@ -373,17 +379,17 @@ _dv_idct_block_mmx:
668 psubsw %mm7, %mm0 /* V144 */
669 movq %mm0, %mm3 /* duplicate V144 */
670 paddsw %mm7, %mm2 /* V147 ; free mm7 */
671- pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
672+ pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
673 movq %mm1, %mm7 /* duplicate tmt3 */
674 paddsw %mm5, %mm7 /* V145 */
675 psubsw %mm5, %mm1 /* V146 ; free mm5 */
676 psubsw %mm1, %mm3 /* V150 */
677 movq %mm7, %mm5 /* duplicate V145 */
678- pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
679+ pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
680 psubsw %mm2, %mm5 /* V148 */
681- pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
682+ pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
683 psllw $2, %mm0 /* t311 */
684- pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
685+ pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
686 paddsw %mm2, %mm7 /* V149 ; free mm2 */
687 psllw $1, %mm1 /* t313 */
688 nop /* without the nop - freeze here for one clock */
689@@ -409,7 +415,7 @@ _dv_idct_block_mmx:
690 paddsw %mm3, %mm6 /* V164 ; free mm3 */
691 movq %mm4, %mm3 /* duplicate V142 */
692 psubsw %mm5, %mm4 /* V165 ; free mm5 */
693- movq %mm2, scratch7 /* out7 */
694+ movq %mm2, MUNG(scratch7) /* out7 */
695 psraw $4, %mm6
696 psraw $4, %mm4
697 paddsw %mm5, %mm3 /* V162 */
698@@ -420,11 +426,11 @@ _dv_idct_block_mmx:
699 */
700 movq %mm6, 8*9(%esi) /* out9 */
701 paddsw %mm1, %mm0 /* V161 */
702- movq %mm3, scratch5 /* out5 */
703+ movq %mm3, MUNG(scratch5) /* out5 */
704 psubsw %mm1, %mm5 /* V166 ; free mm1 */
705 movq %mm4, 8*11(%esi) /* out11 */
706 psraw $4, %mm5
707- movq %mm0, scratch3 /* out3 */
708+ movq %mm0, MUNG(scratch3) /* out3 */
709 movq %mm2, %mm4 /* duplicate V140 */
710 movq %mm5, 8*13(%esi) /* out13 */
711 paddsw %mm7, %mm2 /* V160 */
712@@ -434,7 +440,7 @@ _dv_idct_block_mmx:
713 /* moved from the next block */
714 movq 8*3(%esi), %mm7
715 psraw $4, %mm4
716- movq %mm2, scratch1 /* out1 */
717+ movq %mm2, MUNG(scratch1) /* out1 */
718 /* moved from the next block */
719 movq %mm0, %mm1
720 movq %mm4, 8*15(%esi) /* out15 */
721@@ -491,15 +497,15 @@ _dv_idct_block_mmx:
722 paddsw %mm4, %mm3 /* V113 ; free mm4 */
723 movq %mm0, %mm4 /* duplicate V110 */
724 paddsw %mm1, %mm2 /* V111 */
725- pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
726+ pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
727 psubsw %mm1, %mm5 /* V112 ; free mm1 */
728 psubsw %mm5, %mm4 /* V116 */
729 movq %mm2, %mm1 /* duplicate V111 */
730- pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
731+ pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
732 psubsw %mm3, %mm2 /* V114 */
733- pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
734+ pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
735 paddsw %mm3, %mm1 /* V115 ; free mm3 */
736- pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
737+ pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
738 psllw $2, %mm0 /* t266 */
739 movq %mm1, (%esi) /* save V115 */
740 psllw $1, %mm5 /* t268 */
741@@ -517,7 +523,7 @@ _dv_idct_block_mmx:
742 movq %mm6, %mm3 /* duplicate tmt4 */
743 psubsw %mm0, %mm6 /* V100 */
744 paddsw %mm0, %mm3 /* V101 ; free mm0 */
745- pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
746+ pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
747 movq %mm7, %mm5 /* duplicate tmt0 */
748 movq 8*8(%esi), %mm1 /* tmt8 */
749 paddsw %mm1, %mm7 /* V103 */
750@@ -551,10 +557,10 @@ _dv_idct_block_mmx:
751 movq 8*2(%esi), %mm3 /* V123 */
752 paddsw %mm4, %mm7 /* out0 */
753 /* moved up from next block */
754- movq scratch3, %mm0
755+ movq MUNG(scratch3), %mm0
756 psraw $4, %mm7
757 /* moved up from next block */
758- movq scratch5, %mm6
759+ movq MUNG(scratch5), %mm6
760 psubsw %mm4, %mm1 /* out14 ; free mm4 */
761 paddsw %mm3, %mm5 /* out2 */
762 psraw $4, %mm1
763@@ -565,7 +571,7 @@ _dv_idct_block_mmx:
764 movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
765 psraw $4, %mm2
766 /* moved up to the prev block */
767- movq scratch7, %mm4
768+ movq MUNG(scratch7), %mm4
769 /* moved up to the prev block */
770 psraw $4, %mm0
771 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
772@@ -579,7 +585,7 @@ _dv_idct_block_mmx:
773 * psraw $4, %mm0
774 * psraw $4, %mm6
775 */
776- movq scratch1, %mm1
777+ movq MUNG(scratch1), %mm1
778 psraw $4, %mm4
779 movq %mm0, 8*3(%esi) /* out3 */
780 psraw $4, %mm1
781diff -Nurp libdv-0.104-old/libdv/idct_block_mmx_x86_64.S libdv-0.104/libdv/idct_block_mmx_x86_64.S
782--- libdv-0.104-old/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100
783+++ libdv-0.104/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100
784@@ -17,6 +17,7 @@
785 .text
786 .align 4
787 .globl _dv_idct_block_mmx_x86_64
788+.hidden _dv_idct_block_mmx_x86_64
789 .type _dv_idct_block_mmx_x86_64,@function
790 _dv_idct_block_mmx_x86_64:
791 /* void _dv_idct_88(dv_coeff_t *block) */
792diff -Nurp libdv-0.104-old/libdv/parse.c libdv-0.104/libdv/parse.c
793--- libdv-0.104-old/libdv/parse.c 2004-10-20 05:49:24.000000000 +0200
794+++ libdv-0.104/libdv/parse.c 2006-01-01 22:44:43.000000000 +0100
795@@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
796 exit(0);
797 #endif
798 } /* dv_parse_ac_coeffs */
799+#if defined __GNUC__ && __ELF__
800+# define dv_strong_hidden_alias(name, aliasname) \
801+ extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
802+dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
803+#else
804+int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
805+#endif
806
807 /* ---------------------------------------------------------------------------
808 */
809diff -Nurp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c
810--- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200
811+++ libdv-0.104/libdv/quant.c 2006-01-01 22:44:43.000000000 +0100
812@@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
813 uint32_t dv_quant_248_mul_tab [2] [22] [64];
814 uint32_t dv_quant_88_mul_tab [2] [22] [64];
815
816-extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
817+extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][4]);
818 extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
819 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
820 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
821@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
822 _dv_quant_x86_64(block, qno, klass);
823 emms();
824 #else
825- _dv_quant_x86(block, qno, klass);
826+ _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
827 emms();
828 #endif
829 }
830diff -Nurp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h
831--- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200
832+++ libdv-0.104/libdv/quant.h 2006-01-01 22:44:43.000000000 +0100
833@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
834 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
835 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
836 dv_248_coeff_t *co);
837-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
838+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][4]);
839 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
840 extern void dv_quant_init (void);
841 #ifdef __cplusplus
842diff -Nurp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S
843--- libdv-0.104-old/libdv/quant_x86.S 2006-01-01 22:44:22.000000000 +0100
844+++ libdv-0.104/libdv/quant_x86.S 2006-01-01 22:44:43.000000000 +0100
845@@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
846 .text
847 .align 4
848 .globl _dv_quant_88_inverse_x86
849+.hidden _dv_quant_88_inverse_x86
850+.type _dv_quant_88_inverse_x86,@function
851 _dv_quant_88_inverse_x86:
852 pushl %ebx
853 pushl %esi
854@@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:
855
856 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
857 movl ARGn(1),%eax /* qno */
858+ movl ARGn(3),%ebx /* dv_quant_offset */
859+ addl ARGn(2),%ebx /* class */
860+ movzbl (%ebx),%ecx
861 movl ARGn(2),%ebx /* class */
862- movzbl dv_quant_offset(%ebx),%ecx
863 addl %ecx,%eax
864- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
865+ movl ARGn(4),%edx /* dv_quant_shifts */
866+ leal (%edx,%eax,4),%edx /* edx is pq */
867
868 /* extra = (class == 3); */
869 /* 0 1 2 3 */
870@@ -193,6 +198,8 @@ _dv_quant_88_inverse_x86:
871
872 .align 4
873 .globl _dv_quant_x86
874+.hidden _dv_quant_x86
875+.type _dv_quant_x86,@function
876 _dv_quant_x86:
877 pushl %ebx
878 pushl %ecx
879@@ -212,11 +219,13 @@ _dv_quant_x86:
880
881 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
882 movl ARGn(1),%eax /* qno */
883+ movl ARGn(3),%ebx /* offset */
884+ addl ARGn(2),%ebx /* class */
885+ movzbl (%ebx),%ecx
886 movl ARGn(2),%ebx /* class */
887-
888- movzbl dv_quant_offset(%ebx),%ecx
889+ movl ARGn(4),%edx /* shifts */
890 addl %ecx,%eax
891- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
892+ leal (%edx,%eax,4),%edx /* edx is pq */
893
894 /* extra = (class == 3); */
895 /* 0 1 2 3 */
896diff -Nurp libdv-0.104-old/libdv/quant_x86_64.S libdv-0.104/libdv/quant_x86_64.S
897--- libdv-0.104-old/libdv/quant_x86_64.S 2006-01-01 22:44:22.000000000 +0100
898+++ libdv-0.104/libdv/quant_x86_64.S 2006-01-01 22:44:43.000000000 +0100
899@@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
900 .text
901 .align 4
902 .globl _dv_quant_88_inverse_x86_64
903+.hidden _dv_quant_88_inverse_x86_64
904+.type _dv_quant_88_inverse_x86_64,@function
905 _dv_quant_88_inverse_x86_64:
906
907 /* Args are at block=rdi, qno=rsi, class=rdx */
908@@ -195,6 +197,8 @@ _dv_quant_88_inverse_x86_64:
909
910 .align 4
911 .globl _dv_quant_x86_64
912+.hidden _dv_quant_x86_64
913+.type _dv_quant_x86_64,@function
914 _dv_quant_x86_64:
915
916 /* Args are at block=rdi, qno=rsi, class=rdx */
917diff -Nurp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S
918--- libdv-0.104-old/libdv/rgbtoyuv.S 2006-01-01 22:44:22.000000000 +0100
919+++ libdv-0.104/libdv/rgbtoyuv.S 2006-01-01 22:44:43.000000000 +0100
920@@ -41,9 +41,6 @@
921 #define DV_WIDTH_SHORT_HALF 720
922 #define DV_WIDTH_BYTE_HALF 360
923
924-.global _dv_rgbtoycb_mmx
925-# .global yuvtoycb_mmx
926-
927 .data
928
929 .align 8
930@@ -110,20 +107,24 @@ VR0GR: .long 0,0
931 VBG0B: .long 0,0
932
933 #endif
934-
935+
936+#include "asm_common.S"
937+
938 .text
939
940-#define _inPtr 8
941-#define _rows 12
942-#define _columns 16
943-#define _outyPtr 20
944-#define _outuPtr 24
945-#define _outvPtr 28
946+#define _inPtr 24+8
947+#define _rows 24+12
948+#define _columns 24+16
949+#define _outyPtr 24+20
950+#define _outuPtr 24+24
951+#define _outvPtr 24+28
952
953+.global _dv_rgbtoycb_mmx
954+.hidden _dv_rgbtoycb_mmx
955+.type _dv_rgbtoycb_mmx,@function
956 _dv_rgbtoycb_mmx:
957
958 pushl %ebp
959- movl %esp, %ebp
960 pushl %eax
961 pushl %ebx
962 pushl %ecx
963@@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
964 pushl %esi
965 pushl %edi
966
967- leal ZEROSX, %eax #This section gets around a bug
968+ LOAD_PIC_REG_BP()
969+
970+ leal MUNG(ZEROSX), %eax #This section gets around a bug
971 movq (%eax), %mm0 #unlikely to persist
972- movq %mm0, ZEROS
973- leal OFFSETDX, %eax
974+ movq %mm0, MUNG(ZEROS)
975+ leal MUNG(OFFSETDX), %eax
976 movq (%eax), %mm0
977- movq %mm0, OFFSETD
978- leal OFFSETWX, %eax
979+ movq %mm0, MUNG(OFFSETD)
980+ leal MUNG(OFFSETWX), %eax
981 movq (%eax), %mm0
982- movq %mm0, OFFSETW
983- leal OFFSETBX, %eax
984+ movq %mm0, MUNG(OFFSETW)
985+ leal MUNG(OFFSETBX), %eax
986 movq (%eax), %mm0
987- movq %mm0, OFFSETB
988- leal YR0GRX, %eax
989+ movq %mm0, MUNG(OFFSETB)
990+ leal MUNG(YR0GRX), %eax
991 movq (%eax), %mm0
992- movq %mm0, YR0GR
993- leal YBG0BX, %eax
994+ movq %mm0, MUNG(YR0GR)
995+ leal MUNG(YBG0BX), %eax
996 movq (%eax), %mm0
997- movq %mm0, YBG0B
998- leal UR0GRX, %eax
999+ movq %mm0, MUNG(YBG0B)
1000+ leal MUNG(UR0GRX), %eax
1001 movq (%eax), %mm0
1002- movq %mm0, UR0GR
1003- leal UBG0BX, %eax
1004+ movq %mm0, MUNG(UR0GR)
1005+ leal MUNG(UBG0BX), %eax
1006 movq (%eax), %mm0
1007- movq %mm0, UBG0B
1008- leal VR0GRX, %eax
1009+ movq %mm0, MUNG(UBG0B)
1010+ leal MUNG(VR0GRX), %eax
1011 movq (%eax), %mm0
1012- movq %mm0, VR0GR
1013- leal VBG0BX, %eax
1014+ movq %mm0, MUNG(VR0GR)
1015+ leal MUNG(VBG0BX), %eax
1016 movq (%eax), %mm0
1017- movq %mm0, VBG0B
1018-
1019- movl _rows(%ebp), %eax
1020- movl _columns(%ebp), %ebx
1021+ movq %mm0, MUNG(VBG0B)
1022+ movl _rows(%esp), %eax
1023+ movl _columns(%esp), %ebx
1024 mull %ebx #number pixels
1025 shrl $3, %eax #number of loops
1026 movl %eax, %edi #loop counter in edi
1027- movl _inPtr(%ebp), %eax
1028- movl _outyPtr(%ebp), %ebx
1029- movl _outuPtr(%ebp), %ecx
1030- movl _outvPtr(%ebp), %edx
1031+ movl _inPtr(%esp), %eax
1032+ movl _outyPtr(%esp), %ebx
1033+ movl _outuPtr(%esp), %ecx
1034+ movl _outvPtr(%esp), %edx
1035 rgbtoycb_mmx_loop:
1036 movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
1037 pxor %mm6, %mm6 #0 -> mm6
1038@@ -184,29 +186,29 @@ rgbtoycb_mmx_loop:
1039 punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
1040 movq %mm0, %mm2 #R1B0G0R0 -> mm2
1041
1042- pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
1043+ pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
1044 movq %mm1, %mm3 #B1G1R1B0 -> mm3
1045
1046- pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
1047+ pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
1048 movq %mm2, %mm4 #R1B0G0R0 -> mm4
1049
1050- pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
1051+ pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
1052 movq %mm3, %mm5 #B1G1R1B0 -> mm5
1053
1054- pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
1055+ pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
1056 punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
1057
1058- pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
1059+ pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
1060 paddd %mm1, %mm0 #Y1Y0 -> mm0
1061
1062- pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
1063+ pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
1064
1065 movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
1066 paddd %mm3, %mm2 #U1U0 -> mm2
1067
1068 movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
1069
1070- punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
1071+ punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
1072 paddd %mm5, %mm4 #V1V0 -> mm4
1073
1074 movq %mm1, %mm5 #B3G3R3B2 -> mm5
1075@@ -214,29 +216,29 @@ rgbtoycb_mmx_loop:
1076
1077 paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
1078
1079- punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
1080+ punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
1081 movq %mm1, %mm3 #R3B2G2R2 -> mm3
1082
1083- pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
1084+ pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
1085 movq %mm5, %mm7 #B3G3R3B2 -> mm7
1086
1087- pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
1088+ pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
1089 psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
1090
1091- movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
1092+ movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
1093 movq %mm3, %mm6 #R3B2G2R2 -> mm6
1094- pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
1095+ pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
1096 psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
1097
1098 paddd %mm5, %mm1 #Y3Y2 -> mm1
1099 movq %mm7, %mm5 #B3G3R3B2 -> mm5
1100- pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
1101+ pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
1102 psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
1103
1104- pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
1105+ pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
1106 packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
1107
1108- pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
1109+ pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
1110 psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
1111
1112 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
1113@@ -251,58 +253,58 @@ rgbtoycb_mmx_loop:
1114 movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
1115 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
1116
1117- paddw OFFSETY, %mm0
1118+ paddw MUNG(OFFSETY), %mm0
1119 movq %mm0, (%ebx) #store Y3Y2Y1Y0
1120 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
1121
1122- movq TEMP0, %mm0 #R5B4G4R4 -> mm0
1123+ movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
1124 addl $8, %ebx
1125-
1126- punpcklbw ZEROS, %mm7 #B5G500 -> mm7
1127+
1128+ punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
1129 movq %mm0, %mm6 #R5B4G4R4 -> mm6
1130
1131- movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
1132+ movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
1133 psrlq $32, %mm0 #00R5B4 -> mm0
1134
1135 paddw %mm0, %mm7 #B5G5R5B4 -> mm7
1136 movq %mm6, %mm2 #B5B4G4R4 -> mm2
1137
1138- pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
1139+ pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
1140 movq %mm7, %mm0 #B5G5R5B4 -> mm0
1141
1142- pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
1143+ pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
1144 packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
1145
1146 addl $24, %eax #increment RGB count
1147
1148- movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
1149+ movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
1150 movq %mm6, %mm4 #B5B4G4R4 -> mm4
1151
1152- pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
1153+ pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
1154 movq %mm0, %mm3 #B5G5R5B4 -> mm0
1155
1156- pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
1157+ pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
1158 paddd %mm7, %mm2 #Y5Y4 -> mm2
1159
1160- pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
1161+ pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
1162 pxor %mm7, %mm7 #0 -> mm7
1163
1164- pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
1165+ pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
1166 punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
1167
1168 paddd %mm6, %mm0 #U5U4 -> mm0
1169 movq %mm1, %mm6 #B7G7R7B6 -> mm6
1170
1171- pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
1172+ pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
1173 punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
1174
1175 movq %mm5, %mm7 #R7B6G6R6 -> mm7
1176 paddd %mm4, %mm3 #V5V4 -> mm3
1177
1178- pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
1179+ pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
1180 movq %mm1, %mm4 #B7G7R7B6 -> mm4
1181
1182- pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
1183+ pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
1184 psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
1185
1186 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
1187@@ -310,25 +312,25 @@ rgbtoycb_mmx_loop:
1188 paddd %mm5, %mm6 #Y7Y6 -> mm6
1189 movq %mm7, %mm5 #R7B6G6R6 -> mm5
1190
1191- pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
1192+ pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
1193 psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
1194
1195- pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
1196+ pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
1197 psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
1198
1199 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
1200
1201- pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
1202+ pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
1203 paddd %mm4, %mm7 #U7U6 -> mm7
1204
1205 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
1206- paddw OFFSETY, %mm2
1207+ paddw MUNG(OFFSETY), %mm2
1208 movq %mm2, (%ebx) #store Y7Y6Y5Y4
1209
1210- movq ALLONE, %mm6
1211+ movq MUNG(ALLONE), %mm6
1212 packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
1213
1214- movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
1215+ movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
1216 pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
1217
1218 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
1219@@ -338,8 +340,8 @@ rgbtoycb_mmx_loop:
1220
1221 psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
1222 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
1223-
1224- movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
1225+
1226+ movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
1227
1228 movq %mm4, (%ecx) # store U
1229
1230@@ -372,6 +374,8 @@ rgbtoycb_mmx_loop:
1231 ret
1232
1233 .global _dv_ppm_copy_y_block_mmx
1234+.hidden _dv_ppm_copy_y_block_mmx
1235+.type _dv_ppm_copy_y_block_mmx,@function
1236 _dv_ppm_copy_y_block_mmx:
1237
1238 pushl %ebp
1239@@ -422,17 +426,20 @@ _dv_ppm_copy_y_block_mmx:
1240 ret
1241
1242 .global _dv_pgm_copy_y_block_mmx
1243+.hidden _dv_pgm_copy_y_block_mmx
1244+.type _dv_pgm_copy_y_block_mmx,@function
1245 _dv_pgm_copy_y_block_mmx:
1246
1247 pushl %ebp
1248- movl %esp, %ebp
1249 pushl %esi
1250 pushl %edi
1251-
1252- movl 8(%ebp), %edi # dest
1253- movl 12(%ebp), %esi # src
1254
1255- movq OFFSETY, %mm7
1256+ LOAD_PIC_REG_BP()
1257+
1258+ movl 16(%esp), %edi # dest
1259+ movl 20(%esp), %esi # src
1260+
1261+ movq MUNG(OFFSETY), %mm7
1262 pxor %mm6, %mm6
1263
1264 movq (%esi), %mm0
1265@@ -564,17 +571,20 @@ _dv_pgm_copy_y_block_mmx:
1266 ret
1267
1268 .global _dv_video_copy_y_block_mmx
1269+.hidden _dv_video_copy_y_block_mmx
1270+.type _dv_video_copy_y_block_mmx,@function
1271 _dv_video_copy_y_block_mmx:
1272
1273 pushl %ebp
1274- movl %esp, %ebp
1275 pushl %esi
1276 pushl %edi
1277-
1278- movl 8(%ebp), %edi # dest
1279- movl 12(%ebp), %esi # src
1280
1281- movq OFFSETBX, %mm7
1282+ LOAD_PIC_REG_BP()
1283+
1284+ movl 16(%esp), %edi # dest
1285+ movl 20(%esp), %esi # src
1286+
1287+ movq MUNG(OFFSETBX), %mm7
1288 pxor %mm6, %mm6
1289
1290 movq (%esi), %mm0
1291@@ -709,6 +719,8 @@ _dv_video_copy_y_block_mmx:
1292
1293
1294 .global _dv_ppm_copy_pal_c_block_mmx
1295+.hidden _dv_ppm_copy_pal_c_block_mmx
1296+.type _dv_ppm_copy_pal_c_block_mmx,@function
1297 _dv_ppm_copy_pal_c_block_mmx:
1298
1299 pushl %ebp
1300@@ -852,19 +864,21 @@ _dv_ppm_copy_pal_c_block_mmx:
1301 ret
1302
1303 .global _dv_pgm_copy_pal_c_block_mmx
1304+.hidden _dv_pgm_copy_pal_c_block_mmx
1305+.type _dv_pgm_copy_pal_c_block_mmx,@function
1306 _dv_pgm_copy_pal_c_block_mmx:
1307
1308 pushl %ebp
1309- movl %esp, %ebp
1310 pushl %esi
1311 pushl %edi
1312 pushl %ebx
1313-
1314- movl 8(%ebp), %edi # dest
1315- movl 12(%ebp), %esi # src
1316
1317+ LOAD_PIC_REG_BP()
1318+
1319+ movl 20(%esp), %edi # dest
1320+ movl 24(%esp), %esi # src
1321
1322- movq OFFSETBX, %mm7
1323+ movq MUNG(OFFSETBX), %mm7
1324 pxor %mm6, %mm6
1325
1326
1327@@ -1000,18 +1014,21 @@ _dv_pgm_copy_pal_c_block_mmx:
1328 ret
1329
1330 .global _dv_video_copy_pal_c_block_mmx
1331+.hidden _dv_video_copy_pal_c_block_mmx
1332+.type _dv_video_copy_pal_c_block_mmx,@function
1333 _dv_video_copy_pal_c_block_mmx:
1334
1335 pushl %ebp
1336- movl %esp, %ebp
1337 pushl %esi
1338 pushl %edi
1339 pushl %ebx
1340-
1341- movl 8(%ebp), %edi # dest
1342- movl 12(%ebp), %esi # src
1343
1344- movq OFFSETBX, %mm7
1345+ LOAD_PIC_REG_BP()
1346+
1347+ movl 20(%esp), %edi # dest
1348+ movl 24(%esp), %esi # src
1349+
1350+ movq MUNG(OFFSETBX), %mm7
1351 paddw %mm7, %mm7
1352 pxor %mm6, %mm6
1353
1354@@ -1095,21 +1112,23 @@ video_copy_pal_c_block_mmx_loop:
1355 ret
1356
1357 .global _dv_ppm_copy_ntsc_c_block_mmx
1358+.hidden _dv_ppm_copy_ntsc_c_block_mmx
1359+.type _dv_ppm_copy_ntsc_c_block_mmx,@function
1360 _dv_ppm_copy_ntsc_c_block_mmx:
1361
1362 pushl %ebp
1363- movl %esp, %ebp
1364 pushl %esi
1365 pushl %edi
1366 pushl %ebx
1367-
1368- movl 8(%ebp), %edi # dest
1369- movl 12(%ebp), %esi # src
1370+
1371+ LOAD_PIC_REG_BP()
1372+
1373+ movl 20(%esp), %edi # dest
1374+ movl 24(%esp), %esi # src
1375
1376 movl $4, %ebx
1377
1378- movq ALLONE, %mm6
1379-
1380+ movq MUNG(ALLONE), %mm6
1381 ppm_copy_ntsc_c_block_mmx_loop:
1382
1383 movq (%esi), %mm0
1384@@ -1168,17 +1187,20 @@ ppm_copy_ntsc_c_block_mmx_loop:
1385 ret
1386
1387 .global _dv_pgm_copy_ntsc_c_block_mmx
1388+.hidden _dv_pgm_copy_ntsc_c_block_mmx
1389+.type _dv_pgm_copy_ntsc_c_block_mmx,@function
1390 _dv_pgm_copy_ntsc_c_block_mmx:
1391
1392 pushl %ebp
1393- movl %esp, %ebp
1394 pushl %esi
1395 pushl %edi
1396-
1397- movl 8(%ebp), %edi # dest
1398- movl 12(%ebp), %esi # src
1399
1400- movq OFFSETBX, %mm7
1401+ LOAD_PIC_REG_BP()
1402+
1403+ movl 16(%esp), %edi # dest
1404+ movl 20(%esp), %esi # src
1405+
1406+ movq MUNG(OFFSETBX), %mm7
1407 paddw %mm7, %mm7
1408 pxor %mm6, %mm6
1409
1410@@ -1325,18 +1347,21 @@ _dv_pgm_copy_ntsc_c_block_mmx:
1411 ret
1412
1413 .global _dv_video_copy_ntsc_c_block_mmx
1414+.hidden _dv_video_copy_ntsc_c_block_mmx
1415+.type _dv_video_copy_ntsc_c_block_mmx,@function
1416 _dv_video_copy_ntsc_c_block_mmx:
1417
1418 pushl %ebp
1419- movl %esp, %ebp
1420 pushl %esi
1421 pushl %edi
1422 pushl %ebx
1423-
1424- movl 8(%ebp), %edi # dest
1425- movl 12(%ebp), %esi # src
1426
1427- movq OFFSETBX, %mm7
1428+ LOAD_PIC_REG_BP()
1429+
1430+ movl 20(%esp), %edi # dest
1431+ movl 24(%esp), %esi # src
1432+
1433+ movq MUNG(OFFSETBX), %mm7
1434 paddw %mm7, %mm7
1435 pxor %mm6, %mm6
1436
1437diff -Nurp libdv-0.104-old/libdv/rgbtoyuv_x86_64.S libdv-0.104/libdv/rgbtoyuv_x86_64.S
1438--- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:22.000000000 +0100
1439+++ libdv-0.104/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1440@@ -41,9 +41,6 @@
1441 #define DV_WIDTH_SHORT_HALF 720
1442 #define DV_WIDTH_BYTE_HALF 360
1443
1444-.global _dv_rgbtoycb_mmx_x86_64
1445-# .global yuvtoycb_mmx_x86_64
1446-
1447 .data
1448
1449 .align 8
1450diff -Nurp libdv-0.104-old/libdv/transpose_x86.S libdv-0.104/libdv/transpose_x86.S
1451--- libdv-0.104-old/libdv/transpose_x86.S 2006-01-01 22:44:22.000000000 +0100
1452+++ libdv-0.104/libdv/transpose_x86.S 2006-01-01 22:44:43.000000000 +0100
1453@@ -1,5 +1,7 @@
1454 .text
1455 .global _dv_transpose_mmx
1456+.hidden _dv_transpose_mmx
1457+.type _dv_transpose_mmx,@function
1458
1459 _dv_transpose_mmx:
1460 pushl %ebp
1461diff -Nurp libdv-0.104-old/libdv/transpose_x86_64.S libdv-0.104/libdv/transpose_x86_64.S
1462--- libdv-0.104-old/libdv/transpose_x86_64.S 2006-01-01 22:44:22.000000000 +0100
1463+++ libdv-0.104/libdv/transpose_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1464@@ -1,5 +1,7 @@
1465 .text
1466 .global _dv_transpose_mmx_x86_64
1467+.hidden _dv_transpose_mmx_x86_64
1468+.type _dv_transpose_mmx_x86_64,@function
1469
1470 _dv_transpose_mmx_x86_64:
1471
1472diff -Nurp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S
1473--- libdv-0.104-old/libdv/vlc_x86.S 2006-01-01 22:44:22.000000000 +0100
1474+++ libdv-0.104/libdv/vlc_x86.S 2006-01-01 22:45:51.000000000 +0100
1475@@ -1,29 +1,38 @@
1476 #include "asmoff.h"
1477+ #include "asm_common.S"
1478+
1479 .text
1480 .align 4
1481 .globl dv_decode_vlc
1482+.globl asm_dv_decode_vlc
1483+.hidden asm_dv_decode_vlc
1484+asm_dv_decode_vlc = dv_decode_vlc
1485+
1486 .type dv_decode_vlc,@function
1487 dv_decode_vlc:
1488 pushl %ebx
1489+ pushl %ebp
1490
1491- /* Args are at 8(%esp). */
1492- movl 8(%esp),%eax /* %eax is bits */
1493- movl 12(%esp),%ebx /* %ebx is maxbits */
1494+ LOAD_PIC_REG_BP()
1495+
1496+ /* Args are at 12(%esp). */
1497+ movl 12(%esp),%eax /* %eax is bits */
1498+ movl 16(%esp),%ebx /* %ebx is maxbits */
1499 andl $0x3f,%ebx /* limit index range STL*/
1500
1501- movl dv_vlc_class_index_mask(,%ebx,4),%edx
1502+ movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
1503 andl %eax,%edx
1504- movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
1505+ movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
1506 sarl %cl,%edx
1507- movl dv_vlc_classes(,%ebx,4),%ecx
1508+ movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
1509 movsbl (%ecx,%edx,1),%edx /* %edx is class */
1510
1511- movl dv_vlc_index_mask(,%edx,4),%ebx
1512- movl dv_vlc_index_rshift(,%edx,4),%ecx
1513+ movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1514+ movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1515 andl %eax,%ebx
1516 sarl %cl,%ebx
1517
1518- movl dv_vlc_lookups(,%edx,4),%edx
1519+ movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1520 movl (%edx,%ebx,4),%edx
1521
1522 /* Now %edx holds result, like this:
1523@@ -42,7 +51,7 @@ dv_decode_vlc:
1524 movl %edx,%ecx
1525 sarl $8,%ecx
1526 andl $0xff,%ecx
1527- movl sign_mask(,%ecx,4),%ebx
1528+ movl MUNG_ARR(sign_mask,%ecx,4),%ebx
1529 andl %ebx,%eax
1530 negl %eax
1531 sarl $31,%eax
1532@@ -63,14 +72,14 @@ dv_decode_vlc:
1533 *result = broken;
1534 Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
1535 */
1536- movl 12(%esp),%ebx /* %ebx is maxbits */
1537+	movl 16(%esp),%ebx	/* %ebx is maxbits */
1538 subl %ecx,%ebx
1539 sbbl %ebx,%ebx
1540 orl %ebx,%edx
1541
1542- movl 16(%esp),%eax
1543+	movl 20(%esp),%eax
1544 movl %edx,(%eax)
1545-
1546+ popl %ebp
1547 popl %ebx
1548 ret
1549
1550@@ -80,21 +89,28 @@ dv_decode_vlc:
1551 .type __dv_decode_vlc,@function
1552 __dv_decode_vlc:
1553 pushl %ebx
1554+ pushl %ebp
1555+
1556+ LOAD_PIC_REG_BP()
1557
1558- /* Args are at 8(%esp). */
1559- movl 8(%esp),%eax /* %eax is bits */
1560+ /* Args are at 12(%esp). */
1561+ movl 12(%esp),%eax /* %eax is bits */
1562
1563 movl %eax,%edx /* %edx is class */
1564 andl $0xfe00,%edx
1565 sarl $9,%edx
1566+#ifdef __PIC__
1567+ movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
1568+#else
1569 movsbl dv_vlc_class_lookup5(%edx),%edx
1570-
1571- movl dv_vlc_index_mask(,%edx,4),%ebx
1572- movl dv_vlc_index_rshift(,%edx,4),%ecx
1573+#endif
1574+
1575+ movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1576+ movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1577 andl %eax,%ebx
1578 sarl %cl,%ebx
1579
1580- movl dv_vlc_lookups(,%edx,4),%edx
1581+ movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1582 movl (%edx,%ebx,4),%edx
1583
1584 /* Now %edx holds result, like this:
1585@@ -112,7 +128,7 @@ __dv_decode_vlc:
1586 movl %edx,%ecx
1587 sarl $8,%ecx
1588 andl $0xff,%ecx
1589- movl sign_mask(,%ecx,4),%ecx
1590+ movl MUNG_ARR(sign_mask,%ecx,4),%ecx
1591 andl %ecx,%eax
1592 negl %eax
1593 sarl $31,%eax
1594@@ -127,9 +143,9 @@ __dv_decode_vlc:
1595 xorl %eax,%edx
1596 subl %eax,%edx
1597
1598- movl 12(%esp),%eax
1599+ movl 16(%esp),%eax
1600 movl %edx,(%eax)
1601-
1602+ popl %ebp
1603 popl %ebx
1604 ret
1605
1606@@ -140,13 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1607 */
1608 .text
1609 .align 4
1610+.globl asm_dv_parse_ac_coeffs_pass0
1611+.hidden asm_dv_parse_ac_coeffs_pass0
1612+ asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
1613+
1614 .globl dv_parse_ac_coeffs_pass0
1615+.type dv_parse_ac_coeffs_pass0,@function
1616 dv_parse_ac_coeffs_pass0:
1617 pushl %ebx
1618 pushl %edi
1619 pushl %esi
1620 pushl %ebp
1621
1622+ LOAD_PIC_REG_SI()
1623+
1624 #define ARGn(N) (20+(4*(N)))(%esp)
1625
1626 /*
1627@@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
1628 ebp bl
1629 */
1630 movl ARGn(2),%ebp
1631+#ifndef __PIC__
1632 movl ARGn(0),%esi
1633 movl bitstream_t_buf(%esi),%esi
1634+#endif
1635 movl dv_block_t_offset(%ebp),%edi
1636 movl dv_block_t_reorder(%ebp),%ebx
1637
1638@@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
1639
1640 movq dv_block_t_coeffs(%ebp),%mm1
1641 pxor %mm0,%mm0
1642+#ifdef __PIC__
1643+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
1644+#else
1645 pand const_f_0_0_0,%mm1
1646+#endif
1647 movq %mm1,dv_block_t_coeffs(%ebp)
1648 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1649 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1650@@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
1651 readloop:
1652 movl %edi,%ecx
1653 shrl $3,%ecx
1654+#ifdef __PIC__
1655+ pushl %esi
1656+ movl ARGn(1),%esi
1657+ movl bitstream_t_buf(%esi),%esi
1658+#endif
1659 movzbl (%esi,%ecx,1),%eax
1660 movzbl 1(%esi,%ecx,1),%edx
1661 movzbl 2(%esi,%ecx,1),%ecx
1662+#ifdef __PIC__
1663+ popl %esi
1664+#endif
1665 shll $16,%eax
1666 shll $8,%edx
1667 orl %ecx,%eax
1668@@ -217,7 +254,11 @@ readloop:
1669
1670 /* Attempt to use the shortcut first. If it hits, then
1671 this vlc term has been decoded. */
1672+#ifdef __PIC__
1673+ movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
1674+#else
1675 movl dv_vlc_class1_shortcut(,%ecx,4),%edx
1676+#endif
1677 test $0x80,%edx
1678 je done_decode
1679
1680@@ -228,12 +269,19 @@ readloop:
1681 movl %ebx,dv_block_t_reorder(%ebp)
1682
1683 /* %eax is bits */
1684-
1685+#ifdef __PIC__
1686+ movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
1687+
1688+ movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
1689+ movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
1690+ movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
1691+#else
1692 movsbl dv_vlc_class_lookup5(%ecx),%ecx
1693
1694 movl dv_vlc_index_mask(,%ecx,4),%ebx
1695 movl dv_vlc_lookups(,%ecx,4),%edx
1696 movl dv_vlc_index_rshift(,%ecx,4),%ecx
1697+#endif
1698 andl %eax,%ebx
1699 sarl %cl,%ebx
1700
1701@@ -256,7 +304,11 @@ readloop:
1702 movl %edx,%ecx
1703 sarl $8,%ecx
1704 andl $0xff,%ecx
1705+#ifdef __PIC__
1706+ movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
1707+#else
1708 movl sign_mask(,%ecx,4),%ecx
1709+#endif
1710 andl %ecx,%eax
1711 negl %eax
1712 sarl $31,%eax
1713@@ -326,10 +378,16 @@ alldone:
1714
1715 slowpath:
1716 /* slow path: use dv_decode_vlc */;
1717+#ifdef __PIC__
1718+ pushl %esi
1719+ leal vlc@GOTOFF(%esi),%esi
1720+ xchgl %esi,(%esp) /* last parameter is &vlc */
1721+#else
1722 pushl $vlc /* last parameter is &vlc */
1723+#endif
1724 pushl %edx /* bits_left */
1725 pushl %eax /* bits */
1726- call dv_decode_vlc
1727+ call asm_dv_decode_vlc
1728 addl $12,%esp
1729 test $0x80,%edx /* If (vlc.run < 0) break */
1730 jne escape
1731@@ -359,12 +417,15 @@ show16:
1732 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1733 */
1734 .globl dv_parse_video_segment
1735+ .type dv_parse_video_segment,@function
1736 dv_parse_video_segment:
1737 pushl %ebx
1738 pushl %edi
1739 pushl %esi
1740 pushl %ebp
1741
1742+ LOAD_PIC_REG_SI()
1743+
1744 #define ARGn(N) (20+(4*(N)))(%esp)
1745
1746 movl ARGn(1),%eax /* quality */
1747@@ -373,7 +434,11 @@ dv_parse_video_segment:
1748 jz its_mono
1749 movl $6,%ebx
1750 its_mono:
1751+#ifdef __PIC__
1752+ movl %ebx,n_blocks@GOTOFF(%esi)
1753+#else
1754 movl %ebx,n_blocks
1755+#endif
1756
1757 /*
1758 * ebx seg/b
1759@@ -384,15 +449,22 @@ its_mono:
1760 * ebp bl
1761 */
1762 movl ARGn(0),%ebx
1763+#ifndef __PIC__
1764 movl dv_videosegment_t_bs(%ebx),%esi
1765 movl bitstream_t_buf(%esi),%esi
1766+#endif
1767 leal dv_videosegment_t_mb(%ebx),%edi
1768
1769 movl $0,%eax
1770 movl $0,%ecx
1771 macloop:
1772+#ifdef __PIC__
1773+ movl %eax,m@GOTOFF(%esi)
1774+ movl %ecx,mb_start@GOTOFF(%esi)
1775+#else
1776 movl %eax,m
1777 movl %ecx,mb_start
1778+#endif
1779
1780 movl ARGn(0),%ebx
1781
1782@@ -400,7 +472,15 @@ macloop:
1783 /* mb->qno = bitstream_get(bs,4); */
1784 movl %ecx,%edx
1785 shr $3,%edx
1786+#ifdef __PIC__
1787+ pushl %esi
1788+ movl dv_videosegment_t_bs(%ebx),%esi
1789+ movl bitstream_t_buf(%esi),%esi
1790+#endif
1791 movzbl 3(%esi,%edx,1),%edx
1792+#ifdef __PIC__
1793+ popl %esi
1794+#endif
1795 andl $0xf,%edx
1796 movl %edx,dv_macroblock_t_qno(%edi)
1797
1798@@ -411,7 +491,11 @@ macloop:
1799 movl %edx,dv_macroblock_t_eob_count(%edi)
1800
1801 /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
1802+#ifdef __PIC__
1803+ movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
1804+#else
1805 movl dv_super_map_vertical(,%eax,4),%edx
1806+#endif
1807 movl dv_videosegment_t_i(%ebx),%ecx
1808 addl %ecx,%edx
1809
1810@@ -422,11 +506,20 @@ skarly:
1811 andl $1,%ecx
1812 shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
1813
1814+#ifdef __PIC__
1815+ leal mod_10@GOTOFF(%esi,%edx),%edx
1816+ movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1817+#else
1818 movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1819+#endif
1820 movl %edx,dv_macroblock_t_i(%edi)
1821
1822 /* mb->j = dv_super_map_horizontal[m]; */
1823+#ifdef __PIC__
1824+ movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
1825+#else
1826 movl dv_super_map_horizontal(,%eax,4),%edx
1827+#endif
1828 movl %edx,dv_macroblock_t_j(%edi)
1829
1830 /* mb->k = seg->k; */
1831@@ -445,12 +538,29 @@ blkloop:
1832 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
1833 */
1834 /* dc = bitstream_get(bs,9); */
1835+#ifdef __PIC__
1836+ movl mb_start@GOTOFF(%esi),%ecx
1837+#else
1838 movl mb_start,%ecx
1839+#endif
1840 shr $3,%ecx
1841+#ifdef __PIC__
1842+ movzbl blk_start@GOTOFF(%esi,%ebx),%edx
1843+#else
1844 movzbl blk_start(%ebx),%edx
1845+#endif
1846 addl %ecx,%edx
1847+#ifdef __PIC__
1848+ pushl %esi
1849+ movl ARGn(1),%esi
1850+ movl dv_videosegment_t_bs(%esi),%esi
1851+ movl bitstream_t_buf(%esi),%esi
1852+#endif
1853 movzbl (%esi,%edx,1),%eax /* hi byte */
1854 movzbl 1(%esi,%edx,1),%ecx /* lo byte */
1855+#ifdef __PIC__
1856+ popl %esi
1857+#endif
1858 shll $8,%eax
1859 orl %ecx,%eax
1860
1861@@ -477,7 +587,11 @@ blkloop:
1862
1863 /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
1864 shll $6,%eax
1865+#ifdef __PIC__
1866+	leal dv_reorder@GOTOFF+1(%esi,%eax),%eax
1867+#else
1868 addl $(dv_reorder+1),%eax
1869+#endif
1870 movl %eax,dv_block_t_reorder(%ebp)
1871
1872 /* bl->reorder_sentinel = bl->reorder + 63; */
1873@@ -485,13 +599,22 @@ blkloop:
1874 movl %eax,dv_block_t_reorder_sentinel(%ebp)
1875
1876 /* bl->offset= mb_start + dv_parse_bit_start[b]; */
1877+#ifdef __PIC__
1878+ movl mb_start@GOTOFF(%esi),%ecx
1879+ movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
1880+#else
1881 movl mb_start,%ecx
1882 movl dv_parse_bit_start(,%ebx,4),%eax
1883+#endif
1884 addl %ecx,%eax
1885 movl %eax,dv_block_t_offset(%ebp)
1886
1887 /* bl->end= mb_start + dv_parse_bit_end[b]; */
1888+#ifdef __PIC__
1889+ movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
1890+#else
1891 movl dv_parse_bit_end(,%ebx,4),%eax
1892+#endif
1893 addl %ecx,%eax
1894 movl %eax,dv_block_t_end(%ebp)
1895
1896@@ -503,7 +626,11 @@ blkloop:
1897 /* no AC pass. Just zero out the remaining coeffs */
1898 movq dv_block_t_coeffs(%ebp),%mm1
1899 pxor %mm0,%mm0
1900+#ifdef __PIC__
1901+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
1902+#else
1903 pand const_f_0_0_0,%mm1
1904+#endif
1905 movq %mm1,dv_block_t_coeffs(%ebp)
1906 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1907 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1908@@ -528,18 +655,27 @@ do_ac_pass:
1909 pushl %ebp
1910 pushl %edi
1911 pushl %eax
1912- call dv_parse_ac_coeffs_pass0
1913+ call asm_dv_parse_ac_coeffs_pass0
1914 addl $12,%esp
1915 done_ac:
1916
1917+#ifdef __PIC__
1918+ movl n_blocks@GOTOFF(%esi),%eax
1919+#else
1920 movl n_blocks,%eax
1921+#endif
1922 addl $dv_block_t_size,%ebp
1923 incl %ebx
1924 cmpl %eax,%ebx
1925 jnz blkloop
1926
1927+#ifdef __PIC__
1928+ movl m@GOTOFF(%esi),%eax
1929+ movl mb_start@GOTOFF(%esi),%ecx
1930+#else
1931 movl m,%eax
1932 movl mb_start,%ecx
1933+#endif
1934 addl $(8 * 80),%ecx
1935 addl $dv_macroblock_t_size,%edi
1936 incl %eax
1937@@ -557,7 +693,7 @@ done_ac:
1938
1939 andl $DV_QUALITY_AC_MASK,%eax
1940 cmpl $DV_QUALITY_AC_2,%eax
1941- jz dv_parse_ac_coeffs
1942+ jz asm_dv_parse_ac_coeffs
1943 movl $0,%eax
1944 ret
1945
1946diff -Nurp libdv-0.104-old/libdv/vlc_x86_64.S libdv-0.104/libdv/vlc_x86_64.S
1947--- libdv-0.104-old/libdv/vlc_x86_64.S 2006-01-01 22:44:23.000000000 +0100
1948+++ libdv-0.104/libdv/vlc_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1949@@ -169,7 +169,8 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1950 .text
1951 .align 4
1952 .globl dv_parse_ac_coeffs_pass0
1953-
1954+.type dv_parse_ac_coeffs_pass0,@function
1955+
1956 dv_parse_ac_coeffs_pass0:
1957
1958 /* Args are at rdi=bs, rsi=mb, rdx=bl */
1959@@ -422,6 +423,7 @@ show16: /* not u
1960 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1961 */
1962 .globl dv_parse_video_segment
1963+ .type dv_parse_video_segment,@function
1964 dv_parse_video_segment:
1965
1966 /* Args are at rdi=seg, rsi=quality */