initial import
[patches.git] / libdv-0.104-pic-fix-pax.patch
1 diff -Nurp libdv-0.104-old/libdv/asm_common.S libdv-0.104/libdv/asm_common.S
2 --- libdv-0.104-old/libdv/asm_common.S 1970-01-01 01:00:00.000000000 +0100
3 +++ libdv-0.104/libdv/asm_common.S 2006-01-01 22:44:43.000000000 +0100
4 @@ -0,0 +1,37 @@
5 +/* public domain, do what you want */
6 +
7 +#ifdef __PIC__
8 +# define MUNG(sym) sym##@GOTOFF(%ebp)
9 +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
10 +#else
11 +# define MUNG(sym) sym
12 +# define MUNG_ARR(sym, args...) sym(,##args)
13 +#endif
14 +
15 +#ifdef __PIC__
16 +# undef __i686 /* gcc define gets in our way */
17 + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
18 +.globl __i686.get_pc_thunk.bp
19 + .hidden __i686.get_pc_thunk.bp
20 + .type __i686.get_pc_thunk.bp,@function
21 +__i686.get_pc_thunk.bp:
22 + movl (%esp), %ebp
23 + ret
24 +# define LOAD_PIC_REG_BP() \
25 + call __i686.get_pc_thunk.bp ; \
26 + addl $_GLOBAL_OFFSET_TABLE_, %ebp
27 +
28 + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
29 +.globl __i686.get_pc_thunk.si
30 + .hidden __i686.get_pc_thunk.si
31 + .type __i686.get_pc_thunk.si,@function
32 +__i686.get_pc_thunk.si:
33 + movl (%esp), %esi
34 + ret
35 +# define LOAD_PIC_REG_SI() \
36 + call __i686.get_pc_thunk.si ; \
37 + addl $_GLOBAL_OFFSET_TABLE_, %esi
38 +#else
39 +# define LOAD_PIC_REG_BP()
40 +# define LOAD_PIC_REG_SI()
41 +#endif
42 diff -Nurp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S
43 --- libdv-0.104-old/libdv/dct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100
44 +++ libdv-0.104/libdv/dct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100
45 @@ -53,17 +53,22 @@ scratch2: .quad 0
46 scratch3: .quad 0
47 scratch4: .quad 0
48
49 +#include "asm_common.S"
50 +
51 .text
52
53 .align 8
54 .global _dv_dct_88_block_mmx
55 +.hidden _dv_dct_88_block_mmx
56 +.type _dv_dct_88_block_mmx,@function
57 _dv_dct_88_block_mmx:
58
59 pushl %ebp
60 - movl %esp, %ebp
61 pushl %esi
62
63 - movl 8(%ebp), %esi # source
64 + LOAD_PIC_REG_BP()
65 +
66 + movl 12(%esp), %esi # source
67
68 # column 0
69 movq 16*0(%esi), %mm0 # v0
70 @@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
71
72 movq 16*3(%esi), %mm5 # v3
73 movq 16*4(%esi), %mm7 # v4
74 - movq %mm7, scratch1 # scratch1: v4 ;
75 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
76 movq %mm5, %mm7 # duplicate v3
77 - paddw scratch1, %mm5 # v03: v3+v4
78 - psubw scratch1, %mm7 # v04: v3-v4
79 - movq %mm5, scratch2 # scratch2: v03
80 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
81 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
82 + movq %mm5, MUNG(scratch2) # scratch2: v03
83 movq %mm0, %mm5 # mm5: v00
84
85 - paddw scratch2, %mm0 # v10: v00+v03
86 - psubw scratch2, %mm5 # v13: v00-v03
87 - movq %mm3, scratch3 # scratch3: v02
88 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
89 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
90 + movq %mm3, MUNG(scratch3) # scratch3: v02
91 movq %mm1, %mm3 # duplicate v01
92
93 - paddw scratch3, %mm1 # v11: v01+v02
94 - psubw scratch3, %mm3 # v12: v01-v02
95 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
96 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
97
98 - movq %mm6, scratch4 # scratch4: v05
99 + movq %mm6, MUNG(scratch4) # scratch4: v05
100 movq %mm0, %mm6 # duplicate v10
101
102 paddw %mm1, %mm0 # v10+v11
103 @@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
104 movq %mm6, 16*4(%esi) # out4: v10-v11
105
106 movq %mm4, %mm0 # mm0: v06
107 - paddw scratch4, %mm4 # v15: v05+v06
108 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
109 paddw %mm2, %mm0 # v16: v07+v06
110
111 - pmulhw WA3, %mm4 # v35~: WA3*v15
112 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
113 psllw $1, %mm4 # v35: compensate the coeefient scale
114
115 movq %mm4, %mm6 # duplicate v35
116 @@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
117
118 paddw %mm5, %mm3 # v22: v12+v13
119
120 - pmulhw WA1, %mm3 # v32~: WA1*v22
121 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
122 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
123 movq %mm5, %mm6 # duplicate v13
124
125 @@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
126 movq %mm6, 16*6(%esi) # out6: v13-v32
127
128
129 - paddw scratch4, %mm7 # v14n: v04+v05
130 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
131 movq %mm0, %mm5 # duplicate v16
132
133 psubw %mm7, %mm0 # va1: v16-v14n
134 - pmulhw WA5, %mm0 # va0~: va1*WA5
135 - pmulhw WA4, %mm5 # v36~~: v16*WA4
136 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
137 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
138 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
139 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
140 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
141 psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
142
143 @@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
144
145 movq 16*3(%esi), %mm5 # v3
146 movq 16*4(%esi), %mm7 # v4
147 - movq %mm7, scratch1 # scratch1: v4 ;
148 + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
149 movq %mm5, %mm7 # duplicate v3
150 - paddw scratch1, %mm5 # v03: v3+v4
151 - psubw scratch1, %mm7 # v04: v3-v4
152 - movq %mm5, scratch2 # scratch2: v03
153 + paddw MUNG(scratch1), %mm5 # v03: v3+v4
154 + psubw MUNG(scratch1), %mm7 # v04: v3-v4
155 + movq %mm5, MUNG(scratch2) # scratch2: v03
156 movq %mm0, %mm5 # mm5: v00
157
158 - paddw scratch2, %mm0 # v10: v00+v03
159 - psubw scratch2, %mm5 # v13: v00-v03
160 - movq %mm3, scratch3 # scratc3: v02
161 + paddw MUNG(scratch2), %mm0 # v10: v00+v03
162 + psubw MUNG(scratch2), %mm5 # v13: v00-v03
163 + movq %mm3, MUNG(scratch3) # scratch3: v02
164 movq %mm1, %mm3 # duplicate v01
165
166 - paddw scratch3, %mm1 # v11: v01+v02
167 - psubw scratch3, %mm3 # v12: v01-v02
168 + paddw MUNG(scratch3), %mm1 # v11: v01+v02
169 + psubw MUNG(scratch3), %mm3 # v12: v01-v02
170
171 - movq %mm6, scratch4 # scratc4: v05
172 + movq %mm6, MUNG(scratch4) # scratch4: v05
173 movq %mm0, %mm6 # duplicate v10
174
175 paddw %mm1, %mm0 # v10+v11
176 @@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
177 movq %mm6, 16*4(%esi) # out4: v10-v11
178
179 movq %mm4, %mm0 # mm0: v06
180 - paddw scratch4, %mm4 # v15: v05+v06
181 + paddw MUNG(scratch4), %mm4 # v15: v05+v06
182 paddw %mm2, %mm0 # v16: v07+v06
183
184 - pmulhw WA3, %mm4 # v35~: WA3*v15
185 + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
186 psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
187
188 movq %mm4, %mm6 # duplicate v35
189 @@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
190
191 paddw %mm5, %mm3 # v22: v12+v13
192
193 - pmulhw WA1, %mm3 # v32~: WA3*v15
194 + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
195 psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
196 movq %mm5, %mm6 # duplicate v13
197
198 @@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
199 movq %mm5, 16*2(%esi) # out2: v13+v32
200 movq %mm6, 16*6(%esi) # out6: v13-v32
201
202 - paddw scratch4, %mm7 # v14n: v04+v05
203 + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
204 movq %mm0, %mm5 # duplicate v16
205
206 psubw %mm7, %mm0 # va1: v16-v14n
207 - pmulhw WA2, %mm7 # v34~~: v14n*WA2
208 - pmulhw WA5, %mm0 # va0~: va1*WA5
209 - pmulhw WA4, %mm5 # v36~~: v16*WA4
210 + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
211 + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
212 + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
213 psllw $16-NSHIFT, %mm7
214 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
215 # scale note that WA4 is shifted 1 bit less than the others
216 @@ -272,6 +277,8 @@ _dv_dct_88_block_mmx:
217
218 .align 8
219 .global _dv_dct_block_mmx_postscale_88
220 +.hidden _dv_dct_block_mmx_postscale_88
221 +.type _dv_dct_block_mmx_postscale_88,@function
222 _dv_dct_block_mmx_postscale_88:
223
224 pushl %ebp
225 @@ -748,14 +755,17 @@ _dv_dct_block_mmx_postscale_88:
226
227 .align 8
228 .global _dv_dct_248_block_mmx
229 +.hidden _dv_dct_248_block_mmx
230 +.type _dv_dct_248_block_mmx,@function
231 _dv_dct_248_block_mmx:
232
233 pushl %ebp
234 - movl %esp, %ebp
235 pushl %esi
236 pushl %edi
237
238 - movl 8(%ebp), %esi # source
239 + LOAD_PIC_REG_BP()
240 +
241 + movl 16(%esp), %esi # source
242
243 # column 0
244
245 @@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
246 paddw %mm1, %mm0 # v20: v10+v11
247 psubw %mm1, %mm3 # v21: v10-v11
248
249 - pmulhw WA1, %mm5 # v32~: WA1*v22
250 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
251 movq %mm4, %mm2
252 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
253
254 @@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
255 paddw %mm1, %mm0 # v20: v10+v11
256 psubw %mm1, %mm3 # v21: v10-v11
257
258 - pmulhw WA1, %mm5 # v32~: WA1*v22
259 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
260 movq %mm4, %mm2
261 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
262
263 @@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
264 paddw %mm1, %mm0 # v20: v10+v11
265 psubw %mm1, %mm3 # v21: v10-v11
266
267 - pmulhw WA1, %mm5 # v32~: WA1*v22
268 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
269 movq %mm4, %mm2
270 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
271
272 @@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
273 paddw %mm1, %mm0 # v20: v10+v11
274 psubw %mm1, %mm3 # v21: v10-v11
275
276 - pmulhw WA1, %mm5 # v32~: WA1*v22
277 + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
278 movq %mm4, %mm2
279 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
280
281 @@ -912,6 +922,8 @@ _dv_dct_248_block_mmx:
282
283 .align 8
284 .global _dv_dct_248_block_mmx_post_sum
285 +.hidden _dv_dct_248_block_mmx_post_sum
286 +.type _dv_dct_248_block_mmx_post_sum,@function
287 _dv_dct_248_block_mmx_post_sum:
288
289 pushl %ebp
290 @@ -992,6 +1004,8 @@ _dv_dct_248_block_mmx_post_sum:
291
292 .align 8
293 .global _dv_dct_block_mmx_postscale_248
294 +.hidden _dv_dct_block_mmx_postscale_248
295 +.type _dv_dct_block_mmx_postscale_248,@function
296 _dv_dct_block_mmx_postscale_248:
297
298 pushl %ebp
299 diff -Nurp libdv-0.104-old/libdv/dct_block_mmx_x86_64.S libdv-0.104/libdv/dct_block_mmx_x86_64.S
300 --- libdv-0.104-old/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100
301 +++ libdv-0.104/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100
302 @@ -57,6 +57,8 @@ scratch4: .quad 0
303
304 .align 8
305 .global _dv_dct_88_block_mmx_x86_64
306 +.hidden _dv_dct_88_block_mmx_x86_64
307 +.type _dv_dct_88_block_mmx_x86_64,@function
308 _dv_dct_88_block_mmx_x86_64:
309
310 /* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */
311 @@ -269,6 +271,8 @@ _dv_dct_88_block_mmx_x86_64:
312
313 .align 8
314 .global _dv_dct_block_mmx_x86_64_postscale_88
315 +.hidden _dv_dct_block_mmx_x86_64_postscale_88
316 +.type _dv_dct_block_mmx_x86_64_postscale_88,@function
317 _dv_dct_block_mmx_x86_64_postscale_88:
318
319 /* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */
320 diff -Nurp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c
321 --- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200
322 +++ libdv-0.104/libdv/dv.c 2006-01-01 22:44:43.000000000 +0100
323 @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
324 } /* dv_reconfigure */
325
326
327 +extern uint8_t dv_quant_offset[4];
328 +extern uint8_t dv_quant_shifts[22][4];
329 +
330 static inline void
331 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
332 int i;
333 @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
334 dv_idct_248 (co248, mb->b[i].coeffs);
335 } else {
336 #if ARCH_X86
337 - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
338 + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
339 _dv_idct_88(mb->b[i].coeffs);
340 #elif ARCH_X86_64
341 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
342 @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
343 dv_idct_248 (co248, mb->b[b].coeffs);
344 } else {
345 #if ARCH_X86
346 - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
347 + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
348 _dv_weight_88_inverse(bl->coeffs);
349 _dv_idct_88(bl->coeffs);
350 #elif ARCH_X86_64
351 diff -Nurp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c
352 --- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100
353 +++ libdv-0.104/libdv/encode.c 2006-01-01 22:44:43.000000000 +0100
354 @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
355 }
356
357 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
358 - dv_vlc_entry_t ** out);
359 + dv_vlc_entry_t ** out,
360 + dv_vlc_entry_t * lookup);
361
362 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
363 dv_vlc_entry_t ** out);
364 @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
365 #elif ARCH_X86
366 int num_bits;
367
368 - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
369 + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
370 emms();
371 #else
372 int num_bits;
373 @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
374 return num_bits;
375 }
376
377 -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
378 +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
379 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
380
381 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
382 @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
383 #elif ARCH_X86_64
384 return _dv_vlc_num_bits_block_x86_64(coeffs);
385 #else
386 - return _dv_vlc_num_bits_block_x86(coeffs);
387 + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
388 #endif
389 }
390
391 diff -Nurp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S
392 --- libdv-0.104-old/libdv/encode_x86.S 2006-01-01 22:44:22.000000000 +0100
393 +++ libdv-0.104/libdv/encode_x86.S 2006-01-01 22:44:43.000000000 +0100
394 @@ -23,13 +23,11 @@
395 * The libdv homepage is http://libdv.sourceforge.net/.
396 */
397
398 -.data
399 -ALLONE: .word 1,1,1,1
400 -VLCADDMASK: .byte 255,0,0,0,255,0,0,0
401 -
402 .text
403
404 .global _dv_vlc_encode_block_mmx
405 +.hidden _dv_vlc_encode_block_mmx
406 +.type _dv_vlc_encode_block_mmx,@function
407 _dv_vlc_encode_block_mmx:
408 pushl %ebx
409 pushl %esi
410 @@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:
411
412 movl $63, %ecx
413
414 - movl vlc_encode_lookup, %esi
415 + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
416
417 pxor %mm0, %mm0
418 pxor %mm2, %mm2
419 - movq VLCADDMASK, %mm1
420 + pushl $0x000000FF # these four lines
421 + pushl $0x000000FF # load VLCADDMASK
422 + movq (%esp), %mm1 # into %mm1 off the stack
423 + addl $8, %esp # --> no TEXTRELs
424 xorl %ebp, %ebp
425 subl $8, %edx
426 vlc_encode_block_mmx_loop:
427 @@ -106,6 +107,8 @@ vlc_encode_block_out:
428 ret
429
430 .global _dv_vlc_num_bits_block_x86
431 +.hidden _dv_vlc_num_bits_block_x86
432 +.type _dv_vlc_num_bits_block_x86,@function
433 _dv_vlc_num_bits_block_x86:
434 pushl %ebx
435 pushl %esi
436 @@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:
437 addl $2, %edi
438
439 movl $63, %ecx
440 - movl vlc_num_bits_lookup, %esi
441 + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
442
443 vlc_num_bits_block_x86_loop:
444 movw (%edi), %ax
445 @@ -164,6 +167,8 @@ vlc_num_bits_block_out:
446 ret
447
448 .global _dv_vlc_encode_block_pass_1_x86
449 +.hidden _dv_vlc_encode_block_pass_1_x86
450 +.type _dv_vlc_encode_block_pass_1_x86,@function
451 _dv_vlc_encode_block_pass_1_x86:
452 pushl %ebx
453 pushl %esi
454 @@ -240,6 +245,8 @@ vlc_encode_block_pass1_x86_out:
455 ret
456
457 .global _dv_classify_mmx
458 +.hidden _dv_classify_mmx
459 +.type _dv_classify_mmx,@function
460 _dv_classify_mmx:
461
462 pushl %ebp
463 @@ -345,6 +352,8 @@ _dv_classify_mmx:
464 don't know why... */
465
466 .global _dv_reorder_block_mmx
467 +.hidden _dv_reorder_block_mmx
468 +.type _dv_reorder_block_mmx,@function
469 _dv_reorder_block_mmx:
470
471 pushl %ebp
472 @@ -460,6 +469,8 @@ reorder_loop:
473 ret
474
475 .global _dv_need_dct_248_mmx_rows
476 +.hidden _dv_need_dct_248_mmx_rows
477 +.type _dv_need_dct_248_mmx_rows,@function
478 _dv_need_dct_248_mmx_rows:
479
480 pushl %ebp
481 @@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
482 paddw %mm5, %mm1
483
484 paddw %mm1, %mm0
485 -
486 - pmaddwd ALLONE, %mm0
487 +
488 + pushl $0x00010001 # these four lines
489 + pushl $0x00010001 # load ALLONE
490 + pmaddwd (%esp), %mm0 # into %mm0 off the stack
491 + addl $8, %esp # --> no TEXTRELs
492 movq %mm0, %mm1
493 psrlq $32, %mm1
494 paddd %mm1, %mm0
495 diff -Nurp libdv-0.104-old/libdv/encode_x86_64.S libdv-0.104/libdv/encode_x86_64.S
496 --- libdv-0.104-old/libdv/encode_x86_64.S 2006-01-01 22:44:22.000000000 +0100
497 +++ libdv-0.104/libdv/encode_x86_64.S 2006-01-01 22:44:43.000000000 +0100
498 @@ -30,6 +30,8 @@ VLCADDMASK: .byte 255,0,0,0,255,0,0,0
499 .text
500
501 .global _dv_vlc_encode_block_mmx_x86_64
502 +.hidden _dv_vlc_encode_block_mmx_x86_64
503 +.type _dv_vlc_encode_block_mmx_x86_64,@function
504 _dv_vlc_encode_block_mmx_x86_64:
505
506 /* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
507 @@ -113,6 +115,8 @@ vlc_encode_block_out:
508 ret
509
510 .global _dv_vlc_num_bits_block_x86_64
511 +.hidden _dv_vlc_num_bits_block_x86_64
512 +.type _dv_vlc_num_bits_block_x86_64,@function
513 _dv_vlc_num_bits_block_x86_64:
514
515 /* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */
516 @@ -173,6 +177,8 @@ vlc_num_bits_block_out:
517 ret
518
519 .global _dv_vlc_encode_block_pass_1_x86_64
520 +.hidden _dv_vlc_encode_block_pass_1_x86_64
521 +.type _dv_vlc_encode_block_pass_1_x86_64,@function
522 _dv_vlc_encode_block_pass_1_x86_64:
523
524 /*
525 @@ -251,6 +257,8 @@ vlc_encode_block_pass1_x86_out:
526 ret
527
528 .global _dv_classify_mmx_x86_64
529 +.hidden _dv_classify_mmx_x86_64
530 +.type _dv_classify_mmx_x86_64,@function
531 _dv_classify_mmx_x86_64:
532
533 /* extern int _dv_classify_mmx_x86_64(dv_coeff_t * a, rdi
534 @@ -355,6 +363,8 @@ _dv_classify_mmx_x86_64:
535 don't know why... */
536
537 .global _dv_reorder_block_mmx_x86_64
538 +.hidden _dv_reorder_block_mmx_x86_64
539 +.type _dv_reorder_block_mmx_x86_64,@function
540 _dv_reorder_block_mmx_x86_64:
541
542 /*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t * a, rdi
543 @@ -469,6 +479,8 @@ reorder_loop:
544 ret
545
546 .global _dv_need_dct_248_mmx_x86_64_rows
547 +.hidden _dv_need_dct_248_mmx_x86_64_rows
548 +.type _dv_need_dct_248_mmx_x86_64_rows,@function
549 _dv_need_dct_248_mmx_x86_64_rows:
550
551 /* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl); rdi */
552 diff -Nurp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S
553 --- libdv-0.104-old/libdv/idct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100
554 +++ libdv-0.104/libdv/idct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100
555 @@ -8,16 +8,22 @@
556
557
558
559 +#include "asm_common.S"
560 +
561 .text
562 +
563 .align 4
564 .globl _dv_idct_block_mmx
565 +.hidden _dv_idct_block_mmx
566 .type _dv_idct_block_mmx,@function
567 _dv_idct_block_mmx:
568 pushl %ebp
569 - movl %esp,%ebp
570 pushl %esi
571 - leal preSC, %ecx
572 - movl 8(%ebp),%esi /* source matrix */
573 +
574 + LOAD_PIC_REG_BP()
575 +
576 + leal MUNG(preSC), %ecx
577 + movl 12(%esp),%esi /* source matrix */
578
579 /*
580 * column 0: even part
581 @@ -35,7 +41,7 @@ _dv_idct_block_mmx:
582 movq %mm1, %mm2 /* added 11/1/96 */
583 pmulhw 8*8(%esi),%mm5 /* V8 */
584 psubsw %mm0, %mm1 /* V16 */
585 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
586 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
587 paddsw %mm0, %mm2 /* V17 */
588 movq %mm2, %mm0 /* duplicate V17 */
589 psraw $1, %mm2 /* t75=t82 */
590 @@ -76,7 +82,7 @@ _dv_idct_block_mmx:
591 paddsw %mm0, %mm3 /* V29 ; free mm0 */
592 movq %mm7, %mm1 /* duplicate V26 */
593 psraw $1, %mm3 /* t91=t94 */
594 - pmulhw x539f539f539f539f,%mm7 /* V33 */
595 + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
596 psraw $1, %mm1 /* t96 */
597 movq %mm5, %mm0 /* duplicate V2 */
598 psraw $2, %mm4 /* t85=t87 */
599 @@ -84,15 +90,15 @@ _dv_idct_block_mmx:
600 psubsw %mm4, %mm0 /* V28 ; free mm4 */
601 movq %mm0, %mm2 /* duplicate V28 */
602 psraw $1, %mm5 /* t90=t93 */
603 - pmulhw x4546454645464546,%mm0 /* V35 */
604 + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
605 psraw $1, %mm2 /* t97 */
606 movq %mm5, %mm4 /* duplicate t90=t93 */
607 psubsw %mm2, %mm1 /* V32 ; free mm2 */
608 - pmulhw x61f861f861f861f8,%mm1 /* V36 */
609 + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
610 psllw $1, %mm7 /* t107 */
611 paddsw %mm3, %mm5 /* V31 */
612 psubsw %mm3, %mm4 /* V30 ; free mm3 */
613 - pmulhw x5a825a825a825a82,%mm4 /* V34 */
614 + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
615 nop
616 psubsw %mm1, %mm0 /* V38 */
617 psubsw %mm7, %mm1 /* V37 ; free mm7 */
618 @@ -159,7 +165,7 @@ _dv_idct_block_mmx:
619 psubsw %mm7, %mm1 /* V50 */
620 pmulhw 8*9(%esi), %mm5 /* V9 */
621 paddsw %mm7, %mm2 /* V51 */
622 - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
623 + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
624 movq %mm2, %mm6 /* duplicate V51 */
625 psraw $1, %mm2 /* t138=t144 */
626 movq %mm3, %mm4 /* duplicate V1 */
627 @@ -200,11 +206,11 @@ _dv_idct_block_mmx:
628 * even more by doing the correction step in a later stage when the number
629 * is actually multiplied by 16
630 */
631 - paddw x0005000200010001, %mm4
632 + paddw MUNG(x0005000200010001), %mm4
633 psubsw %mm6, %mm3 /* V60 ; free mm6 */
634 psraw $1, %mm0 /* t154=t156 */
635 movq %mm3, %mm1 /* duplicate V60 */
636 - pmulhw x539f539f539f539f, %mm1 /* V67 */
637 + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
638 movq %mm5, %mm6 /* duplicate V3 */
639 psraw $2, %mm4 /* t148=t150 */
640 paddsw %mm4, %mm5 /* V61 */
641 @@ -213,13 +219,13 @@ _dv_idct_block_mmx:
642 psllw $1, %mm1 /* t169 */
643 paddsw %mm0, %mm5 /* V65 -> result */
644 psubsw %mm0, %mm4 /* V64 ; free mm0 */
645 - pmulhw x5a825a825a825a82, %mm4 /* V68 */
646 + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
647 psraw $1, %mm3 /* t158 */
648 psubsw %mm6, %mm3 /* V66 */
649 movq %mm5, %mm2 /* duplicate V65 */
650 - pmulhw x61f861f861f861f8, %mm3 /* V70 */
651 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
652 psllw $1, %mm6 /* t165 */
653 - pmulhw x4546454645464546, %mm6 /* V69 */
654 + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
655 psraw $1, %mm2 /* t172 */
656 /* moved from next block */
657 movq 8*5(%esi), %mm0 /* V56 */
658 @@ -344,7 +350,7 @@ _dv_idct_block_mmx:
659 * movq 8*13(%esi), %mm4 tmt13
660 */
661 psubsw %mm4, %mm3 /* V134 */
662 - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
663 + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
664 movq 8*9(%esi), %mm6 /* tmt9 */
665 paddsw %mm4, %mm5 /* V135 ; mm4 free */
666 movq %mm0, %mm4 /* duplicate tmt1 */
667 @@ -373,17 +379,17 @@ _dv_idct_block_mmx:
668 psubsw %mm7, %mm0 /* V144 */
669 movq %mm0, %mm3 /* duplicate V144 */
670 paddsw %mm7, %mm2 /* V147 ; free mm7 */
671 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
672 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
673 movq %mm1, %mm7 /* duplicate tmt3 */
674 paddsw %mm5, %mm7 /* V145 */
675 psubsw %mm5, %mm1 /* V146 ; free mm5 */
676 psubsw %mm1, %mm3 /* V150 */
677 movq %mm7, %mm5 /* duplicate V145 */
678 - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
679 + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
680 psubsw %mm2, %mm5 /* V148 */
681 - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
682 + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
683 psllw $2, %mm0 /* t311 */
684 - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
685 + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
686 paddsw %mm2, %mm7 /* V149 ; free mm2 */
687 psllw $1, %mm1 /* t313 */
688 nop /* without the nop - freeze here for one clock */
689 @@ -409,7 +415,7 @@ _dv_idct_block_mmx:
690 paddsw %mm3, %mm6 /* V164 ; free mm3 */
691 movq %mm4, %mm3 /* duplicate V142 */
692 psubsw %mm5, %mm4 /* V165 ; free mm5 */
693 - movq %mm2, scratch7 /* out7 */
694 + movq %mm2, MUNG(scratch7) /* out7 */
695 psraw $4, %mm6
696 psraw $4, %mm4
697 paddsw %mm5, %mm3 /* V162 */
698 @@ -420,11 +426,11 @@ _dv_idct_block_mmx:
699 */
700 movq %mm6, 8*9(%esi) /* out9 */
701 paddsw %mm1, %mm0 /* V161 */
702 - movq %mm3, scratch5 /* out5 */
703 + movq %mm3, MUNG(scratch5) /* out5 */
704 psubsw %mm1, %mm5 /* V166 ; free mm1 */
705 movq %mm4, 8*11(%esi) /* out11 */
706 psraw $4, %mm5
707 - movq %mm0, scratch3 /* out3 */
708 + movq %mm0, MUNG(scratch3) /* out3 */
709 movq %mm2, %mm4 /* duplicate V140 */
710 movq %mm5, 8*13(%esi) /* out13 */
711 paddsw %mm7, %mm2 /* V160 */
712 @@ -434,7 +440,7 @@ _dv_idct_block_mmx:
713 /* moved from the next block */
714 movq 8*3(%esi), %mm7
715 psraw $4, %mm4
716 - movq %mm2, scratch1 /* out1 */
717 + movq %mm2, MUNG(scratch1) /* out1 */
718 /* moved from the next block */
719 movq %mm0, %mm1
720 movq %mm4, 8*15(%esi) /* out15 */
721 @@ -491,15 +497,15 @@ _dv_idct_block_mmx:
722 paddsw %mm4, %mm3 /* V113 ; free mm4 */
723 movq %mm0, %mm4 /* duplicate V110 */
724 paddsw %mm1, %mm2 /* V111 */
725 - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
726 + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
727 psubsw %mm1, %mm5 /* V112 ; free mm1 */
728 psubsw %mm5, %mm4 /* V116 */
729 movq %mm2, %mm1 /* duplicate V111 */
730 - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
731 + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
732 psubsw %mm3, %mm2 /* V114 */
733 - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
734 + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
735 paddsw %mm3, %mm1 /* V115 ; free mm3 */
736 - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
737 + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
738 psllw $2, %mm0 /* t266 */
739 movq %mm1, (%esi) /* save V115 */
740 psllw $1, %mm5 /* t268 */
741 @@ -517,7 +523,7 @@ _dv_idct_block_mmx:
742 movq %mm6, %mm3 /* duplicate tmt4 */
743 psubsw %mm0, %mm6 /* V100 */
744 paddsw %mm0, %mm3 /* V101 ; free mm0 */
745 - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
746 + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
747 movq %mm7, %mm5 /* duplicate tmt0 */
748 movq 8*8(%esi), %mm1 /* tmt8 */
749 paddsw %mm1, %mm7 /* V103 */
750 @@ -551,10 +557,10 @@ _dv_idct_block_mmx:
751 movq 8*2(%esi), %mm3 /* V123 */
752 paddsw %mm4, %mm7 /* out0 */
753 /* moved up from next block */
754 - movq scratch3, %mm0
755 + movq MUNG(scratch3), %mm0
756 psraw $4, %mm7
757 /* moved up from next block */
758 - movq scratch5, %mm6
759 + movq MUNG(scratch5), %mm6
760 psubsw %mm4, %mm1 /* out14 ; free mm4 */
761 paddsw %mm3, %mm5 /* out2 */
762 psraw $4, %mm1
763 @@ -565,7 +571,7 @@ _dv_idct_block_mmx:
764 movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
765 psraw $4, %mm2
766 /* moved up to the prev block */
767 - movq scratch7, %mm4
768 + movq MUNG(scratch7), %mm4
769 /* moved up to the prev block */
770 psraw $4, %mm0
771 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
772 @@ -579,7 +585,7 @@ _dv_idct_block_mmx:
773 * psraw $4, %mm0
774 * psraw $4, %mm6
775 */
776 - movq scratch1, %mm1
777 + movq MUNG(scratch1), %mm1
778 psraw $4, %mm4
779 movq %mm0, 8*3(%esi) /* out3 */
780 psraw $4, %mm1
781 diff -Nurp libdv-0.104-old/libdv/idct_block_mmx_x86_64.S libdv-0.104/libdv/idct_block_mmx_x86_64.S
782 --- libdv-0.104-old/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100
783 +++ libdv-0.104/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100
784 @@ -17,6 +17,7 @@
785 .text
786 .align 4
787 .globl _dv_idct_block_mmx_x86_64
788 +.hidden _dv_idct_block_mmx_x86_64
789 .type _dv_idct_block_mmx_x86_64,@function
790 _dv_idct_block_mmx_x86_64:
791 /* void _dv_idct_88(dv_coeff_t *block) */
792 diff -Nurp libdv-0.104-old/libdv/parse.c libdv-0.104/libdv/parse.c
793 --- libdv-0.104-old/libdv/parse.c 2004-10-20 05:49:24.000000000 +0200
794 +++ libdv-0.104/libdv/parse.c 2006-01-01 22:44:43.000000000 +0100
795 @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
796 exit(0);
797 #endif
798 } /* dv_parse_ac_coeffs */
799 +#if defined __GNUC__ && __ELF__
800 +# define dv_strong_hidden_alias(name, aliasname) \
801 + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
802 +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
803 +#else
804 +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
805 +#endif
806
807 /* ---------------------------------------------------------------------------
808 */
809 diff -Nurp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c
810 --- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200
811 +++ libdv-0.104/libdv/quant.c 2006-01-01 22:44:43.000000000 +0100
812 @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
813 uint32_t dv_quant_248_mul_tab [2] [22] [64];
814 uint32_t dv_quant_88_mul_tab [2] [22] [64];
815
816 -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
817 +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][]);
818 extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
819 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
820 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
821 @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
822 _dv_quant_x86_64(block, qno, klass);
823 emms();
824 #else
825 - _dv_quant_x86(block, qno, klass);
826 + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
827 emms();
828 #endif
829 }
830 diff -Nurp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h
831 --- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200
832 +++ libdv-0.104/libdv/quant.h 2006-01-01 22:44:43.000000000 +0100
833 @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
834 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
835 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
836 dv_248_coeff_t *co);
837 -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
838 +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][]);
839 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
840 extern void dv_quant_init (void);
841 #ifdef __cplusplus
842 diff -Nurp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S
843 --- libdv-0.104-old/libdv/quant_x86.S 2006-01-01 22:44:22.000000000 +0100
844 +++ libdv-0.104/libdv/quant_x86.S 2006-01-01 22:44:43.000000000 +0100
845 @@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
846 .text
847 .align 4
848 .globl _dv_quant_88_inverse_x86
849 +.hidden _dv_quant_88_inverse_x86
850 +.type _dv_quant_88_inverse_x86,@function
851 _dv_quant_88_inverse_x86:
852 pushl %ebx
853 pushl %esi
854 @@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:
855
856 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
857 movl ARGn(1),%eax /* qno */
858 + movl ARGn(3),%ebx /* dv_quant_offset */
859 + addl ARGn(2),%ebx /* class */
860 + movzbl (%ebx),%ecx
861 movl ARGn(2),%ebx /* class */
862 - movzbl dv_quant_offset(%ebx),%ecx
863 addl %ecx,%eax
864 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
865 + movl ARGn(4),%edx /* dv_quant_shifts */
866 + leal (%edx,%eax,4),%edx /* edx is pq */
867
868 /* extra = (class == 3); */
869 /* 0 1 2 3 */
870 @@ -193,6 +198,8 @@ _dv_quant_88_inverse_x86:
871
872 .align 4
873 .globl _dv_quant_x86
874 +.hidden _dv_quant_x86
875 +.type _dv_quant_x86,@function
876 _dv_quant_x86:
877 pushl %ebx
878 pushl %ecx
879 @@ -212,11 +219,13 @@ _dv_quant_x86:
880
881 /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
882 movl ARGn(1),%eax /* qno */
883 + movl ARGn(3),%ebx /* offset */
884 + addl ARGn(2),%ebx /* class */
885 + movzbl (%ebx),%ecx
886 movl ARGn(2),%ebx /* class */
887 -
888 - movzbl dv_quant_offset(%ebx),%ecx
889 + movl ARGn(4),%edx /* shifts */
890 addl %ecx,%eax
891 - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
892 + leal (%edx,%eax,4),%edx /* edx is pq */
893
894 /* extra = (class == 3); */
895 /* 0 1 2 3 */
896 diff -Nurp libdv-0.104-old/libdv/quant_x86_64.S libdv-0.104/libdv/quant_x86_64.S
897 --- libdv-0.104-old/libdv/quant_x86_64.S 2006-01-01 22:44:22.000000000 +0100
898 +++ libdv-0.104/libdv/quant_x86_64.S 2006-01-01 22:44:43.000000000 +0100
899 @@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl
900 .text
901 .align 4
902 .globl _dv_quant_88_inverse_x86_64
903 +.hidden _dv_quant_88_inverse_x86_64
904 +.type _dv_quant_88_inverse_x86_64,@function
905 _dv_quant_88_inverse_x86_64:
906
907 /* Args are at block=rdi, qno=rsi, class=rdx */
908 @@ -195,6 +197,8 @@ _dv_quant_88_inverse_x86_64:
909
910 .align 4
911 .globl _dv_quant_x86_64
912 +.hidden _dv_quant_x86_64
913 +.type _dv_quant_x86_64,@function
914 _dv_quant_x86_64:
915
916 /* Args are at block=rdi, qno=rsi, class=rdx */
917 diff -Nurp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S
918 --- libdv-0.104-old/libdv/rgbtoyuv.S 2006-01-01 22:44:22.000000000 +0100
919 +++ libdv-0.104/libdv/rgbtoyuv.S 2006-01-01 22:44:43.000000000 +0100
920 @@ -41,9 +41,6 @@
921 #define DV_WIDTH_SHORT_HALF 720
922 #define DV_WIDTH_BYTE_HALF 360
923
924 -.global _dv_rgbtoycb_mmx
925 -# .global yuvtoycb_mmx
926 -
927 .data
928
929 .align 8
930 @@ -110,20 +107,24 @@ VR0GR: .long 0,0
931 VBG0B: .long 0,0
932
933 #endif
934 -
935 +
936 +#include "asm_common.S"
937 +
938 .text
939
940 -#define _inPtr 8
941 -#define _rows 12
942 -#define _columns 16
943 -#define _outyPtr 20
944 -#define _outuPtr 24
945 -#define _outvPtr 28
946 +#define _inPtr 24+8
947 +#define _rows 24+12
948 +#define _columns 24+16
949 +#define _outyPtr 24+20
950 +#define _outuPtr 24+24
951 +#define _outvPtr 24+28
952
953 +.global _dv_rgbtoycb_mmx
954 +.hidden _dv_rgbtoycb_mmx
955 +.type _dv_rgbtoycb_mmx,@function
956 _dv_rgbtoycb_mmx:
957
958 pushl %ebp
959 - movl %esp, %ebp
960 pushl %eax
961 pushl %ebx
962 pushl %ecx
963 @@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
964 pushl %esi
965 pushl %edi
966
967 - leal ZEROSX, %eax #This section gets around a bug
968 + LOAD_PIC_REG_BP()
969 +
970 + leal MUNG(ZEROSX), %eax #This section gets around a bug
971 movq (%eax), %mm0 #unlikely to persist
972 - movq %mm0, ZEROS
973 - leal OFFSETDX, %eax
974 + movq %mm0, MUNG(ZEROS)
975 + leal MUNG(OFFSETDX), %eax
976 movq (%eax), %mm0
977 - movq %mm0, OFFSETD
978 - leal OFFSETWX, %eax
979 + movq %mm0, MUNG(OFFSETD)
980 + leal MUNG(OFFSETWX), %eax
981 movq (%eax), %mm0
982 - movq %mm0, OFFSETW
983 - leal OFFSETBX, %eax
984 + movq %mm0, MUNG(OFFSETW)
985 + leal MUNG(OFFSETBX), %eax
986 movq (%eax), %mm0
987 - movq %mm0, OFFSETB
988 - leal YR0GRX, %eax
989 + movq %mm0, MUNG(OFFSETB)
990 + leal MUNG(YR0GRX), %eax
991 movq (%eax), %mm0
992 - movq %mm0, YR0GR
993 - leal YBG0BX, %eax
994 + movq %mm0, MUNG(YR0GR)
995 + leal MUNG(YBG0BX), %eax
996 movq (%eax), %mm0
997 - movq %mm0, YBG0B
998 - leal UR0GRX, %eax
999 + movq %mm0, MUNG(YBG0B)
1000 + leal MUNG(UR0GRX), %eax
1001 movq (%eax), %mm0
1002 - movq %mm0, UR0GR
1003 - leal UBG0BX, %eax
1004 + movq %mm0, MUNG(UR0GR)
1005 + leal MUNG(UBG0BX), %eax
1006 movq (%eax), %mm0
1007 - movq %mm0, UBG0B
1008 - leal VR0GRX, %eax
1009 + movq %mm0, MUNG(UBG0B)
1010 + leal MUNG(VR0GRX), %eax
1011 movq (%eax), %mm0
1012 - movq %mm0, VR0GR
1013 - leal VBG0BX, %eax
1014 + movq %mm0, MUNG(VR0GR)
1015 + leal MUNG(VBG0BX), %eax
1016 movq (%eax), %mm0
1017 - movq %mm0, VBG0B
1018 -
1019 - movl _rows(%ebp), %eax
1020 - movl _columns(%ebp), %ebx
1021 + movq %mm0, MUNG(VBG0B)
1022 + movl _rows(%esp), %eax
1023 + movl _columns(%esp), %ebx
1024 mull %ebx #number pixels
1025 shrl $3, %eax #number of loops
1026 movl %eax, %edi #loop counter in edi
1027 - movl _inPtr(%ebp), %eax
1028 - movl _outyPtr(%ebp), %ebx
1029 - movl _outuPtr(%ebp), %ecx
1030 - movl _outvPtr(%ebp), %edx
1031 + movl _inPtr(%esp), %eax
1032 + movl _outyPtr(%esp), %ebx
1033 + movl _outuPtr(%esp), %ecx
1034 + movl _outvPtr(%esp), %edx
1035 rgbtoycb_mmx_loop:
1036 movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
1037 pxor %mm6, %mm6 #0 -> mm6
1038 @@ -184,29 +186,29 @@ rgbtoycb_mmx_loop:
1039 punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
1040 movq %mm0, %mm2 #R1B0G0R0 -> mm2
1041
1042 - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
1043 + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
1044 movq %mm1, %mm3 #B1G1R1B0 -> mm3
1045
1046 - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
1047 + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
1048 movq %mm2, %mm4 #R1B0G0R0 -> mm4
1049
1050 - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
1051 + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
1052 movq %mm3, %mm5 #B1G1R1B0 -> mm5
1053
1054 - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
1055 + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
1056 punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
1057
1058 - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
1059 + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
1060 paddd %mm1, %mm0 #Y1Y0 -> mm0
1061
1062 - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
1063 + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
1064
1065 movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
1066 paddd %mm3, %mm2 #U1U0 -> mm2
1067
1068 movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
1069
1070 - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
1071 + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
1072 paddd %mm5, %mm4 #V1V0 -> mm4
1073
1074 movq %mm1, %mm5 #B3G3R3B2 -> mm5
1075 @@ -214,29 +216,29 @@ rgbtoycb_mmx_loop:
1076
1077 paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
1078
1079 - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
1080 + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
1081 movq %mm1, %mm3 #R3B2G2R2 -> mm3
1082
1083 - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
1084 + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
1085 movq %mm5, %mm7 #B3G3R3B2 -> mm7
1086
1087 - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
1088 + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
1089 psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
1090
1091 - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
1092 + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
1093 movq %mm3, %mm6 #R3B2G2R2 -> mm6
1094 - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
1095 + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
1096 psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
1097
1098 paddd %mm5, %mm1 #Y3Y2 -> mm1
1099 movq %mm7, %mm5 #B3G3R3B2 -> mm5
1100 - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
1101 + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
1102 psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
1103
1104 - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
1105 + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
1106 packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
1107
1108 - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
1109 + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
1110 psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
1111
1112 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
1113 @@ -251,58 +253,58 @@ rgbtoycb_mmx_loop:
1114 movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
1115 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
1116
1117 - paddw OFFSETY, %mm0
1118 + paddw MUNG(OFFSETY), %mm0
1119 movq %mm0, (%ebx) #store Y3Y2Y1Y0
1120 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
1121
1122 - movq TEMP0, %mm0 #R5B4G4R4 -> mm0
1123 + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
1124 addl $8, %ebx
1125 -
1126 - punpcklbw ZEROS, %mm7 #B5G500 -> mm7
1127 +
1128 + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
1129 movq %mm0, %mm6 #R5B4G4R4 -> mm6
1130
1131 - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
1132 + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
1133 psrlq $32, %mm0 #00R5B4 -> mm0
1134
1135 paddw %mm0, %mm7 #B5G5R5B4 -> mm7
1136 movq %mm6, %mm2 #B5B4G4R4 -> mm2
1137
1138 - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
1139 + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
1140 movq %mm7, %mm0 #B5G5R5B4 -> mm0
1141
1142 - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
1143 + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
1144 packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
1145
1146 addl $24, %eax #increment RGB count
1147
1148 - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
1149 + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
1150 movq %mm6, %mm4 #B5B4G4R4 -> mm4
1151
1152 - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
1153 + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
1154 movq %mm0, %mm3 #B5G5R5B4 -> mm0
1155
1156 - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
1157 + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
1158 paddd %mm7, %mm2 #Y5Y4 -> mm2
1159
1160 - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
1161 + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
1162 pxor %mm7, %mm7 #0 -> mm7
1163
1164 - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
1165 + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
1166 punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
1167
1168 paddd %mm6, %mm0 #U5U4 -> mm0
1169 movq %mm1, %mm6 #B7G7R7B6 -> mm6
1170
1171 - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
1172 + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
1173 punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
1174
1175 movq %mm5, %mm7 #R7B6G6R6 -> mm7
1176 paddd %mm4, %mm3 #V5V4 -> mm3
1177
1178 - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
1179 + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
1180 movq %mm1, %mm4 #B7G7R7B6 -> mm4
1181
1182 - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
1183 + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
1184 psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
1185
1186 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
1187 @@ -310,25 +312,25 @@ rgbtoycb_mmx_loop:
1188 paddd %mm5, %mm6 #Y7Y6 -> mm6
1189 movq %mm7, %mm5 #R7B6G6R6 -> mm5
1190
1191 - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
1192 + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
1193 psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
1194
1195 - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
1196 + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
1197 psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
1198
1199 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
1200
1201 - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
1202 + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
1203 paddd %mm4, %mm7 #U7U6 -> mm7
1204
1205 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
1206 - paddw OFFSETY, %mm2
1207 + paddw MUNG(OFFSETY), %mm2
1208 movq %mm2, (%ebx) #store Y7Y6Y5Y4
1209
1210 - movq ALLONE, %mm6
1211 + movq MUNG(ALLONE), %mm6
1212 packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
1213
1214 - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
1215 + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
1216 pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
1217
1218 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
1219 @@ -338,8 +340,8 @@ rgbtoycb_mmx_loop:
1220
1221 psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
1222 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
1223 -
1224 - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
1225 +
1226 + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
1227
1228 movq %mm4, (%ecx) # store U
1229
1230 @@ -372,6 +374,8 @@ rgbtoycb_mmx_loop:
1231 ret
1232
1233 .global _dv_ppm_copy_y_block_mmx
1234 +.hidden _dv_ppm_copy_y_block_mmx
1235 +.type _dv_ppm_copy_y_block_mmx,@function
1236 _dv_ppm_copy_y_block_mmx:
1237
1238 pushl %ebp
1239 @@ -422,17 +426,20 @@ _dv_ppm_copy_y_block_mmx:
1240 ret
1241
1242 .global _dv_pgm_copy_y_block_mmx
1243 +.hidden _dv_pgm_copy_y_block_mmx
1244 +.type _dv_pgm_copy_y_block_mmx,@function
1245 _dv_pgm_copy_y_block_mmx:
1246
1247 pushl %ebp
1248 - movl %esp, %ebp
1249 pushl %esi
1250 pushl %edi
1251 -
1252 - movl 8(%ebp), %edi # dest
1253 - movl 12(%ebp), %esi # src
1254
1255 - movq OFFSETY, %mm7
1256 + LOAD_PIC_REG_BP()
1257 +
1258 + movl 16(%esp), %edi # dest
1259 + movl 20(%esp), %esi # src
1260 +
1261 + movq MUNG(OFFSETY), %mm7
1262 pxor %mm6, %mm6
1263
1264 movq (%esi), %mm0
1265 @@ -564,17 +571,20 @@ _dv_pgm_copy_y_block_mmx:
1266 ret
1267
1268 .global _dv_video_copy_y_block_mmx
1269 +.hidden _dv_video_copy_y_block_mmx
1270 +.type _dv_video_copy_y_block_mmx,@function
1271 _dv_video_copy_y_block_mmx:
1272
1273 pushl %ebp
1274 - movl %esp, %ebp
1275 pushl %esi
1276 pushl %edi
1277 -
1278 - movl 8(%ebp), %edi # dest
1279 - movl 12(%ebp), %esi # src
1280
1281 - movq OFFSETBX, %mm7
1282 + LOAD_PIC_REG_BP()
1283 +
1284 + movl 16(%esp), %edi # dest
1285 + movl 20(%esp), %esi # src
1286 +
1287 + movq MUNG(OFFSETBX), %mm7
1288 pxor %mm6, %mm6
1289
1290 movq (%esi), %mm0
1291 @@ -709,6 +719,8 @@ _dv_video_copy_y_block_mmx:
1292
1293
1294 .global _dv_ppm_copy_pal_c_block_mmx
1295 +.hidden _dv_ppm_copy_pal_c_block_mmx
1296 +.type _dv_ppm_copy_pal_c_block_mmx,@function
1297 _dv_ppm_copy_pal_c_block_mmx:
1298
1299 pushl %ebp
1300 @@ -852,19 +864,21 @@ _dv_ppm_copy_pal_c_block_mmx:
1301 ret
1302
1303 .global _dv_pgm_copy_pal_c_block_mmx
1304 +.hidden _dv_pgm_copy_pal_c_block_mmx
1305 +.type _dv_pgm_copy_pal_c_block_mmx,@function
1306 _dv_pgm_copy_pal_c_block_mmx:
1307
1308 pushl %ebp
1309 - movl %esp, %ebp
1310 pushl %esi
1311 pushl %edi
1312 pushl %ebx
1313 -
1314 - movl 8(%ebp), %edi # dest
1315 - movl 12(%ebp), %esi # src
1316
1317 + LOAD_PIC_REG_BP()
1318 +
1319 + movl 20(%esp), %edi # dest
1320 + movl 24(%esp), %esi # src
1321
1322 - movq OFFSETBX, %mm7
1323 + movq MUNG(OFFSETBX), %mm7
1324 pxor %mm6, %mm6
1325
1326
1327 @@ -1000,18 +1014,21 @@ _dv_pgm_copy_pal_c_block_mmx:
1328 ret
1329
1330 .global _dv_video_copy_pal_c_block_mmx
1331 +.hidden _dv_video_copy_pal_c_block_mmx
1332 +.type _dv_video_copy_pal_c_block_mmx,@function
1333 _dv_video_copy_pal_c_block_mmx:
1334
1335 pushl %ebp
1336 - movl %esp, %ebp
1337 pushl %esi
1338 pushl %edi
1339 pushl %ebx
1340 -
1341 - movl 8(%ebp), %edi # dest
1342 - movl 12(%ebp), %esi # src
1343
1344 - movq OFFSETBX, %mm7
1345 + LOAD_PIC_REG_BP()
1346 +
1347 + movl 20(%esp), %edi # dest
1348 + movl 24(%esp), %esi # src
1349 +
1350 + movq MUNG(OFFSETBX), %mm7
1351 paddw %mm7, %mm7
1352 pxor %mm6, %mm6
1353
1354 @@ -1095,21 +1112,23 @@ video_copy_pal_c_block_mmx_loop:
1355 ret
1356
1357 .global _dv_ppm_copy_ntsc_c_block_mmx
1358 +.hidden _dv_ppm_copy_ntsc_c_block_mmx
1359 +.type _dv_ppm_copy_ntsc_c_block_mmx,@function
1360 _dv_ppm_copy_ntsc_c_block_mmx:
1361
1362 pushl %ebp
1363 - movl %esp, %ebp
1364 pushl %esi
1365 pushl %edi
1366 pushl %ebx
1367 -
1368 - movl 8(%ebp), %edi # dest
1369 - movl 12(%ebp), %esi # src
1370 +
1371 + LOAD_PIC_REG_BP()
1372 +
1373 + movl 20(%esp), %edi # dest
1374 + movl 24(%esp), %esi # src
1375
1376 movl $4, %ebx
1377
1378 - movq ALLONE, %mm6
1379 -
1380 + movq MUNG(ALLONE), %mm6
1381 ppm_copy_ntsc_c_block_mmx_loop:
1382
1383 movq (%esi), %mm0
1384 @@ -1168,17 +1187,20 @@ ppm_copy_ntsc_c_block_mmx_loop:
1385 ret
1386
1387 .global _dv_pgm_copy_ntsc_c_block_mmx
1388 +.hidden _dv_pgm_copy_ntsc_c_block_mmx
1389 +.type _dv_pgm_copy_ntsc_c_block_mmx,@function
1390 _dv_pgm_copy_ntsc_c_block_mmx:
1391
1392 pushl %ebp
1393 - movl %esp, %ebp
1394 pushl %esi
1395 pushl %edi
1396 -
1397 - movl 8(%ebp), %edi # dest
1398 - movl 12(%ebp), %esi # src
1399
1400 - movq OFFSETBX, %mm7
1401 + LOAD_PIC_REG_BP()
1402 +
1403 + movl 16(%esp), %edi # dest
1404 + movl 20(%esp), %esi # src
1405 +
1406 + movq MUNG(OFFSETBX), %mm7
1407 paddw %mm7, %mm7
1408 pxor %mm6, %mm6
1409
1410 @@ -1325,18 +1347,21 @@ _dv_pgm_copy_ntsc_c_block_mmx:
1411 ret
1412
1413 .global _dv_video_copy_ntsc_c_block_mmx
1414 +.hidden _dv_video_copy_ntsc_c_block_mmx
1415 +.type _dv_video_copy_ntsc_c_block_mmx,@function
1416 _dv_video_copy_ntsc_c_block_mmx:
1417
1418 pushl %ebp
1419 - movl %esp, %ebp
1420 pushl %esi
1421 pushl %edi
1422 pushl %ebx
1423 -
1424 - movl 8(%ebp), %edi # dest
1425 - movl 12(%ebp), %esi # src
1426
1427 - movq OFFSETBX, %mm7
1428 + LOAD_PIC_REG_BP()
1429 +
1430 + movl 20(%esp), %edi # dest
1431 + movl 24(%esp), %esi # src
1432 +
1433 + movq MUNG(OFFSETBX), %mm7
1434 paddw %mm7, %mm7
1435 pxor %mm6, %mm6
1436
1437 diff -Nurp libdv-0.104-old/libdv/rgbtoyuv_x86_64.S libdv-0.104/libdv/rgbtoyuv_x86_64.S
1438 --- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:22.000000000 +0100
1439 +++ libdv-0.104/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1440 @@ -41,9 +41,6 @@
1441 #define DV_WIDTH_SHORT_HALF 720
1442 #define DV_WIDTH_BYTE_HALF 360
1443
1444 -.global _dv_rgbtoycb_mmx_x86_64
1445 -# .global yuvtoycb_mmx_x86_64
1446 -
1447 .data
1448
1449 .align 8
1450 diff -Nurp libdv-0.104-old/libdv/transpose_x86.S libdv-0.104/libdv/transpose_x86.S
1451 --- libdv-0.104-old/libdv/transpose_x86.S 2006-01-01 22:44:22.000000000 +0100
1452 +++ libdv-0.104/libdv/transpose_x86.S 2006-01-01 22:44:43.000000000 +0100
1453 @@ -1,5 +1,7 @@
1454 .text
1455 .global _dv_transpose_mmx
1456 +.hidden _dv_transpose_mmx
1457 +.type _dv_transpose_mmx,@function
1458
1459 _dv_transpose_mmx:
1460 pushl %ebp
1461 diff -Nurp libdv-0.104-old/libdv/transpose_x86_64.S libdv-0.104/libdv/transpose_x86_64.S
1462 --- libdv-0.104-old/libdv/transpose_x86_64.S 2006-01-01 22:44:22.000000000 +0100
1463 +++ libdv-0.104/libdv/transpose_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1464 @@ -1,5 +1,7 @@
1465 .text
1466 .global _dv_transpose_mmx_x86_64
1467 +.hidden _dv_transpose_mmx_x86_64
1468 +.type _dv_transpose_mmx_x86_64,@function
1469
1470 _dv_transpose_mmx_x86_64:
1471
1472 diff -Nurp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S
1473 --- libdv-0.104-old/libdv/vlc_x86.S 2006-01-01 22:44:22.000000000 +0100
1474 +++ libdv-0.104/libdv/vlc_x86.S 2006-01-01 22:45:51.000000000 +0100
1475 @@ -1,29 +1,38 @@
1476 #include "asmoff.h"
1477 + #include "asm_common.S"
1478 +
1479 .text
1480 .align 4
1481 .globl dv_decode_vlc
1482 +.globl asm_dv_decode_vlc
1483 +.hidden asm_dv_decode_vlc
1484 +asm_dv_decode_vlc = dv_decode_vlc
1485 +
1486 .type dv_decode_vlc,@function
1487 dv_decode_vlc:
1488 pushl %ebx
1489 + pushl %ebp
1490
1491 - /* Args are at 8(%esp). */
1492 - movl 8(%esp),%eax /* %eax is bits */
1493 - movl 12(%esp),%ebx /* %ebx is maxbits */
1494 + LOAD_PIC_REG_BP()
1495 +
1496 + /* Args are at 12(%esp). */
1497 + movl 12(%esp),%eax /* %eax is bits */
1498 + movl 16(%esp),%ebx /* %ebx is maxbits */
1499 andl $0x3f,%ebx /* limit index range STL*/
1500
1501 - movl dv_vlc_class_index_mask(,%ebx,4),%edx
1502 + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
1503 andl %eax,%edx
1504 - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
1505 + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
1506 sarl %cl,%edx
1507 - movl dv_vlc_classes(,%ebx,4),%ecx
1508 + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
1509 movsbl (%ecx,%edx,1),%edx /* %edx is class */
1510
1511 - movl dv_vlc_index_mask(,%edx,4),%ebx
1512 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1513 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1514 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1515 andl %eax,%ebx
1516 sarl %cl,%ebx
1517
1518 - movl dv_vlc_lookups(,%edx,4),%edx
1519 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1520 movl (%edx,%ebx,4),%edx
1521
1522 /* Now %edx holds result, like this:
1523 @@ -42,7 +51,7 @@ dv_decode_vlc:
1524 movl %edx,%ecx
1525 sarl $8,%ecx
1526 andl $0xff,%ecx
1527 - movl sign_mask(,%ecx,4),%ebx
1528 + movl MUNG_ARR(sign_mask,%ecx,4),%ebx
1529 andl %ebx,%eax
1530 negl %eax
1531 sarl $31,%eax
1532 @@ -63,14 +72,14 @@ dv_decode_vlc:
1533 *result = broken;
1534 Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
1535 */
1536 - movl 12(%esp),%ebx /* %ebx is maxbits */
1537 + movl 16(%esp),%ebx /* %ebx is maxbits */
1538 subl %ecx,%ebx
1539 sbbl %ebx,%ebx
1540 orl %ebx,%edx
1541
1542 - movl 16(%esp),%eax
1543 + movl 20(%esp),%eax
1544 movl %edx,(%eax)
1545 -
1546 + popl %ebp
1547 popl %ebx
1548 ret
1549
1550 @@ -80,21 +89,28 @@ dv_decode_vlc:
1551 .type __dv_decode_vlc,@function
1552 __dv_decode_vlc:
1553 pushl %ebx
1554 + pushl %ebp
1555 +
1556 + LOAD_PIC_REG_BP()
1557
1558 - /* Args are at 8(%esp). */
1559 - movl 8(%esp),%eax /* %eax is bits */
1560 + /* Args are at 12(%esp). */
1561 + movl 12(%esp),%eax /* %eax is bits */
1562
1563 movl %eax,%edx /* %edx is class */
1564 andl $0xfe00,%edx
1565 sarl $9,%edx
1566 +#ifdef __PIC__
1567 + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
1568 +#else
1569 movsbl dv_vlc_class_lookup5(%edx),%edx
1570 -
1571 - movl dv_vlc_index_mask(,%edx,4),%ebx
1572 - movl dv_vlc_index_rshift(,%edx,4),%ecx
1573 +#endif
1574 +
1575 + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
1576 + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
1577 andl %eax,%ebx
1578 sarl %cl,%ebx
1579
1580 - movl dv_vlc_lookups(,%edx,4),%edx
1581 + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
1582 movl (%edx,%ebx,4),%edx
1583
1584 /* Now %edx holds result, like this:
1585 @@ -112,7 +128,7 @@ __dv_decode_vlc:
1586 movl %edx,%ecx
1587 sarl $8,%ecx
1588 andl $0xff,%ecx
1589 - movl sign_mask(,%ecx,4),%ecx
1590 + movl MUNG_ARR(sign_mask,%ecx,4),%ecx
1591 andl %ecx,%eax
1592 negl %eax
1593 sarl $31,%eax
1594 @@ -127,9 +143,9 @@ __dv_decode_vlc:
1595 xorl %eax,%edx
1596 subl %eax,%edx
1597
1598 - movl 12(%esp),%eax
1599 + movl 16(%esp),%eax
1600 movl %edx,(%eax)
1601 -
1602 + popl %ebp
1603 popl %ebx
1604 ret
1605
1606 @@ -140,13 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1607 */
1608 .text
1609 .align 4
1610 +.globl asm_dv_parse_ac_coeffs_pass0
1611 +.hidden asm_dv_parse_ac_coeffs_pass0
1612 + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
1613 +
1614 .globl dv_parse_ac_coeffs_pass0
1615 +.type dv_parse_ac_coeffs_pass0,@function
1616 dv_parse_ac_coeffs_pass0:
1617 pushl %ebx
1618 pushl %edi
1619 pushl %esi
1620 pushl %ebp
1621
1622 + LOAD_PIC_REG_SI()
1623 +
1624 #define ARGn(N) (20+(4*(N)))(%esp)
1625
1626 /*
1627 @@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
1628 ebp bl
1629 */
1630 movl ARGn(2),%ebp
1631 +#ifndef __PIC__
1632 movl ARGn(0),%esi
1633 movl bitstream_t_buf(%esi),%esi
1634 +#endif
1635 movl dv_block_t_offset(%ebp),%edi
1636 movl dv_block_t_reorder(%ebp),%ebx
1637
1638 @@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
1639
1640 movq dv_block_t_coeffs(%ebp),%mm1
1641 pxor %mm0,%mm0
1642 +#ifdef __PIC__
1643 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1644 +#else
1645 pand const_f_0_0_0,%mm1
1646 +#endif
1647 movq %mm1,dv_block_t_coeffs(%ebp)
1648 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1649 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1650 @@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
1651 readloop:
1652 movl %edi,%ecx
1653 shrl $3,%ecx
1654 +#ifdef __PIC__
1655 + pushl %esi
1656 + movl ARGn(1),%esi
1657 + movl bitstream_t_buf(%esi),%esi
1658 +#endif
1659 movzbl (%esi,%ecx,1),%eax
1660 movzbl 1(%esi,%ecx,1),%edx
1661 movzbl 2(%esi,%ecx,1),%ecx
1662 +#ifdef __PIC__
1663 + popl %esi
1664 +#endif
1665 shll $16,%eax
1666 shll $8,%edx
1667 orl %ecx,%eax
1668 @@ -217,7 +254,11 @@ readloop:
1669
1670 /* Attempt to use the shortcut first. If it hits, then
1671 this vlc term has been decoded. */
1672 +#ifdef __PIC__
1673 + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
1674 +#else
1675 movl dv_vlc_class1_shortcut(,%ecx,4),%edx
1676 +#endif
1677 test $0x80,%edx
1678 je done_decode
1679
1680 @@ -228,12 +269,19 @@ readloop:
1681 movl %ebx,dv_block_t_reorder(%ebp)
1682
1683 /* %eax is bits */
1684 -
1685 +#ifdef __PIC__
1686 + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
1687 +
1688 + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
1689 + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
1690 + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
1691 +#else
1692 movsbl dv_vlc_class_lookup5(%ecx),%ecx
1693
1694 movl dv_vlc_index_mask(,%ecx,4),%ebx
1695 movl dv_vlc_lookups(,%ecx,4),%edx
1696 movl dv_vlc_index_rshift(,%ecx,4),%ecx
1697 +#endif
1698 andl %eax,%ebx
1699 sarl %cl,%ebx
1700
1701 @@ -256,7 +304,11 @@ readloop:
1702 movl %edx,%ecx
1703 sarl $8,%ecx
1704 andl $0xff,%ecx
1705 +#ifdef __PIC__
1706 + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
1707 +#else
1708 movl sign_mask(,%ecx,4),%ecx
1709 +#endif
1710 andl %ecx,%eax
1711 negl %eax
1712 sarl $31,%eax
1713 @@ -326,10 +378,16 @@ alldone:
1714
1715 slowpath:
1716 /* slow path: use dv_decode_vlc */;
1717 +#ifdef __PIC__
1718 + pushl %esi
1719 + leal vlc@GOTOFF(%esi),%esi
1720 + xchgl %esi,(%esp) /* last parameter is &vlc */
1721 +#else
1722 pushl $vlc /* last parameter is &vlc */
1723 +#endif
1724 pushl %edx /* bits_left */
1725 pushl %eax /* bits */
1726 - call dv_decode_vlc
1727 + call asm_dv_decode_vlc
1728 addl $12,%esp
1729 test $0x80,%edx /* If (vlc.run < 0) break */
1730 jne escape
1731 @@ -359,12 +417,15 @@ show16:
1732 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1733 */
1734 .globl dv_parse_video_segment
1735 + .type dv_parse_video_segment,@function
1736 dv_parse_video_segment:
1737 pushl %ebx
1738 pushl %edi
1739 pushl %esi
1740 pushl %ebp
1741
1742 + LOAD_PIC_REG_SI()
1743 +
1744 #define ARGn(N) (20+(4*(N)))(%esp)
1745
1746 movl ARGn(1),%eax /* quality */
1747 @@ -373,7 +434,11 @@ dv_parse_video_segment:
1748 jz its_mono
1749 movl $6,%ebx
1750 its_mono:
1751 +#ifdef __PIC__
1752 + movl %ebx,n_blocks@GOTOFF(%esi)
1753 +#else
1754 movl %ebx,n_blocks
1755 +#endif
1756
1757 /*
1758 * ebx seg/b
1759 @@ -384,15 +449,22 @@ its_mono:
1760 * ebp bl
1761 */
1762 movl ARGn(0),%ebx
1763 +#ifndef __PIC__
1764 movl dv_videosegment_t_bs(%ebx),%esi
1765 movl bitstream_t_buf(%esi),%esi
1766 +#endif
1767 leal dv_videosegment_t_mb(%ebx),%edi
1768
1769 movl $0,%eax
1770 movl $0,%ecx
1771 macloop:
1772 +#ifdef __PIC__
1773 + movl %eax,m@GOTOFF(%esi)
1774 + movl %ecx,mb_start@GOTOFF(%esi)
1775 +#else
1776 movl %eax,m
1777 movl %ecx,mb_start
1778 +#endif
1779
1780 movl ARGn(0),%ebx
1781
1782 @@ -400,7 +472,15 @@ macloop:
1783 /* mb->qno = bitstream_get(bs,4); */
1784 movl %ecx,%edx
1785 shr $3,%edx
1786 +#ifdef __PIC__
1787 + pushl %esi
1788 + movl dv_videosegment_t_bs(%ebx),%esi
1789 + movl bitstream_t_buf(%esi),%esi
1790 +#endif
1791 movzbl 3(%esi,%edx,1),%edx
1792 +#ifdef __PIC__
1793 + popl %esi
1794 +#endif
1795 andl $0xf,%edx
1796 movl %edx,dv_macroblock_t_qno(%edi)
1797
1798 @@ -411,7 +491,11 @@ macloop:
1799 movl %edx,dv_macroblock_t_eob_count(%edi)
1800
1801 /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
1802 +#ifdef __PIC__
1803 + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
1804 +#else
1805 movl dv_super_map_vertical(,%eax,4),%edx
1806 +#endif
1807 movl dv_videosegment_t_i(%ebx),%ecx
1808 addl %ecx,%edx
1809
1810 @@ -422,11 +506,20 @@ skarly:
1811 andl $1,%ecx
1812 shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
1813
1814 +#ifdef __PIC__
1815 + leal mod_10@GOTOFF(%esi,%edx),%edx
1816 + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1817 +#else
1818 movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
1819 +#endif
1820 movl %edx,dv_macroblock_t_i(%edi)
1821
1822 /* mb->j = dv_super_map_horizontal[m]; */
1823 +#ifdef __PIC__
1824 + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
1825 +#else
1826 movl dv_super_map_horizontal(,%eax,4),%edx
1827 +#endif
1828 movl %edx,dv_macroblock_t_j(%edi)
1829
1830 /* mb->k = seg->k; */
1831 @@ -445,12 +538,29 @@ blkloop:
1832 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
1833 */
1834 /* dc = bitstream_get(bs,9); */
1835 +#ifdef __PIC__
1836 + movl mb_start@GOTOFF(%esi),%ecx
1837 +#else
1838 movl mb_start,%ecx
1839 +#endif
1840 shr $3,%ecx
1841 +#ifdef __PIC__
1842 + movzbl blk_start@GOTOFF(%esi,%ebx),%edx
1843 +#else
1844 movzbl blk_start(%ebx),%edx
1845 +#endif
1846 addl %ecx,%edx
1847 +#ifdef __PIC__
1848 + pushl %esi
1849 + movl ARGn(1),%esi
1850 + movl dv_videosegment_t_bs(%esi),%esi
1851 + movl bitstream_t_buf(%esi),%esi
1852 +#endif
1853 movzbl (%esi,%edx,1),%eax /* hi byte */
1854 movzbl 1(%esi,%edx,1),%ecx /* lo byte */
1855 +#ifdef __PIC__
1856 + popl %esi
1857 +#endif
1858 shll $8,%eax
1859 orl %ecx,%eax
1860
1861 @@ -477,7 +587,11 @@ blkloop:
1862
1863 /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
1864 shll $6,%eax
1865 +#ifdef __PIC__
1866 + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax
1867 +#else
1868 addl $(dv_reorder+1),%eax
1869 +#endif
1870 movl %eax,dv_block_t_reorder(%ebp)
1871
1872 /* bl->reorder_sentinel = bl->reorder + 63; */
1873 @@ -485,13 +599,22 @@ blkloop:
1874 movl %eax,dv_block_t_reorder_sentinel(%ebp)
1875
1876 /* bl->offset= mb_start + dv_parse_bit_start[b]; */
1877 +#ifdef __PIC__
1878 + movl mb_start@GOTOFF(%esi),%ecx
1879 + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
1880 +#else
1881 movl mb_start,%ecx
1882 movl dv_parse_bit_start(,%ebx,4),%eax
1883 +#endif
1884 addl %ecx,%eax
1885 movl %eax,dv_block_t_offset(%ebp)
1886
1887 /* bl->end= mb_start + dv_parse_bit_end[b]; */
1888 +#ifdef __PIC__
1889 + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
1890 +#else
1891 movl dv_parse_bit_end(,%ebx,4),%eax
1892 +#endif
1893 addl %ecx,%eax
1894 movl %eax,dv_block_t_end(%ebp)
1895
1896 @@ -503,7 +626,11 @@ blkloop:
1897 /* no AC pass. Just zero out the remaining coeffs */
1898 movq dv_block_t_coeffs(%ebp),%mm1
1899 pxor %mm0,%mm0
1900 +#ifdef __PIC__
1901 + pand const_f_0_0_0@GOTOFF(%esi),%mm1
1902 +#else
1903 pand const_f_0_0_0,%mm1
1904 +#endif
1905 movq %mm1,dv_block_t_coeffs(%ebp)
1906 movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
1907 movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
1908 @@ -528,18 +655,27 @@ do_ac_pass:
1909 pushl %ebp
1910 pushl %edi
1911 pushl %eax
1912 - call dv_parse_ac_coeffs_pass0
1913 + call asm_dv_parse_ac_coeffs_pass0
1914 addl $12,%esp
1915 done_ac:
1916
1917 +#ifdef __PIC__
1918 + movl n_blocks@GOTOFF(%esi),%eax
1919 +#else
1920 movl n_blocks,%eax
1921 +#endif
1922 addl $dv_block_t_size,%ebp
1923 incl %ebx
1924 cmpl %eax,%ebx
1925 jnz blkloop
1926
1927 +#ifdef __PIC__
1928 + movl m@GOTOFF(%esi),%eax
1929 + movl mb_start@GOTOFF(%esi),%ecx
1930 +#else
1931 movl m,%eax
1932 movl mb_start,%ecx
1933 +#endif
1934 addl $(8 * 80),%ecx
1935 addl $dv_macroblock_t_size,%edi
1936 incl %eax
1937 @@ -557,7 +693,7 @@ done_ac:
1938
1939 andl $DV_QUALITY_AC_MASK,%eax
1940 cmpl $DV_QUALITY_AC_2,%eax
1941 - jz dv_parse_ac_coeffs
1942 + jz asm_dv_parse_ac_coeffs
1943 movl $0,%eax
1944 ret
1945
1946 diff -Nurp libdv-0.104-old/libdv/vlc_x86_64.S libdv-0.104/libdv/vlc_x86_64.S
1947 --- libdv-0.104-old/libdv/vlc_x86_64.S 2006-01-01 22:44:23.000000000 +0100
1948 +++ libdv-0.104/libdv/vlc_x86_64.S 2006-01-01 22:44:43.000000000 +0100
1949 @@ -169,7 +169,8 @@ void dv_parse_ac_coeffs_pass0(bitstream_
1950 .text
1951 .align 4
1952 .globl dv_parse_ac_coeffs_pass0
1953 -
1954 +.type dv_parse_ac_coeffs_pass0,@function
1955 +
1956 dv_parse_ac_coeffs_pass0:
1957
1958 /* Args are at rdi=bs, rsi=mb, rdx=bl */
1959 @@ -422,6 +423,7 @@ show16: /* not u
1960 gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
1961 */
1962 .globl dv_parse_video_segment
1963 + .type dv_parse_video_segment,@function
1964 dv_parse_video_segment:
1965
1966 /* Args are at rdi=seg, rsi=quality */
1967 diff -Nurp libdv-0.104-old/libdv-0.104/libdv/asm_common.S libdv-0.104/libdv-0.104/libdv/asm_common.S
1968 --- libdv-0.104-old/libdv-0.104/libdv/asm_common.S 1970-01-01 01:00:00.000000000 +0100
1969 +++ libdv-0.104/libdv-0.104/libdv/asm_common.S 2006-01-01 22:44:43.000000000 +0100
1970 @@ -0,0 +1,37 @@
1971 +/* public domain, do what you want */
1972 +
1973 +#ifdef __PIC__
1974 +# define MUNG(sym) sym##@GOTOFF(%ebp)
1975 +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
1976 +#else
1977 +# define MUNG(sym) sym
1978 +# define MUNG_ARR(sym, args...) sym(,##args)
1979 +#endif
1980 +
1981 +#ifdef __PIC__
1982 +# undef __i686 /* gcc define gets in our way */
1983 + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
1984 +.globl __i686.get_pc_thunk.bp
1985 + .hidden __i686.get_pc_thunk.bp
1986 + .type __i686.get_pc_thunk.bp,@function
1987 +__i686.get_pc_thunk.bp:
1988 + movl (%esp), %ebp
1989 + ret
1990 +# define LOAD_PIC_REG_BP() \
1991 + call __i686.get_pc_thunk.bp ; \
1992 + addl $_GLOBAL_OFFSET_TABLE_, %ebp
1993 +
1994 + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
1995 +.globl __i686.get_pc_thunk.si
1996 + .hidden __i686.get_pc_thunk.si
1997 + .type __i686.get_pc_thunk.si,@function
1998 +__i686.get_pc_thunk.si:
1999 + movl (%esp), %esi
2000 + ret
2001 +# define LOAD_PIC_REG_SI() \
2002 + call __i686.get_pc_thunk.si ; \
2003 + addl $_GLOBAL_OFFSET_TABLE_, %esi
2004 +#else
2005 +# define LOAD_PIC_REG_BP()
2006 +# define LOAD_PIC_REG_SI()
2007 +#endif