]>
Commit | Line | Data |
---|---|---|
1 | diff -urp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S | |
2 | --- libdv-0.104-old/libdv/dct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200 | |
3 | +++ libdv-0.104/libdv/dct_block_mmx.S 2005-10-24 00:11:39.000000000 +0200 | |
4 | @@ -53,6 +53,17 @@ scratch2: .quad 0 | |
5 | scratch3: .quad 0 | |
6 | scratch4: .quad 0 | |
7 | ||
8 | +#ifdef __PIC__ | |
9 | +# undef __i686 /* gcc define gets in our way */ | |
10 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
11 | +.globl __i686.get_pc_thunk.bp | |
12 | + .hidden __i686.get_pc_thunk.bp | |
13 | + .type __i686.get_pc_thunk.bp,@function | |
14 | +__i686.get_pc_thunk.bp: | |
15 | + movl (%esp), %ebp | |
16 | + ret | |
17 | +#endif | |
18 | + | |
19 | .text | |
20 | ||
21 | .align 8 | |
22 | @@ -60,10 +71,14 @@ scratch4: .quad 0 | |
23 | _dv_dct_88_block_mmx: | |
24 | ||
25 | pushl %ebp | |
26 | - movl %esp, %ebp | |
27 | pushl %esi | |
28 | ||
29 | - movl 8(%ebp), %esi # source | |
30 | +#ifdef __PIC__ | |
31 | + call __i686.get_pc_thunk.bp | |
32 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
33 | +#endif | |
34 | + | |
35 | + movl 12(%esp), %esi # source | |
36 | ||
37 | # column 0 | |
38 | movq 16*0(%esi), %mm0 # v0 | |
39 | @@ -86,22 +101,45 @@ _dv_dct_88_block_mmx: | |
40 | ||
41 | movq 16*3(%esi), %mm5 # v3 | |
42 | movq 16*4(%esi), %mm7 # v4 | |
43 | +#ifdef __PIC__ | |
44 | + movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ; | |
45 | +#else | |
46 | movq %mm7, scratch1 # scratch1: v4 ; | |
47 | +#endif | |
48 | movq %mm5, %mm7 # duplicate v3 | |
49 | +#ifdef __PIC__ | |
50 | + paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4 | |
51 | + psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4 | |
52 | + movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03 | |
53 | +#else | |
54 | paddw scratch1, %mm5 # v03: v3+v4 | |
55 | psubw scratch1, %mm7 # v04: v3-v4 | |
56 | movq %mm5, scratch2 # scratch2: v03 | |
57 | +#endif | |
58 | movq %mm0, %mm5 # mm5: v00 | |
59 | ||
60 | +#ifdef __PIC__ | |
61 | + paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03 | |
62 | + psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03 | |
63 | + movq %mm3, scratch3@GOTOFF(%ebp) # scratch3: v02 | |
64 | +#else | |
65 | paddw scratch2, %mm0 # v10: v00+v03 | |
66 | psubw scratch2, %mm5 # v13: v00-v03 | |
67 | movq %mm3, scratch3 # scratch3: v02 | |
68 | +#endif | |
69 | movq %mm1, %mm3 # duplicate v01 | |
70 | ||
71 | +#ifdef __PIC__ | |
72 | + paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02 | |
73 | + psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02 | |
74 | + | |
75 | + movq %mm6, scratch4@GOTOFF(%ebp) # scratch4: v05 | |
76 | +#else | |
77 | paddw scratch3, %mm1 # v11: v01+v02 | |
78 | psubw scratch3, %mm3 # v12: v01-v02 | |
79 | ||
80 | movq %mm6, scratch4 # scratch4: v05 | |
81 | +#endif | |
82 | movq %mm0, %mm6 # duplicate v10 | |
83 | ||
84 | paddw %mm1, %mm0 # v10+v11 | |
85 | @@ -111,10 +149,18 @@ _dv_dct_88_block_mmx: | |
86 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
87 | ||
88 | movq %mm4, %mm0 # mm0: v06 | |
89 | +#ifdef __PIC__ | |
90 | + paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06 | |
91 | +#else | |
92 | paddw scratch4, %mm4 # v15: v05+v06 | |
93 | +#endif | |
94 | paddw %mm2, %mm0 # v16: v07+v06 | |
95 | ||
96 | +#ifdef __PIC__ | |
97 | + pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15 | |
98 | +#else | |
99 | pmulhw WA3, %mm4 # v35~: WA3*v15 | |
100 | +#endif | |
101 | psllw $1, %mm4 # v35: compensate the coeefient scale | |
102 | ||
103 | movq %mm4, %mm6 # duplicate v35 | |
104 | @@ -123,7 +169,11 @@ _dv_dct_88_block_mmx: | |
105 | ||
106 | paddw %mm5, %mm3 # v22: v12+v13 | |
107 | ||
108 | +#ifdef __PIC__ | |
109 | + pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA1*v22 | |
110 | +#else | |
111 | pmulhw WA1, %mm3 # v32~: WA1*v22 | |
112 | +#endif | |
113 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
114 | movq %mm5, %mm6 # duplicate v13 | |
115 | ||
116 | @@ -134,13 +184,23 @@ _dv_dct_88_block_mmx: | |
117 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
118 | ||
119 | ||
120 | +#ifdef __PIC__ | |
121 | + paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05 | |
122 | +#else | |
123 | paddw scratch4, %mm7 # v14n: v04+v05 | |
124 | +#endif | |
125 | movq %mm0, %mm5 # duplicate v16 | |
126 | ||
127 | psubw %mm7, %mm0 # va1: v16-v14n | |
128 | +#ifdef __PIC__ | |
129 | + pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5 | |
130 | + pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~: v16*WA4 | |
131 | + pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2 | |
132 | +#else | |
133 | pmulhw WA5, %mm0 # va0~: va1*WA5 | |
134 | pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
135 | pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
136 | +#endif | |
137 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale | |
138 | psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale | |
139 | ||
140 | @@ -188,22 +248,45 @@ _dv_dct_88_block_mmx: | |
141 | ||
142 | movq 16*3(%esi), %mm5 # v3 | |
143 | movq 16*4(%esi), %mm7 # v4 | |
144 | +#ifdef __PIC__ | |
145 | + movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ; | |
146 | +#else | |
147 | movq %mm7, scratch1 # scratch1: v4 ; | |
148 | +#endif | |
149 | movq %mm5, %mm7 # duplicate v3 | |
150 | +#ifdef __PIC__ | |
151 | + paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4 | |
152 | + psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4 | |
153 | + movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03 | |
154 | +#else | |
155 | paddw scratch1, %mm5 # v03: v3+v4 | |
156 | psubw scratch1, %mm7 # v04: v3-v4 | |
157 | movq %mm5, scratch2 # scratch2: v03 | |
158 | +#endif | |
159 | movq %mm0, %mm5 # mm5: v00 | |
160 | ||
161 | +#ifdef __PIC__ | |
162 | + paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03 | |
163 | + psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03 | |
164 | + movq %mm3, scratch3@GOTOFF(%ebp) # scratch3: v02 | |
165 | +#else | |
166 | paddw scratch2, %mm0 # v10: v00+v03 | |
167 | psubw scratch2, %mm5 # v13: v00-v03 | |
168 | movq %mm3, scratch3 # scratc3: v02 | |
169 | +#endif | |
170 | movq %mm1, %mm3 # duplicate v01 | |
171 | ||
172 | +#ifdef __PIC__ | |
173 | + paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02 | |
174 | + psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02 | |
175 | + | |
176 | + movq %mm6, scratch4@GOTOFF(%ebp) # scratch4: v05 | |
177 | +#else | |
178 | paddw scratch3, %mm1 # v11: v01+v02 | |
179 | psubw scratch3, %mm3 # v12: v01-v02 | |
180 | ||
181 | movq %mm6, scratch4 # scratc4: v05 | |
182 | +#endif | |
183 | movq %mm0, %mm6 # duplicate v10 | |
184 | ||
185 | paddw %mm1, %mm0 # v10+v11 | |
186 | @@ -213,10 +296,18 @@ _dv_dct_88_block_mmx: | |
187 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
188 | ||
189 | movq %mm4, %mm0 # mm0: v06 | |
190 | +#ifdef __PIC__ | |
191 | + paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06 | |
192 | +#else | |
193 | paddw scratch4, %mm4 # v15: v05+v06 | |
194 | +#endif | |
195 | paddw %mm2, %mm0 # v16: v07+v06 | |
196 | ||
197 | +#ifdef __PIC__ | |
198 | + pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15 | |
199 | +#else | |
200 | pmulhw WA3, %mm4 # v35~: WA3*v15 | |
201 | +#endif | |
202 | psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale | |
203 | ||
204 | movq %mm4, %mm6 # duplicate v35 | |
205 | @@ -225,7 +316,11 @@ _dv_dct_88_block_mmx: | |
206 | ||
207 | paddw %mm5, %mm3 # v22: v12+v13 | |
208 | ||
209 | +#ifdef __PIC__ | |
210 | + pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA1*v22 | |
211 | +#else | |
212 | pmulhw WA1, %mm3 # v32~: WA3*v15 | |
213 | +#endif | |
214 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
215 | movq %mm5, %mm6 # duplicate v13 | |
216 | ||
217 | @@ -235,13 +330,23 @@ _dv_dct_88_block_mmx: | |
218 | movq %mm5, 16*2(%esi) # out2: v13+v32 | |
219 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
220 | ||
221 | +#ifdef __PIC__ | |
222 | + paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05 | |
223 | +#else | |
224 | paddw scratch4, %mm7 # v14n: v04+v05 | |
225 | +#endif | |
226 | movq %mm0, %mm5 # duplicate v16 | |
227 | ||
228 | psubw %mm7, %mm0 # va1: v16-v14n | |
229 | +#ifdef __PIC__ | |
230 | + pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2 | |
231 | + pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5 | |
232 | + pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~: v16*WA4 | |
233 | +#else | |
234 | pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
235 | pmulhw WA5, %mm0 # va0~: va1*WA5 | |
236 | pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
237 | +#endif | |
238 | psllw $16-NSHIFT, %mm7 | |
239 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient | |
240 | # scale note that WA4 is shifted 1 bit less than the others | |
241 | @@ -751,11 +856,15 @@ _dv_dct_block_mmx_postscale_88: | |
242 | _dv_dct_248_block_mmx: | |
243 | ||
244 | pushl %ebp | |
245 | - movl %esp, %ebp | |
246 | pushl %esi | |
247 | pushl %edi | |
248 | ||
249 | - movl 8(%ebp), %esi # source | |
250 | +#ifdef __PIC__ | |
251 | + call __i686.get_pc_thunk.bp | |
252 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
253 | +#endif | |
254 | + | |
255 | + movl 16(%esp), %esi # source | |
256 | ||
257 | # column 0 | |
258 | ||
259 | @@ -779,7 +888,11 @@ _dv_dct_248_block_mmx: | |
260 | paddw %mm1, %mm0 # v20: v10+v11 | |
261 | psubw %mm1, %mm3 # v21: v10-v11 | |
262 | ||
263 | +#ifdef __PIC__ | |
264 | + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 | |
265 | +#else | |
266 | pmulhw WA1, %mm5 # v32~: WA1*v22 | |
267 | +#endif | |
268 | movq %mm4, %mm2 | |
269 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
270 | ||
271 | @@ -818,7 +931,11 @@ _dv_dct_248_block_mmx: | |
272 | paddw %mm1, %mm0 # v20: v10+v11 | |
273 | psubw %mm1, %mm3 # v21: v10-v11 | |
274 | ||
275 | +#ifdef __PIC__ | |
276 | + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 | |
277 | +#else | |
278 | pmulhw WA1, %mm5 # v32~: WA1*v22 | |
279 | +#endif | |
280 | movq %mm4, %mm2 | |
281 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
282 | ||
283 | @@ -855,7 +972,11 @@ _dv_dct_248_block_mmx: | |
284 | paddw %mm1, %mm0 # v20: v10+v11 | |
285 | psubw %mm1, %mm3 # v21: v10-v11 | |
286 | ||
287 | +#ifdef __PIC__ | |
288 | + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 | |
289 | +#else | |
290 | pmulhw WA1, %mm5 # v32~: WA1*v22 | |
291 | +#endif | |
292 | movq %mm4, %mm2 | |
293 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
294 | ||
295 | @@ -892,7 +1013,11 @@ _dv_dct_248_block_mmx: | |
296 | paddw %mm1, %mm0 # v20: v10+v11 | |
297 | psubw %mm1, %mm3 # v21: v10-v11 | |
298 | ||
299 | +#ifdef __PIC__ | |
300 | + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 | |
301 | +#else | |
302 | pmulhw WA1, %mm5 # v32~: WA1*v22 | |
303 | +#endif | |
304 | movq %mm4, %mm2 | |
305 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
306 | ||
307 | diff -urp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c | |
308 | --- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200 | |
309 | +++ libdv-0.104/libdv/dv.c 2005-10-24 00:59:57.000000000 +0200 | |
310 | @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp | |
311 | } /* dv_reconfigure */ | |
312 | ||
313 | ||
314 | +extern uint8_t dv_quant_offset[4]; | |
315 | +extern uint8_t dv_quant_shifts[22][4]; | |
316 | + | |
317 | static inline void | |
318 | dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { | |
319 | int i; | |
320 | @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d | |
321 | dv_idct_248 (co248, mb->b[i].coeffs); | |
322 | } else { | |
323 | #if ARCH_X86 | |
324 | - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
325 | + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts); | |
326 | _dv_idct_88(mb->b[i].coeffs); | |
327 | #elif ARCH_X86_64 | |
328 | _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
329 | @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv | |
330 | dv_idct_248 (co248, mb->b[b].coeffs); | |
331 | } else { | |
332 | #if ARCH_X86 | |
333 | - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); | |
334 | + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts); | |
335 | _dv_weight_88_inverse(bl->coeffs); | |
336 | _dv_idct_88(bl->coeffs); | |
337 | #elif ARCH_X86_64 | |
338 | diff -urp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c | |
339 | --- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100 | |
340 | +++ libdv-0.104/libdv/encode.c 2005-10-24 01:17:41.000000000 +0200 | |
341 | @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl | |
342 | } | |
343 | ||
344 | extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
345 | - dv_vlc_entry_t ** out); | |
346 | + dv_vlc_entry_t ** out, | |
347 | + dv_vlc_entry_t * lookup); | |
348 | ||
349 | extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, | |
350 | dv_vlc_entry_t ** out); | |
351 | @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv | |
352 | #elif ARCH_X86 | |
353 | int num_bits; | |
354 | ||
355 | - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); | |
356 | + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); | |
357 | emms(); | |
358 | #else | |
359 | int num_bits; | |
360 | @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv | |
361 | return num_bits; | |
362 | } | |
363 | ||
364 | -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); | |
365 | +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); | |
366 | extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); | |
367 | ||
368 | extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) | |
369 | @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl | |
370 | #elif ARCH_X86_64 | |
371 | return _dv_vlc_num_bits_block_x86_64(coeffs); | |
372 | #else | |
373 | - return _dv_vlc_num_bits_block_x86(coeffs); | |
374 | + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); | |
375 | #endif | |
376 | } | |
377 | ||
378 | diff -urp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S | |
379 | --- libdv-0.104-old/libdv/encode_x86.S 2005-10-23 19:40:58.000000000 +0200 | |
380 | +++ libdv-0.104/libdv/encode_x86.S 2005-10-24 01:18:32.000000000 +0200 | |
381 | @@ -23,10 +23,6 @@ | |
382 | * The libdv homepage is http://libdv.sourceforge.net/. | |
383 | */ | |
384 | ||
385 | -.data | |
386 | -ALLONE: .word 1,1,1,1 | |
387 | -VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
388 | - | |
389 | .text | |
390 | ||
391 | .global _dv_vlc_encode_block_mmx | |
392 | @@ -45,11 +41,14 @@ _dv_vlc_encode_block_mmx: | |
393 | ||
394 | movl $63, %ecx | |
395 | ||
396 | - movl vlc_encode_lookup, %esi | |
397 | + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup | |
398 | ||
399 | pxor %mm0, %mm0 | |
400 | pxor %mm2, %mm2 | |
401 | - movq VLCADDMASK, %mm1 | |
402 | + pushl $0x000000FF | |
403 | + pushl $0x000000FF | |
404 | + movq (%esp), %mm1 | |
405 | + addl $8, %esp | |
406 | xorl %ebp, %ebp | |
407 | subl $8, %edx | |
408 | vlc_encode_block_mmx_loop: | |
409 | @@ -121,7 +120,7 @@ _dv_vlc_num_bits_block_x86: | |
410 | addl $2, %edi | |
411 | ||
412 | movl $63, %ecx | |
413 | - movl vlc_num_bits_lookup, %esi | |
414 | + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup | |
415 | ||
416 | vlc_num_bits_block_x86_loop: | |
417 | movw (%edi), %ax | |
418 | @@ -579,8 +578,11 @@ _dv_need_dct_248_mmx_rows: | |
419 | paddw %mm5, %mm1 | |
420 | ||
421 | paddw %mm1, %mm0 | |
422 | - | |
423 | - pmaddwd ALLONE, %mm0 | |
424 | + | |
425 | + pushl $0x00010001 | |
426 | + pushl $0x00010001 | |
427 | + pmaddwd (%esp), %mm0 | |
428 | + addl $8, %esp | |
429 | movq %mm0, %mm1 | |
430 | psrlq $32, %mm1 | |
431 | paddd %mm1, %mm0 | |
432 | diff -urp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S | |
433 | --- libdv-0.104-old/libdv/idct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200 | |
434 | +++ libdv-0.104/libdv/idct_block_mmx.S 2005-10-24 01:12:12.000000000 +0200 | |
435 | @@ -8,16 +8,37 @@ | |
436 | ||
437 | ||
438 | ||
439 | +#ifdef __PIC__ | |
440 | +# undef __i686 /* gcc define gets in our way */ | |
441 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
442 | +.globl __i686.get_pc_thunk.bp | |
443 | + .hidden __i686.get_pc_thunk.bp | |
444 | + .type __i686.get_pc_thunk.bp,@function | |
445 | +__i686.get_pc_thunk.bp: | |
446 | + movl (%esp), %ebp | |
447 | + ret | |
448 | +#endif | |
449 | + | |
450 | .text | |
451 | + | |
452 | .align 4 | |
453 | .globl _dv_idct_block_mmx | |
454 | .type _dv_idct_block_mmx,@function | |
455 | _dv_idct_block_mmx: | |
456 | pushl %ebp | |
457 | - movl %esp,%ebp | |
458 | pushl %esi | |
459 | + | |
460 | +#ifdef __PIC__ | |
461 | + call __i686.get_pc_thunk.bp | |
462 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
463 | +#endif | |
464 | + | |
465 | +#ifdef __PIC__ | |
466 | + leal preSC@GOTOFF(%ebp), %ecx | |
467 | +#else | |
468 | leal preSC, %ecx | |
469 | - movl 8(%ebp),%esi /* source matrix */ | |
470 | +#endif | |
471 | + movl 12(%esp),%esi /* source matrix */ | |
472 | ||
473 | /* | |
474 | * column 0: even part | |
475 | @@ -35,7 +56,11 @@ _dv_idct_block_mmx: | |
476 | movq %mm1, %mm2 /* added 11/1/96 */ | |
477 | pmulhw 8*8(%esi),%mm5 /* V8 */ | |
478 | psubsw %mm0, %mm1 /* V16 */ | |
479 | +#ifdef __PIC__ | |
480 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */ | |
481 | +#else | |
482 | pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ | |
483 | +#endif | |
484 | paddsw %mm0, %mm2 /* V17 */ | |
485 | movq %mm2, %mm0 /* duplicate V17 */ | |
486 | psraw $1, %mm2 /* t75=t82 */ | |
487 | @@ -76,7 +101,11 @@ _dv_idct_block_mmx: | |
488 | paddsw %mm0, %mm3 /* V29 ; free mm0 */ | |
489 | movq %mm7, %mm1 /* duplicate V26 */ | |
490 | psraw $1, %mm3 /* t91=t94 */ | |
491 | +#ifdef __PIC__ | |
492 | + pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */ | |
493 | +#else | |
494 | pmulhw x539f539f539f539f,%mm7 /* V33 */ | |
495 | +#endif | |
496 | psraw $1, %mm1 /* t96 */ | |
497 | movq %mm5, %mm0 /* duplicate V2 */ | |
498 | psraw $2, %mm4 /* t85=t87 */ | |
499 | @@ -84,15 +113,27 @@ _dv_idct_block_mmx: | |
500 | psubsw %mm4, %mm0 /* V28 ; free mm4 */ | |
501 | movq %mm0, %mm2 /* duplicate V28 */ | |
502 | psraw $1, %mm5 /* t90=t93 */ | |
503 | +#ifdef __PIC__ | |
504 | + pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */ | |
505 | +#else | |
506 | pmulhw x4546454645464546,%mm0 /* V35 */ | |
507 | +#endif | |
508 | psraw $1, %mm2 /* t97 */ | |
509 | movq %mm5, %mm4 /* duplicate t90=t93 */ | |
510 | psubsw %mm2, %mm1 /* V32 ; free mm2 */ | |
511 | +#ifdef __PIC__ | |
512 | + pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */ | |
513 | +#else | |
514 | pmulhw x61f861f861f861f8,%mm1 /* V36 */ | |
515 | +#endif | |
516 | psllw $1, %mm7 /* t107 */ | |
517 | paddsw %mm3, %mm5 /* V31 */ | |
518 | psubsw %mm3, %mm4 /* V30 ; free mm3 */ | |
519 | +#ifdef __PIC__ | |
520 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */ | |
521 | +#else | |
522 | pmulhw x5a825a825a825a82,%mm4 /* V34 */ | |
523 | +#endif | |
524 | nop | |
525 | psubsw %mm1, %mm0 /* V38 */ | |
526 | psubsw %mm7, %mm1 /* V37 ; free mm7 */ | |
527 | @@ -159,7 +200,11 @@ _dv_idct_block_mmx: | |
528 | psubsw %mm7, %mm1 /* V50 */ | |
529 | pmulhw 8*9(%esi), %mm5 /* V9 */ | |
530 | paddsw %mm7, %mm2 /* V51 */ | |
531 | +#ifdef __PIC__ | |
532 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */ | |
533 | +#else | |
534 | pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ | |
535 | +#endif | |
536 | movq %mm2, %mm6 /* duplicate V51 */ | |
537 | psraw $1, %mm2 /* t138=t144 */ | |
538 | movq %mm3, %mm4 /* duplicate V1 */ | |
539 | @@ -200,11 +245,19 @@ _dv_idct_block_mmx: | |
540 | * even more by doing the correction step in a later stage when the number | |
541 | * is actually multiplied by 16 | |
542 | */ | |
543 | +#ifdef __PIC__ | |
544 | + paddw x0005000200010001@GOTOFF(%ebp), %mm4 | |
545 | +#else | |
546 | paddw x0005000200010001, %mm4 | |
547 | +#endif | |
548 | psubsw %mm6, %mm3 /* V60 ; free mm6 */ | |
549 | psraw $1, %mm0 /* t154=t156 */ | |
550 | movq %mm3, %mm1 /* duplicate V60 */ | |
551 | +#ifdef __PIC__ | |
552 | + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */ | |
553 | +#else | |
554 | pmulhw x539f539f539f539f, %mm1 /* V67 */ | |
555 | +#endif | |
556 | movq %mm5, %mm6 /* duplicate V3 */ | |
557 | psraw $2, %mm4 /* t148=t150 */ | |
558 | paddsw %mm4, %mm5 /* V61 */ | |
559 | @@ -213,13 +266,25 @@ _dv_idct_block_mmx: | |
560 | psllw $1, %mm1 /* t169 */ | |
561 | paddsw %mm0, %mm5 /* V65 -> result */ | |
562 | psubsw %mm0, %mm4 /* V64 ; free mm0 */ | |
563 | +#ifdef __PIC__ | |
564 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */ | |
565 | +#else | |
566 | pmulhw x5a825a825a825a82, %mm4 /* V68 */ | |
567 | +#endif | |
568 | psraw $1, %mm3 /* t158 */ | |
569 | psubsw %mm6, %mm3 /* V66 */ | |
570 | movq %mm5, %mm2 /* duplicate V65 */ | |
571 | +#ifdef __PIC__ | |
572 | + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */ | |
573 | +#else | |
574 | pmulhw x61f861f861f861f8, %mm3 /* V70 */ | |
575 | +#endif | |
576 | psllw $1, %mm6 /* t165 */ | |
577 | +#ifdef __PIC__ | |
578 | + pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */ | |
579 | +#else | |
580 | pmulhw x4546454645464546, %mm6 /* V69 */ | |
581 | +#endif | |
582 | psraw $1, %mm2 /* t172 */ | |
583 | /* moved from next block */ | |
584 | movq 8*5(%esi), %mm0 /* V56 */ | |
585 | @@ -344,7 +409,11 @@ _dv_idct_block_mmx: | |
586 | * movq 8*13(%esi), %mm4 tmt13 | |
587 | */ | |
588 | psubsw %mm4, %mm3 /* V134 */ | |
589 | +#ifdef __PIC__ | |
590 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */ | |
591 | +#else | |
592 | pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ | |
593 | +#endif | |
594 | movq 8*9(%esi), %mm6 /* tmt9 */ | |
595 | paddsw %mm4, %mm5 /* V135 ; mm4 free */ | |
596 | movq %mm0, %mm4 /* duplicate tmt1 */ | |
597 | @@ -373,17 +442,33 @@ _dv_idct_block_mmx: | |
598 | psubsw %mm7, %mm0 /* V144 */ | |
599 | movq %mm0, %mm3 /* duplicate V144 */ | |
600 | paddsw %mm7, %mm2 /* V147 ; free mm7 */ | |
601 | +#ifdef __PIC__ | |
602 | + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */ | |
603 | +#else | |
604 | pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ | |
605 | +#endif | |
606 | movq %mm1, %mm7 /* duplicate tmt3 */ | |
607 | paddsw %mm5, %mm7 /* V145 */ | |
608 | psubsw %mm5, %mm1 /* V146 ; free mm5 */ | |
609 | psubsw %mm1, %mm3 /* V150 */ | |
610 | movq %mm7, %mm5 /* duplicate V145 */ | |
611 | +#ifdef __PIC__ | |
612 | + pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */ | |
613 | +#else | |
614 | pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ | |
615 | +#endif | |
616 | psubsw %mm2, %mm5 /* V148 */ | |
617 | +#ifdef __PIC__ | |
618 | + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */ | |
619 | +#else | |
620 | pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ | |
621 | +#endif | |
622 | psllw $2, %mm0 /* t311 */ | |
623 | +#ifdef __PIC__ | |
624 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */ | |
625 | +#else | |
626 | pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ | |
627 | +#endif | |
628 | paddsw %mm2, %mm7 /* V149 ; free mm2 */ | |
629 | psllw $1, %mm1 /* t313 */ | |
630 | nop /* without the nop - freeze here for one clock */ | |
631 | @@ -409,7 +494,11 @@ _dv_idct_block_mmx: | |
632 | paddsw %mm3, %mm6 /* V164 ; free mm3 */ | |
633 | movq %mm4, %mm3 /* duplicate V142 */ | |
634 | psubsw %mm5, %mm4 /* V165 ; free mm5 */ | |
635 | +#ifdef __PIC__ | |
636 | + movq %mm2, scratch7@GOTOFF(%ebp) /* out7 */ | |
637 | +#else | |
638 | movq %mm2, scratch7 /* out7 */ | |
639 | +#endif | |
640 | psraw $4, %mm6 | |
641 | psraw $4, %mm4 | |
642 | paddsw %mm5, %mm3 /* V162 */ | |
643 | @@ -420,11 +509,19 @@ _dv_idct_block_mmx: | |
644 | */ | |
645 | movq %mm6, 8*9(%esi) /* out9 */ | |
646 | paddsw %mm1, %mm0 /* V161 */ | |
647 | +#ifdef __PIC__ | |
648 | + movq %mm3, scratch5@GOTOFF(%ebp) /* out5 */ | |
649 | +#else | |
650 | movq %mm3, scratch5 /* out5 */ | |
651 | +#endif | |
652 | psubsw %mm1, %mm5 /* V166 ; free mm1 */ | |
653 | movq %mm4, 8*11(%esi) /* out11 */ | |
654 | psraw $4, %mm5 | |
655 | +#ifdef __PIC__ | |
656 | + movq %mm0, scratch3@GOTOFF(%ebp) /* out3 */ | |
657 | +#else | |
658 | movq %mm0, scratch3 /* out3 */ | |
659 | +#endif | |
660 | movq %mm2, %mm4 /* duplicate V140 */ | |
661 | movq %mm5, 8*13(%esi) /* out13 */ | |
662 | paddsw %mm7, %mm2 /* V160 */ | |
663 | @@ -434,7 +531,11 @@ _dv_idct_block_mmx: | |
664 | /* moved from the next block */ | |
665 | movq 8*3(%esi), %mm7 | |
666 | psraw $4, %mm4 | |
667 | +#ifdef __PIC__ | |
668 | + movq %mm2, scratch1@GOTOFF(%ebp) /* out1 */ | |
669 | +#else | |
670 | movq %mm2, scratch1 /* out1 */ | |
671 | +#endif | |
672 | /* moved from the next block */ | |
673 | movq %mm0, %mm1 | |
674 | movq %mm4, 8*15(%esi) /* out15 */ | |
675 | @@ -491,15 +592,31 @@ _dv_idct_block_mmx: | |
676 | paddsw %mm4, %mm3 /* V113 ; free mm4 */ | |
677 | movq %mm0, %mm4 /* duplicate V110 */ | |
678 | paddsw %mm1, %mm2 /* V111 */ | |
679 | +#ifdef __PIC__ | |
680 | + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */ | |
681 | +#else | |
682 | pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ | |
683 | +#endif | |
684 | psubsw %mm1, %mm5 /* V112 ; free mm1 */ | |
685 | psubsw %mm5, %mm4 /* V116 */ | |
686 | movq %mm2, %mm1 /* duplicate V111 */ | |
687 | +#ifdef __PIC__ | |
688 | + pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */ | |
689 | +#else | |
690 | pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ | |
691 | +#endif | |
692 | psubsw %mm3, %mm2 /* V114 */ | |
693 | +#ifdef __PIC__ | |
694 | + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */ | |
695 | +#else | |
696 | pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ | |
697 | +#endif | |
698 | paddsw %mm3, %mm1 /* V115 ; free mm3 */ | |
699 | +#ifdef __PIC__ | |
700 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */ | |
701 | +#else | |
702 | pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ | |
703 | +#endif | |
704 | psllw $2, %mm0 /* t266 */ | |
705 | movq %mm1, (%esi) /* save V115 */ | |
706 | psllw $1, %mm5 /* t268 */ | |
707 | @@ -517,7 +634,11 @@ _dv_idct_block_mmx: | |
708 | movq %mm6, %mm3 /* duplicate tmt4 */ | |
709 | psubsw %mm0, %mm6 /* V100 */ | |
710 | paddsw %mm0, %mm3 /* V101 ; free mm0 */ | |
711 | +#ifdef __PIC__ | |
712 | + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */ | |
713 | +#else | |
714 | pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ | |
715 | +#endif | |
716 | movq %mm7, %mm5 /* duplicate tmt0 */ | |
717 | movq 8*8(%esi), %mm1 /* tmt8 */ | |
718 | paddsw %mm1, %mm7 /* V103 */ | |
719 | @@ -551,10 +672,18 @@ _dv_idct_block_mmx: | |
720 | movq 8*2(%esi), %mm3 /* V123 */ | |
721 | paddsw %mm4, %mm7 /* out0 */ | |
722 | /* moved up from next block */ | |
723 | +#ifdef __PIC__ | |
724 | + movq scratch3@GOTOFF(%ebp), %mm0 | |
725 | +#else | |
726 | movq scratch3, %mm0 | |
727 | +#endif | |
728 | psraw $4, %mm7 | |
729 | /* moved up from next block */ | |
730 | +#ifdef __PIC__ | |
731 | + movq scratch5@GOTOFF(%ebp), %mm6 | |
732 | +#else | |
733 | movq scratch5, %mm6 | |
734 | +#endif | |
735 | psubsw %mm4, %mm1 /* out14 ; free mm4 */ | |
736 | paddsw %mm3, %mm5 /* out2 */ | |
737 | psraw $4, %mm1 | |
738 | @@ -565,7 +694,11 @@ _dv_idct_block_mmx: | |
739 | movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ | |
740 | psraw $4, %mm2 | |
741 | /* moved up to the prev block */ | |
742 | +#ifdef __PIC__ | |
743 | + movq scratch7@GOTOFF(%ebp), %mm4 | |
744 | +#else | |
745 | movq scratch7, %mm4 | |
746 | +#endif | |
747 | /* moved up to the prev block */ | |
748 | psraw $4, %mm0 | |
749 | movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ | |
750 | @@ -579,7 +712,11 @@ _dv_idct_block_mmx: | |
751 | * psraw $4, %mm0 | |
752 | * psraw $4, %mm6 | |
753 | */ | |
754 | +#ifdef __PIC__ | |
755 | + movq scratch1@GOTOFF(%ebp), %mm1 | |
756 | +#else | |
757 | movq scratch1, %mm1 | |
758 | +#endif | |
759 | psraw $4, %mm4 | |
760 | movq %mm0, 8*3(%esi) /* out3 */ | |
761 | psraw $4, %mm1 | |
762 | diff -urp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c | |
763 | --- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200 | |
764 | +++ libdv-0.104/libdv/quant.c 2005-10-24 01:06:24.000000000 +0200 | |
765 | @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1 | |
766 | uint32_t dv_quant_248_mul_tab [2] [22] [64]; | |
767 | uint32_t dv_quant_88_mul_tab [2] [22] [64]; | |
768 | ||
769 | -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); | |
770 | +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[4],uint8_t dv_quant_shifts[22][4]); | |
771 | extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); | |
772 | static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
773 | static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
774 | @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno | |
775 | _dv_quant_x86_64(block, qno, klass); | |
776 | emms(); | |
777 | #else | |
778 | - _dv_quant_x86(block, qno, klass); | |
779 | + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts); | |
780 | emms(); | |
781 | #endif | |
782 | } | |
783 | diff -urp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h | |
784 | --- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200 | |
785 | +++ libdv-0.104/libdv/quant.h 2005-10-24 00:57:43.000000000 +0200 | |
786 | @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block, | |
787 | extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); | |
788 | extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, | |
789 | dv_248_coeff_t *co); | |
790 | -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); | |
791 | +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[4], uint8_t shifts[22][4]); | |
792 | extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); | |
793 | extern void dv_quant_init (void); | |
794 | #ifdef __cplusplus | |
795 | diff -urp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S | |
796 | --- libdv-0.104-old/libdv/quant_x86.S 2005-10-23 19:40:58.000000000 +0200 | |
797 | +++ libdv-0.104/libdv/quant_x86.S 2005-10-24 01:10:21.000000000 +0200 | |
798 | @@ -71,10 +71,13 @@ _dv_quant_88_inverse_x86: | |
799 | ||
800 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
801 | movl ARGn(1),%eax /* qno */ | |
802 | + movl ARGn(3),%ebx /* dv_quant_offset */ | |
803 | + addl ARGn(2),%ebx /* class */ | |
804 | + movzbl (%ebx),%ecx | |
805 | movl ARGn(2),%ebx /* class */ | |
806 | - movzbl dv_quant_offset(%ebx),%ecx | |
807 | addl %ecx,%eax | |
808 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
809 | + movl ARGn(4),%edx /* dv_quant_shifts */ | |
810 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
811 | ||
812 | /* extra = (class == 3); */ | |
813 | /* 0 1 2 3 */ | |
814 | @@ -212,11 +215,13 @@ _dv_quant_x86: | |
815 | ||
816 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
817 | movl ARGn(1),%eax /* qno */ | |
818 | + movl ARGn(3),%ebx /* offset */ | |
819 | + addl ARGn(2),%ebx /* class */ | |
820 | + movzbl (%ebx),%ecx | |
821 | movl ARGn(2),%ebx /* class */ | |
822 | - | |
823 | - movzbl dv_quant_offset(%ebx),%ecx | |
824 | + movl ARGn(4),%edx /* shifts */ | |
825 | addl %ecx,%eax | |
826 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
827 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
828 | ||
829 | /* extra = (class == 3); */ | |
830 | /* 0 1 2 3 */ | |
831 | diff -urp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S | |
832 | --- libdv-0.104-old/libdv/rgbtoyuv.S 2005-10-23 19:40:58.000000000 +0200 | |
833 | +++ libdv-0.104/libdv/rgbtoyuv.S 2005-10-24 00:46:34.000000000 +0200 | |
834 | @@ -110,20 +110,30 @@ VR0GR: .long 0,0 | |
835 | VBG0B: .long 0,0 | |
836 | ||
837 | #endif | |
838 | - | |
839 | + | |
840 | +#ifdef __PIC__ | |
841 | +# undef __i686 /* gcc define gets in our way */ | |
842 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
843 | +.globl __i686.get_pc_thunk.bp | |
844 | + .hidden __i686.get_pc_thunk.bp | |
845 | + .type __i686.get_pc_thunk.bp,@function | |
846 | +__i686.get_pc_thunk.bp: | |
847 | + movl (%esp), %ebp | |
848 | + ret | |
849 | +#endif | |
850 | + | |
851 | .text | |
852 | ||
853 | -#define _inPtr 8 | |
854 | -#define _rows 12 | |
855 | -#define _columns 16 | |
856 | -#define _outyPtr 20 | |
857 | -#define _outuPtr 24 | |
858 | -#define _outvPtr 28 | |
859 | +#define _inPtr 24+8 | |
860 | +#define _rows 24+12 | |
861 | +#define _columns 24+16 | |
862 | +#define _outyPtr 24+20 | |
863 | +#define _outuPtr 24+24 | |
864 | +#define _outvPtr 24+28 | |
865 | ||
866 | _dv_rgbtoycb_mmx: | |
867 | ||
868 | pushl %ebp | |
869 | - movl %esp, %ebp | |
870 | pushl %eax | |
871 | pushl %ebx | |
872 | pushl %ecx | |
873 | @@ -131,46 +141,103 @@ _dv_rgbtoycb_mmx: | |
874 | pushl %esi | |
875 | pushl %edi | |
876 | ||
877 | +#ifdef __PIC__ | |
878 | + call __i686.get_pc_thunk.bp | |
879 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
880 | +#endif | |
881 | + | |
882 | +#ifdef __PIC__ | |
883 | + leal ZEROSX@GOTOFF(%ebp), %eax #This section gets around a bug | |
884 | +#else | |
885 | leal ZEROSX, %eax #This section gets around a bug | |
886 | +#endif | |
887 | movq (%eax), %mm0 #unlikely to persist | |
888 | +#ifdef __PIC__ | |
889 | + movq %mm0, ZEROS@GOTOFF(%ebp) | |
890 | + leal OFFSETDX@GOTOFF(%ebp), %eax | |
891 | +#else | |
892 | movq %mm0, ZEROS | |
893 | leal OFFSETDX, %eax | |
894 | +#endif | |
895 | movq (%eax), %mm0 | |
896 | +#ifdef __PIC__ | |
897 | + movq %mm0, OFFSETD@GOTOFF(%ebp) | |
898 | + leal OFFSETWX@GOTOFF(%ebp), %eax | |
899 | +#else | |
900 | movq %mm0, OFFSETD | |
901 | leal OFFSETWX, %eax | |
902 | +#endif | |
903 | movq (%eax), %mm0 | |
904 | +#ifdef __PIC__ | |
905 | + movq %mm0, OFFSETW@GOTOFF(%ebp) | |
906 | + leal OFFSETBX@GOTOFF(%ebp), %eax | |
907 | +#else | |
908 | movq %mm0, OFFSETW | |
909 | leal OFFSETBX, %eax | |
910 | +#endif | |
911 | movq (%eax), %mm0 | |
912 | +#ifdef __PIC__ | |
913 | + movq %mm0, OFFSETB@GOTOFF(%ebp) | |
914 | + leal YR0GRX@GOTOFF(%ebp), %eax | |
915 | +#else | |
916 | movq %mm0, OFFSETB | |
917 | leal YR0GRX, %eax | |
918 | +#endif | |
919 | movq (%eax), %mm0 | |
920 | +#ifdef __PIC__ | |
921 | + movq %mm0, YR0GR@GOTOFF(%ebp) | |
922 | + leal YBG0BX@GOTOFF(%ebp), %eax | |
923 | +#else | |
924 | movq %mm0, YR0GR | |
925 | leal YBG0BX, %eax | |
926 | +#endif | |
927 | movq (%eax), %mm0 | |
928 | +#ifdef __PIC__ | |
929 | + movq %mm0, YBG0B@GOTOFF(%ebp) | |
930 | + leal UR0GRX@GOTOFF(%ebp), %eax | |
931 | +#else | |
932 | movq %mm0, YBG0B | |
933 | leal UR0GRX, %eax | |
934 | +#endif | |
935 | movq (%eax), %mm0 | |
936 | +#ifdef __PIC__ | |
937 | + movq %mm0, UR0GR@GOTOFF(%ebp) | |
938 | + leal UBG0BX@GOTOFF(%ebp), %eax | |
939 | +#else | |
940 | movq %mm0, UR0GR | |
941 | leal UBG0BX, %eax | |
942 | +#endif | |
943 | movq (%eax), %mm0 | |
944 | +#ifdef __PIC__ | |
945 | + movq %mm0, UBG0B@GOTOFF(%ebp) | |
946 | + leal VR0GRX@GOTOFF(%ebp), %eax | |
947 | +#else | |
948 | movq %mm0, UBG0B | |
949 | leal VR0GRX, %eax | |
950 | +#endif | |
951 | movq (%eax), %mm0 | |
952 | +#ifdef __PIC__ | |
953 | + movq %mm0, VR0GR@GOTOFF(%ebp) | |
954 | + leal VBG0BX@GOTOFF(%ebp), %eax | |
955 | +#else | |
956 | movq %mm0, VR0GR | |
957 | leal VBG0BX, %eax | |
958 | +#endif | |
959 | movq (%eax), %mm0 | |
960 | +#ifdef __PIC__ | |
961 | + movq %mm0, VBG0B@GOTOFF(%ebp) | |
962 | +#else | |
963 | movq %mm0, VBG0B | |
964 | - | |
965 | - movl _rows(%ebp), %eax | |
966 | - movl _columns(%ebp), %ebx | |
967 | +#endif | |
968 | + movl _rows(%esp), %eax | |
969 | + movl _columns(%esp), %ebx | |
970 | mull %ebx #number pixels | |
971 | shrl $3, %eax #number of loops | |
972 | movl %eax, %edi #loop counter in edi | |
973 | - movl _inPtr(%ebp), %eax | |
974 | - movl _outyPtr(%ebp), %ebx | |
975 | - movl _outuPtr(%ebp), %ecx | |
976 | - movl _outvPtr(%ebp), %edx | |
977 | + movl _inPtr(%esp), %eax | |
978 | + movl _outyPtr(%esp), %ebx | |
979 | + movl _outuPtr(%esp), %ecx | |
980 | + movl _outvPtr(%esp), %edx | |
981 | rgbtoycb_mmx_loop: | |
982 | movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 | |
983 | pxor %mm6, %mm6 #0 -> mm6 | |
984 | @@ -184,29 +251,57 @@ rgbtoycb_mmx_loop: | |
985 | punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 | |
986 | movq %mm0, %mm2 #R1B0G0R0 -> mm2 | |
987 | ||
988 | +#ifdef __PIC__ | |
989 | + pmaddwd YR0GR@GOTOFF(%ebp), %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
990 | +#else | |
991 | pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
992 | +#endif | |
993 | movq %mm1, %mm3 #B1G1R1B0 -> mm3 | |
994 | ||
995 | +#ifdef __PIC__ | |
996 | + pmaddwd YBG0B@GOTOFF(%ebp), %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
997 | +#else | |
998 | pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
999 | +#endif | |
1000 | movq %mm2, %mm4 #R1B0G0R0 -> mm4 | |
1001 | ||
1002 | +#ifdef __PIC__ | |
1003 | + pmaddwd UR0GR@GOTOFF(%ebp), %mm2 #urR1,ugG0+urR0 -> mm2 | |
1004 | +#else | |
1005 | pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 | |
1006 | +#endif | |
1007 | movq %mm3, %mm5 #B1G1R1B0 -> mm5 | |
1008 | ||
1009 | +#ifdef __PIC__ | |
1010 | + pmaddwd UBG0B@GOTOFF(%ebp), %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1011 | +#else | |
1012 | pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1013 | +#endif | |
1014 | punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 | |
1015 | ||
1016 | +#ifdef __PIC__ | |
1017 | + pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1018 | +#else | |
1019 | pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1020 | +#endif | |
1021 | paddd %mm1, %mm0 #Y1Y0 -> mm0 | |
1022 | ||
1023 | +#ifdef __PIC__ | |
1024 | + pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1025 | +#else | |
1026 | pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1027 | +#endif | |
1028 | ||
1029 | movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 | |
1030 | paddd %mm3, %mm2 #U1U0 -> mm2 | |
1031 | ||
1032 | movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 | |
1033 | ||
1034 | +#ifdef __PIC__ | |
1035 | + punpcklbw ZEROS@GOTOFF(%ebp), %mm1 #B3G3R3B2 -> mm1 | |
1036 | +#else | |
1037 | punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 | |
1038 | +#endif | |
1039 | paddd %mm5, %mm4 #V1V0 -> mm4 | |
1040 | ||
1041 | movq %mm1, %mm5 #B3G3R3B2 -> mm5 | |
1042 | @@ -214,29 +309,61 @@ rgbtoycb_mmx_loop: | |
1043 | ||
1044 | paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 | |
1045 | ||
1046 | +#ifdef __PIC__ | |
1047 | + punpckhbw ZEROS@GOTOFF(%ebp), %mm6 #R5B4G4R3 -> mm6 | |
1048 | +#else | |
1049 | punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 | |
1050 | +#endif | |
1051 | movq %mm1, %mm3 #R3B2G2R2 -> mm3 | |
1052 | ||
1053 | +#ifdef __PIC__ | |
1054 | + pmaddwd YR0GR@GOTOFF(%ebp), %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1055 | +#else | |
1056 | pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1057 | +#endif | |
1058 | movq %mm5, %mm7 #B3G3R3B2 -> mm7 | |
1059 | ||
1060 | +#ifdef __PIC__ | |
1061 | + pmaddwd YBG0B@GOTOFF(%ebp), %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1062 | +#else | |
1063 | pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1064 | +#endif | |
1065 | psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 | |
1066 | ||
1067 | +#ifdef __PIC__ | |
1068 | + movq %mm6, TEMP0@GOTOFF(%ebp) #R5B4G4R4 -> TEMP0 | |
1069 | +#else | |
1070 | movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 | |
1071 | +#endif | |
1072 | movq %mm3, %mm6 #R3B2G2R2 -> mm6 | |
1073 | +#ifdef __PIC__ | |
1074 | + pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR3,ugG2+urR2 -> mm6 | |
1075 | +#else | |
1076 | pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 | |
1077 | +#endif | |
1078 | psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 | |
1079 | ||
1080 | paddd %mm5, %mm1 #Y3Y2 -> mm1 | |
1081 | movq %mm7, %mm5 #B3G3R3B2 -> mm5 | |
1082 | +#ifdef __PIC__ | |
1083 | + pmaddwd UBG0B@GOTOFF(%ebp), %mm7 #ubB3+ugG3,ubB2 | |
1084 | +#else | |
1085 | pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 | |
1086 | +#endif | |
1087 | psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 | |
1088 | ||
1089 | +#ifdef __PIC__ | |
1090 | + pmaddwd VR0GR@GOTOFF(%ebp), %mm3 #vrR3,vgG2+vgR2 | |
1091 | +#else | |
1092 | pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 | |
1093 | +#endif | |
1094 | packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 | |
1095 | ||
1096 | +#ifdef __PIC__ | |
1097 | + pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1098 | +#else | |
1099 | pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1100 | +#endif | |
1101 | psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 | |
1102 | ||
1103 | movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 | |
1104 | @@ -251,58 +378,114 @@ rgbtoycb_mmx_loop: | |
1105 | movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 | |
1106 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 | |
1107 | ||
1108 | +#ifdef __PIC__ | |
1109 | + paddw OFFSETY@GOTOFF(%ebp), %mm0 | |
1110 | +#else | |
1111 | paddw OFFSETY, %mm0 | |
1112 | +#endif | |
1113 | movq %mm0, (%ebx) #store Y3Y2Y1Y0 | |
1114 | packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 | |
1115 | ||
1116 | +#ifdef __PIC__ | |
1117 | + movq TEMP0@GOTOFF(%ebp), %mm0 #R5B4G4R4 -> mm0 | |
1118 | +#else | |
1119 | movq TEMP0, %mm0 #R5B4G4R4 -> mm0 | |
1120 | +#endif | |
1121 | addl $8, %ebx | |
1122 | - | |
1123 | + | |
1124 | +#ifdef __PIC__ | |
1125 | + punpcklbw ZEROS@GOTOFF(%ebp), %mm7 #B5G500 -> mm7 | |
1126 | +#else | |
1127 | punpcklbw ZEROS, %mm7 #B5G500 -> mm7 | |
1128 | +#endif | |
1129 | movq %mm0, %mm6 #R5B4G4R4 -> mm6 | |
1130 | ||
1131 | +#ifdef __PIC__ | |
1132 | + movq %mm2, TEMPU@GOTOFF(%ebp) #32-bit scaled U3U2U1U0 -> TEMPU | |
1133 | +#else | |
1134 | movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU | |
1135 | +#endif | |
1136 | psrlq $32, %mm0 #00R5B4 -> mm0 | |
1137 | ||
1138 | paddw %mm0, %mm7 #B5G5R5B4 -> mm7 | |
1139 | movq %mm6, %mm2 #B5B4G4R4 -> mm2 | |
1140 | ||
1141 | +#ifdef __PIC__ | |
1142 | + pmaddwd YR0GR@GOTOFF(%ebp), %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1143 | +#else | |
1144 | pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1145 | +#endif | |
1146 | movq %mm7, %mm0 #B5G5R5B4 -> mm0 | |
1147 | ||
1148 | +#ifdef __PIC__ | |
1149 | + pmaddwd YBG0B@GOTOFF(%ebp), %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1150 | +#else | |
1151 | pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1152 | +#endif | |
1153 | packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 | |
1154 | ||
1155 | addl $24, %eax #increment RGB count | |
1156 | ||
1157 | +#ifdef __PIC__ | |
1158 | + movq %mm4, TEMPV@GOTOFF(%ebp) #(V3V2V1V0)/256 -> mm4 | |
1159 | +#else | |
1160 | movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 | |
1161 | +#endif | |
1162 | movq %mm6, %mm4 #B5B4G4R4 -> mm4 | |
1163 | ||
1164 | +#ifdef __PIC__ | |
1165 | + pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR5,ugG4+urR4 | |
1166 | +#else | |
1167 | pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 | |
1168 | +#endif | |
1169 | movq %mm0, %mm3 #B5G5R5B4 -> mm0 | |
1170 | ||
1171 | +#ifdef __PIC__ | |
1172 | + pmaddwd UBG0B@GOTOFF(%ebp), %mm0 #ubB5+ugG5,ubB4 | |
1173 | +#else | |
1174 | pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 | |
1175 | +#endif | |
1176 | paddd %mm7, %mm2 #Y5Y4 -> mm2 | |
1177 | ||
1178 | +#ifdef __PIC__ | |
1179 | + pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1180 | +#else | |
1181 | pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1182 | +#endif | |
1183 | pxor %mm7, %mm7 #0 -> mm7 | |
1184 | ||
1185 | +#ifdef __PIC__ | |
1186 | + pmaddwd VBG0B@GOTOFF(%ebp), %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1187 | +#else | |
1188 | pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1189 | +#endif | |
1190 | punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 | |
1191 | ||
1192 | paddd %mm6, %mm0 #U5U4 -> mm0 | |
1193 | movq %mm1, %mm6 #B7G7R7B6 -> mm6 | |
1194 | ||
1195 | +#ifdef __PIC__ | |
1196 | + pmaddwd YBG0B@GOTOFF(%ebp), %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1197 | +#else | |
1198 | pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1199 | +#endif | |
1200 | punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1201 | ||
1202 | movq %mm5, %mm7 #R7B6G6R6 -> mm7 | |
1203 | paddd %mm4, %mm3 #V5V4 -> mm3 | |
1204 | ||
1205 | +#ifdef __PIC__ | |
1206 | + pmaddwd YR0GR@GOTOFF(%ebp), %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1207 | +#else | |
1208 | pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1209 | +#endif | |
1210 | movq %mm1, %mm4 #B7G7R7B6 -> mm4 | |
1211 | ||
1212 | +#ifdef __PIC__ | |
1213 | + pmaddwd UBG0B@GOTOFF(%ebp), %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1214 | +#else | |
1215 | pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1216 | +#endif | |
1217 | psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 | |
1218 | ||
1219 | psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 | |
1220 | @@ -310,25 +493,49 @@ rgbtoycb_mmx_loop: | |
1221 | paddd %mm5, %mm6 #Y7Y6 -> mm6 | |
1222 | movq %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1223 | ||
1224 | +#ifdef __PIC__ | |
1225 | + pmaddwd UR0GR@GOTOFF(%ebp), %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1226 | +#else | |
1227 | pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1228 | +#endif | |
1229 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 | |
1230 | ||
1231 | +#ifdef __PIC__ | |
1232 | + pmaddwd VBG0B@GOTOFF(%ebp), %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1233 | +#else | |
1234 | pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1235 | +#endif | |
1236 | psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 | |
1237 | ||
1238 | packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 | |
1239 | ||
1240 | +#ifdef __PIC__ | |
1241 | + pmaddwd VR0GR@GOTOFF(%ebp), %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1242 | +#else | |
1243 | pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1244 | +#endif | |
1245 | paddd %mm4, %mm7 #U7U6 -> mm7 | |
1246 | ||
1247 | psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 | |
1248 | +#ifdef __PIC__ | |
1249 | + paddw OFFSETY@GOTOFF(%ebp), %mm2 | |
1250 | +#else | |
1251 | paddw OFFSETY, %mm2 | |
1252 | +#endif | |
1253 | movq %mm2, (%ebx) #store Y7Y6Y5Y4 | |
1254 | ||
1255 | +#ifdef __PIC__ | |
1256 | + movq ALLONE@GOTOFF(%ebp), %mm6 | |
1257 | +#else | |
1258 | movq ALLONE, %mm6 | |
1259 | +#endif | |
1260 | packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 | |
1261 | ||
1262 | +#ifdef __PIC__ | |
1263 | + movq TEMPU@GOTOFF(%ebp), %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1264 | +#else | |
1265 | movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1266 | +#endif | |
1267 | pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 | |
1268 | ||
1269 | pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 | |
1270 | @@ -338,8 +545,12 @@ rgbtoycb_mmx_loop: | |
1271 | ||
1272 | psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 | |
1273 | psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 | |
1274 | - | |
1275 | + | |
1276 | +#ifdef __PIC__ | |
1277 | + movq TEMPV@GOTOFF(%ebp), %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1278 | +#else | |
1279 | movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1280 | +#endif | |
1281 | ||
1282 | movq %mm4, (%ecx) # store U | |
1283 | ||
1284 | @@ -425,14 +636,22 @@ _dv_ppm_copy_y_block_mmx: | |
1285 | _dv_pgm_copy_y_block_mmx: | |
1286 | ||
1287 | pushl %ebp | |
1288 | - movl %esp, %ebp | |
1289 | pushl %esi | |
1290 | pushl %edi | |
1291 | - | |
1292 | - movl 8(%ebp), %edi # dest | |
1293 | - movl 12(%ebp), %esi # src | |
1294 | ||
1295 | +#ifdef __PIC__ | |
1296 | + call __i686.get_pc_thunk.bp | |
1297 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1298 | +#endif | |
1299 | + | |
1300 | + movl 16(%esp), %edi # dest | |
1301 | + movl 20(%esp), %esi # src | |
1302 | + | |
1303 | +#ifdef __PIC__ | |
1304 | + movq OFFSETY@GOTOFF(%ebp), %mm7 | |
1305 | +#else | |
1306 | movq OFFSETY, %mm7 | |
1307 | +#endif | |
1308 | pxor %mm6, %mm6 | |
1309 | ||
1310 | movq (%esi), %mm0 | |
1311 | @@ -567,14 +786,22 @@ _dv_pgm_copy_y_block_mmx: | |
1312 | _dv_video_copy_y_block_mmx: | |
1313 | ||
1314 | pushl %ebp | |
1315 | - movl %esp, %ebp | |
1316 | pushl %esi | |
1317 | pushl %edi | |
1318 | - | |
1319 | - movl 8(%ebp), %edi # dest | |
1320 | - movl 12(%ebp), %esi # src | |
1321 | ||
1322 | +#ifdef __PIC__ | |
1323 | + call __i686.get_pc_thunk.bp | |
1324 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1325 | +#endif | |
1326 | + | |
1327 | + movl 16(%esp), %edi # dest | |
1328 | + movl 20(%esp), %esi # src | |
1329 | + | |
1330 | +#ifdef __PIC__ | |
1331 | + movq OFFSETBX@GOTOFF(%ebp), %mm7 | |
1332 | +#else | |
1333 | movq OFFSETBX, %mm7 | |
1334 | +#endif | |
1335 | pxor %mm6, %mm6 | |
1336 | ||
1337 | movq (%esi), %mm0 | |
1338 | @@ -855,16 +1082,23 @@ _dv_ppm_copy_pal_c_block_mmx: | |
1339 | _dv_pgm_copy_pal_c_block_mmx: | |
1340 | ||
1341 | pushl %ebp | |
1342 | - movl %esp, %ebp | |
1343 | pushl %esi | |
1344 | pushl %edi | |
1345 | pushl %ebx | |
1346 | - | |
1347 | - movl 8(%ebp), %edi # dest | |
1348 | - movl 12(%ebp), %esi # src | |
1349 | ||
1350 | +#ifdef __PIC__ | |
1351 | + call __i686.get_pc_thunk.bp | |
1352 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1353 | +#endif | |
1354 | + | |
1355 | + movl 20(%esp), %edi # dest | |
1356 | + movl 24(%esp), %esi # src | |
1357 | ||
1358 | +#ifdef __PIC__ | |
1359 | + movq OFFSETBX@GOTOFF(%ebp), %mm7 | |
1360 | +#else | |
1361 | movq OFFSETBX, %mm7 | |
1362 | +#endif | |
1363 | pxor %mm6, %mm6 | |
1364 | ||
1365 | ||
1366 | @@ -1003,15 +1237,23 @@ _dv_pgm_copy_pal_c_block_mmx: | |
1367 | _dv_video_copy_pal_c_block_mmx: | |
1368 | ||
1369 | pushl %ebp | |
1370 | - movl %esp, %ebp | |
1371 | pushl %esi | |
1372 | pushl %edi | |
1373 | pushl %ebx | |
1374 | - | |
1375 | - movl 8(%ebp), %edi # dest | |
1376 | - movl 12(%ebp), %esi # src | |
1377 | ||
1378 | +#ifdef __PIC__ | |
1379 | + call __i686.get_pc_thunk.bp | |
1380 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1381 | +#endif | |
1382 | + | |
1383 | + movl 20(%esp), %edi # dest | |
1384 | + movl 24(%esp), %esi # src | |
1385 | + | |
1386 | +#ifdef __PIC__ | |
1387 | + movq OFFSETBX@GOTOFF(%ebp), %mm7 | |
1388 | +#else | |
1389 | movq OFFSETBX, %mm7 | |
1390 | +#endif | |
1391 | paddw %mm7, %mm7 | |
1392 | pxor %mm6, %mm6 | |
1393 | ||
1394 | @@ -1098,18 +1340,25 @@ video_copy_pal_c_block_mmx_loop: | |
1395 | _dv_ppm_copy_ntsc_c_block_mmx: | |
1396 | ||
1397 | pushl %ebp | |
1398 | - movl %esp, %ebp | |
1399 | pushl %esi | |
1400 | pushl %edi | |
1401 | pushl %ebx | |
1402 | - | |
1403 | - movl 8(%ebp), %edi # dest | |
1404 | - movl 12(%ebp), %esi # src | |
1405 | + | |
1406 | +#ifdef __PIC__ | |
1407 | + call __i686.get_pc_thunk.bp | |
1408 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1409 | +#endif | |
1410 | + | |
1411 | + movl 20(%esp), %edi # dest | |
1412 | + movl 24(%esp), %esi # src | |
1413 | ||
1414 | movl $4, %ebx | |
1415 | ||
1416 | +#ifdef __PIC__ | |
1417 | + movq ALLONE@GOTOFF(%ebp), %mm6 | |
1418 | +#else | |
1419 | movq ALLONE, %mm6 | |
1420 | - | |
1421 | +#endif | |
1422 | ppm_copy_ntsc_c_block_mmx_loop: | |
1423 | ||
1424 | movq (%esi), %mm0 | |
1425 | @@ -1171,14 +1420,22 @@ ppm_copy_ntsc_c_block_mmx_loop: | |
1426 | _dv_pgm_copy_ntsc_c_block_mmx: | |
1427 | ||
1428 | pushl %ebp | |
1429 | - movl %esp, %ebp | |
1430 | pushl %esi | |
1431 | pushl %edi | |
1432 | - | |
1433 | - movl 8(%ebp), %edi # dest | |
1434 | - movl 12(%ebp), %esi # src | |
1435 | ||
1436 | +#ifdef __PIC__ | |
1437 | + call __i686.get_pc_thunk.bp | |
1438 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1439 | +#endif | |
1440 | + | |
1441 | + movl 16(%esp), %edi # dest | |
1442 | + movl 20(%esp), %esi # src | |
1443 | + | |
1444 | +#ifdef __PIC__ | |
1445 | + movq OFFSETBX@GOTOFF(%ebp), %mm7 | |
1446 | +#else | |
1447 | movq OFFSETBX, %mm7 | |
1448 | +#endif | |
1449 | paddw %mm7, %mm7 | |
1450 | pxor %mm6, %mm6 | |
1451 | ||
1452 | @@ -1328,15 +1585,23 @@ _dv_pgm_copy_ntsc_c_block_mmx: | |
1453 | _dv_video_copy_ntsc_c_block_mmx: | |
1454 | ||
1455 | pushl %ebp | |
1456 | - movl %esp, %ebp | |
1457 | pushl %esi | |
1458 | pushl %edi | |
1459 | pushl %ebx | |
1460 | - | |
1461 | - movl 8(%ebp), %edi # dest | |
1462 | - movl 12(%ebp), %esi # src | |
1463 | ||
1464 | +#ifdef __PIC__ | |
1465 | + call __i686.get_pc_thunk.bp | |
1466 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1467 | +#endif | |
1468 | + | |
1469 | + movl 20(%esp), %edi # dest | |
1470 | + movl 24(%esp), %esi # src | |
1471 | + | |
1472 | +#ifdef __PIC__ | |
1473 | + movq OFFSETBX@GOTOFF(%ebp), %mm7 | |
1474 | +#else | |
1475 | movq OFFSETBX, %mm7 | |
1476 | +#endif | |
1477 | paddw %mm7, %mm7 | |
1478 | pxor %mm6, %mm6 | |
1479 | ||
1480 | diff -urp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S | |
1481 | --- libdv-0.104-old/libdv/vlc_x86.S 2005-10-23 19:40:58.000000000 +0200 | |
1482 | +++ libdv-0.104/libdv/vlc_x86.S 2005-10-25 01:47:14.000000000 +0200 | |
1483 | @@ -1,29 +1,76 @@ | |
1484 | #include "asmoff.h" | |
1485 | .text | |
1486 | + | |
1487 | +#ifdef __PIC__ | |
1488 | +# undef __i686 /* gcc define gets in our way */ | |
1489 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
1490 | +.globl __i686.get_pc_thunk.bp | |
1491 | + .hidden __i686.get_pc_thunk.bp | |
1492 | + .type __i686.get_pc_thunk.bp,@function | |
1493 | +__i686.get_pc_thunk.bp: | |
1494 | + movl (%esp), %ebp | |
1495 | + ret | |
1496 | + | |
1497 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits | |
1498 | +.globl __i686.get_pc_thunk.si | |
1499 | + .hidden __i686.get_pc_thunk.si | |
1500 | + .type __i686.get_pc_thunk.si,@function | |
1501 | +__i686.get_pc_thunk.si: | |
1502 | + movl (%esp), %esi | |
1503 | + ret | |
1504 | +#endif | |
1505 | + | |
1506 | .align 4 | |
1507 | .globl dv_decode_vlc | |
1508 | .type dv_decode_vlc,@function | |
1509 | dv_decode_vlc: | |
1510 | pushl %ebx | |
1511 | + pushl %ebp | |
1512 | + | |
1513 | +#ifdef __PIC__ | |
1514 | + call __i686.get_pc_thunk.bp | |
1515 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1516 | +#endif | |
1517 | ||
1518 | - /* Args are at 8(%esp). */ | |
1519 | - movl 8(%esp),%eax /* %eax is bits */ | |
1520 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1521 | + /* Args are at 12(%esp). */ | |
1522 | + movl 12(%esp),%eax /* %eax is bits */ | |
1523 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1524 | andl $0x3f,%ebx /* limit index range STL*/ | |
1525 | ||
1526 | +#ifdef __ELF__ | |
1527 | + movl dv_vlc_class_index_mask@GOTOFF(%ebp,%ebx,4),%edx | |
1528 | +#else | |
1529 | movl dv_vlc_class_index_mask(,%ebx,4),%edx | |
1530 | +#endif | |
1531 | andl %eax,%edx | |
1532 | +#ifdef __ELF__ | |
1533 | + movl dv_vlc_class_index_rshift@GOTOFF(%ebp,%ebx,4),%ecx | |
1534 | +#else | |
1535 | movl dv_vlc_class_index_rshift(,%ebx,4),%ecx | |
1536 | +#endif | |
1537 | sarl %cl,%edx | |
1538 | +#ifdef __ELF__ | |
1539 | + movl dv_vlc_classes@GOTOFF(%ebp,%ebx,4),%ecx | |
1540 | +#else | |
1541 | movl dv_vlc_classes(,%ebx,4),%ecx | |
1542 | +#endif | |
1543 | movsbl (%ecx,%edx,1),%edx /* %edx is class */ | |
1544 | - | |
1545 | + | |
1546 | +#ifdef __ELF__ | |
1547 | + movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx | |
1548 | + movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx | |
1549 | +#else | |
1550 | movl dv_vlc_index_mask(,%edx,4),%ebx | |
1551 | movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1552 | +#endif | |
1553 | andl %eax,%ebx | |
1554 | sarl %cl,%ebx | |
1555 | ||
1556 | +#ifdef __ELF__ | |
1557 | + movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx | |
1558 | +#else | |
1559 | movl dv_vlc_lookups(,%edx,4),%edx | |
1560 | +#endif | |
1561 | movl (%edx,%ebx,4),%edx | |
1562 | ||
1563 | /* Now %edx holds result, like this: | |
1564 | @@ -42,7 +89,11 @@ dv_decode_vlc: | |
1565 | movl %edx,%ecx | |
1566 | sarl $8,%ecx | |
1567 | andl $0xff,%ecx | |
1568 | +#ifdef __ELF__ | |
1569 | + movl sign_mask@GOTOFF(%ebp,%ecx,4),%ebx | |
1570 | +#else | |
1571 | movl sign_mask(,%ecx,4),%ebx | |
1572 | +#endif | |
1573 | andl %ebx,%eax | |
1574 | negl %eax | |
1575 | sarl $31,%eax | |
1576 | @@ -63,14 +114,14 @@ dv_decode_vlc: | |
1577 | *result = broken; | |
1578 | Note that the 'broken' pattern is all ones (i.e. 0xffffffff) | |
1579 | */ | |
1580 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1581 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1582 | subl %ecx,%ebx | |
1583 | sbbl %ebx,%ebx | |
1584 | orl %ebx,%edx | |
1585 | ||
1586 | - movl 16(%esp),%eax | |
1587 | + movl 20(%esp),%eax | |
1588 | movl %edx,(%eax) | |
1589 | - | |
1590 | + popl %ebp | |
1591 | popl %ebx | |
1592 | ret | |
1593 | ||
1594 | @@ -80,21 +131,38 @@ dv_decode_vlc: | |
1595 | .type __dv_decode_vlc,@function | |
1596 | __dv_decode_vlc: | |
1597 | pushl %ebx | |
1598 | + pushl %ebp | |
1599 | + | |
1600 | +#ifdef __PIC__ | |
1601 | + call __i686.get_pc_thunk.bp | |
1602 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1603 | +#endif | |
1604 | ||
1605 | - /* Args are at 8(%esp). */ | |
1606 | - movl 8(%esp),%eax /* %eax is bits */ | |
1607 | + /* Args are at 12(%esp). */ | |
1608 | + movl 12(%esp),%eax /* %eax is bits */ | |
1609 | ||
1610 | movl %eax,%edx /* %edx is class */ | |
1611 | andl $0xfe00,%edx | |
1612 | sarl $9,%edx | |
1613 | +#ifdef __PIC__ | |
1614 | + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx | |
1615 | + | |
1616 | + movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx | |
1617 | + movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx | |
1618 | +#else | |
1619 | movsbl dv_vlc_class_lookup5(%edx),%edx | |
1620 | - | |
1621 | + | |
1622 | movl dv_vlc_index_mask(,%edx,4),%ebx | |
1623 | movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1624 | +#endif | |
1625 | andl %eax,%ebx | |
1626 | sarl %cl,%ebx | |
1627 | ||
1628 | +#ifdef __PIC__ | |
1629 | + movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx | |
1630 | +#else | |
1631 | movl dv_vlc_lookups(,%edx,4),%edx | |
1632 | +#endif | |
1633 | movl (%edx,%ebx,4),%edx | |
1634 | ||
1635 | /* Now %edx holds result, like this: | |
1636 | @@ -112,7 +180,11 @@ __dv_decode_vlc: | |
1637 | movl %edx,%ecx | |
1638 | sarl $8,%ecx | |
1639 | andl $0xff,%ecx | |
1640 | +#ifdef __PIC__ | |
1641 | + movl sign_mask@GOTOFF(%ebp,%ecx,4),%ecx | |
1642 | +#else | |
1643 | movl sign_mask(,%ecx,4),%ecx | |
1644 | +#endif | |
1645 | andl %ecx,%eax | |
1646 | negl %eax | |
1647 | sarl $31,%eax | |
1648 | @@ -127,9 +199,9 @@ __dv_decode_vlc: | |
1649 | xorl %eax,%edx | |
1650 | subl %eax,%edx | |
1651 | ||
1652 | - movl 12(%esp),%eax | |
1653 | + movl 16(%esp),%eax | |
1654 | movl %edx,(%eax) | |
1655 | - | |
1656 | + popl %ebp | |
1657 | popl %ebx | |
1658 | ret | |
1659 | ||
1660 | @@ -147,6 +219,11 @@ dv_parse_ac_coeffs_pass0: | |
1661 | pushl %esi | |
1662 | pushl %ebp | |
1663 | ||
1664 | +#ifdef __PIC__ | |
1665 | + call __i686.get_pc_thunk.si | |
1666 | + addl $_GLOBAL_OFFSET_TABLE_, %esi | |
1667 | +#endif | |
1668 | + | |
1669 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1670 | ||
1671 | /* | |
1672 | @@ -159,8 +236,10 @@ dv_parse_ac_coeffs_pass0: | |
1673 | ebp bl | |
1674 | */ | |
1675 | movl ARGn(2),%ebp | |
1676 | +#ifndef __PIC__ | |
1677 | movl ARGn(0),%esi | |
1678 | movl bitstream_t_buf(%esi),%esi | |
1679 | +#endif | |
1680 | movl dv_block_t_offset(%ebp),%edi | |
1681 | movl dv_block_t_reorder(%ebp),%ebx | |
1682 | ||
1683 | @@ -170,7 +249,11 @@ dv_parse_ac_coeffs_pass0: | |
1684 | ||
1685 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1686 | pxor %mm0,%mm0 | |
1687 | +#ifdef __PIC__ | |
1688 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1689 | +#else | |
1690 | pand const_f_0_0_0,%mm1 | |
1691 | +#endif | |
1692 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1693 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1694 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1695 | @@ -191,9 +274,17 @@ dv_parse_ac_coeffs_pass0: | |
1696 | readloop: | |
1697 | movl %edi,%ecx | |
1698 | shrl $3,%ecx | |
1699 | +#ifdef __PIC__ | |
1700 | + pushl %esi | |
1701 | + movl ARGn(1),%esi | |
1702 | + movl bitstream_t_buf(%esi),%esi | |
1703 | +#endif | |
1704 | movzbl (%esi,%ecx,1),%eax | |
1705 | movzbl 1(%esi,%ecx,1),%edx | |
1706 | movzbl 2(%esi,%ecx,1),%ecx | |
1707 | +#ifdef __PIC__ | |
1708 | + popl %esi | |
1709 | +#endif | |
1710 | shll $16,%eax | |
1711 | shll $8,%edx | |
1712 | orl %ecx,%eax | |
1713 | @@ -217,7 +308,11 @@ readloop: | |
1714 | ||
1715 | /* Attempt to use the shortcut first. If it hits, then | |
1716 | this vlc term has been decoded. */ | |
1717 | +#ifdef __PIC__ | |
1718 | + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx | |
1719 | +#else | |
1720 | movl dv_vlc_class1_shortcut(,%ecx,4),%edx | |
1721 | +#endif | |
1722 | test $0x80,%edx | |
1723 | je done_decode | |
1724 | ||
1725 | @@ -228,12 +323,19 @@ readloop: | |
1726 | movl %ebx,dv_block_t_reorder(%ebp) | |
1727 | ||
1728 | /* %eax is bits */ | |
1729 | - | |
1730 | +#ifdef __PIC__ | |
1731 | + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx | |
1732 | + | |
1733 | + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx | |
1734 | + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx | |
1735 | + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx | |
1736 | +#else | |
1737 | movsbl dv_vlc_class_lookup5(%ecx),%ecx | |
1738 | ||
1739 | movl dv_vlc_index_mask(,%ecx,4),%ebx | |
1740 | movl dv_vlc_lookups(,%ecx,4),%edx | |
1741 | movl dv_vlc_index_rshift(,%ecx,4),%ecx | |
1742 | +#endif | |
1743 | andl %eax,%ebx | |
1744 | sarl %cl,%ebx | |
1745 | ||
1746 | @@ -256,7 +358,11 @@ readloop: | |
1747 | movl %edx,%ecx | |
1748 | sarl $8,%ecx | |
1749 | andl $0xff,%ecx | |
1750 | +#ifdef __PIC__ | |
1751 | + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx | |
1752 | +#else | |
1753 | movl sign_mask(,%ecx,4),%ecx | |
1754 | +#endif | |
1755 | andl %ecx,%eax | |
1756 | negl %eax | |
1757 | sarl $31,%eax | |
1758 | @@ -326,10 +432,20 @@ alldone: | |
1759 | ||
1760 | slowpath: | |
1761 | /* slow path: use dv_decode_vlc */; | |
1762 | +#ifdef __PIC__ | |
1763 | + pushl %esi | |
1764 | + leal vlc@GOTOFF(%esi),%esi | |
1765 | + xchgl %esi,(%esp) /* last parameter is &vlc */ | |
1766 | +#else | |
1767 | pushl $vlc /* last parameter is &vlc */ | |
1768 | +#endif | |
1769 | pushl %edx /* bits_left */ | |
1770 | pushl %eax /* bits */ | |
1771 | +#ifdef __PIC__ | |
1772 | + call dv_decode_vlc@PLT | |
1773 | +#else | |
1774 | call dv_decode_vlc | |
1775 | +#endif | |
1776 | addl $12,%esp | |
1777 | test $0x80,%edx /* If (vlc.run < 0) break */ | |
1778 | jne escape | |
1779 | @@ -365,6 +481,11 @@ dv_parse_video_segment: | |
1780 | pushl %esi | |
1781 | pushl %ebp | |
1782 | ||
1783 | +#ifdef __PIC__ | |
1784 | + call __i686.get_pc_thunk.si | |
1785 | + addl $_GLOBAL_OFFSET_TABLE_, %esi | |
1786 | +#endif | |
1787 | + | |
1788 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1789 | ||
1790 | movl ARGn(1),%eax /* quality */ | |
1791 | @@ -373,7 +494,11 @@ dv_parse_video_segment: | |
1792 | jz its_mono | |
1793 | movl $6,%ebx | |
1794 | its_mono: | |
1795 | +#ifdef __PIC__ | |
1796 | + movl %ebx,n_blocks@GOTOFF(%esi) | |
1797 | +#else | |
1798 | movl %ebx,n_blocks | |
1799 | +#endif | |
1800 | ||
1801 | /* | |
1802 | * ebx seg/b | |
1803 | @@ -384,15 +509,22 @@ its_mono: | |
1804 | * ebp bl | |
1805 | */ | |
1806 | movl ARGn(0),%ebx | |
1807 | +#ifndef __PIC__ | |
1808 | movl dv_videosegment_t_bs(%ebx),%esi | |
1809 | movl bitstream_t_buf(%esi),%esi | |
1810 | +#endif | |
1811 | leal dv_videosegment_t_mb(%ebx),%edi | |
1812 | ||
1813 | movl $0,%eax | |
1814 | movl $0,%ecx | |
1815 | macloop: | |
1816 | +#ifdef __PIC__ | |
1817 | + movl %eax,m@GOTOFF(%esi) | |
1818 | + movl %ecx,mb_start@GOTOFF(%esi) | |
1819 | +#else | |
1820 | movl %eax,m | |
1821 | movl %ecx,mb_start | |
1822 | +#endif | |
1823 | ||
1824 | movl ARGn(0),%ebx | |
1825 | ||
1826 | @@ -400,7 +532,15 @@ macloop: | |
1827 | /* mb->qno = bitstream_get(bs,4); */ | |
1828 | movl %ecx,%edx | |
1829 | shr $3,%edx | |
1830 | +#ifdef __PIC__ | |
1831 | + pushl %esi | |
1832 | + movl dv_videosegment_t_bs(%ebx),%esi | |
1833 | + movl bitstream_t_buf(%esi),%esi | |
1834 | +#endif | |
1835 | movzbl 3(%esi,%edx,1),%edx | |
1836 | +#ifdef __PIC__ | |
1837 | + popl %esi | |
1838 | +#endif | |
1839 | andl $0xf,%edx | |
1840 | movl %edx,dv_macroblock_t_qno(%edi) | |
1841 | ||
1842 | @@ -411,7 +551,11 @@ macloop: | |
1843 | movl %edx,dv_macroblock_t_eob_count(%edi) | |
1844 | ||
1845 | /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ | |
1846 | +#ifdef __PIC__ | |
1847 | + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx | |
1848 | +#else | |
1849 | movl dv_super_map_vertical(,%eax,4),%edx | |
1850 | +#endif | |
1851 | movl dv_videosegment_t_i(%ebx),%ecx | |
1852 | addl %ecx,%edx | |
1853 | ||
1854 | @@ -422,11 +566,20 @@ skarly: | |
1855 | andl $1,%ecx | |
1856 | shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ | |
1857 | ||
1858 | +#ifdef __PIC__ | |
1859 | + leal mod_10@GOTOFF(%esi,%edx),%edx | |
1860 | + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1861 | +#else | |
1862 | movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1863 | +#endif | |
1864 | movl %edx,dv_macroblock_t_i(%edi) | |
1865 | ||
1866 | /* mb->j = dv_super_map_horizontal[m]; */ | |
1867 | +#ifdef __PIC__ | |
1868 | + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx | |
1869 | +#else | |
1870 | movl dv_super_map_horizontal(,%eax,4),%edx | |
1871 | +#endif | |
1872 | movl %edx,dv_macroblock_t_j(%edi) | |
1873 | ||
1874 | /* mb->k = seg->k; */ | |
1875 | @@ -445,12 +598,29 @@ blkloop: | |
1876 | +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
1877 | */ | |
1878 | /* dc = bitstream_get(bs,9); */ | |
1879 | +#ifdef __PIC__ | |
1880 | + movl mb_start@GOTOFF(%esi),%ecx | |
1881 | +#else | |
1882 | movl mb_start,%ecx | |
1883 | +#endif | |
1884 | shr $3,%ecx | |
1885 | +#ifdef __PIC__ | |
1886 | + movzbl blk_start@GOTOFF(%esi,%ebx),%edx | |
1887 | +#else | |
1888 | movzbl blk_start(%ebx),%edx | |
1889 | +#endif | |
1890 | addl %ecx,%edx | |
1891 | +#ifdef __PIC__ | |
1892 | + pushl %esi | |
1893 | + movl ARGn(1),%esi | |
1894 | + movl dv_videosegment_t_bs(%esi),%esi | |
1895 | + movl bitstream_t_buf(%esi),%esi | |
1896 | +#endif | |
1897 | movzbl (%esi,%edx,1),%eax /* hi byte */ | |
1898 | movzbl 1(%esi,%edx,1),%ecx /* lo byte */ | |
1899 | +#ifdef __PIC__ | |
1900 | + popl %esi | |
1901 | +#endif | |
1902 | shll $8,%eax | |
1903 | orl %ecx,%eax | |
1904 | ||
1905 | @@ -477,7 +647,11 @@ blkloop: | |
1906 | ||
1907 | /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ | |
1908 | shll $6,%eax | |
1909 | +#ifdef __PIC__ | |
1910 | + addl dv_reorder@GOTOFF+1(%esi),%eax | |
1911 | +#else | |
1912 | addl $(dv_reorder+1),%eax | |
1913 | +#endif | |
1914 | movl %eax,dv_block_t_reorder(%ebp) | |
1915 | ||
1916 | /* bl->reorder_sentinel = bl->reorder + 63; */ | |
1917 | @@ -485,13 +659,22 @@ blkloop: | |
1918 | movl %eax,dv_block_t_reorder_sentinel(%ebp) | |
1919 | ||
1920 | /* bl->offset= mb_start + dv_parse_bit_start[b]; */ | |
1921 | +#ifdef __PIC__ | |
1922 | + movl mb_start@GOTOFF(%esi),%ecx | |
1923 | + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax | |
1924 | +#else | |
1925 | movl mb_start,%ecx | |
1926 | movl dv_parse_bit_start(,%ebx,4),%eax | |
1927 | +#endif | |
1928 | addl %ecx,%eax | |
1929 | movl %eax,dv_block_t_offset(%ebp) | |
1930 | ||
1931 | /* bl->end= mb_start + dv_parse_bit_end[b]; */ | |
1932 | +#ifdef __PIC__ | |
1933 | + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax | |
1934 | +#else | |
1935 | movl dv_parse_bit_end(,%ebx,4),%eax | |
1936 | +#endif | |
1937 | addl %ecx,%eax | |
1938 | movl %eax,dv_block_t_end(%ebp) | |
1939 | ||
1940 | @@ -503,7 +686,11 @@ blkloop: | |
1941 | /* no AC pass. Just zero out the remaining coeffs */ | |
1942 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1943 | pxor %mm0,%mm0 | |
1944 | +#ifdef __PIC__ | |
1945 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1946 | +#else | |
1947 | pand const_f_0_0_0,%mm1 | |
1948 | +#endif | |
1949 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1950 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1951 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1952 | @@ -528,18 +715,31 @@ do_ac_pass: | |
1953 | pushl %ebp | |
1954 | pushl %edi | |
1955 | pushl %eax | |
1956 | +#ifdef __PIC__ | |
1957 | + call dv_parse_ac_coeffs_pass0@PLT | |
1958 | +#else | |
1959 | call dv_parse_ac_coeffs_pass0 | |
1960 | +#endif | |
1961 | addl $12,%esp | |
1962 | done_ac: | |
1963 | ||
1964 | +#ifdef __PIC__ | |
1965 | + movl n_blocks@GOTOFF(%esi),%eax | |
1966 | +#else | |
1967 | movl n_blocks,%eax | |
1968 | +#endif | |
1969 | addl $dv_block_t_size,%ebp | |
1970 | incl %ebx | |
1971 | cmpl %eax,%ebx | |
1972 | jnz blkloop | |
1973 | ||
1974 | +#ifdef __PIC__ | |
1975 | + movl m@GOTOFF(%esi),%eax | |
1976 | + movl mb_start@GOTOFF(%esi),%ecx | |
1977 | +#else | |
1978 | movl m,%eax | |
1979 | movl mb_start,%ecx | |
1980 | +#endif | |
1981 | addl $(8 * 80),%ecx | |
1982 | addl $dv_macroblock_t_size,%edi | |
1983 | incl %eax | |
1984 | @@ -557,7 +757,11 @@ done_ac: | |
1985 | ||
1986 | andl $DV_QUALITY_AC_MASK,%eax | |
1987 | cmpl $DV_QUALITY_AC_2,%eax | |
1988 | +#ifdef __PIC__ | |
1989 | + jz dv_parse_ac_coeffs@PLT | |
1990 | +#else | |
1991 | jz dv_parse_ac_coeffs | |
1992 | +#endif | |
1993 | movl $0,%eax | |
1994 | ret | |
1995 |