Commit | Line | Data |
---|---|---|
5e993f12 | 1 | --- libdv/dct_block_mmx.S 31 Jan 2005 13:27:54 -0000 1.4 |
2 | +++ libdv/dct_block_mmx.S 15 Dec 2005 06:45:38 -0000 | |
3 | @@ -55,17 +55,22 @@ scratch4: .quad 0 | |
4 | ||
5 | .section .note.GNU-stack, "", @progbits | |
6 | ||
7 | +#include "asm_common.S" | |
8 | + | |
9 | .text | |
10 | ||
11 | .align 8 | |
12 | .global _dv_dct_88_block_mmx | |
13 | +.hidden _dv_dct_88_block_mmx | |
14 | +.type _dv_dct_88_block_mmx,@function | |
15 | _dv_dct_88_block_mmx: | |
16 | ||
17 | pushl %ebp | |
18 | - movl %esp, %ebp | |
19 | pushl %esi | |
20 | ||
21 | - movl 8(%ebp), %esi # source | |
22 | + LOAD_PIC_REG_BP() | |
23 | + | |
24 | + movl 12(%ebp), %esi # source | |
25 | ||
26 | # column 0 | |
27 | movq 16*0(%esi), %mm0 # v0 | |
28 | @@ -88,22 +93,22 @@ _dv_dct_88_block_mmx: | |
29 | ||
30 | movq 16*3(%esi), %mm5 # v3 | |
31 | movq 16*4(%esi), %mm7 # v4 | |
32 | - movq %mm7, scratch1 # scratch1: v4 ; | |
33 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
34 | movq %mm5, %mm7 # duplicate v3 | |
35 | - paddw scratch1, %mm5 # v03: v3+v4 | |
36 | - psubw scratch1, %mm7 # v04: v3-v4 | |
37 | - movq %mm5, scratch2 # scratch2: v03 | |
38 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
39 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
40 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
41 | movq %mm0, %mm5 # mm5: v00 | |
42 | ||
43 | - paddw scratch2, %mm0 # v10: v00+v03 | |
44 | - psubw scratch2, %mm5 # v13: v00-v03 | |
45 | - movq %mm3, scratch3 # scratch3: v02 | |
46 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
47 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
48 | + movq %mm3, MUNG(scratch3) # scratch3: v02 | |
49 | movq %mm1, %mm3 # duplicate v01 | |
50 | ||
51 | - paddw scratch3, %mm1 # v11: v01+v02 | |
52 | - psubw scratch3, %mm3 # v12: v01-v02 | |
53 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
54 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
55 | ||
56 | - movq %mm6, scratch4 # scratch4: v05 | |
57 | + movq %mm6, MUNG(scratch4) # scratch4: v05 | |
58 | movq %mm0, %mm6 # duplicate v10 | |
59 | ||
60 | paddw %mm1, %mm0 # v10+v11 | |
61 | @@ -113,10 +118,10 @@ _dv_dct_88_block_mmx: | |
62 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
63 | ||
64 | movq %mm4, %mm0 # mm0: v06 | |
65 | - paddw scratch4, %mm4 # v15: v05+v06 | |
66 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
67 | paddw %mm2, %mm0 # v16: v07+v06 | |
68 | ||
69 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
70 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
71 | psllw $1, %mm4 # v35: compensate the coeefient scale | |
72 | ||
73 | movq %mm4, %mm6 # duplicate v35 | |
74 | @@ -125,7 +130,7 @@ _dv_dct_88_block_mmx: | |
75 | ||
76 | paddw %mm5, %mm3 # v22: v12+v13 | |
77 | ||
78 | - pmulhw WA1, %mm3 # v32~: WA1*v22 | |
79 | + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22 | |
80 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
81 | movq %mm5, %mm6 # duplicate v13 | |
82 | ||
83 | @@ -136,13 +141,13 @@ _dv_dct_88_block_mmx: | |
84 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
85 | ||
86 | ||
87 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
88 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
89 | movq %mm0, %mm5 # duplicate v16 | |
90 | ||
91 | psubw %mm7, %mm0 # va1: v16-v14n | |
92 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
93 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
94 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
95 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
96 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
97 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
98 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale | |
99 | psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale | |
100 | ||
101 | @@ -190,22 +195,22 @@ _dv_dct_88_block_mmx: | |
102 | ||
103 | movq 16*3(%esi), %mm5 # v3 | |
104 | movq 16*4(%esi), %mm7 # v4 | |
105 | - movq %mm7, scratch1 # scratch1: v4 ; | |
106 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
107 | movq %mm5, %mm7 # duplicate v3 | |
108 | - paddw scratch1, %mm5 # v03: v3+v4 | |
109 | - psubw scratch1, %mm7 # v04: v3-v4 | |
110 | - movq %mm5, scratch2 # scratch2: v03 | |
111 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
112 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
113 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
114 | movq %mm0, %mm5 # mm5: v00 | |
115 | ||
116 | - paddw scratch2, %mm0 # v10: v00+v03 | |
117 | - psubw scratch2, %mm5 # v13: v00-v03 | |
118 | - movq %mm3, scratch3 # scratc3: v02 | |
119 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
120 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
121 | + movq %mm3, MUNG(scratch3) # scratc3: v02 | |
122 | movq %mm1, %mm3 # duplicate v01 | |
123 | ||
124 | - paddw scratch3, %mm1 # v11: v01+v02 | |
125 | - psubw scratch3, %mm3 # v12: v01-v02 | |
126 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
127 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
128 | ||
129 | - movq %mm6, scratch4 # scratc4: v05 | |
130 | + movq %mm6, MUNG(scratch4) # scratc4: v05 | |
131 | movq %mm0, %mm6 # duplicate v10 | |
132 | ||
133 | paddw %mm1, %mm0 # v10+v11 | |
134 | @@ -215,10 +220,10 @@ _dv_dct_88_block_mmx: | |
135 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
136 | ||
137 | movq %mm4, %mm0 # mm0: v06 | |
138 | - paddw scratch4, %mm4 # v15: v05+v06 | |
139 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
140 | paddw %mm2, %mm0 # v16: v07+v06 | |
141 | ||
142 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
143 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
144 | psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale | |
145 | ||
146 | movq %mm4, %mm6 # duplicate v35 | |
147 | @@ -227,7 +232,7 @@ _dv_dct_88_block_mmx: | |
148 | ||
149 | paddw %mm5, %mm3 # v22: v12+v13 | |
150 | ||
151 | - pmulhw WA1, %mm3 # v32~: WA3*v15 | |
152 | + pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15 | |
153 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
154 | movq %mm5, %mm6 # duplicate v13 | |
155 | ||
156 | @@ -237,13 +242,13 @@ _dv_dct_88_block_mmx: | |
157 | movq %mm5, 16*2(%esi) # out2: v13+v32 | |
158 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
159 | ||
160 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
161 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
162 | movq %mm0, %mm5 # duplicate v16 | |
163 | ||
164 | psubw %mm7, %mm0 # va1: v16-v14n | |
165 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
166 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
167 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
168 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
169 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
170 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
171 | psllw $16-NSHIFT, %mm7 | |
172 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient | |
173 | # scale note that WA4 is shifted 1 bit less than the others | |
174 | @@ -274,6 +279,8 @@ _dv_dct_88_block_mmx: | |
175 | ||
176 | .align 8 | |
177 | .global _dv_dct_block_mmx_postscale_88 | |
178 | +.hidden _dv_dct_block_mmx_postscale_88 | |
179 | +.type _dv_dct_block_mmx_postscale_88,@function | |
180 | _dv_dct_block_mmx_postscale_88: | |
181 | ||
182 | pushl %ebp | |
183 | @@ -750,14 +757,17 @@ _dv_dct_block_mmx_postscale_88: | |
184 | ||
185 | .align 8 | |
186 | .global _dv_dct_248_block_mmx | |
187 | +.hidden _dv_dct_248_block_mmx | |
188 | +.type _dv_dct_248_block_mmx,@function | |
189 | _dv_dct_248_block_mmx: | |
190 | ||
191 | pushl %ebp | |
192 | - movl %esp, %ebp | |
193 | pushl %esi | |
194 | pushl %edi | |
195 | ||
196 | - movl 8(%ebp), %esi # source | |
197 | + LOAD_PIC_REG_BP() | |
198 | + | |
199 | + movl 16(%ebp), %esi # source | |
200 | ||
201 | # column 0 | |
202 | ||
203 | @@ -781,7 +791,7 @@ _dv_dct_248_block_mmx: | |
204 | paddw %mm1, %mm0 # v20: v10+v11 | |
205 | psubw %mm1, %mm3 # v21: v10-v11 | |
206 | ||
207 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
208 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
209 | movq %mm4, %mm2 | |
210 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
211 | ||
212 | @@ -820,7 +830,7 @@ _dv_dct_248_block_mmx: | |
213 | paddw %mm1, %mm0 # v20: v10+v11 | |
214 | psubw %mm1, %mm3 # v21: v10-v11 | |
215 | ||
216 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
217 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
218 | movq %mm4, %mm2 | |
219 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
220 | ||
221 | @@ -857,7 +867,7 @@ _dv_dct_248_block_mmx: | |
222 | paddw %mm1, %mm0 # v20: v10+v11 | |
223 | psubw %mm1, %mm3 # v21: v10-v11 | |
224 | ||
225 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
226 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
227 | movq %mm4, %mm2 | |
228 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
229 | ||
230 | @@ -894,7 +904,7 @@ _dv_dct_248_block_mmx: | |
231 | paddw %mm1, %mm0 # v20: v10+v11 | |
232 | psubw %mm1, %mm3 # v21: v10-v11 | |
233 | ||
234 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
235 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
236 | movq %mm4, %mm2 | |
237 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
238 | ||
239 | @@ -914,6 +924,8 @@ _dv_dct_248_block_mmx: | |
240 | ||
241 | .align 8 | |
242 | .global _dv_dct_248_block_mmx_post_sum | |
243 | +.hidden _dv_dct_248_block_mmx_post_sum | |
244 | +.type _dv_dct_248_block_mmx_post_sum,@function | |
245 | _dv_dct_248_block_mmx_post_sum: | |
246 | ||
247 | pushl %ebp | |
248 | @@ -994,6 +1006,8 @@ _dv_dct_248_block_mmx_post_sum: | |
249 | ||
250 | .align 8 | |
251 | .global _dv_dct_block_mmx_postscale_248 | |
252 | +.hidden _dv_dct_block_mmx_postscale_248 | |
253 | +.type _dv_dct_block_mmx_postscale_248,@function | |
254 | _dv_dct_block_mmx_postscale_248: | |
255 | ||
256 | pushl %ebp | |
257 | --- libdv/dct_block_mmx_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2 | |
258 | +++ libdv/dct_block_mmx_x86_64.S 15 Dec 2005 06:45:38 -0000 | |
259 | @@ -59,6 +59,8 @@ scratch4: .quad 0 | |
260 | ||
261 | .align 8 | |
262 | .global _dv_dct_88_block_mmx_x86_64 | |
263 | +.hidden _dv_dct_88_block_mmx_x86_64 | |
264 | +.type _dv_dct_88_block_mmx_x86_64,@function | |
265 | _dv_dct_88_block_mmx_x86_64: | |
266 | ||
267 | /* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */ | |
268 | @@ -271,6 +273,8 @@ _dv_dct_88_block_mmx_x86_64: | |
269 | ||
270 | .align 8 | |
271 | .global _dv_dct_block_mmx_x86_64_postscale_88 | |
272 | +.hidden _dv_dct_block_mmx_x86_64_postscale_88 | |
273 | +.type _dv_dct_block_mmx_x86_64_postscale_88,@function | |
274 | _dv_dct_block_mmx_x86_64_postscale_88: | |
275 | ||
276 | /* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */ | |
277 | --- libdv/dv.c 20 Oct 2004 03:49:24 -0000 1.31 | |
278 | +++ libdv/dv.c 15 Dec 2005 06:45:38 -0000 | |
279 | @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp | |
280 | } /* dv_reconfigure */ | |
281 | ||
282 | ||
283 | +extern uint8_t dv_quant_offset[4]; | |
284 | +extern uint8_t dv_quant_shifts[22][4]; | |
285 | + | |
286 | static inline void | |
287 | dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { | |
288 | int i; | |
289 | @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d | |
290 | dv_idct_248 (co248, mb->b[i].coeffs); | |
291 | } else { | |
292 | #if ARCH_X86 | |
293 | - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
294 | + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts); | |
295 | _dv_idct_88(mb->b[i].coeffs); | |
296 | #elif ARCH_X86_64 | |
297 | _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
298 | @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv | |
299 | dv_idct_248 (co248, mb->b[b].coeffs); | |
300 | } else { | |
301 | #if ARCH_X86 | |
302 | - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); | |
303 | + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts); | |
304 | _dv_weight_88_inverse(bl->coeffs); | |
305 | _dv_idct_88(bl->coeffs); | |
306 | #elif ARCH_X86_64 | |
307 | --- libdv/encode.c 17 Nov 2004 03:36:30 -0000 1.26 | |
308 | +++ libdv/encode.c 15 Dec 2005 06:45:38 -0000 | |
309 | @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl | |
310 | } | |
311 | ||
312 | extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
313 | - dv_vlc_entry_t ** out); | |
314 | + dv_vlc_entry_t ** out, | |
315 | + dv_vlc_entry_t * lookup); | |
316 | ||
317 | extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, | |
318 | dv_vlc_entry_t ** out); | |
319 | @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv | |
320 | #elif ARCH_X86 | |
321 | int num_bits; | |
322 | ||
323 | - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); | |
324 | + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); | |
325 | emms(); | |
326 | #else | |
327 | int num_bits; | |
328 | @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv | |
329 | return num_bits; | |
330 | } | |
331 | ||
332 | -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); | |
333 | +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); | |
334 | extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); | |
335 | ||
336 | extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) | |
337 | @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl | |
338 | #elif ARCH_X86_64 | |
339 | return _dv_vlc_num_bits_block_x86_64(coeffs); | |
340 | #else | |
341 | - return _dv_vlc_num_bits_block_x86(coeffs); | |
342 | + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); | |
343 | #endif | |
344 | } | |
345 | ||
346 | --- libdv/encode_x86.S 31 Jan 2005 13:27:54 -0000 1.4 | |
347 | +++ libdv/encode_x86.S 15 Dec 2005 06:45:38 -0000 | |
348 | @@ -23,16 +23,13 @@ | |
349 | * The libdv homepage is http://libdv.sourceforge.net/. | |
350 | */ | |
351 | ||
352 | -.data | |
353 | -ALLONE: .word 1,1,1,1 | |
354 | -VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
355 | - | |
356 | - | |
357 | .section .note.GNU-stack, "", @progbits | |
358 | ||
359 | .text | |
360 | ||
361 | .global _dv_vlc_encode_block_mmx | |
362 | +.hidden _dv_vlc_encode_block_mmx | |
363 | +.type _dv_vlc_encode_block_mmx,@function | |
364 | _dv_vlc_encode_block_mmx: | |
365 | pushl %ebx | |
366 | pushl %esi | |
367 | @@ -48,11 +45,14 @@ _dv_vlc_encode_block_mmx: | |
368 | ||
369 | movl $63, %ecx | |
370 | ||
371 | - movl vlc_encode_lookup, %esi | |
372 | + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup | |
373 | ||
374 | pxor %mm0, %mm0 | |
375 | pxor %mm2, %mm2 | |
376 | - movq VLCADDMASK, %mm1 | |
377 | + pushl $0x000000FF # these four lines | |
378 | + pushl $0x000000FF # load VLCADDMASK | |
379 | + movq (%esp), %mm1 # into %mm1 off the stack | |
380 | + addl $8, %esp # --> no TEXTRELs | |
381 | xorl %ebp, %ebp | |
382 | subl $8, %edx | |
383 | vlc_encode_block_mmx_loop: | |
384 | @@ -109,6 +109,8 @@ vlc_encode_block_out: | |
385 | ret | |
386 | ||
387 | .global _dv_vlc_num_bits_block_x86 | |
388 | +.hidden _dv_vlc_num_bits_block_x86 | |
389 | +.type _dv_vlc_num_bits_block_x86,@function | |
390 | _dv_vlc_num_bits_block_x86: | |
391 | pushl %ebx | |
392 | pushl %esi | |
393 | @@ -124,7 +126,7 @@ _dv_vlc_num_bits_block_x86: | |
394 | addl $2, %edi | |
395 | ||
396 | movl $63, %ecx | |
397 | - movl vlc_num_bits_lookup, %esi | |
398 | + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup | |
399 | ||
400 | vlc_num_bits_block_x86_loop: | |
401 | movw (%edi), %ax | |
402 | @@ -167,6 +169,8 @@ vlc_num_bits_block_out: | |
403 | ret | |
404 | ||
405 | .global _dv_vlc_encode_block_pass_1_x86 | |
406 | +.hidden _dv_vlc_encode_block_pass_1_x86 | |
407 | +.type _dv_vlc_encode_block_pass_1_x86,@function | |
408 | _dv_vlc_encode_block_pass_1_x86: | |
409 | pushl %ebx | |
410 | pushl %esi | |
411 | @@ -243,6 +247,8 @@ vlc_encode_block_pass1_x86_out: | |
412 | ret | |
413 | ||
414 | .global _dv_classify_mmx | |
415 | +.hidden _dv_classify_mmx | |
416 | +.type _dv_classify_mmx,@function | |
417 | _dv_classify_mmx: | |
418 | ||
419 | pushl %ebp | |
420 | @@ -348,6 +354,8 @@ _dv_classify_mmx: | |
421 | don't know why... */ | |
422 | ||
423 | .global _dv_reorder_block_mmx | |
424 | +.hidden _dv_reorder_block_mmx | |
425 | +.type _dv_reorder_block_mmx,@function | |
426 | _dv_reorder_block_mmx: | |
427 | ||
428 | pushl %ebp | |
429 | @@ -463,6 +471,8 @@ reorder_loop: | |
430 | ret | |
431 | ||
432 | .global _dv_need_dct_248_mmx_rows | |
433 | +.hidden _dv_need_dct_248_mmx_rows | |
434 | +.type _dv_need_dct_248_mmx_rows,@function | |
435 | _dv_need_dct_248_mmx_rows: | |
436 | ||
437 | pushl %ebp | |
438 | @@ -582,8 +592,11 @@ _dv_need_dct_248_mmx_rows: | |
439 | paddw %mm5, %mm1 | |
440 | ||
441 | paddw %mm1, %mm0 | |
442 | - | |
443 | - pmaddwd ALLONE, %mm0 | |
444 | + | |
445 | + pushl $0x00010001 # these four lines | |
446 | + pushl $0x00010001 # load ALLONE | |
447 | + pmaddwd (%esp), %mm0 # into %mm0 off the stack | |
448 | + addl $8, %esp # --> no TEXTRELs | |
449 | movq %mm0, %mm1 | |
450 | psrlq $32, %mm1 | |
451 | paddd %mm1, %mm0 | |
452 | --- libdv/encode_x86_64.S 31 Jan 2005 13:27:54 -0000 1.4 | |
453 | +++ libdv/encode_x86_64.S 15 Dec 2005 06:45:39 -0000 | |
454 | @@ -32,6 +32,8 @@ VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
455 | .text | |
456 | ||
457 | .global _dv_vlc_encode_block_mmx_x86_64 | |
458 | +.hidden _dv_vlc_encode_block_mmx_x86_64 | |
459 | +.type _dv_vlc_encode_block_mmx_x86_64,@function | |
460 | _dv_vlc_encode_block_mmx_x86_64: | |
461 | ||
462 | /* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
463 | @@ -115,6 +117,8 @@ vlc_encode_block_out: | |
464 | ret | |
465 | ||
466 | .global _dv_vlc_num_bits_block_x86_64 | |
467 | +.hidden _dv_vlc_num_bits_block_x86_64 | |
468 | +.type _dv_vlc_num_bits_block_x86_64,@function | |
469 | _dv_vlc_num_bits_block_x86_64: | |
470 | ||
471 | /* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */ | |
472 | @@ -175,6 +179,8 @@ vlc_num_bits_block_out: | |
473 | ret | |
474 | ||
475 | .global _dv_vlc_encode_block_pass_1_x86_64 | |
476 | +.hidden _dv_vlc_encode_block_pass_1_x86_64 | |
477 | +.type _dv_vlc_encode_block_pass_1_x86_64,@function | |
478 | _dv_vlc_encode_block_pass_1_x86_64: | |
479 | ||
480 | /* | |
481 | @@ -253,6 +259,8 @@ vlc_encode_block_pass1_x86_out: | |
482 | ret | |
483 | ||
484 | .global _dv_classify_mmx_x86_64 | |
485 | +.hidden _dv_classify_mmx_x86_64 | |
486 | +.type _dv_classify_mmx_x86_64,@function | |
487 | _dv_classify_mmx_x86_64: | |
488 | ||
489 | /* extern int _dv_classify_mmx_x86_64(dv_coeff_t * a, rdi | |
490 | @@ -357,6 +365,8 @@ _dv_classify_mmx_x86_64: | |
491 | don't know why... */ | |
492 | ||
493 | .global _dv_reorder_block_mmx_x86_64 | |
494 | +.hidden _dv_reorder_block_mmx_x86_64 | |
495 | +.type _dv_reorder_block_mmx_x86_64,@function | |
496 | _dv_reorder_block_mmx_x86_64: | |
497 | ||
498 | /*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t * a, rdi | |
499 | @@ -471,6 +481,8 @@ reorder_loop: | |
500 | ret | |
501 | ||
502 | .global _dv_need_dct_248_mmx_x86_64_rows | |
503 | +.hidden _dv_need_dct_248_mmx_x86_64_rows | |
504 | +.type _dv_need_dct_248_mmx_x86_64_rows,@function | |
505 | _dv_need_dct_248_mmx_x86_64_rows: | |
506 | ||
507 | /* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl); rdi */ | |
508 | --- libdv/idct_block_mmx.S 31 Jan 2005 13:27:54 -0000 1.3 | |
509 | +++ libdv/idct_block_mmx.S 15 Dec 2005 06:45:39 -0000 | |
510 | @@ -8,16 +8,22 @@ | |
511 | ||
512 | .section .note.GNU-stack, "", @progbits | |
513 | ||
514 | +#include "asm_common.S" | |
515 | + | |
516 | .text | |
517 | + | |
518 | .align 4 | |
519 | .globl _dv_idct_block_mmx | |
520 | +.hidden _dv_idct_block_mmx | |
521 | .type _dv_idct_block_mmx,@function | |
522 | _dv_idct_block_mmx: | |
523 | pushl %ebp | |
524 | - movl %esp,%ebp | |
525 | pushl %esi | |
526 | - leal preSC, %ecx | |
527 | - movl 8(%ebp),%esi /* source matrix */ | |
528 | + | |
529 | + LOAD_PIC_REG_BP() | |
530 | + | |
531 | + leal MUNG(preSC), %ecx | |
532 | + movl 12(%esp),%esi /* source matrix */ | |
533 | ||
534 | /* | |
535 | * column 0: even part | |
536 | @@ -35,7 +41,7 @@ _dv_idct_block_mmx: | |
537 | movq %mm1, %mm2 /* added 11/1/96 */ | |
538 | pmulhw 8*8(%esi),%mm5 /* V8 */ | |
539 | psubsw %mm0, %mm1 /* V16 */ | |
540 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ | |
541 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ | |
542 | paddsw %mm0, %mm2 /* V17 */ | |
543 | movq %mm2, %mm0 /* duplicate V17 */ | |
544 | psraw $1, %mm2 /* t75=t82 */ | |
545 | @@ -76,7 +82,7 @@ _dv_idct_block_mmx: | |
546 | paddsw %mm0, %mm3 /* V29 ; free mm0 */ | |
547 | movq %mm7, %mm1 /* duplicate V26 */ | |
548 | psraw $1, %mm3 /* t91=t94 */ | |
549 | - pmulhw x539f539f539f539f,%mm7 /* V33 */ | |
550 | + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ | |
551 | psraw $1, %mm1 /* t96 */ | |
552 | movq %mm5, %mm0 /* duplicate V2 */ | |
553 | psraw $2, %mm4 /* t85=t87 */ | |
554 | @@ -84,15 +90,15 @@ _dv_idct_block_mmx: | |
555 | psubsw %mm4, %mm0 /* V28 ; free mm4 */ | |
556 | movq %mm0, %mm2 /* duplicate V28 */ | |
557 | psraw $1, %mm5 /* t90=t93 */ | |
558 | - pmulhw x4546454645464546,%mm0 /* V35 */ | |
559 | + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ | |
560 | psraw $1, %mm2 /* t97 */ | |
561 | movq %mm5, %mm4 /* duplicate t90=t93 */ | |
562 | psubsw %mm2, %mm1 /* V32 ; free mm2 */ | |
563 | - pmulhw x61f861f861f861f8,%mm1 /* V36 */ | |
564 | + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ | |
565 | psllw $1, %mm7 /* t107 */ | |
566 | paddsw %mm3, %mm5 /* V31 */ | |
567 | psubsw %mm3, %mm4 /* V30 ; free mm3 */ | |
568 | - pmulhw x5a825a825a825a82,%mm4 /* V34 */ | |
569 | + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ | |
570 | nop | |
571 | psubsw %mm1, %mm0 /* V38 */ | |
572 | psubsw %mm7, %mm1 /* V37 ; free mm7 */ | |
573 | @@ -159,7 +165,7 @@ _dv_idct_block_mmx: | |
574 | psubsw %mm7, %mm1 /* V50 */ | |
575 | pmulhw 8*9(%esi), %mm5 /* V9 */ | |
576 | paddsw %mm7, %mm2 /* V51 */ | |
577 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ | |
578 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ | |
579 | movq %mm2, %mm6 /* duplicate V51 */ | |
580 | psraw $1, %mm2 /* t138=t144 */ | |
581 | movq %mm3, %mm4 /* duplicate V1 */ | |
582 | @@ -200,11 +206,11 @@ _dv_idct_block_mmx: | |
583 | * even more by doing the correction step in a later stage when the number | |
584 | * is actually multiplied by 16 | |
585 | */ | |
586 | - paddw x0005000200010001, %mm4 | |
587 | + paddw MUNG(x0005000200010001), %mm4 | |
588 | psubsw %mm6, %mm3 /* V60 ; free mm6 */ | |
589 | psraw $1, %mm0 /* t154=t156 */ | |
590 | movq %mm3, %mm1 /* duplicate V60 */ | |
591 | - pmulhw x539f539f539f539f, %mm1 /* V67 */ | |
592 | + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ | |
593 | movq %mm5, %mm6 /* duplicate V3 */ | |
594 | psraw $2, %mm4 /* t148=t150 */ | |
595 | paddsw %mm4, %mm5 /* V61 */ | |
596 | @@ -213,13 +219,13 @@ _dv_idct_block_mmx: | |
597 | psllw $1, %mm1 /* t169 */ | |
598 | paddsw %mm0, %mm5 /* V65 -> result */ | |
599 | psubsw %mm0, %mm4 /* V64 ; free mm0 */ | |
600 | - pmulhw x5a825a825a825a82, %mm4 /* V68 */ | |
601 | + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ | |
602 | psraw $1, %mm3 /* t158 */ | |
603 | psubsw %mm6, %mm3 /* V66 */ | |
604 | movq %mm5, %mm2 /* duplicate V65 */ | |
605 | - pmulhw x61f861f861f861f8, %mm3 /* V70 */ | |
606 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ | |
607 | psllw $1, %mm6 /* t165 */ | |
608 | - pmulhw x4546454645464546, %mm6 /* V69 */ | |
609 | + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ | |
610 | psraw $1, %mm2 /* t172 */ | |
611 | /* moved from next block */ | |
612 | movq 8*5(%esi), %mm0 /* V56 */ | |
613 | @@ -344,7 +350,7 @@ _dv_idct_block_mmx: | |
614 | * movq 8*13(%esi), %mm4 tmt13 | |
615 | */ | |
616 | psubsw %mm4, %mm3 /* V134 */ | |
617 | - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ | |
618 | + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ | |
619 | movq 8*9(%esi), %mm6 /* tmt9 */ | |
620 | paddsw %mm4, %mm5 /* V135 ; mm4 free */ | |
621 | movq %mm0, %mm4 /* duplicate tmt1 */ | |
622 | @@ -373,17 +379,17 @@ _dv_idct_block_mmx: | |
623 | psubsw %mm7, %mm0 /* V144 */ | |
624 | movq %mm0, %mm3 /* duplicate V144 */ | |
625 | paddsw %mm7, %mm2 /* V147 ; free mm7 */ | |
626 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ | |
627 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ | |
628 | movq %mm1, %mm7 /* duplicate tmt3 */ | |
629 | paddsw %mm5, %mm7 /* V145 */ | |
630 | psubsw %mm5, %mm1 /* V146 ; free mm5 */ | |
631 | psubsw %mm1, %mm3 /* V150 */ | |
632 | movq %mm7, %mm5 /* duplicate V145 */ | |
633 | - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ | |
634 | + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ | |
635 | psubsw %mm2, %mm5 /* V148 */ | |
636 | - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ | |
637 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ | |
638 | psllw $2, %mm0 /* t311 */ | |
639 | - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ | |
640 | + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ | |
641 | paddsw %mm2, %mm7 /* V149 ; free mm2 */ | |
642 | psllw $1, %mm1 /* t313 */ | |
643 | nop /* without the nop - freeze here for one clock */ | |
644 | @@ -409,7 +415,7 @@ _dv_idct_block_mmx: | |
645 | paddsw %mm3, %mm6 /* V164 ; free mm3 */ | |
646 | movq %mm4, %mm3 /* duplicate V142 */ | |
647 | psubsw %mm5, %mm4 /* V165 ; free mm5 */ | |
648 | - movq %mm2, scratch7 /* out7 */ | |
649 | + movq %mm2, MUNG(scratch7) /* out7 */ | |
650 | psraw $4, %mm6 | |
651 | psraw $4, %mm4 | |
652 | paddsw %mm5, %mm3 /* V162 */ | |
653 | @@ -420,11 +426,11 @@ _dv_idct_block_mmx: | |
654 | */ | |
655 | movq %mm6, 8*9(%esi) /* out9 */ | |
656 | paddsw %mm1, %mm0 /* V161 */ | |
657 | - movq %mm3, scratch5 /* out5 */ | |
658 | + movq %mm3, MUNG(scratch5) /* out5 */ | |
659 | psubsw %mm1, %mm5 /* V166 ; free mm1 */ | |
660 | movq %mm4, 8*11(%esi) /* out11 */ | |
661 | psraw $4, %mm5 | |
662 | - movq %mm0, scratch3 /* out3 */ | |
663 | + movq %mm0, MUNG(scratch3) /* out3 */ | |
664 | movq %mm2, %mm4 /* duplicate V140 */ | |
665 | movq %mm5, 8*13(%esi) /* out13 */ | |
666 | paddsw %mm7, %mm2 /* V160 */ | |
667 | @@ -434,7 +440,7 @@ _dv_idct_block_mmx: | |
668 | /* moved from the next block */ | |
669 | movq 8*3(%esi), %mm7 | |
670 | psraw $4, %mm4 | |
671 | - movq %mm2, scratch1 /* out1 */ | |
672 | + movq %mm2, MUNG(scratch1) /* out1 */ | |
673 | /* moved from the next block */ | |
674 | movq %mm0, %mm1 | |
675 | movq %mm4, 8*15(%esi) /* out15 */ | |
676 | @@ -491,15 +497,15 @@ _dv_idct_block_mmx: | |
677 | paddsw %mm4, %mm3 /* V113 ; free mm4 */ | |
678 | movq %mm0, %mm4 /* duplicate V110 */ | |
679 | paddsw %mm1, %mm2 /* V111 */ | |
680 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ | |
681 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ | |
682 | psubsw %mm1, %mm5 /* V112 ; free mm1 */ | |
683 | psubsw %mm5, %mm4 /* V116 */ | |
684 | movq %mm2, %mm1 /* duplicate V111 */ | |
685 | - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ | |
686 | + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ | |
687 | psubsw %mm3, %mm2 /* V114 */ | |
688 | - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ | |
689 | + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ | |
690 | paddsw %mm3, %mm1 /* V115 ; free mm3 */ | |
691 | - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ | |
692 | + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ | |
693 | psllw $2, %mm0 /* t266 */ | |
694 | movq %mm1, (%esi) /* save V115 */ | |
695 | psllw $1, %mm5 /* t268 */ | |
696 | @@ -517,7 +523,7 @@ _dv_idct_block_mmx: | |
697 | movq %mm6, %mm3 /* duplicate tmt4 */ | |
698 | psubsw %mm0, %mm6 /* V100 */ | |
699 | paddsw %mm0, %mm3 /* V101 ; free mm0 */ | |
700 | - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ | |
701 | + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ | |
702 | movq %mm7, %mm5 /* duplicate tmt0 */ | |
703 | movq 8*8(%esi), %mm1 /* tmt8 */ | |
704 | paddsw %mm1, %mm7 /* V103 */ | |
705 | @@ -551,10 +557,10 @@ _dv_idct_block_mmx: | |
706 | movq 8*2(%esi), %mm3 /* V123 */ | |
707 | paddsw %mm4, %mm7 /* out0 */ | |
708 | /* moved up from next block */ | |
709 | - movq scratch3, %mm0 | |
710 | + movq MUNG(scratch3), %mm0 | |
711 | psraw $4, %mm7 | |
712 | /* moved up from next block */ | |
713 | - movq scratch5, %mm6 | |
714 | + movq MUNG(scratch5), %mm6 | |
715 | psubsw %mm4, %mm1 /* out14 ; free mm4 */ | |
716 | paddsw %mm3, %mm5 /* out2 */ | |
717 | psraw $4, %mm1 | |
718 | @@ -565,7 +571,7 @@ _dv_idct_block_mmx: | |
719 | movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ | |
720 | psraw $4, %mm2 | |
721 | /* moved up to the prev block */ | |
722 | - movq scratch7, %mm4 | |
723 | + movq MUNG(scratch7), %mm4 | |
724 | /* moved up to the prev block */ | |
725 | psraw $4, %mm0 | |
726 | movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ | |
727 | @@ -579,7 +585,7 @@ _dv_idct_block_mmx: | |
728 | * psraw $4, %mm0 | |
729 | * psraw $4, %mm6 | |
730 | */ | |
731 | - movq scratch1, %mm1 | |
732 | + movq MUNG(scratch1), %mm1 | |
733 | psraw $4, %mm4 | |
734 | movq %mm0, 8*3(%esi) /* out3 */ | |
735 | psraw $4, %mm1 | |
736 | --- libdv/idct_block_mmx_x86_64.S 31 Jan 2005 13:27:54 -0000 1.3 | |
737 | +++ libdv/idct_block_mmx_x86_64.S 15 Dec 2005 06:45:39 -0000 | |
738 | @@ -18,6 +18,7 @@ | |
739 | .text | |
740 | .align 4 | |
741 | .globl _dv_idct_block_mmx_x86_64 | |
742 | +.hidden _dv_idct_block_mmx_x86_64 | |
743 | .type _dv_idct_block_mmx_x86_64,@function | |
744 | _dv_idct_block_mmx_x86_64: | |
745 | /* void _dv_idct_88(dv_coeff_t *block) */ | |
746 | --- libdv/parse.c 20 Oct 2004 03:49:24 -0000 1.13 | |
747 | +++ libdv/parse.c 15 Dec 2005 06:45:39 -0000 | |
748 | @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se | |
749 | exit(0); | |
750 | #endif | |
751 | } /* dv_parse_ac_coeffs */ | |
752 | +#if defined __GNUC__ && __ELF__ | |
753 | +# define dv_strong_hidden_alias(name, aliasname) \ | |
754 | + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden"))) | |
755 | +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs); | |
756 | +#else | |
757 | +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); } | |
758 | +#endif | |
759 | ||
760 | /* --------------------------------------------------------------------------- | |
761 | */ | |
762 | --- libdv/quant.c 20 Oct 2004 03:49:24 -0000 1.9 | |
763 | +++ libdv/quant.c 15 Dec 2005 06:45:39 -0000 | |
764 | @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1 | |
765 | uint32_t dv_quant_248_mul_tab [2] [22] [64]; | |
766 | uint32_t dv_quant_88_mul_tab [2] [22] [64]; | |
767 | ||
768 | -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); | |
769 | +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][]); | |
770 | extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); | |
771 | static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
772 | static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
773 | @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno | |
774 | _dv_quant_x86_64(block, qno, klass); | |
775 | emms(); | |
776 | #else | |
777 | - _dv_quant_x86(block, qno, klass); | |
778 | + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts); | |
779 | emms(); | |
780 | #endif | |
781 | } | |
782 | --- libdv/quant.h 20 Oct 2004 03:49:24 -0000 1.4 | |
783 | +++ libdv/quant.h 15 Dec 2005 06:45:39 -0000 | |
784 | @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block, | |
785 | extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); | |
786 | extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, | |
787 | dv_248_coeff_t *co); | |
788 | -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); | |
789 | +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][]); | |
790 | extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); | |
791 | extern void dv_quant_init (void); | |
792 | #ifdef __cplusplus | |
793 | --- libdv/quant_x86.S 31 Jan 2005 13:27:54 -0000 1.4 | |
794 | +++ libdv/quant_x86.S 15 Dec 2005 06:45:39 -0000 | |
795 | @@ -57,6 +57,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl | |
796 | .text | |
797 | .align 4 | |
798 | .globl _dv_quant_88_inverse_x86 | |
799 | +.hidden _dv_quant_88_inverse_x86 | |
800 | +.type _dv_quant_88_inverse_x86,@function | |
801 | _dv_quant_88_inverse_x86: | |
802 | pushl %ebx | |
803 | pushl %esi | |
804 | @@ -73,10 +75,13 @@ _dv_quant_88_inverse_x86: | |
805 | ||
806 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
807 | movl ARGn(1),%eax /* qno */ | |
808 | + movl ARGn(3),%ebx /* dv_quant_offset */ | |
809 | + addl ARGn(2),%ebx /* class */ | |
810 | + movzbl (%ebx),%ecx | |
811 | movl ARGn(2),%ebx /* class */ | |
812 | - movzbl dv_quant_offset(%ebx),%ecx | |
813 | addl %ecx,%eax | |
814 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
815 | + movl ARGn(4),%edx /* dv_quant_shifts */ | |
816 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
817 | ||
818 | /* extra = (class == 3); */ | |
819 | /* 0 1 2 3 */ | |
820 | @@ -195,6 +200,8 @@ _dv_quant_88_inverse_x86: | |
821 | ||
822 | .align 4 | |
823 | .globl _dv_quant_x86 | |
824 | +.hidden _dv_quant_x86 | |
825 | +.type _dv_quant_x86,@function | |
826 | _dv_quant_x86: | |
827 | pushl %ebx | |
828 | pushl %ecx | |
829 | @@ -214,11 +221,13 @@ _dv_quant_x86: | |
830 | ||
831 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
832 | movl ARGn(1),%eax /* qno */ | |
833 | + movl ARGn(3),%ebx /* offset */ | |
834 | + addl ARGn(2),%ebx /* class */ | |
835 | + movzbl (%ebx),%ecx | |
836 | movl ARGn(2),%ebx /* class */ | |
837 | - | |
838 | - movzbl dv_quant_offset(%ebx),%ecx | |
839 | + movl ARGn(4),%edx /* shifts */ | |
840 | addl %ecx,%eax | |
841 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
842 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
843 | ||
844 | /* extra = (class == 3); */ | |
845 | /* 0 1 2 3 */ | |
846 | --- libdv/quant_x86_64.S 31 Jan 2005 13:27:54 -0000 1.4 | |
847 | +++ libdv/quant_x86_64.S 15 Dec 2005 06:45:39 -0000 | |
848 | @@ -57,6 +57,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl | |
849 | .text | |
850 | .align 4 | |
851 | .globl _dv_quant_88_inverse_x86_64 | |
852 | +.hidden _dv_quant_88_inverse_x86_64 | |
853 | +.type _dv_quant_88_inverse_x86_64,@function | |
854 | _dv_quant_88_inverse_x86_64: | |
855 | ||
856 | /* Args are at block=rdi, qno=rsi, class=rdx */ | |
857 | @@ -197,6 +199,8 @@ _dv_quant_88_inverse_x86_64: | |
858 | ||
859 | .align 4 | |
860 | .globl _dv_quant_x86_64 | |
861 | +.hidden _dv_quant_x86_64 | |
862 | +.type _dv_quant_x86_64,@function | |
863 | _dv_quant_x86_64: | |
864 | ||
865 | /* Args are at block=rdi, qno=rsi, class=rdx */ | |
866 | --- libdv/rgbtoyuv.S 31 Jan 2005 13:27:54 -0000 1.6 | |
867 | +++ libdv/rgbtoyuv.S 15 Dec 2005 06:45:39 -0000 | |
868 | @@ -41,9 +41,6 @@ | |
869 | #define DV_WIDTH_SHORT_HALF 720 | |
870 | #define DV_WIDTH_BYTE_HALF 360 | |
871 | ||
872 | -.global _dv_rgbtoycb_mmx | |
873 | -# .global yuvtoycb_mmx | |
874 | - | |
875 | .data | |
876 | ||
877 | .align 8 | |
878 | @@ -110,22 +107,26 @@ VR0GR: .long 0,0 | |
879 | VBG0B: .long 0,0 | |
880 | ||
881 | #endif | |
882 | - | |
883 | + | |
884 | .section .note.GNU-stack, "", @progbits | |
885 | ||
886 | +#include "asm_common.S" | |
887 | + | |
888 | .text | |
889 | ||
890 | -#define _inPtr 8 | |
891 | -#define _rows 12 | |
892 | -#define _columns 16 | |
893 | -#define _outyPtr 20 | |
894 | -#define _outuPtr 24 | |
895 | -#define _outvPtr 28 | |
896 | +#define _inPtr 24+8 | |
897 | +#define _rows 24+12 | |
898 | +#define _columns 24+16 | |
899 | +#define _outyPtr 24+20 | |
900 | +#define _outuPtr 24+24 | |
901 | +#define _outvPtr 24+28 | |
902 | ||
903 | +.global _dv_rgbtoycb_mmx | |
904 | +.hidden _dv_rgbtoycb_mmx | |
905 | +.type _dv_rgbtoycb_mmx,@function | |
906 | _dv_rgbtoycb_mmx: | |
907 | ||
908 | pushl %ebp | |
909 | - movl %esp, %ebp | |
910 | pushl %eax | |
911 | pushl %ebx | |
912 | pushl %ecx | |
913 | @@ -133,46 +134,47 @@ _dv_rgbtoycb_mmx: | |
914 | pushl %esi | |
915 | pushl %edi | |
916 | ||
917 | - leal ZEROSX, %eax #This section gets around a bug | |
918 | + LOAD_PIC_REG_BP() | |
919 | + | |
920 | + leal MUNG(ZEROSX), %eax #This section gets around a bug | |
921 | movq (%eax), %mm0 #unlikely to persist | |
922 | - movq %mm0, ZEROS | |
923 | - leal OFFSETDX, %eax | |
924 | + movq %mm0, MUNG(ZEROS) | |
925 | + leal MUNG(OFFSETDX), %eax | |
926 | movq (%eax), %mm0 | |
927 | - movq %mm0, OFFSETD | |
928 | - leal OFFSETWX, %eax | |
929 | + movq %mm0, MUNG(OFFSETD) | |
930 | + leal MUNG(OFFSETWX), %eax | |
931 | movq (%eax), %mm0 | |
932 | - movq %mm0, OFFSETW | |
933 | - leal OFFSETBX, %eax | |
934 | + movq %mm0, MUNG(OFFSETW) | |
935 | + leal MUNG(OFFSETBX), %eax | |
936 | movq (%eax), %mm0 | |
937 | - movq %mm0, OFFSETB | |
938 | - leal YR0GRX, %eax | |
939 | + movq %mm0, MUNG(OFFSETB) | |
940 | + leal MUNG(YR0GRX), %eax | |
941 | movq (%eax), %mm0 | |
942 | - movq %mm0, YR0GR | |
943 | - leal YBG0BX, %eax | |
944 | + movq %mm0, MUNG(YR0GR) | |
945 | + leal MUNG(YBG0BX), %eax | |
946 | movq (%eax), %mm0 | |
947 | - movq %mm0, YBG0B | |
948 | - leal UR0GRX, %eax | |
949 | + movq %mm0, MUNG(YBG0B) | |
950 | + leal MUNG(UR0GRX), %eax | |
951 | movq (%eax), %mm0 | |
952 | - movq %mm0, UR0GR | |
953 | - leal UBG0BX, %eax | |
954 | + movq %mm0, MUNG(UR0GR) | |
955 | + leal MUNG(UBG0BX), %eax | |
956 | movq (%eax), %mm0 | |
957 | - movq %mm0, UBG0B | |
958 | - leal VR0GRX, %eax | |
959 | + movq %mm0, MUNG(UBG0B) | |
960 | + leal MUNG(VR0GRX), %eax | |
961 | movq (%eax), %mm0 | |
962 | - movq %mm0, VR0GR | |
963 | - leal VBG0BX, %eax | |
964 | + movq %mm0, MUNG(VR0GR) | |
965 | + leal MUNG(VBG0BX), %eax | |
966 | movq (%eax), %mm0 | |
967 | - movq %mm0, VBG0B | |
968 | - | |
969 | - movl _rows(%ebp), %eax | |
970 | - movl _columns(%ebp), %ebx | |
971 | + movq %mm0, MUNG(VBG0B) | |
972 | + movl _rows(%esp), %eax | |
973 | + movl _columns(%esp), %ebx | |
974 | mull %ebx #number pixels | |
975 | shrl $3, %eax #number of loops | |
976 | movl %eax, %edi #loop counter in edi | |
977 | - movl _inPtr(%ebp), %eax | |
978 | - movl _outyPtr(%ebp), %ebx | |
979 | - movl _outuPtr(%ebp), %ecx | |
980 | - movl _outvPtr(%ebp), %edx | |
981 | + movl _inPtr(%esp), %eax | |
982 | + movl _outyPtr(%esp), %ebx | |
983 | + movl _outuPtr(%esp), %ecx | |
984 | + movl _outvPtr(%esp), %edx | |
985 | rgbtoycb_mmx_loop: | |
986 | movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 | |
987 | pxor %mm6, %mm6 #0 -> mm6 | |
988 | @@ -186,29 +188,29 @@ rgbtoycb_mmx_loop: | |
989 | punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 | |
990 | movq %mm0, %mm2 #R1B0G0R0 -> mm2 | |
991 | ||
992 | - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
993 | + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
994 | movq %mm1, %mm3 #B1G1R1B0 -> mm3 | |
995 | ||
996 | - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
997 | + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
998 | movq %mm2, %mm4 #R1B0G0R0 -> mm4 | |
999 | ||
1000 | - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 | |
1001 | + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2 | |
1002 | movq %mm3, %mm5 #B1G1R1B0 -> mm5 | |
1003 | ||
1004 | - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1005 | + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1006 | punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 | |
1007 | ||
1008 | - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1009 | + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1010 | paddd %mm1, %mm0 #Y1Y0 -> mm0 | |
1011 | ||
1012 | - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1013 | + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1014 | ||
1015 | movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 | |
1016 | paddd %mm3, %mm2 #U1U0 -> mm2 | |
1017 | ||
1018 | movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 | |
1019 | ||
1020 | - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 | |
1021 | + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1 | |
1022 | paddd %mm5, %mm4 #V1V0 -> mm4 | |
1023 | ||
1024 | movq %mm1, %mm5 #B3G3R3B2 -> mm5 | |
1025 | @@ -216,29 +218,29 @@ rgbtoycb_mmx_loop: | |
1026 | ||
1027 | paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 | |
1028 | ||
1029 | - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 | |
1030 | + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6 | |
1031 | movq %mm1, %mm3 #R3B2G2R2 -> mm3 | |
1032 | ||
1033 | - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1034 | + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1035 | movq %mm5, %mm7 #B3G3R3B2 -> mm7 | |
1036 | ||
1037 | - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1038 | + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1039 | psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 | |
1040 | ||
1041 | - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 | |
1042 | + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0 | |
1043 | movq %mm3, %mm6 #R3B2G2R2 -> mm6 | |
1044 | - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 | |
1045 | + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6 | |
1046 | psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 | |
1047 | ||
1048 | paddd %mm5, %mm1 #Y3Y2 -> mm1 | |
1049 | movq %mm7, %mm5 #B3G3R3B2 -> mm5 | |
1050 | - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 | |
1051 | + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2 | |
1052 | psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 | |
1053 | ||
1054 | - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 | |
1055 | + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2 | |
1056 | packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 | |
1057 | ||
1058 | - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1059 | + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1060 | psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 | |
1061 | ||
1062 | movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 | |
1063 | @@ -253,58 +255,58 @@ rgbtoycb_mmx_loop: | |
1064 | movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 | |
1065 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 | |
1066 | ||
1067 | - paddw OFFSETY, %mm0 | |
1068 | + paddw MUNG(OFFSETY), %mm0 | |
1069 | movq %mm0, (%ebx) #store Y3Y2Y1Y0 | |
1070 | packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 | |
1071 | ||
1072 | - movq TEMP0, %mm0 #R5B4G4R4 -> mm0 | |
1073 | + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0 | |
1074 | addl $8, %ebx | |
1075 | - | |
1076 | - punpcklbw ZEROS, %mm7 #B5G500 -> mm7 | |
1077 | + | |
1078 | + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7 | |
1079 | movq %mm0, %mm6 #R5B4G4R4 -> mm6 | |
1080 | ||
1081 | - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU | |
1082 | + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU | |
1083 | psrlq $32, %mm0 #00R5B4 -> mm0 | |
1084 | ||
1085 | paddw %mm0, %mm7 #B5G5R5B4 -> mm7 | |
1086 | movq %mm6, %mm2 #B5B4G4R4 -> mm2 | |
1087 | ||
1088 | - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1089 | + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1090 | movq %mm7, %mm0 #B5G5R5B4 -> mm0 | |
1091 | ||
1092 | - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1093 | + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1094 | packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 | |
1095 | ||
1096 | addl $24, %eax #increment RGB count | |
1097 | ||
1098 | - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 | |
1099 | + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4 | |
1100 | movq %mm6, %mm4 #B5B4G4R4 -> mm4 | |
1101 | ||
1102 | - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 | |
1103 | + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4 | |
1104 | movq %mm0, %mm3 #B5G5R5B4 -> mm0 | |
1105 | ||
1106 | - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 | |
1107 | + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4 | |
1108 | paddd %mm7, %mm2 #Y5Y4 -> mm2 | |
1109 | ||
1110 | - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1111 | + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1112 | pxor %mm7, %mm7 #0 -> mm7 | |
1113 | ||
1114 | - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1115 | + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1116 | punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 | |
1117 | ||
1118 | paddd %mm6, %mm0 #U5U4 -> mm0 | |
1119 | movq %mm1, %mm6 #B7G7R7B6 -> mm6 | |
1120 | ||
1121 | - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1122 | + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1123 | punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1124 | ||
1125 | movq %mm5, %mm7 #R7B6G6R6 -> mm7 | |
1126 | paddd %mm4, %mm3 #V5V4 -> mm3 | |
1127 | ||
1128 | - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1129 | + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1130 | movq %mm1, %mm4 #B7G7R7B6 -> mm4 | |
1131 | ||
1132 | - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1133 | + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1134 | psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 | |
1135 | ||
1136 | psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 | |
1137 | @@ -312,25 +314,25 @@ rgbtoycb_mmx_loop: | |
1138 | paddd %mm5, %mm6 #Y7Y6 -> mm6 | |
1139 | movq %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1140 | ||
1141 | - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1142 | + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1143 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 | |
1144 | ||
1145 | - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1146 | + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1147 | psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 | |
1148 | ||
1149 | packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 | |
1150 | ||
1151 | - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1152 | + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1153 | paddd %mm4, %mm7 #U7U6 -> mm7 | |
1154 | ||
1155 | psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 | |
1156 | - paddw OFFSETY, %mm2 | |
1157 | + paddw MUNG(OFFSETY), %mm2 | |
1158 | movq %mm2, (%ebx) #store Y7Y6Y5Y4 | |
1159 | ||
1160 | - movq ALLONE, %mm6 | |
1161 | + movq MUNG(ALLONE), %mm6 | |
1162 | packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 | |
1163 | ||
1164 | - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1165 | + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1166 | pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 | |
1167 | ||
1168 | pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 | |
1169 | @@ -340,8 +342,8 @@ rgbtoycb_mmx_loop: | |
1170 | ||
1171 | psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 | |
1172 | psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 | |
1173 | - | |
1174 | - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1175 | + | |
1176 | + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1177 | ||
1178 | movq %mm4, (%ecx) # store U | |
1179 | ||
1180 | @@ -374,6 +376,8 @@ rgbtoycb_mmx_loop: | |
1181 | ret | |
1182 | ||
1183 | .global _dv_ppm_copy_y_block_mmx | |
1184 | +.hidden _dv_ppm_copy_y_block_mmx | |
1185 | +.type _dv_ppm_copy_y_block_mmx,@function | |
1186 | _dv_ppm_copy_y_block_mmx: | |
1187 | ||
1188 | pushl %ebp | |
1189 | @@ -424,17 +428,20 @@ _dv_ppm_copy_y_block_mmx: | |
1190 | ret | |
1191 | ||
1192 | .global _dv_pgm_copy_y_block_mmx | |
1193 | +.hidden _dv_pgm_copy_y_block_mmx | |
1194 | +.type _dv_pgm_copy_y_block_mmx,@function | |
1195 | _dv_pgm_copy_y_block_mmx: | |
1196 | ||
1197 | pushl %ebp | |
1198 | - movl %esp, %ebp | |
1199 | pushl %esi | |
1200 | pushl %edi | |
1201 | - | |
1202 | - movl 8(%ebp), %edi # dest | |
1203 | - movl 12(%ebp), %esi # src | |
1204 | ||
1205 | - movq OFFSETY, %mm7 | |
1206 | + LOAD_PIC_REG_BP() | |
1207 | + | |
1208 | + movl 16(%esp), %edi # dest | |
1209 | + movl 20(%esp), %esi # src | |
1210 | + | |
1211 | + movq MUNG(OFFSETY), %mm7 | |
1212 | pxor %mm6, %mm6 | |
1213 | ||
1214 | movq (%esi), %mm0 | |
1215 | @@ -566,17 +573,20 @@ _dv_pgm_copy_y_block_mmx: | |
1216 | ret | |
1217 | ||
1218 | .global _dv_video_copy_y_block_mmx | |
1219 | +.hidden _dv_video_copy_y_block_mmx | |
1220 | +.type _dv_video_copy_y_block_mmx,@function | |
1221 | _dv_video_copy_y_block_mmx: | |
1222 | ||
1223 | pushl %ebp | |
1224 | - movl %esp, %ebp | |
1225 | pushl %esi | |
1226 | pushl %edi | |
1227 | - | |
1228 | - movl 8(%ebp), %edi # dest | |
1229 | - movl 12(%ebp), %esi # src | |
1230 | ||
1231 | - movq OFFSETBX, %mm7 | |
1232 | + LOAD_PIC_REG_BP() | |
1233 | + | |
1234 | + movl 16(%esp), %edi # dest | |
1235 | + movl 20(%esp), %esi # src | |
1236 | + | |
1237 | + movq MUNG(OFFSETBX), %mm7 | |
1238 | pxor %mm6, %mm6 | |
1239 | ||
1240 | movq (%esi), %mm0 | |
1241 | @@ -711,6 +721,8 @@ _dv_video_copy_y_block_mmx: | |
1242 | ||
1243 | ||
1244 | .global _dv_ppm_copy_pal_c_block_mmx | |
1245 | +.hidden _dv_ppm_copy_pal_c_block_mmx | |
1246 | +.type _dv_ppm_copy_pal_c_block_mmx,@function | |
1247 | _dv_ppm_copy_pal_c_block_mmx: | |
1248 | ||
1249 | pushl %ebp | |
1250 | @@ -854,19 +866,21 @@ _dv_ppm_copy_pal_c_block_mmx: | |
1251 | ret | |
1252 | ||
1253 | .global _dv_pgm_copy_pal_c_block_mmx | |
1254 | +.hidden _dv_pgm_copy_pal_c_block_mmx | |
1255 | +.type _dv_pgm_copy_pal_c_block_mmx,@function | |
1256 | _dv_pgm_copy_pal_c_block_mmx: | |
1257 | ||
1258 | pushl %ebp | |
1259 | - movl %esp, %ebp | |
1260 | pushl %esi | |
1261 | pushl %edi | |
1262 | pushl %ebx | |
1263 | - | |
1264 | - movl 8(%ebp), %edi # dest | |
1265 | - movl 12(%ebp), %esi # src | |
1266 | ||
1267 | + LOAD_PIC_REG_BP() | |
1268 | + | |
1269 | + movl 20(%esp), %edi # dest | |
1270 | + movl 24(%esp), %esi # src | |
1271 | ||
1272 | - movq OFFSETBX, %mm7 | |
1273 | + movq MUNG(OFFSETBX), %mm7 | |
1274 | pxor %mm6, %mm6 | |
1275 | ||
1276 | ||
1277 | @@ -1002,18 +1016,21 @@ _dv_pgm_copy_pal_c_block_mmx: | |
1278 | ret | |
1279 | ||
1280 | .global _dv_video_copy_pal_c_block_mmx | |
1281 | +.hidden _dv_video_copy_pal_c_block_mmx | |
1282 | +.type _dv_video_copy_pal_c_block_mmx,@function | |
1283 | _dv_video_copy_pal_c_block_mmx: | |
1284 | ||
1285 | pushl %ebp | |
1286 | - movl %esp, %ebp | |
1287 | pushl %esi | |
1288 | pushl %edi | |
1289 | pushl %ebx | |
1290 | - | |
1291 | - movl 8(%ebp), %edi # dest | |
1292 | - movl 12(%ebp), %esi # src | |
1293 | ||
1294 | - movq OFFSETBX, %mm7 | |
1295 | + LOAD_PIC_REG_BP() | |
1296 | + | |
1297 | + movl 20(%esp), %edi # dest | |
1298 | + movl 24(%esp), %esi # src | |
1299 | + | |
1300 | + movq MUNG(OFFSETBX), %mm7 | |
1301 | paddw %mm7, %mm7 | |
1302 | pxor %mm6, %mm6 | |
1303 | ||
1304 | @@ -1097,21 +1114,23 @@ video_copy_pal_c_block_mmx_loop: | |
1305 | ret | |
1306 | ||
1307 | .global _dv_ppm_copy_ntsc_c_block_mmx | |
1308 | +.hidden _dv_ppm_copy_ntsc_c_block_mmx | |
1309 | +.type _dv_ppm_copy_ntsc_c_block_mmx,@function | |
1310 | _dv_ppm_copy_ntsc_c_block_mmx: | |
1311 | ||
1312 | pushl %ebp | |
1313 | - movl %esp, %ebp | |
1314 | pushl %esi | |
1315 | pushl %edi | |
1316 | pushl %ebx | |
1317 | - | |
1318 | - movl 8(%ebp), %edi # dest | |
1319 | - movl 12(%ebp), %esi # src | |
1320 | + | |
1321 | + LOAD_PIC_REG_BP() | |
1322 | + | |
1323 | + movl 20(%esp), %edi # dest | |
1324 | + movl 24(%esp), %esi # src | |
1325 | ||
1326 | movl $4, %ebx | |
1327 | ||
1328 | - movq ALLONE, %mm6 | |
1329 | - | |
1330 | + movq MUNG(ALLONE), %mm6 | |
1331 | ppm_copy_ntsc_c_block_mmx_loop: | |
1332 | ||
1333 | movq (%esi), %mm0 | |
1334 | @@ -1170,17 +1189,20 @@ ppm_copy_ntsc_c_block_mmx_loop: | |
1335 | ret | |
1336 | ||
1337 | .global _dv_pgm_copy_ntsc_c_block_mmx | |
1338 | +.hidden _dv_pgm_copy_ntsc_c_block_mmx | |
1339 | +.type _dv_pgm_copy_ntsc_c_block_mmx,@function | |
1340 | _dv_pgm_copy_ntsc_c_block_mmx: | |
1341 | ||
1342 | pushl %ebp | |
1343 | - movl %esp, %ebp | |
1344 | pushl %esi | |
1345 | pushl %edi | |
1346 | - | |
1347 | - movl 8(%ebp), %edi # dest | |
1348 | - movl 12(%ebp), %esi # src | |
1349 | ||
1350 | - movq OFFSETBX, %mm7 | |
1351 | + LOAD_PIC_REG_BP() | |
1352 | + | |
1353 | + movl 16(%esp), %edi # dest | |
1354 | + movl 20(%esp), %esi # src | |
1355 | + | |
1356 | + movq MUNG(OFFSETBX), %mm7 | |
1357 | paddw %mm7, %mm7 | |
1358 | pxor %mm6, %mm6 | |
1359 | ||
1360 | @@ -1327,18 +1349,21 @@ _dv_pgm_copy_ntsc_c_block_mmx: | |
1361 | ret | |
1362 | ||
1363 | .global _dv_video_copy_ntsc_c_block_mmx | |
1364 | +.hidden _dv_video_copy_ntsc_c_block_mmx | |
1365 | +.type _dv_video_copy_ntsc_c_block_mmx,@function | |
1366 | _dv_video_copy_ntsc_c_block_mmx: | |
1367 | ||
1368 | pushl %ebp | |
1369 | - movl %esp, %ebp | |
1370 | pushl %esi | |
1371 | pushl %edi | |
1372 | pushl %ebx | |
1373 | - | |
1374 | - movl 8(%ebp), %edi # dest | |
1375 | - movl 12(%ebp), %esi # src | |
1376 | ||
1377 | - movq OFFSETBX, %mm7 | |
1378 | + LOAD_PIC_REG_BP() | |
1379 | + | |
1380 | + movl 20(%esp), %edi # dest | |
1381 | + movl 24(%esp), %esi # src | |
1382 | + | |
1383 | + movq MUNG(OFFSETBX), %mm7 | |
1384 | paddw %mm7, %mm7 | |
1385 | pxor %mm6, %mm6 | |
1386 | ||
1387 | --- libdv/rgbtoyuv_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2 | |
1388 | +++ libdv/rgbtoyuv_x86_64.S 15 Dec 2005 06:45:39 -0000 | |
1389 | @@ -41,9 +41,6 @@ | |
1390 | #define DV_WIDTH_SHORT_HALF 720 | |
1391 | #define DV_WIDTH_BYTE_HALF 360 | |
1392 | ||
1393 | -.global _dv_rgbtoycb_mmx_x86_64 | |
1394 | -# .global yuvtoycb_mmx_x86_64 | |
1395 | - | |
1396 | .data | |
1397 | ||
1398 | .align 8 | |
1399 | --- libdv/transpose_x86.S 31 Jan 2005 13:27:54 -0000 1.3 | |
1400 | +++ libdv/transpose_x86.S 15 Dec 2005 06:45:39 -0000 | |
1401 | @@ -2,6 +2,8 @@ | |
1402 | ||
1403 | .text | |
1404 | .global _dv_transpose_mmx | |
1405 | +.hidden _dv_transpose_mmx | |
1406 | +.type _dv_transpose_mmx,@function | |
1407 | ||
1408 | _dv_transpose_mmx: | |
1409 | pushl %ebp | |
1410 | --- libdv/transpose_x86_64.S 31 Jan 2005 13:27:54 -0000 1.2 | |
1411 | +++ libdv/transpose_x86_64.S 15 Dec 2005 06:45:39 -0000 | |
1412 | @@ -2,6 +2,8 @@ | |
1413 | ||
1414 | .text | |
1415 | .global _dv_transpose_mmx_x86_64 | |
1416 | +.hidden _dv_transpose_mmx_x86_64 | |
1417 | +.type _dv_transpose_mmx_x86_64,@function | |
1418 | ||
1419 | _dv_transpose_mmx_x86_64: | |
1420 | ||
1421 | --- libdv/vlc_x86.S 31 Jan 2005 13:27:54 -0000 1.3 | |
1422 | +++ libdv/vlc_x86.S 15 Dec 2005 06:45:40 -0000 | |
1423 | @@ -1,31 +1,42 @@ | |
1424 | #include "asmoff.h" | |
1425 | .section .note.GNU-stack, "", @progbits | |
1426 | ||
1427 | +#include "asm_common.S" | |
1428 | + | |
1429 | .text | |
1430 | + | |
1431 | .align 4 | |
1432 | + | |
1433 | +.hidden asm_dv_decode_vlc | |
1434 | +.globl asm_dv_decode_vlc | |
1435 | + asm_dv_decode_vlc = dv_decode_vlc | |
1436 | + | |
1437 | .globl dv_decode_vlc | |
1438 | .type dv_decode_vlc,@function | |
1439 | dv_decode_vlc: | |
1440 | pushl %ebx | |
1441 | + pushl %ebp | |
1442 | ||
1443 | - /* Args are at 8(%esp). */ | |
1444 | - movl 8(%esp),%eax /* %eax is bits */ | |
1445 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1446 | + LOAD_PIC_REG_BP() | |
1447 | + | |
1448 | + /* Args are at 12(%esp). */ | |
1449 | + movl 12(%esp),%eax /* %eax is bits */ | |
1450 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1451 | andl $0x3f,%ebx /* limit index range STL*/ | |
1452 | ||
1453 | - movl dv_vlc_class_index_mask(,%ebx,4),%edx | |
1454 | + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx | |
1455 | andl %eax,%edx | |
1456 | - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx | |
1457 | + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx | |
1458 | sarl %cl,%edx | |
1459 | - movl dv_vlc_classes(,%ebx,4),%ecx | |
1460 | + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx | |
1461 | movsbl (%ecx,%edx,1),%edx /* %edx is class */ | |
1462 | - | |
1463 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1464 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1465 | + | |
1466 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1467 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1468 | andl %eax,%ebx | |
1469 | sarl %cl,%ebx | |
1470 | ||
1471 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1472 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1473 | movl (%edx,%ebx,4),%edx | |
1474 | ||
1475 | /* Now %edx holds result, like this: | |
1476 | @@ -44,7 +55,7 @@ dv_decode_vlc: | |
1477 | movl %edx,%ecx | |
1478 | sarl $8,%ecx | |
1479 | andl $0xff,%ecx | |
1480 | - movl sign_mask(,%ecx,4),%ebx | |
1481 | + movl MUNG_ARR(sign_mask,%ecx,4),%ebx | |
1482 | andl %ebx,%eax | |
1483 | negl %eax | |
1484 | sarl $31,%eax | |
1485 | @@ -65,14 +76,14 @@ dv_decode_vlc: | |
1486 | *result = broken; | |
1487 | Note that the 'broken' pattern is all ones (i.e. 0xffffffff) | |
1488 | */ | |
1489 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1490 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1491 | subl %ecx,%ebx | |
1492 | sbbl %ebx,%ebx | |
1493 | orl %ebx,%edx | |
1494 | ||
1495 | - movl 16(%esp),%eax | |
1496 | + movl 20(%esp),%eax | |
1497 | movl %edx,(%eax) | |
1498 | - | |
1499 | + popl %ebp | |
1500 | popl %ebx | |
1501 | ret | |
1502 | ||
1503 | @@ -82,21 +93,28 @@ dv_decode_vlc: | |
1504 | .type __dv_decode_vlc,@function | |
1505 | __dv_decode_vlc: | |
1506 | pushl %ebx | |
1507 | + pushl %ebp | |
1508 | ||
1509 | - /* Args are at 8(%esp). */ | |
1510 | - movl 8(%esp),%eax /* %eax is bits */ | |
1511 | + LOAD_PIC_REG_BP() | |
1512 | + | |
1513 | + /* Args are at 12(%esp). */ | |
1514 | + movl 12(%esp),%eax /* %eax is bits */ | |
1515 | ||
1516 | movl %eax,%edx /* %edx is class */ | |
1517 | andl $0xfe00,%edx | |
1518 | sarl $9,%edx | |
1519 | +#ifdef __PIC__ | |
1520 | + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx | |
1521 | +#else | |
1522 | movsbl dv_vlc_class_lookup5(%edx),%edx | |
1523 | - | |
1524 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1525 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1526 | +#endif | |
1527 | + | |
1528 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1529 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1530 | andl %eax,%ebx | |
1531 | sarl %cl,%ebx | |
1532 | ||
1533 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1534 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1535 | movl (%edx,%ebx,4),%edx | |
1536 | ||
1537 | /* Now %edx holds result, like this: | |
1538 | @@ -114,7 +132,7 @@ __dv_decode_vlc: | |
1539 | movl %edx,%ecx | |
1540 | sarl $8,%ecx | |
1541 | andl $0xff,%ecx | |
1542 | - movl sign_mask(,%ecx,4),%ecx | |
1543 | + movl MUNG_ARR(sign_mask,%ecx,4),%ecx | |
1544 | andl %ecx,%eax | |
1545 | negl %eax | |
1546 | sarl $31,%eax | |
1547 | @@ -129,9 +147,9 @@ __dv_decode_vlc: | |
1548 | xorl %eax,%edx | |
1549 | subl %eax,%edx | |
1550 | ||
1551 | - movl 12(%esp),%eax | |
1552 | + movl 16(%esp),%eax | |
1553 | movl %edx,(%eax) | |
1554 | - | |
1555 | + popl %ebp | |
1556 | popl %ebx | |
1557 | ret | |
1558 | ||
1559 | @@ -142,13 +160,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_ | |
1560 | */ | |
1561 | .text | |
1562 | .align 4 | |
1563 | +.hidden asm_dv_parse_ac_coeffs_pass0 | |
1564 | +.globl asm_dv_parse_ac_coeffs_pass0 | |
1565 | + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0 | |
1566 | + | |
1567 | .globl dv_parse_ac_coeffs_pass0 | |
1568 | +.type dv_parse_ac_coeffs_pass0,@function | |
1569 | dv_parse_ac_coeffs_pass0: | |
1570 | pushl %ebx | |
1571 | pushl %edi | |
1572 | pushl %esi | |
1573 | pushl %ebp | |
1574 | ||
1575 | + LOAD_PIC_REG_SI() | |
1576 | + | |
1577 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1578 | ||
1579 | /* | |
1580 | @@ -161,8 +186,10 @@ dv_parse_ac_coeffs_pass0: | |
1581 | ebp bl | |
1582 | */ | |
1583 | movl ARGn(2),%ebp | |
1584 | +#ifndef __PIC__ | |
1585 | movl ARGn(0),%esi | |
1586 | movl bitstream_t_buf(%esi),%esi | |
1587 | +#endif | |
1588 | movl dv_block_t_offset(%ebp),%edi | |
1589 | movl dv_block_t_reorder(%ebp),%ebx | |
1590 | ||
1591 | @@ -172,7 +199,11 @@ dv_parse_ac_coeffs_pass0: | |
1592 | ||
1593 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1594 | pxor %mm0,%mm0 | |
1595 | +#ifdef __PIC__ | |
1596 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1597 | +#else | |
1598 | pand const_f_0_0_0,%mm1 | |
1599 | +#endif | |
1600 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1601 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1602 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1603 | @@ -193,9 +224,17 @@ dv_parse_ac_coeffs_pass0: | |
1604 | readloop: | |
1605 | movl %edi,%ecx | |
1606 | shrl $3,%ecx | |
1607 | +#ifdef __PIC__ | |
1608 | + pushl %esi | |
1609 | + movl ARGn(1),%esi | |
1610 | + movl bitstream_t_buf(%esi),%esi | |
1611 | +#endif | |
1612 | movzbl (%esi,%ecx,1),%eax | |
1613 | movzbl 1(%esi,%ecx,1),%edx | |
1614 | movzbl 2(%esi,%ecx,1),%ecx | |
1615 | +#ifdef __PIC__ | |
1616 | + popl %esi | |
1617 | +#endif | |
1618 | shll $16,%eax | |
1619 | shll $8,%edx | |
1620 | orl %ecx,%eax | |
1621 | @@ -219,7 +258,11 @@ readloop: | |
1622 | ||
1623 | /* Attempt to use the shortcut first. If it hits, then | |
1624 | this vlc term has been decoded. */ | |
1625 | +#ifdef __PIC__ | |
1626 | + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx | |
1627 | +#else | |
1628 | movl dv_vlc_class1_shortcut(,%ecx,4),%edx | |
1629 | +#endif | |
1630 | test $0x80,%edx | |
1631 | je done_decode | |
1632 | ||
1633 | @@ -230,12 +273,19 @@ readloop: | |
1634 | movl %ebx,dv_block_t_reorder(%ebp) | |
1635 | ||
1636 | /* %eax is bits */ | |
1637 | - | |
1638 | +#ifdef __PIC__ | |
1639 | + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx | |
1640 | + | |
1641 | + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx | |
1642 | + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx | |
1643 | + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx | |
1644 | +#else | |
1645 | movsbl dv_vlc_class_lookup5(%ecx),%ecx | |
1646 | ||
1647 | movl dv_vlc_index_mask(,%ecx,4),%ebx | |
1648 | movl dv_vlc_lookups(,%ecx,4),%edx | |
1649 | movl dv_vlc_index_rshift(,%ecx,4),%ecx | |
1650 | +#endif | |
1651 | andl %eax,%ebx | |
1652 | sarl %cl,%ebx | |
1653 | ||
1654 | @@ -258,7 +308,11 @@ readloop: | |
1655 | movl %edx,%ecx | |
1656 | sarl $8,%ecx | |
1657 | andl $0xff,%ecx | |
1658 | +#ifdef __PIC__ | |
1659 | + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx | |
1660 | +#else | |
1661 | movl sign_mask(,%ecx,4),%ecx | |
1662 | +#endif | |
1663 | andl %ecx,%eax | |
1664 | negl %eax | |
1665 | sarl $31,%eax | |
1666 | @@ -328,10 +382,16 @@ alldone: | |
1667 | ||
1668 | slowpath: | |
1669 | /* slow path: use dv_decode_vlc */; | |
1670 | +#ifdef __PIC__ | |
1671 | + pushl %esi | |
1672 | + leal vlc@GOTOFF(%esi),%esi | |
1673 | + xchgl %esi,(%esp) /* last parameter is &vlc */ | |
1674 | +#else | |
1675 | pushl $vlc /* last parameter is &vlc */ | |
1676 | +#endif | |
1677 | pushl %edx /* bits_left */ | |
1678 | pushl %eax /* bits */ | |
1679 | - call dv_decode_vlc | |
1680 | + call asm_dv_decode_vlc | |
1681 | addl $12,%esp | |
1682 | test $0x80,%edx /* If (vlc.run < 0) break */ | |
1683 | jne escape | |
1684 | @@ -361,12 +421,15 @@ show16: | |
1685 | gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) { | |
1686 | */ | |
1687 | .globl dv_parse_video_segment | |
1688 | + .type dv_parse_video_segment,@function | |
1689 | dv_parse_video_segment: | |
1690 | pushl %ebx | |
1691 | pushl %edi | |
1692 | pushl %esi | |
1693 | pushl %ebp | |
1694 | ||
1695 | + LOAD_PIC_REG_SI() | |
1696 | + | |
1697 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1698 | ||
1699 | movl ARGn(1),%eax /* quality */ | |
1700 | @@ -375,7 +438,11 @@ dv_parse_video_segment: | |
1701 | jz its_mono | |
1702 | movl $6,%ebx | |
1703 | its_mono: | |
1704 | +#ifdef __PIC__ | |
1705 | + movl %ebx,n_blocks@GOTOFF(%esi) | |
1706 | +#else | |
1707 | movl %ebx,n_blocks | |
1708 | +#endif | |
1709 | ||
1710 | /* | |
1711 | * ebx seg/b | |
1712 | @@ -386,15 +453,22 @@ its_mono: | |
1713 | * ebp bl | |
1714 | */ | |
1715 | movl ARGn(0),%ebx | |
1716 | +#ifndef __PIC__ | |
1717 | movl dv_videosegment_t_bs(%ebx),%esi | |
1718 | movl bitstream_t_buf(%esi),%esi | |
1719 | +#endif | |
1720 | leal dv_videosegment_t_mb(%ebx),%edi | |
1721 | ||
1722 | movl $0,%eax | |
1723 | movl $0,%ecx | |
1724 | macloop: | |
1725 | +#ifdef __PIC__ | |
1726 | + movl %eax,m@GOTOFF(%esi) | |
1727 | + movl %ecx,mb_start@GOTOFF(%esi) | |
1728 | +#else | |
1729 | movl %eax,m | |
1730 | movl %ecx,mb_start | |
1731 | +#endif | |
1732 | ||
1733 | movl ARGn(0),%ebx | |
1734 | ||
1735 | @@ -402,7 +476,15 @@ macloop: | |
1736 | /* mb->qno = bitstream_get(bs,4); */ | |
1737 | movl %ecx,%edx | |
1738 | shr $3,%edx | |
1739 | +#ifdef __PIC__ | |
1740 | + pushl %esi | |
1741 | + movl dv_videosegment_t_bs(%ebx),%esi | |
1742 | + movl bitstream_t_buf(%esi),%esi | |
1743 | +#endif | |
1744 | movzbl 3(%esi,%edx,1),%edx | |
1745 | +#ifdef __PIC__ | |
1746 | + popl %esi | |
1747 | +#endif | |
1748 | andl $0xf,%edx | |
1749 | movl %edx,dv_macroblock_t_qno(%edi) | |
1750 | ||
1751 | @@ -413,7 +495,11 @@ macloop: | |
1752 | movl %edx,dv_macroblock_t_eob_count(%edi) | |
1753 | ||
1754 | /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ | |
1755 | +#ifdef __PIC__ | |
1756 | + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx | |
1757 | +#else | |
1758 | movl dv_super_map_vertical(,%eax,4),%edx | |
1759 | +#endif | |
1760 | movl dv_videosegment_t_i(%ebx),%ecx | |
1761 | addl %ecx,%edx | |
1762 | ||
1763 | @@ -424,11 +510,20 @@ skarly: | |
1764 | andl $1,%ecx | |
1765 | shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ | |
1766 | ||
1767 | +#ifdef __PIC__ | |
1768 | + leal mod_10@GOTOFF(%esi,%edx),%edx | |
1769 | + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1770 | +#else | |
1771 | movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1772 | +#endif | |
1773 | movl %edx,dv_macroblock_t_i(%edi) | |
1774 | ||
1775 | /* mb->j = dv_super_map_horizontal[m]; */ | |
1776 | +#ifdef __PIC__ | |
1777 | + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx | |
1778 | +#else | |
1779 | movl dv_super_map_horizontal(,%eax,4),%edx | |
1780 | +#endif | |
1781 | movl %edx,dv_macroblock_t_j(%edi) | |
1782 | ||
1783 | /* mb->k = seg->k; */ | |
1784 | @@ -447,12 +542,29 @@ blkloop: | |
1785 | +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
1786 | */ | |
1787 | /* dc = bitstream_get(bs,9); */ | |
1788 | +#ifdef __PIC__ | |
1789 | + movl mb_start@GOTOFF(%esi),%ecx | |
1790 | +#else | |
1791 | movl mb_start,%ecx | |
1792 | +#endif | |
1793 | shr $3,%ecx | |
1794 | +#ifdef __PIC__ | |
1795 | + movzbl blk_start@GOTOFF(%esi,%ebx),%edx | |
1796 | +#else | |
1797 | movzbl blk_start(%ebx),%edx | |
1798 | +#endif | |
1799 | addl %ecx,%edx | |
1800 | +#ifdef __PIC__ | |
1801 | + pushl %esi | |
1802 | + movl ARGn(1),%esi | |
1803 | + movl dv_videosegment_t_bs(%esi),%esi | |
1804 | + movl bitstream_t_buf(%esi),%esi | |
1805 | +#endif | |
1806 | movzbl (%esi,%edx,1),%eax /* hi byte */ | |
1807 | movzbl 1(%esi,%edx,1),%ecx /* lo byte */ | |
1808 | +#ifdef __PIC__ | |
1809 | + popl %esi | |
1810 | +#endif | |
1811 | shll $8,%eax | |
1812 | orl %ecx,%eax | |
1813 | ||
1814 | @@ -479,7 +591,11 @@ blkloop: | |
1815 | ||
1816 | /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ | |
1817 | shll $6,%eax | |
1818 | +#ifdef __PIC__ | |
1819 | + addl dv_reorder@GOTOFF+1(%esi),%eax | |
1820 | +#else | |
1821 | addl $(dv_reorder+1),%eax | |
1822 | +#endif | |
1823 | movl %eax,dv_block_t_reorder(%ebp) | |
1824 | ||
1825 | /* bl->reorder_sentinel = bl->reorder + 63; */ | |
1826 | @@ -487,13 +603,22 @@ blkloop: | |
1827 | movl %eax,dv_block_t_reorder_sentinel(%ebp) | |
1828 | ||
1829 | /* bl->offset= mb_start + dv_parse_bit_start[b]; */ | |
1830 | +#ifdef __PIC__ | |
1831 | + movl mb_start@GOTOFF(%esi),%ecx | |
1832 | + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax | |
1833 | +#else | |
1834 | movl mb_start,%ecx | |
1835 | movl dv_parse_bit_start(,%ebx,4),%eax | |
1836 | +#endif | |
1837 | addl %ecx,%eax | |
1838 | movl %eax,dv_block_t_offset(%ebp) | |
1839 | ||
1840 | /* bl->end= mb_start + dv_parse_bit_end[b]; */ | |
1841 | +#ifdef __PIC__ | |
1842 | + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax | |
1843 | +#else | |
1844 | movl dv_parse_bit_end(,%ebx,4),%eax | |
1845 | +#endif | |
1846 | addl %ecx,%eax | |
1847 | movl %eax,dv_block_t_end(%ebp) | |
1848 | ||
1849 | @@ -505,7 +630,11 @@ blkloop: | |
1850 | /* no AC pass. Just zero out the remaining coeffs */ | |
1851 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1852 | pxor %mm0,%mm0 | |
1853 | +#ifdef __PIC__ | |
1854 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1855 | +#else | |
1856 | pand const_f_0_0_0,%mm1 | |
1857 | +#endif | |
1858 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1859 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1860 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1861 | @@ -530,18 +659,27 @@ do_ac_pass: | |
1862 | pushl %ebp | |
1863 | pushl %edi | |
1864 | pushl %eax | |
1865 | - call dv_parse_ac_coeffs_pass0 | |
1866 | + call asm_dv_parse_ac_coeffs_pass0 | |
1867 | addl $12,%esp | |
1868 | done_ac: | |
1869 | ||
1870 | +#ifdef __PIC__ | |
1871 | + movl n_blocks@GOTOFF(%esi),%eax | |
1872 | +#else | |
1873 | movl n_blocks,%eax | |
1874 | +#endif | |
1875 | addl $dv_block_t_size,%ebp | |
1876 | incl %ebx | |
1877 | cmpl %eax,%ebx | |
1878 | jnz blkloop | |
1879 | ||
1880 | +#ifdef __PIC__ | |
1881 | + movl m@GOTOFF(%esi),%eax | |
1882 | + movl mb_start@GOTOFF(%esi),%ecx | |
1883 | +#else | |
1884 | movl m,%eax | |
1885 | movl mb_start,%ecx | |
1886 | +#endif | |
1887 | addl $(8 * 80),%ecx | |
1888 | addl $dv_macroblock_t_size,%edi | |
1889 | incl %eax | |
1890 | @@ -559,7 +697,7 @@ done_ac: | |
1891 | ||
1892 | andl $DV_QUALITY_AC_MASK,%eax | |
1893 | cmpl $DV_QUALITY_AC_2,%eax | |
1894 | - jz dv_parse_ac_coeffs | |
1895 | + jz asm_dv_parse_ac_coeffs | |
1896 | movl $0,%eax | |
1897 | ret | |
1898 | ||
1899 | --- libdv/vlc_x86_64.S 31 Jan 2005 13:27:54 -0000 1.3 | |
1900 | +++ libdv/vlc_x86_64.S 15 Dec 2005 06:45:40 -0000 | |
1901 | @@ -171,7 +171,8 @@ void dv_parse_ac_coeffs_pass0(bitstream_ | |
1902 | .text | |
1903 | .align 4 | |
1904 | .globl dv_parse_ac_coeffs_pass0 | |
1905 | - | |
1906 | +.type dv_parse_ac_coeffs_pass0,@function | |
1907 | + | |
1908 | dv_parse_ac_coeffs_pass0: | |
1909 | ||
1910 | /* Args are at rdi=bs, rsi=mb, rdx=bl */ | |
1911 | @@ -424,6 +425,7 @@ show16: /* not u | |
1912 | gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) { | |
1913 | */ | |
1914 | .globl dv_parse_video_segment | |
1915 | + .type dv_parse_video_segment,@function | |
1916 | dv_parse_video_segment: | |
1917 | ||
1918 | /* Args are at rdi=seg, rsi=quality */ | |
1919 | --- /dev/null 2005-12-15 06:00:01.513317500 +0000 | |
1920 | +++ libdv/asm_common.S 2005-12-14 19:57:06.000000000 +0000 | |
1921 | @@ -0,0 +1,37 @@ | |
1922 | +/* public domain, do what you want */ | |
1923 | + | |
1924 | +#ifdef __PIC__ | |
1925 | +# define MUNG(sym) sym##@GOTOFF(%ebp) | |
1926 | +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args) | |
1927 | +#else | |
1928 | +# define MUNG(sym) sym | |
1929 | +# define MUNG_ARR(sym, args...) sym(,##args) | |
1930 | +#endif | |
1931 | + | |
1932 | +#ifdef __PIC__ | |
1933 | +# undef __i686 /* gcc define gets in our way */ | |
1934 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
1935 | +.globl __i686.get_pc_thunk.bp | |
1936 | + .hidden __i686.get_pc_thunk.bp | |
1937 | + .type __i686.get_pc_thunk.bp,@function | |
1938 | +__i686.get_pc_thunk.bp: | |
1939 | + movl (%esp), %ebp | |
1940 | + ret | |
1941 | +# define LOAD_PIC_REG_BP() \ | |
1942 | + call __i686.get_pc_thunk.bp ; \ | |
1943 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1944 | + | |
1945 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits | |
1946 | +.globl __i686.get_pc_thunk.si | |
1947 | + .hidden __i686.get_pc_thunk.si | |
1948 | + .type __i686.get_pc_thunk.si,@function | |
1949 | +__i686.get_pc_thunk.si: | |
1950 | + movl (%esp), %esi | |
1951 | + ret | |
1952 | +# define LOAD_PIC_REG_SI() \ | |
1953 | + call __i686.get_pc_thunk.si ; \ | |
1954 | + addl $_GLOBAL_OFFSET_TABLE_, %esi | |
1955 | +#else | |
1956 | +# define LOAD_PIC_REG_BP() | |
1957 | +# define LOAD_PIC_REG_SI() | |
1958 | +#endif |