]>
Commit | Line | Data |
---|---|---|
5e993f12 | 1 | --- libdv-0.104-old/libdv/asm_common.S |
2 | +++ libdv-0.104/libdv/asm_common.S | |
3 | @@ -0,0 +1,29 @@ | |
4 | +/* public domain, do what you want */ | |
5 | +/* MUNG(sym) / MUNG_ARR(sym, idx...) spell a data symbol as an operand; LOAD_PIC_REG(reg) sets up the PIC base register. */ | |
6 | +#ifdef __PIC__ /* PIC: operands become sym@GOTOFF off %ebp, so users of MUNG must first run LOAD_PIC_REG(bp) */ | |
7 | +# define MUNG(sym) sym##@GOTOFF(%ebp) | |
8 | +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args) | |
9 | +#else /* non-PIC: plain absolute symbol references, no base register involved */ | |
10 | +# define MUNG(sym) sym | |
11 | +# define MUNG_ARR(sym, args...) sym(,##args) | |
12 | +#endif | |
13 | +/* LOAD_PIC_REG(reg): define the hidden linkonce thunk __i686.get_pc_thunk.<reg> unless that symbol already exists, call it, then add $_GLOBAL_OFFSET_TABLE_ to %e<reg>. */ | |
14 | +#ifdef __PIC__ | |
15 | +# undef __i686 /* gcc define gets in our way */ | |
16 | +# define LOAD_PIC_REG(reg) \ | |
17 | + .ifndef __i686.get_pc_thunk.reg; \ | |
18 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \ | |
19 | + .global __i686.get_pc_thunk.reg; \ | |
20 | + .hidden __i686.get_pc_thunk.reg; \ | |
21 | + .type __i686.get_pc_thunk.reg,@function; \ | |
22 | + __i686.get_pc_thunk.reg: \ | |
23 | + movl (%esp), %e##reg; /* thunk body: copy the return address into %e<reg> */ \ | |
24 | + ret; \ | |
25 | + .size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \ | |
26 | + .previous; \ | |
27 | + .endif; \ | |
28 | + call __i686.get_pc_thunk.reg; \ | |
29 | + addl $_GLOBAL_OFFSET_TABLE_, %e##reg | |
30 | +#else | |
31 | +# define LOAD_PIC_REG(reg) /* expands to nothing when not building PIC */ | |
32 | +#endif | |
33 | --- libdv-0.104-old/libdv/dct_block_mmx.S | |
34 | +++ libdv-0.104/libdv/dct_block_mmx.S | |
35 | @@ -53,19 +53,22 @@ scratch2: .quad 0 | |
36 | ||
37 | .section .note.GNU-stack, "", @progbits | |
38 | ||
39 | +#include "asm_common.S" | |
40 | + | |
41 | .text | |
42 | ||
43 | .align 8 | |
44 | .global _dv_dct_88_block_mmx | |
45 | .hidden _dv_dct_88_block_mmx | |
46 | .type _dv_dct_88_block_mmx,@function | |
47 | _dv_dct_88_block_mmx: | |
48 | ||
49 | pushl %ebp | |
50 | - movl %esp, %ebp | |
51 | pushl %esi | |
52 | ||
53 | - movl 8(%ebp), %esi # source | |
54 | + LOAD_PIC_REG(bp) | |
55 | + | |
56 | + movl 12(%esp), %esi # source | |
57 | ||
58 | # column 0 | |
59 | movq 16*0(%esi), %mm0 # v0 | |
60 | @@ -86,22 +91,22 @@ _dv_dct_88_block_mmx: | |
61 | ||
62 | movq 16*3(%esi), %mm5 # v3 | |
63 | movq 16*4(%esi), %mm7 # v4 | |
64 | - movq %mm7, scratch1 # scratch1: v4 ; | |
65 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
66 | movq %mm5, %mm7 # duplicate v3 | |
67 | - paddw scratch1, %mm5 # v03: v3+v4 | |
68 | - psubw scratch1, %mm7 # v04: v3-v4 | |
69 | - movq %mm5, scratch2 # scratch2: v03 | |
70 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
71 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
72 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
73 | movq %mm0, %mm5 # mm5: v00 | |
74 | ||
75 | - paddw scratch2, %mm0 # v10: v00+v03 | |
76 | - psubw scratch2, %mm5 # v13: v00-v03 | |
77 | - movq %mm3, scratch3 # scratch3: v02 | |
78 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
79 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
80 | + movq %mm3, MUNG(scratch3) # scratch3: v02 | |
81 | movq %mm1, %mm3 # duplicate v01 | |
82 | ||
83 | - paddw scratch3, %mm1 # v11: v01+v02 | |
84 | - psubw scratch3, %mm3 # v12: v01-v02 | |
85 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
86 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
87 | ||
88 | - movq %mm6, scratch4 # scratch4: v05 | |
89 | + movq %mm6, MUNG(scratch4) # scratch4: v05 | |
90 | movq %mm0, %mm6 # duplicate v10 | |
91 | ||
92 | paddw %mm1, %mm0 # v10+v11 | |
93 | @@ -111,10 +116,10 @@ _dv_dct_88_block_mmx: | |
94 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
95 | ||
96 | movq %mm4, %mm0 # mm0: v06 | |
97 | - paddw scratch4, %mm4 # v15: v05+v06 | |
98 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
99 | paddw %mm2, %mm0 # v16: v07+v06 | |
100 | ||
101 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
102 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
103 | psllw $1, %mm4 # v35: compensate the coeefient scale | |
104 | ||
105 | movq %mm4, %mm6 # duplicate v35 | |
106 | @@ -123,7 +128,7 @@ _dv_dct_88_block_mmx: | |
107 | ||
108 | paddw %mm5, %mm3 # v22: v12+v13 | |
109 | ||
110 | - pmulhw WA1, %mm3 # v32~: WA1*v22 | |
111 | + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22 | |
112 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
113 | movq %mm5, %mm6 # duplicate v13 | |
114 | ||
115 | @@ -134,13 +139,13 @@ _dv_dct_88_block_mmx: | |
116 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
117 | ||
118 | ||
119 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
120 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
121 | movq %mm0, %mm5 # duplicate v16 | |
122 | ||
123 | psubw %mm7, %mm0 # va1: v16-v14n | |
124 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
125 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
126 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
127 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
128 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
129 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
130 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale | |
131 | psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale | |
132 | ||
133 | @@ -188,22 +193,22 @@ _dv_dct_88_block_mmx: | |
134 | ||
135 | movq 16*3(%esi), %mm5 # v3 | |
136 | movq 16*4(%esi), %mm7 # v4 | |
137 | - movq %mm7, scratch1 # scratch1: v4 ; | |
138 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
139 | movq %mm5, %mm7 # duplicate v3 | |
140 | - paddw scratch1, %mm5 # v03: v3+v4 | |
141 | - psubw scratch1, %mm7 # v04: v3-v4 | |
142 | - movq %mm5, scratch2 # scratch2: v03 | |
143 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
144 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
145 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
146 | movq %mm0, %mm5 # mm5: v00 | |
147 | ||
148 | - paddw scratch2, %mm0 # v10: v00+v03 | |
149 | - psubw scratch2, %mm5 # v13: v00-v03 | |
150 | - movq %mm3, scratch3 # scratc3: v02 | |
151 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
152 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
153 | + movq %mm3, MUNG(scratch3) # scratch3: v02 | |
154 | movq %mm1, %mm3 # duplicate v01 | |
155 | ||
156 | - paddw scratch3, %mm1 # v11: v01+v02 | |
157 | - psubw scratch3, %mm3 # v12: v01-v02 | |
158 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
159 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
160 | ||
161 | - movq %mm6, scratch4 # scratc4: v05 | |
162 | + movq %mm6, MUNG(scratch4) # scratch4: v05 | |
163 | movq %mm0, %mm6 # duplicate v10 | |
164 | ||
165 | paddw %mm1, %mm0 # v10+v11 | |
166 | @@ -213,10 +218,10 @@ _dv_dct_88_block_mmx: | |
167 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
168 | ||
169 | movq %mm4, %mm0 # mm0: v06 | |
170 | - paddw scratch4, %mm4 # v15: v05+v06 | |
171 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
172 | paddw %mm2, %mm0 # v16: v07+v06 | |
173 | ||
174 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
175 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
176 | psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale | |
177 | ||
178 | movq %mm4, %mm6 # duplicate v35 | |
179 | @@ -225,7 +230,7 @@ _dv_dct_88_block_mmx: | |
180 | ||
181 | paddw %mm5, %mm3 # v22: v12+v13 | |
182 | ||
183 | - pmulhw WA1, %mm3 # v32~: WA3*v15 | |
184 | + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22 | |
185 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
186 | movq %mm5, %mm6 # duplicate v13 | |
187 | ||
188 | @@ -235,13 +240,13 @@ _dv_dct_88_block_mmx: | |
189 | movq %mm5, 16*2(%esi) # out2: v13+v32 | |
190 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
191 | ||
192 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
193 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
194 | movq %mm0, %mm5 # duplicate v16 | |
195 | ||
196 | psubw %mm7, %mm0 # va1: v16-v14n | |
197 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
198 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
199 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
200 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
201 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
202 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
203 | psllw $16-NSHIFT, %mm7 | |
204 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient | |
205 | # scale note that WA4 is shifted 1 bit less than the others | |
206 | @@ -748,11 +755,12 @@ _dv_dct_block_mmx_postscale_88: | |
207 | _dv_dct_248_block_mmx: | |
208 | ||
209 | pushl %ebp | |
210 | - movl %esp, %ebp | |
211 | pushl %esi | |
212 | pushl %edi | |
213 | ||
214 | - movl 8(%ebp), %esi # source | |
215 | + LOAD_PIC_REG(bp) | |
216 | + | |
217 | + movl 16(%esp), %esi # source | |
218 | ||
219 | # column 0 | |
220 | ||
221 | @@ -779,7 +789,7 @@ _dv_dct_248_block_mmx: | |
222 | paddw %mm1, %mm0 # v20: v10+v11 | |
223 | psubw %mm1, %mm3 # v21: v10-v11 | |
224 | ||
225 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
226 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
227 | movq %mm4, %mm2 | |
228 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
229 | ||
230 | @@ -818,7 +828,7 @@ _dv_dct_248_block_mmx: | |
231 | paddw %mm1, %mm0 # v20: v10+v11 | |
232 | psubw %mm1, %mm3 # v21: v10-v11 | |
233 | ||
234 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
235 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
236 | movq %mm4, %mm2 | |
237 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
238 | ||
239 | @@ -855,7 +865,7 @@ _dv_dct_248_block_mmx: | |
240 | paddw %mm1, %mm0 # v20: v10+v11 | |
241 | psubw %mm1, %mm3 # v21: v10-v11 | |
242 | ||
243 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
244 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
245 | movq %mm4, %mm2 | |
246 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
247 | ||
248 | @@ -892,7 +902,7 @@ _dv_dct_248_block_mmx: | |
249 | paddw %mm1, %mm0 # v20: v10+v11 | |
250 | psubw %mm1, %mm3 # v21: v10-v11 | |
251 | ||
252 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
253 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
254 | movq %mm4, %mm2 | |
255 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
256 | ||
257 | --- libdv-0.104-old/libdv/dv.c | |
258 | +++ libdv-0.104/libdv/dv.c | |
259 | @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp | |
260 | } /* dv_reconfigure */ | |
261 | ||
262 | ||
263 | +extern uint8_t dv_quant_offset[4]; | |
264 | +extern uint8_t dv_quant_shifts[22][4]; | |
265 | + | |
266 | static inline void | |
267 | dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { | |
268 | int i; | |
269 | @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d | |
270 | dv_idct_248 (co248, mb->b[i].coeffs); | |
271 | } else { | |
272 | #if ARCH_X86 | |
273 | - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
274 | + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,&dv_quant_shifts[0][0]); /* table is uint8_t[22][4]; the asm routine takes a flat uint8_t* */ | |
275 | _dv_idct_88(mb->b[i].coeffs); | |
276 | #elif ARCH_X86_64 | |
277 | _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
278 | @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv | |
279 | dv_idct_248 (co248, mb->b[b].coeffs); | |
280 | } else { | |
281 | #if ARCH_X86 | |
282 | - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); | |
283 | + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,&dv_quant_shifts[0][0]); /* table is uint8_t[22][4]; the asm routine takes a flat uint8_t* */ | |
284 | _dv_weight_88_inverse(bl->coeffs); | |
285 | _dv_idct_88(bl->coeffs); | |
286 | #elif ARCH_X86_64 | |
287 | --- libdv-0.104-old/libdv/encode.c | |
288 | +++ libdv-0.104/libdv/encode.c | |
289 | @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl | |
290 | } | |
291 | ||
292 | extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
293 | - dv_vlc_entry_t ** out); | |
294 | + dv_vlc_entry_t ** out, | |
295 | + dv_vlc_entry_t * lookup); | |
296 | ||
297 | extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, | |
298 | dv_vlc_entry_t ** out); | |
299 | @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv | |
300 | #elif ARCH_X86 | |
301 | int num_bits; | |
302 | ||
303 | - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); | |
304 | + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); | |
305 | emms(); | |
306 | #else | |
307 | int num_bits; | |
308 | @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv | |
309 | return num_bits; | |
310 | } | |
311 | ||
312 | -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); | |
313 | +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); | |
314 | extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); | |
315 | ||
316 | extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) | |
317 | @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl | |
318 | #elif ARCH_X86_64 | |
319 | return _dv_vlc_num_bits_block_x86_64(coeffs); | |
320 | #else | |
321 | - return _dv_vlc_num_bits_block_x86(coeffs); | |
322 | + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); | |
323 | #endif | |
324 | } | |
325 | ||
326 | --- libdv-0.104-old/libdv/encode_x86.S | |
327 | +++ libdv-0.104/libdv/encode_x86.S | |
328 | @@ -23,9 +23,6 @@ | |
329 | * The libdv homepage is http://libdv.sourceforge.net/. | |
330 | */ | |
331 | ||
332 | -.data | |
333 | -ALLONE: .word 1,1,1,1 | |
334 | -VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
335 | ||
336 | ||
337 | .section .note.GNU-stack, "", @progbits | |
338 | @@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx: | |
339 | ||
340 | movl $63, %ecx | |
341 | ||
342 | - movl vlc_encode_lookup, %esi | |
343 | + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup | |
344 | ||
345 | pxor %mm0, %mm0 | |
346 | pxor %mm2, %mm2 | |
347 | - movq VLCADDMASK, %mm1 | |
348 | + pushl $0x000000FF # these four lines | |
349 | + pushl $0x000000FF # load VLCADDMASK | |
350 | + movq (%esp), %mm1 # into %mm1 off the stack | |
351 | + addl $8, %esp # --> no TEXTRELs | |
352 | xorl %ebp, %ebp | |
353 | subl $8, %edx | |
354 | vlc_encode_block_mmx_loop: | |
355 | @@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86: | |
356 | addl $2, %edi | |
357 | ||
358 | movl $63, %ecx | |
359 | - movl vlc_num_bits_lookup, %esi | |
360 | + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup | |
361 | ||
362 | vlc_num_bits_block_x86_loop: | |
363 | movw (%edi), %ax | |
364 | @@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows: | |
365 | paddw %mm5, %mm1 | |
366 | ||
367 | paddw %mm1, %mm0 | |
368 | - | |
369 | - pmaddwd ALLONE, %mm0 | |
370 | + | |
371 | + pushl $0x00010001 # these four lines build ALLONE | |
372 | + pushl $0x00010001 # (.word 1,1,1,1) on the stack and | |
373 | + pmaddwd (%esp), %mm0 # multiply-add it into %mm0 | |
374 | + addl $8, %esp # --> no TEXTRELs | |
375 | movq %mm0, %mm1 | |
376 | psrlq $32, %mm1 | |
377 | paddd %mm1, %mm0 | |
378 | --- libdv-0.104-old/libdv/idct_block_mmx.S | |
379 | +++ libdv-0.104/libdv/idct_block_mmx.S | |
380 | @@ -8,17 +8,21 @@ | |
381 | ||
382 | .section .note.GNU-stack, "", @progbits | |
383 | ||
384 | +#include "asm_common.S" | |
385 | + | |
386 | .text | |
387 | .align 4 | |
388 | .global _dv_idct_block_mmx | |
389 | .hidden _dv_idct_block_mmx | |
390 | .type _dv_idct_block_mmx,@function | |
391 | _dv_idct_block_mmx: | |
392 | pushl %ebp | |
393 | - movl %esp,%ebp | |
394 | pushl %esi | |
395 | - leal preSC, %ecx | |
396 | - movl 8(%ebp),%esi /* source matrix */ | |
397 | + | |
398 | + LOAD_PIC_REG(bp) | |
399 | + | |
400 | + leal MUNG(preSC), %ecx | |
401 | + movl 12(%esp),%esi /* source matrix */ | |
402 | ||
403 | /* | |
404 | * column 0: even part | |
405 | @@ -35,7 +41,7 @@ _dv_idct_block_mmx: | |
406 | movq %mm1, %mm2 /* added 11/1/96 */ | |
407 | pmulhw 8*8(%esi),%mm5 /* V8 */ | |
408 | psubsw %mm0, %mm1 /* V16 */ | |
409 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ | |
410 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ | |
411 | paddsw %mm0, %mm2 /* V17 */ | |
412 | movq %mm2, %mm0 /* duplicate V17 */ | |
413 | psraw $1, %mm2 /* t75=t82 */ | |
414 | @@ -76,7 +82,7 @@ _dv_idct_block_mmx: | |
415 | paddsw %mm0, %mm3 /* V29 ; free mm0 */ | |
416 | movq %mm7, %mm1 /* duplicate V26 */ | |
417 | psraw $1, %mm3 /* t91=t94 */ | |
418 | - pmulhw x539f539f539f539f,%mm7 /* V33 */ | |
419 | + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ | |
420 | psraw $1, %mm1 /* t96 */ | |
421 | movq %mm5, %mm0 /* duplicate V2 */ | |
422 | psraw $2, %mm4 /* t85=t87 */ | |
423 | @@ -84,15 +90,15 @@ _dv_idct_block_mmx: | |
424 | psubsw %mm4, %mm0 /* V28 ; free mm4 */ | |
425 | movq %mm0, %mm2 /* duplicate V28 */ | |
426 | psraw $1, %mm5 /* t90=t93 */ | |
427 | - pmulhw x4546454645464546,%mm0 /* V35 */ | |
428 | + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ | |
429 | psraw $1, %mm2 /* t97 */ | |
430 | movq %mm5, %mm4 /* duplicate t90=t93 */ | |
431 | psubsw %mm2, %mm1 /* V32 ; free mm2 */ | |
432 | - pmulhw x61f861f861f861f8,%mm1 /* V36 */ | |
433 | + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ | |
434 | psllw $1, %mm7 /* t107 */ | |
435 | paddsw %mm3, %mm5 /* V31 */ | |
436 | psubsw %mm3, %mm4 /* V30 ; free mm3 */ | |
437 | - pmulhw x5a825a825a825a82,%mm4 /* V34 */ | |
438 | + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ | |
439 | nop | |
440 | psubsw %mm1, %mm0 /* V38 */ | |
441 | psubsw %mm7, %mm1 /* V37 ; free mm7 */ | |
442 | @@ -159,7 +165,7 @@ _dv_idct_block_mmx: | |
443 | psubsw %mm7, %mm1 /* V50 */ | |
444 | pmulhw 8*9(%esi), %mm5 /* V9 */ | |
445 | paddsw %mm7, %mm2 /* V51 */ | |
446 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ | |
447 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ | |
448 | movq %mm2, %mm6 /* duplicate V51 */ | |
449 | psraw $1, %mm2 /* t138=t144 */ | |
450 | movq %mm3, %mm4 /* duplicate V1 */ | |
451 | @@ -200,11 +206,11 @@ _dv_idct_block_mmx: | |
452 | * even more by doing the correction step in a later stage when the number | |
453 | * is actually multiplied by 16 | |
454 | */ | |
455 | - paddw x0005000200010001, %mm4 | |
456 | + paddw MUNG(x0005000200010001), %mm4 | |
457 | psubsw %mm6, %mm3 /* V60 ; free mm6 */ | |
458 | psraw $1, %mm0 /* t154=t156 */ | |
459 | movq %mm3, %mm1 /* duplicate V60 */ | |
460 | - pmulhw x539f539f539f539f, %mm1 /* V67 */ | |
461 | + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ | |
462 | movq %mm5, %mm6 /* duplicate V3 */ | |
463 | psraw $2, %mm4 /* t148=t150 */ | |
464 | paddsw %mm4, %mm5 /* V61 */ | |
465 | @@ -213,13 +219,13 @@ _dv_idct_block_mmx: | |
466 | psllw $1, %mm1 /* t169 */ | |
467 | paddsw %mm0, %mm5 /* V65 -> result */ | |
468 | psubsw %mm0, %mm4 /* V64 ; free mm0 */ | |
469 | - pmulhw x5a825a825a825a82, %mm4 /* V68 */ | |
470 | + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ | |
471 | psraw $1, %mm3 /* t158 */ | |
472 | psubsw %mm6, %mm3 /* V66 */ | |
473 | movq %mm5, %mm2 /* duplicate V65 */ | |
474 | - pmulhw x61f861f861f861f8, %mm3 /* V70 */ | |
475 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ | |
476 | psllw $1, %mm6 /* t165 */ | |
477 | - pmulhw x4546454645464546, %mm6 /* V69 */ | |
478 | + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ | |
479 | psraw $1, %mm2 /* t172 */ | |
480 | /* moved from next block */ | |
481 | movq 8*5(%esi), %mm0 /* V56 */ | |
482 | @@ -344,7 +350,7 @@ _dv_idct_block_mmx: | |
483 | * movq 8*13(%esi), %mm4 tmt13 | |
484 | */ | |
485 | psubsw %mm4, %mm3 /* V134 */ | |
486 | - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ | |
487 | + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ | |
488 | movq 8*9(%esi), %mm6 /* tmt9 */ | |
489 | paddsw %mm4, %mm5 /* V135 ; mm4 free */ | |
490 | movq %mm0, %mm4 /* duplicate tmt1 */ | |
491 | @@ -373,17 +379,17 @@ _dv_idct_block_mmx: | |
492 | psubsw %mm7, %mm0 /* V144 */ | |
493 | movq %mm0, %mm3 /* duplicate V144 */ | |
494 | paddsw %mm7, %mm2 /* V147 ; free mm7 */ | |
495 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ | |
496 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ | |
497 | movq %mm1, %mm7 /* duplicate tmt3 */ | |
498 | paddsw %mm5, %mm7 /* V145 */ | |
499 | psubsw %mm5, %mm1 /* V146 ; free mm5 */ | |
500 | psubsw %mm1, %mm3 /* V150 */ | |
501 | movq %mm7, %mm5 /* duplicate V145 */ | |
502 | - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ | |
503 | + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ | |
504 | psubsw %mm2, %mm5 /* V148 */ | |
505 | - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ | |
506 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ | |
507 | psllw $2, %mm0 /* t311 */ | |
508 | - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ | |
509 | + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ | |
510 | paddsw %mm2, %mm7 /* V149 ; free mm2 */ | |
511 | psllw $1, %mm1 /* t313 */ | |
512 | nop /* without the nop - freeze here for one clock */ | |
513 | @@ -409,7 +415,7 @@ _dv_idct_block_mmx: | |
514 | paddsw %mm3, %mm6 /* V164 ; free mm3 */ | |
515 | movq %mm4, %mm3 /* duplicate V142 */ | |
516 | psubsw %mm5, %mm4 /* V165 ; free mm5 */ | |
517 | - movq %mm2, scratch7 /* out7 */ | |
518 | + movq %mm2, MUNG(scratch7) /* out7 */ | |
519 | psraw $4, %mm6 | |
520 | psraw $4, %mm4 | |
521 | paddsw %mm5, %mm3 /* V162 */ | |
522 | @@ -420,11 +426,11 @@ _dv_idct_block_mmx: | |
523 | */ | |
524 | movq %mm6, 8*9(%esi) /* out9 */ | |
525 | paddsw %mm1, %mm0 /* V161 */ | |
526 | - movq %mm3, scratch5 /* out5 */ | |
527 | + movq %mm3, MUNG(scratch5) /* out5 */ | |
528 | psubsw %mm1, %mm5 /* V166 ; free mm1 */ | |
529 | movq %mm4, 8*11(%esi) /* out11 */ | |
530 | psraw $4, %mm5 | |
531 | - movq %mm0, scratch3 /* out3 */ | |
532 | + movq %mm0, MUNG(scratch3) /* out3 */ | |
533 | movq %mm2, %mm4 /* duplicate V140 */ | |
534 | movq %mm5, 8*13(%esi) /* out13 */ | |
535 | paddsw %mm7, %mm2 /* V160 */ | |
536 | @@ -434,7 +440,7 @@ _dv_idct_block_mmx: | |
537 | /* moved from the next block */ | |
538 | movq 8*3(%esi), %mm7 | |
539 | psraw $4, %mm4 | |
540 | - movq %mm2, scratch1 /* out1 */ | |
541 | + movq %mm2, MUNG(scratch1) /* out1 */ | |
542 | /* moved from the next block */ | |
543 | movq %mm0, %mm1 | |
544 | movq %mm4, 8*15(%esi) /* out15 */ | |
545 | @@ -491,15 +497,15 @@ _dv_idct_block_mmx: | |
546 | paddsw %mm4, %mm3 /* V113 ; free mm4 */ | |
547 | movq %mm0, %mm4 /* duplicate V110 */ | |
548 | paddsw %mm1, %mm2 /* V111 */ | |
549 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ | |
550 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ | |
551 | psubsw %mm1, %mm5 /* V112 ; free mm1 */ | |
552 | psubsw %mm5, %mm4 /* V116 */ | |
553 | movq %mm2, %mm1 /* duplicate V111 */ | |
554 | - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ | |
555 | + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ | |
556 | psubsw %mm3, %mm2 /* V114 */ | |
557 | - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ | |
558 | + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ | |
559 | paddsw %mm3, %mm1 /* V115 ; free mm3 */ | |
560 | - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ | |
561 | + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ | |
562 | psllw $2, %mm0 /* t266 */ | |
563 | movq %mm1, (%esi) /* save V115 */ | |
564 | psllw $1, %mm5 /* t268 */ | |
565 | @@ -517,7 +523,7 @@ _dv_idct_block_mmx: | |
566 | movq %mm6, %mm3 /* duplicate tmt4 */ | |
567 | psubsw %mm0, %mm6 /* V100 */ | |
568 | paddsw %mm0, %mm3 /* V101 ; free mm0 */ | |
569 | - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ | |
570 | + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ | |
571 | movq %mm7, %mm5 /* duplicate tmt0 */ | |
572 | movq 8*8(%esi), %mm1 /* tmt8 */ | |
573 | paddsw %mm1, %mm7 /* V103 */ | |
574 | @@ -551,10 +557,10 @@ _dv_idct_block_mmx: | |
575 | movq 8*2(%esi), %mm3 /* V123 */ | |
576 | paddsw %mm4, %mm7 /* out0 */ | |
577 | /* moved up from next block */ | |
578 | - movq scratch3, %mm0 | |
579 | + movq MUNG(scratch3), %mm0 | |
580 | psraw $4, %mm7 | |
581 | /* moved up from next block */ | |
582 | - movq scratch5, %mm6 | |
583 | + movq MUNG(scratch5), %mm6 | |
584 | psubsw %mm4, %mm1 /* out14 ; free mm4 */ | |
585 | paddsw %mm3, %mm5 /* out2 */ | |
586 | psraw $4, %mm1 | |
587 | @@ -565,7 +571,7 @@ _dv_idct_block_mmx: | |
588 | movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ | |
589 | psraw $4, %mm2 | |
590 | /* moved up to the prev block */ | |
591 | - movq scratch7, %mm4 | |
592 | + movq MUNG(scratch7), %mm4 | |
593 | /* moved up to the prev block */ | |
594 | psraw $4, %mm0 | |
595 | movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ | |
596 | @@ -579,7 +585,7 @@ _dv_idct_block_mmx: | |
597 | * psraw $4, %mm0 | |
598 | * psraw $4, %mm6 | |
599 | */ | |
600 | - movq scratch1, %mm1 | |
601 | + movq MUNG(scratch1), %mm1 | |
602 | psraw $4, %mm4 | |
603 | movq %mm0, 8*3(%esi) /* out3 */ | |
604 | psraw $4, %mm1 | |
605 | --- libdv-0.104-old/libdv/parse.c | |
606 | +++ libdv-0.104/libdv/parse.c | |
607 | @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se | |
608 | exit(0); | |
609 | #endif | |
610 | } /* dv_parse_ac_coeffs */ | |
611 | +#if defined __GNUC__ && __ELF__ /* alias + hidden visibility path, taken only when __GNUC__ and __ELF__ are set */ | |
612 | +# define dv_strong_hidden_alias(name, aliasname) \ | |
613 | + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden"))) | |
614 | +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs); /* asm_dv_parse_ac_coeffs: hidden-visibility alias of dv_parse_ac_coeffs */ | |
615 | +#else /* otherwise provide asm_dv_parse_ac_coeffs as a plain forwarding wrapper */ | |
616 | +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); } | |
617 | +#endif | |
618 | ||
619 | /* --------------------------------------------------------------------------- | |
620 | */ | |
621 | --- libdv-0.104-old/libdv/quant.c | |
622 | +++ libdv-0.104/libdv/quant.c | |
623 | @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1 | |
624 | uint32_t dv_quant_248_mul_tab [2] [22] [64]; | |
625 | uint32_t dv_quant_88_mul_tab [2] [22] [64]; | |
626 | ||
627 | -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); | |
628 | +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts); | |
629 | extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); | |
630 | static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
631 | static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
632 | @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno | |
633 | _dv_quant_x86_64(block, qno, klass); | |
634 | emms(); | |
635 | #else | |
636 | - _dv_quant_x86(block, qno, klass); | |
637 | + _dv_quant_x86(block, qno, klass, dv_quant_offset, &dv_quant_shifts[0][0]); /* table is uint8_t[22][4]; the asm routine takes a flat uint8_t* */ | |
638 | emms(); | |
639 | #endif | |
640 | } | |
641 | --- libdv-0.104-old/libdv/quant.h | |
642 | +++ libdv-0.104/libdv/quant.h | |
643 | @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block, | |
644 | extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); | |
645 | extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, | |
646 | dv_248_coeff_t *co); | |
647 | -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); | |
648 | +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts); | |
649 | extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); | |
650 | extern void dv_quant_init (void); | |
651 | #ifdef __cplusplus | |
652 | --- libdv-0.104-old/libdv/quant_x86.S | |
653 | +++ libdv-0.104/libdv/quant_x86.S | |
654 | @@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86: | |
655 | ||
656 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
657 | movl ARGn(1),%eax /* qno */ | |
658 | + movl ARGn(3),%ebx /* dv_quant_offset */ | |
659 | + addl ARGn(2),%ebx /* class */ | |
660 | + movzbl (%ebx),%ecx | |
661 | movl ARGn(2),%ebx /* class */ | |
662 | - movzbl dv_quant_offset(%ebx),%ecx | |
663 | addl %ecx,%eax | |
664 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
665 | + movl ARGn(4),%edx /* dv_quant_shifts */ | |
666 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
667 | ||
668 | /* extra = (class == 3); */ | |
669 | /* 0 1 2 3 */ | |
670 | @@ -212,11 +219,13 @@ _dv_quant_x86: | |
671 | ||
672 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
673 | movl ARGn(1),%eax /* qno */ | |
674 | + movl ARGn(3),%ebx /* offset */ | |
675 | + addl ARGn(2),%ebx /* class */ | |
676 | + movzbl (%ebx),%ecx | |
677 | movl ARGn(2),%ebx /* class */ | |
678 | - | |
679 | - movzbl dv_quant_offset(%ebx),%ecx | |
680 | + movl ARGn(4),%edx /* shifts */ | |
681 | addl %ecx,%eax | |
682 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
683 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
684 | ||
685 | /* extra = (class == 3); */ | |
686 | /* 0 1 2 3 */ | |
687 | --- libdv-0.104-old/libdv/rgbtoyuv.S | |
688 | +++ libdv-0.104/libdv/rgbtoyuv.S | |
689 | @@ -41,9 +41,6 @@ | |
690 | #define DV_WIDTH_SHORT_HALF 720 | |
691 | #define DV_WIDTH_BYTE_HALF 360 | |
692 | ||
693 | -.global _dv_rgbtoycb_mmx | |
694 | -# .global yuvtoycb_mmx | |
695 | - | |
696 | .data | |
697 | ||
698 | .align 8 | |
699 | @@ -110,25 +107,26 @@ VR0GR: .long 0,0 | |
700 | VBG0B: .long 0,0 | |
701 | ||
702 | #endif | |
703 | - | |
704 | + | |
705 | +#include "asm_common.S" | |
706 | + | |
707 | .section .note.GNU-stack, "", @progbits | |
708 | ||
709 | .text | |
710 | ||
711 | -#define _inPtr 8 | |
712 | -#define _rows 12 | |
713 | -#define _columns 16 | |
714 | -#define _outyPtr 20 | |
715 | -#define _outuPtr 24 | |
716 | -#define _outvPtr 28 | |
717 | +#define _inPtr 24+8 | |
718 | +#define _rows 24+12 | |
719 | +#define _columns 24+16 | |
720 | +#define _outyPtr 24+20 | |
721 | +#define _outuPtr 24+24 | |
722 | +#define _outvPtr 24+28 | |
723 | ||
724 | .global _dv_rgbtoycb_mmx | |
725 | .hidden _dv_rgbtoycb_mmx | |
726 | .type _dv_rgbtoycb_mmx,@function | |
727 | _dv_rgbtoycb_mmx: | |
728 | ||
729 | pushl %ebp | |
730 | - movl %esp, %ebp | |
731 | pushl %eax | |
732 | pushl %ebx | |
733 | pushl %ecx | |
734 | @@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx: | |
735 | pushl %esi | |
736 | pushl %edi | |
737 | ||
738 | - leal ZEROSX, %eax #This section gets around a bug | |
739 | + LOAD_PIC_REG(bp) | |
740 | + | |
741 | + leal MUNG(ZEROSX), %eax #This section gets around a bug | |
742 | movq (%eax), %mm0 #unlikely to persist | |
743 | - movq %mm0, ZEROS | |
744 | - leal OFFSETDX, %eax | |
745 | + movq %mm0, MUNG(ZEROS) | |
746 | + leal MUNG(OFFSETDX), %eax | |
747 | movq (%eax), %mm0 | |
748 | - movq %mm0, OFFSETD | |
749 | - leal OFFSETWX, %eax | |
750 | + movq %mm0, MUNG(OFFSETD) | |
751 | + leal MUNG(OFFSETWX), %eax | |
752 | movq (%eax), %mm0 | |
753 | - movq %mm0, OFFSETW | |
754 | - leal OFFSETBX, %eax | |
755 | + movq %mm0, MUNG(OFFSETW) | |
756 | + leal MUNG(OFFSETBX), %eax | |
757 | movq (%eax), %mm0 | |
758 | - movq %mm0, OFFSETB | |
759 | - leal YR0GRX, %eax | |
760 | + movq %mm0, MUNG(OFFSETB) | |
761 | + leal MUNG(YR0GRX), %eax | |
762 | movq (%eax), %mm0 | |
763 | - movq %mm0, YR0GR | |
764 | - leal YBG0BX, %eax | |
765 | + movq %mm0, MUNG(YR0GR) | |
766 | + leal MUNG(YBG0BX), %eax | |
767 | movq (%eax), %mm0 | |
768 | - movq %mm0, YBG0B | |
769 | - leal UR0GRX, %eax | |
770 | + movq %mm0, MUNG(YBG0B) | |
771 | + leal MUNG(UR0GRX), %eax | |
772 | movq (%eax), %mm0 | |
773 | - movq %mm0, UR0GR | |
774 | - leal UBG0BX, %eax | |
775 | + movq %mm0, MUNG(UR0GR) | |
776 | + leal MUNG(UBG0BX), %eax | |
777 | movq (%eax), %mm0 | |
778 | - movq %mm0, UBG0B | |
779 | - leal VR0GRX, %eax | |
780 | + movq %mm0, MUNG(UBG0B) | |
781 | + leal MUNG(VR0GRX), %eax | |
782 | movq (%eax), %mm0 | |
783 | - movq %mm0, VR0GR | |
784 | - leal VBG0BX, %eax | |
785 | + movq %mm0, MUNG(VR0GR) | |
786 | + leal MUNG(VBG0BX), %eax | |
787 | movq (%eax), %mm0 | |
788 | - movq %mm0, VBG0B | |
789 | - | |
790 | - movl _rows(%ebp), %eax | |
791 | - movl _columns(%ebp), %ebx | |
792 | + movq %mm0, MUNG(VBG0B) | |
793 | + movl _rows(%esp), %eax | |
794 | + movl _columns(%esp), %ebx | |
795 | mull %ebx #number pixels | |
796 | shrl $3, %eax #number of loops | |
797 | movl %eax, %edi #loop counter in edi | |
798 | - movl _inPtr(%ebp), %eax | |
799 | - movl _outyPtr(%ebp), %ebx | |
800 | - movl _outuPtr(%ebp), %ecx | |
801 | - movl _outvPtr(%ebp), %edx | |
802 | + movl _inPtr(%esp), %eax | |
803 | + movl _outyPtr(%esp), %ebx | |
804 | + movl _outuPtr(%esp), %ecx | |
805 | + movl _outvPtr(%esp), %edx | |
806 | rgbtoycb_mmx_loop: | |
807 | movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 | |
808 | pxor %mm6, %mm6 #0 -> mm6 | |
809 | @@ -184,29 +186,29 @@ rgbtoycb_mmx_loop: | |
810 | punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 | |
811 | movq %mm0, %mm2 #R1B0G0R0 -> mm2 | |
812 | ||
813 | - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
814 | + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
815 | movq %mm1, %mm3 #B1G1R1B0 -> mm3 | |
816 | ||
817 | - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
818 | + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
819 | movq %mm2, %mm4 #R1B0G0R0 -> mm4 | |
820 | ||
821 | - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 | |
822 | + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2 | |
823 | movq %mm3, %mm5 #B1G1R1B0 -> mm5 | |
824 | ||
825 | - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
826 | + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
827 | punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 | |
828 | ||
829 | - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
830 | + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
831 | paddd %mm1, %mm0 #Y1Y0 -> mm0 | |
832 | ||
833 | - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
834 | + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
835 | ||
836 | movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 | |
837 | paddd %mm3, %mm2 #U1U0 -> mm2 | |
838 | ||
839 | movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 | |
840 | ||
841 | - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 | |
842 | + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1 | |
843 | paddd %mm5, %mm4 #V1V0 -> mm4 | |
844 | ||
845 | movq %mm1, %mm5 #B3G3R3B2 -> mm5 | |
846 | @@ -214,29 +216,29 @@ rgbtoycb_mmx_loop: | |
847 | ||
848 | paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 | |
849 | ||
850 | - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 | |
851 | + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6 | |
852 | movq %mm1, %mm3 #R3B2G2R2 -> mm3 | |
853 | ||
854 | - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
855 | + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
856 | movq %mm5, %mm7 #B3G3R3B2 -> mm7 | |
857 | ||
858 | - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
859 | + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
860 | psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 | |
861 | ||
862 | - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 | |
863 | + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0 | |
864 | movq %mm3, %mm6 #R3B2G2R2 -> mm6 | |
865 | - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 | |
866 | + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6 | |
867 | psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 | |
868 | ||
869 | paddd %mm5, %mm1 #Y3Y2 -> mm1 | |
870 | movq %mm7, %mm5 #B3G3R3B2 -> mm5 | |
871 | - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 | |
872 | + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2 | |
873 | psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 | |
874 | ||
875 | - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 | |
876 | + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2 | |
877 | packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 | |
878 | ||
879 | - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
880 | + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
881 | psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 | |
882 | ||
883 | movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 | |
884 | @@ -251,58 +253,58 @@ rgbtoycb_mmx_loop: | |
885 | movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 | |
886 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 | |
887 | ||
888 | - paddw OFFSETY, %mm0 | |
889 | + paddw MUNG(OFFSETY), %mm0 | |
890 | movq %mm0, (%ebx) #store Y3Y2Y1Y0 | |
891 | packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 | |
892 | ||
893 | - movq TEMP0, %mm0 #R5B4G4R4 -> mm0 | |
894 | + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0 | |
895 | addl $8, %ebx | |
896 | - | |
897 | - punpcklbw ZEROS, %mm7 #B5G500 -> mm7 | |
898 | + | |
899 | + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7 | |
900 | movq %mm0, %mm6 #R5B4G4R4 -> mm6 | |
901 | ||
902 | - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU | |
903 | + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU | |
904 | psrlq $32, %mm0 #00R5B4 -> mm0 | |
905 | ||
906 | paddw %mm0, %mm7 #B5G5R5B4 -> mm7 | |
907 | movq %mm6, %mm2 #B5B4G4R4 -> mm2 | |
908 | ||
909 | - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
910 | + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
911 | movq %mm7, %mm0 #B5G5R5B4 -> mm0 | |
912 | ||
913 | - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
914 | + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
915 | packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 | |
916 | ||
917 | addl $24, %eax #increment RGB count | |
918 | ||
919 | - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 | |
920 | + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4 | |
921 | movq %mm6, %mm4 #B5B4G4R4 -> mm4 | |
922 | ||
923 | - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 | |
924 | + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4 | |
925 | movq %mm0, %mm3 #B5G5R5B4 -> mm0 | |
926 | ||
927 | - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 | |
928 | + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4 | |
929 | paddd %mm7, %mm2 #Y5Y4 -> mm2 | |
930 | ||
931 | - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
932 | + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
933 | pxor %mm7, %mm7 #0 -> mm7 | |
934 | ||
935 | - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
936 | + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
937 | punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 | |
938 | ||
939 | paddd %mm6, %mm0 #U5U4 -> mm0 | |
940 | movq %mm1, %mm6 #B7G7R7B6 -> mm6 | |
941 | ||
942 | - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
943 | + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
944 | punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 | |
945 | ||
946 | movq %mm5, %mm7 #R7B6G6R6 -> mm7 | |
947 | paddd %mm4, %mm3 #V5V4 -> mm3 | |
948 | ||
949 | - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
950 | + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
951 | movq %mm1, %mm4 #B7G7R7B6 -> mm4 | |
952 | ||
953 | - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
954 | + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
955 | psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 | |
956 | ||
957 | psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 | |
958 | @@ -310,25 +312,25 @@ rgbtoycb_mmx_loop: | |
959 | paddd %mm5, %mm6 #Y7Y6 -> mm6 | |
960 | movq %mm7, %mm5 #R7B6G6R6 -> mm5 | |
961 | ||
962 | - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 | |
963 | + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7 | |
964 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 | |
965 | ||
966 | - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
967 | + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
968 | psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 | |
969 | ||
970 | packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 | |
971 | ||
972 | - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
973 | + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
974 | paddd %mm4, %mm7 #U7U6 -> mm7 | |
975 | ||
976 | psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 | |
977 | - paddw OFFSETY, %mm2 | |
978 | + paddw MUNG(OFFSETY), %mm2 | |
979 | movq %mm2, (%ebx) #store Y7Y6Y5Y4 | |
980 | ||
981 | - movq ALLONE, %mm6 | |
982 | + movq MUNG(ALLONE), %mm6 | |
983 | packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 | |
984 | ||
985 | - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
986 | + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
987 | pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 | |
988 | ||
989 | pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 | |
990 | @@ -338,8 +340,8 @@ rgbtoycb_mmx_loop: | |
991 | ||
992 | psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 | |
993 | psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 | |
994 | - | |
995 | - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
996 | + | |
997 | + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
998 | ||
999 | movq %mm4, (%ecx) # store U | |
1000 | ||
1001 | @@ -422,14 +426,15 @@ _dv_ppm_copy_y_block_mmx: | |
1002 | _dv_pgm_copy_y_block_mmx: | |
1003 | ||
1004 | pushl %ebp | |
1005 | - movl %esp, %ebp | |
1006 | pushl %esi | |
1007 | pushl %edi | |
1008 | - | |
1009 | - movl 8(%ebp), %edi # dest | |
1010 | - movl 12(%ebp), %esi # src | |
1011 | ||
1012 | - movq OFFSETY, %mm7 | |
1013 | + LOAD_PIC_REG(bp) | |
1014 | + | |
1015 | + movl 16(%esp), %edi # dest | |
1016 | + movl 20(%esp), %esi # src | |
1017 | + | |
1018 | + movq MUNG(OFFSETY), %mm7 | |
1019 | pxor %mm6, %mm6 | |
1020 | ||
1021 | movq (%esi), %mm0 | |
1022 | @@ -564,14 +571,15 @@ _dv_pgm_copy_y_block_mmx: | |
1023 | _dv_video_copy_y_block_mmx: | |
1024 | ||
1025 | pushl %ebp | |
1026 | - movl %esp, %ebp | |
1027 | pushl %esi | |
1028 | pushl %edi | |
1029 | - | |
1030 | - movl 8(%ebp), %edi # dest | |
1031 | - movl 12(%ebp), %esi # src | |
1032 | ||
1033 | - movq OFFSETBX, %mm7 | |
1034 | + LOAD_PIC_REG(bp) | |
1035 | + | |
1036 | + movl 16(%esp), %edi # dest | |
1037 | + movl 20(%esp), %esi # src | |
1038 | + | |
1039 | + movq MUNG(OFFSETBX), %mm7 | |
1040 | pxor %mm6, %mm6 | |
1041 | ||
1042 | movq (%esi), %mm0 | |
1043 | @@ -852,16 +864,16 @@ _dv_ppm_copy_pal_c_block_mmx: | |
1044 | _dv_pgm_copy_pal_c_block_mmx: | |
1045 | ||
1046 | pushl %ebp | |
1047 | - movl %esp, %ebp | |
1048 | pushl %esi | |
1049 | pushl %edi | |
1050 | pushl %ebx | |
1051 | - | |
1052 | - movl 8(%ebp), %edi # dest | |
1053 | - movl 12(%ebp), %esi # src | |
1054 | ||
1055 | + LOAD_PIC_REG(bp) | |
1056 | + | |
1057 | + movl 20(%esp), %edi # dest | |
1058 | + movl 24(%esp), %esi # src | |
1059 | ||
1060 | - movq OFFSETBX, %mm7 | |
1061 | + movq MUNG(OFFSETBX), %mm7 | |
1062 | pxor %mm6, %mm6 | |
1063 | ||
1064 | ||
1065 | @@ -1000,15 +1014,16 @@ _dv_pgm_copy_pal_c_block_mmx: | |
1066 | _dv_video_copy_pal_c_block_mmx: | |
1067 | ||
1068 | pushl %ebp | |
1069 | - movl %esp, %ebp | |
1070 | pushl %esi | |
1071 | pushl %edi | |
1072 | pushl %ebx | |
1073 | - | |
1074 | - movl 8(%ebp), %edi # dest | |
1075 | - movl 12(%ebp), %esi # src | |
1076 | ||
1077 | - movq OFFSETBX, %mm7 | |
1078 | + LOAD_PIC_REG(bp) | |
1079 | + | |
1080 | + movl 20(%esp), %edi # dest | |
1081 | + movl 24(%esp), %esi # src | |
1082 | + | |
1083 | + movq MUNG(OFFSETBX), %mm7 | |
1084 | paddw %mm7, %mm7 | |
1085 | pxor %mm6, %mm6 | |
1086 | ||
1087 | @@ -1095,18 +1112,18 @@ video_copy_pal_c_block_mmx_loop: | |
1088 | _dv_ppm_copy_ntsc_c_block_mmx: | |
1089 | ||
1090 | pushl %ebp | |
1091 | - movl %esp, %ebp | |
1092 | pushl %esi | |
1093 | pushl %edi | |
1094 | pushl %ebx | |
1095 | - | |
1096 | - movl 8(%ebp), %edi # dest | |
1097 | - movl 12(%ebp), %esi # src | |
1098 | + | |
1099 | + LOAD_PIC_REG(bp) | |
1100 | + | |
1101 | + movl 20(%esp), %edi # dest | |
1102 | + movl 24(%esp), %esi # src | |
1103 | ||
1104 | movl $4, %ebx | |
1105 | ||
1106 | - movq ALLONE, %mm6 | |
1107 | - | |
1108 | + movq MUNG(ALLONE), %mm6 | |
1109 | ppm_copy_ntsc_c_block_mmx_loop: | |
1110 | ||
1111 | movq (%esi), %mm0 | |
1112 | @@ -1168,14 +1187,15 @@ ppm_copy_ntsc_c_block_mmx_loop: | |
1113 | _dv_pgm_copy_ntsc_c_block_mmx: | |
1114 | ||
1115 | pushl %ebp | |
1116 | - movl %esp, %ebp | |
1117 | pushl %esi | |
1118 | pushl %edi | |
1119 | - | |
1120 | - movl 8(%ebp), %edi # dest | |
1121 | - movl 12(%ebp), %esi # src | |
1122 | ||
1123 | - movq OFFSETBX, %mm7 | |
1124 | + LOAD_PIC_REG(bp) | |
1125 | + | |
1126 | + movl 16(%esp), %edi # dest | |
1127 | + movl 20(%esp), %esi # src | |
1128 | + | |
1129 | + movq MUNG(OFFSETBX), %mm7 | |
1130 | paddw %mm7, %mm7 | |
1131 | pxor %mm6, %mm6 | |
1132 | ||
1133 | @@ -1325,15 +1347,16 @@ _dv_pgm_copy_ntsc_c_block_mmx: | |
1134 | _dv_video_copy_ntsc_c_block_mmx: | |
1135 | ||
1136 | pushl %ebp | |
1137 | - movl %esp, %ebp | |
1138 | pushl %esi | |
1139 | pushl %edi | |
1140 | pushl %ebx | |
1141 | - | |
1142 | - movl 8(%ebp), %edi # dest | |
1143 | - movl 12(%ebp), %esi # src | |
1144 | ||
1145 | - movq OFFSETBX, %mm7 | |
1146 | + LOAD_PIC_REG(bp) | |
1147 | + | |
1148 | + movl 20(%esp), %edi # dest | |
1149 | + movl 24(%esp), %esi # src | |
1150 | + | |
1151 | + movq MUNG(OFFSETBX), %mm7 | |
1152 | paddw %mm7, %mm7 | |
1153 | pxor %mm6, %mm6 | |
1154 | ||
1155 | --- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S | |
1156 | +++ libdv-0.104/libdv/rgbtoyuv_x86_64.S | |
1157 | @@ -41,9 +41,6 @@ | |
1158 | #define DV_WIDTH_SHORT_HALF 720 | |
1159 | #define DV_WIDTH_BYTE_HALF 360 | |
1160 | ||
1161 | -.global _dv_rgbtoycb_mmx_x86_64 | |
1162 | -# .global yuvtoycb_mmx_x86_64 | |
1163 | - | |
1164 | .data | |
1165 | ||
1166 | .align 8 | |
1167 | --- libdv-0.104-old/libdv/vlc_x86.S | |
1168 | +++ libdv-0.104/libdv/vlc_x86.S | |
1169 | @@ -1,31 +1,39 @@ | |
1170 | #include "asmoff.h" | |
1171 | .section .note.GNU-stack, "", @progbits | |
1172 | + #include "asm_common.S" | |
1173 | ||
1174 | .text | |
1175 | .align 4 | |
1176 | .globl dv_decode_vlc | |
1177 | +.globl asm_dv_decode_vlc | |
1178 | +.hidden asm_dv_decode_vlc | |
1179 | +asm_dv_decode_vlc = dv_decode_vlc | |
1180 | + | |
1181 | .type dv_decode_vlc,@function | |
1182 | dv_decode_vlc: | |
1183 | pushl %ebx | |
1184 | + pushl %ebp | |
1185 | + | |
1186 | + LOAD_PIC_REG(bp) | |
1187 | ||
1188 | - /* Args are at 8(%esp). */ | |
1189 | - movl 8(%esp),%eax /* %eax is bits */ | |
1190 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1191 | + /* Args are at 12(%esp). */ | |
1192 | + movl 12(%esp),%eax /* %eax is bits */ | |
1193 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1194 | andl $0x3f,%ebx /* limit index range STL*/ | |
1195 | ||
1196 | - movl dv_vlc_class_index_mask(,%ebx,4),%edx | |
1197 | + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx | |
1198 | andl %eax,%edx | |
1199 | - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx | |
1200 | + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx | |
1201 | sarl %cl,%edx | |
1202 | - movl dv_vlc_classes(,%ebx,4),%ecx | |
1203 | + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx | |
1204 | movsbl (%ecx,%edx,1),%edx /* %edx is class */ | |
1205 | ||
1206 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1207 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1208 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1209 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1210 | andl %eax,%ebx | |
1211 | sarl %cl,%ebx | |
1212 | ||
1213 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1214 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1215 | movl (%edx,%ebx,4),%edx | |
1216 | ||
1217 | /* Now %edx holds result, like this: | |
1218 | @@ -42,7 +51,7 @@ dv_decode_vlc: | |
1219 | movl %edx,%ecx | |
1220 | sarl $8,%ecx | |
1221 | andl $0xff,%ecx | |
1222 | - movl sign_mask(,%ecx,4),%ebx | |
1223 | + movl MUNG_ARR(sign_mask,%ecx,4),%ebx | |
1224 | andl %ebx,%eax | |
1225 | negl %eax | |
1226 | sarl $31,%eax | |
1227 | @@ -63,14 +72,14 @@ dv_decode_vlc: | |
1228 | *result = broken; | |
1229 | Note that the 'broken' pattern is all ones (i.e. 0xffffffff) | |
1230 | */ | |
1231 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1232 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1233 | subl %ecx,%ebx | |
1234 | sbbl %ebx,%ebx | |
1235 | orl %ebx,%edx | |
1236 | ||
1237 | - movl 16(%esp),%eax | |
1238 | + movl 20(%esp),%eax | |
1239 | movl %edx,(%eax) | |
1240 | - | |
1241 | + popl %ebp | |
1242 | popl %ebx | |
1243 | ret | |
1244 | ||
1245 | @@ -80,21 +89,28 @@ dv_decode_vlc: | |
1246 | .type __dv_decode_vlc,@function | |
1247 | __dv_decode_vlc: | |
1248 | pushl %ebx | |
1249 | + pushl %ebp | |
1250 | + | |
1251 | + LOAD_PIC_REG(bp) | |
1252 | ||
1253 | - /* Args are at 8(%esp). */ | |
1254 | - movl 8(%esp),%eax /* %eax is bits */ | |
1255 | + /* Args are at 12(%esp). */ | |
1256 | + movl 12(%esp),%eax /* %eax is bits */ | |
1257 | ||
1258 | movl %eax,%edx /* %edx is class */ | |
1259 | andl $0xfe00,%edx | |
1260 | sarl $9,%edx | |
1261 | +#ifdef __PIC__ | |
1262 | + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx | |
1263 | +#else | |
1264 | movsbl dv_vlc_class_lookup5(%edx),%edx | |
1265 | - | |
1266 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1267 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1268 | +#endif | |
1269 | + | |
1270 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1271 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1272 | andl %eax,%ebx | |
1273 | sarl %cl,%ebx | |
1274 | ||
1275 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1276 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1277 | movl (%edx,%ebx,4),%edx | |
1278 | ||
1279 | /* Now %edx holds result, like this: | |
1280 | @@ -112,7 +128,7 @@ __dv_decode_vlc: | |
1281 | movl %edx,%ecx | |
1282 | sarl $8,%ecx | |
1283 | andl $0xff,%ecx | |
1284 | - movl sign_mask(,%ecx,4),%ecx | |
1285 | + movl MUNG_ARR(sign_mask,%ecx,4),%ecx | |
1286 | andl %ecx,%eax | |
1287 | negl %eax | |
1288 | sarl $31,%eax | |
1289 | @@ -127,9 +143,9 @@ __dv_decode_vlc: | |
1290 | xorl %eax,%edx | |
1291 | subl %eax,%edx | |
1292 | ||
1293 | - movl 12(%esp),%eax | |
1294 | + movl 16(%esp),%eax | |
1295 | movl %edx,(%eax) | |
1296 | - | |
1297 | + popl %ebp | |
1298 | popl %ebx | |
1299 | ret | |
1300 | ||
1301 | @@ -140,14 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_ | |
1302 | */ | |
1303 | .text | |
1304 | .align 4 | |
1305 | +.globl asm_dv_parse_ac_coeffs_pass0 | |
1306 | +.hidden asm_dv_parse_ac_coeffs_pass0 | |
1307 | + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0 | |
1308 | + | |
1309 | .globl dv_parse_ac_coeffs_pass0 | |
1310 | .type dv_parse_ac_coeffs_pass0,@function | |
1311 | dv_parse_ac_coeffs_pass0: | |
1312 | pushl %ebx | |
1313 | pushl %edi | |
1314 | pushl %esi | |
1315 | pushl %ebp | |
1316 | ||
1317 | + LOAD_PIC_REG(si) | |
1318 | + | |
1319 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1320 | ||
1321 | /* | |
1322 | @@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0: | |
1323 | ebp bl | |
1324 | */ | |
1325 | movl ARGn(2),%ebp | |
1326 | +#ifndef __PIC__ | |
1327 | movl ARGn(0),%esi | |
1328 | movl bitstream_t_buf(%esi),%esi | |
1329 | +#endif | |
1330 | movl dv_block_t_offset(%ebp),%edi | |
1331 | movl dv_block_t_reorder(%ebp),%ebx | |
1332 | ||
1333 | @@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0: | |
1334 | ||
1335 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1336 | pxor %mm0,%mm0 | |
1337 | +#ifdef __PIC__ | |
1338 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1339 | +#else | |
1340 | pand const_f_0_0_0,%mm1 | |
1341 | +#endif | |
1342 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1343 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1344 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1345 | @@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0: | |
1346 | readloop: | |
1347 | movl %edi,%ecx | |
1348 | shrl $3,%ecx | |
1349 | +#ifdef __PIC__ | |
1350 | + movl ARGn(0),%eax /* esi holds the PIC base here, so re-read the bitstream pointer from the stack */ | |
1351 | + addl bitstream_t_buf(%eax),%ecx /* ecx = buf + (offset >> 3) */ | |
1352 | + movzbl (%ecx),%eax | |
1353 | + movzbl 1(%ecx),%edx | |
1354 | + movzbl 2(%ecx),%ecx /* ecx is overwritten last: it is the base for all three loads */ | |
1355 | +#else | |
1356 | movzbl (%esi,%ecx,1),%eax | |
1357 | movzbl 1(%esi,%ecx,1),%edx | |
1358 | movzbl 2(%esi,%ecx,1),%ecx | |
1359 | +#endif | |
1360 | shll $16,%eax | |
1361 | shll $8,%edx | |
1362 | orl %ecx,%eax | |
1363 | @@ -217,7 +254,11 @@ readloop: | |
1364 | ||
1365 | /* Attempt to use the shortcut first. If it hits, then | |
1366 | this vlc term has been decoded. */ | |
1367 | +#ifdef __PIC__ | |
1368 | + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx | |
1369 | +#else | |
1370 | movl dv_vlc_class1_shortcut(,%ecx,4),%edx | |
1371 | +#endif | |
1372 | test $0x80,%edx | |
1373 | je done_decode | |
1374 | ||
1375 | @@ -228,12 +269,19 @@ readloop: | |
1376 | movl %ebx,dv_block_t_reorder(%ebp) | |
1377 | ||
1378 | /* %eax is bits */ | |
1379 | - | |
1380 | +#ifdef __PIC__ | |
1381 | + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx | |
1382 | + | |
1383 | + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx | |
1384 | + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx | |
1385 | + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx | |
1386 | +#else | |
1387 | movsbl dv_vlc_class_lookup5(%ecx),%ecx | |
1388 | ||
1389 | movl dv_vlc_index_mask(,%ecx,4),%ebx | |
1390 | movl dv_vlc_lookups(,%ecx,4),%edx | |
1391 | movl dv_vlc_index_rshift(,%ecx,4),%ecx | |
1392 | +#endif | |
1393 | andl %eax,%ebx | |
1394 | sarl %cl,%ebx | |
1395 | ||
1396 | @@ -256,7 +304,11 @@ readloop: | |
1397 | movl %edx,%ecx | |
1398 | sarl $8,%ecx | |
1399 | andl $0xff,%ecx | |
1400 | +#ifdef __PIC__ | |
1401 | + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx | |
1402 | +#else | |
1403 | movl sign_mask(,%ecx,4),%ecx | |
1404 | +#endif | |
1405 | andl %ecx,%eax | |
1406 | negl %eax | |
1407 | sarl $31,%eax | |
1408 | @@ -326,10 +378,16 @@ alldone: | |
1409 | ||
1410 | slowpath: | |
1411 | /* slow path: use dv_decode_vlc */; | |
1412 | +#ifdef __PIC__ | |
1413 | + pushl %esi /* reserve the argument slot; it temporarily holds the PIC base */ | |
1414 | + leal vlc@GOTOFF(%esi),%esi /* esi = &vlc, formed relative to the PIC base */ | |
1415 | + xchgl %esi,(%esp) /* last parameter is &vlc; esi gets the PIC base back */ | |
1416 | +#else | |
1417 | pushl $vlc /* last parameter is &vlc */ | |
1418 | +#endif | |
1419 | pushl %edx /* bits_left */ | |
1420 | pushl %eax /* bits */ | |
1421 | - call dv_decode_vlc | |
1422 | + call asm_dv_decode_vlc | |
1423 | addl $12,%esp | |
1424 | test $0x80,%edx /* If (vlc.run < 0) break */ | |
1425 | jne escape | |
1426 | @@ -359,6 +417,8 @@ show16: | |
1427 | pushl %esi | |
1428 | pushl %ebp | |
1429 | ||
1430 | + LOAD_PIC_REG(si) | |
1431 | + | |
1432 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1433 | ||
1434 | movl ARGn(1),%eax /* quality */ | |
1435 | @@ -373,7 +434,11 @@ dv_parse_video_segment: | |
1436 | jz its_mono | |
1437 | movl $6,%ebx | |
1438 | its_mono: | |
1439 | +#ifdef __PIC__ | |
1440 | + movl %ebx,n_blocks@GOTOFF(%esi) | |
1441 | +#else | |
1442 | movl %ebx,n_blocks | |
1443 | +#endif | |
1444 | ||
1445 | /* | |
1446 | * ebx seg/b | |
1447 | @@ -384,15 +449,22 @@ its_mono: | |
1448 | * ebp bl | |
1449 | */ | |
1450 | movl ARGn(0),%ebx | |
1451 | +#ifndef __PIC__ | |
1452 | movl dv_videosegment_t_bs(%ebx),%esi | |
1453 | movl bitstream_t_buf(%esi),%esi | |
1454 | +#endif | |
1455 | leal dv_videosegment_t_mb(%ebx),%edi | |
1456 | ||
1457 | movl $0,%eax | |
1458 | movl $0,%ecx | |
1459 | macloop: | |
1460 | +#ifdef __PIC__ | |
1461 | + movl %eax,m@GOTOFF(%esi) | |
1462 | + movl %ecx,mb_start@GOTOFF(%esi) | |
1463 | +#else | |
1464 | movl %eax,m | |
1465 | movl %ecx,mb_start | |
1466 | +#endif | |
1467 | ||
1468 | movl ARGn(0),%ebx | |
1469 | ||
1470 | @@ -400,7 +472,13 @@ macloop: | |
1471 | /* mb->qno = bitstream_get(bs,4); */ | |
1472 | movl %ecx,%edx | |
1473 | shr $3,%edx | |
1474 | +#ifdef __PIC__ | |
1475 | + movl dv_videosegment_t_bs(%ebx),%ecx | |
1476 | + movl bitstream_t_buf(%ecx),%ecx | |
1477 | + movzbl 3(%ecx,%edx,1),%edx | |
1478 | +#else | |
1479 | movzbl 3(%esi,%edx,1),%edx | |
1480 | +#endif | |
1481 | andl $0xf,%edx | |
1482 | movl %edx,dv_macroblock_t_qno(%edi) | |
1483 | ||
1484 | @@ -411,7 +489,11 @@ macloop: | |
1485 | movl %edx,dv_macroblock_t_eob_count(%edi) | |
1486 | ||
1487 | /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ | |
1488 | +#ifdef __PIC__ | |
1489 | + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx | |
1490 | +#else | |
1491 | movl dv_super_map_vertical(,%eax,4),%edx | |
1492 | +#endif | |
1493 | movl dv_videosegment_t_i(%ebx),%ecx | |
1494 | addl %ecx,%edx | |
1495 | ||
1496 | @@ -422,11 +504,20 @@ skarly: | |
1497 | andl $1,%ecx | |
1498 | shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ | |
1499 | ||
1500 | +#ifdef __PIC__ | |
1501 | + leal mod_10@GOTOFF(%esi,%edx),%edx /* edx = &mod_10[edx]; edx (seg->i + dv_super_map_vertical[m]) must stay in the address, as in the non-PIC form below */ | |
1502 | + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1503 | +#else | |
1504 | movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1505 | +#endif | |
1506 | movl %edx,dv_macroblock_t_i(%edi) | |
1507 | ||
1508 | /* mb->j = dv_super_map_horizontal[m]; */ | |
1509 | +#ifdef __PIC__ | |
1510 | + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx | |
1511 | +#else | |
1512 | movl dv_super_map_horizontal(,%eax,4),%edx | |
1513 | +#endif | |
1514 | movl %edx,dv_macroblock_t_j(%edi) | |
1515 | ||
1516 | /* mb->k = seg->k; */ | |
1517 | @@ -445,12 +536,28 @@ blkloop: | |
1518 | +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
1519 | */ | |
1520 | /* dc = bitstream_get(bs,9); */ | |
1521 | +#ifdef __PIC__ | |
1522 | + movl mb_start@GOTOFF(%esi),%ecx | |
1523 | +#else | |
1524 | movl mb_start,%ecx | |
1525 | +#endif | |
1526 | shr $3,%ecx | |
1527 | +#ifdef __PIC__ | |
1528 | + movzbl blk_start@GOTOFF(%esi,%ebx),%edx | |
1529 | +#else | |
1530 | movzbl blk_start(%ebx),%edx | |
1531 | +#endif | |
1532 | addl %ecx,%edx | |
1533 | +#ifdef __PIC__ | |
1534 | + movl ARGn(0),%ecx | |
1535 | + movl dv_videosegment_t_bs(%ecx),%ecx | |
1536 | + movl bitstream_t_buf(%ecx),%ecx | |
1537 | + movzbl (%ecx,%edx,1),%eax /* hi byte */ | |
1538 | + movzbl 1(%ecx,%edx,1),%ecx /* lo byte */ | |
1539 | +#else | |
1540 | movzbl (%esi,%edx,1),%eax /* hi byte */ | |
1541 | movzbl 1(%esi,%edx,1),%ecx /* lo byte */ | |
1542 | +#endif | |
1543 | shll $8,%eax | |
1544 | orl %ecx,%eax | |
1545 | ||
1546 | @@ -477,7 +584,11 @@ blkloop: | |
1547 | ||
1548 | /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ | |
1549 | shll $6,%eax | |
1550 | +#ifdef __PIC__ | |
1551 | + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax | |
1552 | +#else | |
1553 | addl $(dv_reorder+1),%eax | |
1554 | +#endif | |
1555 | movl %eax,dv_block_t_reorder(%ebp) | |
1556 | ||
1557 | /* bl->reorder_sentinel = bl->reorder + 63; */ | |
1558 | @@ -485,13 +596,22 @@ blkloop: | |
1559 | movl %eax,dv_block_t_reorder_sentinel(%ebp) | |
1560 | ||
1561 | /* bl->offset= mb_start + dv_parse_bit_start[b]; */ | |
1562 | +#ifdef __PIC__ | |
1563 | + movl mb_start@GOTOFF(%esi),%ecx | |
1564 | + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax | |
1565 | +#else | |
1566 | movl mb_start,%ecx | |
1567 | movl dv_parse_bit_start(,%ebx,4),%eax | |
1568 | +#endif | |
1569 | addl %ecx,%eax | |
1570 | movl %eax,dv_block_t_offset(%ebp) | |
1571 | ||
1572 | /* bl->end= mb_start + dv_parse_bit_end[b]; */ | |
1573 | +#ifdef __PIC__ | |
1574 | + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax | |
1575 | +#else | |
1576 | movl dv_parse_bit_end(,%ebx,4),%eax | |
1577 | +#endif | |
1578 | addl %ecx,%eax | |
1579 | movl %eax,dv_block_t_end(%ebp) | |
1580 | ||
1581 | @@ -503,7 +623,11 @@ blkloop: | |
1582 | /* no AC pass. Just zero out the remaining coeffs */ | |
1583 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1584 | pxor %mm0,%mm0 | |
1585 | +#ifdef __PIC__ | |
1586 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1587 | +#else | |
1588 | pand const_f_0_0_0,%mm1 | |
1589 | +#endif | |
1590 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1591 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1592 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1593 | @@ -528,18 +652,27 @@ do_ac_pass: | |
1594 | pushl %ebp | |
1595 | pushl %edi | |
1596 | pushl %eax | |
1597 | - call dv_parse_ac_coeffs_pass0 | |
1598 | + call asm_dv_parse_ac_coeffs_pass0 | |
1599 | addl $12,%esp | |
1600 | done_ac: | |
1601 | ||
1602 | +#ifdef __PIC__ | |
1603 | + movl n_blocks@GOTOFF(%esi),%eax | |
1604 | +#else | |
1605 | movl n_blocks,%eax | |
1606 | +#endif | |
1607 | addl $dv_block_t_size,%ebp | |
1608 | incl %ebx | |
1609 | cmpl %eax,%ebx | |
1610 | jnz blkloop | |
1611 | ||
1612 | +#ifdef __PIC__ | |
1613 | + movl m@GOTOFF(%esi),%eax | |
1614 | + movl mb_start@GOTOFF(%esi),%ecx | |
1615 | +#else | |
1616 | movl m,%eax | |
1617 | movl mb_start,%ecx | |
1618 | +#endif | |
1619 | addl $(8 * 80),%ecx | |
1620 | addl $dv_macroblock_t_size,%edi | |
1621 | incl %eax | |
1622 | @@ -557,7 +690,7 @@ done_ac: | |
1623 | ||
1624 | andl $DV_QUALITY_AC_MASK,%eax | |
1625 | cmpl $DV_QUALITY_AC_2,%eax | |
1626 | - jz dv_parse_ac_coeffs | |
1627 | + jz asm_dv_parse_ac_coeffs | |
1628 | movl $0,%eax | |
1629 | ret | |
1630 |