Patch table: libdv-0.104 PIC / TEXTREL-removal changes (rendered diff; lines below are patch payload and must be preserved byte-for-byte)
Commit | Line | Data |
---|---|---|
5e993f12 | 1 | diff -Nurp libdv-0.104-old/libdv/asm_common.S libdv-0.104/libdv/asm_common.S |
2 | --- libdv-0.104-old/libdv/asm_common.S 1970-01-01 01:00:00.000000000 +0100 | |
3 | +++ libdv-0.104/libdv/asm_common.S 2006-01-01 22:44:43.000000000 +0100 | |
4 | @@ -0,0 +1,37 @@ | |
5 | +/* public domain, do what you want */ | |
6 | + | |
7 | +#ifdef __PIC__ | |
8 | +# define MUNG(sym) sym##@GOTOFF(%ebp) | |
9 | +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args) | |
10 | +#else | |
11 | +# define MUNG(sym) sym | |
12 | +# define MUNG_ARR(sym, args...) sym(,##args) | |
13 | +#endif | |
14 | + | |
15 | +#ifdef __PIC__ | |
16 | +# undef __i686 /* gcc define gets in our way */ | |
17 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
18 | +.globl __i686.get_pc_thunk.bp | |
19 | + .hidden __i686.get_pc_thunk.bp | |
20 | + .type __i686.get_pc_thunk.bp,@function | |
21 | +__i686.get_pc_thunk.bp: | |
22 | + movl (%esp), %ebp | |
23 | + ret | |
24 | +# define LOAD_PIC_REG_BP() \ | |
25 | + call __i686.get_pc_thunk.bp ; \ | |
26 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
27 | + | |
28 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits | |
29 | +.globl __i686.get_pc_thunk.si | |
30 | + .hidden __i686.get_pc_thunk.si | |
31 | + .type __i686.get_pc_thunk.si,@function | |
32 | +__i686.get_pc_thunk.si: | |
33 | + movl (%esp), %esi | |
34 | + ret | |
35 | +# define LOAD_PIC_REG_SI() \ | |
36 | + call __i686.get_pc_thunk.si ; \ | |
37 | + addl $_GLOBAL_OFFSET_TABLE_, %esi | |
38 | +#else | |
39 | +# define LOAD_PIC_REG_BP() | |
40 | +# define LOAD_PIC_REG_SI() | |
41 | +#endif | |
42 | diff -Nurp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S | |
43 | --- libdv-0.104-old/libdv/dct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100 | |
44 | +++ libdv-0.104/libdv/dct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100 | |
45 | @@ -53,17 +53,22 @@ scratch2: .quad 0 | |
46 | scratch3: .quad 0 | |
47 | scratch4: .quad 0 | |
48 | ||
49 | +#include "asm_common.S" | |
50 | + | |
51 | .text | |
52 | ||
53 | .align 8 | |
54 | .global _dv_dct_88_block_mmx | |
55 | +.hidden _dv_dct_88_block_mmx | |
56 | +.type _dv_dct_88_block_mmx,@function | |
57 | _dv_dct_88_block_mmx: | |
58 | ||
59 | pushl %ebp | |
60 | - movl %esp, %ebp | |
61 | pushl %esi | |
62 | ||
63 | - movl 8(%ebp), %esi # source | |
64 | + LOAD_PIC_REG_BP() | |
65 | + | |
66 | + movl 12(%esp), %esi # source | |
67 | ||
68 | # column 0 | |
69 | movq 16*0(%esi), %mm0 # v0 | |
70 | @@ -86,22 +91,22 @@ _dv_dct_88_block_mmx: | |
71 | ||
72 | movq 16*3(%esi), %mm5 # v3 | |
73 | movq 16*4(%esi), %mm7 # v4 | |
74 | - movq %mm7, scratch1 # scratch1: v4 ; | |
75 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
76 | movq %mm5, %mm7 # duplicate v3 | |
77 | - paddw scratch1, %mm5 # v03: v3+v4 | |
78 | - psubw scratch1, %mm7 # v04: v3-v4 | |
79 | - movq %mm5, scratch2 # scratch2: v03 | |
80 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
81 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
82 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
83 | movq %mm0, %mm5 # mm5: v00 | |
84 | ||
85 | - paddw scratch2, %mm0 # v10: v00+v03 | |
86 | - psubw scratch2, %mm5 # v13: v00-v03 | |
87 | - movq %mm3, scratch3 # scratch3: v02 | |
88 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
89 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
90 | + movq %mm3, MUNG(scratch3) # scratch3: v02 | |
91 | movq %mm1, %mm3 # duplicate v01 | |
92 | ||
93 | - paddw scratch3, %mm1 # v11: v01+v02 | |
94 | - psubw scratch3, %mm3 # v12: v01-v02 | |
95 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
96 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
97 | ||
98 | - movq %mm6, scratch4 # scratch4: v05 | |
99 | + movq %mm6, MUNG(scratch4) # scratch4: v05 | |
100 | movq %mm0, %mm6 # duplicate v10 | |
101 | ||
102 | paddw %mm1, %mm0 # v10+v11 | |
103 | @@ -111,10 +116,10 @@ _dv_dct_88_block_mmx: | |
104 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
105 | ||
106 | movq %mm4, %mm0 # mm0: v06 | |
107 | - paddw scratch4, %mm4 # v15: v05+v06 | |
108 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
109 | paddw %mm2, %mm0 # v16: v07+v06 | |
110 | ||
111 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
112 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
113 | psllw $1, %mm4 # v35: compensate the coeefient scale | |
114 | ||
115 | movq %mm4, %mm6 # duplicate v35 | |
116 | @@ -123,7 +128,7 @@ _dv_dct_88_block_mmx: | |
117 | ||
118 | paddw %mm5, %mm3 # v22: v12+v13 | |
119 | ||
120 | - pmulhw WA1, %mm3 # v32~: WA1*v22 | |
121 | + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22 | |
122 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
123 | movq %mm5, %mm6 # duplicate v13 | |
124 | ||
125 | @@ -134,13 +139,13 @@ _dv_dct_88_block_mmx: | |
126 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
127 | ||
128 | ||
129 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
130 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
131 | movq %mm0, %mm5 # duplicate v16 | |
132 | ||
133 | psubw %mm7, %mm0 # va1: v16-v14n | |
134 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
135 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
136 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
137 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
138 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
139 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
140 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale | |
141 | psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale | |
142 | ||
143 | @@ -188,22 +193,22 @@ _dv_dct_88_block_mmx: | |
144 | ||
145 | movq 16*3(%esi), %mm5 # v3 | |
146 | movq 16*4(%esi), %mm7 # v4 | |
147 | - movq %mm7, scratch1 # scratch1: v4 ; | |
148 | + movq %mm7, MUNG(scratch1) # scratch1: v4 ; | |
149 | movq %mm5, %mm7 # duplicate v3 | |
150 | - paddw scratch1, %mm5 # v03: v3+v4 | |
151 | - psubw scratch1, %mm7 # v04: v3-v4 | |
152 | - movq %mm5, scratch2 # scratch2: v03 | |
153 | + paddw MUNG(scratch1), %mm5 # v03: v3+v4 | |
154 | + psubw MUNG(scratch1), %mm7 # v04: v3-v4 | |
155 | + movq %mm5, MUNG(scratch2) # scratch2: v03 | |
156 | movq %mm0, %mm5 # mm5: v00 | |
157 | ||
158 | - paddw scratch2, %mm0 # v10: v00+v03 | |
159 | - psubw scratch2, %mm5 # v13: v00-v03 | |
160 | - movq %mm3, scratch3 # scratc3: v02 | |
161 | + paddw MUNG(scratch2), %mm0 # v10: v00+v03 | |
162 | + psubw MUNG(scratch2), %mm5 # v13: v00-v03 | |
163 | + movq %mm3, MUNG(scratch3) # scratc3: v02 | |
164 | movq %mm1, %mm3 # duplicate v01 | |
165 | ||
166 | - paddw scratch3, %mm1 # v11: v01+v02 | |
167 | - psubw scratch3, %mm3 # v12: v01-v02 | |
168 | + paddw MUNG(scratch3), %mm1 # v11: v01+v02 | |
169 | + psubw MUNG(scratch3), %mm3 # v12: v01-v02 | |
170 | ||
171 | - movq %mm6, scratch4 # scratc4: v05 | |
172 | + movq %mm6, MUNG(scratch4) # scratc4: v05 | |
173 | movq %mm0, %mm6 # duplicate v10 | |
174 | ||
175 | paddw %mm1, %mm0 # v10+v11 | |
176 | @@ -213,10 +218,10 @@ _dv_dct_88_block_mmx: | |
177 | movq %mm6, 16*4(%esi) # out4: v10-v11 | |
178 | ||
179 | movq %mm4, %mm0 # mm0: v06 | |
180 | - paddw scratch4, %mm4 # v15: v05+v06 | |
181 | + paddw MUNG(scratch4), %mm4 # v15: v05+v06 | |
182 | paddw %mm2, %mm0 # v16: v07+v06 | |
183 | ||
184 | - pmulhw WA3, %mm4 # v35~: WA3*v15 | |
185 | + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 | |
186 | psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale | |
187 | ||
188 | movq %mm4, %mm6 # duplicate v35 | |
189 | @@ -225,7 +230,7 @@ _dv_dct_88_block_mmx: | |
190 | ||
191 | paddw %mm5, %mm3 # v22: v12+v13 | |
192 | ||
193 | - pmulhw WA1, %mm3 # v32~: WA3*v15 | |
194 | + pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15 | |
195 | psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale | |
196 | movq %mm5, %mm6 # duplicate v13 | |
197 | ||
198 | @@ -235,13 +240,13 @@ _dv_dct_88_block_mmx: | |
199 | movq %mm5, 16*2(%esi) # out2: v13+v32 | |
200 | movq %mm6, 16*6(%esi) # out6: v13-v32 | |
201 | ||
202 | - paddw scratch4, %mm7 # v14n: v04+v05 | |
203 | + paddw MUNG(scratch4), %mm7 # v14n: v04+v05 | |
204 | movq %mm0, %mm5 # duplicate v16 | |
205 | ||
206 | psubw %mm7, %mm0 # va1: v16-v14n | |
207 | - pmulhw WA2, %mm7 # v34~~: v14n*WA2 | |
208 | - pmulhw WA5, %mm0 # va0~: va1*WA5 | |
209 | - pmulhw WA4, %mm5 # v36~~: v16*WA4 | |
210 | + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 | |
211 | + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 | |
212 | + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 | |
213 | psllw $16-NSHIFT, %mm7 | |
214 | psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient | |
215 | # scale note that WA4 is shifted 1 bit less than the others | |
216 | @@ -272,6 +277,8 @@ _dv_dct_88_block_mmx: | |
217 | ||
218 | .align 8 | |
219 | .global _dv_dct_block_mmx_postscale_88 | |
220 | +.hidden _dv_dct_block_mmx_postscale_88 | |
221 | +.type _dv_dct_block_mmx_postscale_88,@function | |
222 | _dv_dct_block_mmx_postscale_88: | |
223 | ||
224 | pushl %ebp | |
225 | @@ -748,14 +755,17 @@ _dv_dct_block_mmx_postscale_88: | |
226 | ||
227 | .align 8 | |
228 | .global _dv_dct_248_block_mmx | |
229 | +.hidden _dv_dct_248_block_mmx | |
230 | +.type _dv_dct_248_block_mmx,@function | |
231 | _dv_dct_248_block_mmx: | |
232 | ||
233 | pushl %ebp | |
234 | - movl %esp, %ebp | |
235 | pushl %esi | |
236 | pushl %edi | |
237 | ||
238 | - movl 8(%ebp), %esi # source | |
239 | + LOAD_PIC_REG_BP() | |
240 | + | |
241 | + movl 16(%esp), %esi # source | |
242 | ||
243 | # column 0 | |
244 | ||
245 | @@ -779,7 +789,7 @@ _dv_dct_248_block_mmx: | |
246 | paddw %mm1, %mm0 # v20: v10+v11 | |
247 | psubw %mm1, %mm3 # v21: v10-v11 | |
248 | ||
249 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
250 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
251 | movq %mm4, %mm2 | |
252 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
253 | ||
254 | @@ -818,7 +828,7 @@ _dv_dct_248_block_mmx: | |
255 | paddw %mm1, %mm0 # v20: v10+v11 | |
256 | psubw %mm1, %mm3 # v21: v10-v11 | |
257 | ||
258 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
259 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
260 | movq %mm4, %mm2 | |
261 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
262 | ||
263 | @@ -855,7 +865,7 @@ _dv_dct_248_block_mmx: | |
264 | paddw %mm1, %mm0 # v20: v10+v11 | |
265 | psubw %mm1, %mm3 # v21: v10-v11 | |
266 | ||
267 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
268 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
269 | movq %mm4, %mm2 | |
270 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
271 | ||
272 | @@ -892,7 +902,7 @@ _dv_dct_248_block_mmx: | |
273 | paddw %mm1, %mm0 # v20: v10+v11 | |
274 | psubw %mm1, %mm3 # v21: v10-v11 | |
275 | ||
276 | - pmulhw WA1, %mm5 # v32~: WA1*v22 | |
277 | + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 | |
278 | movq %mm4, %mm2 | |
279 | psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale | |
280 | ||
281 | @@ -912,6 +922,8 @@ _dv_dct_248_block_mmx: | |
282 | ||
283 | .align 8 | |
284 | .global _dv_dct_248_block_mmx_post_sum | |
285 | +.hidden _dv_dct_248_block_mmx_post_sum | |
286 | +.type _dv_dct_248_block_mmx_post_sum,@function | |
287 | _dv_dct_248_block_mmx_post_sum: | |
288 | ||
289 | pushl %ebp | |
290 | @@ -992,6 +1004,8 @@ _dv_dct_248_block_mmx_post_sum: | |
291 | ||
292 | .align 8 | |
293 | .global _dv_dct_block_mmx_postscale_248 | |
294 | +.hidden _dv_dct_block_mmx_postscale_248 | |
295 | +.type _dv_dct_block_mmx_postscale_248,@function | |
296 | _dv_dct_block_mmx_postscale_248: | |
297 | ||
298 | pushl %ebp | |
299 | diff -Nurp libdv-0.104-old/libdv/dct_block_mmx_x86_64.S libdv-0.104/libdv/dct_block_mmx_x86_64.S | |
300 | --- libdv-0.104-old/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
301 | +++ libdv-0.104/libdv/dct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
302 | @@ -57,6 +57,8 @@ scratch4: .quad 0 | |
303 | ||
304 | .align 8 | |
305 | .global _dv_dct_88_block_mmx_x86_64 | |
306 | +.hidden _dv_dct_88_block_mmx_x86_64 | |
307 | +.type _dv_dct_88_block_mmx_x86_64,@function | |
308 | _dv_dct_88_block_mmx_x86_64: | |
309 | ||
310 | /* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */ | |
311 | @@ -269,6 +271,8 @@ _dv_dct_88_block_mmx_x86_64: | |
312 | ||
313 | .align 8 | |
314 | .global _dv_dct_block_mmx_x86_64_postscale_88 | |
315 | +.hidden _dv_dct_block_mmx_x86_64_postscale_88 | |
316 | +.type _dv_dct_block_mmx_x86_64_postscale_88,@function | |
317 | _dv_dct_block_mmx_x86_64_postscale_88: | |
318 | ||
319 | /* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */ | |
320 | diff -Nurp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c | |
321 | --- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200 | |
322 | +++ libdv-0.104/libdv/dv.c 2006-01-01 22:44:43.000000000 +0100 | |
323 | @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp | |
324 | } /* dv_reconfigure */ | |
325 | ||
326 | ||
327 | +extern uint8_t dv_quant_offset[4]; | |
328 | +extern uint8_t dv_quant_shifts[22][4]; | |
329 | + | |
330 | static inline void | |
331 | dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { | |
332 | int i; | |
333 | @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d | |
334 | dv_idct_248 (co248, mb->b[i].coeffs); | |
335 | } else { | |
336 | #if ARCH_X86 | |
337 | - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
338 | + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts); | |
339 | _dv_idct_88(mb->b[i].coeffs); | |
340 | #elif ARCH_X86_64 | |
341 | _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); | |
342 | @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv | |
343 | dv_idct_248 (co248, mb->b[b].coeffs); | |
344 | } else { | |
345 | #if ARCH_X86 | |
346 | - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); | |
347 | + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts); | |
348 | _dv_weight_88_inverse(bl->coeffs); | |
349 | _dv_idct_88(bl->coeffs); | |
350 | #elif ARCH_X86_64 | |
351 | diff -Nurp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c | |
352 | --- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100 | |
353 | +++ libdv-0.104/libdv/encode.c 2006-01-01 22:44:43.000000000 +0100 | |
354 | @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl | |
355 | } | |
356 | ||
357 | extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
358 | - dv_vlc_entry_t ** out); | |
359 | + dv_vlc_entry_t ** out, | |
360 | + dv_vlc_entry_t * lookup); | |
361 | ||
362 | extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, | |
363 | dv_vlc_entry_t ** out); | |
364 | @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv | |
365 | #elif ARCH_X86 | |
366 | int num_bits; | |
367 | ||
368 | - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); | |
369 | + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); | |
370 | emms(); | |
371 | #else | |
372 | int num_bits; | |
373 | @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv | |
374 | return num_bits; | |
375 | } | |
376 | ||
377 | -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); | |
378 | +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); | |
379 | extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); | |
380 | ||
381 | extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) | |
382 | @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl | |
383 | #elif ARCH_X86_64 | |
384 | return _dv_vlc_num_bits_block_x86_64(coeffs); | |
385 | #else | |
386 | - return _dv_vlc_num_bits_block_x86(coeffs); | |
387 | + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); | |
388 | #endif | |
389 | } | |
390 | ||
391 | diff -Nurp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S | |
392 | --- libdv-0.104-old/libdv/encode_x86.S 2006-01-01 22:44:22.000000000 +0100 | |
393 | +++ libdv-0.104/libdv/encode_x86.S 2006-01-01 22:44:43.000000000 +0100 | |
394 | @@ -23,13 +23,11 @@ | |
395 | * The libdv homepage is http://libdv.sourceforge.net/. | |
396 | */ | |
397 | ||
398 | -.data | |
399 | -ALLONE: .word 1,1,1,1 | |
400 | -VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
401 | - | |
402 | .text | |
403 | ||
404 | .global _dv_vlc_encode_block_mmx | |
405 | +.hidden _dv_vlc_encode_block_mmx | |
406 | +.type _dv_vlc_encode_block_mmx,@function | |
407 | _dv_vlc_encode_block_mmx: | |
408 | pushl %ebx | |
409 | pushl %esi | |
410 | @@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx: | |
411 | ||
412 | movl $63, %ecx | |
413 | ||
414 | - movl vlc_encode_lookup, %esi | |
415 | + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup | |
416 | ||
417 | pxor %mm0, %mm0 | |
418 | pxor %mm2, %mm2 | |
419 | - movq VLCADDMASK, %mm1 | |
420 | + pushl $0x000000FF # these four lines | |
421 | + pushl $0x000000FF # load VLCADDMASK | |
422 | + movq (%esp), %mm1 # into %mm1 off the stack | |
423 | + addl $8, %esp # --> no TEXTRELs | |
424 | xorl %ebp, %ebp | |
425 | subl $8, %edx | |
426 | vlc_encode_block_mmx_loop: | |
427 | @@ -106,6 +107,8 @@ vlc_encode_block_out: | |
428 | ret | |
429 | ||
430 | .global _dv_vlc_num_bits_block_x86 | |
431 | +.hidden _dv_vlc_num_bits_block_x86 | |
432 | +.type _dv_vlc_num_bits_block_x86,@function | |
433 | _dv_vlc_num_bits_block_x86: | |
434 | pushl %ebx | |
435 | pushl %esi | |
436 | @@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86: | |
437 | addl $2, %edi | |
438 | ||
439 | movl $63, %ecx | |
440 | - movl vlc_num_bits_lookup, %esi | |
441 | + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup | |
442 | ||
443 | vlc_num_bits_block_x86_loop: | |
444 | movw (%edi), %ax | |
445 | @@ -164,6 +167,8 @@ vlc_num_bits_block_out: | |
446 | ret | |
447 | ||
448 | .global _dv_vlc_encode_block_pass_1_x86 | |
449 | +.hidden _dv_vlc_encode_block_pass_1_x86 | |
450 | +.type _dv_vlc_encode_block_pass_1_x86,@function | |
451 | _dv_vlc_encode_block_pass_1_x86: | |
452 | pushl %ebx | |
453 | pushl %esi | |
454 | @@ -240,6 +245,8 @@ vlc_encode_block_pass1_x86_out: | |
455 | ret | |
456 | ||
457 | .global _dv_classify_mmx | |
458 | +.hidden _dv_classify_mmx | |
459 | +.type _dv_classify_mmx,@function | |
460 | _dv_classify_mmx: | |
461 | ||
462 | pushl %ebp | |
463 | @@ -345,6 +352,8 @@ _dv_classify_mmx: | |
464 | don't know why... */ | |
465 | ||
466 | .global _dv_reorder_block_mmx | |
467 | +.hidden _dv_reorder_block_mmx | |
468 | +.type _dv_reorder_block_mmx,@function | |
469 | _dv_reorder_block_mmx: | |
470 | ||
471 | pushl %ebp | |
472 | @@ -460,6 +469,8 @@ reorder_loop: | |
473 | ret | |
474 | ||
475 | .global _dv_need_dct_248_mmx_rows | |
476 | +.hidden _dv_need_dct_248_mmx_rows | |
477 | +.type _dv_need_dct_248_mmx_rows,@function | |
478 | _dv_need_dct_248_mmx_rows: | |
479 | ||
480 | pushl %ebp | |
481 | @@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows: | |
482 | paddw %mm5, %mm1 | |
483 | ||
484 | paddw %mm1, %mm0 | |
485 | - | |
486 | - pmaddwd ALLONE, %mm0 | |
487 | + | |
488 | + pushl $0x00010001 # these four lines | |
489 | + pushl $0x00010001 # load ALLONE | |
490 | + pmaddwd (%esp), %mm0 # into %mm0 off the stack | |
491 | + addl $8, %esp # --> no TEXTRELs | |
492 | movq %mm0, %mm1 | |
493 | psrlq $32, %mm1 | |
494 | paddd %mm1, %mm0 | |
495 | diff -Nurp libdv-0.104-old/libdv/encode_x86_64.S libdv-0.104/libdv/encode_x86_64.S | |
496 | --- libdv-0.104-old/libdv/encode_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
497 | +++ libdv-0.104/libdv/encode_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
498 | @@ -30,6 +30,8 @@ VLCADDMASK: .byte 255,0,0,0,255,0,0,0 | |
499 | .text | |
500 | ||
501 | .global _dv_vlc_encode_block_mmx_x86_64 | |
502 | +.hidden _dv_vlc_encode_block_mmx_x86_64 | |
503 | +.type _dv_vlc_encode_block_mmx_x86_64,@function | |
504 | _dv_vlc_encode_block_mmx_x86_64: | |
505 | ||
506 | /* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, | |
507 | @@ -113,6 +115,8 @@ vlc_encode_block_out: | |
508 | ret | |
509 | ||
510 | .global _dv_vlc_num_bits_block_x86_64 | |
511 | +.hidden _dv_vlc_num_bits_block_x86_64 | |
512 | +.type _dv_vlc_num_bits_block_x86_64,@function | |
513 | _dv_vlc_num_bits_block_x86_64: | |
514 | ||
515 | /* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */ | |
516 | @@ -173,6 +177,8 @@ vlc_num_bits_block_out: | |
517 | ret | |
518 | ||
519 | .global _dv_vlc_encode_block_pass_1_x86_64 | |
520 | +.hidden _dv_vlc_encode_block_pass_1_x86_64 | |
521 | +.type _dv_vlc_encode_block_pass_1_x86_64,@function | |
522 | _dv_vlc_encode_block_pass_1_x86_64: | |
523 | ||
524 | /* | |
525 | @@ -251,6 +257,8 @@ vlc_encode_block_pass1_x86_out: | |
526 | ret | |
527 | ||
528 | .global _dv_classify_mmx_x86_64 | |
529 | +.hidden _dv_classify_mmx_x86_64 | |
530 | +.type _dv_classify_mmx_x86_64,@function | |
531 | _dv_classify_mmx_x86_64: | |
532 | ||
533 | /* extern int _dv_classify_mmx_x86_64(dv_coeff_t * a, rdi | |
534 | @@ -355,6 +363,8 @@ _dv_classify_mmx_x86_64: | |
535 | don't know why... */ | |
536 | ||
537 | .global _dv_reorder_block_mmx_x86_64 | |
538 | +.hidden _dv_reorder_block_mmx_x86_64 | |
539 | +.type _dv_reorder_block_mmx_x86_64,@function | |
540 | _dv_reorder_block_mmx_x86_64: | |
541 | ||
542 | /*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t * a, rdi | |
543 | @@ -469,6 +479,8 @@ reorder_loop: | |
544 | ret | |
545 | ||
546 | .global _dv_need_dct_248_mmx_x86_64_rows | |
547 | +.hidden _dv_need_dct_248_mmx_x86_64_rows | |
548 | +.type _dv_need_dct_248_mmx_x86_64_rows,@function | |
549 | _dv_need_dct_248_mmx_x86_64_rows: | |
550 | ||
551 | /* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl); rdi */ | |
552 | diff -Nurp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S | |
553 | --- libdv-0.104-old/libdv/idct_block_mmx.S 2006-01-01 22:44:22.000000000 +0100 | |
554 | +++ libdv-0.104/libdv/idct_block_mmx.S 2006-01-01 22:44:43.000000000 +0100 | |
555 | @@ -8,16 +8,22 @@ | |
556 | ||
557 | ||
558 | ||
559 | +#include "asm_common.S" | |
560 | + | |
561 | .text | |
562 | + | |
563 | .align 4 | |
564 | .globl _dv_idct_block_mmx | |
565 | +.hidden _dv_idct_block_mmx | |
566 | .type _dv_idct_block_mmx,@function | |
567 | _dv_idct_block_mmx: | |
568 | pushl %ebp | |
569 | - movl %esp,%ebp | |
570 | pushl %esi | |
571 | - leal preSC, %ecx | |
572 | - movl 8(%ebp),%esi /* source matrix */ | |
573 | + | |
574 | + LOAD_PIC_REG_BP() | |
575 | + | |
576 | + leal MUNG(preSC), %ecx | |
577 | + movl 12(%esp),%esi /* source matrix */ | |
578 | ||
579 | /* | |
580 | * column 0: even part | |
581 | @@ -35,7 +41,7 @@ _dv_idct_block_mmx: | |
582 | movq %mm1, %mm2 /* added 11/1/96 */ | |
583 | pmulhw 8*8(%esi),%mm5 /* V8 */ | |
584 | psubsw %mm0, %mm1 /* V16 */ | |
585 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ | |
586 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ | |
587 | paddsw %mm0, %mm2 /* V17 */ | |
588 | movq %mm2, %mm0 /* duplicate V17 */ | |
589 | psraw $1, %mm2 /* t75=t82 */ | |
590 | @@ -76,7 +82,7 @@ _dv_idct_block_mmx: | |
591 | paddsw %mm0, %mm3 /* V29 ; free mm0 */ | |
592 | movq %mm7, %mm1 /* duplicate V26 */ | |
593 | psraw $1, %mm3 /* t91=t94 */ | |
594 | - pmulhw x539f539f539f539f,%mm7 /* V33 */ | |
595 | + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ | |
596 | psraw $1, %mm1 /* t96 */ | |
597 | movq %mm5, %mm0 /* duplicate V2 */ | |
598 | psraw $2, %mm4 /* t85=t87 */ | |
599 | @@ -84,15 +90,15 @@ _dv_idct_block_mmx: | |
600 | psubsw %mm4, %mm0 /* V28 ; free mm4 */ | |
601 | movq %mm0, %mm2 /* duplicate V28 */ | |
602 | psraw $1, %mm5 /* t90=t93 */ | |
603 | - pmulhw x4546454645464546,%mm0 /* V35 */ | |
604 | + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ | |
605 | psraw $1, %mm2 /* t97 */ | |
606 | movq %mm5, %mm4 /* duplicate t90=t93 */ | |
607 | psubsw %mm2, %mm1 /* V32 ; free mm2 */ | |
608 | - pmulhw x61f861f861f861f8,%mm1 /* V36 */ | |
609 | + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ | |
610 | psllw $1, %mm7 /* t107 */ | |
611 | paddsw %mm3, %mm5 /* V31 */ | |
612 | psubsw %mm3, %mm4 /* V30 ; free mm3 */ | |
613 | - pmulhw x5a825a825a825a82,%mm4 /* V34 */ | |
614 | + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ | |
615 | nop | |
616 | psubsw %mm1, %mm0 /* V38 */ | |
617 | psubsw %mm7, %mm1 /* V37 ; free mm7 */ | |
618 | @@ -159,7 +165,7 @@ _dv_idct_block_mmx: | |
619 | psubsw %mm7, %mm1 /* V50 */ | |
620 | pmulhw 8*9(%esi), %mm5 /* V9 */ | |
621 | paddsw %mm7, %mm2 /* V51 */ | |
622 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ | |
623 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ | |
624 | movq %mm2, %mm6 /* duplicate V51 */ | |
625 | psraw $1, %mm2 /* t138=t144 */ | |
626 | movq %mm3, %mm4 /* duplicate V1 */ | |
627 | @@ -200,11 +206,11 @@ _dv_idct_block_mmx: | |
628 | * even more by doing the correction step in a later stage when the number | |
629 | * is actually multiplied by 16 | |
630 | */ | |
631 | - paddw x0005000200010001, %mm4 | |
632 | + paddw MUNG(x0005000200010001), %mm4 | |
633 | psubsw %mm6, %mm3 /* V60 ; free mm6 */ | |
634 | psraw $1, %mm0 /* t154=t156 */ | |
635 | movq %mm3, %mm1 /* duplicate V60 */ | |
636 | - pmulhw x539f539f539f539f, %mm1 /* V67 */ | |
637 | + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ | |
638 | movq %mm5, %mm6 /* duplicate V3 */ | |
639 | psraw $2, %mm4 /* t148=t150 */ | |
640 | paddsw %mm4, %mm5 /* V61 */ | |
641 | @@ -213,13 +219,13 @@ _dv_idct_block_mmx: | |
642 | psllw $1, %mm1 /* t169 */ | |
643 | paddsw %mm0, %mm5 /* V65 -> result */ | |
644 | psubsw %mm0, %mm4 /* V64 ; free mm0 */ | |
645 | - pmulhw x5a825a825a825a82, %mm4 /* V68 */ | |
646 | + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ | |
647 | psraw $1, %mm3 /* t158 */ | |
648 | psubsw %mm6, %mm3 /* V66 */ | |
649 | movq %mm5, %mm2 /* duplicate V65 */ | |
650 | - pmulhw x61f861f861f861f8, %mm3 /* V70 */ | |
651 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ | |
652 | psllw $1, %mm6 /* t165 */ | |
653 | - pmulhw x4546454645464546, %mm6 /* V69 */ | |
654 | + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ | |
655 | psraw $1, %mm2 /* t172 */ | |
656 | /* moved from next block */ | |
657 | movq 8*5(%esi), %mm0 /* V56 */ | |
658 | @@ -344,7 +350,7 @@ _dv_idct_block_mmx: | |
659 | * movq 8*13(%esi), %mm4 tmt13 | |
660 | */ | |
661 | psubsw %mm4, %mm3 /* V134 */ | |
662 | - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ | |
663 | + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ | |
664 | movq 8*9(%esi), %mm6 /* tmt9 */ | |
665 | paddsw %mm4, %mm5 /* V135 ; mm4 free */ | |
666 | movq %mm0, %mm4 /* duplicate tmt1 */ | |
667 | @@ -373,17 +379,17 @@ _dv_idct_block_mmx: | |
668 | psubsw %mm7, %mm0 /* V144 */ | |
669 | movq %mm0, %mm3 /* duplicate V144 */ | |
670 | paddsw %mm7, %mm2 /* V147 ; free mm7 */ | |
671 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ | |
672 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ | |
673 | movq %mm1, %mm7 /* duplicate tmt3 */ | |
674 | paddsw %mm5, %mm7 /* V145 */ | |
675 | psubsw %mm5, %mm1 /* V146 ; free mm5 */ | |
676 | psubsw %mm1, %mm3 /* V150 */ | |
677 | movq %mm7, %mm5 /* duplicate V145 */ | |
678 | - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ | |
679 | + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ | |
680 | psubsw %mm2, %mm5 /* V148 */ | |
681 | - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ | |
682 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ | |
683 | psllw $2, %mm0 /* t311 */ | |
684 | - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ | |
685 | + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ | |
686 | paddsw %mm2, %mm7 /* V149 ; free mm2 */ | |
687 | psllw $1, %mm1 /* t313 */ | |
688 | nop /* without the nop - freeze here for one clock */ | |
689 | @@ -409,7 +415,7 @@ _dv_idct_block_mmx: | |
690 | paddsw %mm3, %mm6 /* V164 ; free mm3 */ | |
691 | movq %mm4, %mm3 /* duplicate V142 */ | |
692 | psubsw %mm5, %mm4 /* V165 ; free mm5 */ | |
693 | - movq %mm2, scratch7 /* out7 */ | |
694 | + movq %mm2, MUNG(scratch7) /* out7 */ | |
695 | psraw $4, %mm6 | |
696 | psraw $4, %mm4 | |
697 | paddsw %mm5, %mm3 /* V162 */ | |
698 | @@ -420,11 +426,11 @@ _dv_idct_block_mmx: | |
699 | */ | |
700 | movq %mm6, 8*9(%esi) /* out9 */ | |
701 | paddsw %mm1, %mm0 /* V161 */ | |
702 | - movq %mm3, scratch5 /* out5 */ | |
703 | + movq %mm3, MUNG(scratch5) /* out5 */ | |
704 | psubsw %mm1, %mm5 /* V166 ; free mm1 */ | |
705 | movq %mm4, 8*11(%esi) /* out11 */ | |
706 | psraw $4, %mm5 | |
707 | - movq %mm0, scratch3 /* out3 */ | |
708 | + movq %mm0, MUNG(scratch3) /* out3 */ | |
709 | movq %mm2, %mm4 /* duplicate V140 */ | |
710 | movq %mm5, 8*13(%esi) /* out13 */ | |
711 | paddsw %mm7, %mm2 /* V160 */ | |
712 | @@ -434,7 +440,7 @@ _dv_idct_block_mmx: | |
713 | /* moved from the next block */ | |
714 | movq 8*3(%esi), %mm7 | |
715 | psraw $4, %mm4 | |
716 | - movq %mm2, scratch1 /* out1 */ | |
717 | + movq %mm2, MUNG(scratch1) /* out1 */ | |
718 | /* moved from the next block */ | |
719 | movq %mm0, %mm1 | |
720 | movq %mm4, 8*15(%esi) /* out15 */ | |
721 | @@ -491,15 +497,15 @@ _dv_idct_block_mmx: | |
722 | paddsw %mm4, %mm3 /* V113 ; free mm4 */ | |
723 | movq %mm0, %mm4 /* duplicate V110 */ | |
724 | paddsw %mm1, %mm2 /* V111 */ | |
725 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ | |
726 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ | |
727 | psubsw %mm1, %mm5 /* V112 ; free mm1 */ | |
728 | psubsw %mm5, %mm4 /* V116 */ | |
729 | movq %mm2, %mm1 /* duplicate V111 */ | |
730 | - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ | |
731 | + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ | |
732 | psubsw %mm3, %mm2 /* V114 */ | |
733 | - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ | |
734 | + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ | |
735 | paddsw %mm3, %mm1 /* V115 ; free mm3 */ | |
736 | - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ | |
737 | + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ | |
738 | psllw $2, %mm0 /* t266 */ | |
739 | movq %mm1, (%esi) /* save V115 */ | |
740 | psllw $1, %mm5 /* t268 */ | |
741 | @@ -517,7 +523,7 @@ _dv_idct_block_mmx: | |
742 | movq %mm6, %mm3 /* duplicate tmt4 */ | |
743 | psubsw %mm0, %mm6 /* V100 */ | |
744 | paddsw %mm0, %mm3 /* V101 ; free mm0 */ | |
745 | - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ | |
746 | + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ | |
747 | movq %mm7, %mm5 /* duplicate tmt0 */ | |
748 | movq 8*8(%esi), %mm1 /* tmt8 */ | |
749 | paddsw %mm1, %mm7 /* V103 */ | |
750 | @@ -551,10 +557,10 @@ _dv_idct_block_mmx: | |
751 | movq 8*2(%esi), %mm3 /* V123 */ | |
752 | paddsw %mm4, %mm7 /* out0 */ | |
753 | /* moved up from next block */ | |
754 | - movq scratch3, %mm0 | |
755 | + movq MUNG(scratch3), %mm0 | |
756 | psraw $4, %mm7 | |
757 | /* moved up from next block */ | |
758 | - movq scratch5, %mm6 | |
759 | + movq MUNG(scratch5), %mm6 | |
760 | psubsw %mm4, %mm1 /* out14 ; free mm4 */ | |
761 | paddsw %mm3, %mm5 /* out2 */ | |
762 | psraw $4, %mm1 | |
763 | @@ -565,7 +571,7 @@ _dv_idct_block_mmx: | |
764 | movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ | |
765 | psraw $4, %mm2 | |
766 | /* moved up to the prev block */ | |
767 | - movq scratch7, %mm4 | |
768 | + movq MUNG(scratch7), %mm4 | |
769 | /* moved up to the prev block */ | |
770 | psraw $4, %mm0 | |
771 | movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ | |
772 | @@ -579,7 +585,7 @@ _dv_idct_block_mmx: | |
773 | * psraw $4, %mm0 | |
774 | * psraw $4, %mm6 | |
775 | */ | |
776 | - movq scratch1, %mm1 | |
777 | + movq MUNG(scratch1), %mm1 | |
778 | psraw $4, %mm4 | |
779 | movq %mm0, 8*3(%esi) /* out3 */ | |
780 | psraw $4, %mm1 | |
781 | diff -Nurp libdv-0.104-old/libdv/idct_block_mmx_x86_64.S libdv-0.104/libdv/idct_block_mmx_x86_64.S | |
782 | --- libdv-0.104-old/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
783 | +++ libdv-0.104/libdv/idct_block_mmx_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
784 | @@ -17,6 +17,7 @@ | |
785 | .text | |
786 | .align 4 | |
787 | .globl _dv_idct_block_mmx_x86_64 | |
788 | +.hidden _dv_idct_block_mmx_x86_64 | |
789 | .type _dv_idct_block_mmx_x86_64,@function | |
790 | _dv_idct_block_mmx_x86_64: | |
791 | /* void _dv_idct_88(dv_coeff_t *block) */ | |
792 | diff -Nurp libdv-0.104-old/libdv/parse.c libdv-0.104/libdv/parse.c | |
793 | --- libdv-0.104-old/libdv/parse.c 2004-10-20 05:49:24.000000000 +0200 | |
794 | +++ libdv-0.104/libdv/parse.c 2006-01-01 22:44:43.000000000 +0100 | |
795 | @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se | |
796 | exit(0); | |
797 | #endif | |
798 | } /* dv_parse_ac_coeffs */ | |
799 | +#if defined __GNUC__ && __ELF__ | |
800 | +# define dv_strong_hidden_alias(name, aliasname) \ | |
801 | + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden"))) | |
802 | +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs); | |
803 | +#else | |
804 | +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); } | |
805 | +#endif | |
806 | ||
807 | /* --------------------------------------------------------------------------- | |
808 | */ | |
809 | diff -Nurp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c | |
810 | --- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200 | |
811 | +++ libdv-0.104/libdv/quant.c 2006-01-01 22:44:43.000000000 +0100 | |
812 | @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1 | |
813 | uint32_t dv_quant_248_mul_tab [2] [22] [64]; | |
814 | uint32_t dv_quant_88_mul_tab [2] [22] [64]; | |
815 | ||
816 | -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); | |
817 | +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][]); | |
818 | extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); | |
819 | static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
820 | static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); | |
821 | @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno | |
822 | _dv_quant_x86_64(block, qno, klass); | |
823 | emms(); | |
824 | #else | |
825 | - _dv_quant_x86(block, qno, klass); | |
826 | + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts); | |
827 | emms(); | |
828 | #endif | |
829 | } | |
830 | diff -Nurp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h | |
831 | --- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200 | |
832 | +++ libdv-0.104/libdv/quant.h 2006-01-01 22:44:43.000000000 +0100 | |
833 | @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block, | |
834 | extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); | |
835 | extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, | |
836 | dv_248_coeff_t *co); | |
837 | -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); | |
838 | +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][]); | |
839 | extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); | |
840 | extern void dv_quant_init (void); | |
841 | #ifdef __cplusplus | |
842 | diff -Nurp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S | |
843 | --- libdv-0.104-old/libdv/quant_x86.S 2006-01-01 22:44:22.000000000 +0100 | |
844 | +++ libdv-0.104/libdv/quant_x86.S 2006-01-01 22:44:43.000000000 +0100 | |
845 | @@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl | |
846 | .text | |
847 | .align 4 | |
848 | .globl _dv_quant_88_inverse_x86 | |
849 | +.hidden _dv_quant_88_inverse_x86 | |
850 | +.type _dv_quant_88_inverse_x86,@function | |
851 | _dv_quant_88_inverse_x86: | |
852 | pushl %ebx | |
853 | pushl %esi | |
854 | @@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86: | |
855 | ||
856 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
857 | movl ARGn(1),%eax /* qno */ | |
858 | + movl ARGn(3),%ebx /* dv_quant_offset */ | |
859 | + addl ARGn(2),%ebx /* class */ | |
860 | + movzbl (%ebx),%ecx | |
861 | movl ARGn(2),%ebx /* class */ | |
862 | - movzbl dv_quant_offset(%ebx),%ecx | |
863 | addl %ecx,%eax | |
864 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
865 | + movl ARGn(4),%edx /* dv_quant_shifts */ | |
866 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
867 | ||
868 | /* extra = (class == 3); */ | |
869 | /* 0 1 2 3 */ | |
870 | @@ -193,6 +198,8 @@ _dv_quant_88_inverse_x86: | |
871 | ||
872 | .align 4 | |
873 | .globl _dv_quant_x86 | |
874 | +.hidden _dv_quant_x86 | |
875 | +.type _dv_quant_x86,@function | |
876 | _dv_quant_x86: | |
877 | pushl %ebx | |
878 | pushl %ecx | |
879 | @@ -212,11 +219,13 @@ _dv_quant_x86: | |
880 | ||
881 | /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ | |
882 | movl ARGn(1),%eax /* qno */ | |
883 | + movl ARGn(3),%ebx /* offset */ | |
884 | + addl ARGn(2),%ebx /* class */ | |
885 | + movzbl (%ebx),%ecx | |
886 | movl ARGn(2),%ebx /* class */ | |
887 | - | |
888 | - movzbl dv_quant_offset(%ebx),%ecx | |
889 | + movl ARGn(4),%edx /* shifts */ | |
890 | addl %ecx,%eax | |
891 | - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ | |
892 | + leal (%edx,%eax,4),%edx /* edx is pq */ | |
893 | ||
894 | /* extra = (class == 3); */ | |
895 | /* 0 1 2 3 */ | |
896 | diff -Nurp libdv-0.104-old/libdv/quant_x86_64.S libdv-0.104/libdv/quant_x86_64.S | |
897 | --- libdv-0.104-old/libdv/quant_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
898 | +++ libdv-0.104/libdv/quant_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
899 | @@ -55,6 +55,8 @@ void _dv_quant_88_inverse(dv_coeff_t *bl | |
900 | .text | |
901 | .align 4 | |
902 | .globl _dv_quant_88_inverse_x86_64 | |
903 | +.hidden _dv_quant_88_inverse_x86_64 | |
904 | +.type _dv_quant_88_inverse_x86_64,@function | |
905 | _dv_quant_88_inverse_x86_64: | |
906 | ||
907 | /* Args are at block=rdi, qno=rsi, class=rdx */ | |
908 | @@ -195,6 +197,8 @@ _dv_quant_88_inverse_x86_64: | |
909 | ||
910 | .align 4 | |
911 | .globl _dv_quant_x86_64 | |
912 | +.hidden _dv_quant_x86_64 | |
913 | +.type _dv_quant_x86_64,@function | |
914 | _dv_quant_x86_64: | |
915 | ||
916 | /* Args are at block=rdi, qno=rsi, class=rdx */ | |
917 | diff -Nurp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S | |
918 | --- libdv-0.104-old/libdv/rgbtoyuv.S 2006-01-01 22:44:22.000000000 +0100 | |
919 | +++ libdv-0.104/libdv/rgbtoyuv.S 2006-01-01 22:44:43.000000000 +0100 | |
920 | @@ -41,9 +41,6 @@ | |
921 | #define DV_WIDTH_SHORT_HALF 720 | |
922 | #define DV_WIDTH_BYTE_HALF 360 | |
923 | ||
924 | -.global _dv_rgbtoycb_mmx | |
925 | -# .global yuvtoycb_mmx | |
926 | - | |
927 | .data | |
928 | ||
929 | .align 8 | |
930 | @@ -110,20 +107,24 @@ VR0GR: .long 0,0 | |
931 | VBG0B: .long 0,0 | |
932 | ||
933 | #endif | |
934 | - | |
935 | + | |
936 | +#include "asm_common.S" | |
937 | + | |
938 | .text | |
939 | ||
940 | -#define _inPtr 8 | |
941 | -#define _rows 12 | |
942 | -#define _columns 16 | |
943 | -#define _outyPtr 20 | |
944 | -#define _outuPtr 24 | |
945 | -#define _outvPtr 28 | |
946 | +#define _inPtr 24+8 | |
947 | +#define _rows 24+12 | |
948 | +#define _columns 24+16 | |
949 | +#define _outyPtr 24+20 | |
950 | +#define _outuPtr 24+24 | |
951 | +#define _outvPtr 24+28 | |
952 | ||
953 | +.global _dv_rgbtoycb_mmx | |
954 | +.hidden _dv_rgbtoycb_mmx | |
955 | +.type _dv_rgbtoycb_mmx,@function | |
956 | _dv_rgbtoycb_mmx: | |
957 | ||
958 | pushl %ebp | |
959 | - movl %esp, %ebp | |
960 | pushl %eax | |
961 | pushl %ebx | |
962 | pushl %ecx | |
963 | @@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx: | |
964 | pushl %esi | |
965 | pushl %edi | |
966 | ||
967 | - leal ZEROSX, %eax #This section gets around a bug | |
968 | + LOAD_PIC_REG_BP() | |
969 | + | |
970 | + leal MUNG(ZEROSX), %eax #This section gets around a bug | |
971 | movq (%eax), %mm0 #unlikely to persist | |
972 | - movq %mm0, ZEROS | |
973 | - leal OFFSETDX, %eax | |
974 | + movq %mm0, MUNG(ZEROS) | |
975 | + leal MUNG(OFFSETDX), %eax | |
976 | movq (%eax), %mm0 | |
977 | - movq %mm0, OFFSETD | |
978 | - leal OFFSETWX, %eax | |
979 | + movq %mm0, MUNG(OFFSETD) | |
980 | + leal MUNG(OFFSETWX), %eax | |
981 | movq (%eax), %mm0 | |
982 | - movq %mm0, OFFSETW | |
983 | - leal OFFSETBX, %eax | |
984 | + movq %mm0, MUNG(OFFSETW) | |
985 | + leal MUNG(OFFSETBX), %eax | |
986 | movq (%eax), %mm0 | |
987 | - movq %mm0, OFFSETB | |
988 | - leal YR0GRX, %eax | |
989 | + movq %mm0, MUNG(OFFSETB) | |
990 | + leal MUNG(YR0GRX), %eax | |
991 | movq (%eax), %mm0 | |
992 | - movq %mm0, YR0GR | |
993 | - leal YBG0BX, %eax | |
994 | + movq %mm0, MUNG(YR0GR) | |
995 | + leal MUNG(YBG0BX), %eax | |
996 | movq (%eax), %mm0 | |
997 | - movq %mm0, YBG0B | |
998 | - leal UR0GRX, %eax | |
999 | + movq %mm0, MUNG(YBG0B) | |
1000 | + leal MUNG(UR0GRX), %eax | |
1001 | movq (%eax), %mm0 | |
1002 | - movq %mm0, UR0GR | |
1003 | - leal UBG0BX, %eax | |
1004 | + movq %mm0, MUNG(UR0GR) | |
1005 | + leal MUNG(UBG0BX), %eax | |
1006 | movq (%eax), %mm0 | |
1007 | - movq %mm0, UBG0B | |
1008 | - leal VR0GRX, %eax | |
1009 | + movq %mm0, MUNG(UBG0B) | |
1010 | + leal MUNG(VR0GRX), %eax | |
1011 | movq (%eax), %mm0 | |
1012 | - movq %mm0, VR0GR | |
1013 | - leal VBG0BX, %eax | |
1014 | + movq %mm0, MUNG(VR0GR) | |
1015 | + leal MUNG(VBG0BX), %eax | |
1016 | movq (%eax), %mm0 | |
1017 | - movq %mm0, VBG0B | |
1018 | - | |
1019 | - movl _rows(%ebp), %eax | |
1020 | - movl _columns(%ebp), %ebx | |
1021 | + movq %mm0, MUNG(VBG0B) | |
1022 | + movl _rows(%esp), %eax | |
1023 | + movl _columns(%esp), %ebx | |
1024 | mull %ebx #number pixels | |
1025 | shrl $3, %eax #number of loops | |
1026 | movl %eax, %edi #loop counter in edi | |
1027 | - movl _inPtr(%ebp), %eax | |
1028 | - movl _outyPtr(%ebp), %ebx | |
1029 | - movl _outuPtr(%ebp), %ecx | |
1030 | - movl _outvPtr(%ebp), %edx | |
1031 | + movl _inPtr(%esp), %eax | |
1032 | + movl _outyPtr(%esp), %ebx | |
1033 | + movl _outuPtr(%esp), %ecx | |
1034 | + movl _outvPtr(%esp), %edx | |
1035 | rgbtoycb_mmx_loop: | |
1036 | movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 | |
1037 | pxor %mm6, %mm6 #0 -> mm6 | |
1038 | @@ -184,29 +186,29 @@ rgbtoycb_mmx_loop: | |
1039 | punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 | |
1040 | movq %mm0, %mm2 #R1B0G0R0 -> mm2 | |
1041 | ||
1042 | - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
1043 | + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0 | |
1044 | movq %mm1, %mm3 #B1G1R1B0 -> mm3 | |
1045 | ||
1046 | - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
1047 | + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1 | |
1048 | movq %mm2, %mm4 #R1B0G0R0 -> mm4 | |
1049 | ||
1050 | - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 | |
1051 | + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2 | |
1052 | movq %mm3, %mm5 #B1G1R1B0 -> mm5 | |
1053 | ||
1054 | - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1055 | + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3 | |
1056 | punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 | |
1057 | ||
1058 | - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1059 | + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4 | |
1060 | paddd %mm1, %mm0 #Y1Y0 -> mm0 | |
1061 | ||
1062 | - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1063 | + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5 | |
1064 | ||
1065 | movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 | |
1066 | paddd %mm3, %mm2 #U1U0 -> mm2 | |
1067 | ||
1068 | movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 | |
1069 | ||
1070 | - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 | |
1071 | + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1 | |
1072 | paddd %mm5, %mm4 #V1V0 -> mm4 | |
1073 | ||
1074 | movq %mm1, %mm5 #B3G3R3B2 -> mm5 | |
1075 | @@ -214,29 +216,29 @@ rgbtoycb_mmx_loop: | |
1076 | ||
1077 | paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 | |
1078 | ||
1079 | - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 | |
1080 | + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6 | |
1081 | movq %mm1, %mm3 #R3B2G2R2 -> mm3 | |
1082 | ||
1083 | - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1084 | + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1 | |
1085 | movq %mm5, %mm7 #B3G3R3B2 -> mm7 | |
1086 | ||
1087 | - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1088 | + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5 | |
1089 | psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 | |
1090 | ||
1091 | - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 | |
1092 | + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0 | |
1093 | movq %mm3, %mm6 #R3B2G2R2 -> mm6 | |
1094 | - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 | |
1095 | + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6 | |
1096 | psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 | |
1097 | ||
1098 | paddd %mm5, %mm1 #Y3Y2 -> mm1 | |
1099 | movq %mm7, %mm5 #B3G3R3B2 -> mm5 | |
1100 | - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 | |
1101 | + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2 | |
1102 | psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 | |
1103 | ||
1104 | - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 | |
1105 | + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2 | |
1106 | packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 | |
1107 | ||
1108 | - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1109 | + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5 | |
1110 | psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 | |
1111 | ||
1112 | movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 | |
1113 | @@ -251,58 +253,58 @@ rgbtoycb_mmx_loop: | |
1114 | movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 | |
1115 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 | |
1116 | ||
1117 | - paddw OFFSETY, %mm0 | |
1118 | + paddw MUNG(OFFSETY), %mm0 | |
1119 | movq %mm0, (%ebx) #store Y3Y2Y1Y0 | |
1120 | packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 | |
1121 | ||
1122 | - movq TEMP0, %mm0 #R5B4G4R4 -> mm0 | |
1123 | + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0 | |
1124 | addl $8, %ebx | |
1125 | - | |
1126 | - punpcklbw ZEROS, %mm7 #B5G500 -> mm7 | |
1127 | + | |
1128 | + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7 | |
1129 | movq %mm0, %mm6 #R5B4G4R4 -> mm6 | |
1130 | ||
1131 | - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU | |
1132 | + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU | |
1133 | psrlq $32, %mm0 #00R5B4 -> mm0 | |
1134 | ||
1135 | paddw %mm0, %mm7 #B5G5R5B4 -> mm7 | |
1136 | movq %mm6, %mm2 #B5B4G4R4 -> mm2 | |
1137 | ||
1138 | - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1139 | + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2 | |
1140 | movq %mm7, %mm0 #B5G5R5B4 -> mm0 | |
1141 | ||
1142 | - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1143 | + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7 | |
1144 | packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 | |
1145 | ||
1146 | addl $24, %eax #increment RGB count | |
1147 | ||
1148 | - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 | |
1149 | + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4 | |
1150 | movq %mm6, %mm4 #B5B4G4R4 -> mm4 | |
1151 | ||
1152 | - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 | |
1153 | + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4 | |
1154 | movq %mm0, %mm3 #B5G5R5B4 -> mm0 | |
1155 | ||
1156 | - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 | |
1157 | + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4 | |
1158 | paddd %mm7, %mm2 #Y5Y4 -> mm2 | |
1159 | ||
1160 | - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1161 | + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4 | |
1162 | pxor %mm7, %mm7 #0 -> mm7 | |
1163 | ||
1164 | - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1165 | + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3 | |
1166 | punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 | |
1167 | ||
1168 | paddd %mm6, %mm0 #U5U4 -> mm0 | |
1169 | movq %mm1, %mm6 #B7G7R7B6 -> mm6 | |
1170 | ||
1171 | - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1172 | + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6 | |
1173 | punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1174 | ||
1175 | movq %mm5, %mm7 #R7B6G6R6 -> mm7 | |
1176 | paddd %mm4, %mm3 #V5V4 -> mm3 | |
1177 | ||
1178 | - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1179 | + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5 | |
1180 | movq %mm1, %mm4 #B7G7R7B6 -> mm4 | |
1181 | ||
1182 | - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1183 | + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4 | |
1184 | psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 | |
1185 | ||
1186 | psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 | |
1187 | @@ -310,25 +312,25 @@ rgbtoycb_mmx_loop: | |
1188 | paddd %mm5, %mm6 #Y7Y6 -> mm6 | |
1189 | movq %mm7, %mm5 #R7B6G6R6 -> mm5 | |
1190 | ||
1191 | - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1192 | + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7 | |
1193 | psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 | |
1194 | ||
1195 | - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1196 | + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1 | |
1197 | psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 | |
1198 | ||
1199 | packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 | |
1200 | ||
1201 | - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1202 | + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5 | |
1203 | paddd %mm4, %mm7 #U7U6 -> mm7 | |
1204 | ||
1205 | psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 | |
1206 | - paddw OFFSETY, %mm2 | |
1207 | + paddw MUNG(OFFSETY), %mm2 | |
1208 | movq %mm2, (%ebx) #store Y7Y6Y5Y4 | |
1209 | ||
1210 | - movq ALLONE, %mm6 | |
1211 | + movq MUNG(ALLONE), %mm6 | |
1212 | packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 | |
1213 | ||
1214 | - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1215 | + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4 | |
1216 | pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 | |
1217 | ||
1218 | pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 | |
1219 | @@ -338,8 +340,8 @@ rgbtoycb_mmx_loop: | |
1220 | ||
1221 | psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 | |
1222 | psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 | |
1223 | - | |
1224 | - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1225 | + | |
1226 | + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5 | |
1227 | ||
1228 | movq %mm4, (%ecx) # store U | |
1229 | ||
1230 | @@ -372,6 +374,8 @@ rgbtoycb_mmx_loop: | |
1231 | ret | |
1232 | ||
1233 | .global _dv_ppm_copy_y_block_mmx | |
1234 | +.hidden _dv_ppm_copy_y_block_mmx | |
1235 | +.type _dv_ppm_copy_y_block_mmx,@function | |
1236 | _dv_ppm_copy_y_block_mmx: | |
1237 | ||
1238 | pushl %ebp | |
1239 | @@ -422,17 +426,20 @@ _dv_ppm_copy_y_block_mmx: | |
1240 | ret | |
1241 | ||
1242 | .global _dv_pgm_copy_y_block_mmx | |
1243 | +.hidden _dv_pgm_copy_y_block_mmx | |
1244 | +.type _dv_pgm_copy_y_block_mmx,@function | |
1245 | _dv_pgm_copy_y_block_mmx: | |
1246 | ||
1247 | pushl %ebp | |
1248 | - movl %esp, %ebp | |
1249 | pushl %esi | |
1250 | pushl %edi | |
1251 | - | |
1252 | - movl 8(%ebp), %edi # dest | |
1253 | - movl 12(%ebp), %esi # src | |
1254 | ||
1255 | - movq OFFSETY, %mm7 | |
1256 | + LOAD_PIC_REG_BP() | |
1257 | + | |
1258 | + movl 16(%esp), %edi # dest | |
1259 | + movl 20(%esp), %esi # src | |
1260 | + | |
1261 | + movq MUNG(OFFSETY), %mm7 | |
1262 | pxor %mm6, %mm6 | |
1263 | ||
1264 | movq (%esi), %mm0 | |
1265 | @@ -564,17 +571,20 @@ _dv_pgm_copy_y_block_mmx: | |
1266 | ret | |
1267 | ||
1268 | .global _dv_video_copy_y_block_mmx | |
1269 | +.hidden _dv_video_copy_y_block_mmx | |
1270 | +.type _dv_video_copy_y_block_mmx,@function | |
1271 | _dv_video_copy_y_block_mmx: | |
1272 | ||
1273 | pushl %ebp | |
1274 | - movl %esp, %ebp | |
1275 | pushl %esi | |
1276 | pushl %edi | |
1277 | - | |
1278 | - movl 8(%ebp), %edi # dest | |
1279 | - movl 12(%ebp), %esi # src | |
1280 | ||
1281 | - movq OFFSETBX, %mm7 | |
1282 | + LOAD_PIC_REG_BP() | |
1283 | + | |
1284 | + movl 16(%esp), %edi # dest | |
1285 | + movl 20(%esp), %esi # src | |
1286 | + | |
1287 | + movq MUNG(OFFSETBX), %mm7 | |
1288 | pxor %mm6, %mm6 | |
1289 | ||
1290 | movq (%esi), %mm0 | |
1291 | @@ -709,6 +719,8 @@ _dv_video_copy_y_block_mmx: | |
1292 | ||
1293 | ||
1294 | .global _dv_ppm_copy_pal_c_block_mmx | |
1295 | +.hidden _dv_ppm_copy_pal_c_block_mmx | |
1296 | +.type _dv_ppm_copy_pal_c_block_mmx,@function | |
1297 | _dv_ppm_copy_pal_c_block_mmx: | |
1298 | ||
1299 | pushl %ebp | |
1300 | @@ -852,19 +864,21 @@ _dv_ppm_copy_pal_c_block_mmx: | |
1301 | ret | |
1302 | ||
1303 | .global _dv_pgm_copy_pal_c_block_mmx | |
1304 | +.hidden _dv_pgm_copy_pal_c_block_mmx | |
1305 | +.type _dv_pgm_copy_pal_c_block_mmx,@function | |
1306 | _dv_pgm_copy_pal_c_block_mmx: | |
1307 | ||
1308 | pushl %ebp | |
1309 | - movl %esp, %ebp | |
1310 | pushl %esi | |
1311 | pushl %edi | |
1312 | pushl %ebx | |
1313 | - | |
1314 | - movl 8(%ebp), %edi # dest | |
1315 | - movl 12(%ebp), %esi # src | |
1316 | ||
1317 | + LOAD_PIC_REG_BP() | |
1318 | + | |
1319 | + movl 20(%esp), %edi # dest | |
1320 | + movl 24(%esp), %esi # src | |
1321 | ||
1322 | - movq OFFSETBX, %mm7 | |
1323 | + movq MUNG(OFFSETBX), %mm7 | |
1324 | pxor %mm6, %mm6 | |
1325 | ||
1326 | ||
1327 | @@ -1000,18 +1014,21 @@ _dv_pgm_copy_pal_c_block_mmx: | |
1328 | ret | |
1329 | ||
1330 | .global _dv_video_copy_pal_c_block_mmx | |
1331 | +.hidden _dv_video_copy_pal_c_block_mmx | |
1332 | +.type _dv_video_copy_pal_c_block_mmx,@function | |
1333 | _dv_video_copy_pal_c_block_mmx: | |
1334 | ||
1335 | pushl %ebp | |
1336 | - movl %esp, %ebp | |
1337 | pushl %esi | |
1338 | pushl %edi | |
1339 | pushl %ebx | |
1340 | - | |
1341 | - movl 8(%ebp), %edi # dest | |
1342 | - movl 12(%ebp), %esi # src | |
1343 | ||
1344 | - movq OFFSETBX, %mm7 | |
1345 | + LOAD_PIC_REG_BP() | |
1346 | + | |
1347 | + movl 20(%esp), %edi # dest | |
1348 | + movl 24(%esp), %esi # src | |
1349 | + | |
1350 | + movq MUNG(OFFSETBX), %mm7 | |
1351 | paddw %mm7, %mm7 | |
1352 | pxor %mm6, %mm6 | |
1353 | ||
1354 | @@ -1095,21 +1112,23 @@ video_copy_pal_c_block_mmx_loop: | |
1355 | ret | |
1356 | ||
1357 | .global _dv_ppm_copy_ntsc_c_block_mmx | |
1358 | +.hidden _dv_ppm_copy_ntsc_c_block_mmx | |
1359 | +.type _dv_ppm_copy_ntsc_c_block_mmx,@function | |
1360 | _dv_ppm_copy_ntsc_c_block_mmx: | |
1361 | ||
1362 | pushl %ebp | |
1363 | - movl %esp, %ebp | |
1364 | pushl %esi | |
1365 | pushl %edi | |
1366 | pushl %ebx | |
1367 | - | |
1368 | - movl 8(%ebp), %edi # dest | |
1369 | - movl 12(%ebp), %esi # src | |
1370 | + | |
1371 | + LOAD_PIC_REG_BP() | |
1372 | + | |
1373 | + movl 20(%esp), %edi # dest | |
1374 | + movl 24(%esp), %esi # src | |
1375 | ||
1376 | movl $4, %ebx | |
1377 | ||
1378 | - movq ALLONE, %mm6 | |
1379 | - | |
1380 | + movq MUNG(ALLONE), %mm6 | |
1381 | ppm_copy_ntsc_c_block_mmx_loop: | |
1382 | ||
1383 | movq (%esi), %mm0 | |
1384 | @@ -1168,17 +1187,20 @@ ppm_copy_ntsc_c_block_mmx_loop: | |
1385 | ret | |
1386 | ||
1387 | .global _dv_pgm_copy_ntsc_c_block_mmx | |
1388 | +.hidden _dv_pgm_copy_ntsc_c_block_mmx | |
1389 | +.type _dv_pgm_copy_ntsc_c_block_mmx,@function | |
1390 | _dv_pgm_copy_ntsc_c_block_mmx: | |
1391 | ||
1392 | pushl %ebp | |
1393 | - movl %esp, %ebp | |
1394 | pushl %esi | |
1395 | pushl %edi | |
1396 | - | |
1397 | - movl 8(%ebp), %edi # dest | |
1398 | - movl 12(%ebp), %esi # src | |
1399 | ||
1400 | - movq OFFSETBX, %mm7 | |
1401 | + LOAD_PIC_REG_BP() | |
1402 | + | |
1403 | + movl 16(%esp), %edi # dest | |
1404 | + movl 20(%esp), %esi # src | |
1405 | + | |
1406 | + movq MUNG(OFFSETBX), %mm7 | |
1407 | paddw %mm7, %mm7 | |
1408 | pxor %mm6, %mm6 | |
1409 | ||
1410 | @@ -1325,18 +1347,21 @@ _dv_pgm_copy_ntsc_c_block_mmx: | |
1411 | ret | |
1412 | ||
1413 | .global _dv_video_copy_ntsc_c_block_mmx | |
1414 | +.hidden _dv_video_copy_ntsc_c_block_mmx | |
1415 | +.type _dv_video_copy_ntsc_c_block_mmx,@function | |
1416 | _dv_video_copy_ntsc_c_block_mmx: | |
1417 | ||
1418 | pushl %ebp | |
1419 | - movl %esp, %ebp | |
1420 | pushl %esi | |
1421 | pushl %edi | |
1422 | pushl %ebx | |
1423 | - | |
1424 | - movl 8(%ebp), %edi # dest | |
1425 | - movl 12(%ebp), %esi # src | |
1426 | ||
1427 | - movq OFFSETBX, %mm7 | |
1428 | + LOAD_PIC_REG_BP() | |
1429 | + | |
1430 | + movl 20(%esp), %edi # dest | |
1431 | + movl 24(%esp), %esi # src | |
1432 | + | |
1433 | + movq MUNG(OFFSETBX), %mm7 | |
1434 | paddw %mm7, %mm7 | |
1435 | pxor %mm6, %mm6 | |
1436 | ||
1437 | diff -Nurp libdv-0.104-old/libdv/rgbtoyuv_x86_64.S libdv-0.104/libdv/rgbtoyuv_x86_64.S | |
1438 | --- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
1439 | +++ libdv-0.104/libdv/rgbtoyuv_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
1440 | @@ -41,9 +41,6 @@ | |
1441 | #define DV_WIDTH_SHORT_HALF 720 | |
1442 | #define DV_WIDTH_BYTE_HALF 360 | |
1443 | ||
1444 | -.global _dv_rgbtoycb_mmx_x86_64 | |
1445 | -# .global yuvtoycb_mmx_x86_64 | |
1446 | - | |
1447 | .data | |
1448 | ||
1449 | .align 8 | |
1450 | diff -Nurp libdv-0.104-old/libdv/transpose_x86.S libdv-0.104/libdv/transpose_x86.S | |
1451 | --- libdv-0.104-old/libdv/transpose_x86.S 2006-01-01 22:44:22.000000000 +0100 | |
1452 | +++ libdv-0.104/libdv/transpose_x86.S 2006-01-01 22:44:43.000000000 +0100 | |
1453 | @@ -1,5 +1,7 @@ | |
1454 | .text | |
1455 | .global _dv_transpose_mmx | |
1456 | +.hidden _dv_transpose_mmx | |
1457 | +.type _dv_transpose_mmx,@function | |
1458 | ||
1459 | _dv_transpose_mmx: | |
1460 | pushl %ebp | |
1461 | diff -Nurp libdv-0.104-old/libdv/transpose_x86_64.S libdv-0.104/libdv/transpose_x86_64.S | |
1462 | --- libdv-0.104-old/libdv/transpose_x86_64.S 2006-01-01 22:44:22.000000000 +0100 | |
1463 | +++ libdv-0.104/libdv/transpose_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
1464 | @@ -1,5 +1,7 @@ | |
1465 | .text | |
1466 | .global _dv_transpose_mmx_x86_64 | |
1467 | +.hidden _dv_transpose_mmx_x86_64 | |
1468 | +.type _dv_transpose_mmx_x86_64,@function | |
1469 | ||
1470 | _dv_transpose_mmx_x86_64: | |
1471 | ||
1472 | diff -Nurp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S | |
1473 | --- libdv-0.104-old/libdv/vlc_x86.S 2006-01-01 22:44:22.000000000 +0100 | |
1474 | +++ libdv-0.104/libdv/vlc_x86.S 2006-01-01 22:45:51.000000000 +0100 | |
1475 | @@ -1,29 +1,38 @@ | |
1476 | #include "asmoff.h" | |
1477 | + #include "asm_common.S" | |
1478 | + | |
1479 | .text | |
1480 | .align 4 | |
1481 | .globl dv_decode_vlc | |
1482 | +.globl asm_dv_decode_vlc | |
1483 | +.hidden asm_dv_decode_vlc | |
1484 | +asm_dv_decode_vlc = dv_decode_vlc | |
1485 | + | |
1486 | .type dv_decode_vlc,@function | |
1487 | dv_decode_vlc: | |
1488 | pushl %ebx | |
1489 | + pushl %ebp | |
1490 | ||
1491 | - /* Args are at 8(%esp). */ | |
1492 | - movl 8(%esp),%eax /* %eax is bits */ | |
1493 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1494 | + LOAD_PIC_REG_BP() | |
1495 | + | |
1496 | + /* Args are at 12(%esp). */ | |
1497 | + movl 12(%esp),%eax /* %eax is bits */ | |
1498 | + movl 16(%esp),%ebx /* %ebx is maxbits */ | |
1499 | andl $0x3f,%ebx /* limit index range STL*/ | |
1500 | ||
1501 | - movl dv_vlc_class_index_mask(,%ebx,4),%edx | |
1502 | + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx | |
1503 | andl %eax,%edx | |
1504 | - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx | |
1505 | + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx | |
1506 | sarl %cl,%edx | |
1507 | - movl dv_vlc_classes(,%ebx,4),%ecx | |
1508 | + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx | |
1509 | movsbl (%ecx,%edx,1),%edx /* %edx is class */ | |
1510 | ||
1511 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1512 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1513 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1514 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1515 | andl %eax,%ebx | |
1516 | sarl %cl,%ebx | |
1517 | ||
1518 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1519 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1520 | movl (%edx,%ebx,4),%edx | |
1521 | ||
1522 | /* Now %edx holds result, like this: | |
1523 | @@ -42,7 +51,7 @@ dv_decode_vlc: | |
1524 | movl %edx,%ecx | |
1525 | sarl $8,%ecx | |
1526 | andl $0xff,%ecx | |
1527 | - movl sign_mask(,%ecx,4),%ebx | |
1528 | + movl MUNG_ARR(sign_mask,%ecx,4),%ebx | |
1529 | andl %ebx,%eax | |
1530 | negl %eax | |
1531 | sarl $31,%eax | |
1532 | @@ -63,14 +72,14 @@ dv_decode_vlc: | |
1533 | *result = broken; | |
1534 | Note that the 'broken' pattern is all ones (i.e. 0xffffffff) | |
1535 | */ | |
1536 | - movl 12(%esp),%ebx /* %ebx is maxbits */ | |
1537 | + movl 20(%esp),%ebx /* %ebx is maxbits */ | |
1538 | subl %ecx,%ebx | |
1539 | sbbl %ebx,%ebx | |
1540 | orl %ebx,%edx | |
1541 | ||
1542 | - movl 16(%esp),%eax | |
1543 | + movl 24(%esp),%eax | |
1544 | movl %edx,(%eax) | |
1545 | - | |
1546 | + popl %ebp | |
1547 | popl %ebx | |
1548 | ret | |
1549 | ||
1550 | @@ -80,21 +89,28 @@ dv_decode_vlc: | |
1551 | .type __dv_decode_vlc,@function | |
1552 | __dv_decode_vlc: | |
1553 | pushl %ebx | |
1554 | + pushl %ebp | |
1555 | + | |
1556 | + LOAD_PIC_REG_BP() | |
1557 | ||
1558 | - /* Args are at 8(%esp). */ | |
1559 | - movl 8(%esp),%eax /* %eax is bits */ | |
1560 | + /* Args are at 12(%esp). */ | |
1561 | + movl 12(%esp),%eax /* %eax is bits */ | |
1562 | ||
1563 | movl %eax,%edx /* %edx is class */ | |
1564 | andl $0xfe00,%edx | |
1565 | sarl $9,%edx | |
1566 | +#ifdef __PIC__ | |
1567 | + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx | |
1568 | +#else | |
1569 | movsbl dv_vlc_class_lookup5(%edx),%edx | |
1570 | - | |
1571 | - movl dv_vlc_index_mask(,%edx,4),%ebx | |
1572 | - movl dv_vlc_index_rshift(,%edx,4),%ecx | |
1573 | +#endif | |
1574 | + | |
1575 | + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx | |
1576 | + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx | |
1577 | andl %eax,%ebx | |
1578 | sarl %cl,%ebx | |
1579 | ||
1580 | - movl dv_vlc_lookups(,%edx,4),%edx | |
1581 | + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx | |
1582 | movl (%edx,%ebx,4),%edx | |
1583 | ||
1584 | /* Now %edx holds result, like this: | |
1585 | @@ -112,7 +128,7 @@ __dv_decode_vlc: | |
1586 | movl %edx,%ecx | |
1587 | sarl $8,%ecx | |
1588 | andl $0xff,%ecx | |
1589 | - movl sign_mask(,%ecx,4),%ecx | |
1590 | + movl MUNG_ARR(sign_mask,%ecx,4),%ecx | |
1591 | andl %ecx,%eax | |
1592 | negl %eax | |
1593 | sarl $31,%eax | |
1594 | @@ -127,9 +143,9 @@ __dv_decode_vlc: | |
1595 | xorl %eax,%edx | |
1596 | subl %eax,%edx | |
1597 | ||
1598 | - movl 12(%esp),%eax | |
1599 | + movl 16(%esp),%eax | |
1600 | movl %edx,(%eax) | |
1601 | - | |
1602 | + popl %ebp | |
1603 | popl %ebx | |
1604 | ret | |
1605 | ||
1606 | @@ -140,13 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_ | |
1607 | */ | |
1608 | .text | |
1609 | .align 4 | |
1610 | +.globl asm_dv_parse_ac_coeffs_pass0 | |
1611 | +.hidden asm_dv_parse_ac_coeffs_pass0 | |
1612 | + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0 | |
1613 | + | |
1614 | .globl dv_parse_ac_coeffs_pass0 | |
1615 | +.type dv_parse_ac_coeffs_pass0,@function | |
1616 | dv_parse_ac_coeffs_pass0: | |
1617 | pushl %ebx | |
1618 | pushl %edi | |
1619 | pushl %esi | |
1620 | pushl %ebp | |
1621 | ||
1622 | + LOAD_PIC_REG_SI() | |
1623 | + | |
1624 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1625 | ||
1626 | /* | |
1627 | @@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0: | |
1628 | ebp bl | |
1629 | */ | |
1630 | movl ARGn(2),%ebp | |
1631 | +#ifndef __PIC__ | |
1632 | movl ARGn(0),%esi | |
1633 | movl bitstream_t_buf(%esi),%esi | |
1634 | +#endif | |
1635 | movl dv_block_t_offset(%ebp),%edi | |
1636 | movl dv_block_t_reorder(%ebp),%ebx | |
1637 | ||
1638 | @@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0: | |
1639 | ||
1640 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1641 | pxor %mm0,%mm0 | |
1642 | +#ifdef __PIC__ | |
1643 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1644 | +#else | |
1645 | pand const_f_0_0_0,%mm1 | |
1646 | +#endif | |
1647 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1648 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1649 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1650 | @@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0: | |
1651 | readloop: | |
1652 | movl %edi,%ecx | |
1653 | shrl $3,%ecx | |
1654 | +#ifdef __PIC__ | |
1655 | + pushl %esi | |
1656 | + movl ARGn(1),%esi | |
1657 | + movl bitstream_t_buf(%esi),%esi | |
1658 | +#endif | |
1659 | movzbl (%esi,%ecx,1),%eax | |
1660 | movzbl 1(%esi,%ecx,1),%edx | |
1661 | movzbl 2(%esi,%ecx,1),%ecx | |
1662 | +#ifdef __PIC__ | |
1663 | + popl %esi | |
1664 | +#endif | |
1665 | shll $16,%eax | |
1666 | shll $8,%edx | |
1667 | orl %ecx,%eax | |
1668 | @@ -217,7 +254,11 @@ readloop: | |
1669 | ||
1670 | /* Attempt to use the shortcut first. If it hits, then | |
1671 | this vlc term has been decoded. */ | |
1672 | +#ifdef __PIC__ | |
1673 | + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx | |
1674 | +#else | |
1675 | movl dv_vlc_class1_shortcut(,%ecx,4),%edx | |
1676 | +#endif | |
1677 | test $0x80,%edx | |
1678 | je done_decode | |
1679 | ||
1680 | @@ -228,12 +269,19 @@ readloop: | |
1681 | movl %ebx,dv_block_t_reorder(%ebp) | |
1682 | ||
1683 | /* %eax is bits */ | |
1684 | - | |
1685 | +#ifdef __PIC__ | |
1686 | + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx | |
1687 | + | |
1688 | + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx | |
1689 | + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx | |
1690 | + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx | |
1691 | +#else | |
1692 | movsbl dv_vlc_class_lookup5(%ecx),%ecx | |
1693 | ||
1694 | movl dv_vlc_index_mask(,%ecx,4),%ebx | |
1695 | movl dv_vlc_lookups(,%ecx,4),%edx | |
1696 | movl dv_vlc_index_rshift(,%ecx,4),%ecx | |
1697 | +#endif | |
1698 | andl %eax,%ebx | |
1699 | sarl %cl,%ebx | |
1700 | ||
1701 | @@ -256,7 +304,11 @@ readloop: | |
1702 | movl %edx,%ecx | |
1703 | sarl $8,%ecx | |
1704 | andl $0xff,%ecx | |
1705 | +#ifdef __PIC__ | |
1706 | + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx | |
1707 | +#else | |
1708 | movl sign_mask(,%ecx,4),%ecx | |
1709 | +#endif | |
1710 | andl %ecx,%eax | |
1711 | negl %eax | |
1712 | sarl $31,%eax | |
1713 | @@ -326,10 +378,16 @@ alldone: | |
1714 | ||
1715 | slowpath: | |
1716 | /* slow path: use dv_decode_vlc */; | |
1717 | +#ifdef __PIC__ | |
1718 | + pushl %esi | |
1719 | + leal vlc@GOTOFF(%esi),%esi | |
1720 | + xchgl %esi,(%esp) /* last parameter is &vlc */ | |
1721 | +#else | |
1722 | pushl $vlc /* last parameter is &vlc */ | |
1723 | +#endif | |
1724 | pushl %edx /* bits_left */ | |
1725 | pushl %eax /* bits */ | |
1726 | - call dv_decode_vlc | |
1727 | + call asm_dv_decode_vlc | |
1728 | addl $12,%esp | |
1729 | test $0x80,%edx /* If (vlc.run < 0) break */ | |
1730 | jne escape | |
1731 | @@ -359,12 +417,15 @@ show16: | |
1732 | gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) { | |
1733 | */ | |
1734 | .globl dv_parse_video_segment | |
1735 | + .type dv_parse_video_segment,@function | |
1736 | dv_parse_video_segment: | |
1737 | pushl %ebx | |
1738 | pushl %edi | |
1739 | pushl %esi | |
1740 | pushl %ebp | |
1741 | ||
1742 | + LOAD_PIC_REG_SI() | |
1743 | + | |
1744 | #define ARGn(N) (20+(4*(N)))(%esp) | |
1745 | ||
1746 | movl ARGn(1),%eax /* quality */ | |
1747 | @@ -373,7 +434,11 @@ dv_parse_video_segment: | |
1748 | jz its_mono | |
1749 | movl $6,%ebx | |
1750 | its_mono: | |
1751 | +#ifdef __PIC__ | |
1752 | + movl %ebx,n_blocks@GOTOFF(%esi) | |
1753 | +#else | |
1754 | movl %ebx,n_blocks | |
1755 | +#endif | |
1756 | ||
1757 | /* | |
1758 | * ebx seg/b | |
1759 | @@ -384,15 +449,22 @@ its_mono: | |
1760 | * ebp bl | |
1761 | */ | |
1762 | movl ARGn(0),%ebx | |
1763 | +#ifndef __PIC__ | |
1764 | movl dv_videosegment_t_bs(%ebx),%esi | |
1765 | movl bitstream_t_buf(%esi),%esi | |
1766 | +#endif | |
1767 | leal dv_videosegment_t_mb(%ebx),%edi | |
1768 | ||
1769 | movl $0,%eax | |
1770 | movl $0,%ecx | |
1771 | macloop: | |
1772 | +#ifdef __PIC__ | |
1773 | + movl %eax,m@GOTOFF(%esi) | |
1774 | + movl %ecx,mb_start@GOTOFF(%esi) | |
1775 | +#else | |
1776 | movl %eax,m | |
1777 | movl %ecx,mb_start | |
1778 | +#endif | |
1779 | ||
1780 | movl ARGn(0),%ebx | |
1781 | ||
1782 | @@ -400,7 +472,15 @@ macloop: | |
1783 | /* mb->qno = bitstream_get(bs,4); */ | |
1784 | movl %ecx,%edx | |
1785 | shr $3,%edx | |
1786 | +#ifdef __PIC__ | |
1787 | + pushl %esi | |
1788 | + movl dv_videosegment_t_bs(%ebx),%esi | |
1789 | + movl bitstream_t_buf(%esi),%esi | |
1790 | +#endif | |
1791 | movzbl 3(%esi,%edx,1),%edx | |
1792 | +#ifdef __PIC__ | |
1793 | + popl %esi | |
1794 | +#endif | |
1795 | andl $0xf,%edx | |
1796 | movl %edx,dv_macroblock_t_qno(%edi) | |
1797 | ||
1798 | @@ -411,7 +491,11 @@ macloop: | |
1799 | movl %edx,dv_macroblock_t_eob_count(%edi) | |
1800 | ||
1801 | /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ | |
1802 | +#ifdef __PIC__ | |
1803 | + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx | |
1804 | +#else | |
1805 | movl dv_super_map_vertical(,%eax,4),%edx | |
1806 | +#endif | |
1807 | movl dv_videosegment_t_i(%ebx),%ecx | |
1808 | addl %ecx,%edx | |
1809 | ||
1810 | @@ -422,11 +506,20 @@ skarly: | |
1811 | andl $1,%ecx | |
1812 | shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ | |
1813 | ||
1814 | +#ifdef __PIC__ | |
1815 | + leal mod_10@GOTOFF(%esi,%edx),%edx | |
1816 | + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1817 | +#else | |
1818 | movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ | |
1819 | +#endif | |
1820 | movl %edx,dv_macroblock_t_i(%edi) | |
1821 | ||
1822 | /* mb->j = dv_super_map_horizontal[m]; */ | |
1823 | +#ifdef __PIC__ | |
1824 | + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx | |
1825 | +#else | |
1826 | movl dv_super_map_horizontal(,%eax,4),%edx | |
1827 | +#endif | |
1828 | movl %edx,dv_macroblock_t_j(%edi) | |
1829 | ||
1830 | /* mb->k = seg->k; */ | |
1831 | @@ -445,12 +538,29 @@ blkloop: | |
1832 | +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
1833 | */ | |
1834 | /* dc = bitstream_get(bs,9); */ | |
1835 | +#ifdef __PIC__ | |
1836 | + movl mb_start@GOTOFF(%esi),%ecx | |
1837 | +#else | |
1838 | movl mb_start,%ecx | |
1839 | +#endif | |
1840 | shr $3,%ecx | |
1841 | +#ifdef __PIC__ | |
1842 | + movzbl blk_start@GOTOFF(%esi,%ebx),%edx | |
1843 | +#else | |
1844 | movzbl blk_start(%ebx),%edx | |
1845 | +#endif | |
1846 | addl %ecx,%edx | |
1847 | +#ifdef __PIC__ | |
1848 | + pushl %esi | |
1849 | + movl ARGn(1),%esi | |
1850 | + movl dv_videosegment_t_bs(%esi),%esi | |
1851 | + movl bitstream_t_buf(%esi),%esi | |
1852 | +#endif | |
1853 | movzbl (%esi,%edx,1),%eax /* hi byte */ | |
1854 | movzbl 1(%esi,%edx,1),%ecx /* lo byte */ | |
1855 | +#ifdef __PIC__ | |
1856 | + popl %esi | |
1857 | +#endif | |
1858 | shll $8,%eax | |
1859 | orl %ecx,%eax | |
1860 | ||
1861 | @@ -477,7 +587,11 @@ blkloop: | |
1862 | ||
1863 | /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ | |
1864 | shll $6,%eax | |
1865 | +#ifdef __PIC__ | |
1866 | + addl dv_reorder@GOTOFF+1(%esi),%eax | |
1867 | +#else | |
1868 | addl $(dv_reorder+1),%eax | |
1869 | +#endif | |
1870 | movl %eax,dv_block_t_reorder(%ebp) | |
1871 | ||
1872 | /* bl->reorder_sentinel = bl->reorder + 63; */ | |
1873 | @@ -485,13 +599,22 @@ blkloop: | |
1874 | movl %eax,dv_block_t_reorder_sentinel(%ebp) | |
1875 | ||
1876 | /* bl->offset= mb_start + dv_parse_bit_start[b]; */ | |
1877 | +#ifdef __PIC__ | |
1878 | + movl mb_start@GOTOFF(%esi),%ecx | |
1879 | + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax | |
1880 | +#else | |
1881 | movl mb_start,%ecx | |
1882 | movl dv_parse_bit_start(,%ebx,4),%eax | |
1883 | +#endif | |
1884 | addl %ecx,%eax | |
1885 | movl %eax,dv_block_t_offset(%ebp) | |
1886 | ||
1887 | /* bl->end= mb_start + dv_parse_bit_end[b]; */ | |
1888 | +#ifdef __PIC__ | |
1889 | + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax | |
1890 | +#else | |
1891 | movl dv_parse_bit_end(,%ebx,4),%eax | |
1892 | +#endif | |
1893 | addl %ecx,%eax | |
1894 | movl %eax,dv_block_t_end(%ebp) | |
1895 | ||
1896 | @@ -503,7 +626,11 @@ blkloop: | |
1897 | /* no AC pass. Just zero out the remaining coeffs */ | |
1898 | movq dv_block_t_coeffs(%ebp),%mm1 | |
1899 | pxor %mm0,%mm0 | |
1900 | +#ifdef __PIC__ | |
1901 | + pand const_f_0_0_0@GOTOFF(%esi),%mm1 | |
1902 | +#else | |
1903 | pand const_f_0_0_0,%mm1 | |
1904 | +#endif | |
1905 | movq %mm1,dv_block_t_coeffs(%ebp) | |
1906 | movq %mm0,(dv_block_t_coeffs + 8)(%ebp) | |
1907 | movq %mm0,(dv_block_t_coeffs + 16)(%ebp) | |
1908 | @@ -528,18 +655,27 @@ do_ac_pass: | |
1909 | pushl %ebp | |
1910 | pushl %edi | |
1911 | pushl %eax | |
1912 | - call dv_parse_ac_coeffs_pass0 | |
1913 | + call asm_dv_parse_ac_coeffs_pass0 | |
1914 | addl $12,%esp | |
1915 | done_ac: | |
1916 | ||
1917 | +#ifdef __PIC__ | |
1918 | + movl n_blocks@GOTOFF(%esi),%eax | |
1919 | +#else | |
1920 | movl n_blocks,%eax | |
1921 | +#endif | |
1922 | addl $dv_block_t_size,%ebp | |
1923 | incl %ebx | |
1924 | cmpl %eax,%ebx | |
1925 | jnz blkloop | |
1926 | ||
1927 | +#ifdef __PIC__ | |
1928 | + movl m@GOTOFF(%esi),%eax | |
1929 | + movl mb_start@GOTOFF(%esi),%ecx | |
1930 | +#else | |
1931 | movl m,%eax | |
1932 | movl mb_start,%ecx | |
1933 | +#endif | |
1934 | addl $(8 * 80),%ecx | |
1935 | addl $dv_macroblock_t_size,%edi | |
1936 | incl %eax | |
1937 | @@ -557,7 +693,7 @@ done_ac: | |
1938 | ||
1939 | andl $DV_QUALITY_AC_MASK,%eax | |
1940 | cmpl $DV_QUALITY_AC_2,%eax | |
1941 | - jz dv_parse_ac_coeffs | |
1942 | + jz asm_dv_parse_ac_coeffs | |
1943 | movl $0,%eax | |
1944 | ret | |
1945 | ||
1946 | diff -Nurp libdv-0.104-old/libdv/vlc_x86_64.S libdv-0.104/libdv/vlc_x86_64.S | |
1947 | --- libdv-0.104-old/libdv/vlc_x86_64.S 2006-01-01 22:44:23.000000000 +0100 | |
1948 | +++ libdv-0.104/libdv/vlc_x86_64.S 2006-01-01 22:44:43.000000000 +0100 | |
1949 | @@ -169,7 +169,8 @@ void dv_parse_ac_coeffs_pass0(bitstream_ | |
1950 | .text | |
1951 | .align 4 | |
1952 | .globl dv_parse_ac_coeffs_pass0 | |
1953 | - | |
1954 | +.type dv_parse_ac_coeffs_pass0,@function | |
1955 | + | |
1956 | dv_parse_ac_coeffs_pass0: | |
1957 | ||
1958 | /* Args are at rdi=bs, rsi=mb, rdx=bl */ | |
1959 | @@ -422,6 +423,7 @@ show16: /* not u | |
1960 | gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) { | |
1961 | */ | |
1962 | .globl dv_parse_video_segment | |
1963 | + .type dv_parse_video_segment,@function | |
1964 | dv_parse_video_segment: | |
1965 | ||
1966 | /* Args are at rdi=seg, rsi=quality */ | |
1967 | diff -Nurp libdv-0.104-old/libdv-0.104/libdv/asm_common.S libdv-0.104/libdv-0.104/libdv/asm_common.S | |
1968 | --- libdv-0.104-old/libdv-0.104/libdv/asm_common.S 1970-01-01 01:00:00.000000000 +0100 | |
1969 | +++ libdv-0.104/libdv-0.104/libdv/asm_common.S 2006-01-01 22:44:43.000000000 +0100 | |
1970 | @@ -0,0 +1,37 @@ | |
1971 | +/* public domain, do what you want */ | |
1972 | + | |
1973 | +#ifdef __PIC__ | |
1974 | +# define MUNG(sym) sym##@GOTOFF(%ebp) | |
1975 | +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args) | |
1976 | +#else | |
1977 | +# define MUNG(sym) sym | |
1978 | +# define MUNG_ARR(sym, args...) sym(,##args) | |
1979 | +#endif | |
1980 | + | |
1981 | +#ifdef __PIC__ | |
1982 | +# undef __i686 /* gcc define gets in our way */ | |
1983 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits | |
1984 | +.globl __i686.get_pc_thunk.bp | |
1985 | + .hidden __i686.get_pc_thunk.bp | |
1986 | + .type __i686.get_pc_thunk.bp,@function | |
1987 | +__i686.get_pc_thunk.bp: | |
1988 | + movl (%esp), %ebp | |
1989 | + ret | |
1990 | +# define LOAD_PIC_REG_BP() \ | |
1991 | + call __i686.get_pc_thunk.bp ; \ | |
1992 | + addl $_GLOBAL_OFFSET_TABLE_, %ebp | |
1993 | + | |
1994 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits | |
1995 | +.globl __i686.get_pc_thunk.si | |
1996 | + .hidden __i686.get_pc_thunk.si | |
1997 | + .type __i686.get_pc_thunk.si,@function | |
1998 | +__i686.get_pc_thunk.si: | |
1999 | + movl (%esp), %esi | |
2000 | + ret | |
2001 | +# define LOAD_PIC_REG_SI() \ | |
2002 | + call __i686.get_pc_thunk.si ; \ | |
2003 | + addl $_GLOBAL_OFFSET_TABLE_, %esi | |
2004 | +#else | |
2005 | +# define LOAD_PIC_REG_BP() | |
2006 | +# define LOAD_PIC_REG_SI() | |
2007 | +#endif |