This patch splits up the inline assembly in transpose4x4() because otherwise some versions of gcc (at least Debian's 4.3.2) aren't able to handle the number of variables to assign registers to when compiling this code with -fPIC. Signed-Off: Mathias Krause diff -Nrup a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c --- a/libavcodec/x86/dsputil_mmx.c 2010-04-16 22:04:30.000000000 +0200 +++ b/libavcodec/x86/dsputil_mmx.c 2010-09-03 21:14:43.000000000 +0200 @@ -725,15 +725,23 @@ static void h263_v_loop_filter_mmx(uint8 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... - "movd %4, %%mm0 \n\t" - "movd %5, %%mm1 \n\t" - "movd %6, %%mm2 \n\t" - "movd %7, %%mm3 \n\t" + "movd %0, %%mm0 \n\t" + "movd %1, %%mm1 \n\t" + "movd %2, %%mm2 \n\t" + "movd %3, %%mm3 \n\t" "punpcklbw %%mm1, %%mm0 \n\t" "punpcklbw %%mm3, %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" + : /* nothing */ + : "m" (*(uint32_t*)(src + 0*src_stride)), + "m" (*(uint32_t*)(src + 1*src_stride)), + "m" (*(uint32_t*)(src + 2*src_stride)), + "m" (*(uint32_t*)(src + 3*src_stride)) + ); + + __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... "movd %%mm0, %0 \n\t" "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, %1 \n\t" @@ -745,10 +753,6 @@ static inline void transpose4x4(uint8_t "=m" (*(uint32_t*)(dst + 1*dst_stride)), "=m" (*(uint32_t*)(dst + 2*dst_stride)), "=m" (*(uint32_t*)(dst + 3*dst_stride)) - : "m" (*(uint32_t*)(src + 0*src_stride)), - "m" (*(uint32_t*)(src + 1*src_stride)), - "m" (*(uint32_t*)(src + 2*src_stride)), - "m" (*(uint32_t*)(src + 3*src_stride)) ); }